Add isolated-namespace restore drill (CNPG cluster, PVC, orchestration script) and document successful 2026-07-04 run: production forgejo dump restored with health 200 and pilot repos visible via API. Scheduled backups remain open.
115 lines
5.0 KiB
Bash
Executable File
115 lines
5.0 KiB
Bash
Executable File
#!/usr/bin/env bash
# Non-production Forgejo backup/restore drill (RAIL-HO-WP-0005-T09).
#
# Restores a `forgejo dump` archive into the isolated namespace
# forgejo-restore-drill and checks the restored instance over a port-forward.
#
# Re-run: DRILL_CLEAN=1 ./tools/forgejo-restore-drill.sh (wipes namespace first)
#
# Environment overrides:
#   KUBECONFIG    kubeconfig to use (default: $HOME/.kube/config-hosteurope)
#   DRILL_CLEAN   set to 1 to delete the drill namespace before starting
#   BACKUP_LOCAL  local path of the dump archive; reused when it already exists
#   PROD_POD      production Forgejo pod to dump from (auto-detected if unset)
set -euo pipefail

KUBECONFIG="${KUBECONFIG:-$HOME/.kube/config-hosteurope}"
export KUBECONFIG
NS=forgejo-restore-drill
DRILL_CLEAN="${DRILL_CLEAN:-0}"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# The script lives one level below the repository root.
ROOT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
BACKUP_LOCAL="${BACKUP_LOCAL:-/tmp/forgejo-drill/forgejo-drill-backup.zip}"

# The production pod is only needed to take a fresh dump. When a local archive
# already exists, skip the lookup so a re-run neither touches nor depends on
# the production namespace.
PROD_POD="${PROD_POD:-}"
if [[ -z "${PROD_POD}" && ! -f "${BACKUP_LOCAL}" ]]; then
  PROD_POD="$(kubectl get pods -n forgejo -l app.kubernetes.io/instance=forgejo \
    -o jsonpath='{.items[0].metadata.name}')"
  if [[ -z "${PROD_POD}" ]]; then
    echo "error: no pod labelled app.kubernetes.io/instance=forgejo in namespace forgejo" >&2
    exit 1
  fi
fi
|
|
|
|
# Print a progress banner: all arguments joined into one line, prefixed "==> ".
step() {
  printf '==> %s\n' "$*"
}
|
|
|
|
# Optional reset: with DRILL_CLEAN=1 the whole drill namespace is deleted so
# the run starts from scratch.
if [[ "${DRILL_CLEAN}" == "1" ]]; then
  step "Clean prior drill namespace ${NS}"
  # --ignore-not-found keeps a first run (nothing to delete) successful, while
  # a failed or timed-out deletion now aborts the drill instead of continuing
  # against a namespace that may still be terminating.
  kubectl delete namespace "${NS}" --ignore-not-found --wait=true --timeout=5m
fi
|
|
|
|
step "Create namespace ${NS}"
# Render the Namespace manifest client-side and hand it to apply, so the step
# goes through `kubectl apply` rather than a bare `kubectl create`.
kubectl create namespace "${NS}" --dry-run=client -o yaml \
  | kubectl apply -f -

step "Copy forgejo-db-credentials into ${NS}"
# Filter for the Secret JSON: keep only name/labels/annotations in metadata,
# set the namespace to the drill namespace, and print the result.
retarget_secret_py="import json,sys; s=json.load(sys.stdin); s['metadata']={k:v for k,v in s['metadata'].items() if k in ('name','labels','annotations')}; s['metadata']['namespace']='${NS}'; print(json.dumps(s))"
kubectl get secret forgejo-db-credentials -n databases -o json \
  | python3 -c "${retarget_secret_py}" \
  | kubectl apply -f -
|
|
|
|
step "Deploy restore CNPG cluster"
restore_cluster_manifest="${ROOT_DIR}/infra/forgejo-restore-drill/forgejo-db-restore-cluster.yaml"
kubectl apply -f "${restore_cluster_manifest}"
# Block (for at most 10 minutes) until the cluster reports condition Ready.
kubectl wait cluster/forgejo-db-restore \
  --namespace "${NS}" \
  --for=condition=Ready \
  --timeout=10m
|
|
|
|
step "Ensure local backup exists"
# A dump is only taken from the production pod when no local archive is
# present, so re-runs reuse the same backup file.
if [[ ! -f "${BACKUP_LOCAL}" ]]; then
  remote_dump=/tmp/forgejo-drill-backup.zip
  # Clear any leftover from an interrupted earlier run before dumping.
  kubectl exec -n forgejo "${PROD_POD}" -c gitea -- rm -f "${remote_dump}"
  kubectl exec -n forgejo "${PROD_POD}" -c gitea -- forgejo dump -f "${remote_dump}"
  mkdir -p "$(dirname "${BACKUP_LOCAL}")"
  # Download to a temporary name and rename only after checking the result, so
  # an interrupted or empty copy is never mistaken for a valid backup later.
  partial_backup="${BACKUP_LOCAL}.partial"
  kubectl cp "forgejo/${PROD_POD}:${remote_dump}" "${partial_backup}" -c gitea
  if [[ ! -s "${partial_backup}" ]]; then
    echo "error: copied backup ${partial_backup} is missing or empty" >&2
    rm -f -- "${partial_backup}"
    exit 1
  fi
  mv -- "${partial_backup}" "${BACKUP_LOCAL}"
  # Best effort: do not leave the archive behind in the production pod. The
  # local copy is already safe, so a failure here only warns.
  kubectl exec -n forgejo "${PROD_POD}" -c gitea -- rm -f "${remote_dump}" \
    || echo "warning: could not remove ${remote_dump} from ${PROD_POD}" >&2
fi
ls -lh "${BACKUP_LOCAL}"
|
|
|
|
step "Apply restore PVC"
# NOTE(review): the manifest is named restore-job.yaml but, per the step label,
# is expected to provide the restore PVC - confirm against the manifest.
restore_pvc_manifest="${ROOT_DIR}/infra/forgejo-restore-drill/restore-job.yaml"
kubectl apply --filename "${restore_pvc_manifest}"
|
|
|
|
step "Run restore pod (stage backup, import files + SQL)"
# Start from a clean slate: remove an import pod left over from an earlier run.
kubectl delete pod forgejo-restore-import -n "${NS}" --ignore-not-found --wait=true

# Helper pod that mounts the restore PVC at /data and a scratch volume at
# /backup. The database host and password are injected through the pod spec
# (the password straight from the forgejo-db-credentials Secret), so the
# password never appears on a kubectl command line or in the local process
# list, and the host follows ${NS} instead of a hard-coded namespace.
kubectl apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: forgejo-restore-import
  namespace: ${NS}
spec:
  restartPolicy: Never
  containers:
    - name: restore
      image: code.forgejo.org/forgejo/forgejo:11.0.3
      command: ["sleep", "3600"]
      env:
        - name: PGHOST
          value: forgejo-db-restore-rw.${NS}.svc.cluster.local
        - name: PGPASSWORD
          valueFrom:
            secretKeyRef:
              name: forgejo-db-credentials
              key: password
      volumeMounts:
        - name: data
          mountPath: /data
        - name: backup
          mountPath: /backup
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: forgejo-restore-data
    - name: backup
      emptyDir: {}
EOF
kubectl wait --for=condition=Ready pod/forgejo-restore-import -n "${NS}" --timeout=3m

# Stage the archive inside the pod, then unpack it and import files + SQL.
kubectl cp "${BACKUP_LOCAL}" "${NS}/forgejo-restore-import:/backup/forgejo-drill-backup.zip" -c restore
# The script below is single-quoted, so every expansion in it happens inside
# the pod; PGHOST and PGPASSWORD come from the container environment set above.
kubectl exec -n "${NS}" forgejo-restore-import -c restore -- sh -c '
set -eu
apk add --no-cache unzip postgresql-client >/dev/null
rm -rf /data/*
mkdir -p /data/git/gitea-repositories
unzip -q /backup/forgejo-drill-backup.zip -d /tmp/dump
cp -a /tmp/dump/repos/. /data/git/gitea-repositories/
cp -a /tmp/dump/data/. /data/
chown -R git:git /data
# ON_ERROR_STOP makes psql exit non-zero on the first failing statement.
psql -h "${PGHOST}" -U forgejo -d forgejo -v ON_ERROR_STOP=1 -f /tmp/dump/forgejo-db.sql
echo restore-import-ok
'
# Reached only on success; after a failure the pod stays for inspection and is
# removed by the delete at the top of this step on the next run.
kubectl delete pod forgejo-restore-import -n "${NS}" --wait=true
|
|
|
|
step "Deploy isolated Forgejo release"
# The Helm values files are read from the railiance-apps checkout; APPS_DIR
# may point at a checkout in a different location.
APPS_DIR="${APPS_DIR:-${HOME}/railiance-apps}"
cd "${APPS_DIR}"

DB_PASS="$(kubectl get secret forgejo-db-credentials -n "${NS}" -o jsonpath='{.data.password}' | base64 -d)"
# Hand the password to helm as a values document on stdin (-f -) rather than
# through --set: it stays out of the process argument list, and --set would
# misparse passwords containing commas or backslashes (or turn an all-digit
# password into a number). json.dumps emits a quoted string that is also valid
# YAML. The here-string adds one trailing newline, which rstrip removes again.
secret_values="$(python3 -c 'import json,sys; print(json.dumps({"gitea":{"config":{"database":{"PASSWD":sys.stdin.read().rstrip("\n")}}}}))' <<<"${DB_PASS}")"
unset DB_PASS

# "-f -" comes after the two values files so the password overrides anything
# they set. The admin password is a fixed throwaway value for this drill-only
# release, which is deployed with ingress disabled.
helm upgrade --install forgejo-restore gitea-charts/gitea --version 12.5.0 \
  --namespace "${NS}" --create-namespace \
  -f helm/forgejo-values.yaml \
  -f helm/forgejo-registry-values.yaml \
  -f - \
  --set strategy.type=Recreate \
  --set persistence.existingClaim=forgejo-restore-data \
  --set "gitea.config.database.HOST=forgejo-db-restore-rw.${NS}.svc.cluster.local:5432" \
  --set gitea.config.server.DOMAIN=forgejo-restore.local \
  --set gitea.config.server.ROOT_URL=http://forgejo-restore.local:3000/ \
  --set gitea.admin.password=restore-drill-local-only \
  --set ingress.enabled=false \
  --wait --timeout=10m <<<"${secret_values}"
unset secret_values
|
|
|
|
step "Post-restore checks via port-forward"
PF_LOG=/tmp/forgejo-restore-pf.log
kubectl port-forward -n "${NS}" svc/forgejo-restore-gitea-http 13000:3000 >"${PF_LOG}" 2>&1 &
PF_PID=$!

# Stop the port-forward on every exit path: under `set -e` a failing check
# would otherwise leave it running in the background.
stop_port_forward() { kill "${PF_PID}" 2>/dev/null || true; }
trap stop_port_forward EXIT

# Poll until the forwarded port answers (any HTTP status) instead of relying on
# a fixed sleep; give up after about 30 seconds or if the port-forward dies.
pf_ready=0
for _ in {1..30}; do
  if ! kill -0 "${PF_PID}" 2>/dev/null; then
    echo "error: port-forward exited early; see ${PF_LOG}" >&2
    exit 1
  fi
  if curl -sS -o /dev/null http://127.0.0.1:13000/ 2>/dev/null; then
    pf_ready=1
    break
  fi
  sleep 1
done
if [[ "${pf_ready}" != "1" ]]; then
  echo "error: port-forward did not become reachable; see ${PF_LOG}" >&2
  exit 1
fi

# -f turns an HTTP error status into a non-zero exit, which aborts the drill.
curl -fsS -o /dev/null -w 'health:%{http_code}\n' http://127.0.0.1:13000/

# Fetch one repository (owner/name) from the restored instance's API and print
# its full name and default branch.
check_repo() {
  curl -fsS "http://127.0.0.1:13000/api/v1/repos/$1" \
    | python3 -c "import json,sys; d=json.load(sys.stdin); print('repo', d.get('full_name'), d.get('default_branch'))"
}
check_repo coulomb/glas-harness
check_repo coulomb/key-cape

stop_port_forward
trap - EXIT
echo "restore-drill-complete"