generated from coulomb/repo-seed
- Apply SQLite backup CronJobs (LLDAP, Authelia, privacyIDEA) — all verified running
- Fix authelia-backup: remove scale-down/up dance; concurrent local-path PVC mount works on single-node k3s, sqlite3 .backup is safe for concurrent access
- Fix privacyidea-backup: add supplementalGroups: [999] so uid=1000 can read enckey
- Add allow-backup-to-kube-api NetworkPolicy (backup pod → 10.43.0.1:443)
- Create break-glass LLDAP account (net-kingdom-admins); fix ((PASS++)) set-e trap
- SQLite restore drill: LLDAP backup valid (2 users, all tables)
- verify-t08.sh: PASS=15, FAIL=0; fix counter bug + enckey PVC path (/etc/privacyidea)
- Update DR-RUNBOOK.md Authelia restore procedure
- T09 deferred: CNPG backup (needs MinIO/S3), Prometheus (needs kube-prometheus-stack)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
176 lines
8.2 KiB
Bash
Executable File
176 lines
8.2 KiB
Bash
Executable File
#!/usr/bin/env bash
# verify-t08.sh — verify NK-WP-0001-T08 done-criteria
#
# Checks backups and DR readiness. The break-glass account is not checked by
# any section below and must be verified separately.
#
# Sections:
#   1. Backup CronJobs exist (lldap-backup, authelia-backup, privacyidea-backup)
#   2. backup-sa ServiceAccount and RBAC exist
#   3. lldap-backup has run successfully at least once
#   4. authelia-backup has run successfully at least once
#   5. privacyidea-backup has run successfully at least once
#   6. privacyIDEA enckey backup exists on PVC
#   7. LLDAP SQLite backup exists on PVC
#   8. DR-RUNBOOK.md present in repo
#   9. KeePassXC ops bundle (pack-bundle.sh) — manual confirmation required
#
# Usage:
#   chmod +x verify-t08.sh
#   ./verify-t08.sh

# Strict mode: abort on unhandled errors, unset variables and failed pipelines.
set -euo pipefail

# Namespaces that host the backup CronJobs. The defaults match the deployed
# layout; either may be overridden from the environment.
SSO_NAMESPACE="${SSO_NAMESPACE:-sso}"
MFA_NAMESPACE="${MFA_NAMESPACE:-mfa}"

# Absolute directory containing this script. Output of `cd` is discarded so a
# CDPATH setting cannot prepend an extra line to the captured path.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"

# Result counters, incremented by pass() / fail() / warn().
PASS=0
FAIL=0
WARN=0

# Result reporters: print one tagged line and bump the matching counter.
# The counters use the x=$((x + 1)) form on purpose: (( x++ )) returns a
# non-zero status when the previous value is 0, which aborts under `set -e`.

# pass MESSAGE — report a satisfied check.
pass() {
  printf ' [PASS] %s\n' "$1"
  PASS=$((PASS + 1))
}

# fail MESSAGE — report a check that blocks completion.
fail() {
  printf ' [FAIL] %s\n' "$1"
  FAIL=$((FAIL + 1))
}

# warn MESSAGE — report something that needs follow-up but is not a failure.
warn() {
  printf ' [WARN] %s\n' "$1"
  WARN=$((WARN + 1))
}

# section TITLE — print a blank line followed by a section banner.
section() {
  printf '\n'
  printf '── %s ──────────────────────────────────────\n' "$1"
}

# check_cronjob NAME NAMESPACE
#
# Verifies that CronJob NAME can be fetched from NAMESPACE.
#   found     -> two PASS lines: one for existence, one showing the schedule
#   not found -> one FAIL line pointing at the manifest to apply
check_cronjob() {
  local name="$1"
  local ns="$2"
  local schedule

  # A single `kubectl get` answers both questions: its exit status tells us
  # whether the CronJob exists and its output is the schedule.
  if schedule=$(kubectl get cronjob "$name" -n "$ns" \
    -o jsonpath='{.spec.schedule}' 2>/dev/null); then
    pass "CronJob $name exists (namespace: $ns)"
    # "?" stands in for a schedule that came back empty.
    pass " Schedule: ${schedule:-?}"
  else
    fail "CronJob $name not found in namespace $ns — apply backup/cronjob-sqlite-backups.yaml"
  fi
}

# check_last_job CRONJOB NAMESPACE
#
# Reports whether any Job owned by CRONJOB in NAMESPACE has succeeded.
#   at least one succeeded count > 0 -> one PASS line
#   otherwise                        -> two WARN lines, the second showing how
#                                       to trigger a manual run
# Always returns 0 so a missing run never aborts the script under `set -e`.
check_last_job() {
  local cronjob="$1"
  local ns="$2"
  local succeeded
  # Matches a whitespace-delimited positive integer anywhere in the output.
  local positive_re='(^|[[:space:]])0*[1-9][0-9]*([[:space:]]|$)'

  # Collect .status.succeeded from every Job whose first ownerReference is
  # this CronJob. A failed query is treated the same as "no jobs".
  succeeded=$(kubectl get job -n "$ns" \
    -o jsonpath="{.items[?(@.metadata.ownerReferences[0].name==\"$cronjob\")].status.succeeded}" \
    2>/dev/null || echo "")

  if [[ "$succeeded" =~ $positive_re ]]; then
    pass "CronJob $cronjob has at least one successful run"
  else
    warn "CronJob $cronjob has no successful runs yet — trigger manually to test:"
    warn " kubectl create job -n $ns --from=cronjob/$cronjob ${cronjob}-manual-test"
  fi
  return 0
}

# ── 1. Backup CronJobs ────────────────────────────────────────────────────────
# The LLDAP and Authelia backups are checked in the SSO namespace, the
# privacyIDEA backup in the MFA namespace.
section "1. Backup CronJobs"
check_cronjob "lldap-backup" "$SSO_NAMESPACE"
check_cronjob "authelia-backup" "$SSO_NAMESPACE"
check_cronjob "privacyidea-backup" "$MFA_NAMESPACE"

# ── 2. RBAC ───────────────────────────────────────────────────────────────────

# check_sso_resource KIND LABEL NAME [HINT]
#
# Records a PASS when `kubectl get KIND NAME` succeeds in $SSO_NAMESPACE,
# otherwise a FAIL. LABEL is the human-readable kind used in the message;
# HINT, when given, is appended to the FAIL message after an em dash.
check_sso_resource() {
  local kind="$1"
  local label="$2"
  local name="$3"
  local hint="${4:-}"

  if kubectl get "$kind" "$name" -n "$SSO_NAMESPACE" &>/dev/null; then
    pass "$label $name exists"
  else
    fail "$label $name not found${hint:+ — $hint}"
  fi
}

section "2. Backup ServiceAccount and RBAC (namespace: $SSO_NAMESPACE)"
check_sso_resource serviceaccount "ServiceAccount" backup-sa \
  "apply backup/cronjob-sqlite-backups.yaml"
check_sso_resource role "Role" backup-scaler
check_sso_resource rolebinding "RoleBinding" backup-sa-scaler

# ── 3–5. CronJob run history ──────────────────────────────────────────────────
# Each CronJob gets its own numbered section so the report lines up with the
# done-criteria list in the file header.
section "3. lldap-backup run history"
check_last_job "lldap-backup" "$SSO_NAMESPACE"

section "4. authelia-backup run history"
check_last_job "authelia-backup" "$SSO_NAMESPACE"

section "5. privacyidea-backup run history"
check_last_job "privacyidea-backup" "$MFA_NAMESPACE"

# ── 6–7. Backup files on PVCs ────────────────────────────────────────────────

# first_running_pod NAMESPACE LABEL_SELECTOR
#
# Prints the name of the first pod in phase Running that matches the selector,
# or an empty string when the lookup fails (for example: no matching pod).
first_running_pod() {
  local ns="$1"
  local selector="$2"

  kubectl get pod -n "$ns" \
    -l "$selector" \
    --field-selector=status.phase=Running \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo ""
}

# count_backup_files NAMESPACE POD GLOB
#
# Prints how many paths inside POD match GLOB. Always prints a plain
# non-negative integer: a failed exec or non-numeric output is reported as 0.
count_backup_files() {
  local ns="$1"
  local pod="$2"
  local pattern="$3"
  local raw

  # GLOB is handed to the remote shell as $1 and left unquoted there on
  # purpose, so it is expanded inside the container rather than locally.
  # shellcheck disable=SC2016
  raw=$(kubectl exec -n "$ns" "$pod" -- \
    sh -c 'ls $1 2>/dev/null | wc -l' sh "$pattern" 2>/dev/null || echo "0")

  raw="${raw%%$'\n'*}"      # keep the first line only
  raw="${raw//[[:space:]]/}" # wc may pad its output with blanks
  [[ "$raw" =~ ^[0-9]+$ ]] || raw=0
  echo "$raw"
}

# ── 6. privacyIDEA enckey backup on PVC ──────────────────────────────────────
section "6. privacyIDEA enckey backup on PVC"
PI_POD=$(first_running_pod "$MFA_NAMESPACE" "app.kubernetes.io/name=privacyidea")
if [[ -n "$PI_POD" ]]; then
  # PI PVC is mounted at /etc/privacyidea (not /data) in the privacyIDEA container
  BACKUP_COUNT=$(count_backup_files "$MFA_NAMESPACE" "$PI_POD" \
    '/etc/privacyidea/backups/enckey.backup.*')
  if [[ "$BACKUP_COUNT" -gt 0 ]]; then
    pass "privacyIDEA enckey backups found on PVC ($BACKUP_COUNT file(s))"
  else
    warn "No enckey backup files on PVC yet — trigger privacyidea-backup CronJob to create one"
    warn " kubectl create job -n $MFA_NAMESPACE --from=cronjob/privacyidea-backup pi-backup-test"
  fi
else
  warn "Skipping enckey backup check — no running privacyIDEA pod"
fi

# ── 7. LLDAP SQLite backup on PVC ────────────────────────────────────────────
section "7. LLDAP SQLite backup on PVC"
LLDAP_POD=$(first_running_pod "$SSO_NAMESPACE" "app.kubernetes.io/name=lldap")
if [[ -n "$LLDAP_POD" ]]; then
  BACKUP_COUNT=$(count_backup_files "$SSO_NAMESPACE" "$LLDAP_POD" \
    '/data/backups/users.backup.*')
  if [[ "$BACKUP_COUNT" -gt 0 ]]; then
    pass "LLDAP SQLite backups found on PVC ($BACKUP_COUNT file(s))"
  else
    warn "No LLDAP backup files on PVC yet — trigger lldap-backup CronJob to create one"
    warn " kubectl create job -n $SSO_NAMESPACE --from=cronjob/lldap-backup lldap-backup-test"
  fi
else
  warn "Skipping LLDAP backup check — no running LLDAP pod"
fi

# ── 8. DR runbook present ─────────────────────────────────────────────────────
# The runbook is looked up relative to this script's own directory, so the
# check does not depend on the caller's working directory.
section "8. DR runbook"
RUNBOOK="$SCRIPT_DIR/backup/DR-RUNBOOK.md"
if [[ -f "$RUNBOOK" ]]; then
  pass "DR-RUNBOOK.md present at $RUNBOOK"
else
  fail "DR-RUNBOOK.md not found — it should be at sso-mfa/k8s/backup/DR-RUNBOOK.md"
fi

# ── 9. Offsite backup (manual confirmation) ───────────────────────────────────
# The offsite bundle cannot be inspected from here. By default this section
# emits the manual checklist as WARN lines. Because those warnings are
# unconditional, an operator who has completed the checklist can set
# OFFSITE_BACKUP_CONFIRMED=1 to record a PASS instead; without that the run
# always ends with at least these warnings.
section "9. Offsite backup (manual)"
if [[ "${OFFSITE_BACKUP_CONFIRMED:-}" == "1" ]]; then
  pass "Offsite backup confirmed by operator (OFFSITE_BACKUP_CONFIRMED=1)"
else
  warn "Cannot verify offsite backup automatically — confirm manually:"
  warn " - pack-bundle.sh has been run with current secrets"
  warn " - ops-bundle.tar.age stored in a separate physical location"
  warn " - age decryption key stored separately (NOT in the same location as the bundle)"
  # Plain echo: this is a usage hint, not an additional warning to count.
  echo " (once confirmed, re-run with OFFSITE_BACKUP_CONFIRMED=1 to record it)"
fi

# ── Summary ───────────────────────────────────────────────────────────────────
# Prints the totals and a verdict. Exit status is 1 when any check failed and
# 0 otherwise; warnings alone do not produce a failing exit status.
echo ""
echo "════════════════════════════════════════════════════════════"
echo " T08 verification: PASS=$PASS WARN=$WARN FAIL=$FAIL"
echo "════════════════════════════════════════════════════════════"

if [[ "$FAIL" -gt 0 ]]; then
  echo " Result: INCOMPLETE — resolve FAIL items before marking T08 done"
  exit 1
fi

if [[ "$WARN" -gt 0 ]]; then
  echo " Result: PARTIAL — structure is in place; resolve WARN items (trigger CronJobs)"
else
  echo " Result: COMPLETE — T08 done-criteria met; SSO/MFA platform workplan complete!"
fi
exit 0