Files
net-kingdom/sso-mfa/k8s/backup/cronjob-sqlite-backups.yaml
Bernd Worsch 6c062e1295 feat(sso-mfa): T07/T08 user mgmt, backups, DR & break-glass (NK-WP-0001-T07/T08)
T07 — User management & self-service:
- k8s/lldap/bootstrap-users.sh: creates net-kingdom-users and net-kingdom-admins
  groups in LLDAP via GraphQL API; idempotent.
- k8s/lldap/break-glass.sh: creates break-glass bypass account in LLDAP,
  sets BREAKGLASS_PASSWORD, assigns to net-kingdom-admins.
- k8s/verify-t07.sh: 6 checks — groups, break-glass, self-service portal,
  KeyCape OIDC client registrations.

T08 — Backups, DR, break-glass:
- k8s/backup/cronjob-sqlite-backups.yaml: daily CronJobs for LLDAP SQLite,
  Authelia SQLite (with scale-down/up RBAC), and privacyIDEA enckey backup.
  7-day retention, 03:00/03:15/03:30 UTC staggered schedule.
- k8s/backup/DR-RUNBOOK.md: full restore runbook — scenarios, restore order,
  LLDAP/Authelia/PI SQLite restore procedure, full node rebuild sequence,
  offsite age-encrypted export.
- k8s/verify-t08.sh: 9 checks — CronJobs, RBAC, run history, backup files
  on PVCs, DR runbook presence, offsite backup (manual confirmation).
- WORKPLAN.md: T07/T08 sections with done-criteria added.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-19 09:17:03 +00:00

305 lines
11 KiB
YAML

# SQLite backup CronJobs — sso and mfa namespaces
#
# Three CronJobs, one per stateful component:
#   1. lldap-backup       — LLDAP user/group store, SQLite (namespace: sso)
#   2. authelia-backup    — Authelia session/storage DB, SQLite (namespace: sso)
#   3. privacyidea-backup — privacyIDEA enckey and local config file (namespace: mfa)
#
# The CronJobs run daily on a staggered schedule: 03:00, 03:15 and 03:30 UTC.
# The two SQLite jobs use `sqlite3 .backup`, which produces a consistent copy
# of the database; the privacyIDEA job copies plain files with `cp`.
# Backups land on the same PVC next to the live data — this protects
# against pod failure, not PVC failure. Export the backup files offsite
# using pack-bundle.sh or a separate volume snapshot mechanism.
#
# PostgreSQL (privacyIDEA DB) is handled by CNPG ScheduledBackup in
# postgresql/scheduled-backup.yaml. Do not duplicate it here.
#
# Backup file naming:
# <db>.backup.<YYYY-MM-DD> — created daily, pruned after 7 days
#
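# Prerequisites:
#   - PVCs lldap-data and authelia-data (namespace: sso) and privacyidea-data
#     (namespace: mfa) exist.
#   - A Deployment named authelia exists in namespace sso; the Authelia job
#     scales it down for the duration of the backup.
#   - The application images do NOT need to ship sqlite3: every backup runs
#     in its own Job pod (nouchka/sqlite3 for the SQLite jobs, busybox for
#     privacyIDEA) with the relevant PVC mounted at /data.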
#
# Apply:
# kubectl apply -f cronjob-sqlite-backups.yaml
---
# ── 1. LLDAP backup (namespace: sso) ─────────────────────────────────────────
# Runs in its own Job pod (not inside the LLDAP pod) using a dedicated sqlite3
# image, with the lldap-data PVC mounted at /data next to the live database.
#
# The backup is written to a temporary name, verified with
# `PRAGMA integrity_check`, and only then renamed to
# /data/backups/users.backup.<YYYY-MM-DD>. A failed or interrupted run can
# therefore never leave a partial file — or overwrite an earlier good backup
# from the same day — under the final name.
#
# NOTE(review): the image is referenced by the mutable `latest` tag; pin a
# version or digest once one has been validated.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: lldap-backup
  namespace: sso
  labels:
    app.kubernetes.io/name: lldap-backup
    app.kubernetes.io/part-of: net-kingdom-sso-mfa
    net-kingdom/component: backup
spec:
  schedule: "0 3 * * *"  # daily at 03:00 UTC
  concurrencyPolicy: Forbid  # never start a run while the previous one is active
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app.kubernetes.io/name: lldap-backup
            net-kingdom/component: backup
        spec:
          restartPolicy: OnFailure
          securityContext:
            runAsNonRoot: true
            runAsUser: 1000
            fsGroup: 1000
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: lldap-data
          containers:
            - name: backup
              image: nouchka/sqlite3:latest
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
                - |
                  set -eu
                  DB=/data/users.db
                  BACKUP_DIR=/data/backups
                  DATE=$(date +%Y-%m-%d)
                  TARGET="$BACKUP_DIR/users.backup.$DATE"
                  TMP="$TARGET.tmp"
                  mkdir -p "$BACKUP_DIR"
                  if [ ! -f "$DB" ]; then
                    echo "WARN: $DB not found — LLDAP may not have been bootstrapped yet"
                    exit 0
                  fi
                  # Start from a clean temp file (a previous failed run may have left one).
                  rm -f "$TMP"
                  sqlite3 "$DB" ".backup '$TMP'"
                  # Verify the copy before it is published under the final name.
                  CHECK=$(sqlite3 "$TMP" "PRAGMA integrity_check;")
                  if [ "$CHECK" != "ok" ]; then
                    echo "ERROR: integrity check failed for $TMP: $CHECK" >&2
                    rm -f "$TMP"
                    exit 1
                  fi
                  mv -f "$TMP" "$TARGET"
                  echo "OK: backed up $DB to $TARGET"
                  # Prune backups older than 7 days (also removes stale *.tmp leftovers)
                  find "$BACKUP_DIR" -name 'users.backup.*' -mtime +7 -delete
                  echo "OK: pruned backups older than 7 days"
              volumeMounts:
                - name: data
                  mountPath: /data
              resources:
                requests:
                  cpu: "10m"
                  memory: "32Mi"
                limits:
                  cpu: "100m"
                  memory: "64Mi"
---
# ── 2. Authelia backup (namespace: sso) ──────────────────────────────────────
# Authelia uses a distroless image — the backup runs in a separate pod that
# mounts the same PVC (authelia-data) at /data.
#
# Pod layout (steps run strictly in this order):
#   1. init  scale-down — records the Deployment's current replica count,
#                         then scales deployment/authelia to 0.
#   2. init  backup     — `sqlite3 .backup` + 7-day prune. Records its
#                         outcome in a shared emptyDir and ALWAYS exits 0,
#                         so a failed backup can never block step 3.
#   3. main  scale-up   — restores the recorded replica count, then exits
#                         non-zero if step 2 reported a failure so the Job
#                         is still marked as failed.
#
# Scale-up must run in a kubectl image: the sqlite3 image has no kubectl
# binary, so scaling from the backup container would silently leave Authelia
# at 0 replicas.
#
# NOTE(review): volumes are attached before init containers start. With a
# ReadWriteOnce PVC this pod presumably has to land on the node that already
# has the volume attached — confirm for multi-node clusters.
# NOTE(review): a failed backup is not retried within the same Job, because
# init containers do not re-run on a main-container restart.
# For production: prefer a storage-level snapshot (Longhorn/Velero) instead.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: authelia-backup
  namespace: sso
  labels:
    app.kubernetes.io/name: authelia-backup
    app.kubernetes.io/part-of: net-kingdom-sso-mfa
    net-kingdom/component: backup
spec:
  schedule: "15 3 * * *"  # 03:15 UTC — offset from lldap-backup
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app.kubernetes.io/name: authelia-backup
            net-kingdom/component: backup
        spec:
          restartPolicy: OnFailure
          serviceAccountName: backup-sa  # needs get/scale permission on deployment/authelia
          securityContext:
            runAsNonRoot: true
            runAsUser: 1000
            fsGroup: 1000
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: authelia-data
            # Scratch space used to pass the replica count and the backup
            # result between the three steps.
            - name: handoff
              emptyDir: {}
          initContainers:
            - name: scale-down
              image: bitnami/kubectl:latest
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
                - |
                  set -eu
                  # Record the replica count once; keep an already recorded
                  # value if this step is re-run inside the same pod.
                  if [ ! -s /handoff/replicas ]; then
                    CURRENT=$(kubectl get deployment/authelia -n sso -o jsonpath='{.spec.replicas}')
                    # 0 or empty (e.g. left over from an aborted run) falls back to 1.
                    case "$CURRENT" in
                      ''|0) CURRENT=1 ;;
                    esac
                    echo "$CURRENT" > /handoff/replicas
                  fi
                  kubectl scale deployment/authelia --replicas=0 -n sso
              volumeMounts:
                - name: handoff
                  mountPath: /handoff
              resources:
                requests:
                  cpu: "10m"
                  memory: "32Mi"
                limits:
                  cpu: "100m"
                  memory: "64Mi"
            - name: backup
              image: nouchka/sqlite3:latest
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
                - |
                  set -u
                  DB=/data/db.sqlite3
                  BACKUP_DIR=/data/backups
                  DATE=$(date +%Y-%m-%d)
                  TARGET="$BACKUP_DIR/authelia.backup.$DATE"
                  # Errors are checked explicitly: `set -e` has no effect
                  # inside a function that is called as an `if` condition.
                  run_backup() {
                    mkdir -p "$BACKUP_DIR" || return 1
                    if [ ! -f "$DB" ]; then
                      echo "WARN: $DB not found — Authelia may not have been bootstrapped yet"
                      return 0
                    fi
                    sqlite3 "$DB" ".backup '$TARGET'" || return 1
                    echo "OK: backed up $DB to $TARGET"
                    find "$BACKUP_DIR" -name 'authelia.backup.*' -mtime +7 -delete || return 1
                    echo "OK: pruned backups older than 7 days"
                  }
                  if run_backup; then RC=0; else RC=1; fi
                  echo "$RC" > /handoff/backup.rc
                  # Always succeed so the scale-up container is guaranteed to run.
                  exit 0
              volumeMounts:
                - name: data
                  mountPath: /data
                - name: handoff
                  mountPath: /handoff
              resources:
                requests:
                  cpu: "10m"
                  memory: "32Mi"
                limits:
                  cpu: "100m"
                  memory: "64Mi"
          containers:
            - name: scale-up
              image: bitnami/kubectl:latest
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
                - |
                  set -u
                  REPLICAS=$(cat /handoff/replicas 2>/dev/null || echo 1)
                  [ -n "$REPLICAS" ] || REPLICAS=1
                  # A failure here restarts this container (restartPolicy:
                  # OnFailure), so the scale-up itself is retried.
                  if ! kubectl scale deployment/authelia --replicas="$REPLICAS" -n sso; then
                    echo "ERROR: could not scale deployment/authelia back to $REPLICAS replica(s)" >&2
                    exit 1
                  fi
                  echo "OK: scaled deployment/authelia back to $REPLICAS replica(s)"
                  # Surface the backup result only after Authelia is back up.
                  RC=$(cat /handoff/backup.rc 2>/dev/null || echo 1)
                  if [ "$RC" != "0" ]; then
                    echo "ERROR: backup step failed — see logs of the 'backup' init container" >&2
                    exit 1
                  fi
              volumeMounts:
                - name: handoff
                  mountPath: /handoff
              resources:
                requests:
                  cpu: "10m"
                  memory: "32Mi"
                limits:
                  cpu: "100m"
                  memory: "64Mi"
---
# ── 3. privacyIDEA backup (namespace: mfa) ───────────────────────────────────
# Copies two files from the privacyidea-data PVC (mounted at /data) into
# /data/backups:
#   - /data/enckey           → enckey.backup.<YYYY-MM-DD>
#   - /data/privacyidea.cfg  → privacyidea.cfg.backup.<YYYY-MM-DD> (if present)
# No database is backed up here: the main PI database is PostgreSQL and is
# handled by CNPG.
#
# Pruning only happens after the enckey was backed up in this run. If the
# live enckey is missing while earlier backups exist, the job fails WITHOUT
# pruning — those backups may be the only remaining copies of the key, and
# an unconditional prune would delete them after 7 days.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: privacyidea-backup
  namespace: mfa
  labels:
    app.kubernetes.io/name: privacyidea-backup
    app.kubernetes.io/part-of: net-kingdom-sso-mfa
    net-kingdom/component: backup
spec:
  schedule: "30 3 * * *"  # 03:30 UTC — offset from previous jobs
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app.kubernetes.io/name: privacyidea-backup
            net-kingdom/component: backup
        spec:
          restartPolicy: OnFailure
          securityContext:
            runAsNonRoot: true
            runAsUser: 1000
            fsGroup: 1000
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: privacyidea-data
          containers:
            - name: backup
              image: busybox:stable
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
                - |
                  set -eu
                  BACKUP_DIR=/data/backups
                  DATE=$(date +%Y-%m-%d)
                  ENCKEY_SAVED=0
                  mkdir -p "$BACKUP_DIR"
                  # Back up the enckey — this is the most critical file on this PVC.
                  # Loss of enckey = all enrolled MFA tokens become invalid.
                  if [ -f /data/enckey ]; then
                    cp /data/enckey "$BACKUP_DIR/enckey.backup.$DATE"
                    echo "OK: backed up enckey to $BACKUP_DIR/enckey.backup.$DATE"
                    ENCKEY_SAVED=1
                  else
                    echo "WARN: /data/enckey not found — enckey-bootstrap.sh may not have run yet"
                  fi
                  # Back up any local config files
                  if [ -f /data/privacyidea.cfg ]; then
                    cp /data/privacyidea.cfg "$BACKUP_DIR/privacyidea.cfg.backup.$DATE"
                  fi
                  if [ "$ENCKEY_SAVED" -eq 1 ]; then
                    # Prune files older than 7 days
                    find "$BACKUP_DIR" \( -name 'enckey.backup.*' -o -name '*.cfg.backup.*' \) \
                      -mtime +7 -delete
                    echo "OK: pruned backups older than 7 days"
                  elif ls "$BACKUP_DIR"/enckey.backup.* >/dev/null 2>&1; then
                    # The key existed before and is gone now: keep every backup and fail loudly.
                    echo "ERROR: /data/enckey is missing but earlier backups exist — not pruning; restore the enckey" >&2
                    exit 1
                  else
                    echo "WARN: no enckey backed up — skipping prune"
                  fi
              volumeMounts:
                - name: data
                  mountPath: /data
              resources:
                requests:
                  cpu: "10m"
                  memory: "16Mi"
                limits:
                  cpu: "50m"
                  memory: "32Mi"
---
# ── RBAC for backup-sa (Authelia scale-down/up) ───────────────────────────────
# Identity the authelia-backup CronJob pods run as (referenced there via
# serviceAccountName); its permissions come from the Role/RoleBinding below.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: sso
  name: backup-sa
  labels:
    net-kingdom/component: backup
    app.kubernetes.io/part-of: net-kingdom-sso-mfa
---
# Least-privilege Role for the Authelia backup job: it may read the authelia
# Deployment and change its replica count through the scale subresource, and
# nothing else. It cannot modify the Deployment spec itself or touch any other
# Deployment in the namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: backup-scaler
  namespace: sso
  labels:
    app.kubernetes.io/part-of: net-kingdom-sso-mfa
    net-kingdom/component: backup
rules:
  # Read-only lookup of the Deployment object (kubectl resolves it by name
  # before scaling).
  - apiGroups: ["apps"]
    resources: ["deployments"]
    resourceNames: ["authelia"]
    verbs: ["get"]
  # Replica changes go through the scale subresource only.
  - apiGroups: ["apps"]
    resources: ["deployments/scale"]
    resourceNames: ["authelia"]
    verbs: ["get", "update", "patch"]
---
# Binds the backup-scaler Role to the backup-sa ServiceAccount, both in
# namespace sso.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: sso
  name: backup-sa-scaler
  labels:
    net-kingdom/component: backup
    app.kubernetes.io/part-of: net-kingdom-sso-mfa
subjects:
  - kind: ServiceAccount
    namespace: sso
    name: backup-sa
roleRef:
  kind: Role
  name: backup-scaler
  apiGroup: rbac.authorization.k8s.io