Files
railiance-cluster/tools/cmd/railiance-backup-s2
Bernd Worsch 2420915d30
Some checks failed
railiance-tests / smoke (push) Has been cancelled
fix(backup): SQLite hot backup instead of etcd snapshot
k3s runs in SQLite mode (no --cluster-init). Replace etcd-snapshot
with sqlite3 .backup for a WAL-aware hot copy of state.db.
Update restore guide to match. Cron installed under root crontab.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-26 21:56:19 +00:00

84 lines
4.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# tools/cmd/railiance-backup-s2 — S2 Kubernetes Runtime backup
# Backs up: k3s SQLite datastore (hot backup of state.db), Helm release values, kubeconfig
# Encryption: age (reuses SOPS key pair from .sops.yaml)
# Output: /opt/backup/railiance/cluster/
# No network required. Requires root (reads the k3s state.db + kubeconfig).
# Strict mode: abort on a failing command, an unset variable, or a failing
# pipeline stage.
set -o errexit -o nounset -o pipefail

# Repository root = two directories above the directory holding this script.
ROOT="$(dirname "${BASH_SOURCE[0]}")"
ROOT="$(cd "${ROOT}/../.." && pwd)"

# Print helpers used throughout this script (print_hdr, ok, warn, bad) are
# presumably defined by this library — it is not visible from this file.
. "${ROOT}/lib/railiance-print.sh"

# ── Configuration ──────────────────────────────────────────────────────────────
# Inputs read by the backup steps.
K3S_STATE_DB="/var/lib/rancher/k3s/server/db/state.db"
KUBECONFIG_PATH="/etc/rancher/k3s/k3s.yaml"
# Where encrypted artifacts are written, and the age recipient they are
# encrypted to.
BACKUP_DIR="/opt/backup/railiance/cluster"
AGE_PUBLIC_KEY="age1aq8twfd78wvpra0had8cezcnj96tj4q0068edrz5jez8d6xwmflqdepsh4"
# Number of artifacts of each type retained by the prune step.
KEEP=7
# UTC timestamp embedded in every artifact name produced by this run.
TS="$(date -u '+%Y%m%dT%H%M%SZ')"

print_hdr "railiance-cluster backup — ${TS}"

# ── Root check ─────────────────────────────────────────────────────────────────
# Refuse to continue unless running with effective UID 0.
if (( EUID != 0 )); then
  bad "root" "this script requires root — run via: sudo make backup"
  exit 1
fi

# Make sure the output directory exists before any step writes into it.
mkdir -p "${BACKUP_DIR}"
# ── 1. k3s state (SQLite hot backup) ──────────────────────────────────────────
# This cluster runs k3s in SQLite mode (no --cluster-init).
# sqlite3 .backup performs a WAL-aware hot copy — no k3s stop required.
#
# The hot copy is written to a temp file, encrypted with age into BACKUP_DIR,
# and the temp file is removed. If the state DB does not exist, the step is
# skipped with a warning.
if [[ -f "${K3S_STATE_DB}" ]]; then
  ok "state-db" "taking hot backup…"
  # Run in a subshell so the EXIT trap is scoped to this step only and fires
  # on every exit path (including an errexit abort) without touching any trap
  # the surrounding script may have installed.
  (
    TMP_STATE="$(mktemp /tmp/k3s-state-XXXXXX.db)"
    STATE_OUT="${BACKUP_DIR}/k3s-state-${TS}.db.age"
    # TMP_STATE is an *unencrypted* copy of the cluster datastore: it must
    # never be left behind in /tmp. The .partial file is an incomplete
    # artifact and is likewise discarded on failure.
    trap 'rm -f -- "${TMP_STATE}" "${STATE_OUT}.partial"' EXIT

    sqlite3 "${K3S_STATE_DB}" ".backup ${TMP_STATE}"

    # Encrypt to a .partial name and rename only on success, so a failed run
    # cannot leave a truncated file that the prune step would count as a
    # valid backup (the prune patterns do not match *.partial).
    age -r "${AGE_PUBLIC_KEY}" -o "${STATE_OUT}.partial" "${TMP_STATE}"
    mv -f -- "${STATE_OUT}.partial" "${STATE_OUT}"
  )
  ok "state-db" "encrypted → k3s-state-${TS}.db.age"
else
  warn "state-db" "${K3S_STATE_DB} not found — skipping"
fi
# ── 2. Helm release values ─────────────────────────────────────────────────────
# For every release reported by `helm list -A`, stores the output of
# `helm get values` as <namespace>-<release>.yaml, then bundles all files into
# one age-encrypted tarball in BACKUP_DIR. Skipped with a warning when helm is
# not installed. A release whose values cannot be read is skipped with a
# warning; a failing `helm list`, jq, tar or age aborts the script.
if command -v helm &>/dev/null; then
  ok "helm" "capturing release values…"
  export KUBECONFIG="${KUBECONFIG_PATH}"

  # jq parses the `helm list` JSON below; report its absence explicitly
  # instead of dying on a bare "command not found" mid-pipeline.
  if ! command -v jq &>/dev/null; then
    bad "helm" "jq not found — required to parse 'helm list' output"
    exit 1
  fi

  # Subshell: scopes the EXIT trap to this step so the plaintext values
  # directory is removed on every exit path, including errexit aborts.
  (
    TMP_HELM="$(mktemp -d)"
    HELM_OUT="${BACKUP_DIR}/helm-values-${TS}.tar.gz.age"
    trap 'rm -rf -- "${TMP_HELM}"; rm -f -- "${HELM_OUT}.partial"' EXIT

    # helm's stderr stays suppressed as before, but a failure is now reported
    # rather than silently terminating the script via pipefail.
    if ! HELM_RELEASES="$(helm list -A -o json 2>/dev/null)"; then
      bad "helm" "helm list failed — is the cluster reachable via ${KUBECONFIG_PATH}?"
      exit 1
    fi

    printf '%s\n' "${HELM_RELEASES}" \
      | jq -r '.[] | .name + " " + .namespace' \
      | while read -r name ns; do
          # Best-effort per release: one unreadable release must not abort the
          # whole backup, but it should be visible and must not leave a
          # misleading zero-byte file in the archive.
          if ! helm get values "${name}" -n "${ns}" -o yaml 2>/dev/null </dev/null \
              > "${TMP_HELM}/${ns}-${name}.yaml"; then
            warn "helm" "could not read values for ${ns}/${name} — skipping"
            rm -f -- "${TMP_HELM}/${ns}-${name}.yaml"
          fi
        done

    # Write under a .partial name and rename on success so an interrupted run
    # never leaves a truncated archive under the final name.
    tar -czf - -C "${TMP_HELM}" . \
      | age -r "${AGE_PUBLIC_KEY}" -o "${HELM_OUT}.partial"
    mv -f -- "${HELM_OUT}.partial" "${HELM_OUT}"
  )
  ok "helm" "encrypted → helm-values-${TS}.tar.gz.age"
else
  warn "helm" "helm not found — skipping"
fi
# ── 3. kubeconfig ─────────────────────────────────────────────────────────────
# Encrypts the k3s kubeconfig straight into BACKUP_DIR; a missing file is
# reported as a warning and the step is skipped.
if [[ ! -f "${KUBECONFIG_PATH}" ]]; then
  warn "kubeconfig" "${KUBECONFIG_PATH} not found — skipping"
else
  age \
    -r "${AGE_PUBLIC_KEY}" \
    -o "${BACKUP_DIR}/kubeconfig-${TS}.yaml.age" \
    "${KUBECONFIG_PATH}"
  ok "kubeconfig" "encrypted → kubeconfig-${TS}.yaml.age"
fi
# ── 4. Prune local cache ───────────────────────────────────────────────────────
#######################################
# Delete all but the newest backups matching one name pattern.
# "Newest" is decided by reverse byte-wise sort of the path, which is
# chronological because artifact names embed a %Y%m%dT%H%M%SZ timestamp.
# Arguments:
#   $1 - directory holding the backups
#   $2 - find(1) -name pattern selecting one artifact type
#   $3 - number of newest files to keep
# Returns: non-zero if any pipeline stage fails (under pipefail).
#######################################
prune_backups() {
  local dir=$1 pattern=$2 keep=$3
  # -maxdepth 1 / -type f: only regular files directly in the backup dir are
  # candidates, so same-named files in subdirectories are neither counted
  # against the retention limit nor deleted.
  # NUL-delimited throughout so no path character can be mis-split by xargs.
  find "${dir}" -maxdepth 1 -type f -name "${pattern}" -print0 \
    | LC_ALL=C sort -rz \
    | tail -z -n "+$((keep + 1))" \
    | xargs -0 -r rm -f --
}

for pattern in "k3s-state-*.db.age" "helm-values-*.tar.gz.age" "kubeconfig-*.yaml.age"; do
  prune_backups "${BACKUP_DIR}" "${pattern}" "${KEEP}"
done
ok "prune" "kept last ${KEEP} of each type"
# ── 5. Stamp ───────────────────────────────────────────────────────────────────
# Record this run's timestamp in BACKUP_DIR/.last-backup, then print a short
# summary with the artifact location and a decryption hint.
printf '%s\n' "${TS}" > "${BACKUP_DIR}/.last-backup"
printf '\n'
ok "done" "backup complete — ${TS}"
printf '%s\n' \
  " Location: ${BACKUP_DIR}" \
  " Decrypt with: age -d -i ~/.config/sops/age/keys.txt <file>"