Files
railiance-cluster/tools/cmd/railiance-backup-s2
Bernd Worsch 4e1a90032b
Some checks failed
railiance-tests / smoke (push) Has been cancelled
fix(backup): elevate sudo in Makefile and guard mkdir after root check
- `make backup` now invokes `sudo tools/cmd/railiance-backup-s2` directly
- Move `mkdir -p` in railiance-backup-s2 to after the root check so the
  script emits a clear error instead of a raw permission-denied failure

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-10 22:33:49 +00:00

88 lines
4.3 KiB
Bash
Executable File

#!/usr/bin/env bash
# tools/cmd/railiance-backup-s2 — S2 Kubernetes Runtime backup
# Backs up: k3s etcd snapshot, Helm release values, kubeconfig
# Encryption: age (reuses SOPS key pair from .sops.yaml)
# Output: /opt/backup/railiance/cluster/
# No network required. Requires root (etcd snapshot + kubeconfig).
# Strict mode: abort on command errors, on unset variables, and on failures
# anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail
# Repository root is two directories above the directory holding this script.
here="$(dirname "${BASH_SOURCE[0]}")"
ROOT="$(cd "${here}/../.." && pwd)"
# Output helpers — presumably where print_hdr/ok/warn/bad used below come from.
. "${ROOT}/lib/railiance-print.sh"
# ── Configuration ──────────────────────────────────────────────────────────────
# age recipient (public key) that every backup artefact is encrypted to.
AGE_PUBLIC_KEY="age1aq8twfd78wvpra0had8cezcnj96tj4q0068edrz5jez8d6xwmflqdepsh4"
# Directory that receives the encrypted artefacts.
BACKUP_DIR="/opt/backup/railiance/cluster"
# k3s paths read by the backup steps.
KUBECONFIG_PATH="/etc/rancher/k3s/k3s.yaml"
ETCD_SNAP_DIR="/var/lib/rancher/k3s/server/db/snapshots"
# How many backups of each type to retain.
KEEP=7
# UTC timestamp shared by every file name produced in this run.
TS="$(date -u '+%Y%m%dT%H%M%SZ')"
print_hdr "railiance-cluster backup — ${TS}"
# ── Root check ─────────────────────────────────────────────────────────────────
# Bail out with a readable message unless the effective UID is 0. The backup
# directory is created only after this guard has passed.
(( EUID == 0 )) || {
  bad "root" "this script requires root — run via: sudo make backup"
  exit 1
}
mkdir -p "${BACKUP_DIR}"
# ── 1. k3s etcd snapshot ───────────────────────────────────────────────────────
# If `k3s etcd-snapshot ls` succeeds, save a snapshot named railiance-<TS>,
# locate the resulting file under ETCD_SNAP_DIR, and encrypt it with age into
# BACKUP_DIR. Otherwise warn and skip this step.
if k3s etcd-snapshot ls &>/dev/null; then
  ok "etcd" "taking snapshot…"
  SNAP_NAME="railiance-${TS}"
  # k3s output is silenced, so report the failure ourselves instead of letting
  # `set -e` terminate the script without any message.
  if ! k3s etcd-snapshot save --name "${SNAP_NAME}" &>/dev/null; then
    bad "etcd" "k3s etcd-snapshot save failed for ${SNAP_NAME}"
    exit 1
  fi
  SNAP_FILE="${ETCD_SNAP_DIR}/${SNAP_NAME}"
  if [[ ! -f "${SNAP_FILE}" ]]; then
    # k3s may append a suffix — find the most recent matching file.
    # `|| true`: a failing find must not abort here; the check below reports it.
    SNAP_FILE="$(find "${ETCD_SNAP_DIR}" -type f -name "${SNAP_NAME}*" 2>/dev/null \
      | sort -r | head -n 1)" || true
  fi
  # Never hand age an empty or missing input path.
  if [[ -z "${SNAP_FILE}" || ! -f "${SNAP_FILE}" ]]; then
    bad "etcd" "snapshot ${SNAP_NAME} not found in ${ETCD_SNAP_DIR}"
    exit 1
  fi
  age -r "${AGE_PUBLIC_KEY}" -o "${BACKUP_DIR}/etcd-${TS}.snap.age" "${SNAP_FILE}"
  ok "etcd" "encrypted → etcd-${TS}.snap.age"
  # Prune old snapshots from k3s store (keep last KEEP). Best-effort: a prune
  # failure is deliberately ignored so it cannot fail an otherwise good backup.
  # NOTE(review): prune is called without --name while snapshots are saved as
  # railiance-<TS>; confirm against the k3s docs that these are actually pruned.
  k3s etcd-snapshot prune --snapshot-retention "${KEEP}" &>/dev/null || true
else
  warn "etcd" "k3s etcd not available (SQLite mode?) — skipping snapshot"
fi
# ── 2. Helm release values ─────────────────────────────────────────────────────
# For every release reported by `helm list -A`, write the output of
# `helm get values` to <namespace>-<release>.yaml in a temp directory, then
# stream a gzipped tarball of that directory through age into BACKUP_DIR.
# Skipped with a warning when helm or jq (used to parse the release list) is
# not installed.
if ! command -v helm &>/dev/null; then
  warn "helm" "helm not found — skipping"
elif ! command -v jq &>/dev/null; then
  warn "helm" "jq not found — skipping"
else
  ok "helm" "capturing release values…"
  export KUBECONFIG="${KUBECONFIG_PATH}"
  # Run in a subshell so the EXIT trap is scoped to this step: the plaintext
  # values are removed on every exit path, including a `set -e` abort, and no
  # trap in the main shell is overwritten.
  (
    TMP_HELM="$(mktemp -d)"
    trap 'rm -rf -- "${TMP_HELM}"' EXIT
    helm list -A -o json 2>/dev/null \
      | jq -r '.[] | .name + " " + .namespace' \
      | while read -r name ns; do
          # Best-effort per release: one failing release must not stop the rest.
          helm get values "${name}" -n "${ns}" -o yaml 2>/dev/null \
            > "${TMP_HELM}/${ns}-${name}.yaml" || true
        done
    tar -czf - -C "${TMP_HELM}" . \
      | age -r "${AGE_PUBLIC_KEY}" -o "${BACKUP_DIR}/helm-values-${TS}.tar.gz.age"
  )
  ok "helm" "encrypted → helm-values-${TS}.tar.gz.age"
fi
# ── 3. kubeconfig ─────────────────────────────────────────────────────────────
# Encrypt the k3s kubeconfig with age when the file exists; otherwise warn and
# move on.
if [[ ! -f "${KUBECONFIG_PATH}" ]]; then
  warn "kubeconfig" "${KUBECONFIG_PATH} not found — skipping"
else
  kubeconfig_out="${BACKUP_DIR}/kubeconfig-${TS}.yaml.age"
  age -r "${AGE_PUBLIC_KEY}" -o "${kubeconfig_out}" "${KUBECONFIG_PATH}"
  ok "kubeconfig" "encrypted → kubeconfig-${TS}.yaml.age"
fi
# ── 4. Prune local cache ───────────────────────────────────────────────────────
#######################################
# Delete all but the newest files matching a pattern below a directory.
# "Newest" is decided by reverse byte-wise path order, which matches age order
# for the fixed-width UTC timestamps embedded in the backup file names.
# Arguments:
#   $1 - directory to search
#   $2 - find(1) -name pattern
#   $3 - number of files to keep
# Returns:
#   pipeline status (non-zero if find or rm fails)
#######################################
prune_backups() {
  local dir=$1 pattern=$2 keep=$3
  # NUL-delimited end to end so paths containing whitespace survive intact;
  # -type f keeps directories out; `--` stops rm from parsing a path as an option.
  find "${dir}" -type f -name "${pattern}" -print0 \
    | LC_ALL=C sort -rz \
    | tail -z -n "+$((keep + 1))" \
    | xargs -0 -r rm -f --
}
for pattern in "etcd-*.snap.age" "helm-values-*.tar.gz.age" "kubeconfig-*.yaml.age"; do
  prune_backups "${BACKUP_DIR}" "${pattern}" "${KEEP}"
done
ok "prune" "kept last ${KEEP} of each type"
# ── 5. Stamp ───────────────────────────────────────────────────────────────────
# Record this run's timestamp in the .last-backup marker, then print a short
# summary with the output location and a decryption hint.
printf '%s\n' "${TS}" > "${BACKUP_DIR}/.last-backup"
printf '\n'
ok "done" "backup complete — ${TS}"
printf '%s\n' \
  " Location: ${BACKUP_DIR}" \
  " Decrypt with: age -d -i ~/.config/sops/age/keys.txt <file>"