Files
net-kingdom/sso-mfa/bootstrap/creds-bootstrap-agent.sh
Bernd Worsch 59ba9e6fe1 fix(creds-bootstrap): harden agent bootstrap for non-interactive execution
- creds-bootstrap-agent.sh: skip Phase 3 if all secrets already applied
  (avoids CNPG SSL connection drops from repeated reconciliation)
- creds-bootstrap-agent.sh: wait for rollout to complete after restart
  before running enckey/admin bootstrap (fixes race with old pod)
- creds-bootstrap-agent.sh: only restart privacyIDEA when Phase 3 ran
- create-pi-token.sh: use env-var + retry for token fetch (no heredoc
  stdin; handles transient 500 from idle connection pool)
- create-pi-token.sh: create keycape-pi-token K8s Secret after fetching
- creds-verify.sh: map keycape-pi-token to secrets_applied.keycape
  (not pi_admin_created, which caused spurious Phase 5 re-runs)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 12:11:13 +00:00

384 lines
16 KiB
Bash
Executable File

#!/usr/bin/env bash
# creds-bootstrap-agent.sh — fully automated credential bootstrap (NK-WP-0005)
#
# Usage:
# bash sso-mfa/bootstrap/creds-bootstrap-agent.sh [--dry-run] [--resume]
# make creds-agent-init
#
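# Runs end-to-end without human input until the emergency bundle confirmation
# gate. Progress is recorded in creds-state.yaml, so an interrupted run resumes
# automatically from where it left off the next time the script is invoked
# (--resume is accepted but changes nothing: resuming is the default).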
#
# Prerequisites:
# - age (apt install age)
# - kubectl with a reachable cluster (KUBECONFIG set or ~/.kube/config)
# - git (configured with commit access)
# - openssl
# - ~/.config/sops/age/keys.txt — age private key (generated here if missing)
# Strict mode: abort on command failure, unset variables and failed pipeline
# stages.
set -euo pipefail

# All paths are derived from this script's location so it can be started from
# any working directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
K8S_DIR="$REPO_ROOT/sso-mfa/k8s"
SECRETS_DIR="$SCRIPT_DIR/secrets"
STATE_FILE="$SCRIPT_DIR/creds-state.yaml"
AGE_KEY="$HOME/.config/sops/age/keys.txt"

# Argument parsing. Unknown arguments are rejected instead of ignored: a
# mistyped --dry-run must never turn into a real run that commits and injects
# secrets. The helper functions are defined further down, so the error is
# reported with printf directly.
DRY_RUN=false
for arg in "$@"; do
  case "$arg" in
    --dry-run)
      DRY_RUN=true
      ;;
    --resume)
      # Accepted for compatibility with the documented usage; resuming from
      # creds-state.yaml happens on every run, so nothing needs to be set.
      ;;
    *)
      printf ' ERROR: unknown argument: %s\n' "$arg" >&2
      printf ' Usage: bash sso-mfa/bootstrap/creds-bootstrap-agent.sh [--dry-run] [--resume]\n' >&2
      exit 2
      ;;
  esac
done
# ── Helpers ───────────────────────────────────────────────────────────────────
# Output helpers. printf is used instead of echo so that a message starting
# with -n / -e / -E is printed verbatim rather than parsed as an echo option.

# log MESSAGE... — progress line tagged with "[bootstrap]" (stdout).
log() { printf '%s\n' " [bootstrap] $*"; }

# step TITLE... — blank line followed by a three-line banner announcing a phase.
step() {
  local rule="══════════════════════════════════════════════════════"
  printf '\n%s\n%s\n%s\n' "$rule" " Phase $*" "$rule"
}

# ok MESSAGE... — report a successful check or action (stdout).
ok() { printf '%s\n' "$*"; }

# warn MESSAGE... — report a non-fatal problem (stdout, like the other helpers).
warn() { printf '%s\n' "$*"; }

# die MESSAGE... — print a blank line on stdout, the error on stderr, then
# terminate the script with exit status 1.
die() {
  printf '\n'
  printf '%s\n' " ERROR: $*" >&2
  exit 1
}
# dry_run_guard COMMAND [ARG...]
# Executes COMMAND with its arguments unless the global DRY_RUN is "true"; in
# that case the command line is only printed and the function returns 0.
# Outside dry-run mode the return status is that of COMMAND.
dry_run_guard() {
  if [[ "$DRY_RUN" != true ]]; then
    "$@"
    return
  fi
  echo " [dry-run] would run: $*"
  return 0
}
# state_get KEY
# Print the value of a top-level (column 0) key from $STATE_FILE with any
# trailing "# comment" and all double quotes removed.
#
# A missing key or a missing state file yields empty output and exit status 0.
# The script runs under `set -euo pipefail`, where a non-matching grep would
# otherwise fail the whole pipeline and silently abort plain assignments such
# as VAR=$(state_get key).
state_get() {
  [[ -f "$STATE_FILE" ]] || return 0
  { grep -E "^$1:" "$STATE_FILE" || true; } \
    | sed -e 's/^[^:]*: *//' -e 's/ *#.*//' \
    | tr -d '"'
}

# state_get_nested KEY
# Same as state_get, but for an indented (nested) key. Any amount of leading
# whitespace is accepted, so the lookup does not depend on the exact indent
# width used in the state file.
state_get_nested() {
  [[ -f "$STATE_FILE" ]] || return 0
  { grep -E "^[[:space:]]+$1:" "$STATE_FILE" || true; } \
    | sed -e 's/^[^:]*: *//' -e 's/ *#.*//' \
    | tr -d '"'
}
# state_set KEY VALUE
# Set a top-level (column 0) key in $STATE_FILE to VALUE, replacing the whole
# line (including any trailing comment). Does nothing when the state file does
# not exist.
#
# - VALUE is escaped before being handed to sed, so "&", "|" and "\" are written
#   literally instead of being treated as replacement syntax.
# - A line of the form "KEY:" with no value is matched as well.
# - A key that is not present yet is appended at the end of the file so the
#   state is recorded rather than silently dropped.
state_set() {
  local key="$1" value="$2"
  [[ -f "$STATE_FILE" ]] || return 0

  if grep -Eq "^${key}:([[:space:]]|\$)" "$STATE_FILE"; then
    local escaped
    escaped=$(printf '%s' "$value" | sed -e 's/[\\&|]/\\&/g')
    sed -i -E "s|^${key}:([[:space:]].*)?\$|${key}: ${escaped}|" "$STATE_FILE"
  else
    # Make sure the appended entry starts on a line of its own.
    if [[ -s "$STATE_FILE" && -n "$(tail -c 1 "$STATE_FILE")" ]]; then
      printf '\n' >> "$STATE_FILE"
    fi
    printf '%s: %s\n' "$key" "$value" >> "$STATE_FILE"
  fi
}
# state_set_nested KEY VALUE
# Set an indented (nested) key in $STATE_FILE to VALUE, replacing the rest of
# the line. Does nothing when the state file does not exist.
#
# - The existing indentation of the line is captured and written back, so the
#   update works regardless of the indent width used in the file.
# - VALUE is escaped before being handed to sed, so "&", "|" and "\" are written
#   literally.
# - A nested key cannot be appended safely (its parent is unknown), so a
#   missing key is reported on stderr and the file is left untouched.
state_set_nested() {
  local key="$1" value="$2"
  [[ -f "$STATE_FILE" ]] || return 0

  if ! grep -Eq "^[[:space:]]+${key}:([[:space:]]|\$)" "$STATE_FILE"; then
    printf ' [bootstrap] warning: nested key "%s" not found in %s; state not updated\n' \
      "$key" "$STATE_FILE" >&2
    return 0
  fi

  local escaped
  escaped=$(printf '%s' "$value" | sed -e 's/[\\&|]/\\&/g')
  sed -i -E "s|^([[:space:]]+)${key}:([[:space:]].*)?\$|\\1${key}: ${escaped}|" "$STATE_FILE"
}
# ── Pre-flight ────────────────────────────────────────────────────────────────
step "0 — Pre-flight"

# Each row is "<command>|<message passed to die when the command is missing>".
# The rows are checked in order and the first missing tool ends the run.
while IFS='|' read -r tool_name missing_msg; do
  if ! command -v "$tool_name" >/dev/null 2>&1; then
    die "$missing_msg"
  fi
done <<'TOOLS'
age|age not installed (apt install age)
kubectl|kubectl not found — install it and configure KUBECONFIG
git|git not found
openssl|openssl not found
TOOLS
ok "required tools present"
# Age key — generate if missing.
#
# Nothing is created or written in --dry-run mode: the key directory is left
# untouched, creds-state.yaml is not modified, and a placeholder public key is
# used so that later dry-run messages can still be printed.
if [[ ! -f "$AGE_KEY" ]]; then
  log "age key not found at $AGE_KEY — generating..."
  if [[ "$DRY_RUN" == false ]]; then
    command -v age-keygen >/dev/null 2>&1 \
      || die "age-keygen not found (apt install age)"
    mkdir -p "$(dirname "$AGE_KEY")"
    # stderr stays silenced as before (presumably to hide age-keygen's own
    # chatter), but a failure is now reported instead of aborting without a
    # message.
    age-keygen -o "$AGE_KEY" 2>/dev/null \
      || die "age-keygen failed to create $AGE_KEY"
    chmod 600 "$AGE_KEY"
    ok "age key generated: $AGE_KEY"
    log "Public key: $(awk '/public key:/ {print $NF}' "$AGE_KEY")"
  else
    echo " [dry-run] would run: age-keygen -o $AGE_KEY"
  fi
fi

if [[ -f "$AGE_KEY" ]]; then
  # awk exits 0 even when no line matches, so an unusable key file reaches the
  # explicit die below instead of tripping `set -o pipefail` silently.
  AGE_PUBKEY=$(awk '/public key:/ {print $NF}' "$AGE_KEY") \
    || die "could not read $AGE_KEY"
  [[ -n "$AGE_PUBKEY" ]] || die "could not read public key from $AGE_KEY"
  ok "age key ready: ${AGE_PUBKEY:0:20}"
  if [[ "$DRY_RUN" == false ]]; then
    state_set "age_key_present" "true"
  fi
else
  # Only reachable in --dry-run, where the key was deliberately not generated.
  AGE_PUBKEY="<age-public-key>"
  echo " [dry-run] age key absent — using placeholder public key"
fi
# Cluster reachability: a failing `kubectl cluster-info` ends the run; its
# output is discarded either way.
kubectl cluster-info &>/dev/null \
  || die "Cannot reach the Kubernetes cluster. Check KUBECONFIG / cluster status."

# The context name is only used for the status line; fall back to a marker
# when kubectl cannot report one.
KUBE_CTX=$(kubectl config current-context 2>/dev/null || printf '%s\n' '(unknown)')
ok "cluster reachable: $KUBE_CTX"
# ── Phase 1: Generate secrets ─────────────────────────────────────────────────
# Skipped when creds-state.yaml already records secrets_generated: true.
# Otherwise any leftover plaintext directory from a failed run is shredded and
# gen-secrets.sh is run. In --dry-run mode nothing is deleted or generated; the
# actions are only announced.
step "1 — Generate secrets"
if [[ "$(state_get secrets_generated)" == "true" ]]; then
  ok "secrets already generated — skipping"
elif [[ "$DRY_RUN" == false ]]; then
  # Clean up any partial generation from a failed prior run
  if [[ -d "$SECRETS_DIR" ]]; then
    warn "leftover secrets/ found from previous run — removing"
    # Best effort: shred may be unavailable or fail on some filesystems; the
    # directory is removed afterwards regardless.
    find "$SECRETS_DIR" -type f -exec shred -u {} \; 2>/dev/null || true
    # ${VAR:?} aborts instead of expanding to an empty path.
    rm -rf -- "${SECRETS_DIR:?}"
  fi
  log "running gen-secrets.sh..."
  (cd "$SCRIPT_DIR" && bash gen-secrets.sh "$SECRETS_DIR")
  ok "secrets generated in $SECRETS_DIR"
  state_set "secrets_generated" "true"
else
  if [[ -d "$SECRETS_DIR" ]]; then
    echo " [dry-run] would shred and remove leftover $SECRETS_DIR"
  fi
  log "running gen-secrets.sh..."
  echo " [dry-run] would run: bash gen-secrets.sh $SECRETS_DIR"
fi
# ── Phase 2: Encrypt + commit ─────────────────────────────────────────────────
# Encrypts the plaintext secrets into secrets.enc/ and commits the result
# together with creds-state.yaml.
step "2 — Encrypt secrets to secrets.enc/ and commit"

# Re-check: if secrets_generated is true but secrets/ is gone, decrypt first.
# The first attempt uses ~/.config/net-kingdom/age.key with its errors hidden;
# if that fails, the decryption is retried with $AGE_KEY.
if [[ "$(state_get secrets_generated)" == "true" && ! -d "$SECRETS_DIR" ]]; then
  log "secrets/ absent (was shredded) — decrypting from secrets.enc/ for re-apply..."
  if [[ "$DRY_RUN" == false ]]; then
    (cd "$SCRIPT_DIR" && bash decrypt-secrets.sh "$SECRETS_DIR" "$HOME/.config/net-kingdom/age.key" 2>/dev/null) \
      || (cd "$SCRIPT_DIR" && bash decrypt-secrets.sh "$SECRETS_DIR" "$AGE_KEY")
  else
    echo " [dry-run] would decrypt secrets.enc/ → secrets/"
  fi
fi

# Always (re-)encrypt in case secrets were just regenerated
if [[ "$DRY_RUN" == false ]]; then
  log "encrypting secrets → secrets.enc/ ..."
  (cd "$SCRIPT_DIR" && bash encrypt-secrets.sh "$SECRETS_DIR" "$AGE_KEY" --no-shred)
  ok "secrets encrypted to secrets.enc/"

  # Commit the encrypted secrets.
  # The directory is staged BEFORE checking for changes: `git diff` does not
  # report untracked files, so a freshly created secrets.enc/ would otherwise
  # look unchanged and never be committed. Comparing the index against HEAD
  # after staging covers new, modified and deleted files alike.
  cd "$REPO_ROOT"
  git add sso-mfa/bootstrap/secrets.enc/
  if git diff --cached --quiet -- sso-mfa/bootstrap/secrets.enc/; then
    ok "secrets.enc/ already committed — no changes"
  else
    git add sso-mfa/bootstrap/creds-state.yaml
    git commit -m "chore(creds): encrypted secrets [agent NK-WP-0005]"
    ok "encrypted secrets committed"
  fi
else
  echo " [dry-run] would encrypt + git commit secrets.enc/"
fi
# ── Phase 3: Inject secrets into cluster ──────────────────────────────────────
# Runs creds-apply.sh unless creds-state.yaml already marks postgres, lldap,
# authelia and privacyidea as applied. _PHASE3_RAN records whether the apply
# actually happened in this run; Phase 4 uses it to decide about a restart.
step "3 — Inject secrets into cluster (postgres → lldap → authelia → privacyidea)"
_PHASE3_RAN=false

# Check if all non-keycape components are already applied.
# "|| true": a key missing from the state file must read as "not applied"
# rather than abort the script through errexit/pipefail in a bare assignment.
PG=$(state_get_nested postgres || true)
LLDAP=$(state_get_nested lldap || true)
AUTH=$(state_get_nested authelia || true)
PI=$(state_get_nested privacyidea || true)

if [[ "$PG" == "true" && "$LLDAP" == "true" && "$AUTH" == "true" && "$PI" == "true" ]]; then
  ok "all cluster secrets already applied — skipping (avoids CNPG connection disruption)"
else
  if [[ "$DRY_RUN" == false ]]; then
    (cd "$SCRIPT_DIR" && bash creds-apply.sh "$SECRETS_DIR")
    ok "secrets applied to cluster"
    _PHASE3_RAN=true
  else
    echo " [dry-run] would run: bash creds-apply.sh $SECRETS_DIR"
  fi
fi
# ── Phase 4: Verify initial secrets ───────────────────────────────────────────
# Confirms that the four pre-bootstrap secrets exist in the cluster, then
# restarts privacyIDEA if (and only if) Phase 3 applied secrets in this run.
step "4 — Verify K8s secrets (pre-bootstrap)"
if [[ "$DRY_RUN" != false ]]; then
  echo " [dry-run] would verify K8s secrets"
  echo " [dry-run] would restart privacyIDEA if deployment exists"
else
  # "<namespace>/<secret name>" pairs; keycape is not applied at this point.
  required_secrets=(
    "databases/net-kingdom-pg-privacyidea-app"
    "sso/lldap-secrets"
    "sso/authelia-secrets"
    "mfa/privacyidea-config"
  )
  missing_count=0
  for secret_ref in "${required_secrets[@]}"; do
    secret_ns="${secret_ref%%/*}"
    secret_name="${secret_ref##*/}"
    # --ignore-not-found prints nothing for an absent secret, so "any output"
    # is the existence test.
    if kubectl get secret "$secret_name" --namespace="$secret_ns" --ignore-not-found -o name 2>/dev/null | grep -q .; then
      ok "secret $secret_ns/$secret_name exists"
    else
      warn "secret $secret_ns/$secret_name is missing"
      missing_count=$((missing_count + 1))
    fi
  done
  if ((missing_count > 0)); then
    die "One or more required secrets are missing — check creds-apply output above"
  fi

  # A restart is only warranted when secrets were applied during this run;
  # on a resumed run where Phase 3 was skipped the deployment is left alone.
  if [[ "${_PHASE3_RAN:-false}" == "true" ]] \
    && kubectl get deployment privacyidea -n mfa &>/dev/null; then
    log "restarting privacyIDEA deployment to pick up new secrets..."
    kubectl rollout restart deployment/privacyidea -n mfa
    ok "privacyIDEA restart triggered"
    log "waiting for rollout to complete..."
    kubectl rollout status deployment/privacyidea -n mfa --timeout=300s
    ok "privacyIDEA rollout complete"
  fi
fi
# ── Phase 5: Post-apply bootstrap — wait for privacyIDEA ──────────────────────
# Runs enckey-bootstrap.sh and bootstrap-admin.sh once the privacyIDEA
# deployment has rolled out. Each of the two steps is skipped individually when
# creds-state.yaml already records it as done.
step "5 — Post-apply bootstrap (privacyIDEA enckey + admin)"
NAMESPACE="mfa"
MAX_WAIT=300 # 5 minutes

if [[ "$(state_get enckey_bootstrapped)" != "true" || "$(state_get pi_admin_created)" != "true" ]]; then
  log "waiting for privacyIDEA pod to be Ready (max ${MAX_WAIT}s)..."
  if [[ "$DRY_RUN" != false ]]; then
    echo " [dry-run] would wait for privacyIDEA pod, run enckey-bootstrap.sh and bootstrap-admin.sh"
  else
    # Block until the rollout has finished so the bootstrap scripts do not race
    # against a pod left over from an earlier restart.
    kubectl rollout status deployment/privacyidea -n "$NAMESPACE" \
      --timeout="${MAX_WAIT}s" 2>/dev/null \
      || die "privacyIDEA deployment did not roll out within ${MAX_WAIT}s — check: kubectl get pods -n $NAMESPACE"

    # First pod in phase Running that carries the privacyidea label; the name
    # is only used for the emptiness check and the status line below.
    PI_POD=$(kubectl get pod -n "$NAMESPACE" \
      -l app.kubernetes.io/name=privacyidea \
      --field-selector=status.phase=Running \
      -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
    [[ -n "$PI_POD" ]] || die "privacyIDEA pod not found after rollout — check: kubectl get pods -n $NAMESPACE"
    ok "privacyIDEA pod ready: $PI_POD"

    # Encryption key
    if [[ "$(state_get enckey_bootstrapped)" == "true" ]]; then
      ok "enckey already bootstrapped"
    else
      log "running enckey-bootstrap.sh..."
      (cd "$K8S_DIR/privacyidea" && bash enckey-bootstrap.sh "$SECRETS_DIR")
      state_set "enckey_bootstrapped" "true"
      ok "enckey bootstrapped"
    fi

    # Admin account
    if [[ "$(state_get pi_admin_created)" == "true" ]]; then
      ok "pi-admin already created"
    else
      log "running bootstrap-admin.sh..."
      (cd "$K8S_DIR/privacyidea" && bash bootstrap-admin.sh "$SECRETS_DIR")
      state_set "pi_admin_created" "true"
      ok "pi-admin created"
    fi
  fi
else
  ok "privacyIDEA already bootstrapped — skipping"
fi
# ── Phase 6: Apply keycape secrets ────────────────────────────────────────────
# Runs create-pi-token.sh followed by create-secrets.sh from the keycape
# directory, then records keycape: true in the state file. Skipped when the
# state already says so.
step "6 — Apply KeyCape secrets (requires pi-admin)"
if [[ "$(state_get_nested keycape)" != "true" ]]; then
  if [[ "$DRY_RUN" != false ]]; then
    echo " [dry-run] would run create-pi-token.sh + keycape/create-secrets.sh"
  else
    log "fetching PI admin token..."
    (cd "$K8S_DIR/keycape" && bash create-pi-token.sh "$SECRETS_DIR")
    ok "PI admin token fetched"

    log "applying keycape secrets..."
    (cd "$K8S_DIR/keycape" && bash create-secrets.sh "$SECRETS_DIR")
    state_set_nested "keycape" "true"
    ok "keycape secrets applied"
  fi
else
  ok "keycape secrets already applied — skipping"
fi
# ── Phase 7: Final verification ────────────────────────────────────────────────
# Delegates to creds-verify.sh; under errexit a failing verification stops the
# run before the success message is printed.
step "7 — Final verification (all components)"
case "$DRY_RUN" in
  false)
    (cd "$SCRIPT_DIR" && bash creds-verify.sh)
    ok "all secrets verified"
    ;;
  *)
    echo " [dry-run] would run: bash creds-verify.sh"
    ;;
esac
# ── Phase 8: Ops bundle ────────────────────────────────────────────────────────
# Packs the plaintext secrets into a timestamped bundle in the repository root
# via pack-bundle.sh and records its location in the state file.
step "8 — Create ops bundle (age-encrypted snapshot)"
BUNDLE_NAME="ops-bundle-$(date +%Y%m%dT%H%M%S).tar.age"
BUNDLE_PATH="$REPO_ROOT/$BUNDLE_NAME"

if [[ "$(state_get ops_bundle_created)" != "true" ]]; then
  if [[ "$DRY_RUN" != false ]]; then
    echo " [dry-run] would run: bash pack-bundle.sh $SECRETS_DIR $AGE_PUBKEY $BUNDLE_PATH"
  else
    if [[ ! -d "$SECRETS_DIR" ]]; then
      die "secrets/ not found — cannot create ops bundle (re-run from phase 1)"
    fi
    log "creating ops bundle → $BUNDLE_PATH"
    (cd "$SCRIPT_DIR" && bash pack-bundle.sh "$SECRETS_DIR" "$AGE_PUBKEY" "$BUNDLE_PATH")
    state_set "ops_bundle_created" "true"
    # The location is stored with surrounding double quotes.
    state_set "ops_bundle_location" "\"$BUNDLE_PATH\""
    ok "ops bundle created: $BUNDLE_PATH"
  fi
else
  EXISTING_LOC="$(state_get ops_bundle_location)"
  ok "ops bundle already created: ${EXISTING_LOC:-<path unknown>} — skipping"
fi
# ── Phase 9: Emergency bundle ─────────────────────────────────────────────────
# Hands the age key, the plaintext secrets and the ops bundle to
# emergency-bundle.sh and records the delivery (with a timestamp) in the state
# file once that script returns successfully.
step "9 — Emergency bundle (human confirmation required)"
if [[ "$(state_get emergency_bundle_delivered)" == "true" ]]; then
  ok "emergency bundle already delivered — skipping"
else
  if [[ "$DRY_RUN" == false ]]; then
    log "assembling emergency bundle..."
    # "|| true": when ops_bundle_location is not recorded, the lookup must
    # yield an empty string so the ${OPS_LOC:-...} fallback below applies,
    # instead of aborting the script through errexit/pipefail.
    OPS_LOC="$( { state_get ops_bundle_location || true; } | tr -d '"')"
    (cd "$SCRIPT_DIR" && bash emergency-bundle.sh \
      --age-key "$AGE_KEY" \
      --secrets-dir "$SECRETS_DIR" \
      --ops-bundle "${OPS_LOC:-$BUNDLE_PATH}")
    state_set "emergency_bundle_delivered" "true"
    state_set "emergency_bundle_delivered_at" "\"$(date -Iseconds)\""
    ok "emergency bundle delivered and confirmed"
  else
    echo " [dry-run] would run: bash emergency-bundle.sh ..."
  fi
fi
# ── Phase 10: Cleanup + finalise ──────────────────────────────────────────────
# Removes the plaintext secrets, marks the bootstrap as complete and commits
# the final state file if staging it produced any change.
step "10 — Cleanup and finalise"
if [[ "$DRY_RUN" != false ]]; then
  echo " [dry-run] would shred secrets/ and set bootstrap_complete: true"
else
  if [[ -d "$SECRETS_DIR" ]]; then
    log "shredding plaintext secrets..."
    # Overwrite and unlink every file first, then drop the empty tree.
    find "$SECRETS_DIR" -type f -exec shred -u {} \;
    rm -rf "$SECRETS_DIR"
    ok "plaintext secrets shredded"
  fi

  state_set "bootstrap_complete" "true"

  # Commit final state
  cd "$REPO_ROOT"
  git add sso-mfa/bootstrap/creds-state.yaml
  git diff --cached --quiet || {
    git commit -m "chore(creds): bootstrap complete [agent NK-WP-0005]"
    ok "final state committed"
  }
fi
# ── Done ──────────────────────────────────────────────────────────────────────
# Closing summary, printed verbatim (quoted delimiter: no expansion).
cat <<'BANNER'

╔══════════════════════════════════════════════════════════════════╗
║ NET-KINGDOM CREDENTIAL BOOTSTRAP COMPLETE ║
╚══════════════════════════════════════════════════════════════════╝

 All service secrets have been generated, encrypted, committed,
 and injected into the cluster. The emergency bundle has been
 delivered to you for storage in your personal password manager.

 Run 'make creds-agent-status' to review the final state.

BANNER