fix(creds-bootstrap): harden agent bootstrap for non-interactive execution

- creds-bootstrap-agent.sh: skip Phase 3 if all secrets already applied
  (avoids CNPG SSL connection drops from repeated reconciliation)
- creds-bootstrap-agent.sh: wait for rollout to complete after restart
  before running enckey/admin bootstrap (fixes race with old pod)
- creds-bootstrap-agent.sh: only restart privacyIDEA when Phase 3 ran
- create-pi-token.sh: use env-var + retry for token fetch (no heredoc
  stdin; handles transient 500 from idle connection pool)
- create-pi-token.sh: create keycape-pi-token K8s Secret after fetching
- creds-verify.sh: map keycape-pi-token to secrets_applied.keycape
  (not pi_admin_created, which caused spurious Phase 5 re-runs)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-21 12:11:13 +00:00
parent 56036cd4be
commit 59ba9e6fe1
3 changed files with 80 additions and 47 deletions

View File

@@ -163,11 +163,19 @@ fi
step "3 — Inject secrets into cluster (postgres → lldap → authelia → privacyidea)"
if [[ "$DRY_RUN" == false ]]; then
(cd "$SCRIPT_DIR" && bash creds-apply.sh "$SECRETS_DIR")
ok "secrets applied to cluster"
# Check if all non-keycape components are already applied.
# creds-apply.sh is skipped only when all four flags below are exactly "true".
# NOTE(review): state_get_nested is defined outside this view — presumably it
# reads the per-component "applied" flag from the saved bootstrap state; confirm.
PG=$(state_get_nested postgres); LLDAP=$(state_get_nested lldap)
AUTH=$(state_get_nested authelia); PI=$(state_get_nested privacyidea)
if [[ "$PG" == "true" && "$LLDAP" == "true" && "$AUTH" == "true" && "$PI" == "true" ]]; then
ok "all cluster secrets already applied — skipping (avoids CNPG connection disruption)"
else
echo " [dry-run] would run: bash creds-apply.sh $SECRETS_DIR"
if [[ "$DRY_RUN" == false ]]; then
# Run creds-apply.sh from SCRIPT_DIR in a subshell so the caller's working
# directory is left unchanged.
(cd "$SCRIPT_DIR" && bash creds-apply.sh "$SECRETS_DIR")
ok "secrets applied to cluster"
# Remember that secrets were applied during this run; the restart step later
# reads this as ${_PHASE3_RAN:-false} to decide whether privacyIDEA needs a
# rollout restart.
_PHASE3_RAN=true
else
echo " [dry-run] would run: bash creds-apply.sh $SECRETS_DIR"
fi
fi
# ── Phase 4: Verify initial secrets ───────────────────────────────────────────
@@ -189,12 +197,17 @@ if [[ "$DRY_RUN" == false ]]; then
done
[[ "$ALL_OK" == true ]] || die "One or more required secrets are missing — check creds-apply output above"
# Restart privacyIDEA if the deployment exists, so it picks up the newly
# generated secrets. Without this, a running pod would have stale env vars.
if kubectl get deployment privacyidea -n mfa &>/dev/null 2>&1; then
# Restart privacyIDEA ONLY if secrets were just applied this run.
# Re-applying an unchanged secret still marks it "configured", so we track
# whether Phase 3 actually ran to avoid spurious restarts on resume.
# The :-false default covers runs where Phase 3 was skipped and _PHASE3_RAN
# was never assigned.
# `&>/dev/null` already discards both stdout and stderr, so the trailing
# `2>&1` is redundant (harmless).
if [[ "${_PHASE3_RAN:-false}" == "true" ]] && \
kubectl get deployment privacyidea -n mfa &>/dev/null 2>&1; then
log "restarting privacyIDEA deployment to pick up new secrets..."
kubectl rollout restart deployment/privacyidea -n mfa
ok "privacyIDEA restart triggered"
# Block until the restarted deployment has finished rolling out (at most 300s)
# so the enckey/admin bootstrap that follows does not race with the old pod.
log "waiting for rollout to complete..."
kubectl rollout status deployment/privacyidea -n mfa --timeout=300s
ok "privacyIDEA rollout complete"
fi
else
echo " [dry-run] would verify K8s secrets"
@@ -213,22 +226,20 @@ if [[ "$(state_get enckey_bootstrapped)" == "true" && "$(state_get pi_admin_crea
else
log "waiting for privacyIDEA pod to be Ready (max ${MAX_WAIT}s)..."
if [[ "$DRY_RUN" == false ]]; then
# Poll every 10s until a pod with the privacyidea label reports phase Running,
# or until MAX_WAIT seconds have been counted. Lookup errors are discarded and
# treated as "no pod yet".
WAITED=0
PI_POD=""
while [[ -z "$PI_POD" && $WAITED -lt $MAX_WAIT ]]; do
PI_POD=$(kubectl get pod -n "$NAMESPACE" \
-l app.kubernetes.io/name=privacyidea \
--field-selector=status.phase=Running \
-o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
if [[ -z "$PI_POD" ]]; then
sleep 10
WAITED=$((WAITED + 10))
log " waiting... (${WAITED}s / ${MAX_WAIT}s)"
fi
done
# Wait for the deployment rollout to fully complete — this ensures any
# previous pod from a rollout restart is terminated and the new pod is Ready.
# kubectl's own output is discarded (2>/dev/null); on timeout or error the
# script stops with the die message below instead.
if ! kubectl rollout status deployment/privacyidea -n "$NAMESPACE" \
--timeout="${MAX_WAIT}s" 2>/dev/null; then
die "privacyIDEA deployment did not roll out within ${MAX_WAIT}s — check: kubectl get pods -n $NAMESPACE"
fi
[[ -z "$PI_POD" ]] && die "privacyIDEA pod did not reach Running state within ${MAX_WAIT}s — check: kubectl get pods -n $NAMESPACE"
# Pick the first pod in phase Running that carries the privacyidea label
# (.items[0]); PI_POD is empty if the lookup fails or matches nothing.
# NOTE(review): phase Running is not the same as Ready, and the deployment may
# have more than one replica — confirm that the rollout wait above is enough
# to make .items[0] the new, ready pod.
PI_POD=$(kubectl get pod -n "$NAMESPACE" \
-l app.kubernetes.io/name=privacyidea \
--field-selector=status.phase=Running \
-o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
[[ -z "$PI_POD" ]] && die "privacyIDEA pod not found after rollout — check: kubectl get pods -n $NAMESPACE"
ok "privacyIDEA pod ready: $PI_POD"
# Run enckey bootstrap

View File

@@ -92,7 +92,7 @@ check "enckey (privacyidea-enckey)" \
# Verify the keycape-pi-token Secret (namespace sso) and record the outcome in
# the bootstrap state.
# NOTE(review): check and both update_state_* helpers are defined outside this
# view — presumably check runs the given callback when the Secret is present;
# confirm. Two callback lines appear below because this view shows both sides
# of the change; update_state_nested keycape is the mapping the commit message
# describes as the fix.
check "pi-admin token (keycape-pi-token)" \
sso keycape-pi-token \
update_state_top pi_admin_created
update_state_nested keycape
echo ""
echo "Results: $pass present, $fail missing"

View File

@@ -38,35 +38,49 @@ if [[ -z "$PI_ADMIN_PASSWORD" ]]; then
exit 1
fi
# Determine privacyIDEA base URL — use cluster-internal URL if kubectl is available
# and we can reach the service, otherwise fall back to the public hostname.
PI_BASE_URL=""
if kubectl get service privacyidea -n mfa &>/dev/null 2>&1; then
# Prefer running a one-shot pod inside the cluster to avoid needing
# public TLS to be up during bootstrap.
PI_BASE_URL="http://privacyidea.mfa.svc.cluster.local:8080"
USE_CLUSTER=true
else
PI_BASE_URL="https://pink.coulomb.social"
USE_CLUSTER=false
# Fetch token by exec-ing into the privacyIDEA pod (localhost call, bypasses
# NetworkPolicy which restricts external ingress to the service).
# PI_POD becomes the name of the first pod in phase Running with the
# privacyidea label, or stays empty when the lookup fails or matches nothing
# (kubectl errors are discarded and replaced by an empty string).
PI_POD=$(kubectl get pod -n mfa \
-l app.kubernetes.io/name=privacyidea \
--field-selector=status.phase=Running \
-o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
if [[ -z "$PI_POD" ]]; then
# Fall back to public hostname if pod is not directly accessible.
# NOTE(review): this branch is a no-op — PI_POD is already empty here. The
# actual fallback happens later, where an empty PI_POD selects the public-URL
# curl path.
PI_POD=""
fi
echo "Fetching privacyIDEA admin token from: $PI_BASE_URL"
echo "Fetching privacyIDEA admin token..."
if [[ "$USE_CLUSTER" == "true" ]]; then
# Run curl inside the cluster (avoids needing public TLS to be live)
TOKEN=$(kubectl run -n mfa --rm -i --restart=Never pi-token-fetch \
--image=curlimages/curl:8 --quiet \
-- curl -sf \
-X POST "$PI_BASE_URL/auth" \
-H "Content-Type: application/x-www-form-urlencoded" \
-d "username=pi-admin&password=${PI_ADMIN_PASSWORD}" \
2>/dev/null \
| python3 -c "import sys,json; data=json.load(sys.stdin); print(data['result']['value']['token'])" \
2>/dev/null || echo "")
if [[ -n "$PI_POD" ]]; then
echo " Method: kubectl exec into $PI_POD (avoids NetworkPolicy restriction)"
# Ask privacyIDEA for an admin token from inside the pod (POST to
# http://localhost:8080/auth as pi-admin) and capture it in TOKEN. TOKEN stays
# empty if every attempt fails.
# Pass the password via env var: `env` sets PI_ADMIN_PASSWORD for the in-pod
# Python, so the value is never spliced into the Python source.
# NOTE(review): the value is still an argument on the kubectl/env command line
# and may show up in process listings — confirm this is acceptable here.
# Retry up to 3 times — the PostgreSQL connection pool can return 500 if the
# idle SSL connection was dropped; it recovers on retry.
TOKEN=""
for _ATTEMPT in 1 2 3; do
  # stderr (kubectl messages and the Python error text) is discarded on
  # purpose; `|| echo ""` turns a failed attempt into an empty TOKEN instead
  # of a failing assignment.
  TOKEN=$(kubectl exec -n mfa "$PI_POD" -- \
    env PI_ADMIN_PASSWORD="${PI_ADMIN_PASSWORD}" \
    python3 -c '
import urllib.request, urllib.parse, json, os, sys
pw = os.environ["PI_ADMIN_PASSWORD"]
data = urllib.parse.urlencode({"username": "pi-admin", "password": pw}).encode()
req = urllib.request.Request("http://localhost:8080/auth", data=data)
try:
    with urllib.request.urlopen(req, timeout=10) as r:
        body = json.load(r)
    print(body["result"]["value"]["token"])
except Exception as e:
    print(str(e), file=sys.stderr)
    sys.exit(1)
' 2>/dev/null || echo "")
  if [[ -n "$TOKEN" ]]; then break; fi
  if [[ "$_ATTEMPT" -lt 3 ]]; then
    echo " Attempt $_ATTEMPT failed (likely transient DB connection drop) — retrying in 5s..."
    sleep 5
  else
    # Final attempt: nothing is left to retry, so neither sleep nor announce
    # a retry that will not happen.
    echo " Attempt $_ATTEMPT failed — no retries left"
  fi
done
else
echo " Method: public URL https://pink.coulomb.social"
TOKEN=$(curl -sf \
-X POST "$PI_BASE_URL/auth" \
-X POST "https://pink.coulomb.social/auth" \
-H "Content-Type: application/x-www-form-urlencoded" \
-d "username=pi-admin&password=${PI_ADMIN_PASSWORD}" \
| python3 -c "import sys,json; data=json.load(sys.stdin); print(data['result']['value']['token'])" \
@@ -84,6 +98,14 @@ mkdir -p "$(dirname "$TOKEN_FILE")"
# Persist the token locally with owner-only permissions. The restrictive umask
# is confined to the subshell and makes a newly created file private from the
# moment it exists; chmod additionally tightens a file that already existed.
# printf '%s' writes the token verbatim with no trailing newline.
(umask 077 && printf '%s' "$TOKEN" > "$TOKEN_FILE")
chmod 600 "$TOKEN_FILE"
# Create the keycape-pi-token K8s Secret (KeyCape reads it at startup).
# The value is read from TOKEN_FILE (written just above) instead of being
# passed with --from-literal, which keeps the token off the kubectl command
# line. Rendering with --dry-run=client and piping into `kubectl apply` makes
# the step repeatable: the Secret is created if absent, updated otherwise.
echo "Creating K8s Secret: keycape-pi-token (namespace: sso)"
if ! kubectl create secret generic keycape-pi-token \
  --namespace=sso \
  --from-file=token="$TOKEN_FILE" \
  --dry-run=client -o yaml | kubectl apply -f -; then
  # Do not report success when the Secret was not applied.
  echo "ERROR: failed to apply Secret keycape-pi-token in namespace sso" >&2
  exit 1
fi
echo " Done."
echo ""
echo "Token written to: $TOKEN_FILE"
echo "Token preview : ${TOKEN:0:32}"