From 50ab78392f6fcdbffc0e87006feceb89d8c16c22 Mon Sep 17 00:00:00 2001 From: tegwick Date: Mon, 29 Jun 2026 00:40:20 +0200 Subject: [PATCH] feat(smoke): joint-smoke mode against deployed flex-auth (assist FLEX-WP-0007 T4) flex-auth asked ops-warden to help close FLEX-WP-0007 T4 (joint OpenBao + policy-gate production smoke) against their deployed runtime (reachable on CoulombCore via the flex-auth-coulombcore tunnel at 127.0.0.1:18090). The smoke previously spawned its own local flex-auth, so it never exercised the deployed runtime. Add FLEX_AUTH_EXTERNAL=1 to scripts/policy_gate_production_smoke.sh: skip the local serve/load-registry and run the allow/deny/vault paths against the already-running flex-auth, with a /healthz precheck that fails fast with a tunnel-up hint. Verified the committed production_registry_snapshot.json is current vs inventory (4 actors). Recorded in ADHOC-2026-06-29. Co-Authored-By: Claude Opus 4.8 --- scripts/policy_gate_production_smoke.sh | 40 +++++++++++++++++-------- workplans/ADHOC-2026-06-29.md | 38 +++++++++++++++++++++++ 2 files changed, 66 insertions(+), 12 deletions(-) create mode 100644 workplans/ADHOC-2026-06-29.md diff --git a/scripts/policy_gate_production_smoke.sh b/scripts/policy_gate_production_smoke.sh index 8633e5e..6c96584 100755 --- a/scripts/policy_gate_production_smoke.sh +++ b/scripts/policy_gate_production_smoke.sh @@ -8,6 +8,12 @@ # ./scripts/policy_gate_production_smoke.sh # INVENTORY=~/.config/warden/inventory.yaml ./scripts/policy_gate_production_smoke.sh # SMOKE_VAULT=1 ./scripts/policy_gate_production_smoke.sh # also test backend: vault +# +# Joint smoke against the DEPLOYED flex-auth (FLEX-WP-0007 T4): point at the runtime +# already reachable via the flex-auth-coulombcore tunnel instead of spawning a local +# binary. 
Run this on CoulombCore where the tunnel serves $FLEX_AUTH_ADDR: +# FLEX_AUTH_EXTERNAL=1 SMOKE_VAULT=1 VAULT_TOKEN=<scoped-token> \ +# ./scripts/policy_gate_production_smoke.sh set -euo pipefail ROOT="$(cd "$(dirname "$0")/.." && pwd)" @@ -28,19 +34,29 @@ cleanup() { } trap cleanup EXIT -echo "==> Building registry from $INVENTORY" -uv run --directory "$ROOT" python scripts/build_flex_auth_registry.py \ - "$INVENTORY" -o "$REGISTRY" -"$FLEX_AUTH_BIN" load-registry --file "$REGISTRY" >/dev/null +if [[ "${FLEX_AUTH_EXTERNAL:-0}" == "1" ]]; then + # Joint mode: use the already-running deployed flex-auth (via the tunnel). Do not + # spawn a local binary or reload the registry — the runtime owns its loaded snapshot. + echo "==> Using already-running flex-auth at $ADDR (joint smoke; no local binary)" + curl -fsS -m 5 "http://$ADDR/healthz" >/dev/null || { + echo "flex-auth not reachable at http://$ADDR/healthz — is the flex-auth-coulombcore tunnel up?" >&2 + exit 2 + } +else + echo "==> Building registry from $INVENTORY" + uv run --directory "$ROOT" python scripts/build_flex_auth_registry.py \ + "$INVENTORY" -o "$REGISTRY" + "$FLEX_AUTH_BIN" load-registry --file "$REGISTRY" >/dev/null -echo "==> Starting flex-auth on $ADDR" -"$FLEX_AUTH_BIN" serve \ - --addr "$ADDR" \ - --registry "$REGISTRY" \ - --policy "$POLICY" \ - --log "$SMOKE_DIR/flex-auth-decisions.jsonl" & -FA_PID=$!
+ sleep 0.6 +fi ssh-keygen -t ed25519 -f "$SMOKE_DIR/ca_key" -N "" -q diff --git a/workplans/ADHOC-2026-06-29.md b/workplans/ADHOC-2026-06-29.md new file mode 100644 index 0000000..a33325c --- /dev/null +++ b/workplans/ADHOC-2026-06-29.md @@ -0,0 +1,38 @@ +--- +id: ADHOC-2026-06-29 +type: workplan +title: "Ad Hoc Tasks — 2026-06-29" +domain: infotech +repo: ops-warden +status: finished +owner: claude +topic_slug: custodian +created: "2026-06-29" +updated: "2026-06-29" +--- + +# Ad Hoc Tasks — 2026-06-29 + +### T01 — Joint-smoke mode for the deployed flex-auth (assist FLEX-WP-0007 T4) + +```task +id: ADHOC-2026-06-29-T01 +status: done +priority: medium +``` + +flex-auth (msg `ea00620b`) asked ops-warden to help close FLEX-WP-0007 T4 (joint OpenBao ++ policy-gate production smoke). Their deployed runtime is reachable on CoulombCore via +the flex-auth-coulombcore tunnel at `127.0.0.1:18090`, but `policy_gate_production_smoke.sh` +spawned its **own** local flex-auth binary — so it never exercised the deployed runtime. + +- [x] Added `FLEX_AUTH_EXTERNAL=1` mode to `scripts/policy_gate_production_smoke.sh`: skips + the local `serve`/`load-registry` and runs the allow/deny/vault paths against the + already-running deployed flex-auth, with a `/healthz` precheck that fails fast with a + "is the flex-auth-coulombcore tunnel up?" hint (verified: clean exit 2 when down). +- [x] Verified the committed `production_registry_snapshot.json` is **current** (rebuilt + from `~/.config/warden/inventory.yaml`, diff-clean; 4 actors). +- [x] Answered flex-auth's three questions and handed the operator the exact CoulombCore + runbook (see reply). Remaining T4 steps are operator-gated and cannot run from the + workstation: mint a scoped `VAULT_TOKEN` (ops-warden holds no standing token by + design), run the joint smoke on CoulombCore, then flip `policy.enabled: true`.