Files
ops-warden/scripts/policy_gate_production_smoke.sh
tegwick 50ab78392f feat(smoke): joint-smoke mode against deployed flex-auth (assist FLEX-WP-0007 T4)
flex-auth asked ops-warden to help close FLEX-WP-0007 T4 (joint OpenBao + policy-gate
production smoke) against their deployed runtime (reachable on CoulombCore via the
flex-auth-coulombcore tunnel at 127.0.0.1:18090). The smoke previously spawned its own
local flex-auth, so it never exercised the deployed runtime.

Add FLEX_AUTH_EXTERNAL=1 to scripts/policy_gate_production_smoke.sh: skip the local
serve/load-registry and run the allow/deny/vault paths against the already-running
flex-auth, with a /healthz precheck that fails fast with a tunnel-up hint. Verified the
committed production_registry_snapshot.json is current vs inventory (4 actors). Recorded
in ADHOC-2026-06-29.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 00:40:20 +02:00

121 lines
4.3 KiB
Bash
Executable File

#!/usr/bin/env bash
# Production policy-gate smoke for WARDEN-WP-0009 T02.
#
# Validates flex-auth registry (from inventory), allow/deny paths through
# warden sign, and optionally OpenBao-backed signing when VAULT_TOKEN works.
#
# Usage:
# ./scripts/policy_gate_production_smoke.sh
# INVENTORY=~/.config/warden/inventory.yaml ./scripts/policy_gate_production_smoke.sh
# SMOKE_VAULT=1 ./scripts/policy_gate_production_smoke.sh # also test backend: vault
#
# Joint smoke against the DEPLOYED flex-auth (FLEX-WP-0007 T4): point at the runtime
# already reachable via the flex-auth-coulombcore tunnel instead of spawning a local
# binary. Run this on CoulombCore where the tunnel serves $FLEX_AUTH_ADDR:
# FLEX_AUTH_EXTERNAL=1 SMOKE_VAULT=1 VAULT_TOKEN=<scoped> \
# ./scripts/policy_gate_production_smoke.sh
# Strict mode: abort on a failing command, an unset variable, or a failed
# pipeline stage.
set -euo pipefail

# Repository root: the parent of the directory that holds this script.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"

# Caller-overridable settings. Each keeps the caller's value when it is set
# and non-empty, and otherwise falls back to the default shown.
: "${INVENTORY:=$HOME/.config/warden/inventory.yaml}"
: "${FLEX_AUTH_BIN:=/tmp/flex-auth}"
: "${PUBKEY:=$HOME/.ssh/agt-state-hub-bridge_ed25519.pub}"
: "${ACTOR:=agt-state-hub-bridge}"
POLICY="${FLEX_AUTH_POLICY:-$HOME/flex-auth/examples/ops-warden/policy_package.md}"
ADDR="${FLEX_AUTH_ADDR:-127.0.0.1:18090}"

# Fixed locations: the committed registry snapshot and a per-run scratch
# directory for keys, generated configs, state and logs.
REGISTRY="$ROOT/registry/flex-auth/production_registry_snapshot.json"
SMOKE_DIR="$(mktemp -d /tmp/warden-prod-policy-smoke-XXXXXX)"
#######################################
# EXIT handler: stop the locally spawned flex-auth (if any) and remove the
# per-run scratch directory so the throwaway CA private key, generated
# configs and logs do not pile up under /tmp.
# Globals:
#   FA_PID          (read) PID of the background flex-auth; unset in joint mode
#   SMOKE_DIR       (read) scratch directory created by mktemp
#   KEEP_SMOKE_DIR  (read) set to 1 to keep SMOKE_DIR for post-mortem inspection
# Returns:
#   0 always; every step is best effort so the script's own exit status wins.
#######################################
cleanup() {
  if [[ -n "${FA_PID:-}" ]] && kill -0 "$FA_PID" 2>/dev/null; then
    # The process may exit between the liveness probe and the kill; ignore that.
    kill "$FA_PID" 2>/dev/null || true
    wait "$FA_PID" 2>/dev/null || true
  fi
  if [[ -n "${SMOKE_DIR:-}" && -d "$SMOKE_DIR" ]]; then
    if [[ "${KEEP_SMOKE_DIR:-0}" == "1" ]]; then
      echo "==> Keeping smoke artifacts in $SMOKE_DIR" >&2
    else
      rm -rf -- "$SMOKE_DIR" || true
    fi
  fi
}
trap cleanup EXIT
# Bring up (or locate) the flex-auth instance that the sign calls below will
# consult at http://$ADDR.
if [[ "${FLEX_AUTH_EXTERNAL:-0}" == "1" ]]; then
  # Joint mode: use the already-running deployed flex-auth (via the tunnel). Do not
  # spawn a local binary or reload the registry — the runtime owns its loaded snapshot.
  echo "==> Using already-running flex-auth at $ADDR (joint smoke; no local binary)"
  curl -fsS -m 5 "http://$ADDR/healthz" >/dev/null || {
    echo "flex-auth not reachable at http://$ADDR/healthz — is the flex-auth-coulombcore tunnel up?" >&2
    exit 2
  }
else
  echo "==> Building registry from $INVENTORY"
  uv run --directory "$ROOT" python scripts/build_flex_auth_registry.py \
    "$INVENTORY" -o "$REGISTRY"
  "$FLEX_AUTH_BIN" load-registry --file "$REGISTRY" >/dev/null
  echo "==> Starting flex-auth on $ADDR"
  "$FLEX_AUTH_BIN" serve \
    --addr "$ADDR" \
    --registry "$REGISTRY" \
    --policy "$POLICY" \
    --log "$SMOKE_DIR/flex-auth-decisions.jsonl" &
  FA_PID=$!

  # A failing background job does not trip `set -e`, so verify startup
  # explicitly instead of trusting a fixed sleep. Without the liveness check a
  # failed bind (for example when something else already listens on $ADDR)
  # would go unnoticed and the smoke would talk to whatever answers there.
  # Readiness uses the same /healthz endpoint as the joint-mode precheck;
  # NOTE(review): assumes the local binary exposes it like the deployed one.
  sleep 0.6
  fa_ready=0
  for _ in {1..25}; do
    if ! kill -0 "$FA_PID" 2>/dev/null; then
      echo "flex-auth exited during startup — is $ADDR already in use (e.g. by the flex-auth-coulombcore tunnel)?" >&2
      exit 2
    fi
    # Per-attempt curl errors are expected while the server is still binding.
    if curl -fsS -m 1 "http://$ADDR/healthz" >/dev/null 2>&1; then
      fa_ready=1
      break
    fi
    sleep 0.2
  done
  if [[ "$fa_ready" != "1" ]]; then
    echo "flex-auth did not become healthy at http://$ADDR/healthz" >&2
    exit 2
  fi
fi
# Generate a throwaway, passphrase-less ed25519 CA key for the local backend.
ssh-keygen -t ed25519 -f "$SMOKE_DIR/ca_key" -N "" -q

# Warden config for the local-backend run, with the policy gate pointed at
# flex-auth on $ADDR. The policy settings must be nested under `policy:`;
# written flat, `policy:` would be an empty key and its settings would land
# at the top level. The heredoc body stays at column 0 because `<<EOF` does
# not strip leading whitespace.
cat >"$SMOKE_DIR/warden.yaml" <<EOF
backend: local
ca_key: $SMOKE_DIR/ca_key
state_dir: $SMOKE_DIR/state
inventory_path: $INVENTORY
policy:
  enabled: true
  flex_auth_url: http://$ADDR
  fail_closed: true
  tenant: tenant:platform
  system: ops-warden
EOF
export WARDEN_CONFIG="$SMOKE_DIR/warden.yaml"
# Allow path: a sign request without an explicit --ttl must succeed, and the
# newest line of the signatures log must be a JSON object carrying a non-empty
# policy_decision_id.
printf '%s\n' "==> Allow path: warden sign $ACTOR"
uv run --directory "$ROOT" warden sign "$ACTOR" --pubkey "$PUBKEY" >/dev/null

allow_check="import json,sys; e=json.loads(sys.argv[1]); assert e.get('policy_decision_id'), e; print('policy_decision_id:', e['policy_decision_id'])"
ALLOW_LINE="$(tail -n 1 "$SMOKE_DIR/state/signatures.log")"
python3 -c "$allow_check" "$ALLOW_LINE"
# Deny path: requesting --ttl 999 must be rejected with exit status 1 and a
# message that names the ttl_out_of_bounds reason.
echo "==> Deny path: ttl above max"

# `|| DENY_RC=$?` records the status without tripping `set -e`, so strict mode
# never has to be switched off and back on.
DENY_RC=0
DENY_OUT="$(uv run --directory "$ROOT" warden sign "$ACTOR" --pubkey "$PUBKEY" --ttl 999 2>&1)" || DENY_RC=$?

if [[ "$DENY_RC" -ne 1 ]]; then
  echo "expected deny exit 1, got $DENY_RC" >&2
  printf '%s\n' "$DENY_OUT" >&2
  exit 1
fi

# Match in-shell rather than `echo | grep -q`: under pipefail that pipeline can
# fail spuriously when grep exits early, and a genuine mismatch would abort the
# script without any explanation.
if [[ "$DENY_OUT" != *ttl_out_of_bounds* ]]; then
  echo "expected deny reason ttl_out_of_bounds, got:" >&2
  printf '%s\n' "$DENY_OUT" >&2
  exit 1
fi
# Optional vault-backed allow path, enabled with SMOKE_VAULT=1. Signs through
# the vault backend and checks that the newest log entry records
# backend == "vault" together with a policy_decision_id.
if [[ "${SMOKE_VAULT:-0}" == "1" ]]; then
  echo "==> Vault-backed allow (requires scoped VAULT_TOKEN)"
  # The vault and policy settings must be nested mappings; written flat,
  # `vault:`, `role_map:` and `policy:` would be empty keys. The heredoc body
  # stays at column 0 because `<<EOF` does not strip leading whitespace.
  # NOTE(review): token_env is assumed to belong to the `vault:` section —
  # confirm against warden's config schema.
  cat >"$SMOKE_DIR/warden-vault.yaml" <<EOF
backend: vault
vault:
  addr: https://bao.coulomb.social
  mount: ssh
  role_map:
    adm: adm-role
    agt: agt-role
    atm: atm-role
  token_env: VAULT_TOKEN
inventory_path: $INVENTORY
state_dir: $SMOKE_DIR/state-vault
policy:
  enabled: true
  flex_auth_url: http://$ADDR
  fail_closed: true
  tenant: tenant:platform
  system: ops-warden
EOF
  export WARDEN_CONFIG="$SMOKE_DIR/warden-vault.yaml"
  uv run --directory "$ROOT" warden sign "$ACTOR" --pubkey "$PUBKEY" >/dev/null
  # `tail -n 1` is the POSIX spelling of the obsolete `tail -1`.
  VAULT_LINE="$(tail -n 1 "$SMOKE_DIR/state-vault/signatures.log")"
  python3 -c "import json,sys; e=json.loads(sys.argv[1]); assert e.get('backend')=='vault' and e.get('policy_decision_id'); print('vault policy_decision_id:', e['policy_decision_id'])" "$VAULT_LINE"
fi
# Reached only when every check above passed (strict mode aborts earlier otherwise).
printf '%s\n' "OK — production registry policy gate smoke passed"