FLEX-WP-0007: production registry fixture, tests, and sync runbook
Some checks are pending
CI / Build and Test (push) Waiting to run
CI / Lint (push) Waiting to run

Add production_registry_snapshot.json from ops-warden inventory with CI
coverage for real actors, IAM subject binding, ttl_out_of_bounds, and
unknown_actor_resource. Extend serve contract tests with /healthz and
publish the registry sync contract for operator deployment.
This commit is contained in:
2026-06-24 14:52:35 +02:00
parent fae0f00a69
commit 941501c590
7 changed files with 981 additions and 3 deletions

View File

@@ -111,6 +111,15 @@ func TestServeOpsWardenCheckContract(t *testing.T) {
server := httptest.NewServer(newServeMux(engine))
defer server.Close()
resp, err := http.Get(server.URL + "/healthz")
if err != nil {
t.Fatalf("GET /healthz: %v", err)
}
resp.Body.Close()
if resp.StatusCode != http.StatusOK {
t.Fatalf("GET /healthz status = %d; want 200", resp.StatusCode)
}
allow := postCheck(t, server.URL+"/v1/check", opsPath("check_request_allow_adm.json"))
if allow.Effect != api.DecisionEffectAllow || allow.ID == "" {
t.Fatalf("allow decision = %+v; want allow with id", allow)
@@ -121,7 +130,7 @@ func TestServeOpsWardenCheckContract(t *testing.T) {
t.Fatalf("deny decision = %+v; want ttl_out_of_bounds deny", deny)
}
resp, err := http.Get(server.URL + "/v1/check")
resp, err = http.Get(server.URL + "/v1/check")
if err != nil {
t.Fatalf("GET /v1/check: %v", err)
}
@@ -148,6 +157,124 @@ func TestServeOpsWardenCheckContract(t *testing.T) {
}
}
func TestRunLoadRegistryOpsWardenProduction(t *testing.T) {
var stdout, stderr bytes.Buffer
code := run([]string{"load-registry", "--file", opsPath("production_registry_snapshot.json")}, &stdout, &stderr)
if code != 0 {
t.Fatalf("code = %d, stderr = %s", code, stderr.String())
}
var result map[string]any
if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
t.Fatalf("unmarshal load-registry output: %v; stdout = %s", err, stdout.String())
}
if result["subjects"] != float64(4) || result["relationships"] != float64(4) || result["resource_manifests"] != float64(1) {
t.Fatalf("load-registry result = %+v; want production actor registry counts", result)
}
}
// TestOpsWardenProductionRegistryActors builds an engine from the production
// registry snapshot and the ops-warden policy package, then checks one sign
// request per table entry: allows for the registered actors (including the
// iam: subject form) and denies for an oversized TTL and an unregistered actor.
func TestOpsWardenProductionRegistryActors(t *testing.T) {
	ctx := context.Background()

	engine, err := buildEngine(ctx, opsPath("production_registry_snapshot.json"), opsPath("policy_package.md"), "")
	if err != nil {
		t.Fatalf("buildEngine: %v", err)
	}

	// actorCase describes one sign request and the decision expected for it.
	// wantReason is only compared when it is non-empty.
	type actorCase struct {
		name       string
		subjectID  string
		actor      string
		actorType  string
		principal  string
		ttlHours   float64
		wantEffect api.DecisionEffect
		wantReason string
	}

	tests := []actorCase{
		{
			name:       "state hub bridge agent allow",
			subjectID:  "agt-state-hub-bridge",
			actor:      "agt-state-hub-bridge",
			actorType:  "agt",
			principal:  "agt-task-bridge",
			ttlHours:   1,
			wantEffect: api.DecisionEffectAllow,
		},
		{
			name:       "state hub bridge IAM subject allow",
			subjectID:  "iam:agt-state-hub-bridge",
			actor:      "agt-state-hub-bridge",
			actorType:  "agt",
			principal:  "agt-task-bridge",
			ttlHours:   1,
			wantEffect: api.DecisionEffectAllow,
		},
		{
			name:       "codex interhub bootstrap agent allow",
			subjectID:  "agt-codex-interhub-bootstrap",
			actor:      "agt-codex-interhub-bootstrap",
			actorType:  "agt",
			principal:  "agt-interhub-bootstrap",
			ttlHours:   1,
			wantEffect: api.DecisionEffectAllow,
		},
		{
			name:       "admin actor allow",
			subjectID:  "adm-example",
			actor:      "adm-example",
			actorType:  "adm",
			principal:  "adm-full",
			ttlHours:   4,
			wantEffect: api.DecisionEffectAllow,
		},
		{
			name:       "automation actor allow",
			subjectID:  "atm-backup-daily",
			actor:      "atm-backup-daily",
			actorType:  "atm",
			principal:  "atm-backup-daily",
			ttlHours:   1,
			wantEffect: api.DecisionEffectAllow,
		},
		{
			name:       "ttl above production max denies",
			subjectID:  "agt-state-hub-bridge",
			actor:      "agt-state-hub-bridge",
			actorType:  "agt",
			principal:  "agt-task-bridge",
			ttlHours:   999,
			wantEffect: api.DecisionEffectDeny,
			wantReason: "ttl_out_of_bounds",
		},
		{
			name:       "unregistered production actor denies",
			subjectID:  "agt-missing",
			actor:      "agt-missing",
			actorType:  "agt",
			principal:  "agt-missing",
			ttlHours:   1,
			wantEffect: api.DecisionEffectDeny,
			wantReason: "unknown_actor_resource",
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			req := opsWardenProductionSignRequest(tc.subjectID, tc.actor, tc.actorType, tc.principal, tc.ttlHours)
			decision, err := engine.Check(ctx, req)
			if err != nil {
				t.Fatalf("Check: %v", err)
			}

			// Each Fatal call ends the subtest, so only the first failing
			// expectation (effect, then reason, then ID) is reported.
			switch {
			case decision.Effect != tc.wantEffect:
				t.Fatalf("decision.Effect = %q; want %q; decision: %+v", decision.Effect, tc.wantEffect, decision)
			case tc.wantReason != "" && decision.Reason != tc.wantReason:
				t.Fatalf("decision.Reason = %q; want %q; decision: %+v", decision.Reason, tc.wantReason, decision)
			case tc.wantEffect == api.DecisionEffectAllow && decision.ID == "":
				t.Fatal("allow decision ID is empty")
			}
		})
	}
}
func TestRunValidateAccessDescriptor(t *testing.T) {
var stdout, stderr bytes.Buffer
code := run([]string{"validate", "--kind", "access-descriptor", "--file", examplePath("access_descriptor.yaml")}, &stdout, &stderr)
@@ -167,6 +294,29 @@ func opsPath(name string) string {
return filepath.Join("..", "..", "examples", "ops-warden", name)
}
// opsWardenProductionSignRequest assembles a "sign" check request in
// tenant:platform for the ssh-certificate resource "ssh-cert:actor/<actor>" of
// the ops-warden system. subjectID and actorType populate the subject
// reference; principal, actorType and ttlHours are passed through in the
// request context alongside a fixed example public-key fingerprint.
func opsWardenProductionSignRequest(subjectID, actor, actorType, principal string, ttlHours float64) api.CheckRequest {
	subject := api.SubjectRef{
		ID:   subjectID,
		Type: api.SubjectType(actorType),
	}
	resource := api.ResourceRef{
		ID:     "ssh-cert:actor/" + actor,
		Type:   "ssh-certificate",
		System: "ops-warden",
	}
	signContext := map[string]any{
		"principals":         []string{principal},
		"actor_type":         actorType,
		"ttl_hours":          ttlHours,
		"pubkey_fingerprint": "SHA256:example-production-fingerprint",
	}

	return api.CheckRequest{
		ID:       "check:ops-warden-production-" + actor,
		Tenant:   "tenant:platform",
		Subject:  subject,
		Action:   "sign",
		Resource: resource,
		Context:  signContext,
	}
}
func postCheck(t *testing.T, url, path string) api.DecisionEnvelope {
t.Helper()

View File

@@ -80,3 +80,25 @@ integration, host documentation, and signatures.log production evidence.
No SSH private keys, OpenBao tokens, database credentials, or real public-key
material are stored in these fixtures.
## FLEX-WP-0007 Production Update
Additional published assets:
- Production registry fixture: examples/ops-warden/production_registry_snapshot.json
- Registry sync runbook: docs/ops-warden-registry-sync.md
Production runtime command:
flex-auth serve --addr 0.0.0.0:8080 --registry examples/ops-warden/production_registry_snapshot.json --policy examples/ops-warden/policy_package.md --log /var/log/flex-auth/ops-warden-decisions.jsonl
Use http://flex-auth.flex-auth.svc.cluster.local:8080 when cluster DNS is
reachable from warden workstations. Otherwise use the approved operator tunnel
or ingress URL. Always pre-flight GET /healthz from the same workstation before
enabling policy.enabled with fail_closed true.
Production actor coverage now verifies agt-state-hub-bridge,
agt-codex-interhub-bootstrap, adm-example, atm-backup-daily, ttl_out_of_bounds,
unknown_actor_resource, and the iam:agt-state-hub-bridge subject path used by
WARDEN_POLICY_SUBJECT.

View File

@@ -0,0 +1,128 @@
# Ops-Warden Registry Sync
Date: 2026-06-23
Workplan: FLEX-WP-0007
This is the flex-auth side of the production policy gate runbook for ops-warden
SSH signing. ops-warden owns actor inventory and generated registry content;
flex-auth hosts that registry, evaluates the policy package, and returns the
decision envelope used by warden sign.
## Production Runtime Target
Use the NetKingdom operator-reachable service URL as the canonical
policy.flex_auth_url. The preferred target is an in-cluster flex-auth Service
fronted by the existing operator access path:
http://flex-auth.flex-auth.svc.cluster.local:8080
If cluster DNS is not reachable from the workstation that runs warden sign, use
an approved operator tunnel or ingress URL with the same base path semantics. Do
not turn on policy.enabled with fail_closed true until this pre-flight succeeds
from the same workstation:
curl -fsS <policy.flex_auth_url>/healthz
Start the runtime with the production registry snapshot and the ops-warden
policy package:
flex-auth serve --addr 0.0.0.0:8080 --registry examples/ops-warden/production_registry_snapshot.json --policy examples/ops-warden/policy_package.md --log /var/log/flex-auth/ops-warden-decisions.jsonl
The checked-in production snapshot is a non-secret fixture and initial load
target. Regenerate it from ops-warden inventory whenever actors, principals, or
TTL defaults change.
## Current Operator Tunnel
As of 2026-06-24, the reachable operator-tunnel URL for CoulombCore is:
http://127.0.0.1:18090
The tunnel name is flex-auth-coulombcore. It forwards CoulombCore
127.0.0.1:18090 to the local flex-auth runtime on 127.0.0.1:18090. Verified
checks from CoulombCore:
- GET /healthz returned HTTP 200.
- POST /v1/check for agt-state-hub-bridge returned allow with decision:873c6c682a52bebc.
This operator tunnel is an interim access path. It does not replace a future
in-cluster Service if flex-auth is later deployed inside the cluster.
## Ownership Contract
| Concern | Owner | Notes |
| --- | --- | --- |
| Actor names and actor types | ops-warden | inventory.yaml defines adm, agt, and atm actors. |
| Default principals and TTLs | ops-warden | Used by warden sign and by generated registry attributes. |
| Registry hosting and reload | flex-auth | Runtime serves the generated snapshot and evaluates it with the policy package. |
| Policy package semantics | flex-auth | examples/ops-warden/policy_package.md owns allow and deny reasons. |
| OpenBao SSH signing | ops-warden | flex-auth never receives SSH private keys or Vault tokens. |
| Production policy.enabled flip | ops-warden operator | Only after healthz and allow/deny smoke pass. |
## Sync Procedure
1. In ops-warden, update the managed inventory source or ~/.config/warden/inventory.yaml.
2. Regenerate the flex-auth snapshot from ops-warden:
python scripts/build_flex_auth_registry.py ~/.config/warden/inventory.yaml -o registry/flex-auth/production_registry_snapshot.json
3. Validate the generated file before handoff:
flex-auth load-registry --file registry/flex-auth/production_registry_snapshot.json
4. Copy or promote the snapshot to the flex-auth runtime. For repo-level drift
coverage, update examples/ops-warden/production_registry_snapshot.json when
the intended production fixture changes.
5. Restart or reload the flex-auth runtime with the new snapshot.
6. From the workstation that runs warden sign, verify:
curl -fsS <policy.flex_auth_url>/healthz
7. Run one allow smoke and one deny smoke. Record only non-secret evidence:
actor name, decision id, effect, reason, backend, and whether a certificate
was issued.
## Current Production Fixture
The initial fixture mirrors ops-warden production inventory as of 2026-06-23.
It registers:
| Actor | Type | Principal | Max TTL hours | Allowed subjects |
| --- | --- | --- | --- | --- |
| adm-example | adm | adm-full | 48 | adm-example, iam:adm-example |
| agt-codex-interhub-bootstrap | agt | agt-interhub-bootstrap | 2 | agt-codex-interhub-bootstrap, iam:agt-codex-interhub-bootstrap |
| agt-state-hub-bridge | agt | agt-task-bridge | 24 | agt-state-hub-bridge, iam:agt-state-hub-bridge |
| atm-backup-daily | atm | atm-backup-daily | 8 | atm-backup-daily, iam:atm-backup-daily |
The IAM subject form is intended for WARDEN_POLICY_SUBJECT. If that environment
variable is unset, ops-warden sends the actor name and the same policy path
continues to work.
## Smoke Expectations
Allow path:
warden sign agt-state-hub-bridge
Expected non-secret evidence: decision effect allow, reason
signing_policy_matched, signatures.log includes policy_decision_id.
Deny path:
warden sign agt-state-hub-bridge --ttl 999
Expected non-secret evidence: effect deny, reason ttl_out_of_bounds, no
certificate issued. With fail_closed true, unreachable flex-auth must also block
signing.
OpenBao-backed signing remains an operator smoke because it requires a scoped
VAULT_TOKEN. The previous session returned HTTP 403 on 2026-06-23; retry with:
SMOKE_VAULT=1 ~/ops-warden/scripts/policy_gate_production_smoke.sh
## References
- docs/ops-warden-policy-gate-handoff.md
- examples/ops-warden/production_registry_snapshot.json
- ~/ops-warden/wiki/PolicyGatedSigning.md
- ~/ops-warden/history/2026-06-23-flex-auth-policy-gate-production-smoke.md

View File

@@ -25,6 +25,7 @@ This document captures the current sequencing view for flex-auth workplans.
| `FLEX-WP-0003` | complete | completed | `FLEX-WP-0002` | Markitect consumer integration and first CARING benchmark are complete: resource namespace, manifest import, action vocabulary, descriptor fixtures, decision fixtures, integration docs. |
| `FLEX-WP-0004` | complete | completed | `FLEX-WP-0002`, `FLEX-WP-0005` | Delegated PDP and directory adapter boundary work is complete: Topaz adapter shape, OpenFGA/SpiceDB, OPA/Cedar, Keycloak Authorization Services, Entra/Graph/SCIM, CARING envelope preservation. |
| `FLEX-WP-0006` | complete | finished | `FLEX-WP-0002`, `FLEX-WP-0005` | Ops-warden unblocker is complete: flex-auth publishes `ssh-certificate` / `sign` policies, fixtures, and `/v1/check` smoke evidence for the opt-in pre-sign gate shipped in ops-warden `WARDEN-WP-0007` and tracked for production in `WARDEN-WP-0009`. |
| `FLEX-WP-0007` | `P0` | blocked | `FLEX-WP-0006` | Repo-side production registry fixture, sync contract, runtime command, healthz coverage, and real actor/IAM tests are implemented. An operator tunnel runtime is reachable from CoulombCore; stable-URL deployment and OpenBao smoke remain blocked on NetKingdom reachability and scoped VAULT_TOKEN refresh. |
## Dependency Notes
@@ -79,5 +80,6 @@ Native State Hub dependency edges:
- `FLEX-WP-0004 -> FLEX-WP-0005` (Topaz adapter consumes the spike)
- `FLEX-WP-0006 -> FLEX-WP-0002`
- `FLEX-WP-0006 -> FLEX-WP-0005`
- ops-warden: `WARDEN-WP-0009` waits for `FLEX-WP-0006` output before
production enablement of `policy.enabled`.
- ops-warden: `WARDEN-WP-0009` finished (caller + registry smoke). Production
`policy.enabled: true` waits for `FLEX-WP-0007` (reachable flex-auth runtime).
- `FLEX-WP-0007 -> FLEX-WP-0006`

View File

@@ -32,3 +32,18 @@ flex-auth check --registry examples/ops-warden/registry_snapshot.json --policy e
The fixture public-key fingerprints are examples only. Do not put real keys,
OpenBao tokens, or private signing material in these files.
## Production Registry Fixture
production_registry_snapshot.json is a non-secret fixture generated by
ops-warden for FLEX-WP-0007 coverage. It mirrors the current production actor
names used by ops-warden inventory and should be refreshed when that inventory
changes.
Validate both registries locally:
flex-auth load-registry --file examples/ops-warden/registry_snapshot.json
flex-auth load-registry --file examples/ops-warden/production_registry_snapshot.json
The production sync contract is documented in docs/ops-warden-registry-sync.md.

View File

@@ -0,0 +1,450 @@
{
"systems": [
{
"id": "ops-warden",
"name": "Ops Warden",
"resource_types": [
{
"name": "ssh-certificate",
"scope_level": "Resource",
"planes": [
"Identity",
"Secret",
"Audit"
],
"metadata": {
"description": "Short-lived SSH certificate signing request."
}
}
],
"actions": [
{
"name": "sign",
"capabilities": [
"Use",
"Operate",
"Audit"
],
"planes": [
"Identity",
"Secret",
"Audit"
],
"exposure_modes": [
"Metadata"
],
"metadata": {
"required_context": [
"principals",
"actor_type",
"pubkey_fingerprint",
"ttl_hours"
]
}
}
],
"caring_profiles": [
"caring-0.4.0-rc2"
],
"metadata": {
"flex_auth_contract": "protected-system-v0",
"ops_warden_policy_gate": "v2",
"policy_enabled_config": "policy.enabled",
"tenant": "tenant:platform"
}
}
],
"resource_manifests": [
{
"id": "ops-warden-ssh-certificates",
"system": "ops-warden",
"resources": [
{
"id": "ssh-cert:actor/adm-example",
"type": "ssh-certificate",
"labels": [
"ssh-signing",
"adm"
],
"trust_zone": "platform",
"owner": "team:platform-security",
"attributes": {
"actor_id": "adm-example",
"actor_type": "adm",
"allowed_subjects": [
"adm-example",
"iam:adm-example"
],
"allowed_principals": [
"adm-full"
],
"max_ttl_hours": 48
}
},
{
"id": "ssh-cert:actor/agt-codex-interhub-bootstrap",
"type": "ssh-certificate",
"labels": [
"ssh-signing",
"agt"
],
"trust_zone": "platform",
"owner": "team:platform-security",
"attributes": {
"actor_id": "agt-codex-interhub-bootstrap",
"actor_type": "agt",
"allowed_subjects": [
"agt-codex-interhub-bootstrap",
"iam:agt-codex-interhub-bootstrap"
],
"allowed_principals": [
"agt-interhub-bootstrap"
],
"max_ttl_hours": 2
}
},
{
"id": "ssh-cert:actor/agt-state-hub-bridge",
"type": "ssh-certificate",
"labels": [
"ssh-signing",
"agt"
],
"trust_zone": "platform",
"owner": "team:platform-security",
"attributes": {
"actor_id": "agt-state-hub-bridge",
"actor_type": "agt",
"allowed_subjects": [
"agt-state-hub-bridge",
"iam:agt-state-hub-bridge"
],
"allowed_principals": [
"agt-task-bridge"
],
"max_ttl_hours": 24
}
},
{
"id": "ssh-cert:actor/atm-backup-daily",
"type": "ssh-certificate",
"labels": [
"ssh-signing",
"atm"
],
"trust_zone": "platform",
"owner": "team:platform-security",
"attributes": {
"actor_id": "atm-backup-daily",
"actor_type": "atm",
"allowed_subjects": [
"atm-backup-daily",
"iam:atm-backup-daily"
],
"allowed_principals": [
"atm-backup-daily"
],
"max_ttl_hours": 8
}
}
],
"actions": [
"sign"
],
"caring_profile": "caring-0.4.0-rc2",
"metadata": {
"flex_auth_contract": "resource-registration-v0",
"tenant": "tenant:platform"
}
}
],
"tenants": [
{
"id": "tenant:platform",
"name": "Platform Tenant"
}
],
"subjects": [
{
"id": "adm-example",
"type": "Agent",
"display_name": "Example human operator \u2014 replace with per-person adm-* actors",
"organization_relation": "ServiceProvider",
"roles": [
"Operator"
],
"groups": [
"group:ops-warden-admins"
],
"tenant": "tenant:platform",
"metadata": {
"actor_type": "adm"
}
},
{
"id": "agt-codex-interhub-bootstrap",
"type": "Agent",
"display_name": "Short-lived agent access for attended Inter-Hub bootstrap",
"organization_relation": "ServiceProvider",
"roles": [
"Operator"
],
"groups": [
"group:ops-warden-agents"
],
"tenant": "tenant:platform",
"metadata": {
"actor_type": "agt"
}
},
{
"id": "agt-state-hub-bridge",
"type": "Agent",
"display_name": "ops-bridge tunnel agent for state-hub",
"organization_relation": "ServiceProvider",
"roles": [
"Operator"
],
"groups": [
"group:ops-warden-agents"
],
"tenant": "tenant:platform",
"metadata": {
"actor_type": "agt"
}
},
{
"id": "atm-backup-daily",
"type": "Automation",
"display_name": "Example nightly automation actor",
"organization_relation": "ServiceProvider",
"roles": [
"Operator"
],
"groups": [
"group:ops-warden-automations"
],
"tenant": "tenant:platform",
"metadata": {
"actor_type": "atm"
}
}
],
"groups": [
{
"id": "group:ops-warden-admins",
"display_name": "Ops Warden Admins",
"members": [
"adm-example"
],
"tenant": "tenant:platform"
},
{
"id": "group:ops-warden-agents",
"display_name": "Ops Warden Agents",
"members": [
"agt-codex-interhub-bootstrap",
"agt-state-hub-bridge"
],
"tenant": "tenant:platform"
},
{
"id": "group:ops-warden-automations",
"display_name": "Ops Warden Automations",
"members": [
"atm-backup-daily"
],
"tenant": "tenant:platform"
}
],
"relationships": [
{
"id": "rel:adm-example-sign-adm-example",
"system": "ops-warden",
"subject": "group:ops-warden-admins",
"relation": "signer",
"object": "ssh-cert:actor/adm-example",
"tenant": "tenant:platform",
"conditions": [
"TimeLimited",
"Logged"
],
"caring": {
"id": "descriptor:ops-warden-adm-signer",
"profile": "caring-0.4.0-rc2",
"subject_type": "Group",
"organization_relation": "ServiceProvider",
"canonical_role": "Operator",
"scope": {
"level": "Resource",
"id": "ssh-cert:actor/adm-example",
"tenant": "tenant:platform",
"resource": "ssh-cert:actor/adm-example"
},
"planes": [
"Identity",
"Secret",
"Audit"
],
"capabilities": [
"Use",
"Operate",
"Audit"
],
"exposure_modes": [
"Metadata"
],
"conditions": [
"TimeLimited",
"Logged"
],
"restrictions": [
"PrivilegeEscalationBlocked",
"SecretAccessBlocked"
],
"access_path": "mediated"
}
},
{
"id": "rel:agt-codex-interhub-bootstrap-sign-agt-codex-interhub-bootstrap",
"system": "ops-warden",
"subject": "group:ops-warden-agents",
"relation": "signer",
"object": "ssh-cert:actor/agt-codex-interhub-bootstrap",
"tenant": "tenant:platform",
"conditions": [
"TimeLimited",
"Logged"
],
"caring": {
"id": "descriptor:ops-warden-agt-signer",
"profile": "caring-0.4.0-rc2",
"subject_type": "Group",
"organization_relation": "ServiceProvider",
"canonical_role": "Operator",
"scope": {
"level": "Resource",
"id": "ssh-cert:actor/agt-codex-interhub-bootstrap",
"tenant": "tenant:platform",
"resource": "ssh-cert:actor/agt-codex-interhub-bootstrap"
},
"planes": [
"Identity",
"Secret",
"Audit"
],
"capabilities": [
"Use",
"Operate",
"Audit"
],
"exposure_modes": [
"Metadata"
],
"conditions": [
"TimeLimited",
"Logged"
],
"restrictions": [
"PrivilegeEscalationBlocked",
"SecretAccessBlocked"
],
"access_path": "mediated"
}
},
{
"id": "rel:agt-state-hub-bridge-sign-agt-state-hub-bridge",
"system": "ops-warden",
"subject": "group:ops-warden-agents",
"relation": "signer",
"object": "ssh-cert:actor/agt-state-hub-bridge",
"tenant": "tenant:platform",
"conditions": [
"TimeLimited",
"Logged"
],
"caring": {
"id": "descriptor:ops-warden-agt-signer",
"profile": "caring-0.4.0-rc2",
"subject_type": "Group",
"organization_relation": "ServiceProvider",
"canonical_role": "Operator",
"scope": {
"level": "Resource",
"id": "ssh-cert:actor/agt-state-hub-bridge",
"tenant": "tenant:platform",
"resource": "ssh-cert:actor/agt-state-hub-bridge"
},
"planes": [
"Identity",
"Secret",
"Audit"
],
"capabilities": [
"Use",
"Operate",
"Audit"
],
"exposure_modes": [
"Metadata"
],
"conditions": [
"TimeLimited",
"Logged"
],
"restrictions": [
"PrivilegeEscalationBlocked",
"SecretAccessBlocked"
],
"access_path": "mediated"
}
},
{
"id": "rel:atm-backup-daily-sign-atm-backup-daily",
"system": "ops-warden",
"subject": "group:ops-warden-automations",
"relation": "signer",
"object": "ssh-cert:actor/atm-backup-daily",
"tenant": "tenant:platform",
"conditions": [
"TimeLimited",
"Logged"
],
"caring": {
"id": "descriptor:ops-warden-atm-signer",
"profile": "caring-0.4.0-rc2",
"subject_type": "Group",
"organization_relation": "ServiceProvider",
"canonical_role": "Operator",
"scope": {
"level": "Resource",
"id": "ssh-cert:actor/atm-backup-daily",
"tenant": "tenant:platform",
"resource": "ssh-cert:actor/atm-backup-daily"
},
"planes": [
"Identity",
"Secret",
"Audit"
],
"capabilities": [
"Use",
"Operate",
"Audit"
],
"exposure_modes": [
"Metadata"
],
"conditions": [
"TimeLimited",
"Logged"
],
"restrictions": [
"PrivilegeEscalationBlocked",
"SecretAccessBlocked"
],
"access_path": "mediated"
}
}
]
}

View File

@@ -0,0 +1,211 @@
---
id: FLEX-WP-0007
type: workplan
title: "Ops-Warden Policy Gate Production Deployment"
domain: infotech
repo: flex-auth
status: blocked
owner: codex
topic_slug: flex-auth
planning_priority: P0
planning_order: 70
depends_on_workplans:
- FLEX-WP-0006
related_workplans:
- WARDEN-WP-0009
created: "2026-06-23"
updated: "2026-06-24"
state_hub_workstream_id: "358ce697-2611-4fe9-89ab-63e86ceb00fa"
---
# FLEX-WP-0007: Ops-Warden Policy Gate Production Deployment
## Purpose
Deploy flex-auth as a reachable production runtime for ops-warden's opt-in SSH
signing policy gate, load a production registry aligned with real inventory
actors, and complete joint smoke evidence so operators can set policy.enabled:
true in warden.yaml.
Review update: repo-side production readiness is now separated from
operator-only work. flex-auth can publish the production fixture, tests,
runtime command, and sync contract in this repo. The actual stable URL
deployment and OpenBao smoke remain blocked because they need NetKingdom
reachability and a refreshed scoped VAULT_TOKEN.
## Background
ops-warden finished WARDEN-WP-0009 on the caller side: local and
production-registry smoke passed, and the production registry generator exists.
The remaining risk is operational, not policy shape: warden workstations need a
reachable flex-auth URL, and the vault-backed joint smoke needs a valid scoped
VAULT_TOKEN.
Production registry artifacts:
- flex-auth fixture: examples/ops-warden/production_registry_snapshot.json
- ops-warden source artifact: ~/ops-warden/registry/flex-auth/production_registry_snapshot.json
- ops-warden generator: ~/ops-warden/scripts/build_flex_auth_registry.py
## Ownership Boundary
| Concern | Owner |
| --- | --- |
| Policy package and PDP decision | flex-auth |
| Actor inventory and TTL/principal defaults | ops-warden |
| SSH CA and OpenBao signing | ops-warden |
| Production registry content for SSH actors | Joint: ops-warden generates, flex-auth hosts |
| policy.enabled flip | ops-warden operator after flex-auth is reachable |
No SSH private keys, OpenBao tokens, or other secrets belong in fixtures, docs,
State Hub messages, or smoke evidence.
## T1 - Deploy production flex-auth runtime
```task
id: FLEX-WP-0007-T01
status: done
priority: high
state_hub_task_id: "727573fc-86a3-4f5a-abd7-40b0ccb01e68"
```
Deploy flex-auth serve, or equivalent, to a stable URL reachable from
workstations that run warden sign.
- [x] Choose preferred target: in-cluster Service at http://flex-auth.flex-auth.svc.cluster.local:8080 when reachable; otherwise approved operator tunnel or ingress with the same base path
- [x] Document canonical policy.flex_auth_url selection in docs/ops-warden-registry-sync.md
- [x] Document healthz pre-flight: GET /healthz returns HTTP 200
- [x] Add service test coverage for /healthz
- [x] Operator tunnel deployed as flex-auth-coulombcore and confirmed POST /v1/check is reachable from CoulombCore
Acceptance: operator runs curl <flex_auth_url>/healthz from the warden
workstation and receives HTTP 200. Verified from CoulombCore on 2026-06-24 with
flex_auth_url http://127.0.0.1:18090.
## T2 - Load production registry and verify real actors
```task
id: FLEX-WP-0007-T02
status: done
priority: high
state_hub_task_id: "6ec1e00c-4a3a-475b-aefb-af3961de7070"
```
Load the production registry snapshot derived from ops-warden inventory, not
only the template actors in examples/ops-warden/registry_snapshot.json.
- [x] Add examples/ops-warden/production_registry_snapshot.json from the ops-warden generated artifact
- [x] Document regenerate and load procedure in docs/ops-warden-registry-sync.md
- [x] Verify allow for agt-state-hub-bridge / sign
- [x] Verify deny for ttl_out_of_bounds
- [x] Verify deny for unregistered actors with unknown_actor_resource
- [x] Add CI tests using production actor names: agt-state-hub-bridge, agt-codex-interhub-bootstrap, adm-example, atm-backup-daily
Acceptance: local flex-auth coverage allows agt-state-hub-bridge without
ops-warden-local registry patching. Deployed runtime verification remains part
of T1.
## T3 - Publish registry sync contract with ops-warden
```task
id: FLEX-WP-0007-T03
status: done
priority: medium
state_hub_task_id: "afa09ec3-516c-433d-87a7-330cb79845a8"
```
Document the two-repo workflow when inventory or policy boundaries change.
- [x] Publish docs/ops-warden-registry-sync.md
- [x] Cover ops-warden ownership of actor names, actor types, principals, and TTL defaults
- [x] Cover flex-auth ownership of hosted registry, relationships, and policy package evaluation
- [x] Document trigger: inventory add/change -> regenerate snapshot -> flex-auth reload
- [x] Cross-link from docs/ops-warden-policy-gate-handoff.md
- [x] Confirm ops-warden wiki/PolicyGatedSigning.md already points to the flex-auth handoff; flex-auth now points back from the sync runbook
Acceptance: a new agt-* actor addition has an unambiguous procedure across both
repos.
## T4 - Joint OpenBao + policy gate production smoke
```task
id: FLEX-WP-0007-T04
status: wait
priority: medium
state_hub_task_id: "32a96f1c-e0e8-4e27-baa6-7b8c445cf7a1"
```
Coordinate with ops-warden for vault-backed signing through the deployed
flex-auth runtime.
- [x] flex-auth deployed with production registry via operator tunnel, completing T1
- [ ] ops-warden policy.enabled: true and policy.flex_auth_url points to deployed URL http://127.0.0.1:18090 on CoulombCore
- [ ] Valid scoped VAULT_TOKEN with warden-sign policy, operator-provided
- [ ] Allow smoke: warden sign agt-state-hub-bridge records backend vault and policy_decision_id
- [ ] Deny smoke: TTL above registry max is denied by flex-auth before OpenBao
- [ ] Record non-secret evidence: decision ids, reasons, actor names only
Blocked on: scoped VAULT_TOKEN refresh. Previous ops-warden session returned
HTTP 403 on 2026-06-23; no VAULT_TOKEN is present in this session.
Smoke runner when token is valid:
SMOKE_VAULT=1 ~/ops-warden/scripts/policy_gate_production_smoke.sh
## T5 - IAM subject binding for production
```task
id: FLEX-WP-0007-T05
status: done
priority: low
state_hub_task_id: "65dc3c59-1e4b-4335-b6a0-db492ea9b2b5"
```
Clarify how WARDEN_POLICY_SUBJECT maps to flex-auth allowed_subjects in
production.
- [x] Document production default: actor name as subject.id unless WARDEN_POLICY_SUBJECT supplies the IAM subject
- [x] Confirm production registry allowed_subjects includes iam:<actor> entries
- [x] Add test coverage for iam:agt-state-hub-bridge allow path
Acceptance: documented subject-id strategy; no ops-warden special-casing is
required beyond existing policy behavior.
## Exit Criteria
- flex-auth production runtime reachable from CoulombCore warden path: done via flex-auth-coulombcore operator tunnel
- Production registry loaded and real inventory actors covered locally: done
- Registry sync contract published and cross-linked: done
- Joint vault-backed smoke evidence recorded, or T4 explicitly waits on token: T4 waits on scoped VAULT_TOKEN
- ops-warden operator has the repo-side artifacts needed to set policy.enabled: true after the stable URL and token are ready
## Implementation Notes
2026-06-23 repo-side implementation:
- Added examples/ops-warden/production_registry_snapshot.json from the ops-warden generated production registry artifact.
- Added Go coverage for production actor allows, IAM subject allow, ttl_out_of_bounds, unknown_actor_resource, production registry counts, and /healthz.
- Published docs/ops-warden-registry-sync.md and cross-linked it from the handoff and examples docs.
Remaining work:
- Blocked: operator refreshes scoped VAULT_TOKEN and reruns the OpenBao-backed smoke.
- Follow-up (not blocked): after workplan file changes, run make fix-consistency REPO=flex-auth from ~/state-hub to mirror these statuses into State Hub.
## See Also
- docs/ops-warden-policy-gate-handoff.md
- docs/ops-warden-registry-sync.md
- workplans/FLEX-WP-0006-ops-warden-ssh-signing-policy-gate.md
- ~/ops-warden/wiki/PolicyGatedSigning.md
- ~/ops-warden/workplans/WARDEN-WP-0009-flex-auth-policy-gate-production.md
- ~/ops-warden/history/2026-06-23-flex-auth-production-pickup-suggestion.md
2026-06-24 operator tunnel update:
- Built /tmp/flex-auth and started the production registry runtime on local 127.0.0.1:18090.
- Added local ops-bridge tunnel flex-auth-coulombcore, forwarding CoulombCore 127.0.0.1:18090 to the local runtime.
- Verified remote health from CoulombCore: GET /healthz returned HTTP 200.
- Verified remote POST /v1/check from CoulombCore allowed agt-state-hub-bridge with decision:873c6c682a52bebc.
- VAULT_TOKEN is absent, so OpenBao-backed smoke remains blocked on operator credential refresh.