generated from coulomb/repo-seed
FLEX-WP-0007: production registry fixture, tests, and sync runbook
Add production_registry_snapshot.json from ops-warden inventory with CI coverage for real actors, IAM subject binding, ttl_out_of_bounds, and unknown_actor_resource. Extend serve contract tests with /healthz and publish the registry sync contract for operator deployment.
This commit is contained in:
@@ -111,6 +111,15 @@ func TestServeOpsWardenCheckContract(t *testing.T) {
|
||||
server := httptest.NewServer(newServeMux(engine))
|
||||
defer server.Close()
|
||||
|
||||
resp, err := http.Get(server.URL + "/healthz")
|
||||
if err != nil {
|
||||
t.Fatalf("GET /healthz: %v", err)
|
||||
}
|
||||
resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("GET /healthz status = %d; want 200", resp.StatusCode)
|
||||
}
|
||||
|
||||
allow := postCheck(t, server.URL+"/v1/check", opsPath("check_request_allow_adm.json"))
|
||||
if allow.Effect != api.DecisionEffectAllow || allow.ID == "" {
|
||||
t.Fatalf("allow decision = %+v; want allow with id", allow)
|
||||
@@ -121,7 +130,7 @@ func TestServeOpsWardenCheckContract(t *testing.T) {
|
||||
t.Fatalf("deny decision = %+v; want ttl_out_of_bounds deny", deny)
|
||||
}
|
||||
|
||||
resp, err := http.Get(server.URL + "/v1/check")
|
||||
resp, err = http.Get(server.URL + "/v1/check")
|
||||
if err != nil {
|
||||
t.Fatalf("GET /v1/check: %v", err)
|
||||
}
|
||||
@@ -148,6 +157,124 @@ func TestServeOpsWardenCheckContract(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunLoadRegistryOpsWardenProduction(t *testing.T) {
|
||||
var stdout, stderr bytes.Buffer
|
||||
code := run([]string{"load-registry", "--file", opsPath("production_registry_snapshot.json")}, &stdout, &stderr)
|
||||
if code != 0 {
|
||||
t.Fatalf("code = %d, stderr = %s", code, stderr.String())
|
||||
}
|
||||
|
||||
var result map[string]any
|
||||
if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
|
||||
t.Fatalf("unmarshal load-registry output: %v; stdout = %s", err, stdout.String())
|
||||
}
|
||||
if result["subjects"] != float64(4) || result["relationships"] != float64(4) || result["resource_manifests"] != float64(1) {
|
||||
t.Fatalf("load-registry result = %+v; want production actor registry counts", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpsWardenProductionRegistryActors(t *testing.T) {
|
||||
engine, err := buildEngine(context.Background(), opsPath("production_registry_snapshot.json"), opsPath("policy_package.md"), "")
|
||||
if err != nil {
|
||||
t.Fatalf("buildEngine: %v", err)
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
subjectID string
|
||||
actor string
|
||||
actorType string
|
||||
principal string
|
||||
ttlHours float64
|
||||
wantEffect api.DecisionEffect
|
||||
wantReason string
|
||||
}{
|
||||
{
|
||||
name: "state hub bridge agent allow",
|
||||
subjectID: "agt-state-hub-bridge",
|
||||
actor: "agt-state-hub-bridge",
|
||||
actorType: "agt",
|
||||
principal: "agt-task-bridge",
|
||||
ttlHours: 1,
|
||||
wantEffect: api.DecisionEffectAllow,
|
||||
},
|
||||
{
|
||||
name: "state hub bridge IAM subject allow",
|
||||
subjectID: "iam:agt-state-hub-bridge",
|
||||
actor: "agt-state-hub-bridge",
|
||||
actorType: "agt",
|
||||
principal: "agt-task-bridge",
|
||||
ttlHours: 1,
|
||||
wantEffect: api.DecisionEffectAllow,
|
||||
},
|
||||
{
|
||||
name: "codex interhub bootstrap agent allow",
|
||||
subjectID: "agt-codex-interhub-bootstrap",
|
||||
actor: "agt-codex-interhub-bootstrap",
|
||||
actorType: "agt",
|
||||
principal: "agt-interhub-bootstrap",
|
||||
ttlHours: 1,
|
||||
wantEffect: api.DecisionEffectAllow,
|
||||
},
|
||||
{
|
||||
name: "admin actor allow",
|
||||
subjectID: "adm-example",
|
||||
actor: "adm-example",
|
||||
actorType: "adm",
|
||||
principal: "adm-full",
|
||||
ttlHours: 4,
|
||||
wantEffect: api.DecisionEffectAllow,
|
||||
},
|
||||
{
|
||||
name: "automation actor allow",
|
||||
subjectID: "atm-backup-daily",
|
||||
actor: "atm-backup-daily",
|
||||
actorType: "atm",
|
||||
principal: "atm-backup-daily",
|
||||
ttlHours: 1,
|
||||
wantEffect: api.DecisionEffectAllow,
|
||||
},
|
||||
{
|
||||
name: "ttl above production max denies",
|
||||
subjectID: "agt-state-hub-bridge",
|
||||
actor: "agt-state-hub-bridge",
|
||||
actorType: "agt",
|
||||
principal: "agt-task-bridge",
|
||||
ttlHours: 999,
|
||||
wantEffect: api.DecisionEffectDeny,
|
||||
wantReason: "ttl_out_of_bounds",
|
||||
},
|
||||
{
|
||||
name: "unregistered production actor denies",
|
||||
subjectID: "agt-missing",
|
||||
actor: "agt-missing",
|
||||
actorType: "agt",
|
||||
principal: "agt-missing",
|
||||
ttlHours: 1,
|
||||
wantEffect: api.DecisionEffectDeny,
|
||||
wantReason: "unknown_actor_resource",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
decision, err := engine.Check(context.Background(), opsWardenProductionSignRequest(tt.subjectID, tt.actor, tt.actorType, tt.principal, tt.ttlHours))
|
||||
if err != nil {
|
||||
t.Fatalf("Check: %v", err)
|
||||
}
|
||||
if decision.Effect != tt.wantEffect {
|
||||
t.Fatalf("decision.Effect = %q; want %q; decision: %+v", decision.Effect, tt.wantEffect, decision)
|
||||
}
|
||||
if tt.wantReason != "" && decision.Reason != tt.wantReason {
|
||||
t.Fatalf("decision.Reason = %q; want %q; decision: %+v", decision.Reason, tt.wantReason, decision)
|
||||
}
|
||||
if tt.wantEffect == api.DecisionEffectAllow && decision.ID == "" {
|
||||
t.Fatal("allow decision ID is empty")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunValidateAccessDescriptor(t *testing.T) {
|
||||
var stdout, stderr bytes.Buffer
|
||||
code := run([]string{"validate", "--kind", "access-descriptor", "--file", examplePath("access_descriptor.yaml")}, &stdout, &stderr)
|
||||
@@ -167,6 +294,29 @@ func opsPath(name string) string {
|
||||
return filepath.Join("..", "..", "examples", "ops-warden", name)
|
||||
}
|
||||
|
||||
func opsWardenProductionSignRequest(subjectID, actor, actorType, principal string, ttlHours float64) api.CheckRequest {
|
||||
return api.CheckRequest{
|
||||
ID: "check:ops-warden-production-" + actor,
|
||||
Tenant: "tenant:platform",
|
||||
Subject: api.SubjectRef{
|
||||
ID: subjectID,
|
||||
Type: api.SubjectType(actorType),
|
||||
},
|
||||
Action: "sign",
|
||||
Resource: api.ResourceRef{
|
||||
ID: "ssh-cert:actor/" + actor,
|
||||
Type: "ssh-certificate",
|
||||
System: "ops-warden",
|
||||
},
|
||||
Context: map[string]any{
|
||||
"principals": []string{principal},
|
||||
"actor_type": actorType,
|
||||
"ttl_hours": ttlHours,
|
||||
"pubkey_fingerprint": "SHA256:example-production-fingerprint",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func postCheck(t *testing.T, url, path string) api.DecisionEnvelope {
|
||||
t.Helper()
|
||||
|
||||
|
||||
@@ -80,3 +80,25 @@ integration, host documentation, and signatures.log production evidence.
|
||||
|
||||
No SSH private keys, OpenBao tokens, database credentials, or real public-key
|
||||
material are stored in these fixtures.
|
||||
|
||||
|
||||
## FLEX-WP-0007 Production Update
|
||||
|
||||
Additional published assets:
|
||||
|
||||
- Production registry fixture: examples/ops-warden/production_registry_snapshot.json
|
||||
- Registry sync runbook: docs/ops-warden-registry-sync.md
|
||||
|
||||
Production runtime command:
|
||||
|
||||
flex-auth serve --addr 0.0.0.0:8080 --registry examples/ops-warden/production_registry_snapshot.json --policy examples/ops-warden/policy_package.md --log /var/log/flex-auth/ops-warden-decisions.jsonl
|
||||
|
||||
Use http://flex-auth.flex-auth.svc.cluster.local:8080 when cluster DNS is
|
||||
reachable from warden workstations. Otherwise use the approved operator tunnel
|
||||
or ingress URL. Always run a pre-flight GET /healthz from that same workstation before
|
||||
enabling policy.enabled with fail_closed true.
|
||||
|
||||
Production actor coverage now verifies agt-state-hub-bridge,
|
||||
agt-codex-interhub-bootstrap, adm-example, atm-backup-daily, ttl_out_of_bounds,
|
||||
unknown_actor_resource, and the iam:agt-state-hub-bridge subject path used by
|
||||
WARDEN_POLICY_SUBJECT.
|
||||
|
||||
128
docs/ops-warden-registry-sync.md
Normal file
128
docs/ops-warden-registry-sync.md
Normal file
@@ -0,0 +1,128 @@
|
||||
# Ops-Warden Registry Sync
|
||||
|
||||
Date: 2026-06-23
|
||||
Workplan: FLEX-WP-0007
|
||||
|
||||
This is the flex-auth side of the production policy gate runbook for ops-warden
|
||||
SSH signing. ops-warden owns actor inventory and generated registry content;
|
||||
flex-auth hosts that registry, evaluates the policy package, and returns the
|
||||
decision envelope used by warden sign.
|
||||
|
||||
## Production Runtime Target
|
||||
|
||||
Use the NetKingdom operator-reachable service URL as the canonical
|
||||
policy.flex_auth_url. The preferred target is an in-cluster flex-auth Service
|
||||
fronted by the existing operator access path:
|
||||
|
||||
http://flex-auth.flex-auth.svc.cluster.local:8080
|
||||
|
||||
If cluster DNS is not reachable from the workstation that runs warden sign, use
|
||||
an approved operator tunnel or ingress URL with the same base path semantics. Do
|
||||
not turn on policy.enabled with fail_closed true until this pre-flight succeeds
|
||||
from the same workstation:
|
||||
|
||||
curl -fsS <policy.flex_auth_url>/healthz
|
||||
|
||||
Start the runtime with the production registry snapshot and the ops-warden
|
||||
policy package:
|
||||
|
||||
flex-auth serve --addr 0.0.0.0:8080 --registry examples/ops-warden/production_registry_snapshot.json --policy examples/ops-warden/policy_package.md --log /var/log/flex-auth/ops-warden-decisions.jsonl
|
||||
|
||||
The checked-in production snapshot is a non-secret fixture and initial load
|
||||
target. Regenerate it from ops-warden inventory whenever actors, principals, or
|
||||
TTL defaults change.
|
||||
|
||||
## Current Operator Tunnel
|
||||
|
||||
As of 2026-06-24, the reachable operator-tunnel URL for CoulombCore is:
|
||||
|
||||
http://127.0.0.1:18090
|
||||
|
||||
The tunnel name is flex-auth-coulombcore. It forwards 127.0.0.1:18090 on
|
||||
CoulombCore to the local flex-auth runtime listening on 127.0.0.1:18090. Verified
|
||||
checks from CoulombCore:
|
||||
|
||||
- GET /healthz returned HTTP 200.
|
||||
- POST /v1/check for agt-state-hub-bridge returned allow with decision:873c6c682a52bebc.
|
||||
|
||||
This is an operator tunnel pattern, not a substitute for a future in-cluster
|
||||
Service if flex-auth should run inside the cluster.
|
||||
|
||||
## Ownership Contract
|
||||
|
||||
| Concern | Owner | Notes |
|
||||
| --- | --- | --- |
|
||||
| Actor names and actor types | ops-warden | inventory.yaml defines adm, agt, and atm actors. |
|
||||
| Default principals and TTLs | ops-warden | Used by warden sign and by generated registry attributes. |
|
||||
| Registry hosting and reload | flex-auth | Runtime serves the generated snapshot and evaluates it with the policy package. |
|
||||
| Policy package semantics | flex-auth | examples/ops-warden/policy_package.md owns allow and deny reasons. |
|
||||
| OpenBao SSH signing | ops-warden | flex-auth never receives SSH private keys or Vault tokens. |
|
||||
| Production policy.enabled flip | ops-warden operator | Only after healthz and allow/deny smoke pass. |
|
||||
|
||||
## Sync Procedure
|
||||
|
||||
1. In ops-warden, update the managed inventory source or ~/.config/warden/inventory.yaml.
|
||||
2. Regenerate the flex-auth snapshot from ops-warden:
|
||||
|
||||
python scripts/build_flex_auth_registry.py ~/.config/warden/inventory.yaml -o registry/flex-auth/production_registry_snapshot.json
|
||||
|
||||
3. Validate the generated file before handoff:
|
||||
|
||||
flex-auth load-registry --file registry/flex-auth/production_registry_snapshot.json
|
||||
|
||||
4. Copy or promote the snapshot to the flex-auth runtime. For repo-level drift
|
||||
coverage, update examples/ops-warden/production_registry_snapshot.json when
|
||||
the intended production fixture changes.
|
||||
5. Restart or reload the flex-auth runtime with the new snapshot.
|
||||
6. From the workstation that runs warden sign, verify:
|
||||
|
||||
curl -fsS <policy.flex_auth_url>/healthz
|
||||
|
||||
7. Run one allow smoke and one deny smoke. Record only non-secret evidence:
|
||||
actor name, decision id, effect, reason, backend, and whether a certificate
|
||||
was issued.
|
||||
|
||||
## Current Production Fixture
|
||||
|
||||
The initial fixture mirrors ops-warden production inventory as of 2026-06-23.
|
||||
It registers:
|
||||
|
||||
| Actor | Type | Principal | Max TTL hours | Allowed subjects |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| adm-example | adm | adm-full | 48 | adm-example, iam:adm-example |
|
||||
| agt-codex-interhub-bootstrap | agt | agt-interhub-bootstrap | 2 | agt-codex-interhub-bootstrap, iam:agt-codex-interhub-bootstrap |
|
||||
| agt-state-hub-bridge | agt | agt-task-bridge | 24 | agt-state-hub-bridge, iam:agt-state-hub-bridge |
|
||||
| atm-backup-daily | atm | atm-backup-daily | 8 | atm-backup-daily, iam:atm-backup-daily |
|
||||
|
||||
The IAM subject form is intended for WARDEN_POLICY_SUBJECT. If that environment
|
||||
variable is unset, ops-warden sends the actor name and the same policy path
|
||||
continues to work.
|
||||
|
||||
## Smoke Expectations
|
||||
|
||||
Allow path:
|
||||
|
||||
warden sign agt-state-hub-bridge
|
||||
|
||||
Expected non-secret evidence: decision effect allow, reason
|
||||
signing_policy_matched, signatures.log includes policy_decision_id.
|
||||
|
||||
Deny path:
|
||||
|
||||
warden sign agt-state-hub-bridge --ttl 999
|
||||
|
||||
Expected non-secret evidence: effect deny, reason ttl_out_of_bounds, no
|
||||
certificate issued. With fail_closed true, unreachable flex-auth must also block
|
||||
signing.
|
||||
|
||||
OpenBao-backed signing remains an operator smoke because it requires a scoped
|
||||
VAULT_TOKEN. The previous smoke attempt on 2026-06-23 returned HTTP 403; refresh the token and retry with:
|
||||
|
||||
SMOKE_VAULT=1 ~/ops-warden/scripts/policy_gate_production_smoke.sh
|
||||
|
||||
## References
|
||||
|
||||
- docs/ops-warden-policy-gate-handoff.md
|
||||
- examples/ops-warden/production_registry_snapshot.json
|
||||
- ~/ops-warden/wiki/PolicyGatedSigning.md
|
||||
- ~/ops-warden/history/2026-06-23-flex-auth-policy-gate-production-smoke.md
|
||||
@@ -25,6 +25,7 @@ This document captures the current sequencing view for flex-auth workplans.
|
||||
| `FLEX-WP-0003` | complete | completed | `FLEX-WP-0002` | Markitect consumer integration and first CARING benchmark are complete: resource namespace, manifest import, action vocabulary, descriptor fixtures, decision fixtures, integration docs. |
|
||||
| `FLEX-WP-0004` | complete | completed | `FLEX-WP-0002`, `FLEX-WP-0005` | Delegated PDP and directory adapter boundary work is complete: Topaz adapter shape, OpenFGA/SpiceDB, OPA/Cedar, Keycloak Authorization Services, Entra/Graph/SCIM, CARING envelope preservation. |
|
||||
| `FLEX-WP-0006` | complete | finished | `FLEX-WP-0002`, `FLEX-WP-0005` | Ops-warden unblocker is complete: flex-auth publishes `ssh-certificate` / `sign` policies, fixtures, and `/v1/check` smoke evidence for the opt-in pre-sign gate shipped in ops-warden `WARDEN-WP-0007` and tracked for production in `WARDEN-WP-0009`. |
|
||||
| `FLEX-WP-0007` | `P0` | blocked | `FLEX-WP-0006` | Repo-side production registry fixture, sync contract, runtime command, healthz coverage, and real actor/IAM tests are implemented. Operator deployment and OpenBao smoke remain blocked on reachable runtime selection and scoped VAULT_TOKEN refresh. |
|
||||
|
||||
## Dependency Notes
|
||||
|
||||
@@ -79,5 +80,6 @@ Native State Hub dependency edges:
|
||||
- `FLEX-WP-0004 -> FLEX-WP-0005` (Topaz adapter consumes the spike)
|
||||
- `FLEX-WP-0006 -> FLEX-WP-0002`
|
||||
- `FLEX-WP-0006 -> FLEX-WP-0005`
|
||||
- ops-warden: `WARDEN-WP-0009` waits for `FLEX-WP-0006` output before
|
||||
production enablement of `policy.enabled`.
|
||||
- ops-warden: `WARDEN-WP-0009` finished (caller + registry smoke). Production
|
||||
`policy.enabled: true` waits for `FLEX-WP-0007` (reachable flex-auth runtime).
|
||||
- `FLEX-WP-0007 -> FLEX-WP-0006`
|
||||
|
||||
@@ -32,3 +32,18 @@ flex-auth check --registry examples/ops-warden/registry_snapshot.json --policy e
|
||||
|
||||
The fixture public-key fingerprints are examples only. Do not put real keys,
|
||||
OpenBao tokens, or private signing material in these files.
|
||||
|
||||
|
||||
## Production Registry Fixture
|
||||
|
||||
production_registry_snapshot.json is a non-secret fixture generated by
|
||||
ops-warden for FLEX-WP-0007 coverage. It mirrors the current production actor
|
||||
names used by ops-warden inventory and should be refreshed when that inventory
|
||||
changes.
|
||||
|
||||
Validate both registries locally:
|
||||
|
||||
flex-auth load-registry --file examples/ops-warden/registry_snapshot.json
|
||||
flex-auth load-registry --file examples/ops-warden/production_registry_snapshot.json
|
||||
|
||||
The production sync contract is documented in docs/ops-warden-registry-sync.md.
|
||||
|
||||
450
examples/ops-warden/production_registry_snapshot.json
Normal file
450
examples/ops-warden/production_registry_snapshot.json
Normal file
@@ -0,0 +1,450 @@
|
||||
{
|
||||
"systems": [
|
||||
{
|
||||
"id": "ops-warden",
|
||||
"name": "Ops Warden",
|
||||
"resource_types": [
|
||||
{
|
||||
"name": "ssh-certificate",
|
||||
"scope_level": "Resource",
|
||||
"planes": [
|
||||
"Identity",
|
||||
"Secret",
|
||||
"Audit"
|
||||
],
|
||||
"metadata": {
|
||||
"description": "Short-lived SSH certificate signing request."
|
||||
}
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"name": "sign",
|
||||
"capabilities": [
|
||||
"Use",
|
||||
"Operate",
|
||||
"Audit"
|
||||
],
|
||||
"planes": [
|
||||
"Identity",
|
||||
"Secret",
|
||||
"Audit"
|
||||
],
|
||||
"exposure_modes": [
|
||||
"Metadata"
|
||||
],
|
||||
"metadata": {
|
||||
"required_context": [
|
||||
"principals",
|
||||
"actor_type",
|
||||
"pubkey_fingerprint",
|
||||
"ttl_hours"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"caring_profiles": [
|
||||
"caring-0.4.0-rc2"
|
||||
],
|
||||
"metadata": {
|
||||
"flex_auth_contract": "protected-system-v0",
|
||||
"ops_warden_policy_gate": "v2",
|
||||
"policy_enabled_config": "policy.enabled",
|
||||
"tenant": "tenant:platform"
|
||||
}
|
||||
}
|
||||
],
|
||||
"resource_manifests": [
|
||||
{
|
||||
"id": "ops-warden-ssh-certificates",
|
||||
"system": "ops-warden",
|
||||
"resources": [
|
||||
{
|
||||
"id": "ssh-cert:actor/adm-example",
|
||||
"type": "ssh-certificate",
|
||||
"labels": [
|
||||
"ssh-signing",
|
||||
"adm"
|
||||
],
|
||||
"trust_zone": "platform",
|
||||
"owner": "team:platform-security",
|
||||
"attributes": {
|
||||
"actor_id": "adm-example",
|
||||
"actor_type": "adm",
|
||||
"allowed_subjects": [
|
||||
"adm-example",
|
||||
"iam:adm-example"
|
||||
],
|
||||
"allowed_principals": [
|
||||
"adm-full"
|
||||
],
|
||||
"max_ttl_hours": 48
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "ssh-cert:actor/agt-codex-interhub-bootstrap",
|
||||
"type": "ssh-certificate",
|
||||
"labels": [
|
||||
"ssh-signing",
|
||||
"agt"
|
||||
],
|
||||
"trust_zone": "platform",
|
||||
"owner": "team:platform-security",
|
||||
"attributes": {
|
||||
"actor_id": "agt-codex-interhub-bootstrap",
|
||||
"actor_type": "agt",
|
||||
"allowed_subjects": [
|
||||
"agt-codex-interhub-bootstrap",
|
||||
"iam:agt-codex-interhub-bootstrap"
|
||||
],
|
||||
"allowed_principals": [
|
||||
"agt-interhub-bootstrap"
|
||||
],
|
||||
"max_ttl_hours": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "ssh-cert:actor/agt-state-hub-bridge",
|
||||
"type": "ssh-certificate",
|
||||
"labels": [
|
||||
"ssh-signing",
|
||||
"agt"
|
||||
],
|
||||
"trust_zone": "platform",
|
||||
"owner": "team:platform-security",
|
||||
"attributes": {
|
||||
"actor_id": "agt-state-hub-bridge",
|
||||
"actor_type": "agt",
|
||||
"allowed_subjects": [
|
||||
"agt-state-hub-bridge",
|
||||
"iam:agt-state-hub-bridge"
|
||||
],
|
||||
"allowed_principals": [
|
||||
"agt-task-bridge"
|
||||
],
|
||||
"max_ttl_hours": 24
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "ssh-cert:actor/atm-backup-daily",
|
||||
"type": "ssh-certificate",
|
||||
"labels": [
|
||||
"ssh-signing",
|
||||
"atm"
|
||||
],
|
||||
"trust_zone": "platform",
|
||||
"owner": "team:platform-security",
|
||||
"attributes": {
|
||||
"actor_id": "atm-backup-daily",
|
||||
"actor_type": "atm",
|
||||
"allowed_subjects": [
|
||||
"atm-backup-daily",
|
||||
"iam:atm-backup-daily"
|
||||
],
|
||||
"allowed_principals": [
|
||||
"atm-backup-daily"
|
||||
],
|
||||
"max_ttl_hours": 8
|
||||
}
|
||||
}
|
||||
],
|
||||
"actions": [
|
||||
"sign"
|
||||
],
|
||||
"caring_profile": "caring-0.4.0-rc2",
|
||||
"metadata": {
|
||||
"flex_auth_contract": "resource-registration-v0",
|
||||
"tenant": "tenant:platform"
|
||||
}
|
||||
}
|
||||
],
|
||||
"tenants": [
|
||||
{
|
||||
"id": "tenant:platform",
|
||||
"name": "Platform Tenant"
|
||||
}
|
||||
],
|
||||
"subjects": [
|
||||
{
|
||||
"id": "adm-example",
|
||||
"type": "Agent",
|
||||
"display_name": "Example human operator \u2014 replace with per-person adm-* actors",
|
||||
"organization_relation": "ServiceProvider",
|
||||
"roles": [
|
||||
"Operator"
|
||||
],
|
||||
"groups": [
|
||||
"group:ops-warden-admins"
|
||||
],
|
||||
"tenant": "tenant:platform",
|
||||
"metadata": {
|
||||
"actor_type": "adm"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "agt-codex-interhub-bootstrap",
|
||||
"type": "Agent",
|
||||
"display_name": "Short-lived agent access for attended Inter-Hub bootstrap",
|
||||
"organization_relation": "ServiceProvider",
|
||||
"roles": [
|
||||
"Operator"
|
||||
],
|
||||
"groups": [
|
||||
"group:ops-warden-agents"
|
||||
],
|
||||
"tenant": "tenant:platform",
|
||||
"metadata": {
|
||||
"actor_type": "agt"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "agt-state-hub-bridge",
|
||||
"type": "Agent",
|
||||
"display_name": "ops-bridge tunnel agent for state-hub",
|
||||
"organization_relation": "ServiceProvider",
|
||||
"roles": [
|
||||
"Operator"
|
||||
],
|
||||
"groups": [
|
||||
"group:ops-warden-agents"
|
||||
],
|
||||
"tenant": "tenant:platform",
|
||||
"metadata": {
|
||||
"actor_type": "agt"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "atm-backup-daily",
|
||||
"type": "Automation",
|
||||
"display_name": "Example nightly automation actor",
|
||||
"organization_relation": "ServiceProvider",
|
||||
"roles": [
|
||||
"Operator"
|
||||
],
|
||||
"groups": [
|
||||
"group:ops-warden-automations"
|
||||
],
|
||||
"tenant": "tenant:platform",
|
||||
"metadata": {
|
||||
"actor_type": "atm"
|
||||
}
|
||||
}
|
||||
],
|
||||
"groups": [
|
||||
{
|
||||
"id": "group:ops-warden-admins",
|
||||
"display_name": "Ops Warden Admins",
|
||||
"members": [
|
||||
"adm-example"
|
||||
],
|
||||
"tenant": "tenant:platform"
|
||||
},
|
||||
{
|
||||
"id": "group:ops-warden-agents",
|
||||
"display_name": "Ops Warden Agents",
|
||||
"members": [
|
||||
"agt-codex-interhub-bootstrap",
|
||||
"agt-state-hub-bridge"
|
||||
],
|
||||
"tenant": "tenant:platform"
|
||||
},
|
||||
{
|
||||
"id": "group:ops-warden-automations",
|
||||
"display_name": "Ops Warden Automations",
|
||||
"members": [
|
||||
"atm-backup-daily"
|
||||
],
|
||||
"tenant": "tenant:platform"
|
||||
}
|
||||
],
|
||||
"relationships": [
|
||||
{
|
||||
"id": "rel:adm-example-sign-adm-example",
|
||||
"system": "ops-warden",
|
||||
"subject": "group:ops-warden-admins",
|
||||
"relation": "signer",
|
||||
"object": "ssh-cert:actor/adm-example",
|
||||
"tenant": "tenant:platform",
|
||||
"conditions": [
|
||||
"TimeLimited",
|
||||
"Logged"
|
||||
],
|
||||
"caring": {
|
||||
"id": "descriptor:ops-warden-adm-signer",
|
||||
"profile": "caring-0.4.0-rc2",
|
||||
"subject_type": "Group",
|
||||
"organization_relation": "ServiceProvider",
|
||||
"canonical_role": "Operator",
|
||||
"scope": {
|
||||
"level": "Resource",
|
||||
"id": "ssh-cert:actor/adm-example",
|
||||
"tenant": "tenant:platform",
|
||||
"resource": "ssh-cert:actor/adm-example"
|
||||
},
|
||||
"planes": [
|
||||
"Identity",
|
||||
"Secret",
|
||||
"Audit"
|
||||
],
|
||||
"capabilities": [
|
||||
"Use",
|
||||
"Operate",
|
||||
"Audit"
|
||||
],
|
||||
"exposure_modes": [
|
||||
"Metadata"
|
||||
],
|
||||
"conditions": [
|
||||
"TimeLimited",
|
||||
"Logged"
|
||||
],
|
||||
"restrictions": [
|
||||
"PrivilegeEscalationBlocked",
|
||||
"SecretAccessBlocked"
|
||||
],
|
||||
"access_path": "mediated"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "rel:agt-codex-interhub-bootstrap-sign-agt-codex-interhub-bootstrap",
|
||||
"system": "ops-warden",
|
||||
"subject": "group:ops-warden-agents",
|
||||
"relation": "signer",
|
||||
"object": "ssh-cert:actor/agt-codex-interhub-bootstrap",
|
||||
"tenant": "tenant:platform",
|
||||
"conditions": [
|
||||
"TimeLimited",
|
||||
"Logged"
|
||||
],
|
||||
"caring": {
|
||||
"id": "descriptor:ops-warden-agt-signer",
|
||||
"profile": "caring-0.4.0-rc2",
|
||||
"subject_type": "Group",
|
||||
"organization_relation": "ServiceProvider",
|
||||
"canonical_role": "Operator",
|
||||
"scope": {
|
||||
"level": "Resource",
|
||||
"id": "ssh-cert:actor/agt-codex-interhub-bootstrap",
|
||||
"tenant": "tenant:platform",
|
||||
"resource": "ssh-cert:actor/agt-codex-interhub-bootstrap"
|
||||
},
|
||||
"planes": [
|
||||
"Identity",
|
||||
"Secret",
|
||||
"Audit"
|
||||
],
|
||||
"capabilities": [
|
||||
"Use",
|
||||
"Operate",
|
||||
"Audit"
|
||||
],
|
||||
"exposure_modes": [
|
||||
"Metadata"
|
||||
],
|
||||
"conditions": [
|
||||
"TimeLimited",
|
||||
"Logged"
|
||||
],
|
||||
"restrictions": [
|
||||
"PrivilegeEscalationBlocked",
|
||||
"SecretAccessBlocked"
|
||||
],
|
||||
"access_path": "mediated"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "rel:agt-state-hub-bridge-sign-agt-state-hub-bridge",
|
||||
"system": "ops-warden",
|
||||
"subject": "group:ops-warden-agents",
|
||||
"relation": "signer",
|
||||
"object": "ssh-cert:actor/agt-state-hub-bridge",
|
||||
"tenant": "tenant:platform",
|
||||
"conditions": [
|
||||
"TimeLimited",
|
||||
"Logged"
|
||||
],
|
||||
"caring": {
|
||||
"id": "descriptor:ops-warden-agt-signer",
|
||||
"profile": "caring-0.4.0-rc2",
|
||||
"subject_type": "Group",
|
||||
"organization_relation": "ServiceProvider",
|
||||
"canonical_role": "Operator",
|
||||
"scope": {
|
||||
"level": "Resource",
|
||||
"id": "ssh-cert:actor/agt-state-hub-bridge",
|
||||
"tenant": "tenant:platform",
|
||||
"resource": "ssh-cert:actor/agt-state-hub-bridge"
|
||||
},
|
||||
"planes": [
|
||||
"Identity",
|
||||
"Secret",
|
||||
"Audit"
|
||||
],
|
||||
"capabilities": [
|
||||
"Use",
|
||||
"Operate",
|
||||
"Audit"
|
||||
],
|
||||
"exposure_modes": [
|
||||
"Metadata"
|
||||
],
|
||||
"conditions": [
|
||||
"TimeLimited",
|
||||
"Logged"
|
||||
],
|
||||
"restrictions": [
|
||||
"PrivilegeEscalationBlocked",
|
||||
"SecretAccessBlocked"
|
||||
],
|
||||
"access_path": "mediated"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "rel:atm-backup-daily-sign-atm-backup-daily",
|
||||
"system": "ops-warden",
|
||||
"subject": "group:ops-warden-automations",
|
||||
"relation": "signer",
|
||||
"object": "ssh-cert:actor/atm-backup-daily",
|
||||
"tenant": "tenant:platform",
|
||||
"conditions": [
|
||||
"TimeLimited",
|
||||
"Logged"
|
||||
],
|
||||
"caring": {
|
||||
"id": "descriptor:ops-warden-atm-signer",
|
||||
"profile": "caring-0.4.0-rc2",
|
||||
"subject_type": "Group",
|
||||
"organization_relation": "ServiceProvider",
|
||||
"canonical_role": "Operator",
|
||||
"scope": {
|
||||
"level": "Resource",
|
||||
"id": "ssh-cert:actor/atm-backup-daily",
|
||||
"tenant": "tenant:platform",
|
||||
"resource": "ssh-cert:actor/atm-backup-daily"
|
||||
},
|
||||
"planes": [
|
||||
"Identity",
|
||||
"Secret",
|
||||
"Audit"
|
||||
],
|
||||
"capabilities": [
|
||||
"Use",
|
||||
"Operate",
|
||||
"Audit"
|
||||
],
|
||||
"exposure_modes": [
|
||||
"Metadata"
|
||||
],
|
||||
"conditions": [
|
||||
"TimeLimited",
|
||||
"Logged"
|
||||
],
|
||||
"restrictions": [
|
||||
"PrivilegeEscalationBlocked",
|
||||
"SecretAccessBlocked"
|
||||
],
|
||||
"access_path": "mediated"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,211 @@
|
||||
---
|
||||
id: FLEX-WP-0007
|
||||
type: workplan
|
||||
title: "Ops-Warden Policy Gate Production Deployment"
|
||||
domain: infotech
|
||||
repo: flex-auth
|
||||
status: blocked
|
||||
owner: codex
|
||||
topic_slug: flex-auth
|
||||
planning_priority: P0
|
||||
planning_order: 70
|
||||
depends_on_workplans:
|
||||
- FLEX-WP-0006
|
||||
related_workplans:
|
||||
- WARDEN-WP-0009
|
||||
created: "2026-06-23"
|
||||
updated: "2026-06-24"
|
||||
state_hub_workstream_id: "358ce697-2611-4fe9-89ab-63e86ceb00fa"
|
||||
---
|
||||
|
||||
# FLEX-WP-0007: Ops-Warden Policy Gate Production Deployment
|
||||
|
||||
## Purpose
|
||||
|
||||
Deploy flex-auth as a reachable production runtime for ops-warden's opt-in SSH
|
||||
signing policy gate, load a production registry aligned with real inventory
|
||||
actors, and complete joint smoke evidence so operators can set policy.enabled:
|
||||
true in warden.yaml.
|
||||
|
||||
Review update: repo-side production readiness is now separated from
|
||||
operator-only work. flex-auth can publish the production fixture, tests,
|
||||
runtime command, and sync contract in this repo. The actual stable URL
|
||||
deployment and OpenBao smoke remain blocked because they need NetKingdom
|
||||
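reachability and a refreshed scoped VAULT_TOKEN. (Partly superseded by the 2026-06-24 operator tunnel update below: the runtime is now reachable from CoulombCore through the flex-auth-coulombcore tunnel, so T1 is done; the OpenBao smoke in T4 is still blocked on the token.)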
|
||||
|
||||
## Background
|
||||
|
||||
ops-warden finished WARDEN-WP-0009 on the caller side: local and
|
||||
production-registry smoke passed, and the production registry generator exists.
|
||||
The remaining risk is operational, not policy shape: warden workstations need a
|
||||
reachable flex-auth URL, and the vault-backed joint smoke needs a valid scoped
|
||||
VAULT_TOKEN.
|
||||
|
||||
Production registry artifacts:
|
||||
|
||||
- flex-auth fixture: examples/ops-warden/production_registry_snapshot.json
|
||||
- ops-warden source artifact: ~/ops-warden/registry/flex-auth/production_registry_snapshot.json
|
||||
- ops-warden generator: ~/ops-warden/scripts/build_flex_auth_registry.py
|
||||
|
||||
## Ownership Boundary
|
||||
|
||||
| Concern | Owner |
|
||||
| --- | --- |
|
||||
| Policy package and PDP decision | flex-auth |
|
||||
| Actor inventory and TTL/principal defaults | ops-warden |
|
||||
| SSH CA and OpenBao signing | ops-warden |
|
||||
| Production registry content for SSH actors | Joint: ops-warden generates, flex-auth hosts |
|
||||
| policy.enabled flip | ops-warden operator after flex-auth is reachable |
|
||||
|
||||
No SSH private keys, OpenBao tokens, or other secrets belong in fixtures, docs,
|
||||
State Hub messages, or smoke evidence.
|
||||
|
||||
## T1 - Deploy production flex-auth runtime
|
||||
|
||||
```task
|
||||
id: FLEX-WP-0007-T01
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "727573fc-86a3-4f5a-abd7-40b0ccb01e68"
|
||||
```
|
||||
|
||||
Deploy flex-auth serve, or equivalent, to a stable URL reachable from
|
||||
workstations that run warden sign.
|
||||
|
||||
- [x] Choose preferred target: in-cluster Service at http://flex-auth.flex-auth.svc.cluster.local:8080 when reachable; otherwise approved operator tunnel or ingress with the same base path
|
||||
- [x] Document canonical policy.flex_auth_url selection in docs/ops-warden-registry-sync.md
|
||||
- [x] Document healthz pre-flight: GET /healthz returns HTTP 200
|
||||
- [x] Add service test coverage for /healthz
|
||||
- [x] Operator tunnel deployed as flex-auth-coulombcore and confirmed POST /v1/check is reachable from CoulombCore
|
||||
|
||||
Acceptance: operator runs `curl <flex_auth_url>/healthz` from the warden
|
||||
workstation and receives HTTP 200. Verified from CoulombCore on 2026-06-24 with
|
||||
flex_auth_url http://127.0.0.1:18090.
|
||||
|
||||
## T2 - Load production registry and verify real actors
|
||||
|
||||
```task
|
||||
id: FLEX-WP-0007-T02
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "6ec1e00c-4a3a-475b-aefb-af3961de7070"
|
||||
```
|
||||
|
||||
Load the production registry snapshot derived from ops-warden inventory, not
|
||||
only the template actors in examples/ops-warden/registry_snapshot.json.
|
||||
|
||||
- [x] Add examples/ops-warden/production_registry_snapshot.json from the ops-warden generated artifact
|
||||
- [x] Document regenerate and load procedure in docs/ops-warden-registry-sync.md
|
||||
- [x] Verify allow for agt-state-hub-bridge / sign
|
||||
- [x] Verify deny for ttl_out_of_bounds
|
||||
- [x] Verify deny for unregistered actors with unknown_actor_resource
|
||||
- [x] Add CI tests using production actor names: agt-state-hub-bridge, agt-codex-interhub-bootstrap, adm-example, atm-backup-daily
|
||||
|
||||
Acceptance: local flex-auth coverage allows agt-state-hub-bridge without
|
||||
ops-warden-local registry patching. Deployed runtime verification remains part
|
||||
of T1.
|
||||
|
||||
## T3 - Publish registry sync contract with ops-warden
|
||||
|
||||
```task
|
||||
id: FLEX-WP-0007-T03
|
||||
status: done
|
||||
priority: medium
|
||||
state_hub_task_id: "afa09ec3-516c-433d-87a7-330cb79845a8"
|
||||
```
|
||||
|
||||
Document the two-repo workflow when inventory or policy boundaries change.
|
||||
|
||||
- [x] Publish docs/ops-warden-registry-sync.md
|
||||
- [x] Cover ops-warden ownership of actor names, actor types, principals, and TTL defaults
|
||||
- [x] Cover flex-auth ownership of hosted registry, relationships, and policy package evaluation
|
||||
- [x] Document trigger: inventory add/change -> regenerate snapshot -> flex-auth reload
|
||||
- [x] Cross-link from docs/ops-warden-policy-gate-handoff.md
|
||||
- [x] Confirm ops-warden wiki/PolicyGatedSigning.md already points to the flex-auth handoff; flex-auth now points back from the sync runbook
|
||||
|
||||
Acceptance: a new agt-* actor addition has an unambiguous procedure across both
|
||||
repos.
|
||||
|
||||
## T4 - Joint OpenBao + policy gate production smoke
|
||||
|
||||
```task
|
||||
id: FLEX-WP-0007-T04
|
||||
status: wait
|
||||
priority: medium
|
||||
state_hub_task_id: "32a96f1c-e0e8-4e27-baa6-7b8c445cf7a1"
|
||||
```
|
||||
|
||||
Coordinate with ops-warden for vault-backed signing through the deployed
|
||||
flex-auth runtime.
|
||||
|
||||
- [x] flex-auth deployed with production registry via operator tunnel, completing T1
|
||||
- [ ] ops-warden policy.enabled: true and policy.flex_auth_url points to deployed URL http://127.0.0.1:18090 on CoulombCore
|
||||
- [ ] Valid scoped VAULT_TOKEN with warden-sign policy, operator-provided
|
||||
- [ ] Allow smoke: warden sign agt-state-hub-bridge records backend vault and policy_decision_id
|
||||
- [ ] Deny smoke: TTL above registry max is denied by flex-auth before OpenBao
|
||||
- [ ] Record non-secret evidence: decision ids, reasons, actor names only
|
||||
|
||||
Blocked on: scoped VAULT_TOKEN refresh. Previous ops-warden session returned
|
||||
HTTP 403 on 2026-06-23; no VAULT_TOKEN is present in this session.
|
||||
|
||||
Smoke runner when token is valid:
|
||||
|
||||
`SMOKE_VAULT=1 ~/ops-warden/scripts/policy_gate_production_smoke.sh`
|
||||
|
||||
## T5 - IAM subject binding for production
|
||||
|
||||
```task
|
||||
id: FLEX-WP-0007-T05
|
||||
status: done
|
||||
priority: low
|
||||
state_hub_task_id: "65dc3c59-1e4b-4335-b6a0-db492ea9b2b5"
|
||||
```
|
||||
|
||||
Clarify how WARDEN_POLICY_SUBJECT maps to flex-auth allowed_subjects in
|
||||
production.
|
||||
|
||||
- [x] Document production default: actor name as subject.id unless WARDEN_POLICY_SUBJECT supplies the IAM subject
|
||||
- [x] Confirm production registry allowed_subjects includes iam:<actor> entries
|
||||
- [x] Add test coverage for iam:agt-state-hub-bridge allow path
|
||||
|
||||
Acceptance: documented subject-id strategy; no ops-warden special-casing is
|
||||
required beyond existing policy behavior.
|
||||
|
||||
## Exit Criteria
|
||||
|
||||
- flex-auth production runtime reachable from CoulombCore warden path: done via flex-auth-coulombcore operator tunnel
|
||||
- Production registry loaded and real inventory actors covered locally: done
|
||||
- Registry sync contract published and cross-linked: done
|
||||
- Joint vault-backed smoke evidence recorded, or T4 explicitly waits on token: T4 waits on scoped VAULT_TOKEN
|
||||
- ops-warden operator has the repo-side artifacts needed to set policy.enabled: true after the stable URL and token are ready
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
2026-06-23 repo-side implementation:
|
||||
|
||||
- Added examples/ops-warden/production_registry_snapshot.json from the ops-warden generated production registry artifact.
|
||||
- Added Go coverage for production actor allows, IAM subject allow, ttl_out_of_bounds, unknown_actor_resource, production registry counts, and /healthz.
|
||||
- Published docs/ops-warden-registry-sync.md and cross-linked it from the handoff and examples docs.
|
||||
|
||||
Remaining blocked work and follow-ups:
|
||||
|
||||
- Operator refreshes scoped VAULT_TOKEN and reruns the OpenBao-backed smoke.
|
||||
- After workplan file changes, run make fix-consistency REPO=flex-auth from ~/state-hub to mirror these statuses into State Hub.
|
||||
|
||||
## See Also
|
||||
|
||||
- docs/ops-warden-policy-gate-handoff.md
|
||||
- docs/ops-warden-registry-sync.md
|
||||
- workplans/FLEX-WP-0006-ops-warden-ssh-signing-policy-gate.md
|
||||
- ~/ops-warden/wiki/PolicyGatedSigning.md
|
||||
- ~/ops-warden/workplans/WARDEN-WP-0009-flex-auth-policy-gate-production.md
|
||||
- ~/ops-warden/history/2026-06-23-flex-auth-production-pickup-suggestion.md
|
||||
|
||||
|
||||
2026-06-24 operator tunnel update:
|
||||
|
||||
- Built /tmp/flex-auth and started the production registry runtime on local 127.0.0.1:18090.
|
||||
- Added local ops-bridge tunnel flex-auth-coulombcore, forwarding CoulombCore 127.0.0.1:18090 to the local runtime.
|
||||
- Verified remote health from CoulombCore: GET /healthz returned HTTP 200.
|
||||
- Verified remote POST /v1/check from CoulombCore allowed agt-state-hub-bridge with decision:873c6c682a52bebc.
|
||||
- VAULT_TOKEN is absent, so OpenBao-backed smoke remains blocked on operator credential refresh.
|
||||
Reference in New Issue
Block a user