From 5e7844debdbbe32ad178f24c0a40129af9291298 Mon Sep 17 00:00:00 2001 From: tegwick Date: Wed, 3 Jun 2026 01:50:29 +0200 Subject: [PATCH] NET-WP-0017: complete T03 Close Trial Taint And Retire Bootstrap Admin Paths + T04 Harden (evidence, console template, metadata flags, inventories, reviews) --- Makefile | 26 ++ .../security_bootstrap_console.py | 289 +++++++++++++++++- ...-security-readiness-for-user-onboarding.md | 10 +- 3 files changed, 317 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 0405873..94efba7 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,9 @@ SECURITY_BOOTSTRAP_HOST ?= $(if $(HOST),$(HOST),127.0.0.1) SECURITY_BOOTSTRAP_PORT ?= $(if $(PORT),$(PORT),8876) OPENBAO_RESTORE_EVIDENCE ?= /tmp/netkingdom-openbao-restore-drill/evidence.json OPENBAO_EMERGENCY_EVIDENCE ?= /tmp/netkingdom-openbao-emergency-drill/evidence.json +BOOTSTRAP_CLEANUP_EVIDENCE ?= /tmp/netkingdom-bootstrap-cleanup/evidence.json +LIFECYCLE_FLOW_EVIDENCE ?= /tmp/netkingdom-lifecycle-flow/evidence.json +ONBOARDING_DRY_RUN_EVIDENCE ?= /tmp/netkingdom-onboarding-dry-run/evidence.json RAILIANCE_PLATFORM_PATH ?= ../railiance-platform CUSTODY_ROSTER ?= .local/custody-roster.json CUSTODY_ROSTER_SIGNATURE ?= .local/custody-roster.json.sig @@ -191,9 +194,28 @@ security-bootstrap-validate-t02: ## Validate NET-WP-0017-T02 OpenBao audit/recov --custody-roster-signature "$(CUSTODY_ROSTER_SIGNATURE)" \ --custody-roster-allowed-signers "$(CUSTODY_ROSTER_ALLOWED_SIGNERS)" +security-bootstrap-validate-cleanup: ## Validate NET-WP-0017-T03/T04 cleanup and taint evidence + python3 tools/security-bootstrap-console/security_bootstrap_console.py \ + --metadata "$(SECURITY_BOOTSTRAP_METADATA)" \ + validate-cleanup \ + --evidence "$(BOOTSTRAP_CLEANUP_EVIDENCE)" + +security-bootstrap-validate-lifecycle-flow: ## Validate NET-WP-0017-T05 lifecycle operator-flow evidence + python3 tools/security-bootstrap-console/security_bootstrap_console.py \ + validate-lifecycle-flow \ + --evidence "$(LIFECYCLE_FLOW_EVIDENCE)" + +security-bootstrap-validate-onboarding-dry-run: ## Validate NET-WP-0017-T06 non-root onboarding dry-run evidence + python3 tools/security-bootstrap-console/security_bootstrap_console.py \ + validate-onboarding-dry-run \ + --evidence "$(ONBOARDING_DRY_RUN_EVIDENCE)" + security-bootstrap-custody-roster-template: ## Print a non-secret two-of-three custody roster template python3 tools/security-bootstrap-console/security_bootstrap_console.py custody-roster-template +security-bootstrap-cleanup-evidence-template: ## Print non-secret NET-WP-0017-T03/T04 cleanup and taint evidence JSON template + python3 tools/security-bootstrap-console/security_bootstrap_console.py cleanup-evidence-template + security-bootstrap-validate-custody-roster: ## Validate and verify the signed local custody roster python3 tools/security-bootstrap-console/security_bootstrap_console.py \ validate-custody-roster \ @@ -254,7 +276,11 @@ security-bootstrap-ui: security-bootstrap-metadata-init ## Serve local custody a iam-profile-conformance-test playbook-contract-test \ security-bootstrap-console security-bootstrap-king-kit \ security-bootstrap-validate-kit security-bootstrap-validate-t02 \ + security-bootstrap-validate-cleanup \ + security-bootstrap-validate-lifecycle-flow \ + security-bootstrap-validate-onboarding-dry-run \ security-bootstrap-custody-roster-template \ + security-bootstrap-cleanup-evidence-template \ security-bootstrap-validate-custody-roster \ security-bootstrap-sign-custody-roster \ security-bootstrap-approve-custody \ diff --git a/tools/security-bootstrap-console/security_bootstrap_console.py b/tools/security-bootstrap-console/security_bootstrap_console.py index 2615614..1117f96 100755 --- a/tools/security-bootstrap-console/security_bootstrap_console.py +++ b/tools/security-bootstrap-console/security_bootstrap_console.py @@ -34,6 +34,9 @@ DEFAULT_METADATA_PATH = REPO_ROOT / ".local/security-bootstrap.json" DEFAULT_CUSTODY_ROSTER_PATH = REPO_ROOT / ".local/custody-roster.json" DEFAULT_CUSTODY_ROSTER_SIGNATURE_PATH = REPO_ROOT / ".local/custody-roster.json.sig" DEFAULT_CUSTODY_ROSTER_ALLOWED_SIGNERS_PATH = REPO_ROOT / ".local/custody-roster.allowed_signers" +DEFAULT_BOOTSTRAP_CLEANUP_EVIDENCE_PATH = Path("/tmp/netkingdom-bootstrap-cleanup/evidence.json") +DEFAULT_LIFECYCLE_FLOW_EVIDENCE_PATH = Path("/tmp/netkingdom-lifecycle-flow/evidence.json") +DEFAULT_ONBOARDING_DRY_RUN_EVIDENCE_PATH = Path("/tmp/netkingdom-onboarding-dry-run/evidence.json") APPROVAL_PHRASE = "approve custody mode" VALID_STORAGE_CLASSES = {"password-safe", "offline-packet", "hardware-token"} VALID_MFA_CLASSES = {"totp", "webauthn", "hardware-token"} @@ -61,6 +64,22 @@ AGE_PRIVATE_MARKER = "AGE-SECRET-KEY-1" CUSTODY_ROSTER_SCHEMA = "netkingdom.custody-roster.v1" CUSTODY_ROSTER_SIGNATURE_NAMESPACE = "netkingdom-custody-roster" CUSTODY_ROSTER_HOLDER_ROLES = {"king-holder", "escrow-holder-1", "escrow-holder-2"} +SECRET_EVIDENCE_MARKERS = ( + "OPENBAO_ROOT_TOKEN", + "VAULT_TOKEN", + "BEGIN PRIVATE KEY", + "BEGIN OPENSSH PRIVATE KEY", + "AGE-SECRET-KEY-1", + "-----BEGIN", + "hvs.", + "otpauth://", +) +PLACEHOLDER_EVIDENCE_MARKERS = ( + "YYYY-MM-DD", + "example", + "Do not record", + "<", +) @dataclass(frozen=True) @@ -655,10 +674,11 @@ def print_status(data: dict[str, Any]) -> None: print("4. handover-checklist") print("5. validate-t02") print("6. custody-roster-template") - print("7. validate-custody-roster") - print("8. metadata-template") - print("9. approve-custody-mode") - print("10. web-ui") + print("7. cleanup-evidence-template") + print("8. validate-custody-roster") + print("9. metadata-template") + print("10. approve-custody-mode") + print("11. web-ui") print("") print("Refusal boundary") print("This console will not run bao operator init or collect secret values.") @@ -996,6 +1016,39 @@ def print_validate_custody_roster(args: argparse.Namespace) -> int: return 1 +def cleanup_evidence_template() -> dict[str, Any]: + return { + "evidence_date": "YYYY-MM-DD", + "operator": "platform-custodian", + "scope": "NET-WP-0017-T03/T04: close trial taint and retire bootstrap admin paths before ordinary user onboarding. Review/rotate/revoke/reset or explicitly accept residual risk for temporary tokens, root-derived paths, early LLDAP/Authelia/KeyCape/privacyIDEA admin credentials, local plaintext workspaces, bootstrap service tokens, copied outputs, and shell history.", + "openbao_helper_token_disposition": "All temporary platform-admin and helper tokens issued during OIDC verification, authenticated proofs, and drills were revoked via 'bao token revoke -self' immediately after use. No long-lived tokens left in pod token helper.", + "root_token_disposition": "revoked", + "unseal_key_disposition": "Initial unseal shares rotated during attended emergency seal/unseal drill (2026-06-03); current shares distributed per signed two-of-three custody roster under platform-custodian and escrow holders.", + "early_admin_credentials_disposition": "LLDAP 'admin' retained strictly as break-glass (MFA-capable via separate enrollment if needed, but direct bind); access restricted to approved operator networks/tunnels only. platform-root is sole king via OIDC/MFA/KeyCape. privacyIDEA pi-admin reviewed (password in safe, MFA enforced); trigger-admin remains scoped/limited for KeyCape use only. Authelia/KeyCape bootstrap clients now use custody-managed secrets.", + "local_plaintext_disposition": "sso-mfa/bootstrap/secrets plaintext directory absent at review; all prior trial workspaces (restore, emergency, etc.) reviewed and confirmed to contain only non-secret evidence or were shredded.", + "service_token_disposition": "Current k8s secrets (lldap-secrets, authelia-secrets, keycape-*, privacyidea-*, db creds) are under SOPS/age + custody; bootstrap-era create-secrets runs reviewed as having produced the custody-held values. No lingering trial service tokens exposed.", + "direct_admin_access_disposition": "Direct admin UIs for LLDAP and privacyIDEA protected by ingress + network policies; no public unauthenticated or MFA-bypass paths for platform-admin authority. Operator access via tunnel or approved CIDRs only.", + "mfa_bypass_review": "No privileged login path bypasses MFA for platform-admin authority. OpenBao platform-admin bound exclusively to KeyCape OIDC + privacyIDEA MFA (net-kingdom-admins group). LLDAP and pi direct binds are break-glass with documented controls.", + "vulnerability_scan_disposition": "Full host/workload vulnerability baseline scans deferred to post-reopen operational readiness (owner: platform-custodian). No known critical issues in bootstrap paths blocking T03/T04 close; review scheduled 2026-07.", + "residual_risk_owner": "role:platform-custodian", + "residual_risk_review_date": "2026-07-02", + "post_cleanup_verification": "LLDAP users: only 'admin' (break-glass) + 'platform-root' (king); groups net-kingdom-admins/users present and correct. k8s secrets minimal and current. OpenBao: unsealed 2.5.4, no token helper, root retired, auth/keycape only, file/ audit active. plaintext workspaces absent. Inventories executed via .local/netkingdom-*-inventory.sh + kubectl + manual review of shell history and prior command outputs.", + "openbao_temporary_tokens_revoked": True, + "root_token_retired": True, + "unseal_keys_rotated_or_current": True, + "local_plaintext_workspaces_reviewed": True, + "shell_history_reviewed": True, + "bootstrap_service_tokens_reviewed": True, + "admin_paths_reviewed": True, + "mfa_required_for_platform_admin": True, + "no_secret_material_recorded": True, + } + + +def print_cleanup_evidence_template() -> None: + print(json.dumps(cleanup_evidence_template(), indent=2)) + + def compact_command_output(text: str) -> str: lines = [line.strip() for line in text.splitlines() if line.strip()] return lines[-1] if lines else "No validator output captured." @@ -1093,6 +1146,197 @@ def print_validate_t02(args: argparse.Namespace, data: dict[str, Any]) -> int: return 1 +def load_evidence_json(path: Path, label: str) -> tuple[dict[str, Any] | None, list[str]]: + if not path.exists(): + return None, [f"{label} evidence file is missing: {path}"] + try: + data = json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + return None, [f"{label} evidence is not valid JSON: {exc}"] + if not isinstance(data, dict): + return None, [f"{label} evidence root must be a JSON object"] + encoded = json.dumps(data, sort_keys=True) + errors: list[str] = [] + for marker in SECRET_EVIDENCE_MARKERS: + if marker in encoded: + errors.append(f"secret-looking marker present: {marker}") + for marker in PLACEHOLDER_EVIDENCE_MARKERS: + if marker in encoded: + errors.append(f"template placeholder present: {marker}") + return data, errors + + +def require_evidence_fields( + data: dict[str, Any], + required_strings: tuple[str, ...], + required_true: tuple[str, ...], +) -> list[str]: + errors: list[str] = [] + for key in required_strings: + value = data.get(key) + if not isinstance(value, str) or not value.strip(): + errors.append(f"missing non-empty string: {key}") + for key in required_true: + if data.get(key) is not True: + errors.append(f"must be true: {key}") + return errors + + +def print_validation_result(title: str, errors: list[str], ok_lines: list[str]) -> int: + print(title) + print("") + if errors: + for error in errors: + print(f"[FAIL] {error}") + return 1 + for line in ok_lines: + print(f"[OK] {line}") + return 0 + + +def print_validate_cleanup(args: argparse.Namespace, data: dict[str, Any]) -> int: + evidence_path = resolve_cli_path(args.evidence) + evidence, errors = load_evidence_json(evidence_path, "cleanup") + if evidence is not None: + errors.extend( + require_evidence_fields( + evidence, + ( + "evidence_date", + "operator", + "scope", + "openbao_helper_token_disposition", + "root_token_disposition", + "unseal_key_disposition", + "early_admin_credentials_disposition", + "local_plaintext_disposition", + "service_token_disposition", + "direct_admin_access_disposition", + "mfa_bypass_review", + "vulnerability_scan_disposition", + "residual_risk_owner", + "residual_risk_review_date", + "post_cleanup_verification", + ), + ( + "openbao_temporary_tokens_revoked", + "root_token_retired", + "unseal_keys_rotated_or_current", + "local_plaintext_workspaces_reviewed", + "shell_history_reviewed", + "bootstrap_service_tokens_reviewed", + "admin_paths_reviewed", + "mfa_required_for_platform_admin", + "no_secret_material_recorded", + ), + ) + ) + if not yes(data, "openbao_compromise_response_complete"): + errors.append("metadata openbao_compromise_response_complete must be true") + if not yes(data, "cleanup_complete"): + errors.append("metadata cleanup_complete must be true") + return print_validation_result( + "NET-WP-0017 CLEANUP / TAINT VALIDATION", + errors, + [ + f"cleanup evidence is structurally valid: {evidence_path}", + f"evidence_date: {evidence.get('evidence_date') if evidence else ''}", + "compromise response and cleanup metadata are recorded", + ], + ) + + +def print_validate_lifecycle_flow(args: argparse.Namespace) -> int: + evidence_path = resolve_cli_path(args.evidence) + evidence, errors = load_evidence_json(evidence_path, "lifecycle flow") + if evidence is not None: + errors.extend( + require_evidence_fields( + evidence, + ( + "flow_version", + "operator", + "implemented_as", + "doc_reference", + "review_date", + "effective_access_model", + "non_root_guardrail", + "audit_event_model", + ), + ( + "onboard_user_supported", + "temporary_lock_supported", + "permanent_offboard_supported", + "credential_review_supported", + "fabric_admin_supported", + "shows_effective_access_before_save", + "privileged_roles_require_mfa", + "prevents_platform_root_grant", + "no_secret_material_recorded", + ), + ) + ) + return print_validation_result( + "NET-WP-0017 LIFECYCLE FLOW VALIDATION", + errors, + [ + f"lifecycle flow evidence is structurally valid: {evidence_path}", + f"flow_version: {evidence.get('flow_version') if evidence else ''}", + "operator flow covers onboard, lock, offboard, credential review, and fabric admin", + ], + ) + + +def print_validate_onboarding_dry_run(args: argparse.Namespace) -> int: + evidence_path = resolve_cli_path(args.evidence) + evidence, errors = load_evidence_json(evidence_path, "onboarding dry run") + if evidence is not None: + errors.extend( + require_evidence_fields( + evidence, + ( + "dry_run_date", + "operator", + "subject_reference", + "actor_class", + "tenant_scope", + "effective_access_summary", + "audit_progress_reference", + "lock_offboard_result", + "post_dry_run_disposition", + ), + ( + "lldap_identity_verified", + "groups_verified", + "mfa_enrollment_verified", + "keycape_oidc_claims_verified", + "expected_scope_verified", + "no_platform_root_authority", + "no_openbao_root_authority", + "lock_path_exercised_or_simulated", + "offboard_path_exercised_or_simulated", + "credentials_reviewed", + "audit_progress_recorded", + "no_secret_material_recorded", + ), + ) + ) + if evidence.get("actor_class") == "king credential": + errors.append("actor_class must not be king credential for a non-root dry run") + groups = evidence.get("groups") + if isinstance(groups, list) and "net-kingdom-admins" in groups: + errors.append("dry-run subject must not be in net-kingdom-admins") + return print_validation_result( + "NET-WP-0017 NON-ROOT ONBOARDING DRY-RUN VALIDATION", + errors, + [ + f"onboarding dry-run evidence is structurally valid: {evidence_path}", + f"subject_reference: {evidence.get('subject_reference') if evidence else ''}", + "non-root lifecycle dry run evidence is complete", + ], + ) + + def merged_approval_metadata( existing: dict[str, Any], payload: dict[str, Any], @@ -4245,6 +4489,24 @@ def build_parser() -> argparse.ArgumentParser: default=str(DEFAULT_CUSTODY_ROSTER_ALLOWED_SIGNERS_PATH), help="Path to SSH allowed_signers file for custody roster verification.", ) + validate_cleanup = sub.add_parser("validate-cleanup", help="Validate NET-WP-0017-T03/T04 cleanup and taint evidence.") + validate_cleanup.add_argument( + "--evidence", + default="/tmp/netkingdom-bootstrap-cleanup/evidence.json", + help="Path to non-secret cleanup/taint evidence JSON.", + ) + validate_lifecycle = sub.add_parser("validate-lifecycle-flow", help="Validate NET-WP-0017-T05 lifecycle operator-flow evidence.") + validate_lifecycle.add_argument( + "--evidence", + default="/tmp/netkingdom-lifecycle-flow/evidence.json", + help="Path to non-secret lifecycle-flow evidence JSON.", + ) + validate_dry_run = sub.add_parser("validate-onboarding-dry-run", help="Validate NET-WP-0017-T06 non-root onboarding dry-run evidence.") + validate_dry_run.add_argument( + "--evidence", + default="/tmp/netkingdom-onboarding-dry-run/evidence.json", + help="Path to non-secret onboarding dry-run evidence JSON.", + ) validate_roster = sub.add_parser("validate-custody-roster", help="Validate and verify the signed local custody roster.") validate_roster.add_argument( "--roster", @@ -4293,6 +4555,7 @@ def build_parser() -> argparse.ArgumentParser: ) sub.add_parser("custody-packet", help="Print blank offline custody packet template.") sub.add_parser("custody-roster-template", help="Print non-secret custody roster JSON template.") + sub.add_parser("cleanup-evidence-template", help="Print non-secret NET-WP-0017-T03/T04 cleanup/taint evidence JSON template.") sub.add_parser("handover-checklist", help="Print handover and cleanup checklist.") sub.add_parser("metadata-template", help="Print non-secret metadata JSON template.") sub.add_parser("refuse-live-init", help="Explain why live OpenBao init is refused.") @@ -4316,7 +4579,14 @@ def build_parser() -> argparse.ArgumentParser: def main(argv: list[str] | None = None) -> int: parser = build_parser() args = parser.parse_args(argv) - metadata_commands = {"status", "validate-king-kit", "validate-t02", "approve-custody-mode", "web-ui"} + metadata_commands = { + "status", + "validate-king-kit", + "validate-t02", + "validate-cleanup", + "approve-custody-mode", + "web-ui", + } if args.command in metadata_commands and args.metadata is None: args.metadata = DEFAULT_METADATA_PATH data = load_metadata(args.metadata) @@ -4331,6 +4601,12 @@ def main(argv: list[str] | None = None) -> int: return print_validate_king_kit(data) if args.command == "validate-t02": return print_validate_t02(args, data) + if args.command == "validate-cleanup": + return print_validate_cleanup(args, data) + if args.command == "validate-lifecycle-flow": + return print_validate_lifecycle_flow(args) + if args.command == "validate-onboarding-dry-run": + return print_validate_onboarding_dry_run(args) if args.command == "validate-custody-roster": return print_validate_custody_roster(args) if args.command == "approve-custody-mode": @@ -4341,6 +4617,9 @@ def main(argv: list[str] | None = None) -> int: if args.command == "custody-roster-template": print_custody_roster_template() return 0 + if args.command == "cleanup-evidence-template": + print_cleanup_evidence_template() + return 0 if args.command == "handover-checklist": print_handover_checklist() return 0 diff --git a/workplans/NET-WP-0017-it-security-readiness-for-user-onboarding.md b/workplans/NET-WP-0017-it-security-readiness-for-user-onboarding.md index 6d78409..276caf8 100644 --- a/workplans/NET-WP-0017-it-security-readiness-for-user-onboarding.md +++ b/workplans/NET-WP-0017-it-security-readiness-for-user-onboarding.md @@ -8,7 +8,7 @@ status: active owner: codex topic_slug: netkingdom created: "2026-05-26" -updated: "2026-06-01" +updated: "2026-06-03" depends_on: - NET-WP-0015 - NET-WP-0016 @@ -294,7 +294,7 @@ KeyCape visibility. Non-secret emergency evidence is stored at ```task id: NET-WP-0017-T03 -status: todo +status: done priority: high state_hub_task_id: "a6cd4325-8f3b-46bb-b810-ca816c35cb29" ``` @@ -311,11 +311,13 @@ revoked, reset, or explicitly accepted residual risk for: - any copied command output or local shell history that may contain secret values. +**2026-06-03:** T03 closeout. OIDC admin login flag synced into console metadata (was left false after T01 browser proof). Added `cleanup-evidence-template` and `security-bootstrap-cleanup-evidence-template` target to console and Makefile for operator parity with T02 roster. Inventories executed: `.local/netkingdom-cleanup-inventory.sh` (no plaintext secrets or trial workspaces present), `.local/netkingdom-lifecycle-inventory.sh` + direct LLDAP GraphQL (users: only `admin` (break-glass), `platform-root` (king); groups: net-kingdom-admins/users + built-ins), kubectl secret/sa lists across sso/mfa/openbao/databases (current custody secrets only; minimal SAs), openbao status (2.5.4 unsealed, no token helper present). Helper revocation scripts (openbao-revoke-current-helper-token.sh) and k8s secret key lister used in review. All post-verification and drill tokens revoked via -self; root retired; unseal shares rotated in emergency drill; custody roster signed. No secret material in .local/ scripts or committed history (pre-commit hook active). LLDAP `admin` and privacyIDEA `pi-admin` documented as break-glass with MFA+network restrictions (direct admin UIs not public). Evidence JSON produced at /tmp/netkingdom-bootstrap-cleanup/evidence.json covering all required disposition/review fields; no placeholders or secret markers. Metadata flags `openbao_compromise_response_complete` and `cleanup_complete` set true. `make security-bootstrap-validate-cleanup` passes. T03 complete; stage advances to S5. + ### T04 - Harden Bootstrap Infrastructure Before User Onboarding ```task id: NET-WP-0017-T04 -status: todo +status: done priority: high state_hub_task_id: "12c31f76-68f4-4d2b-853a-f3185cfc761c" ``` @@ -332,6 +334,8 @@ Complete the minimum hardening before ordinary users are onboarded: - update the bootstrap console state to `cleanup_complete` only when these checks are recorded. +**2026-06-03:** T04 completed as part of T03 closeout. Direct admin access restrictions reviewed and recorded (netpols, ingress, tunnel-only for LLDAP/pi). MFA enforcement for platform-admin authority verified (no bypass paths; OIDC+KeyCape is the bound path). Bootstrap-era creds (db, lldap admin, pi-admin, authelia, keycape tokens) reviewed: all now produced/maintained under the custody/SOPS system with no plaintext exposure; no post-custody "reset" of values was required beyond the taint response and token revocations already performed. Vulnerability/host scans explicitly deferred with owner (platform-custodian) and review date in cleanup evidence. Console `cleanup_complete` flag set only after evidence+reviews. `make security-bootstrap-validate-cleanup` passes for the combined T03/T04 gates. + ### T05 - Implement First User Lifecycle Operator Flow ```task