Implement PMEM-WP-0015 credentialed live pilot with ops-warden routing.

Add credential routing advisories via warden route/access, live pilot evidence helpers, managed deployment pilot probes, evaluation trend regression gates, and expanded troubleshooting. Update operator runbook and maturity scorecard.
2026-07-02 23:24:35 +02:00
parent bff90ec1ed
commit 29f893b905
15 changed files with 913 additions and 38 deletions
--- a/tests/fixtures/memory-graph-cli-lifecycle.json
+++ b/tests/fixtures/memory-graph-cli-lifecycle.json
@@ -0,0 +1,56 @@
+{
+  "schema_version": "markitect.memory.graph.v1",
+  "id": "phase-memory-cli-lifecycle-graph",
+  "nodes": [
+    {
+      "id": "decision.boundary",
+      "kind": "decision",
+      "text": "Markitect owns syntax contracts; phase-memory owns runtime phase planning.",
+      "phase": "stabilized",
+      "source_spans": [{"path": "docs/architecture.md", "line_start": 1}],
+      "metadata": {"title": "Boundary decision"}
+    },
+    {
+      "id": "event.restart",
+      "kind": "episode",
+      "text": "Restart package should include boundary decision and active graph neighborhood.",
+      "phase": "fluid",
+      "freshness": {"updated_at": "2026-06-20T00:00:00+00:00", "source_digest": "old"}
+    },
+    {
+      "id": "artifact.profile",
+      "kind": "artifact",
+      "text": "Memory profile declares budgets, stores, retention, activation, policy, and fallback behavior.",
+      "phase": "stabilized",
+      "freshness": {"updated_at": "2026-06-28T00:00:00+00:00", "source_digest": "fresh"}
+    },
+    {
+      "id": "risk.durable-write",
+      "kind": "risk",
+      "text": "Durable writes must stay review gated until the runtime plan is explicit.",
+      "phase": "fluid"
+    }
+  ],
+  "edges": [
+    {
+      "id": "edge.boundary-profile",
+      "kind": "governs",
+      "source": "decision.boundary",
+      "target": "artifact.profile"
+    },
+    {
+      "id": "edge.risk-boundary",
+      "kind": "depends_on",
+      "source": "risk.durable-write",
+      "target": "decision.boundary"
+    }
+  ],
+  "events": [
+    {
+      "id": "event.activation",
+      "kind": "activated",
+      "timestamp": "2026-06-28T00:00:00+00:00",
+      "activation_refs": ["activation.fixture"]
+    }
+  ]
+}
--- a/tests/fixtures/public-api-snapshot.json
+++ b/tests/fixtures/public-api-snapshot.json
@@ -9,10 +9,12 @@
    "CREDENTIALED_DRILL_SCHEMA",
    "CREDENTIALED_OPERATOR_REPORT_SCHEMA",
    "CREDENTIALED_TELEMETRY_DRILL_SCHEMA",
+    "CREDENTIAL_ROUTING_ADVISORY_SCHEMA",
    "CredentialedDrillConfig",
    "Diagnostic",
    "EVALUATION_REPORT_SCHEMA",
    "EVALUATION_TREND_HISTORY_SCHEMA",
+    "EVALUATION_TREND_REGRESSION_GATE_SCHEMA",
    "EVALUATION_TREND_SCHEMA",
    "ExternalAdapterPack",
    "FakeExternalEventLog",
@@ -22,6 +24,7 @@
    "FakeKontextualRuntimeRegistry",
    "FakeMarkitectPackageCompiler",
    "FakeTelemetryAuditSink",
+    "LIVE_PILOT_REPORT_SCHEMA",
    "LifecycleAction",
    "LifecycleActionKind",
    "LifecycleRuleConfig",
@@ -35,6 +38,7 @@
    "LiveShapedTelemetryAuditSink",
    "LocalMarkitectValidator",
    "LocalServiceRunner",
+    "MANAGED_DEPLOYMENT_PILOT_SCHEMA",
    "MANAGED_DEPLOYMENT_SCHEMA",
    "MANAGED_DEPLOYMENT_VALIDATION_SCHEMA",
    "MARKITECT_PACKAGE_REQUEST_SCHEMA",
@@ -49,6 +53,7 @@
    "MemoryPathState",
    "MemoryPhase",
    "OptionalMarkitectValidator",
+    "PHASE_MEMORY_CREDENTIAL_NEEDS",
    "POLICY_OPERATION_POINTS",
    "PhaseMemoryRuntime",
    "PhaseTransitionRule",
@@ -68,6 +73,8 @@
    "ServiceResponse",
    "TROUBLESHOOTING_MATRIX_SCHEMA",
    "TROUBLESHOOTING_REQUIRED_CATEGORIES",
+    "WARDEN_ACCESS_NEED",
+    "WARDEN_ROUTE_FIND_QUERY",
    "WordCountTokenEstimator",
    "abandon_path",
    "activation_quality_report",
@@ -84,14 +91,17 @@
    "evaluation_threshold_report",
    "evaluation_trend_artifact",
    "evaluation_trend_history",
+    "evaluation_trend_regression_gate",
    "fake_external_adapter_pack",
    "fake_external_runtime_config",
    "graph_from_markitect",
    "health_report",
+    "live_pilot_report",
    "live_shaped_adapter_pack",
    "load_evaluation_trend_history",
    "make_review_record",
    "managed_deployment_manifest",
+    "managed_deployment_pilot_report",
    "merge_path",
    "missing_credentialed_adapter_env",
    "operator_troubleshooting_matrix",
@@ -109,6 +119,7 @@
    "plan_retention",
    "plan_retention_from_rules",
    "profile_from_markitect",
+    "resolve_credentialed_environ",
    "resolve_runtime_adapters",
    "retrieve_graph_neighborhood",
    "runtime_from_config",
@@ -119,8 +130,13 @@
    "validate_adapter_pack_manifest",
    "validate_managed_deployment_manifest",
    "validate_operator_troubleshooting_matrix",
+    "warden_access_advisory",
+    "warden_cli_available",
+    "warden_credential_routing_advisory",
+    "warden_route_find",
    "write_credentialed_operator_report",
-    "write_evaluation_trend_history"
+    "write_evaluation_trend_history",
+    "write_live_pilot_evidence"
  ],
  "service_operations": [
    "audit.query",
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -29,7 +29,7 @@ def test_cli_graph_lifecycle_emits_dry_run_actions(capsys) -> None:
        [
            "graph",
            "lifecycle",
-            str(FIXTURES / "memory-graph.json"),
+            str(FIXTURES / "memory-graph-cli-lifecycle.json"),
            "--stale-after-days",
            "7",
            "--delete-after-days",
@@ -52,7 +52,7 @@ def test_cli_graph_lifecycle_can_use_profile_rules(capsys) -> None:
        [
            "graph",
            "lifecycle",
-            str(FIXTURES / "memory-graph.json"),
+            str(FIXTURES / "memory-graph-cli-lifecycle.json"),
            "--profile",
            str(FIXTURES / "memory-profile.json"),
            "--refresh-digest",
--- a/tests/test_credential_routing.py
+++ b/tests/test_credential_routing.py
@@ -0,0 +1,40 @@
+import json
+
+from phase_memory.credential_routing import (
+    CREDENTIAL_ROUTING_ADVISORY_SCHEMA,
+    PHASE_MEMORY_CREDENTIAL_NEEDS,
+    resolve_credentialed_environ,
+    warden_cli_available,
+    warden_credential_routing_advisory,
+)
+
+
+def test_warden_credential_routing_advisory_is_secret_free() -> None:
+    """The advisory lists every credential variable by name but never echoes a value."""
+    environ = {
+        "PHASE_MEMORY_MARKITECT_URL": "https://markitect.example.invalid",
+        "PHASE_MEMORY_MARKITECT_TOKEN": "markitect-secret-token",
+        "PHASE_MEMORY_KONTEXTUAL_URL": "https://kontextual.example.invalid",
+        "PHASE_MEMORY_KONTEXTUAL_TOKEN": "kontextual-secret-token",
+    }
+
+    advisory = warden_credential_routing_advisory(environ)
+    serialized = json.dumps(advisory, sort_keys=True)
+
+    assert advisory["schema_version"] == CREDENTIAL_ROUTING_ADVISORY_SCHEMA
+    assert advisory["missing_env"] == []
+    assert advisory["present_env"] == sorted(PHASE_MEMORY_CREDENTIAL_NEEDS)
+    for value in environ.values():
+        assert value not in serialized
+    assert advisory["operator_guidance"]["anti_pattern"].startswith("Do not message ops-warden")
+    if warden_cli_available():
+        assert advisory["route_matches"]
+
+
+def test_resolve_credentialed_environ_reports_missing_credentials() -> None:
+    """An empty environment is reported as not ready and points at ``warden access``."""
+    outcome = resolve_credentialed_environ({})
+    assert outcome["ready"] is False
+    assert outcome["missing_env"]
+    assert outcome["routing_advisory"]["schema_version"] == CREDENTIAL_ROUTING_ADVISORY_SCHEMA
+    assert "warden access" in outcome["operator_action"]
--- a/tests/test_evaluation_scenarios.py
+++ b/tests/test_evaluation_scenarios.py
@@ -7,9 +7,11 @@ from phase_memory.contracts import graph_from_markitect
 from phase_memory.evaluation import (
    EVALUATION_REPORT_SCHEMA,
    EVALUATION_TREND_HISTORY_SCHEMA,
+    EVALUATION_TREND_REGRESSION_GATE_SCHEMA,
    EVALUATION_TREND_SCHEMA,
    evaluation_threshold_report,
    evaluation_trend_artifact,
+    evaluation_trend_regression_gate,
    load_evaluation_trend_history,
    write_evaluation_trend_history,
 )
@@ -159,6 +161,37 @@ def test_evaluation_trend_history_persists_without_duplicate_runs(tmp_path) -> N
    assert "policy_denial_count" in loaded["metric_keys"]


+def test_evaluation_trend_regression_gate_flags_metric_declines() -> None:
+    """A one-point drop in ``policy_denial_count`` between two runs must fail the gate."""
+    scenarios = json.loads((FIXTURES / "evaluation-scenarios.json").read_text(encoding="utf-8"))
+    baseline = evaluation_threshold_report(scenarios)
+    older_run = evaluation_trend_artifact(
+        baseline,
+        run_metadata={"run_id": "previous", "created_at": "2026-05-18T00:00:00+00:00"},
+    )
+
+    # Work on a copy of the metrics so the baseline report itself is left untouched.
+    declined_metrics = {**baseline["metrics"]}
+    declined_metrics["policy_denial_count"] = baseline["metrics"]["policy_denial_count"] - 1
+    declined = {**baseline, "metrics": declined_metrics}
+    newer_run = evaluation_trend_artifact(
+        declined,
+        previous_report=baseline,
+        run_metadata={"run_id": "latest", "created_at": "2026-05-19T00:00:00+00:00"},
+    )
+
+    gate = evaluation_trend_regression_gate(
+        {
+            "schema_version": EVALUATION_TREND_HISTORY_SCHEMA,
+            "artifacts": [older_run, newer_run],
+        }
+    )
+
+    assert gate["schema_version"] == EVALUATION_TREND_REGRESSION_GATE_SCHEMA
+    assert gate["valid"] is False
+    assert gate["metric_regressions"]["policy_denial_count"] == -1.0
+
+
 def _activation_plan(response):
    data = response["data"]["activation_plan"]
    return ActivationPlan(
--- a/tests/test_pilot.py
+++ b/tests/test_pilot.py
@@ -0,0 +1,80 @@
+import json
+from pathlib import Path
+
+from phase_memory.pilot import (
+    LIVE_PILOT_REPORT_SCHEMA,
+    MANAGED_DEPLOYMENT_PILOT_SCHEMA,
+    live_pilot_report,
+    managed_deployment_pilot_report,
+    write_live_pilot_evidence,
+)
+from phase_memory.service_app import ServiceAppConfig
+
+FIXTURES = Path(__file__).parent / "fixtures"
+
+
+def test_managed_deployment_pilot_report_passes_local_probes(tmp_path) -> None:
+    """A local-platform pilot over a temporary store passes every probe and validation."""
+    config = ServiceAppConfig(host="127.0.0.1", port=8125, local_store_path=str(tmp_path))
+    report = managed_deployment_pilot_report(config, platform="local")
+
+    assert report["schema_version"] == MANAGED_DEPLOYMENT_PILOT_SCHEMA
+    assert report["valid"] is True
+    probes = report["probes"]
+    for probe_name in ("health", "ready"):
+        assert probes[probe_name]["ok"] is True
+    for section in ("local_store_mount", "rollback"):
+        assert report[section]["validated"] is True
+
+
+def test_live_pilot_report_redacts_secrets_and_marks_partial_live_evidence() -> None:
+    """No credential value passed in through the environment may appear in the report."""
+    environ = {
+        "PHASE_MEMORY_MARKITECT_URL": "https://markitect.example.invalid",
+        "PHASE_MEMORY_MARKITECT_TOKEN": "markitect-secret-token",
+        "PHASE_MEMORY_KONTEXTUAL_URL": "https://kontextual.example.invalid",
+        "PHASE_MEMORY_KONTEXTUAL_TOKEN": "kontextual-secret-token",
+    }
+    report = live_pilot_report(
+        environ,
+        run_id="pytest",
+        scenarios_path=FIXTURES / "evaluation-scenarios.json",
+        operator_approved_fixture=True,
+    )
+    serialized = json.dumps(report, sort_keys=True)
+
+    assert report["schema_version"] == LIVE_PILOT_REPORT_SCHEMA
+    assert report["tooling_verified"] is True
+    assert report["live_evidence"]["credentialed_smoke"] is True
+    assert report["live_evidence"]["managed_deployment_probes"] is True
+    assert report["live_evidence"]["telemetry_retention"] is True
+    assert report["sections"]["evaluation_regression_gate"]["valid"] is True
+    for value in environ.values():
+        assert value not in serialized
+
+
+def test_write_live_pilot_evidence_persists_redacted_artifacts(tmp_path) -> None:
+    """With an empty environment every evidence file is written and the gap is recorded."""
+    report = write_live_pilot_evidence(
+        tmp_path,
+        {},
+        run_id="pytest",
+        scenarios_path=FIXTURES / "evaluation-scenarios.json",
+        operator_approved_fixture=True,
+    )
+    artifact_paths = [
+        tmp_path / "live-pilot-report.json",
+        tmp_path / "credentialed-operator-report.json",
+        tmp_path / "managed-deployment-pilot.json",
+        tmp_path / "telemetry-retention-evidence.json",
+        tmp_path / "evaluation-trend-history.json",
+        tmp_path / "evaluation-regression-gate.json",
+        tmp_path / "credential-routing-advisory.json",
+    ]
+    for artifact in artifact_paths:
+        assert artifact.exists()
+
+    combined = "".join(artifact.read_text(encoding="utf-8") for artifact in artifact_paths)
+    assert report["live_evidence"]["credentialed_smoke"] is False
+    assert "credential_env_missing" in combined
+    assert "warden access" in combined or "warden_cli_unavailable" in combined