generated from coulomb/repo-seed
Implement PMEM-WP-0015 credentialed live pilot with ops-warden routing.
Add credential routing advisories via warden route/access, live pilot evidence helpers, managed deployment pilot probes, evaluation trend regression gates, and expanded troubleshooting. Update operator runbook and maturity scorecard.
This commit is contained in:
@@ -7,9 +7,11 @@ from phase_memory.contracts import graph_from_markitect
|
||||
from phase_memory.evaluation import (
|
||||
EVALUATION_REPORT_SCHEMA,
|
||||
EVALUATION_TREND_HISTORY_SCHEMA,
|
||||
EVALUATION_TREND_REGRESSION_GATE_SCHEMA,
|
||||
EVALUATION_TREND_SCHEMA,
|
||||
evaluation_threshold_report,
|
||||
evaluation_trend_artifact,
|
||||
evaluation_trend_regression_gate,
|
||||
load_evaluation_trend_history,
|
||||
write_evaluation_trend_history,
|
||||
)
|
||||
@@ -159,6 +161,37 @@ def test_evaluation_trend_history_persists_without_duplicate_runs(tmp_path) -> N
|
||||
assert "policy_denial_count" in loaded["metric_keys"]
|
||||
|
||||
|
||||
def test_evaluation_trend_regression_gate_flags_metric_declines() -> None:
    """Check that the trend regression gate rejects a history whose latest run declined.

    Two trend artifacts are built from the ``evaluation-scenarios.json``
    fixture: a baseline run, and a later run whose ``policy_denial_count``
    is one lower than the baseline. The gate evaluated over that two-entry
    history is expected to be invalid and to report the metric with a
    delta of ``-1.0``.
    """
    fixture_text = (FIXTURES / "evaluation-scenarios.json").read_text(encoding="utf-8")
    baseline_report = evaluation_threshold_report(json.loads(fixture_text))

    baseline_artifact = evaluation_trend_artifact(
        baseline_report,
        run_metadata={"run_id": "previous", "created_at": "2026-05-18T00:00:00+00:00"},
    )

    # Copy the baseline metrics and lower exactly one of them so the only
    # difference between the two runs is the metric under test.
    declined_metrics = dict(baseline_report["metrics"])
    declined_metrics["policy_denial_count"] = (
        baseline_report["metrics"]["policy_denial_count"] - 1
    )
    declined_report = {**baseline_report, "metrics": declined_metrics}

    declined_artifact = evaluation_trend_artifact(
        declined_report,
        previous_report=baseline_report,
        run_metadata={"run_id": "latest", "created_at": "2026-05-19T00:00:00+00:00"},
    )

    gate = evaluation_trend_regression_gate(
        {
            "schema_version": EVALUATION_TREND_HISTORY_SCHEMA,
            "artifacts": [baseline_artifact, declined_artifact],
        }
    )

    assert gate["schema_version"] == EVALUATION_TREND_REGRESSION_GATE_SCHEMA
    assert gate["valid"] is False
    assert gate["metric_regressions"]["policy_denial_count"] == -1.0
|
||||
|
||||
|
||||
def _activation_plan(response):
|
||||
data = response["data"]["activation_plan"]
|
||||
return ActivationPlan(
|
||||
|
||||
Reference in New Issue
Block a user