generated from coulomb/repo-seed
Add self-scoping assessment comparison
This commit is contained in:
@@ -131,3 +131,26 @@ def test_export_assessment_cli_writes_completed_run_artifact(tmp_path):
|
||||
assert artifact["execution"]["analysis_run_id"] == summary.analysis_run.id
|
||||
assert artifact["assessment"]["role"] == "challenger"
|
||||
assert artifact["generated_tree"]["abilities"]
|
||||
|
||||
|
||||
def test_compare_assessment_cli_writes_markdown_report(tmp_path):
    """Run the ``compare-assessment`` CLI command and check its markdown report.

    The command is pointed at the golden profile and the known-bad run-39
    assessment and asked to write a markdown report under ``tmp_path``. The
    command must exit with 0, and the report must contain the regression
    status line and the text "Route LLM Requests Across Providers".
    """
    # NOTE(review): the two input paths are relative, so this presumably
    # relies on the test session starting at the repository root -- confirm.
    output_path = tmp_path / "comparison.md"

    exit_code = main(
        [
            "compare-assessment",
            "--golden",
            "docs/self-scoping/golden/repo-scoping-golden-profile.v1.json",
            "--assessment",
            "docs/self-scoping/assessments/repo-scoping-known-bad-2026-05-15-run-39.json",
            "--output",
            str(output_path),
            "--format",
            "markdown",
        ]
    )

    # Check the exit code before touching the output file, so a failing
    # command is reported as a bad exit code instead of as an error raised
    # while reading a report that may never have been written.
    assert exit_code == 0

    report = output_path.read_text(encoding="utf-8")
    assert "Status: `regression`" in report
    assert "Route LLM Requests Across Providers" in report
|
||||
|
||||
tests/test_self_scoping_comparison.py
Normal file, 62 lines added
@@ -0,0 +1,62 @@
|
||||
from pathlib import Path
|
||||
|
||||
from repo_registry.self_scoping.comparison import (
|
||||
compare_assessment_to_golden,
|
||||
comparison_markdown,
|
||||
load_json,
|
||||
)
|
||||
|
||||
|
||||
# Directory one level above the folder containing this module
# (presumably the repository root -- confirm against the project layout).
ROOT = Path(__file__).resolve().parents[1]

# Golden profile that assessments are compared against.
GOLDEN_PROFILE = ROOT.joinpath(
    "docs",
    "self-scoping",
    "golden",
    "repo-scoping-golden-profile.v1.json",
)

# Stored assessment used by the tests below as the known-bad input.
KNOWN_BAD = ROOT.joinpath(
    "docs",
    "self-scoping",
    "assessments",
    "repo-scoping-known-bad-2026-05-15-run-39.json",
)
|
||||
|
||||
|
||||
def test_compare_known_bad_assessment_to_golden_flags_regression():
    """Compare the known-bad assessment with the golden profile.

    The resulting comparison must carry the v1 schema tag, report a
    ``regression`` status, list the expected forbidden and missing
    capability names, include the three ``RREG-SELF-REG`` pattern ids, and
    contain at least one misplaced feature whose type is ``API``.
    """
    golden = load_json(GOLDEN_PROFILE)
    assessment = load_json(KNOWN_BAD)
    result = compare_assessment_to_golden(golden, assessment)

    assert result["schema_version"] == "self-scoping-comparison/v1"
    assert result["status"] == "regression"

    forbidden_present = result["forbidden_native_capabilities_present"]
    assert "Route LLM Requests Across Providers" in forbidden_present

    missing_expected = result["missing_expected_capabilities"]
    assert "Scan Repositories Into Observed Facts" in missing_expected

    # Superset check: additional patterns beyond these three are allowed.
    required_pattern_ids = {
        "RREG-SELF-REG-001",
        "RREG-SELF-REG-002",
        "RREG-SELF-REG-003",
    }
    reported_pattern_ids = {
        pattern["id"] for pattern in result["known_regression_patterns"]
    }
    assert reported_pattern_ids >= required_pattern_ids

    misplaced = result["misplaced_features"]
    assert any(feature["feature_type"] == "API" for feature in misplaced)
|
||||
|
||||
|
||||
def test_comparison_markdown_summarizes_actionable_sections():
    """Render the known-bad comparison as markdown and check its key content.

    The rendered text must contain the report title, the two capability
    section headings, and the "Route LLM Requests Across Providers" entry.
    """
    golden = load_json(GOLDEN_PROFILE)
    assessment = load_json(KNOWN_BAD)
    markdown = comparison_markdown(compare_assessment_to_golden(golden, assessment))

    # Checked in the same order as the individual assertions they replace.
    expected_fragments = (
        "# Self-Scoping Comparison",
        "## Missing Expected Capabilities",
        "## Forbidden Native Capabilities Present",
        "Route LLM Requests Across Providers",
    )
    for fragment in expected_fragments:
        assert fragment in markdown
|
||||
Reference in New Issue
Block a user