Add self-scoping regression command

This commit is contained in:
2026-05-15 13:33:23 +02:00
parent 18ac5fe2ba
commit 750985839f
4 changed files with 245 additions and 3 deletions

View File

@@ -154,3 +154,100 @@ def test_compare_assessment_cli_writes_markdown_report(tmp_path):
assert exit_code == 0
assert "Status: `regression`" in report
assert "Route LLM Requests Across Providers" in report
def test_self_assess_cli_exports_challenger_and_comparison(tmp_path):
    """Run ``self-assess`` and verify both exported JSON artifacts.

    A golden profile expecting the single capability
    "Expose Repository Interface" is written to ``tmp_path``. The CLI is
    then invoked with explicit ``--assessment-output`` and
    ``--comparison-output`` paths inside ``tmp_path / "out"``. The test does
    not create that directory itself, so the command is presumably expected
    to create it (confirm against the CLI implementation).

    The test checks that the command exits with 0, that the assessment file
    records the repo slug ``self-assess-repo`` and execution mode
    ``deterministic-only``, and that the comparison file reports status
    ``candidate_improvement`` with the expected capability matched.
    """
    source = write_repo(tmp_path)

    golden_profile = {
        "profile_id": "test-golden",
        "ability": {
            "expected_capabilities": [
                {"name": "Expose Repository Interface"}
            ]
        },
        "forbidden_native_capabilities": [],
    }
    golden_path = tmp_path / "golden.json"
    golden_path.write_text(json.dumps(golden_profile), encoding="utf-8")

    output_dir = tmp_path / "out"
    assessment_path = output_dir / "assessment.json"
    comparison_path = output_dir / "comparison.json"

    exit_code = main(
        [
            "self-assess",
            "--repo",
            "Self Assess Repo",
            "--source-path",
            str(source),
            "--golden",
            str(golden_path),
            "--assessment-output",
            str(assessment_path),
            "--comparison-output",
            str(comparison_path),
            "--format",
            "json",
            "--database-path",
            str(tmp_path / "registry.sqlite3"),
            "--checkout-root",
            str(tmp_path / "checkouts"),
        ]
    )

    # Check the exit status before touching the output files: if the command
    # failed, the files may be missing or partial, and a FileNotFoundError /
    # JSONDecodeError would hide the real cause of the failure.
    assert exit_code == 0

    assessment = json.loads(assessment_path.read_text(encoding="utf-8"))
    comparison = json.loads(comparison_path.read_text(encoding="utf-8"))

    assert assessment["target_repository"]["repo_slug"] == "self-assess-repo"
    assert assessment["execution"]["mode"] == "deterministic-only"
    assert comparison["status"] == "candidate_improvement"
    assert comparison["matched_expected_capabilities"] == [
        "Expose Repository Interface"
    ]
def test_self_assess_cli_can_fail_on_regression(tmp_path):
    """Check that ``self-assess --fail-on-regression`` exits with status 1.

    The fixture repository holds a README and a ``providers.py`` containing
    a provider-registry line. The golden profile expects no capabilities and
    lists "Route LLM Requests Across Providers" as a forbidden native
    capability. Presumably the registry line makes the assessment report
    that forbidden capability, producing the regression (confirm against the
    assessment logic).
    """
    repo_dir = tmp_path / "provider-repo"
    repo_dir.mkdir()

    # Fixture files are written in this order: README first, then the module.
    fixture_files = (
        ("README.md", "# Provider Repo\n"),
        (
            "providers.py",
            "provider_registry = {'openrouter': OpenRouterAdapter}\n",
        ),
    )
    for filename, text in fixture_files:
        (repo_dir / filename).write_text(text, encoding="utf-8")

    golden_payload = {
        "profile_id": "test-golden",
        "ability": {"expected_capabilities": []},
        "forbidden_native_capabilities": [
            {"name": "Route LLM Requests Across Providers"}
        ],
    }
    golden_file = tmp_path / "golden.json"
    golden_file.write_text(json.dumps(golden_payload), encoding="utf-8")

    registry_db = tmp_path / "registry.sqlite3"
    checkout_dir = tmp_path / "checkouts"
    cli_args = [
        "self-assess",
        "--repo", "Provider Repo",
        "--source-path", str(repo_dir),
        "--golden", str(golden_file),
        "--format", "json",
        "--fail-on-regression",
        "--database-path", str(registry_db),
        "--checkout-root", str(checkout_dir),
    ]

    status = main(cli_args)

    assert status == 1