Add self-scoping regression command

This commit is contained in:
2026-05-15 13:33:23 +02:00
parent 18ac5fe2ba
commit 750985839f
4 changed files with 245 additions and 3 deletions

View File

@@ -60,3 +60,18 @@ repo-scoping compare-assessment \
The first comparison report highlights missing expected capabilities, forbidden
native capabilities, known regression patterns, and misplaced API/CLI features.
Run the full self-assessment loop:
```bash
repo-scoping self-assess \
--source-path . \
--assessment-output docs/self-scoping/assessments/repo-scoping-challenger.json \
--comparison-output docs/self-scoping/assessments/repo-scoping-challenger.md
```
By default this path is deterministic-only and leaves generated candidates
pending review. Add `--with-llm` only when a provider is configured and the run
should include LLM-assisted candidate extraction. Add `--fail-on-regression` in
CI when known regressions should fail the command; ordinary `needs_review`
comparisons still exit successfully.

View File

@@ -99,6 +99,58 @@ def build_parser() -> argparse.ArgumentParser:
default="markdown",
help="Comparison report format.",
)
self_assess = subparsers.add_parser(
"self-assess",
help="Run repo-scoping against a source tree and compare the result to a golden profile.",
)
self_assess.add_argument(
"--repo",
default="repo-scoping",
help="Repository id or exact repository name to reuse; created by name when absent.",
)
self_assess.add_argument(
"--source-path",
default=".",
help="Source tree to analyze; defaults to the current working directory.",
)
self_assess.add_argument(
"--golden",
default="docs/self-scoping/golden/repo-scoping-golden-profile.v1.json",
help="Golden profile JSON path.",
)
self_assess.add_argument(
"--assessment-output",
help="Write challenger assessment artifact JSON to this path.",
)
self_assess.add_argument(
"--comparison-output",
help="Write comparison report to this path instead of stdout.",
)
self_assess.add_argument(
"--format",
choices=["json", "markdown"],
default="markdown",
help="Comparison report format.",
)
self_assess.add_argument(
"--with-llm",
action="store_false",
dest="no_llm",
help="Use configured LLM assistance during the self-assessment run.",
)
self_assess.add_argument(
"--agentic-review",
action="store_true",
help="Reserved for a configured agentic reviewer; currently errors when requested.",
)
self_assess.add_argument(
"--fail-on-regression",
action="store_true",
help="Return exit code 1 only when comparison status is regression.",
)
self_assess.add_argument("--database-path", help="Override REPO_REGISTRY_DATABASE_PATH.")
self_assess.add_argument("--checkout-root", help="Override REPO_REGISTRY_CHECKOUT_ROOT.")
self_assess.set_defaults(no_llm=True)
return parser
@@ -111,6 +163,8 @@ def main(argv: Sequence[str] | None = None) -> int:
return export_assessment_command(args, parser)
if args.command == "compare-assessment":
return compare_assessment_command(args)
if args.command == "self-assess":
return self_assess_command(args, parser)
parser.error(f"unknown command: {args.command}")
return 2
@@ -158,12 +212,57 @@ def compare_assessment_command(args: argparse.Namespace) -> int:
else comparison_markdown(comparison)
)
if args.output:
Path(args.output).write_text(content, encoding="utf-8")
write_text(args.output, content)
else:
print(content, end="" if content.endswith("\n") else "\n")
return 0
def self_assess_command(
    args: argparse.Namespace,
    parser: argparse.ArgumentParser,
) -> int:
    """Analyze a source tree and compare the result to a golden profile.

    Steps, in order: reject ``--agentic-review``, validate the source
    directory, find or register the target repository, run the analysis,
    export a challenger assessment artifact, compare it with the golden
    profile, then emit the artifact and the comparison report.

    Args:
        args: Parsed ``self-assess`` arguments.
        parser: Parser used to report usage and analysis errors via
            ``parser.error``.

    Returns:
        1 when ``--fail-on-regression`` is set and the comparison status is
        ``regression``; 0 otherwise.
    """
    if args.agentic_review:
        parser.error("agentic review is not configured yet")

    service = service_from_args(args)

    source_path = Path(args.source_path).expanduser().resolve()
    if not source_path.is_dir():
        parser.error(f"source path does not exist or is not a directory: {source_path}")

    repository = self_assessment_repository(service, args.repo, source_path)

    # LLM assistance is opt-in (``no_llm`` stays true unless ``--with-llm``
    # is given) and candidates are never auto-approved on this path.
    summary = service.analyze_repository(
        repository.id,
        source_path=str(source_path),
        use_llm_assistance=not args.no_llm,
        trusted_auto_approve=False,
    )
    run = summary.analysis_run
    if run.status != "completed":
        parser.error(run.error_message or "analysis failed")

    artifact = export_assessment_artifact(
        service,
        repository.id,
        run.id,
        role="challenger",
        outcome="challenger",
        reviewer="self-assess",
    )
    golden_profile = load_json(args.golden)
    comparison = compare_assessment_to_golden(golden_profile, artifact)

    if args.assessment_output:
        write_text(args.assessment_output, artifact_json(artifact))

    render = comparison_json if args.format == "json" else comparison_markdown
    report = render(comparison)

    if args.comparison_output:
        write_text(args.comparison_output, report)
    else:
        # Emit exactly one trailing newline on stdout.
        line_end = "" if report.endswith("\n") else "\n"
        print(report, end=line_end)

    # Only an explicit regression blocks CI; other statuses exit cleanly.
    return 1 if args.fail_on_regression and comparison["status"] == "regression" else 0
def export_assessment_command(
args: argparse.Namespace,
parser: argparse.ArgumentParser,
@@ -190,7 +289,7 @@ def export_assessment_command(
content = artifact_json(artifact)
if args.output:
Path(args.output).write_text(content, encoding="utf-8")
write_text(args.output, content)
else:
print(content, end="")
return 0
@@ -231,6 +330,29 @@ def selected_repositories(
return [repository for repository in repositories if repository.name == repo]
def self_assessment_repository(
    service: RegistryService,
    repo: str,
    source_path: Path,
) -> Repository:
    """Return the repository to assess, registering it by name when absent.

    Args:
        service: Registry service used for lookup and registration.
        repo: Repository selector passed to ``selected_repositories``.
        source_path: Source tree path, stored as the URL of a newly
            registered repository.

    Returns:
        The first repository matching ``repo``, or a newly registered one
        named ``repo``.

    Raises:
        NotFoundError: If nothing matches and ``repo`` is all digits. Such a
            value is not registered as a new name (presumably because it is
            meant as a repository id; confirm against
            ``selected_repositories``).
    """
    lookup = argparse.Namespace(repo=repo, all=False)
    matches = selected_repositories(service, lookup)
    if not matches:
        if repo.isdigit():
            raise NotFoundError(f"repository {repo} was not found")
        return service.register_repository(
            name=repo,
            url=str(source_path),
            description="Self-scoping assessment target.",
        )
    return matches[0]
def write_text(path: str | Path, content: str) -> None:
target = Path(path)
target.parent.mkdir(parents=True, exist_ok=True)
target.write_text(content, encoding="utf-8")
def rebuild_summary_line(
service: RegistryService,
result: CharacteristicRebuildResult,

View File

@@ -154,3 +154,100 @@ def test_compare_assessment_cli_writes_markdown_report(tmp_path):
assert exit_code == 0
assert "Status: `regression`" in report
assert "Route LLM Requests Across Providers" in report
def test_self_assess_cli_exports_challenger_and_comparison(tmp_path):
    """self-assess writes both the challenger artifact and a JSON comparison."""
    source = write_repo(tmp_path)

    golden_profile = {
        "profile_id": "test-golden",
        "ability": {
            "expected_capabilities": [
                {"name": "Expose Repository Interface"}
            ]
        },
        "forbidden_native_capabilities": [],
    }
    golden_path = tmp_path / "golden.json"
    golden_path.write_text(json.dumps(golden_profile), encoding="utf-8")

    # The "out" directory is not created up front; the command must create it.
    output_dir = tmp_path / "out"
    assessment_path = output_dir / "assessment.json"
    comparison_path = output_dir / "comparison.json"

    argv = [
        "self-assess",
        "--repo", "Self Assess Repo",
        "--source-path", str(source),
        "--golden", str(golden_path),
        "--assessment-output", str(assessment_path),
        "--comparison-output", str(comparison_path),
        "--format", "json",
        "--database-path", str(tmp_path / "registry.sqlite3"),
        "--checkout-root", str(tmp_path / "checkouts"),
    ]
    exit_code = main(argv)

    assessment = json.loads(assessment_path.read_text(encoding="utf-8"))
    comparison = json.loads(comparison_path.read_text(encoding="utf-8"))

    assert exit_code == 0
    assert assessment["target_repository"]["repo_slug"] == "self-assess-repo"
    assert assessment["execution"]["mode"] == "deterministic-only"
    assert comparison["status"] == "candidate_improvement"
    assert comparison["matched_expected_capabilities"] == [
        "Expose Repository Interface"
    ]
def test_self_assess_cli_can_fail_on_regression(tmp_path):
    """--fail-on-regression makes the command return exit code 1."""
    # Minimal source tree containing a provider registry, paired with a
    # golden profile that forbids the provider-routing capability.
    source = tmp_path / "provider-repo"
    source.mkdir()
    readme = source / "README.md"
    readme.write_text("# Provider Repo\n", encoding="utf-8")
    providers = source / "providers.py"
    providers.write_text(
        "provider_registry = {'openrouter': OpenRouterAdapter}\n",
        encoding="utf-8",
    )

    golden_profile = {
        "profile_id": "test-golden",
        "ability": {"expected_capabilities": []},
        "forbidden_native_capabilities": [
            {"name": "Route LLM Requests Across Providers"}
        ],
    }
    golden_path = tmp_path / "golden.json"
    golden_path.write_text(json.dumps(golden_profile), encoding="utf-8")

    argv = [
        "self-assess",
        "--repo", "Provider Repo",
        "--source-path", str(source),
        "--golden", str(golden_path),
        "--format", "json",
        "--fail-on-regression",
        "--database-path", str(tmp_path / "registry.sqlite3"),
        "--checkout-root", str(tmp_path / "checkouts"),
    ]
    exit_code = main(argv)

    assert exit_code == 1

View File

@@ -227,7 +227,7 @@ Acceptance criteria:
```task
id: RREG-WP-0013-T07
status: todo
status: done
priority: medium
state_hub_task_id: "af1fcecd-686d-4592-b739-4698abc98c55"
```
@@ -242,6 +242,14 @@ Acceptance criteria:
- The command emits a comparison report and exits non-zero only for explicit
CI-blocking regressions, not for ordinary "needs review" assessment outcomes.
Implementation note 2026-05-15: added `repo-scoping self-assess`. The command
analyzes a source tree, exports a challenger assessment artifact, compares it to
the golden profile, emits JSON or Markdown, and returns non-zero only with
`--fail-on-regression` when the comparison status is `regression`. The command
defaults to deterministic-only; `--with-llm` opts into configured LLM assistance.
`--agentic-review` is reserved for RREG-WP-0014 and currently always errors
when requested, because no agentic reviewer can be configured yet.
## T08: Document Assessment Workflow
```task