Add self-scoping regression command

2026-05-15 13:33:23 +02:00
parent 18ac5fe2ba
commit 750985839f
4 changed files with 245 additions and 3 deletions
--- a/docs/self-scoping/README.md
+++ b/docs/self-scoping/README.md
@@ -60,3 +60,18 @@ repo-scoping compare-assessment \

 The first comparison report highlights missing expected capabilities, forbidden
 native capabilities, known regression patterns, and misplaced API/CLI features.
+
+Run the full self-assessment loop:
+
+```bash
+repo-scoping self-assess \
+  --source-path . \
+  --assessment-output docs/self-scoping/assessments/repo-scoping-challenger.json \
+  --comparison-output docs/self-scoping/assessments/repo-scoping-challenger.md
+```
+
+By default this command is deterministic-only and leaves generated candidates
+pending review. Add `--with-llm` only when a provider is configured and the run
+should include LLM-assisted candidate extraction. Add `--fail-on-regression` in
+CI when known regressions should fail the command; ordinary `needs_review`
+comparisons still exit successfully.
--- a/src/repo_registry/cli.py
+++ b/src/repo_registry/cli.py
@@ -99,6 +99,58 @@ def build_parser() -> argparse.ArgumentParser:
        default="markdown",
        help="Comparison report format.",
    )
+    self_assess = subparsers.add_parser(
+        "self-assess",
+        help="Run repo-scoping against a source tree and compare the result to a golden profile.",
+    )
+    self_assess.add_argument(
+        "--repo",
+        default="repo-scoping",
+        help="Repository id or exact repository name to reuse; created by name when absent.",
+    )
+    self_assess.add_argument(
+        "--source-path",
+        default=".",
+        help="Source tree to analyze; defaults to the current working directory.",
+    )
+    self_assess.add_argument(
+        "--golden",
+        default="docs/self-scoping/golden/repo-scoping-golden-profile.v1.json",
+        help="Golden profile JSON path.",
+    )
+    self_assess.add_argument(
+        "--assessment-output",
+        help="Write challenger assessment artifact JSON to this path.",
+    )
+    self_assess.add_argument(
+        "--comparison-output",
+        help="Write comparison report to this path instead of stdout.",
+    )
+    self_assess.add_argument(
+        "--format",
+        choices=["json", "markdown"],
+        default="markdown",
+        help="Comparison report format.",
+    )
+    self_assess.add_argument(
+        "--with-llm",
+        action="store_false",
+        dest="no_llm",
+        help="Use configured LLM assistance during the self-assessment run.",
+    )
+    self_assess.add_argument(
+        "--agentic-review",
+        action="store_true",
+        help="Reserved for a configured agentic reviewer; currently errors when requested.",
+    )
+    self_assess.add_argument(
+        "--fail-on-regression",
+        action="store_true",
+        help="Return exit code 1 only when comparison status is regression.",
+    )
+    self_assess.add_argument("--database-path", help="Override REPO_REGISTRY_DATABASE_PATH.")
+    self_assess.add_argument("--checkout-root", help="Override REPO_REGISTRY_CHECKOUT_ROOT.")
+    self_assess.set_defaults(no_llm=True)
    return parser


@@ -111,6 +163,8 @@ def main(argv: Sequence[str] | None = None) -> int:
        return export_assessment_command(args, parser)
    if args.command == "compare-assessment":
        return compare_assessment_command(args)
+    if args.command == "self-assess":
+        return self_assess_command(args, parser)
    parser.error(f"unknown command: {args.command}")
    return 2

@@ -158,12 +212,57 @@ def compare_assessment_command(args: argparse.Namespace) -> int:
        else comparison_markdown(comparison)
    )
    if args.output:
-        Path(args.output).write_text(content, encoding="utf-8")
+        write_text(args.output, content)
    else:
        print(content, end="" if content.endswith("\n") else "\n")
    return 0


+def self_assess_command(
+    args: argparse.Namespace,
+    parser: argparse.ArgumentParser,
+) -> int:
+    if args.agentic_review:
+        parser.error("agentic review is not configured yet")
+    service = service_from_args(args)
+    source_path = Path(args.source_path).expanduser().resolve()
+    if not source_path.is_dir():
+        parser.error(f"source path does not exist or is not a directory: {source_path}")
+    repository = self_assessment_repository(service, args.repo, source_path)
+    summary = service.analyze_repository(
+        repository.id,
+        source_path=str(source_path),
+        use_llm_assistance=not args.no_llm,
+        trusted_auto_approve=False,
+    )
+    if summary.analysis_run.status != "completed":
+        parser.error(summary.analysis_run.error_message or "analysis failed")
+    artifact = export_assessment_artifact(
+        service,
+        repository.id,
+        summary.analysis_run.id,
+        role="challenger",
+        outcome="challenger",
+        reviewer="self-assess",
+    )
+    comparison = compare_assessment_to_golden(load_json(args.golden), artifact)
+
+    if args.assessment_output:
+        write_text(args.assessment_output, artifact_json(artifact))
+    report = (
+        comparison_json(comparison)
+        if args.format == "json"
+        else comparison_markdown(comparison)
+    )
+    if args.comparison_output:
+        write_text(args.comparison_output, report)
+    else:
+        print(report, end="" if report.endswith("\n") else "\n")
+    if args.fail_on_regression and comparison["status"] == "regression":
+        return 1
+    return 0
+
+
 def export_assessment_command(
    args: argparse.Namespace,
    parser: argparse.ArgumentParser,
@@ -190,7 +289,7 @@ def export_assessment_command(

    content = artifact_json(artifact)
    if args.output:
-        Path(args.output).write_text(content, encoding="utf-8")
+        write_text(args.output, content)
    else:
        print(content, end="")
    return 0
@@ -231,6 +330,29 @@ def selected_repositories(
    return [repository for repository in repositories if repository.name == repo]


+def self_assessment_repository(
+    service: RegistryService,
+    repo: str,
+    source_path: Path,
+) -> Repository:
+    selected = selected_repositories(service, argparse.Namespace(repo=repo, all=False))
+    if selected:
+        return selected[0]
+    if repo.isdigit():
+        raise NotFoundError(f"repository {repo} was not found")
+    return service.register_repository(
+        name=repo,
+        url=str(source_path),
+        description="Self-scoping assessment target.",
+    )
+
+
+def write_text(path: str | Path, content: str) -> None:
+    target = Path(path)
+    target.parent.mkdir(parents=True, exist_ok=True)
+    target.write_text(content, encoding="utf-8")
+
+
 def rebuild_summary_line(
    service: RegistryService,
    result: CharacteristicRebuildResult,
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -154,3 +154,100 @@ def test_compare_assessment_cli_writes_markdown_report(tmp_path):
    assert exit_code == 0
    assert "Status: `regression`" in report
    assert "Route LLM Requests Across Providers" in report
+
+
+def test_self_assess_cli_exports_challenger_and_comparison(tmp_path):
+    source = write_repo(tmp_path)
+    golden_path = tmp_path / "golden.json"
+    golden_path.write_text(
+        json.dumps(
+            {
+                "profile_id": "test-golden",
+                "ability": {
+                    "expected_capabilities": [
+                        {"name": "Expose Repository Interface"}
+                    ]
+                },
+                "forbidden_native_capabilities": [],
+            }
+        ),
+        encoding="utf-8",
+    )
+    assessment_path = tmp_path / "out" / "assessment.json"
+    comparison_path = tmp_path / "out" / "comparison.json"
+
+    exit_code = main(
+        [
+            "self-assess",
+            "--repo",
+            "Self Assess Repo",
+            "--source-path",
+            str(source),
+            "--golden",
+            str(golden_path),
+            "--assessment-output",
+            str(assessment_path),
+            "--comparison-output",
+            str(comparison_path),
+            "--format",
+            "json",
+            "--database-path",
+            str(tmp_path / "registry.sqlite3"),
+            "--checkout-root",
+            str(tmp_path / "checkouts"),
+        ]
+    )
+
+    assessment = json.loads(assessment_path.read_text(encoding="utf-8"))
+    comparison = json.loads(comparison_path.read_text(encoding="utf-8"))
+    assert exit_code == 0
+    assert assessment["target_repository"]["repo_slug"] == "self-assess-repo"
+    assert assessment["execution"]["mode"] == "deterministic-only"
+    assert comparison["status"] == "candidate_improvement"
+    assert comparison["matched_expected_capabilities"] == [
+        "Expose Repository Interface"
+    ]
+
+
+def test_self_assess_cli_can_fail_on_regression(tmp_path):
+    source = tmp_path / "provider-repo"
+    source.mkdir()
+    (source / "README.md").write_text("# Provider Repo\n", encoding="utf-8")
+    (source / "providers.py").write_text(
+        "provider_registry = {'openrouter': OpenRouterAdapter}\n",
+        encoding="utf-8",
+    )
+    golden_path = tmp_path / "golden.json"
+    golden_path.write_text(
+        json.dumps(
+            {
+                "profile_id": "test-golden",
+                "ability": {"expected_capabilities": []},
+                "forbidden_native_capabilities": [
+                    {"name": "Route LLM Requests Across Providers"}
+                ],
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    exit_code = main(
+        [
+            "self-assess",
+            "--repo",
+            "Provider Repo",
+            "--source-path",
+            str(source),
+            "--golden",
+            str(golden_path),
+            "--format",
+            "json",
+            "--fail-on-regression",
+            "--database-path",
+            str(tmp_path / "registry.sqlite3"),
+            "--checkout-root",
+            str(tmp_path / "checkouts"),
+        ]
+    )
+
+    assert exit_code == 1
--- a/workplans/RREG-WP-0013-self-scoping-baseline-evaluation.md
+++ b/workplans/RREG-WP-0013-self-scoping-baseline-evaluation.md
@@ -227,7 +227,7 @@ Acceptance criteria:

 ```task
 id: RREG-WP-0013-T07
-status: todo
+status: done
 priority: medium
 state_hub_task_id: "af1fcecd-686d-4592-b739-4698abc98c55"
 ```
@@ -242,6 +242,14 @@ Acceptance criteria:
 - The command emits a comparison report and exits non-zero only for explicit
  CI-blocking regressions, not for ordinary "needs review" assessment outcomes.

+Implementation note 2026-05-15: added `repo-scoping self-assess`. The command
+analyzes a source tree, exports a challenger assessment artifact, compares it to
+the golden profile, emits JSON or Markdown, and returns non-zero only with
+`--fail-on-regression` when the comparison status is `regression`. The command
+defaults to deterministic-only; `--with-llm` opts into configured LLM assistance.
+`--agentic-review` is reserved for RREG-WP-0014 and currently always errors
+when requested, because no agentic reviewer can be configured yet.
+
 ## T08: Document Assessment Workflow

 ```task