diff --git a/docs/self-scoping/README.md b/docs/self-scoping/README.md index 9274060..09fd8f1 100644 --- a/docs/self-scoping/README.md +++ b/docs/self-scoping/README.md @@ -60,3 +60,18 @@ repo-scoping compare-assessment \ The first comparison report highlights missing expected capabilities, forbidden native capabilities, known regression patterns, and misplaced API/CLI features. + +Run the full self-assessment loop: + +```bash +repo-scoping self-assess \ + --source-path . \ + --assessment-output docs/self-scoping/assessments/repo-scoping-challenger.json \ + --comparison-output docs/self-scoping/assessments/repo-scoping-challenger.md +``` + +By default this path is deterministic-only and leaves generated candidates +pending review. Add `--with-llm` only when a provider is configured and the run +should include LLM-assisted candidate extraction. Add `--fail-on-regression` in +CI when known regressions should fail the command; ordinary `needs_review` +comparisons still exit successfully. diff --git a/src/repo_registry/cli.py b/src/repo_registry/cli.py index 2e0ce73..6745308 100644 --- a/src/repo_registry/cli.py +++ b/src/repo_registry/cli.py @@ -99,6 +99,58 @@ def build_parser() -> argparse.ArgumentParser: default="markdown", help="Comparison report format.", ) + self_assess = subparsers.add_parser( + "self-assess", + help="Run repo-scoping against a source tree and compare the result to a golden profile.", + ) + self_assess.add_argument( + "--repo", + default="repo-scoping", + help="Repository id or exact repository name to reuse; created by name when absent.", + ) + self_assess.add_argument( + "--source-path", + default=".", + help="Source tree to analyze; defaults to the current working directory.", + ) + self_assess.add_argument( + "--golden", + default="docs/self-scoping/golden/repo-scoping-golden-profile.v1.json", + help="Golden profile JSON path.", + ) + self_assess.add_argument( + "--assessment-output", + help="Write challenger assessment artifact JSON to this 
path.", + ) + self_assess.add_argument( + "--comparison-output", + help="Write comparison report to this path instead of stdout.", + ) + self_assess.add_argument( + "--format", + choices=["json", "markdown"], + default="markdown", + help="Comparison report format.", + ) + self_assess.add_argument( + "--with-llm", + action="store_false", + dest="no_llm", + help="Use configured LLM assistance during the self-assessment run.", + ) + self_assess.add_argument( + "--agentic-review", + action="store_true", + help="Reserved for a configured agentic reviewer; currently errors when requested.", + ) + self_assess.add_argument( + "--fail-on-regression", + action="store_true", + help="Return exit code 1 only when comparison status is regression.", + ) + self_assess.add_argument("--database-path", help="Override REPO_REGISTRY_DATABASE_PATH.") + self_assess.add_argument("--checkout-root", help="Override REPO_REGISTRY_CHECKOUT_ROOT.") + self_assess.set_defaults(no_llm=True) return parser @@ -111,6 +163,8 @@ def main(argv: Sequence[str] | None = None) -> int: return export_assessment_command(args, parser) if args.command == "compare-assessment": return compare_assessment_command(args) + if args.command == "self-assess": + return self_assess_command(args, parser) parser.error(f"unknown command: {args.command}") return 2 @@ -158,12 +212,57 @@ def compare_assessment_command(args: argparse.Namespace) -> int: else comparison_markdown(comparison) ) if args.output: - Path(args.output).write_text(content, encoding="utf-8") + write_text(args.output, content) else: print(content, end="" if content.endswith("\n") else "\n") return 0 +def self_assess_command( + args: argparse.Namespace, + parser: argparse.ArgumentParser, +) -> int: + if args.agentic_review: + parser.error("agentic review is not configured yet") + service = service_from_args(args) + source_path = Path(args.source_path).expanduser().resolve() + if not source_path.is_dir(): + parser.error(f"source path does not exist or is not a 
directory: {source_path}") + repository = self_assessment_repository(service, args.repo, source_path) + summary = service.analyze_repository( + repository.id, + source_path=str(source_path), + use_llm_assistance=not args.no_llm, + trusted_auto_approve=False, + ) + if summary.analysis_run.status != "completed": + parser.error(summary.analysis_run.error_message or "analysis failed") + artifact = export_assessment_artifact( + service, + repository.id, + summary.analysis_run.id, + role="challenger", + outcome="challenger", + reviewer="self-assess", + ) + comparison = compare_assessment_to_golden(load_json(args.golden), artifact) + + if args.assessment_output: + write_text(args.assessment_output, artifact_json(artifact)) + report = ( + comparison_json(comparison) + if args.format == "json" + else comparison_markdown(comparison) + ) + if args.comparison_output: + write_text(args.comparison_output, report) + else: + print(report, end="" if report.endswith("\n") else "\n") + if args.fail_on_regression and comparison["status"] == "regression": + return 1 + return 0 + + def export_assessment_command( args: argparse.Namespace, parser: argparse.ArgumentParser, @@ -190,7 +289,7 @@ def export_assessment_command( content = artifact_json(artifact) if args.output: - Path(args.output).write_text(content, encoding="utf-8") + write_text(args.output, content) else: print(content, end="") return 0 @@ -231,6 +330,29 @@ def selected_repositories( return [repository for repository in repositories if repository.name == repo] +def self_assessment_repository( + service: RegistryService, + repo: str, + source_path: Path, +) -> Repository: + selected = selected_repositories(service, argparse.Namespace(repo=repo, all=False)) + if selected: + return selected[0] + if repo.isdigit(): + raise NotFoundError(f"repository {repo} was not found") + return service.register_repository( + name=repo, + url=str(source_path), + description="Self-scoping assessment target.", + ) + + +def write_text(path: str | 
Path, content: str) -> None: + target = Path(path) + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(content, encoding="utf-8") + + def rebuild_summary_line( service: RegistryService, result: CharacteristicRebuildResult, diff --git a/tests/test_cli.py b/tests/test_cli.py index 9f44fb9..8967cf2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -154,3 +154,100 @@ def test_compare_assessment_cli_writes_markdown_report(tmp_path): assert exit_code == 0 assert "Status: `regression`" in report assert "Route LLM Requests Across Providers" in report + + +def test_self_assess_cli_exports_challenger_and_comparison(tmp_path): + source = write_repo(tmp_path) + golden_path = tmp_path / "golden.json" + golden_path.write_text( + json.dumps( + { + "profile_id": "test-golden", + "ability": { + "expected_capabilities": [ + {"name": "Expose Repository Interface"} + ] + }, + "forbidden_native_capabilities": [], + } + ), + encoding="utf-8", + ) + assessment_path = tmp_path / "out" / "assessment.json" + comparison_path = tmp_path / "out" / "comparison.json" + + exit_code = main( + [ + "self-assess", + "--repo", + "Self Assess Repo", + "--source-path", + str(source), + "--golden", + str(golden_path), + "--assessment-output", + str(assessment_path), + "--comparison-output", + str(comparison_path), + "--format", + "json", + "--database-path", + str(tmp_path / "registry.sqlite3"), + "--checkout-root", + str(tmp_path / "checkouts"), + ] + ) + + assessment = json.loads(assessment_path.read_text(encoding="utf-8")) + comparison = json.loads(comparison_path.read_text(encoding="utf-8")) + assert exit_code == 0 + assert assessment["target_repository"]["repo_slug"] == "self-assess-repo" + assert assessment["execution"]["mode"] == "deterministic-only" + assert comparison["status"] == "candidate_improvement" + assert comparison["matched_expected_capabilities"] == [ + "Expose Repository Interface" + ] + + +def test_self_assess_cli_can_fail_on_regression(tmp_path): + source 
= tmp_path / "provider-repo" + source.mkdir() + (source / "README.md").write_text("# Provider Repo\n", encoding="utf-8") + (source / "providers.py").write_text( + "provider_registry = {'openrouter': OpenRouterAdapter}\n", + encoding="utf-8", + ) + golden_path = tmp_path / "golden.json" + golden_path.write_text( + json.dumps( + { + "profile_id": "test-golden", + "ability": {"expected_capabilities": []}, + "forbidden_native_capabilities": [ + {"name": "Route LLM Requests Across Providers"} + ], + } + ), + encoding="utf-8", + ) + + exit_code = main( + [ + "self-assess", + "--repo", + "Provider Repo", + "--source-path", + str(source), + "--golden", + str(golden_path), + "--format", + "json", + "--fail-on-regression", + "--database-path", + str(tmp_path / "registry.sqlite3"), + "--checkout-root", + str(tmp_path / "checkouts"), + ] + ) + + assert exit_code == 1 diff --git a/workplans/RREG-WP-0013-self-scoping-baseline-evaluation.md b/workplans/RREG-WP-0013-self-scoping-baseline-evaluation.md index 20cf7b9..0ae75b8 100644 --- a/workplans/RREG-WP-0013-self-scoping-baseline-evaluation.md +++ b/workplans/RREG-WP-0013-self-scoping-baseline-evaluation.md @@ -227,7 +227,7 @@ Acceptance criteria: ```task id: RREG-WP-0013-T07 -status: todo +status: done priority: medium state_hub_task_id: "af1fcecd-686d-4592-b739-4698abc98c55" ``` @@ -242,6 +242,14 @@ Acceptance criteria: - The command emits a comparison report and exits non-zero only for explicit CI-blocking regressions, not for ordinary "needs review" assessment outcomes. +Implementation note 2026-05-15: added `repo-scoping self-assess`. The command +analyzes a source tree, exports a challenger assessment artifact, compares it to +the golden profile, emits JSON or Markdown, and returns non-zero only with +`--fail-on-regression` when the comparison status is `regression`. The command +defaults to deterministic-only; `--with-llm` opts into configured LLM assistance. 
+`--agentic-review` is reserved for RREG-WP-0014 and currently always errors when +requested, because no agentic reviewer can be configured yet. + ## T08: Document Assessment Workflow ```task