diff --git a/docs/abstraction-strategy.md b/docs/abstraction-strategy.md index 0f2e237..df7cf02 100644 --- a/docs/abstraction-strategy.md +++ b/docs/abstraction-strategy.md @@ -49,9 +49,10 @@ LLMs are most useful for naming and explaining intent: - summarizing README and code context into clearer ability descriptions - suggesting merges or relinks when deterministic names are too generic -LLM output remains candidate material. It should cite source paths and be reviewed -or explicitly auto-approved by a trusted mode before becoming approved registry -truth. +LLM output remains candidate material. It should cite source paths and be +reviewed by a human or configured agentic reviewer before becoming approved +registry truth. Deterministic checks can block or flag weak candidates; they do +not approve them. ## Trial Repo Observations diff --git a/docs/characteristic-evidence-model.md b/docs/characteristic-evidence-model.md index ab7be15..e6ab685 100644 --- a/docs/characteristic-evidence-model.md +++ b/docs/characteristic-evidence-model.md @@ -67,12 +67,12 @@ that show the repository provides the utility directly or intentionally exposes it as a facade/adapter. Mentions, dependencies, configuration, and tooling are context until a curator promotes them or stronger owned evidence appears. -Trusted auto-approval applies the same rule. A candidate capability must have -source references and an eligible utility relationship (`owned`, `facade`, or -`adapter`) before it can be approved automatically. Dependency, tooling, -configuration, and mention-only candidates remain review material. The review -decision should explain both sides: why approved candidates were considered safe -and why skipped candidates need curator review. +Deterministic quality gates apply the same source and utility relationship +signals, but they do not approve automatically. Gates may reject, downgrade, +invalidate, flag, merge, or require review. Approval requires human judgement or +a configured agentic reviewer that records evidence, criteria version, and +rationale. Dependency, tooling, configuration, and mention-only candidates remain +review material. `INTENT.md` may also seed intended capabilities when it contains an explicit capability section. These intent-derived candidates are marked as review diff --git a/src/repo_registry/acceptance/__init__.py b/src/repo_registry/acceptance/__init__.py index f0b0978..bfe559f 100644 --- a/src/repo_registry/acceptance/__init__.py +++ b/src/repo_registry/acceptance/__init__.py @@ -1,3 +1,4 @@ +from repo_registry.acceptance.agentic import AgenticReviewer, AgenticReviewRequest from repo_registry.acceptance.criteria import ( active_quality_criteria_version, criteria_registry_dict, @@ -14,6 +15,8 @@ from repo_registry.acceptance.gates import ( __all__ = [ "active_quality_criteria_version", + "AgenticReviewer", + "AgenticReviewRequest", "blocking_quality_gate_outcomes", "criteria_registry_dict", "criteria_registry_json", diff --git a/src/repo_registry/acceptance/agentic.py b/src/repo_registry/acceptance/agentic.py new file mode 100644 index 0000000..2b93ade --- /dev/null +++ b/src/repo_registry/acceptance/agentic.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Protocol + +from repo_registry.acceptance.gates import QualityGateOutcome +from repo_registry.core.models import CandidateGraph, Repository + + +@dataclass(frozen=True) +class AgenticReviewRequest: + repository: Repository + candidate_graph: CandidateGraph + criteria_version: str + quality_gate_outcomes: list[QualityGateOutcome] + context: str + + +class AgenticReviewer(Protocol): + reviewer_id: str + policy_version: str + + def review(self, request: AgenticReviewRequest) -> None: + """Review a candidate graph and record decisions through the caller.""" diff --git a/src/repo_registry/cli.py b/src/repo_registry/cli.py index 50c3b3e..c4b6905 100644 --- a/src/repo_registry/cli.py +++ b/src/repo_registry/cli.py @@ -40,9 +40,9 @@ def build_parser() -> argparse.ArgumentParser: rebuild.add_argument("--dry-run", action="store_true", help="Preview without clearing approved characteristics.") rebuild.add_argument("--no-llm", action="store_true", help="Disable configured LLM assistance.") rebuild.add_argument( - "--trusted-auto-approve", + "--agentic-review", action="store_true", - help="Run trusted auto-approval after a confirmed rebuild.", + help="Request configured agentic review after a confirmed rebuild.", ) rebuild.add_argument( "--confirm", @@ -146,7 +146,7 @@ def build_parser() -> argparse.ArgumentParser: self_assess.add_argument( "--agentic-review", action="store_true", - help="Reserved for a configured agentic reviewer; currently errors when requested.", + help="Request configured agentic review; leaves candidates pending when none is configured.", ) self_assess.add_argument( "--fail-on-regression", @@ -213,11 +213,11 @@ def rebuild_characteristics_command( confirm=not dry_run, use_llm_assistance=not args.no_llm, ) - if args.trusted_auto_approve and not dry_run and result.analysis_run.status == "completed": - service.trusted_auto_approve_candidate_graph( + if args.agentic_review and not dry_run and result.analysis_run.status == "completed": + service.request_agentic_review( repository.id, result.analysis_run.id, - notes="CLI trusted auto-approve after rebuild.", + notes="CLI agentic review request after rebuild.", ) print(rebuild_summary_line(service, result, args)) return 0 @@ -258,8 +258,6 @@ def self_assess_command( args: argparse.Namespace, parser: argparse.ArgumentParser, ) -> int: - if args.agentic_review: - parser.error("agentic review is not configured yet") service = service_from_args(args) source_path = Path(args.source_path).expanduser().resolve() if not source_path.is_dir(): @@ -269,6 +267,7 @@ def self_assess_command( repository.id, source_path=str(source_path), use_llm_assistance=not args.no_llm, + agentic_review=args.agentic_review, trusted_auto_approve=False, ) if summary.analysis_run.status != "completed": diff --git a/src/repo_registry/core/service.py b/src/repo_registry/core/service.py index 781f2b9..323e190 100644 --- a/src/repo_registry/core/service.py +++ b/src/repo_registry/core/service.py @@ -5,8 +5,12 @@ from dataclasses import asdict, replace from typing import Any from repo_registry.acceptance import ( + AgenticReviewer, + AgenticReviewRequest, + active_quality_criteria_version, blocking_quality_gate_outcomes, evaluate_candidate_capability_quality, + evaluate_candidate_graph_quality, ) from repo_registry.core.models import ( AbilitySummary, @@ -57,6 +61,7 @@ class RegistryService: ingestion: GitIngestionService | None = None, llm_extractor: LLMCandidateExtractor | None = None, embedding_provider: EmbeddingProvider | None = None, + agentic_reviewer: AgenticReviewer | None = None, ) -> None: self.store = store self.scanner = DeterministicScanner() @@ -67,6 +72,7 @@ class RegistryService: self.llm_extractor = llm_extractor self.llm_mapper = LLMExtractionMapper() self.embedding_provider = embedding_provider + self.agentic_reviewer = agentic_reviewer def register_repository( self, @@ -135,6 +141,7 @@ class RegistryService: source_path: str | None = None, use_cached_checkout: bool = False, use_llm_assistance: bool = True, + agentic_review: bool = False, trusted_auto_approve: bool = False, access_username: str | None = None, access_password: str | None = None, @@ -241,13 +248,20 @@ class RegistryService: f"from {candidate_source} candidate generation." ), ) - if trusted_auto_approve: - self.trusted_auto_approve_candidate_graph( + if agentic_review or trusted_auto_approve: + legacy_note = ( + " Deprecated trusted_auto_approve request was routed to " + "agentic review." + if trusted_auto_approve + else "" + ) + self.request_agentic_review( repository_id, completed_run.id, notes=( - "Trusted auto-populate mode reviewed candidate graph " - f"after {candidate_source} candidate generation." + "Agentic review requested after " + f"{candidate_source} candidate generation." + f"{legacy_note}" ), ) log_operation( @@ -602,6 +616,50 @@ class RegistryService: ) return self.store.get_ability_map(repository_id) + def request_agentic_review( + self, + repository_id: int, + analysis_run_id: int, + *, + notes: str = "", + ) -> CandidateGraph: + graph = self.store.get_candidate_graph(repository_id, analysis_run_id) + gate_outcomes = evaluate_candidate_graph_quality(graph) + criteria_version = active_quality_criteria_version() + if self.agentic_reviewer is None: + self.store.create_review_decision( + repository_id, + analysis_run_id, + action="agentic_review_unconfigured", + notes=( + f"{notes} No agentic reviewer is configured; candidates " + "remain pending human review. " + f"criteria_version={criteria_version}; " + f"quality_gate_outcomes={len(gate_outcomes)}." + ).strip(), + ) + return graph + request = AgenticReviewRequest( + repository=graph.repository, + candidate_graph=graph, + criteria_version=criteria_version, + quality_gate_outcomes=gate_outcomes, + context="candidate-characteristic-acceptance", + ) + self.agentic_reviewer.review(request) + self.store.create_review_decision( + repository_id, + analysis_run_id, + action="agentic_review_completed", + notes=( + f"{notes} reviewer={self.agentic_reviewer.reviewer_id}; " + f"policy_version={self.agentic_reviewer.policy_version}; " + f"criteria_version={criteria_version}; " + f"quality_gate_outcomes={len(gate_outcomes)}." + ).strip(), + ) + return self.store.get_candidate_graph(repository_id, analysis_run_id) + def _trusted_auto_approve_capability_safe( self, capability: CandidateCapability, diff --git a/src/repo_registry/self_scoping/assessment.py b/src/repo_registry/self_scoping/assessment.py index ec6ac29..4aa949a 100644 --- a/src/repo_registry/self_scoping/assessment.py +++ b/src/repo_registry/self_scoping/assessment.py @@ -423,6 +423,8 @@ def _known_regression_patterns( def _execution_mode(decisions: list[ReviewDecision]) -> str: + if any(decision.action.startswith("agentic_review") for decision in decisions): + return "agentic-review" if any(decision.action == "trusted_auto_approve_candidate_graph" for decision in decisions): return "trusted-auto-review" if any(decision.action == "llm_extraction_used" for decision in decisions): @@ -439,6 +441,12 @@ def _candidate_source(decisions: list[ReviewDecision]) -> str: def _acceptance_mode(decisions: list[ReviewDecision]) -> str: + agentic_decision = next( + (decision for decision in decisions if decision.action.startswith("agentic_review")), + None, + ) + if agentic_decision is not None: + return agentic_decision.action if any(decision.action == "trusted_auto_approve_candidate_graph" for decision in decisions): return "trusted_auto_approve_candidate_graph" if any(decision.action == "approve_candidate_graph" for decision in decisions): diff --git a/src/repo_registry/web_api/app.py b/src/repo_registry/web_api/app.py index 9080576..a992094 100644 --- a/src/repo_registry/web_api/app.py +++ b/src/repo_registry/web_api/app.py @@ -295,6 +295,7 @@ def create_analysis_run( source_path=payload.source_path, use_cached_checkout=payload.use_cached_checkout, use_llm_assistance=payload.use_llm_assistance, + agentic_review=payload.agentic_review, trusted_auto_approve=payload.trusted_auto_approve, access_username=payload.access_username, access_password=payload.access_password, diff --git a/src/repo_registry/web_api/schemas.py b/src/repo_registry/web_api/schemas.py index 57e8671..d3c6941 100644 --- a/src/repo_registry/web_api/schemas.py +++ b/src/repo_registry/web_api/schemas.py @@ -214,7 +214,20 @@ class AnalysisRunCreate(BaseModel): source_path: str | None = None use_cached_checkout: bool = False use_llm_assistance: bool = True - trusted_auto_approve: bool = False + agentic_review: bool = Field( + default=False, + description=( + "Request configured agentic review after analysis; candidates remain " + "pending when no reviewer is configured." + ), + ) + trusted_auto_approve: bool = Field( + default=False, + description=( + "Deprecated compatibility input. Requests are routed to agentic " + "review and do not deterministically approve candidates." + ), + ) access_username: str | None = None access_password: str | None = Field(default=None, repr=False) @@ -225,7 +238,7 @@ class AnalysisRunCreate(BaseModel): {"source_path": "/path/to/local/repository"}, {"use_cached_checkout": True}, {"use_llm_assistance": False}, - {"trusted_auto_approve": True}, + {"agentic_review": True}, { "access_username": "git-user", "access_password": "access-token", diff --git a/src/repo_registry/web_ui/views.py b/src/repo_registry/web_ui/views.py index 3c04644..4900dfd 100644 --- a/src/repo_registry/web_ui/views.py +++ b/src/repo_registry/web_ui/views.py @@ -416,7 +416,7 @@ def render_repository_index( - +
Registering repository... @@ -1440,7 +1440,7 @@ def create_repository_from_form( access_password: str = Form(""), explore_after_registration: str | None = Form(None), use_llm_assistance: str | None = Form(None), - trusted_auto_approve: str | None = Form(None), + agentic_review: str | None = Form(None), service: RegistryService = Depends(get_service), ): try: @@ -1460,7 +1460,7 @@ def create_repository_from_form( summary = service.analyze_repository( repository.id, use_llm_assistance=bool(use_llm_assistance), - trusted_auto_approve=bool(trusted_auto_approve), + agentic_review=bool(agentic_review), access_username=access_username or None, access_password=access_password or None, ) @@ -1529,7 +1529,7 @@ def repository_detail( - +
@@ -1964,7 +1964,7 @@ def create_analysis_run_from_form( source_path: str = Form(""), use_cached_checkout: str | None = Form(None), use_llm_assistance: str | None = Form(None), - trusted_auto_approve: str | None = Form(None), + agentic_review: str | None = Form(None), access_username: str = Form(""), access_password: str = Form(""), service: RegistryService = Depends(get_service), @@ -1974,7 +1974,7 @@ def create_analysis_run_from_form( source_path=source_path or None, use_cached_checkout=bool(use_cached_checkout), use_llm_assistance=bool(use_llm_assistance), - trusted_auto_approve=bool(trusted_auto_approve), + agentic_review=bool(agentic_review), access_username=access_username or None, access_password=access_password or None, ) diff --git a/tests/test_agentic_review.py b/tests/test_agentic_review.py new file mode 100644 index 0000000..2ed3d99 --- /dev/null +++ b/tests/test_agentic_review.py @@ -0,0 +1,48 @@ +from repo_registry.core.service import RegistryService +from repo_registry.repo_ingestion.git import GitIngestionService +from repo_registry.storage.sqlite import RegistryStore + + +class RecordingAgenticReviewer: + reviewer_id = "test-agent" + policy_version = "agentic-review-policy/test" + + def __init__(self): + self.requests = [] + + def review(self, request): + self.requests.append(request) + + +def test_configured_agentic_reviewer_receives_graph_gates_and_criteria(tmp_path): + source = tmp_path / "repo" + source.mkdir() + (source / "README.md").write_text("# Agentic Review\nReports health.\n", encoding="utf-8") + (source / "app.py").write_text('@app.get("/health")\ndef health():\n return {}\n', encoding="utf-8") + store = RegistryStore(tmp_path / "registry.sqlite3") + store.initialize() + reviewer = RecordingAgenticReviewer() + service = RegistryService( + store, + ingestion=GitIngestionService(tmp_path / "checkouts"), + agentic_reviewer=reviewer, + ) + repository = service.register_repository(name="Agentic Review", url=str(source)) + + summary = service.analyze_repository( + repository.id, + use_llm_assistance=False, + agentic_review=True, + ) + + graph = service.candidate_graph(repository.id, summary.analysis_run.id) + decisions = service.list_review_decisions(repository.id, summary.analysis_run.id) + assert len(reviewer.requests) == 1 + request = reviewer.requests[0] + assert request.repository.id == repository.id + assert request.candidate_graph.analysis_run.id == summary.analysis_run.id + assert request.criteria_version == "repo-scoping-quality-criteria/v1" + assert request.quality_gate_outcomes == [] + assert graph.abilities[0].capabilities[0].status == "candidate" + assert decisions[0].action == "agentic_review_completed" + assert "reviewer=test-agent" in decisions[0].notes diff --git a/tests/test_registry_service.py b/tests/test_registry_service.py index 966cb51..5c9ed0b 100644 --- a/tests/test_registry_service.py +++ b/tests/test_registry_service.py @@ -1337,7 +1337,7 @@ def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_ca assert graph.abilities[0].name == "Support Fallback" -def test_analyze_repository_can_trusted_auto_approve_candidates(tmp_path): +def test_analyze_repository_routes_legacy_auto_approve_to_agentic_review(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text( @@ -1364,20 +1364,17 @@ def test_analyze_repository_can_trusted_auto_approve_candidates(tmp_path): graph = service.candidate_graph(repository.id, summary.analysis_run.id) decisions = service.list_review_decisions(repository.id, summary.analysis_run.id) - assert service.get_repository(repository.id).status == "indexed" + assert service.get_repository(repository.id).status == "analyzed" statuses_by_capability = { capability.name: capability.status for capability in graph.abilities[0].capabilities } - assert statuses_by_capability["Expose Repository Interface"] == "approved" - assert ability_map.abilities[0].name == "Report Health Over HTTP" - assert decisions[0].action == "trusted_auto_approve_candidate_graph" + assert statuses_by_capability["Expose Repository Interface"] == "candidate" + assert ability_map.abilities == [] + assert decisions[0].action == "agentic_review_unconfigured" assert "deterministic candidate generation" in decisions[0].notes - assert "Auto-approved 1 safe candidate capability(s); left 0 for review." in decisions[0].notes - assert ( - "Approved: Expose Repository Interface: owned interface with sufficient confidence." - in decisions[0].notes - ) + assert "Deprecated trusted_auto_approve request was routed" in decisions[0].notes + assert "candidates remain pending human review" in decisions[0].notes def test_rebuild_characteristics_dry_run_preserves_approved_map(tmp_path): diff --git a/tests/test_web_api.py b/tests/test_web_api.py index 06f8eb4..616aa14 100644 --- a/tests/test_web_api.py +++ b/tests/test_web_api.py @@ -1403,7 +1403,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path): assert "Password or access token" in index_response.text assert "Explore after registration" in index_response.text assert "Use LLM assistance if configured" in index_response.text - assert "Trusted auto-populate after analysis" in index_response.text + assert "Request agentic review after analysis" in index_response.text assert 'SCOPE' not in index_response.text create_response = client.post( @@ -1428,7 +1428,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path): assert "Running analysis..." in detail_response.text assert "Analyze cached checkout without fetching upstream" in detail_response.text assert "Use LLM assistance if configured" in detail_response.text - assert "Trusted auto-populate after analysis" in detail_response.text + assert "Request agentic review after analysis" in detail_response.text assert "Repository Metadata" in detail_response.text assert ( f'SCOPE' @@ -2081,7 +2081,7 @@ def test_ui_register_and_explore_lands_on_analysis_result(tmp_path): "access_password": "", "explore_after_registration": "1", "use_llm_assistance": "", - "trusted_auto_approve": "1", + "agentic_review": "1", }, follow_redirects=False, ) @@ -2092,9 +2092,9 @@ def test_ui_register_and_explore_lands_on_analysis_result(tmp_path): result = client.get(response.headers["location"]) assert result.status_code == 200 assert "Candidate Graph" in result.text - assert "approved" in result.text + assert "candidate" in result.text assert "Observed Facts" in result.text - assert "trusted_auto_approve_candidate_graph" in result.text + assert "agentic_review_unconfigured" in result.text repository_detail = client.get("/ui/repos/1") assert repository_detail.status_code == 200 diff --git a/workplans/RREG-WP-0013-self-scoping-baseline-evaluation.md b/workplans/RREG-WP-0013-self-scoping-baseline-evaluation.md index 4842f8d..8c60249 100644 --- a/workplans/RREG-WP-0013-self-scoping-baseline-evaluation.md +++ b/workplans/RREG-WP-0013-self-scoping-baseline-evaluation.md @@ -253,8 +253,8 @@ analyzes a source tree, exports a challenger assessment artifact, compares it to the golden profile, emits JSON or Markdown, and returns non-zero only with `--fail-on-regression` when the comparison status is `regression`. The command defaults to deterministic-only; `--with-llm` opts into configured LLM assistance. -`--agentic-review` is reserved for RREG-WP-0014 and currently errors when no -agentic reviewer is configured. +`--agentic-review` now records an agentic-review request and leaves candidates +pending when no agentic reviewer is configured. ## T08: Document Assessment Workflow diff --git a/workplans/RREG-WP-0014-agentic-characteristic-acceptance.md b/workplans/RREG-WP-0014-agentic-characteristic-acceptance.md index 3136f63..97fb524 100644 --- a/workplans/RREG-WP-0014-agentic-characteristic-acceptance.md +++ b/workplans/RREG-WP-0014-agentic-characteristic-acceptance.md @@ -143,7 +143,7 @@ legacy auto-approval guard. ```task id: RREG-WP-0014-T04 -status: todo +status: in_progress priority: high state_hub_task_id: "b0d29756-7460-4ffa-8d56-d94cfb34e94f" ``` @@ -161,6 +161,15 @@ Acceptance criteria: - Each agentic approval includes a rationale tied to evidence and criteria. - If no agentic reviewer is configured, candidates remain pending review. +Implementation note 2026-05-15: started the migration by adding an +`AgenticReviewRequest`/`AgenticReviewer` boundary, routing normal API/CLI/UI +review requests to `request_agentic_review`, and leaving candidates pending with +an `agentic_review_unconfigured` review decision when no reviewer is configured. +Legacy `trusted_auto_approve` requests are treated as deprecated compatibility +input and routed to the same pending agentic-review path. Remaining work: +structured agentic decisions with approve/reject/downgrade/request-human-review +actions and rationale enforcement. + ## T05: Add Review Decision Audit Trail ```task