diff --git a/docs/acceptance-policy.md b/docs/acceptance-policy.md index f1b396e..0b13657 100644 --- a/docs/acceptance-policy.md +++ b/docs/acceptance-policy.md @@ -73,6 +73,12 @@ Agentic approval requires: - rationale - exact candidate elements affected +The repo-scoping service represents this as structured agentic review decisions. +Approval decisions are rejected unless they include rationale, criteria IDs, and +evidence refs. Non-approval decisions still require rationale and criteria IDs, +so downgrade, rejection, relink, proposed edit, and human-review requests remain +auditable. + ## Legacy Auto-Approval Terminology `trusted_auto_approve_candidate_graph` and UI labels such as "Trusted diff --git a/src/repo_registry/acceptance/__init__.py b/src/repo_registry/acceptance/__init__.py index bfe559f..f34b6c9 100644 --- a/src/repo_registry/acceptance/__init__.py +++ b/src/repo_registry/acceptance/__init__.py @@ -1,4 +1,10 @@ -from repo_registry.acceptance.agentic import AgenticReviewer, AgenticReviewRequest +from repo_registry.acceptance.agentic import ( + AgenticReviewer, + AgenticReviewDecision, + AgenticReviewRequest, + validate_agentic_review_decision, + validate_agentic_review_decisions, +) from repo_registry.acceptance.criteria import ( active_quality_criteria_version, criteria_registry_dict, @@ -15,6 +21,7 @@ from repo_registry.acceptance.gates import ( __all__ = [ "active_quality_criteria_version", + "AgenticReviewDecision", "AgenticReviewer", "AgenticReviewRequest", "blocking_quality_gate_outcomes", @@ -25,4 +32,6 @@ __all__ = [ "evaluate_candidate_graph_quality", "load_quality_criteria", "quality_gate_outcome_dicts", + "validate_agentic_review_decision", + "validate_agentic_review_decisions", ] diff --git a/src/repo_registry/acceptance/agentic.py b/src/repo_registry/acceptance/agentic.py index 2b93ade..013f91b 100644 --- a/src/repo_registry/acceptance/agentic.py +++ b/src/repo_registry/acceptance/agentic.py @@ -1,11 +1,23 @@ from __future__ import annotations from dataclasses import dataclass +from typing import Any from typing import Protocol from repo_registry.acceptance.gates import QualityGateOutcome from repo_registry.core.models import CandidateGraph, Repository +AGENTIC_REVIEW_ACTIONS = { + "approve", + "approve_with_edits", + "reject", + "downgrade", + "request_human_review", + "propose_edit", + "relink", +} +AGENTIC_APPROVAL_ACTIONS = {"approve", "approve_with_edits"} + @dataclass(frozen=True) class AgenticReviewRequest: @@ -16,9 +28,46 @@ class AgenticReviewRequest: context: str +@dataclass(frozen=True) +class AgenticReviewDecision: + action: str + target_type: str + target_id: int + rationale: str + criterion_ids: list[str] + evidence_refs: list[str] + notes: str = "" + proposed_changes: dict[str, Any] | None = None + + class AgenticReviewer(Protocol): reviewer_id: str policy_version: str - def review(self, request: AgenticReviewRequest) -> None: - """Review a candidate graph and record decisions through the caller.""" + def review(self, request: AgenticReviewRequest) -> list[AgenticReviewDecision]: + """Review a candidate graph and return structured decisions.""" + + +def validate_agentic_review_decision(decision: AgenticReviewDecision) -> None: + if decision.action not in AGENTIC_REVIEW_ACTIONS: + raise ValueError(f"unsupported agentic review action: {decision.action}") + if not decision.target_type: + raise ValueError("agentic review decision target_type is required") + if decision.target_id < 0: + raise ValueError("agentic review decision target_id must be non-negative") + if not decision.rationale.strip(): + raise ValueError("agentic review decision rationale is required") + if not decision.criterion_ids: + raise ValueError("agentic review decision criterion_ids are required") + if decision.action in AGENTIC_APPROVAL_ACTIONS and not decision.evidence_refs: + raise ValueError( + "agentic approval requires evidence refs tied to the rationale" + ) + + +def validate_agentic_review_decisions( + decisions: list[AgenticReviewDecision], +) -> list[AgenticReviewDecision]: + for decision in decisions: + validate_agentic_review_decision(decision) + return decisions diff --git a/src/repo_registry/core/service.py b/src/repo_registry/core/service.py index 323e190..59253fd 100644 --- a/src/repo_registry/core/service.py +++ b/src/repo_registry/core/service.py @@ -5,12 +5,14 @@ from dataclasses import asdict, replace from typing import Any from repo_registry.acceptance import ( + AgenticReviewDecision, AgenticReviewer, AgenticReviewRequest, active_quality_criteria_version, blocking_quality_gate_outcomes, evaluate_candidate_capability_quality, evaluate_candidate_graph_quality, + validate_agentic_review_decisions, ) from repo_registry.core.models import ( AbilitySummary, @@ -646,7 +648,18 @@ class RegistryService: quality_gate_outcomes=gate_outcomes, context="candidate-characteristic-acceptance", ) - self.agentic_reviewer.review(request) + decisions = validate_agentic_review_decisions( + self.agentic_reviewer.review(request) or [] + ) + for decision in decisions: + self._apply_agentic_review_decision( + repository_id, + analysis_run_id, + decision, + reviewer_id=self.agentic_reviewer.reviewer_id, + policy_version=self.agentic_reviewer.policy_version, + criteria_version=criteria_version, + ) self.store.create_review_decision( repository_id, analysis_run_id, @@ -655,11 +668,170 @@ class RegistryService: f"{notes} reviewer={self.agentic_reviewer.reviewer_id}; " f"policy_version={self.agentic_reviewer.policy_version}; " f"criteria_version={criteria_version}; " - f"quality_gate_outcomes={len(gate_outcomes)}." + f"quality_gate_outcomes={len(gate_outcomes)}; " + f"decisions={len(decisions)}." ).strip(), ) return self.store.get_candidate_graph(repository_id, analysis_run_id) + def _apply_agentic_review_decision( + self, + repository_id: int, + analysis_run_id: int, + decision: AgenticReviewDecision, + *, + reviewer_id: str, + policy_version: str, + criteria_version: str, + ) -> None: + notes = self._agentic_review_decision_notes( + decision, + reviewer_id=reviewer_id, + policy_version=policy_version, + criteria_version=criteria_version, + ) + if decision.action in {"approve", "approve_with_edits"}: + self._apply_agentic_approval( + repository_id, + analysis_run_id, + decision, + notes, + ) + return + if decision.action == "reject": + self._apply_agentic_rejection( + repository_id, + analysis_run_id, + decision, + notes, + ) + return + self.store.create_review_decision( + repository_id, + analysis_run_id, + action=f"agentic_{decision.action}", + notes=notes, + ) + + def _apply_agentic_approval( + self, + repository_id: int, + analysis_run_id: int, + decision: AgenticReviewDecision, + notes: str, + ) -> None: + action = f"agentic_{decision.action}" + if decision.target_type == "candidate_graph": + self.approve_candidate_graph( + repository_id, + analysis_run_id, + notes=notes, + action=f"{action}_candidate_graph", + ) + return + if decision.target_type == "candidate_ability": + self.accept_candidate_ability( + repository_id, + analysis_run_id, + decision.target_id, + notes=notes, + ) + return + if decision.target_type == "candidate_capability": + self.accept_candidate_capability( + repository_id, + analysis_run_id, + decision.target_id, + notes=notes, + ) + return + if decision.target_type == "candidate_feature": + self.accept_candidate_feature( + repository_id, + analysis_run_id, + decision.target_id, + notes=notes, + ) + return + if decision.target_type == "candidate_evidence": + self.accept_candidate_evidence( + repository_id, + analysis_run_id, + decision.target_id, + notes=notes, + ) + return + raise ValueError(f"unsupported agentic approval target: {decision.target_type}") + + def _apply_agentic_rejection( + self, + repository_id: int, + analysis_run_id: int, + decision: AgenticReviewDecision, + notes: str, + ) -> None: + if decision.target_type == "candidate_ability": + self.reject_candidate_ability( + repository_id, + analysis_run_id, + decision.target_id, + notes=notes, + ) + return + if decision.target_type == "candidate_capability": + self.reject_candidate_capability( + repository_id, + analysis_run_id, + decision.target_id, + notes=notes, + ) + return + if decision.target_type == "candidate_feature": + self.reject_candidate_feature( + repository_id, + analysis_run_id, + decision.target_id, + notes=notes, + ) + return + if decision.target_type == "candidate_evidence": + self.reject_candidate_evidence( + repository_id, + analysis_run_id, + decision.target_id, + notes=notes, + ) + return + self.store.create_review_decision( + repository_id, + analysis_run_id, + action="agentic_reject", + notes=notes, + ) + + def _agentic_review_decision_notes( + self, + decision: AgenticReviewDecision, + *, + reviewer_id: str, + policy_version: str, + criteria_version: str, + ) -> str: + evidence = ", ".join(decision.evidence_refs) or "none" + criteria = ", ".join(decision.criterion_ids) + notes = ( + f"reviewer={reviewer_id}; policy_version={policy_version}; " + f"criteria_version={criteria_version}; action={decision.action}; " + f"target={decision.target_type}:{decision.target_id}; " + f"criteria={criteria}; evidence={evidence}; " + f"rationale={decision.rationale.strip()}" + ) + if decision.proposed_changes: + notes += f"; proposed_changes={decision.proposed_changes}" + if decision.notes.strip(): + notes += f"; notes={decision.notes.strip()}" + return notes + def _trusted_auto_approve_capability_safe( self, capability: CandidateCapability, diff --git a/tests/test_agentic_review.py b/tests/test_agentic_review.py index 2ed3d99..ce87f15 100644 --- a/tests/test_agentic_review.py +++ b/tests/test_agentic_review.py @@ -1,3 +1,9 @@ +import pytest + +from repo_registry.acceptance import ( + AgenticReviewDecision, + validate_agentic_review_decision, +) from repo_registry.core.service import RegistryService from repo_registry.repo_ingestion.git import GitIngestionService from repo_registry.storage.sqlite import RegistryStore @@ -12,6 +18,29 @@ class RecordingAgenticReviewer: def review(self, request): self.requests.append(request) + return [] + + +class ApprovingAgenticReviewer: + reviewer_id = "approving-agent" + policy_version = "agentic-review-policy/test" + + def __init__(self): + self.requests = [] + + def review(self, request): + self.requests.append(request) + graph = request.candidate_graph + return [ + AgenticReviewDecision( + action="approve", + target_type="candidate_graph", + target_id=graph.analysis_run.id, + rationale="API source and README support the generated repository interface claim.", + criterion_ids=["RREG-QC-004"], + evidence_refs=["README.md", "app.py"], + ) + ] def test_configured_agentic_reviewer_receives_graph_gates_and_criteria(tmp_path): @@ -46,3 +75,58 @@ def test_configured_agentic_reviewer_receives_graph_gates_and_criteria(tmp_path) assert graph.abilities[0].capabilities[0].status == "candidate" assert decisions[0].action == "agentic_review_completed" assert "reviewer=test-agent" in decisions[0].notes + assert "decisions=0" in decisions[0].notes + + +def test_agentic_approval_requires_rationale_criteria_and_evidence(): + with pytest.raises(ValueError, match="evidence refs"): + validate_agentic_review_decision( + AgenticReviewDecision( + action="approve", + target_type="candidate_graph", + target_id=1, + rationale="Looks supported.", + criterion_ids=["RREG-QC-004"], + evidence_refs=[], + ) + ) + + +def test_agentic_reviewer_can_approve_candidate_graph_with_rationale(tmp_path): + source = tmp_path / "repo" + source.mkdir() + (source / "README.md").write_text( + "# Agentic Approval\nReports health.\n", + encoding="utf-8", + ) + (source / "app.py").write_text( + '@app.get("/health")\ndef health():\n return {}\n', + encoding="utf-8", + ) + store = RegistryStore(tmp_path / "registry.sqlite3") + store.initialize() + reviewer = ApprovingAgenticReviewer() + service = RegistryService( + store, + ingestion=GitIngestionService(tmp_path / "checkouts"), + agentic_reviewer=reviewer, + ) + repository = service.register_repository(name="Agentic Approval", url=str(source)) + + summary = service.analyze_repository( + repository.id, + use_llm_assistance=False, + agentic_review=True, + ) + + ability_map = service.ability_map(repository.id) + graph = service.candidate_graph(repository.id, summary.analysis_run.id) + decisions = service.list_review_decisions(repository.id, summary.analysis_run.id) + assert ability_map.abilities + assert graph.abilities[0].status == "approved" + assert decisions[1].action == "agentic_approve_candidate_graph" + assert "rationale=API source and README support" in decisions[1].notes + assert "criteria=RREG-QC-004" in decisions[1].notes + assert "evidence=README.md, app.py" in decisions[1].notes + assert decisions[0].action == "agentic_review_completed" + assert "decisions=1" in decisions[0].notes diff --git a/workplans/RREG-WP-0014-agentic-characteristic-acceptance.md b/workplans/RREG-WP-0014-agentic-characteristic-acceptance.md index 97fb524..1b6e8b2 100644 --- a/workplans/RREG-WP-0014-agentic-characteristic-acceptance.md +++ b/workplans/RREG-WP-0014-agentic-characteristic-acceptance.md @@ -143,7 +143,7 @@ legacy auto-approval guard. ```task id: RREG-WP-0014-T04 -status: in_progress +status: done priority: high state_hub_task_id: "b0d29756-7460-4ffa-8d56-d94cfb34e94f" ``` @@ -161,14 +161,15 @@ Acceptance criteria: - Each agentic approval includes a rationale tied to evidence and criteria. - If no agentic reviewer is configured, candidates remain pending review. -Implementation note 2026-05-15: started the migration by adding an +Implementation note 2026-05-15: completed the first migration by adding an `AgenticReviewRequest`/`AgenticReviewer` boundary, routing normal API/CLI/UI review requests to `request_agentic_review`, and leaving candidates pending with an `agentic_review_unconfigured` review decision when no reviewer is configured. Legacy `trusted_auto_approve` requests are treated as deprecated compatibility -input and routed to the same pending agentic-review path. Remaining work: -structured agentic decisions with approve/reject/downgrade/request-human-review -actions and rationale enforcement. +input and routed to the same pending agentic-review path. Agentic reviewers now +return structured decisions with approve, approve-with-edits, reject, downgrade, +request-human-review, relink, and propose-edit actions. Approval decisions are +validated for rationale, criteria IDs, and evidence refs before being applied. ## T05: Add Review Decision Audit Trail