Validate structured agentic review decisions

2026-05-15 16:09:03 +02:00
parent 9a320a95ee
commit 92eaf52bb6
6 changed files with 331 additions and 10 deletions
--- a/docs/acceptance-policy.md
+++ b/docs/acceptance-policy.md
@@ -73,6 +73,12 @@ Agentic approval requires:
 - rationale
 - exact candidate elements affected

+The repo-scoping service represents these requirements as structured agentic
+review decisions. Approval decisions (`approve`, `approve_with_edits`) are
+rejected unless they include a rationale, criteria IDs, and evidence refs.
+Non-approval decisions still require a rationale and criteria IDs, so downgrade,
+rejection, relink, proposed-edit, and human-review requests remain auditable.
+
 ## Legacy Auto-Approval Terminology

 `trusted_auto_approve_candidate_graph` and UI labels such as "Trusted
--- a/src/repo_registry/acceptance/__init__.py
+++ b/src/repo_registry/acceptance/__init__.py
@@ -1,4 +1,10 @@
-from repo_registry.acceptance.agentic import AgenticReviewer, AgenticReviewRequest
+from repo_registry.acceptance.agentic import (
+    AgenticReviewer,
+    AgenticReviewDecision,
+    AgenticReviewRequest,
+    validate_agentic_review_decision,
+    validate_agentic_review_decisions,
+)
 from repo_registry.acceptance.criteria import (
    active_quality_criteria_version,
    criteria_registry_dict,
@@ -15,6 +21,7 @@ from repo_registry.acceptance.gates import (

 __all__ = [
    "active_quality_criteria_version",
+    "AgenticReviewDecision",
    "AgenticReviewer",
    "AgenticReviewRequest",
    "blocking_quality_gate_outcomes",
@@ -25,4 +32,6 @@ __all__ = [
    "evaluate_candidate_graph_quality",
    "load_quality_criteria",
    "quality_gate_outcome_dicts",
+    "validate_agentic_review_decision",
+    "validate_agentic_review_decisions",
 ]
--- a/src/repo_registry/acceptance/agentic.py
+++ b/src/repo_registry/acceptance/agentic.py
@@ -1,11 +1,23 @@
 from __future__ import annotations

 from dataclasses import dataclass
+from typing import Any
 from typing import Protocol

 from repo_registry.acceptance.gates import QualityGateOutcome
 from repo_registry.core.models import CandidateGraph, Repository

+# Actions that accept a target; validation demands evidence refs for these.
+AGENTIC_APPROVAL_ACTIONS = {"approve", "approve_with_edits"}
+# Every action an agentic reviewer may return: the approvals plus the rest.
+AGENTIC_REVIEW_ACTIONS = AGENTIC_APPROVAL_ACTIONS | {
+    "reject",
+    "downgrade",
+    "request_human_review",
+    "propose_edit",
+    "relink",
+}
+

@dataclass(frozen=True)
 class AgenticReviewRequest:
@@ -16,9 +28,46 @@ class AgenticReviewRequest:
    context: str


+@dataclass(frozen=True)
+class AgenticReviewDecision:
+    """One structured decision returned by an agentic reviewer.
+
+    Instances are plain data; completeness is enforced separately by
+    ``validate_agentic_review_decision``. ``frozen=True`` only blocks attribute
+    reassignment: the list and dict fields themselves remain mutable.
+    """
+
+    # One of AGENTIC_REVIEW_ACTIONS, e.g. "approve" or "reject".
+    action: str
+    # Kind of element the decision applies to, e.g. "candidate_graph".
+    target_type: str
+    # Identifier of the target; validation rejects negative values.
+    target_id: int
+    # Why the decision was made; validation rejects blank text.
+    rationale: str
+    # Criteria backing the decision; validation rejects an empty list.
+    criterion_ids: list[str]
+    # Evidence backing the decision; must be non-empty for approval actions.
+    evidence_refs: list[str]
+    # Optional free-form reviewer remarks.
+    notes: str = ""
+    # Optional structured payload. NOTE(review): presumably the edits behind
+    # "approve_with_edits" / "propose_edit" decisions -- confirm with callers.
+    proposed_changes: dict[str, Any] | None = None
+
+
 class AgenticReviewer(Protocol):
     reviewer_id: str
     policy_version: str
 
-    def review(self, request: AgenticReviewRequest) -> None:
-        """Review a candidate graph and record decisions through the caller."""
+    def review(self, request: AgenticReviewRequest) -> list[AgenticReviewDecision]:
+        """Review a candidate graph and return structured decisions.
+
+        Implementations do not record anything themselves: they hand back
+        ``AgenticReviewDecision`` objects for the caller to validate and
+        apply. Return an empty list when there is nothing to decide.
+        """
+
+
+def validate_agentic_review_decision(decision: AgenticReviewDecision) -> None:
+    """Raise ``ValueError`` unless *decision* is complete enough to audit.
+
+    Every decision needs a supported action, a non-blank target type, a
+    non-negative target id, a non-blank rationale and at least one non-blank
+    criterion id. Approval actions additionally need at least one non-blank
+    evidence ref.
+
+    Raises:
+        ValueError: on the first requirement that is not met.
+    """
+    if decision.action not in AGENTIC_REVIEW_ACTIONS:
+        raise ValueError(f"unsupported agentic review action: {decision.action}")
+    # Whitespace-only text is treated as missing, matching the rationale check.
+    if not decision.target_type.strip():
+        raise ValueError("agentic review decision target_type is required")
+    if decision.target_id < 0:
+        raise ValueError("agentic review decision target_id must be non-negative")
+    if not decision.rationale.strip():
+        raise ValueError("agentic review decision rationale is required")
+    # A list holding only blank strings carries no audit value, so it counts
+    # as empty instead of slipping past a plain truthiness test.
+    if not any(criterion.strip() for criterion in decision.criterion_ids):
+        raise ValueError("agentic review decision criterion_ids are required")
+    if decision.action in AGENTIC_APPROVAL_ACTIONS and not any(
+        ref.strip() for ref in decision.evidence_refs
+    ):
+        raise ValueError(
+            "agentic approval requires evidence refs tied to the rationale"
+        )
+
+
+def validate_agentic_review_decisions(
+    decisions: list[AgenticReviewDecision],
+) -> list[AgenticReviewDecision]:
+    """Validate every decision in order and hand the same list back.
+
+    The first invalid entry aborts the pass: the ``ValueError`` raised by
+    ``validate_agentic_review_decision`` is left to propagate.
+    """
+    for candidate in decisions:
+        validate_agentic_review_decision(candidate)
+    return decisions
--- a/src/repo_registry/core/service.py
+++ b/src/repo_registry/core/service.py
@@ -5,12 +5,14 @@ from dataclasses import asdict, replace
 from typing import Any

 from repo_registry.acceptance import (
+    AgenticReviewDecision,
    AgenticReviewer,
    AgenticReviewRequest,
    active_quality_criteria_version,
    blocking_quality_gate_outcomes,
    evaluate_candidate_capability_quality,
    evaluate_candidate_graph_quality,
+    validate_agentic_review_decisions,
 )
 from repo_registry.core.models import (
    AbilitySummary,
@@ -646,7 +648,18 @@ class RegistryService:
            quality_gate_outcomes=gate_outcomes,
            context="candidate-characteristic-acceptance",
        )
-        self.agentic_reviewer.review(request)
+        decisions = validate_agentic_review_decisions(
+            self.agentic_reviewer.review(request) or []
+        )
+        for decision in decisions:
+            self._apply_agentic_review_decision(
+                repository_id,
+                analysis_run_id,
+                decision,
+                reviewer_id=self.agentic_reviewer.reviewer_id,
+                policy_version=self.agentic_reviewer.policy_version,
+                criteria_version=criteria_version,
+            )
        self.store.create_review_decision(
            repository_id,
            analysis_run_id,
@@ -655,11 +668,170 @@ class RegistryService:
                f"{notes} reviewer={self.agentic_reviewer.reviewer_id}; "
                f"policy_version={self.agentic_reviewer.policy_version}; "
                f"criteria_version={criteria_version}; "
-                f"quality_gate_outcomes={len(gate_outcomes)}."
+                f"quality_gate_outcomes={len(gate_outcomes)}; "
+                f"decisions={len(decisions)}."
            ).strip(),
        )
        return self.store.get_candidate_graph(repository_id, analysis_run_id)

+    def _apply_agentic_review_decision(
+        self,
+        repository_id: int,
+        analysis_run_id: int,
+        decision: AgenticReviewDecision,
+        *,
+        reviewer_id: str,
+        policy_version: str,
+        criteria_version: str,
+    ) -> None:
+        """Apply one agentic decision, routing it by its action.
+
+        Approvals and rejections are delegated to their dedicated helpers.
+        Every other action is only recorded as an ``agentic_<action>`` review
+        decision carrying the audit notes.
+        """
+        audit_notes = self._agentic_review_decision_notes(
+            decision,
+            reviewer_id=reviewer_id,
+            policy_version=policy_version,
+            criteria_version=criteria_version,
+        )
+        verb = decision.action
+        if verb in ("approve", "approve_with_edits"):
+            self._apply_agentic_approval(
+                repository_id, analysis_run_id, decision, audit_notes
+            )
+        elif verb == "reject":
+            self._apply_agentic_rejection(
+                repository_id, analysis_run_id, decision, audit_notes
+            )
+        else:
+            # No state change for the remaining actions: keep the audit record.
+            self.store.create_review_decision(
+                repository_id,
+                analysis_run_id,
+                action=f"agentic_{verb}",
+                notes=audit_notes,
+            )
+
+    def _apply_agentic_approval(
+        self,
+        repository_id: int,
+        analysis_run_id: int,
+        decision: AgenticReviewDecision,
+        notes: str,
+    ) -> None:
+        """Route an approval decision to the accept call for its target type.
+
+        A ``candidate_graph`` target approves the whole graph under an
+        ``agentic_<action>_candidate_graph`` action. Ability, capability,
+        feature and evidence targets are accepted individually by id.
+
+        Raises:
+            ValueError: if the target type is none of the supported ones.
+        """
+        target_type = decision.target_type
+        if target_type == "candidate_graph":
+            self.approve_candidate_graph(
+                repository_id,
+                analysis_run_id,
+                notes=notes,
+                action=f"agentic_{decision.action}_candidate_graph",
+            )
+            return
+
+        if target_type == "candidate_ability":
+            accept = self.accept_candidate_ability
+        elif target_type == "candidate_capability":
+            accept = self.accept_candidate_capability
+        elif target_type == "candidate_feature":
+            accept = self.accept_candidate_feature
+        elif target_type == "candidate_evidence":
+            accept = self.accept_candidate_evidence
+        else:
+            raise ValueError(
+                f"unsupported agentic approval target: {decision.target_type}"
+            )
+        # Element-level accepts receive only the ids and the notes.
+        # NOTE(review): decision.proposed_changes is not applied here, so
+        # "approve_with_edits" differs from "approve" only in the notes text
+        # -- confirm that is intended.
+        accept(repository_id, analysis_run_id, decision.target_id, notes=notes)
+
+    def _apply_agentic_rejection(
+        self,
+        repository_id: int,
+        analysis_run_id: int,
+        decision: AgenticReviewDecision,
+        notes: str,
+    ) -> None:
+        """Route a rejection decision to the reject call for its target type.
+
+        Ability, capability, feature and evidence targets are rejected
+        individually by id. Any other target type, ``candidate_graph``
+        included, is only recorded as an ``agentic_reject`` review decision.
+        """
+        target_type = decision.target_type
+        if target_type == "candidate_ability":
+            reject = self.reject_candidate_ability
+        elif target_type == "candidate_capability":
+            reject = self.reject_candidate_capability
+        elif target_type == "candidate_feature":
+            reject = self.reject_candidate_feature
+        elif target_type == "candidate_evidence":
+            reject = self.reject_candidate_evidence
+        else:
+            # No element-level handler: keep the audit record only.
+            self.store.create_review_decision(
+                repository_id,
+                analysis_run_id,
+                action="agentic_reject",
+                notes=notes,
+            )
+            return
+        reject(repository_id, analysis_run_id, decision.target_id, notes=notes)
+
+    def _agentic_review_decision_notes(
+        self,
+        decision: AgenticReviewDecision,
+        *,
+        reviewer_id: str,
+        policy_version: str,
+        criteria_version: str,
+    ) -> str:
+        """Render a decision as a single ``key=value; ...`` audit string.
+
+        Reviewer, policy version, criteria version, action, target, criteria,
+        evidence and rationale are always present, in that order. Evidence
+        reads ``none`` when no refs were given. ``proposed_changes`` and
+        ``notes`` are appended only when they are non-empty.
+        """
+        evidence_text = ", ".join(decision.evidence_refs) or "none"
+        criteria_text = ", ".join(decision.criterion_ids)
+        fields = [
+            f"reviewer={reviewer_id}",
+            f"policy_version={policy_version}",
+            f"criteria_version={criteria_version}",
+            f"action={decision.action}",
+            f"target={decision.target_type}:{decision.target_id}",
+            f"criteria={criteria_text}",
+            f"evidence={evidence_text}",
+            f"rationale={decision.rationale.strip()}",
+        ]
+        if decision.proposed_changes:
+            fields.append(f"proposed_changes={decision.proposed_changes}")
+        reviewer_remarks = decision.notes.strip()
+        if reviewer_remarks:
+            fields.append(f"notes={reviewer_remarks}")
+        return "; ".join(fields)
+
    def _trusted_auto_approve_capability_safe(
        self,
        capability: CandidateCapability,
--- a/tests/test_agentic_review.py
+++ b/tests/test_agentic_review.py
@@ -1,3 +1,9 @@
+import pytest
+
+from repo_registry.acceptance import (
+    AgenticReviewDecision,
+    validate_agentic_review_decision,
+)
 from repo_registry.core.service import RegistryService
 from repo_registry.repo_ingestion.git import GitIngestionService
 from repo_registry.storage.sqlite import RegistryStore
@@ -12,6 +18,29 @@ class RecordingAgenticReviewer:

    def review(self, request):
        self.requests.append(request)
+        return []
+
+
+class ApprovingAgenticReviewer:
+    """Test double that approves the whole candidate graph it is shown."""
+
+    reviewer_id = "approving-agent"
+    policy_version = "agentic-review-policy/test"
+
+    def __init__(self):
+        # Every request is kept so tests can inspect what the service sent.
+        self.requests = []
+
+    def review(self, request):
+        self.requests.append(request)
+        approval = AgenticReviewDecision(
+            action="approve",
+            target_type="candidate_graph",
+            target_id=request.candidate_graph.analysis_run.id,
+            rationale="API source and README support the generated repository interface claim.",
+            criterion_ids=["RREG-QC-004"],
+            evidence_refs=["README.md", "app.py"],
+        )
+        return [approval]


 def test_configured_agentic_reviewer_receives_graph_gates_and_criteria(tmp_path):
@@ -46,3 +75,58 @@ def test_configured_agentic_reviewer_receives_graph_gates_and_criteria(tmp_path)
    assert graph.abilities[0].capabilities[0].status == "candidate"
    assert decisions[0].action == "agentic_review_completed"
    assert "reviewer=test-agent" in decisions[0].notes
+    assert "decisions=0" in decisions[0].notes
+
+
+def test_agentic_approval_requires_rationale_criteria_and_evidence():
+    """An approval missing rationale, criteria, or evidence must be rejected."""
+    complete = {
+        "action": "approve",
+        "target_type": "candidate_graph",
+        "target_id": 1,
+        "rationale": "Looks supported.",
+        "criterion_ids": ["RREG-QC-004"],
+        "evidence_refs": ["README.md"],
+    }
+    # Baseline: the fully populated approval validates, so each failure below
+    # is attributable to the single field that was blanked out.
+    validate_agentic_review_decision(AgenticReviewDecision(**complete))
+
+    missing_cases = [
+        ({"rationale": "   "}, "rationale is required"),
+        ({"criterion_ids": []}, "criterion_ids are required"),
+        ({"evidence_refs": []}, "evidence refs"),
+    ]
+    for override, message in missing_cases:
+        with pytest.raises(ValueError, match=message):
+            validate_agentic_review_decision(
+                AgenticReviewDecision(**{**complete, **override})
+            )
+
+
+def test_agentic_reviewer_can_approve_candidate_graph_with_rationale(tmp_path):
+    """A graph-level approval is applied and leaves an auditable decision trail."""
+    repo_dir = tmp_path / "repo"
+    repo_dir.mkdir()
+    fixture_files = {
+        "README.md": "# Agentic Approval\nReports health.\n",
+        "app.py": '@app.get("/health")\ndef health():\n    return {}\n',
+    }
+    for filename, content in fixture_files.items():
+        (repo_dir / filename).write_text(content, encoding="utf-8")
+
+    store = RegistryStore(tmp_path / "registry.sqlite3")
+    store.initialize()
+    service = RegistryService(
+        store,
+        ingestion=GitIngestionService(tmp_path / "checkouts"),
+        agentic_reviewer=ApprovingAgenticReviewer(),
+    )
+    repository = service.register_repository(
+        name="Agentic Approval", url=str(repo_dir)
+    )
+
+    analysis = service.analyze_repository(
+        repository.id,
+        use_llm_assistance=False,
+        agentic_review=True,
+    )
+    run_id = analysis.analysis_run.id
+
+    ability_map = service.ability_map(repository.id)
+    graph = service.candidate_graph(repository.id, run_id)
+    decisions = service.list_review_decisions(repository.id, run_id)
+
+    assert ability_map.abilities
+    assert graph.abilities[0].status == "approved"
+
+    # NOTE(review): relies on list_review_decisions returning newest first --
+    # the run summary is recorded after the per-decision approval.
+    summary_record, approval_record = decisions[0], decisions[1]
+    assert approval_record.action == "agentic_approve_candidate_graph"
+    assert "rationale=API source and README support" in approval_record.notes
+    assert "criteria=RREG-QC-004" in approval_record.notes
+    assert "evidence=README.md, app.py" in approval_record.notes
+    assert summary_record.action == "agentic_review_completed"
+    assert "decisions=1" in summary_record.notes
--- a/workplans/RREG-WP-0014-agentic-characteristic-acceptance.md
+++ b/workplans/RREG-WP-0014-agentic-characteristic-acceptance.md
@@ -143,7 +143,7 @@ legacy auto-approval guard.

 ```task
 id: RREG-WP-0014-T04
-status: in_progress
+status: done
 priority: high
 state_hub_task_id: "b0d29756-7460-4ffa-8d56-d94cfb34e94f"
 ```
@@ -161,14 +161,15 @@ Acceptance criteria:
 - Each agentic approval includes a rationale tied to evidence and criteria.
 - If no agentic reviewer is configured, candidates remain pending review.

-Implementation note 2026-05-15: started the migration by adding an
+Implementation note 2026-05-15: completed the first migration by adding an
 `AgenticReviewRequest`/`AgenticReviewer` boundary, routing normal API/CLI/UI
 review requests to `request_agentic_review`, and leaving candidates pending with
 an `agentic_review_unconfigured` review decision when no reviewer is configured.
 Legacy `trusted_auto_approve` requests are treated as deprecated compatibility
-input and routed to the same pending agentic-review path. Remaining work:
-structured agentic decisions with approve/reject/downgrade/request-human-review
-actions and rationale enforcement.
+input and routed to the same pending agentic-review path. Agentic reviewers now
+return structured decisions with approve, approve-with-edits, reject, downgrade,
+request-human-review, relink, and propose-edit actions. Every decision is
+validated for rationale and criteria IDs before being applied; approval
+decisions additionally require evidence refs.

 ## T05: Add Review Decision Audit Trail