diff --git a/src/repo_registry/core/models.py b/src/repo_registry/core/models.py
index 1037a0d..275f40a 100644
--- a/src/repo_registry/core/models.py
+++ b/src/repo_registry/core/models.py
@@ -54,6 +54,34 @@ class ReviewDecision:
     created_at: str
 
 
+@dataclass(frozen=True)
+class AnalysisRunDiffItem:
+    change_type: str
+    item_type: str
+    key: str
+    base: dict[str, Any] | None = None
+    target: dict[str, Any] | None = None
+
+
+@dataclass(frozen=True)
+class AnalysisRunDiffSection:
+    added: list[AnalysisRunDiffItem] = field(default_factory=list)
+    removed: list[AnalysisRunDiffItem] = field(default_factory=list)
+    changed: list[AnalysisRunDiffItem] = field(default_factory=list)
+    weakened: list[AnalysisRunDiffItem] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class AnalysisRunDiff:
+    repository: Repository
+    base_run: AnalysisRun
+    target_run: AnalysisRun
+    facts: AnalysisRunDiffSection
+    chunks: AnalysisRunDiffSection
+    candidates: AnalysisRunDiffSection
+    approved_entries: AnalysisRunDiffSection
+
+
 @dataclass(frozen=True)
 class ObservedFact:
     id: int
diff --git a/src/repo_registry/core/service.py b/src/repo_registry/core/service.py
index a84034c..2f250b0 100644
--- a/src/repo_registry/core/service.py
+++ b/src/repo_registry/core/service.py
@@ -5,8 +5,15 @@ from dataclasses import asdict
 
 from repo_registry.core.models import (
     AbilitySummary,
+    AnalysisRunDiff,
+    AnalysisRunDiffItem,
+    AnalysisRunDiffSection,
     AnalysisRun,
     CapabilitySummary,
+    CandidateAbility,
+    CandidateCapability,
+    CandidateEvidence,
+    CandidateFeature,
     CandidateGraph,
     ContentChunk,
     ObservedFact,
@@ -278,6 +285,78 @@ class RegistryService:
         self.store.update_repository_status(repository_id, "indexed")
         return self.store.get_ability_map(repository_id)
 
+    def diff_analysis_runs(
+        self,
+        repository_id: int,
+        base_analysis_run_id: int,
+        target_analysis_run_id: int,
+    ) -> AnalysisRunDiff:
+        """Compare two analysis runs and preview the effect of approving the target.
+
+        ``facts``, ``chunks`` and ``candidates`` diff the base run against the
+        target run. ``approved_entries`` diffs the currently approved map
+        against the entries that approving the target run would publish.
+        """
+        repository = self.store.get_repository(repository_id)
+        base_run = self.store.get_analysis_run(repository_id, base_analysis_run_id)
+        target_run = self.store.get_analysis_run(repository_id, target_analysis_run_id)
+        base_graph = self.store.get_candidate_graph(repository_id, base_analysis_run_id)
+        target_graph = self.store.get_candidate_graph(repository_id, target_analysis_run_id)
+        approved_map = self.store.get_ability_map(repository_id)
+
+        return AnalysisRunDiff(
+            repository=repository,
+            base_run=base_run,
+            target_run=target_run,
+            facts=self._diff_items(
+                self._fact_index(
+                    self.store.list_observed_facts(repository_id, base_analysis_run_id)
+                ),
+                self._fact_index(
+                    self.store.list_observed_facts(repository_id, target_analysis_run_id)
+                ),
+            ),
+            chunks=self._diff_items(
+                self._chunk_index(
+                    self.store.list_content_chunks(repository_id, base_analysis_run_id)
+                ),
+                self._chunk_index(
+                    self.store.list_content_chunks(repository_id, target_analysis_run_id)
+                ),
+            ),
+            candidates=self._diff_items(
+                self._candidate_index(base_graph.abilities),
+                self._candidate_index(target_graph.abilities),
+            ),
+            approved_entries=self._diff_items(
+                self._approved_index(approved_map.abilities),
+                # Index the target with the approved payload shape (no
+                # ``status`` key) and only the entries approval would publish;
+                # otherwise every shared ability/capability shows as "changed".
+                self._approved_index(target_graph.abilities, publishable_only=True),
+            ),
+        )
+
+    def approve_analysis_run_changes(
+        self,
+        repository_id: int,
+        analysis_run_id: int,
+        *,
+        notes: str = "",
+    ) -> RepositoryAbilityMap:
+        """Replace the approved map with the run's candidate graph and record the decision."""
+        graph = self.store.get_candidate_graph(repository_id, analysis_run_id)
+        self.store.replace_approved_from_candidate_graph(repository_id, graph)
+        self.store.mark_candidate_graph_status(repository_id, analysis_run_id, "approved")
+        self.store.create_review_decision(
+            repository_id,
+            analysis_run_id,
+            action="approve_analysis_run_changes",
+            notes=notes,
+        )
+        self.store.update_repository_status(repository_id, "indexed")
+        return self.store.get_ability_map(repository_id)
+
     def reject_candidate_ability(
         self,
         repository_id: int,
@@ -975,6 +1054,299 @@ class RegistryService:
                 return strength
         return None
 
+    def _diff_items(
+        self,
+        base: dict[str, dict[str, object]],
+        target: dict[str, dict[str, object]],
+    ) -> AnalysisRunDiffSection:
+        """Classify keyed payloads as added, removed, changed, or weakened."""
+        added: list[AnalysisRunDiffItem] = []
+        removed: list[AnalysisRunDiffItem] = []
+        changed: list[AnalysisRunDiffItem] = []
+        weakened: list[AnalysisRunDiffItem] = []
+
+        for key in sorted(target.keys() - base.keys()):
+            added.append(
+                AnalysisRunDiffItem(
+                    change_type="added",
+                    item_type=str(target[key]["item_type"]),
+                    key=key,
+                    target=target[key],
+                )
+            )
+        for key in sorted(base.keys() - target.keys()):
+            removed.append(
+                AnalysisRunDiffItem(
+                    change_type="removed",
+                    item_type=str(base[key]["item_type"]),
+                    key=key,
+                    base=base[key],
+                )
+            )
+        for key in sorted(base.keys() & target.keys()):
+            if base[key] == target[key]:
+                continue
+            item = AnalysisRunDiffItem(
+                change_type="weakened" if self._is_weakened(base[key], target[key]) else "changed",
+                item_type=str(target[key]["item_type"]),
+                key=key,
+                base=base[key],
+                target=target[key],
+            )
+            if item.change_type == "weakened":
+                weakened.append(item)
+            else:
+                changed.append(item)
+        return AnalysisRunDiffSection(
+            added=added,
+            removed=removed,
+            changed=changed,
+            weakened=weakened,
+        )
+
+    def _is_weakened(
+        self,
+        base: dict[str, object],
+        target: dict[str, object],
+    ) -> bool:
+        """Return True when confidence dropped or evidence strength was downgraded."""
+        base_confidence = base.get("confidence")
+        target_confidence = target.get("confidence")
+        if (
+            isinstance(base_confidence, int | float)
+            and isinstance(target_confidence, int | float)
+            and target_confidence < base_confidence
+        ):
+            return True
+        base_strength = base.get("strength")
+        target_strength = target.get("strength")
+        strength_order = {"weak": 1, "medium": 2, "strong": 3}
+        return (
+            isinstance(base_strength, str)
+            and isinstance(target_strength, str)
+            and strength_order.get(target_strength, 0) < strength_order.get(base_strength, 0)
+        )
+
+    def _fact_index(self, facts: Sequence[ObservedFact]) -> dict[str, dict[str, object]]:
+        """Index observed facts by kind, path, and name."""
+        return {
+            f"fact:{fact.kind}:{fact.path}:{fact.name}": {
+                "item_type": "fact",
+                "kind": fact.kind,
+                "path": fact.path,
+                "name": fact.name,
+                "value": fact.value,
+                "metadata": fact.metadata,
+            }
+            for fact in facts
+        }
+
+    def _chunk_index(
+        self,
+        chunks: Sequence[ContentChunk],
+    ) -> dict[str, dict[str, object]]:
+        """Index content chunks by kind, path, and line span."""
+        return {
+            f"chunk:{chunk.kind}:{chunk.path}:{chunk.start_line}:{chunk.end_line}": {
+                "item_type": "chunk",
+                "kind": chunk.kind,
+                "path": chunk.path,
+                "start_line": chunk.start_line,
+                "end_line": chunk.end_line,
+                "text": chunk.text,
+            }
+            for chunk in chunks
+        }
+
+    def _candidate_index(
+        self,
+        abilities: Sequence[CandidateAbility],
+    ) -> dict[str, dict[str, object]]:
+        """Index candidate abilities, capabilities, features, and evidence by key."""
+        index: dict[str, dict[str, object]] = {}
+        for ability in abilities:
+            ability_key = self._entry_key("ability", ability.name)
+            index[ability_key] = {
+                "item_type": "ability",
+                "name": ability.name,
+                "description": ability.description,
+                "confidence": ability.confidence,
+                "status": ability.status,
+            }
+            for capability in ability.capabilities:
+                capability_key = self._entry_key(
+                    "capability",
+                    ability.name,
+                    capability.name,
+                )
+                index[capability_key] = {
+                    "item_type": "capability",
+                    "ability_name": ability.name,
+                    "name": capability.name,
+                    "description": capability.description,
+                    "inputs": capability.inputs,
+                    "outputs": capability.outputs,
+                    "confidence": capability.confidence,
+                    "status": capability.status,
+                }
+                self._index_candidate_leaves(index, ability, capability)
+        return index
+
+    def _index_candidate_leaves(
+        self,
+        index: dict[str, dict[str, object]],
+        ability: CandidateAbility,
+        capability: CandidateCapability,
+    ) -> None:
+        """Add the features and evidence of ``capability`` to ``index``."""
+        for feature in capability.features:
+            key = self._entry_key(
+                "feature",
+                ability.name,
+                capability.name,
+                feature.name,
+                feature.type,
+                feature.location,
+            )
+            index[key] = self._feature_payload(
+                feature,
+                ability_name=ability.name,
+                capability_name=capability.name,
+            )
+        for evidence in capability.evidence:
+            key = self._entry_key(
+                "evidence",
+                ability.name,
+                capability.name,
+                evidence.type,
+                evidence.reference,
+            )
+            index[key] = self._evidence_payload(
+                evidence,
+                ability_name=ability.name,
+                capability_name=capability.name,
+            )
+
+    def _approved_index(
+        self,
+        abilities,
+        *,
+        publishable_only: bool = False,
+    ) -> dict[str, dict[str, object]]:
+        """Index ability-map entries by key using the approved payload shape.
+
+        Accepts approved or candidate abilities; payloads never carry a
+        ``status`` key. With ``publishable_only`` set, entries whose status
+        would be skipped on approval are left out together with their children.
+        """
+        index: dict[str, dict[str, object]] = {}
+        for ability in abilities:
+            if publishable_only and not self._is_publishable(ability):
+                continue
+            ability_key = self._entry_key("ability", ability.name)
+            index[ability_key] = {
+                "item_type": "ability",
+                "name": ability.name,
+                "description": ability.description,
+                "confidence": ability.confidence,
+            }
+            for capability in ability.capabilities:
+                if publishable_only and not self._is_publishable(capability):
+                    continue
+                capability_key = self._entry_key(
+                    "capability",
+                    ability.name,
+                    capability.name,
+                )
+                index[capability_key] = {
+                    "item_type": "capability",
+                    "ability_name": ability.name,
+                    "name": capability.name,
+                    "description": capability.description,
+                    "inputs": capability.inputs,
+                    "outputs": capability.outputs,
+                    "confidence": capability.confidence,
+                }
+                for feature in capability.features:
+                    if publishable_only and not self._is_publishable(feature):
+                        continue
+                    key = self._entry_key(
+                        "feature",
+                        ability.name,
+                        capability.name,
+                        feature.name,
+                        feature.type,
+                        feature.location,
+                    )
+                    index[key] = self._feature_payload(
+                        feature,
+                        ability_name=ability.name,
+                        capability_name=capability.name,
+                    )
+                for evidence in capability.evidence:
+                    if publishable_only and not self._is_publishable(evidence):
+                        continue
+                    key = self._entry_key(
+                        "evidence",
+                        ability.name,
+                        capability.name,
+                        evidence.type,
+                        evidence.reference,
+                    )
+                    index[key] = self._evidence_payload(
+                        evidence,
+                        ability_name=ability.name,
+                        capability_name=capability.name,
+                    )
+        return index
+
+    def _is_publishable(self, entry: object) -> bool:
+        """Return True when approval would publish ``entry``.
+
+        Mirrors the status filter in the store's
+        ``replace_approved_from_candidate_graph``; entries without a ``status``
+        attribute are treated as publishable.
+        """
+        return getattr(entry, "status", "approved") in {"candidate", "approved"}
+
+    def _feature_payload(
+        self,
+        feature: CandidateFeature,
+        *,
+        ability_name: str,
+        capability_name: str,
+    ) -> dict[str, object]:
+        """Build the comparable payload for a feature."""
+        return {
+            "item_type": "feature",
+            "ability_name": ability_name,
+            "capability_name": capability_name,
+            "name": feature.name,
+            "type": feature.type,
+            "location": feature.location,
+            "confidence": feature.confidence,
+        }
+
+    def _evidence_payload(
+        self,
+        evidence: CandidateEvidence,
+        *,
+        ability_name: str,
+        capability_name: str,
+    ) -> dict[str, object]:
+        """Build the comparable payload for an evidence entry."""
+        return {
+            "item_type": "evidence",
+            "ability_name": ability_name,
+            "capability_name": capability_name,
+            "type": evidence.type,
+            "reference": evidence.reference,
+            "strength": evidence.strength,
+        }
+
+    def _entry_key(self, *parts: str) -> str:
+        """Join the stripped, lower-cased parts with ``:`` into a lookup key."""
+        return ":".join(part.strip().lower() for part in parts)
+
     def _yaml_list(self, values: Sequence[str]) -> str:
         return "[" + ", ".join(self._yaml_scalar(value) for value in values) + "]"
 
diff --git a/src/repo_registry/storage/sqlite.py b/src/repo_registry/storage/sqlite.py
index 51eae19..9d0bca9 100644
--- a/src/repo_registry/storage/sqlite.py
+++ b/src/repo_registry/storage/sqlite.py
@@ -1512,6 +1512,115 @@ class RegistryStore:
                 row_id=evidence_id,
             )
 
+    def replace_approved_from_candidate_graph(
+        self,
+        repository_id: int,
+        graph: CandidateGraph,
+    ) -> None:
+        """Replace the repository's approved map with the publishable graph entries.
+
+        Entries whose status is neither ``candidate`` nor ``approved`` are
+        skipped together with their children.
+
+        Raises:
+            NotFoundError: If ``graph`` belongs to a different repository.
+        """
+        if graph.repository.id != repository_id:
+            raise NotFoundError(
+                f"candidate graph for repository {graph.repository.id} does not match "
+                f"repository {repository_id}"
+            )
+        with self.connect() as connection:
+            # Delete children first and explicitly: SQLite only cascades when
+            # the schema declares it and foreign keys are enabled on the connection.
+            for table in (
+                "approved_evidence",
+                "approved_features",
+                "approved_capabilities",
+                "approved_abilities",
+            ):
+                connection.execute(
+                    f"DELETE FROM {table} WHERE repository_id = ?",
+                    (repository_id,),
+                )
+            for ability in graph.abilities:
+                if ability.status not in {"candidate", "approved"}:
+                    continue
+                ability_cursor = connection.execute(
+                    """
+                    INSERT INTO approved_abilities
+                        (repository_id, name, description, confidence)
+                    VALUES (?, ?, ?, ?)
+                    """,
+                    (
+                        repository_id,
+                        ability.name,
+                        ability.description,
+                        ability.confidence,
+                    ),
+                )
+                approved_ability_id = int(ability_cursor.lastrowid)
+                for capability in ability.capabilities:
+                    if capability.status not in {"candidate", "approved"}:
+                        continue
+                    capability_cursor = connection.execute(
+                        """
+                        INSERT INTO approved_capabilities
+                            (repository_id, ability_id, name, description, inputs, outputs,
+                             confidence)
+                        VALUES (?, ?, ?, ?, ?, ?, ?)
+                        """,
+                        (
+                            repository_id,
+                            approved_ability_id,
+                            capability.name,
+                            capability.description,
+                            json.dumps(capability.inputs),
+                            json.dumps(capability.outputs),
+                            capability.confidence,
+                        ),
+                    )
+                    approved_capability_id = int(capability_cursor.lastrowid)
+                    for feature in capability.features:
+                        if feature.status not in {"candidate", "approved"}:
+                            continue
+                        connection.execute(
+                            """
+                            INSERT INTO approved_features
+                                (repository_id, capability_id, name, type, location,
+                                 confidence, source_refs)
+                            VALUES (?, ?, ?, ?, ?, ?, ?)
+                            """,
+                            (
+                                repository_id,
+                                approved_capability_id,
+                                feature.name,
+                                feature.type,
+                                feature.location,
+                                feature.confidence,
+                                self._source_refs_to_json(feature.source_refs),
+                            ),
+                        )
+                    for evidence in capability.evidence:
+                        if evidence.status not in {"candidate", "approved"}:
+                            continue
+                        connection.execute(
+                            """
+                            INSERT INTO approved_evidence
+                                (repository_id, capability_id, type, reference, strength,
+                                 source_refs)
+                            VALUES (?, ?, ?, ?, ?, ?)
+                            """,
+                            (
+                                repository_id,
+                                approved_capability_id,
+                                evidence.type,
+                                evidence.reference,
+                                evidence.strength,
+                                self._source_refs_to_json(evidence.source_refs),
+                            ),
+                        )
+
     def get_ability_map(self, repository_id: int) -> RepositoryAbilityMap:
         repository = self.get_repository(repository_id)
         with self.connect() as connection:
diff --git a/src/repo_registry/web_api/app.py b/src/repo_registry/web_api/app.py
index a15f6f4..ecae577 100644
--- a/src/repo_registry/web_api/app.py
+++ b/src/repo_registry/web_api/app.py
@@ -16,7 +16,9 @@ from repo_registry.web_api.schemas import (
     AbilityCreate,
     AbilitySummaryResponse,
     AbilityUpdate,
+    AnalysisRunChangeApproval,
     AnalysisRunCreate,
+    AnalysisRunDiffResponse,
     AnalysisRunResponse,
     CandidateAbilityMerge,
     CandidateCapabilityMerge,
@@ -239,6 +241,29 @@ def get_analysis_run(
         raise HTTPException(status_code=404, detail=str(exc)) from exc
 
 
+@app.get(
+    "/repos/{repository_id}/analysis-runs/{base_analysis_run_id}/diff/{target_analysis_run_id}",
+    tags=["review"],
+    response_model=AnalysisRunDiffResponse,
+)
+def diff_analysis_runs(
+    repository_id: int,
+    base_analysis_run_id: int,
+    target_analysis_run_id: int,
+    service: RegistryService = Depends(get_service),
+) -> dict[str, object]:
+    try:
+        return asdict(
+            service.diff_analysis_runs(
+                repository_id,
+                base_analysis_run_id,
+                target_analysis_run_id,
+            )
+        )
+    except NotFoundError as exc:
+        raise HTTPException(status_code=404, detail=str(exc)) from exc
+
+
 @app.get(
     "/repos/{repository_id}/review-decisions",
     tags=["review"],
@@ -375,6 +400,29 @@ def approve_candidate_graph(
         raise HTTPException(status_code=404, detail=str(exc)) from exc
 
 
+@app.post(
+    "/repos/{repository_id}/analysis-runs/{analysis_run_id}/changes/approve",
+    tags=["review"],
+    response_model=RepositoryAbilityMapResponse,
+)
+def approve_analysis_run_changes(
+    repository_id: int,
+    analysis_run_id: int,
+    payload: AnalysisRunChangeApproval,
+    service: RegistryService = Depends(get_service),
+) -> dict[str, object]:
+    try:
+        return asdict(
+            service.approve_analysis_run_changes(
+                repository_id,
+                analysis_run_id,
+                notes=payload.notes,
+            )
+        )
+    except NotFoundError as exc:
+        raise HTTPException(status_code=404, detail=str(exc)) from exc
+
+
 @app.post(
     "/repos/{repository_id}/analysis-runs/{analysis_run_id}"
     "/candidate-abilities/{candidate_ability_id}/reject",
diff --git a/src/repo_registry/web_api/schemas.py b/src/repo_registry/web_api/schemas.py
index 00b2533..04457e3 100644
--- a/src/repo_registry/web_api/schemas.py
+++ b/src/repo_registry/web_api/schemas.py
@@ -206,6 +206,16 @@ class CandidateGraphApproval(BaseModel):
     }
 
 
+class AnalysisRunChangeApproval(BaseModel):
+    notes: str = ""
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [{"notes": "Accept target run changes after review."}]
+        }
+    }
+
+
 class CandidateRejection(BaseModel):
     notes: str = ""
 
@@ -506,6 +516,31 @@ class CandidateGraphResponse(BaseModel):
     }
 
 
+class AnalysisRunDiffItemResponse(BaseModel):
+    change_type: str
+    item_type: str
+    key: str
+    base: dict[str, Any] | None = None
+    target: dict[str, Any] | None = None
+
+
+class AnalysisRunDiffSectionResponse(BaseModel):
+    added: list[AnalysisRunDiffItemResponse]
+    removed: list[AnalysisRunDiffItemResponse]
+    changed: list[AnalysisRunDiffItemResponse]
+    weakened: list[AnalysisRunDiffItemResponse]
+
+
+class AnalysisRunDiffResponse(BaseModel):
+    repository: RepositoryResponse
+    base_run: AnalysisRunResponse
+    target_run: AnalysisRunResponse
+    facts: AnalysisRunDiffSectionResponse
+    chunks: AnalysisRunDiffSectionResponse
+    candidates: AnalysisRunDiffSectionResponse
+    approved_entries: AnalysisRunDiffSectionResponse
+
+
 class EvidenceResponse(BaseModel):
     id: int
     type: str
diff --git a/tests/test_registry_service.py b/tests/test_registry_service.py
index cd56321..a5e657e 100644
--- a/tests/test_registry_service.py
+++ b/tests/test_registry_service.py
@@ -509,6 +509,69 @@ def test_approve_candidate_graph_publishes_ability_map_once(tmp_path):
     assert decisions[0].notes == "Looks good for the first pass."
 
 
+def test_analysis_run_diff_keeps_approved_map_stable_until_change_approval(tmp_path):
+    source = tmp_path / "repo"
+    source.mkdir()
+    (source / "README.md").write_text("# Example\n", encoding="utf-8")
+    app_file = source / "app.py"
+    app_file.write_text(
+        "from fastapi import FastAPI\n"
+        "app = FastAPI()\n"
+        '@app.get("/health")\n'
+        "def health():\n"
+        "    return {}\n",
+        encoding="utf-8",
+    )
+
+    service = make_service(tmp_path)
+    repository = service.register_repository(name="Example", url=str(source))
+    first_summary = service.analyze_repository(repository.id)
+    approved_before = service.approve_candidate_graph(
+        repository.id,
+        first_summary.analysis_run.id,
+    )
+
+    app_file.write_text(
+        "from fastapi import FastAPI\n"
+        "app = FastAPI()\n"
+        '@app.get("/ready")\n'
+        "def ready():\n"
+        "    return {}\n",
+        encoding="utf-8",
+    )
+    second_summary = service.analyze_repository(repository.id)
+    approved_after_analysis = service.ability_map(repository.id)
+    diff = service.diff_analysis_runs(
+        repository.id,
+        first_summary.analysis_run.id,
+        second_summary.analysis_run.id,
+    )
+
+    assert approved_after_analysis.abilities[0].capabilities[0].features[0].name == (
+        approved_before.abilities[0].capabilities[0].features[0].name
+    )
+    assert any(item.item_type == "feature" for item in diff.candidates.added)
+    assert any(item.item_type == "feature" for item in diff.candidates.removed)
+    assert any(item.item_type == "feature" for item in diff.approved_entries.added)
+    assert any(item.item_type == "feature" for item in diff.approved_entries.removed)
+
+    approved_after_review = service.approve_analysis_run_changes(
+        repository.id,
+        second_summary.analysis_run.id,
+        notes="Accept route change.",
+    )
+
+    assert approved_after_review.abilities[0].capabilities[0].features[0].name == (
+        "GET /ready"
+    )
+    decisions = service.list_review_decisions(
+        repository.id,
+        second_summary.analysis_run.id,
+    )
+    assert decisions[0].action == "approve_analysis_run_changes"
+    assert decisions[0].notes == "Accept route change."
+
+
 def test_reject_candidate_ability_excludes_it_from_approval(tmp_path):
     source = tmp_path / "repo"
     source.mkdir()
diff --git a/tests/test_web_api.py b/tests/test_web_api.py
index f066d44..3f0cbde 100644
--- a/tests/test_web_api.py
+++ b/tests/test_web_api.py
@@ -45,6 +45,13 @@ def test_openapi_groups_agent_facing_endpoints():
     assert components["CandidateGraphResponse"]["examples"][0]["abilities"][0][
         "status"
     ] == "pending"
+    assert (
+        "/repos/{repository_id}/analysis-runs/{base_analysis_run_id}/diff/"
+        "{target_analysis_run_id}"
+    ) in schema["paths"]
+    assert (
+        "/repos/{repository_id}/analysis-runs/{analysis_run_id}/changes/approve"
+    ) in schema["paths"]
 
 
 def test_docs_endpoint_is_available():