service logic for facts, chunks, candidates, and current-approved-vs-target comparison

This commit is contained in:
2026-04-26 15:43:03 +02:00
parent 35274baac1
commit ea5da4a8e0
7 changed files with 600 additions and 0 deletions

View File

@@ -54,6 +54,34 @@ class ReviewDecision:
created_at: str
@dataclass(frozen=True)
class AnalysisRunDiffItem:
    """One entry of an analysis-run diff, identified by ``key``.

    Holds the payload of the base side, the target side, or both.
    """

    # Classification of the difference (for example "added" or "removed").
    change_type: str
    # Kind of entry the payload describes, taken from the payload's "item_type".
    item_type: str
    # Index key under which the entry was matched between the two sides.
    key: str
    # Payload on the base side; None when the entry has no base counterpart.
    base: dict[str, Any] | None = None
    # Payload on the target side; None when the entry has no target counterpart.
    target: dict[str, Any] | None = None
@dataclass(frozen=True)
class AnalysisRunDiffSection:
    """Diff items of one category, grouped by the kind of change.

    Every bucket defaults to its own empty list.
    """

    # Entries that exist only on the target side.
    added: list[AnalysisRunDiffItem] = field(default_factory=list)
    # Entries that exist only on the base side.
    removed: list[AnalysisRunDiffItem] = field(default_factory=list)
    # Entries present on both sides whose payloads differ.
    changed: list[AnalysisRunDiffItem] = field(default_factory=list)
    # Entries present on both sides that are classified as weakened.
    weakened: list[AnalysisRunDiffItem] = field(default_factory=list)
@dataclass(frozen=True)
class AnalysisRunDiff:
    """Result of comparing two analysis runs of one repository."""

    # Repository both runs belong to.
    repository: Repository
    # Run used as the "before" side of the comparison.
    base_run: AnalysisRun
    # Run used as the "after" side of the comparison.
    target_run: AnalysisRun
    # Differences between the observed facts of the two runs.
    facts: AnalysisRunDiffSection
    # Differences between the content chunks of the two runs.
    chunks: AnalysisRunDiffSection
    # Differences between the candidate entries of the two runs.
    candidates: AnalysisRunDiffSection
    # Differences between the approved entries and the target run's candidates.
    approved_entries: AnalysisRunDiffSection
@dataclass(frozen=True)
class ObservedFact:
id: int

View File

@@ -5,8 +5,15 @@ from dataclasses import asdict
from repo_registry.core.models import (
AbilitySummary,
AnalysisRunDiff,
AnalysisRunDiffItem,
AnalysisRunDiffSection,
AnalysisRun,
CapabilitySummary,
CandidateAbility,
CandidateCapability,
CandidateEvidence,
CandidateFeature,
CandidateGraph,
ContentChunk,
ObservedFact,
@@ -278,6 +285,68 @@ class RegistryService:
self.store.update_repository_status(repository_id, "indexed")
return self.store.get_ability_map(repository_id)
def diff_analysis_runs(
    self,
    repository_id: int,
    base_analysis_run_id: int,
    target_analysis_run_id: int,
) -> AnalysisRunDiff:
    """Compare two analysis runs and the approved map against the target run.

    Four sections are produced, each through ``_diff_items``:

    * ``facts`` and ``chunks``: base run versus target run.
    * ``candidates``: candidate graph of the base run versus the target run.
    * ``approved_entries``: the currently approved ability map versus the
      candidates of the target run.

    Args:
        repository_id: Repository both runs belong to.
        base_analysis_run_id: Run used as the "before" side.
        target_analysis_run_id: Run used as the "after" side.

    Returns:
        The repository, both run records and the four diff sections.
    """
    store = self.store
    repository = store.get_repository(repository_id)
    base_run = store.get_analysis_run(repository_id, base_analysis_run_id)
    target_run = store.get_analysis_run(repository_id, target_analysis_run_id)
    base_graph = store.get_candidate_graph(repository_id, base_analysis_run_id)
    target_graph = store.get_candidate_graph(repository_id, target_analysis_run_id)
    approved_map = store.get_ability_map(repository_id)

    facts = self._diff_items(
        self._fact_index(store.list_observed_facts(repository_id, base_analysis_run_id)),
        self._fact_index(store.list_observed_facts(repository_id, target_analysis_run_id)),
    )
    chunks = self._diff_items(
        self._chunk_index(store.list_content_chunks(repository_id, base_analysis_run_id)),
        self._chunk_index(store.list_content_chunks(repository_id, target_analysis_run_id)),
    )

    target_candidates = self._candidate_index(target_graph.abilities)
    candidates = self._diff_items(
        self._candidate_index(base_graph.abilities),
        target_candidates,
    )

    # Approved payloads carry no review "status" field, while candidate
    # payloads do. Compared as-is, every ability and capability present on
    # both sides would look different and be reported as changed. The status
    # is therefore dropped from the target side for this section only.
    target_without_status = {
        key: {name: value for name, value in payload.items() if name != "status"}
        for key, payload in target_candidates.items()
    }
    approved_entries = self._diff_items(
        self._approved_index(approved_map.abilities),
        target_without_status,
    )

    return AnalysisRunDiff(
        repository=repository,
        base_run=base_run,
        target_run=target_run,
        facts=facts,
        chunks=chunks,
        candidates=candidates,
        approved_entries=approved_entries,
    )
def approve_analysis_run_changes(
    self,
    repository_id: int,
    analysis_run_id: int,
    *,
    notes: str = "",
) -> RepositoryAbilityMap:
    """Publish the candidate graph of an analysis run as the approved map.

    Steps, in order: load the run's candidate graph, replace the approved
    entries with it, mark the candidate graph as "approved", record a
    review decision with the given notes, and set the repository status
    to "indexed".

    Args:
        repository_id: Repository whose approved map is replaced.
        analysis_run_id: Run whose candidate graph is published.
        notes: Reviewer notes stored with the review decision.

    Returns:
        The ability map read back from the store after the update.
    """
    store = self.store
    candidate_graph = store.get_candidate_graph(repository_id, analysis_run_id)
    store.replace_approved_from_candidate_graph(repository_id, candidate_graph)
    store.mark_candidate_graph_status(repository_id, analysis_run_id, "approved")
    store.create_review_decision(
        repository_id,
        analysis_run_id,
        action="approve_analysis_run_changes",
        notes=notes,
    )
    store.update_repository_status(repository_id, "indexed")
    published_map = store.get_ability_map(repository_id)
    return published_map
def reject_candidate_ability(
self,
repository_id: int,
@@ -975,6 +1044,263 @@ class RegistryService:
return strength
return None
def _diff_items(
    self,
    base: dict[str, dict[str, object]],
    target: dict[str, dict[str, object]],
) -> AnalysisRunDiffSection:
    """Classify the differences between two keyed payload indexes.

    Keys only in ``target`` become "added" items and keys only in ``base``
    become "removed" items. Keys on both sides with unequal payloads become
    "weakened" when ``_is_weakened`` says so, otherwise "changed". Equal
    payloads produce no item. Every bucket is ordered by key.

    Args:
        base: Payloads of the "before" side, keyed by entry key.
        target: Payloads of the "after" side, keyed by entry key.

    Returns:
        The four buckets wrapped in an ``AnalysisRunDiffSection``.
    """
    base_keys = base.keys()
    target_keys = target.keys()

    added = [
        AnalysisRunDiffItem(
            change_type="added",
            item_type=str(target[key]["item_type"]),
            key=key,
            target=target[key],
        )
        for key in sorted(target_keys - base_keys)
    ]
    removed = [
        AnalysisRunDiffItem(
            change_type="removed",
            item_type=str(base[key]["item_type"]),
            key=key,
            base=base[key],
        )
        for key in sorted(base_keys - target_keys)
    ]

    changed: list[AnalysisRunDiffItem] = []
    weakened: list[AnalysisRunDiffItem] = []
    for key in sorted(base_keys & target_keys):
        before, after = base[key], target[key]
        if before == after:
            continue
        is_weaker = self._is_weakened(before, after)
        entry = AnalysisRunDiffItem(
            change_type="weakened" if is_weaker else "changed",
            item_type=str(after["item_type"]),
            key=key,
            base=before,
            target=after,
        )
        bucket = weakened if is_weaker else changed
        bucket.append(entry)

    return AnalysisRunDiffSection(
        added=added,
        removed=removed,
        changed=changed,
        weakened=weakened,
    )
def _is_weakened(
self,
base: dict[str, object],
target: dict[str, object],
) -> bool:
base_confidence = base.get("confidence")
target_confidence = target.get("confidence")
if (
isinstance(base_confidence, int | float)
and isinstance(target_confidence, int | float)
and target_confidence < base_confidence
):
return True
base_strength = base.get("strength")
target_strength = target.get("strength")
strength_order = {"weak": 1, "medium": 2, "strong": 3}
return (
isinstance(base_strength, str)
and isinstance(target_strength, str)
and strength_order.get(target_strength, 0) < strength_order.get(base_strength, 0)
)
def _fact_index(self, facts: Sequence[ObservedFact]) -> dict[str, dict[str, object]]:
    """Index observed facts by a key built from kind, path and name.

    When several facts share a key, the payload of the last one is kept.

    Args:
        facts: Facts to index.

    Returns:
        Mapping from ``fact:<kind>:<path>:<name>`` to the fact payload.
    """
    indexed: dict[str, dict[str, object]] = {}
    for fact in facts:
        fact_key = f"fact:{fact.kind}:{fact.path}:{fact.name}"
        indexed[fact_key] = {
            "item_type": "fact",
            "kind": fact.kind,
            "path": fact.path,
            "name": fact.name,
            "value": fact.value,
            "metadata": fact.metadata,
        }
    return indexed
def _chunk_index(
    self,
    chunks: Sequence[ContentChunk],
) -> dict[str, dict[str, object]]:
    """Index content chunks by a key built from kind, path and line range.

    When several chunks share a key, the payload of the last one is kept.

    Args:
        chunks: Chunks to index.

    Returns:
        Mapping from ``chunk:<kind>:<path>:<start_line>:<end_line>`` to the
        chunk payload, which also includes the chunk text.
    """
    indexed: dict[str, dict[str, object]] = {}
    for chunk in chunks:
        chunk_key = f"chunk:{chunk.kind}:{chunk.path}:{chunk.start_line}:{chunk.end_line}"
        indexed[chunk_key] = {
            "item_type": "chunk",
            "kind": chunk.kind,
            "path": chunk.path,
            "start_line": chunk.start_line,
            "end_line": chunk.end_line,
            "text": chunk.text,
        }
    return indexed
def _candidate_index(
    self,
    abilities: Sequence[CandidateAbility],
) -> dict[str, dict[str, object]]:
    """Flatten candidate abilities into a keyed index of comparable payloads.

    Each ability and each of its capabilities gets one entry that includes
    its review status. Features and evidence of every capability are added
    through ``_index_candidate_leaves``.

    Args:
        abilities: Candidate abilities with their nested capabilities.

    Returns:
        Mapping from entry key to payload for every indexed entry.
    """
    entries: dict[str, dict[str, object]] = {}
    for ability in abilities:
        ability_payload: dict[str, object] = {
            "item_type": "ability",
            "name": ability.name,
            "description": ability.description,
            "confidence": ability.confidence,
            "status": ability.status,
        }
        entries[self._entry_key("ability", ability.name)] = ability_payload

        for capability in ability.capabilities:
            capability_payload: dict[str, object] = {
                "item_type": "capability",
                "ability_name": ability.name,
                "name": capability.name,
                "description": capability.description,
                "inputs": capability.inputs,
                "outputs": capability.outputs,
                "confidence": capability.confidence,
                "status": capability.status,
            }
            capability_key = self._entry_key("capability", ability.name, capability.name)
            entries[capability_key] = capability_payload
            self._index_candidate_leaves(entries, ability, capability)
    return entries
def _index_candidate_leaves(
    self,
    index: dict[str, dict[str, object]],
    ability: CandidateAbility,
    capability: CandidateCapability,
) -> None:
    """Add the features and evidence of one capability to ``index``.

    Feature keys combine ability, capability, feature name, type and
    location. Evidence keys combine ability, capability, evidence type and
    reference. ``index`` is modified in place.

    Args:
        index: Index that receives the new entries.
        ability: Ability that owns ``capability``.
        capability: Capability whose features and evidence are indexed.
    """
    owner = (ability.name, capability.name)

    for feature in capability.features:
        feature_key = self._entry_key(
            "feature",
            *owner,
            feature.name,
            feature.type,
            feature.location,
        )
        index[feature_key] = self._feature_payload(
            feature,
            ability_name=ability.name,
            capability_name=capability.name,
        )

    for evidence in capability.evidence:
        evidence_key = self._entry_key(
            "evidence",
            *owner,
            evidence.type,
            evidence.reference,
        )
        index[evidence_key] = self._evidence_payload(
            evidence,
            ability_name=ability.name,
            capability_name=capability.name,
        )
def _approved_index(self, abilities) -> dict[str, dict[str, object]]:
    """Flatten approved abilities into a keyed index of comparable payloads.

    Keys and payload fields match those of ``_candidate_index``, except
    that ability and capability payloads carry no "status" field.

    Args:
        abilities: Approved abilities with their nested capabilities,
            features and evidence.

    Returns:
        Mapping from entry key to payload for every indexed entry.
    """
    index: dict[str, dict[str, object]] = {}
    for ability in abilities:
        ability_key = self._entry_key("ability", ability.name)
        index[ability_key] = {
            "item_type": "ability",
            "name": ability.name,
            "description": ability.description,
            "confidence": ability.confidence,
        }
        for capability in ability.capabilities:
            capability_key = self._entry_key(
                "capability",
                ability.name,
                capability.name,
            )
            index[capability_key] = {
                "item_type": "capability",
                "ability_name": ability.name,
                "name": capability.name,
                "description": capability.description,
                "inputs": capability.inputs,
                "outputs": capability.outputs,
                "confidence": capability.confidence,
            }
            # Features and evidence are keyed and rendered exactly as on the
            # candidate side. Sharing the helper keeps both sides from
            # drifting apart, which would break key matching in the diff.
            self._index_candidate_leaves(index, ability, capability)
    return index
def _feature_payload(
    self,
    feature: CandidateFeature,
    *,
    ability_name: str,
    capability_name: str,
) -> dict[str, object]:
    """Build the comparable payload of one feature.

    Args:
        feature: Feature to describe.
        ability_name: Name of the ability that owns the feature.
        capability_name: Name of the capability that owns the feature.

    Returns:
        Payload with the owner names followed by the feature's name, type,
        location and confidence.
    """
    owner_fields: dict[str, object] = {
        "item_type": "feature",
        "ability_name": ability_name,
        "capability_name": capability_name,
    }
    feature_fields: dict[str, object] = {
        "name": feature.name,
        "type": feature.type,
        "location": feature.location,
        "confidence": feature.confidence,
    }
    return {**owner_fields, **feature_fields}
def _evidence_payload(
    self,
    evidence: CandidateEvidence,
    *,
    ability_name: str,
    capability_name: str,
) -> dict[str, object]:
    """Build the comparable payload of one piece of evidence.

    Args:
        evidence: Evidence to describe.
        ability_name: Name of the ability that owns the evidence.
        capability_name: Name of the capability that owns the evidence.

    Returns:
        Payload with the owner names followed by the evidence's type,
        reference and strength.
    """
    owner_fields: dict[str, object] = {
        "item_type": "evidence",
        "ability_name": ability_name,
        "capability_name": capability_name,
    }
    evidence_fields: dict[str, object] = {
        "type": evidence.type,
        "reference": evidence.reference,
        "strength": evidence.strength,
    }
    return {**owner_fields, **evidence_fields}
def _entry_key(self, *parts: str) -> str:
return ":".join(part.strip().lower() for part in parts)
def _yaml_list(self, values: Sequence[str]) -> str:
    """Render ``values`` as an inline bracketed list.

    Each value is rendered with ``_yaml_scalar``. The results are joined
    with ", " and wrapped in square brackets; no values yields "[]".
    """
    rendered = [self._yaml_scalar(value) for value in values]
    return "[" + ", ".join(rendered) + "]"

View File

@@ -1512,6 +1512,99 @@ class RegistryStore:
row_id=evidence_id,
)
def replace_approved_from_candidate_graph(
    self,
    repository_id: int,
    graph: CandidateGraph,
) -> None:
    """Rebuild the approved entries of a repository from a candidate graph.

    The repository's rows in ``approved_abilities`` are deleted, then one
    row is inserted per ability, capability, feature and evidence of the
    graph. An entry is copied only if its status is "candidate" or
    "approved"; skipping an ability or capability also skips everything
    nested under it.

    Args:
        repository_id: Repository whose approved entries are replaced.
        graph: Candidate graph to publish.

    Raises:
        NotFoundError: If ``graph`` belongs to a different repository.
    """
    if graph.repository.id != repository_id:
        raise NotFoundError(
            f"candidate graph for repository {graph.repository.id} does not match "
            f"repository {repository_id}"
        )

    # Statuses that may be published; anything else is left out.
    publishable = {"candidate", "approved"}

    insert_ability = """
        INSERT INTO approved_abilities
        (repository_id, name, description, confidence)
        VALUES (?, ?, ?, ?)
        """
    insert_capability = """
        INSERT INTO approved_capabilities
        (repository_id, ability_id, name, description, inputs, outputs,
        confidence)
        VALUES (?, ?, ?, ?, ?, ?, ?)
        """
    insert_feature = """
        INSERT INTO approved_features
        (repository_id, capability_id, name, type, location,
        confidence, source_refs)
        VALUES (?, ?, ?, ?, ?, ?, ?)
        """
    insert_evidence = """
        INSERT INTO approved_evidence
        (repository_id, capability_id, type, reference, strength,
        source_refs)
        VALUES (?, ?, ?, ?, ?, ?)
        """

    with self.connect() as connection:
        # NOTE(review): only approved_abilities is deleted here; removal of
        # the dependent capability/feature/evidence rows presumably relies
        # on cascading deletes in the schema - confirm.
        connection.execute(
            "DELETE FROM approved_abilities WHERE repository_id = ?",
            (repository_id,),
        )
        for ability in graph.abilities:
            if ability.status not in publishable:
                continue
            ability_row = (
                repository_id,
                ability.name,
                ability.description,
                ability.confidence,
            )
            approved_ability_id = int(connection.execute(insert_ability, ability_row).lastrowid)

            for capability in ability.capabilities:
                if capability.status not in publishable:
                    continue
                capability_row = (
                    repository_id,
                    approved_ability_id,
                    capability.name,
                    capability.description,
                    json.dumps(capability.inputs),
                    json.dumps(capability.outputs),
                    capability.confidence,
                )
                approved_capability_id = int(
                    connection.execute(insert_capability, capability_row).lastrowid
                )

                for feature in capability.features:
                    if feature.status not in publishable:
                        continue
                    connection.execute(
                        insert_feature,
                        (
                            repository_id,
                            approved_capability_id,
                            feature.name,
                            feature.type,
                            feature.location,
                            feature.confidence,
                            self._source_refs_to_json(feature.source_refs),
                        ),
                    )

                for evidence in capability.evidence:
                    if evidence.status not in publishable:
                        continue
                    connection.execute(
                        insert_evidence,
                        (
                            repository_id,
                            approved_capability_id,
                            evidence.type,
                            evidence.reference,
                            evidence.strength,
                            self._source_refs_to_json(evidence.source_refs),
                        ),
                    )
def get_ability_map(self, repository_id: int) -> RepositoryAbilityMap:
repository = self.get_repository(repository_id)
with self.connect() as connection:

View File

@@ -16,7 +16,9 @@ from repo_registry.web_api.schemas import (
AbilityCreate,
AbilitySummaryResponse,
AbilityUpdate,
AnalysisRunChangeApproval,
AnalysisRunCreate,
AnalysisRunDiffResponse,
AnalysisRunResponse,
CandidateAbilityMerge,
CandidateCapabilityMerge,
@@ -239,6 +241,29 @@ def get_analysis_run(
raise HTTPException(status_code=404, detail=str(exc)) from exc
# Endpoint returning the diff between a base and a target analysis run.
# A NotFoundError raised by the service is answered with HTTP 404.
# (Comments rather than a docstring: a docstring would be published as the
# endpoint description in the OpenAPI schema.)
@app.get(
    "/repos/{repository_id}/analysis-runs/{base_analysis_run_id}/diff/{target_analysis_run_id}",
    tags=["review"],
    response_model=AnalysisRunDiffResponse,
)
def diff_analysis_runs(
    repository_id: int,
    base_analysis_run_id: int,
    target_analysis_run_id: int,
    service: RegistryService = Depends(get_service),
) -> dict[str, object]:
    try:
        run_diff = service.diff_analysis_runs(
            repository_id,
            base_analysis_run_id,
            target_analysis_run_id,
        )
    except NotFoundError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    # The dataclass result is converted to plain dicts for the response model.
    return asdict(run_diff)
@app.get(
"/repos/{repository_id}/review-decisions",
tags=["review"],
@@ -375,6 +400,29 @@ def approve_candidate_graph(
raise HTTPException(status_code=404, detail=str(exc)) from exc
# Endpoint approving the changes of an analysis run; responds with the
# ability map returned by the service. A NotFoundError raised by the service
# is answered with HTTP 404.
# (Comments rather than a docstring: a docstring would be published as the
# endpoint description in the OpenAPI schema.)
@app.post(
    "/repos/{repository_id}/analysis-runs/{analysis_run_id}/changes/approve",
    tags=["review"],
    response_model=RepositoryAbilityMapResponse,
)
def approve_analysis_run_changes(
    repository_id: int,
    analysis_run_id: int,
    payload: AnalysisRunChangeApproval,
    service: RegistryService = Depends(get_service),
) -> dict[str, object]:
    try:
        ability_map = service.approve_analysis_run_changes(
            repository_id,
            analysis_run_id,
            notes=payload.notes,
        )
    except NotFoundError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    # The dataclass result is converted to plain dicts for the response model.
    return asdict(ability_map)
@app.post(
"/repos/{repository_id}/analysis-runs/{analysis_run_id}"
"/candidate-abilities/{candidate_ability_id}/reject",

View File

@@ -206,6 +206,16 @@ class CandidateGraphApproval(BaseModel):
}
# Request body for approving the changes of an analysis run.
# (Documented with comments: a class docstring would be added to the
# generated JSON schema as its description.)
class AnalysisRunChangeApproval(BaseModel):
    # Optional reviewer notes; defaults to an empty string.
    notes: str = ""

    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{"notes": "Accept target run changes after review."}]
        }
    }
class CandidateRejection(BaseModel):
notes: str = ""
@@ -506,6 +516,31 @@ class CandidateGraphResponse(BaseModel):
}
# Response schema for one entry of an analysis-run diff.
# (Documented with comments: a class docstring would be added to the
# generated JSON schema as its description.)
class AnalysisRunDiffItemResponse(BaseModel):
    # Classification of the difference.
    change_type: str
    # Kind of entry the payloads describe.
    item_type: str
    # Key under which the entry was matched between the two sides.
    key: str
    # Payload on the base side, if any.
    base: dict[str, Any] | None = None
    # Payload on the target side, if any.
    target: dict[str, Any] | None = None
# Response schema for one diff section: items grouped by kind of change.
# All four lists are required (no defaults).
# (Documented with comments: a class docstring would be added to the
# generated JSON schema as its description.)
class AnalysisRunDiffSectionResponse(BaseModel):
    added: list[AnalysisRunDiffItemResponse]
    removed: list[AnalysisRunDiffItemResponse]
    changed: list[AnalysisRunDiffItemResponse]
    weakened: list[AnalysisRunDiffItemResponse]
# Response schema for a full analysis-run diff: the repository, both runs,
# and one diff section per compared category.
# (Documented with comments: a class docstring would be added to the
# generated JSON schema as its description.)
class AnalysisRunDiffResponse(BaseModel):
    repository: RepositoryResponse
    # Run used as the "before" side.
    base_run: AnalysisRunResponse
    # Run used as the "after" side.
    target_run: AnalysisRunResponse
    facts: AnalysisRunDiffSectionResponse
    chunks: AnalysisRunDiffSectionResponse
    candidates: AnalysisRunDiffSectionResponse
    approved_entries: AnalysisRunDiffSectionResponse
class EvidenceResponse(BaseModel):
id: int
type: str

View File

@@ -509,6 +509,69 @@ def test_approve_candidate_graph_publishes_ability_map_once(tmp_path):
assert decisions[0].notes == "Looks good for the first pass."
def test_analysis_run_diff_keeps_approved_map_stable_until_change_approval(tmp_path):
    """A new analysis run must not alter the approved map until its changes are approved."""
    # Arrange: a small repository whose app exposes a single /health route.
    source = tmp_path / "repo"
    source.mkdir()
    (source / "README.md").write_text("# Example\n", encoding="utf-8")
    app_file = source / "app.py"
    app_file.write_text(
        "from fastapi import FastAPI\n"
        "app = FastAPI()\n"
        '@app.get("/health")\n'
        "def health():\n"
        " return {}\n",
        encoding="utf-8",
    )
    service = make_service(tmp_path)
    repository = service.register_repository(name="Example", url=str(source))

    # First run is analyzed and approved; this is the baseline approved map.
    first_summary = service.analyze_repository(repository.id)
    approved_before = service.approve_candidate_graph(
        repository.id,
        first_summary.analysis_run.id,
    )

    # Replace the /health route with /ready and analyze again, without approving.
    app_file.write_text(
        "from fastapi import FastAPI\n"
        "app = FastAPI()\n"
        '@app.get("/ready")\n'
        "def ready():\n"
        " return {}\n",
        encoding="utf-8",
    )
    second_summary = service.analyze_repository(repository.id)
    approved_after_analysis = service.ability_map(repository.id)
    diff = service.diff_analysis_runs(
        repository.id,
        first_summary.analysis_run.id,
        second_summary.analysis_run.id,
    )

    # The approved map still shows the feature approved from the first run.
    assert approved_after_analysis.abilities[0].capabilities[0].features[0].name == (
        approved_before.abilities[0].capabilities[0].features[0].name
    )
    # The route swap shows up as a feature added and a feature removed, both
    # between the two runs and between the approved map and the target run.
    assert any(item.item_type == "feature" for item in diff.candidates.added)
    assert any(item.item_type == "feature" for item in diff.candidates.removed)
    assert any(item.item_type == "feature" for item in diff.approved_entries.added)
    assert any(item.item_type == "feature" for item in diff.approved_entries.removed)

    # Approving the second run's changes publishes the new route.
    approved_after_review = service.approve_analysis_run_changes(
        repository.id,
        second_summary.analysis_run.id,
        notes="Accept route change.",
    )
    assert approved_after_review.abilities[0].capabilities[0].features[0].name == (
        "GET /ready"
    )
    # The approval is recorded as a review decision carrying the notes.
    decisions = service.list_review_decisions(
        repository.id,
        second_summary.analysis_run.id,
    )
    assert decisions[0].action == "approve_analysis_run_changes"
    assert decisions[0].notes == "Accept route change."
def test_reject_candidate_ability_excludes_it_from_approval(tmp_path):
source = tmp_path / "repo"
source.mkdir()

View File

@@ -45,6 +45,13 @@ def test_openapi_groups_agent_facing_endpoints():
assert components["CandidateGraphResponse"]["examples"][0]["abilities"][0][
"status"
] == "pending"
assert (
"/repos/{repository_id}/analysis-runs/{base_analysis_run_id}/diff/"
"{target_analysis_run_id}"
) in schema["paths"]
assert (
"/repos/{repository_id}/analysis-runs/{analysis_run_id}/changes/approve"
) in schema["paths"]
def test_docs_endpoint_is_available():