service logic for facts, chunks, candidates, and current-approved-vs-target comparison

This commit is contained in:
2026-04-26 15:43:03 +02:00
parent 35274baac1
commit ea5da4a8e0
7 changed files with 600 additions and 0 deletions

View File

@@ -5,8 +5,15 @@ from dataclasses import asdict
from repo_registry.core.models import (
AbilitySummary,
AnalysisRunDiff,
AnalysisRunDiffItem,
AnalysisRunDiffSection,
AnalysisRun,
CapabilitySummary,
CandidateAbility,
CandidateCapability,
CandidateEvidence,
CandidateFeature,
CandidateGraph,
ContentChunk,
ObservedFact,
@@ -278,6 +285,68 @@ class RegistryService:
self.store.update_repository_status(repository_id, "indexed")
return self.store.get_ability_map(repository_id)
def diff_analysis_runs(
    self,
    repository_id: int,
    base_analysis_run_id: int,
    target_analysis_run_id: int,
) -> AnalysisRunDiff:
    """Compare two analysis runs of one repository, and the approved map against the target.

    The result carries four sections, each produced by ``_diff_items``:

    * ``facts`` - observed facts of the base run vs. the target run.
    * ``chunks`` - content chunks of the base run vs. the target run.
    * ``candidates`` - candidate graph of the base run vs. the target run.
    * ``approved_entries`` - the repository's current approved ability map
      vs. the target run's candidate graph.

    Args:
        repository_id: Repository both runs belong to.
        base_analysis_run_id: Run used as the "before" side.
        target_analysis_run_id: Run used as the "after" side.

    Returns:
        An ``AnalysisRunDiff`` holding the repository, both run records and
        the four diff sections.
    """

    def _sans_status(payload: dict[str, object]) -> dict[str, object]:
        # Candidate payloads carry a "status" field that approved payloads lack.
        return {field: value for field, value in payload.items() if field != "status"}

    repository = self.store.get_repository(repository_id)
    base_run = self.store.get_analysis_run(repository_id, base_analysis_run_id)
    target_run = self.store.get_analysis_run(repository_id, target_analysis_run_id)
    base_graph = self.store.get_candidate_graph(repository_id, base_analysis_run_id)
    target_graph = self.store.get_candidate_graph(repository_id, target_analysis_run_id)
    approved_map = self.store.get_ability_map(repository_id)

    facts = self._diff_items(
        self._fact_index(
            self.store.list_observed_facts(repository_id, base_analysis_run_id)
        ),
        self._fact_index(
            self.store.list_observed_facts(repository_id, target_analysis_run_id)
        ),
    )
    chunks = self._diff_items(
        self._chunk_index(
            self.store.list_content_chunks(repository_id, base_analysis_run_id)
        ),
        self._chunk_index(
            self.store.list_content_chunks(repository_id, target_analysis_run_id)
        ),
    )

    # The target candidate index feeds two comparisons; build it once.
    target_candidates = self._candidate_index(target_graph.abilities)
    candidates = self._diff_items(
        self._candidate_index(base_graph.abilities),
        target_candidates,
    )

    approved_section = self._diff_items(
        self._approved_index(approved_map.abilities),
        target_candidates,
    )
    # Approved ability/capability payloads have no "status" key while the
    # candidate payloads do, so plain dict equality reports every shared
    # ability and capability as "changed". Keep only the entries that still
    # differ once that candidate-only field is ignored. "weakened" entries
    # are left alone: they reflect a real confidence/strength drop.
    approved_entries = AnalysisRunDiffSection(
        added=approved_section.added,
        removed=approved_section.removed,
        changed=[
            item
            for item in approved_section.changed
            if _sans_status(item.base) != _sans_status(item.target)
        ],
        weakened=approved_section.weakened,
    )

    return AnalysisRunDiff(
        repository=repository,
        base_run=base_run,
        target_run=target_run,
        facts=facts,
        chunks=chunks,
        candidates=candidates,
        approved_entries=approved_entries,
    )
def approve_analysis_run_changes(
    self,
    repository_id: int,
    analysis_run_id: int,
    *,
    notes: str = "",
) -> RepositoryAbilityMap:
    """Approve a run's candidate graph and return the resulting ability map.

    Steps, in order: load the run's candidate graph, replace the approved
    entries from it, mark the candidate graph "approved", record a review
    decision with the given notes, and set the repository status to
    "indexed".

    Args:
        repository_id: Repository whose approved map is replaced.
        analysis_run_id: Run whose candidate graph is approved.
        notes: Free-form text stored with the review decision.

    Returns:
        The repository's ability map as read back from the store afterwards.
    """
    store = self.store
    candidate_graph = store.get_candidate_graph(repository_id, analysis_run_id)

    store.replace_approved_from_candidate_graph(repository_id, candidate_graph)
    store.mark_candidate_graph_status(repository_id, analysis_run_id, "approved")
    store.create_review_decision(
        repository_id,
        analysis_run_id,
        action="approve_analysis_run_changes",
        notes=notes,
    )
    store.update_repository_status(repository_id, "indexed")

    return store.get_ability_map(repository_id)
def reject_candidate_ability(
self,
repository_id: int,
@@ -975,6 +1044,263 @@ class RegistryService:
return strength
return None
def _diff_items(
    self,
    base: dict[str, dict[str, object]],
    target: dict[str, dict[str, object]],
) -> AnalysisRunDiffSection:
    """Classify keyed payloads as added, removed, changed or weakened.

    Keys only in ``target`` are "added", keys only in ``base`` are "removed".
    Keys present on both sides with unequal payloads are "weakened" when
    ``_is_weakened`` says so and "changed" otherwise; equal payloads are
    omitted. Every bucket is ordered by key.
    """
    base_keys = base.keys()
    target_keys = target.keys()

    added: list[AnalysisRunDiffItem] = [
        AnalysisRunDiffItem(
            change_type="added",
            item_type=str(target[key]["item_type"]),
            key=key,
            target=target[key],
        )
        for key in sorted(target_keys - base_keys)
    ]
    removed: list[AnalysisRunDiffItem] = [
        AnalysisRunDiffItem(
            change_type="removed",
            item_type=str(base[key]["item_type"]),
            key=key,
            base=base[key],
        )
        for key in sorted(base_keys - target_keys)
    ]

    changed: list[AnalysisRunDiffItem] = []
    weakened: list[AnalysisRunDiffItem] = []
    for key in sorted(base_keys & target_keys):
        before = base[key]
        after = target[key]
        if before == after:
            continue
        # Pick the label and its destination bucket together.
        if self._is_weakened(before, after):
            label, bucket = "weakened", weakened
        else:
            label, bucket = "changed", changed
        bucket.append(
            AnalysisRunDiffItem(
                change_type=label,
                item_type=str(after["item_type"]),
                key=key,
                base=before,
                target=after,
            )
        )

    return AnalysisRunDiffSection(
        added=added,
        removed=removed,
        changed=changed,
        weakened=weakened,
    )
def _is_weakened(
self,
base: dict[str, object],
target: dict[str, object],
) -> bool:
base_confidence = base.get("confidence")
target_confidence = target.get("confidence")
if (
isinstance(base_confidence, int | float)
and isinstance(target_confidence, int | float)
and target_confidence < base_confidence
):
return True
base_strength = base.get("strength")
target_strength = target.get("strength")
strength_order = {"weak": 1, "medium": 2, "strong": 3}
return (
isinstance(base_strength, str)
and isinstance(target_strength, str)
and strength_order.get(target_strength, 0) < strength_order.get(base_strength, 0)
)
def _fact_index(self, facts: Sequence[ObservedFact]) -> dict[str, dict[str, object]]:
return {
f"fact:{fact.kind}:{fact.path}:{fact.name}": {
"item_type": "fact",
"kind": fact.kind,
"path": fact.path,
"name": fact.name,
"value": fact.value,
"metadata": fact.metadata,
}
for fact in facts
}
def _chunk_index(
self,
chunks: Sequence[ContentChunk],
) -> dict[str, dict[str, object]]:
return {
f"chunk:{chunk.kind}:{chunk.path}:{chunk.start_line}:{chunk.end_line}": {
"item_type": "chunk",
"kind": chunk.kind,
"path": chunk.path,
"start_line": chunk.start_line,
"end_line": chunk.end_line,
"text": chunk.text,
}
for chunk in chunks
}
def _candidate_index(
self,
abilities: Sequence[CandidateAbility],
) -> dict[str, dict[str, object]]:
index: dict[str, dict[str, object]] = {}
for ability in abilities:
ability_key = self._entry_key("ability", ability.name)
index[ability_key] = {
"item_type": "ability",
"name": ability.name,
"description": ability.description,
"confidence": ability.confidence,
"status": ability.status,
}
for capability in ability.capabilities:
capability_key = self._entry_key(
"capability",
ability.name,
capability.name,
)
index[capability_key] = {
"item_type": "capability",
"ability_name": ability.name,
"name": capability.name,
"description": capability.description,
"inputs": capability.inputs,
"outputs": capability.outputs,
"confidence": capability.confidence,
"status": capability.status,
}
self._index_candidate_leaves(index, ability, capability)
return index
def _index_candidate_leaves(
self,
index: dict[str, dict[str, object]],
ability: CandidateAbility,
capability: CandidateCapability,
) -> None:
for feature in capability.features:
key = self._entry_key(
"feature",
ability.name,
capability.name,
feature.name,
feature.type,
feature.location,
)
index[key] = self._feature_payload(
feature,
ability_name=ability.name,
capability_name=capability.name,
)
for evidence in capability.evidence:
key = self._entry_key(
"evidence",
ability.name,
capability.name,
evidence.type,
evidence.reference,
)
index[key] = self._evidence_payload(
evidence,
ability_name=ability.name,
capability_name=capability.name,
)
def _approved_index(self, abilities) -> dict[str, dict[str, object]]:
index: dict[str, dict[str, object]] = {}
for ability in abilities:
ability_key = self._entry_key("ability", ability.name)
index[ability_key] = {
"item_type": "ability",
"name": ability.name,
"description": ability.description,
"confidence": ability.confidence,
}
for capability in ability.capabilities:
capability_key = self._entry_key(
"capability",
ability.name,
capability.name,
)
index[capability_key] = {
"item_type": "capability",
"ability_name": ability.name,
"name": capability.name,
"description": capability.description,
"inputs": capability.inputs,
"outputs": capability.outputs,
"confidence": capability.confidence,
}
for feature in capability.features:
key = self._entry_key(
"feature",
ability.name,
capability.name,
feature.name,
feature.type,
feature.location,
)
index[key] = self._feature_payload(
feature,
ability_name=ability.name,
capability_name=capability.name,
)
for evidence in capability.evidence:
key = self._entry_key(
"evidence",
ability.name,
capability.name,
evidence.type,
evidence.reference,
)
index[key] = self._evidence_payload(
evidence,
ability_name=ability.name,
capability_name=capability.name,
)
return index
def _feature_payload(
self,
feature: CandidateFeature,
*,
ability_name: str,
capability_name: str,
) -> dict[str, object]:
return {
"item_type": "feature",
"ability_name": ability_name,
"capability_name": capability_name,
"name": feature.name,
"type": feature.type,
"location": feature.location,
"confidence": feature.confidence,
}
def _evidence_payload(
self,
evidence: CandidateEvidence,
*,
ability_name: str,
capability_name: str,
) -> dict[str, object]:
return {
"item_type": "evidence",
"ability_name": ability_name,
"capability_name": capability_name,
"type": evidence.type,
"reference": evidence.reference,
"strength": evidence.strength,
}
def _entry_key(self, *parts: str) -> str:
return ":".join(part.strip().lower() for part in parts)
def _yaml_list(self, values: Sequence[str]) -> str:
    """Render the values as a bracketed, comma-separated list, passing each through ``_yaml_scalar``."""
    rendered = [self._yaml_scalar(value) for value in values]
    return "[" + ", ".join(rendered) + "]"