Milestone 3: candidate graph generation

This commit is contained in:
2026-04-25 22:42:13 +02:00
parent ef41a9974a
commit 519b7726e7
11 changed files with 634 additions and 0 deletions

View File

@@ -7,6 +7,11 @@ from pathlib import Path
from repo_registry.core.models import (
Ability,
AnalysisRun,
CandidateAbility,
CandidateCapability,
CandidateEvidence,
CandidateFeature,
CandidateGraph,
Capability,
Evidence,
Feature,
@@ -15,7 +20,9 @@ from repo_registry.core.models import (
RepositoryAbilityMap,
RepositorySnapshot,
SearchResult,
SourceReference,
)
from repo_registry.candidate_graph.generator import CandidateAbilityDraft
from repo_registry.repo_scanning.scanner import FactCandidate, ScanResult
@@ -158,6 +165,202 @@ class RegistryStore:
)
return self.get_analysis_run(repository_id, analysis_run_id)
def replace_candidate_graph(
    self,
    repository_id: int,
    analysis_run_id: int,
    abilities: list[CandidateAbilityDraft],
) -> None:
    """Replace the stored candidate graph of one analysis run.

    All candidate rows recorded for ``analysis_run_id`` are removed from the
    four candidate tables, then the given drafts are inserted in their
    place. Everything runs inside one ``with self.connect()`` block.

    Args:
        repository_id: Stored in the ``repository_id`` column of every
            inserted row.
        analysis_run_id: Run whose candidate rows are deleted and re-created.
        abilities: Ability drafts to persist. Each draft exposes
            ``capabilities``; each capability exposes ``features`` and
            ``evidence``.
    """
    # Children are deleted explicitly, child-to-parent. Relying on
    # ON DELETE CASCADE alone is fragile: SQLite only enforces foreign keys
    # when ``PRAGMA foreign_keys = ON`` has been issued on the connection,
    # and leftover child rows could otherwise attach to new abilities that
    # reuse a freed rowid.
    delete_statements = (
        "DELETE FROM candidate_evidence WHERE analysis_run_id = ?",
        "DELETE FROM candidate_features WHERE analysis_run_id = ?",
        "DELETE FROM candidate_capabilities WHERE analysis_run_id = ?",
        "DELETE FROM candidate_abilities WHERE analysis_run_id = ?",
    )
    with self.connect() as connection:
        for statement in delete_statements:
            connection.execute(statement, (analysis_run_id,))

        for ability in abilities:
            ability_cursor = connection.execute(
                """
                INSERT INTO candidate_abilities
                (repository_id, analysis_run_id, name, description, confidence, source_refs)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (
                    repository_id,
                    analysis_run_id,
                    ability.name,
                    ability.description,
                    ability.confidence,
                    self._source_refs_to_json(ability.source_refs),
                ),
            )
            # The generated primary key links the capabilities to this ability.
            ability_id = int(ability_cursor.lastrowid)

            for capability in ability.capabilities:
                capability_cursor = connection.execute(
                    """
                    INSERT INTO candidate_capabilities
                    (repository_id, analysis_run_id, ability_id, name, description,
                    inputs, outputs, confidence, source_refs)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        repository_id,
                        analysis_run_id,
                        ability_id,
                        capability.name,
                        capability.description,
                        json.dumps(capability.inputs),
                        json.dumps(capability.outputs),
                        capability.confidence,
                        self._source_refs_to_json(capability.source_refs),
                    ),
                )
                capability_id = int(capability_cursor.lastrowid)

                # Features and evidence are leaves (no generated id is
                # needed afterwards), so they are inserted in batches.
                connection.executemany(
                    """
                    INSERT INTO candidate_features
                    (repository_id, analysis_run_id, capability_id, name, type,
                    location, confidence, source_refs)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    [
                        (
                            repository_id,
                            analysis_run_id,
                            capability_id,
                            feature.name,
                            feature.type,
                            feature.location,
                            feature.confidence,
                            self._source_refs_to_json(feature.source_refs),
                        )
                        for feature in capability.features
                    ],
                )
                connection.executemany(
                    """
                    INSERT INTO candidate_evidence
                    (repository_id, analysis_run_id, capability_id, type,
                    reference, strength, source_refs)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                    """,
                    [
                        (
                            repository_id,
                            analysis_run_id,
                            capability_id,
                            evidence.type,
                            evidence.reference,
                            evidence.strength,
                            self._source_refs_to_json(evidence.source_refs),
                        )
                        for evidence in capability.evidence
                    ],
                )
def get_candidate_graph(
    self,
    repository_id: int,
    analysis_run_id: int,
) -> CandidateGraph:
    """Load the candidate graph stored for one analysis run.

    The repository and analysis run are fetched through ``get_repository``
    and ``get_analysis_run``. The four candidate tables are then read
    (each filtered by repository and run, ordered by ``id``) and assembled
    bottom-up: features and evidence are grouped under their capability,
    capabilities under their ability.

    Args:
        repository_id: Repository whose candidate rows are read.
        analysis_run_id: Analysis run whose candidate rows are read.

    Returns:
        A ``CandidateGraph`` holding the repository, the analysis run and
        the nested abilities.
    """
    repository = self.get_repository(repository_id)
    analysis_run = self.get_analysis_run(repository_id, analysis_run_id)
    scope = (repository_id, analysis_run_id)

    with self.connect() as connection:
        ability_rows = connection.execute(
            """
            SELECT id, name, description, confidence, status, source_refs
            FROM candidate_abilities
            WHERE repository_id = ? AND analysis_run_id = ?
            ORDER BY id
            """,
            scope,
        ).fetchall()
        capability_rows = connection.execute(
            """
            SELECT id, ability_id, name, description, inputs, outputs,
            confidence, status, source_refs
            FROM candidate_capabilities
            WHERE repository_id = ? AND analysis_run_id = ?
            ORDER BY id
            """,
            scope,
        ).fetchall()
        feature_rows = connection.execute(
            """
            SELECT id, capability_id, name, type, location, confidence,
            status, source_refs
            FROM candidate_features
            WHERE repository_id = ? AND analysis_run_id = ?
            ORDER BY id
            """,
            scope,
        ).fetchall()
        evidence_rows = connection.execute(
            """
            SELECT id, capability_id, type, reference, strength, status, source_refs
            FROM candidate_evidence
            WHERE repository_id = ? AND analysis_run_id = ?
            ORDER BY id
            """,
            scope,
        ).fetchall()

    parse_refs = self._source_refs_from_json

    # Leaves first: group features and evidence by owning capability id.
    features_by_capability: dict[int, list[CandidateFeature]] = {}
    for record in feature_rows:
        feature = CandidateFeature(
            id=record["id"],
            name=record["name"],
            type=record["type"],
            location=record["location"],
            confidence=record["confidence"],
            status=record["status"],
            source_refs=parse_refs(record["source_refs"]),
        )
        features_by_capability.setdefault(record["capability_id"], []).append(feature)

    evidence_by_capability: dict[int, list[CandidateEvidence]] = {}
    for record in evidence_rows:
        evidence_item = CandidateEvidence(
            id=record["id"],
            type=record["type"],
            reference=record["reference"],
            strength=record["strength"],
            status=record["status"],
            source_refs=parse_refs(record["source_refs"]),
        )
        evidence_by_capability.setdefault(record["capability_id"], []).append(evidence_item)

    # Capabilities pick up their grouped leaves and are grouped by ability id.
    capabilities_by_ability: dict[int, list[CandidateCapability]] = {}
    for record in capability_rows:
        capability_id = record["id"]
        capability = CandidateCapability(
            id=capability_id,
            name=record["name"],
            description=record["description"],
            inputs=json.loads(record["inputs"]),
            outputs=json.loads(record["outputs"]),
            confidence=record["confidence"],
            status=record["status"],
            source_refs=parse_refs(record["source_refs"]),
            features=features_by_capability.get(capability_id, []),
            evidence=evidence_by_capability.get(capability_id, []),
        )
        capabilities_by_ability.setdefault(record["ability_id"], []).append(capability)

    abilities = []
    for record in ability_rows:
        abilities.append(
            CandidateAbility(
                id=record["id"],
                name=record["name"],
                description=record["description"],
                confidence=record["confidence"],
                status=record["status"],
                source_refs=parse_refs(record["source_refs"]),
                capabilities=capabilities_by_ability.get(record["id"], []),
            )
        )

    return CandidateGraph(
        repository=repository,
        analysis_run=analysis_run,
        abilities=abilities,
    )
def fail_analysis_run(
self,
repository_id: int,
@@ -538,6 +741,30 @@ class RegistryStore:
],
)
def _source_refs_to_json(self, source_refs: list[SourceReference]) -> str:
    """Serialize source references to a JSON array string.

    Each reference becomes an object with the keys ``fact_id``, ``path``,
    ``kind`` and ``name``, in that order.

    Args:
        source_refs: References to serialize.

    Returns:
        The JSON text produced by ``json.dumps``.
    """
    payload = []
    for ref in source_refs:
        entry = {}
        entry["fact_id"] = ref.fact_id
        entry["path"] = ref.path
        entry["kind"] = ref.kind
        entry["name"] = ref.name
        payload.append(entry)
    return json.dumps(payload)
def _source_refs_from_json(self, value: str) -> list[SourceReference]:
    """Rebuild source references from a JSON array string.

    Every decoded object yields one ``SourceReference``. A missing
    ``fact_id`` key becomes ``None``; missing ``path``, ``kind`` or
    ``name`` keys become the empty string.

    Args:
        value: JSON text holding an array of objects.

    Returns:
        The references, in the order they appear in the array.
    """
    references = []
    for entry in json.loads(value):
        reference = SourceReference(
            fact_id=entry.get("fact_id"),
            path=entry.get("path", ""),
            kind=entry.get("kind", ""),
            name=entry.get("name", ""),
        )
        references.append(reference)
    return references
@staticmethod
def _repository_from_row(row: sqlite3.Row) -> Repository:
return Repository(