generated from coulomb/repo-seed
Milestone 3: candidate graph generation
This commit is contained in:
@@ -73,3 +73,11 @@ curl http://127.0.0.1:8000/repos/1/observed-facts
|
||||
```
|
||||
|
||||
The deterministic scanner records observed facts only: languages, documentation files, examples, tests, package manifests, configuration files, framework hints, and likely API/CLI interfaces.
|
||||
|
||||
Each completed analysis run also creates a conservative candidate graph for review:
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8000/repos/1/analysis-runs/1/candidate-graph
|
||||
```
|
||||
|
||||
Candidate entries are source-linked review seeds. They are not canonical registry truth until a review workflow approves them.
|
||||
|
||||
@@ -43,6 +43,60 @@ CREATE TABLE IF NOT EXISTS observed_facts (
|
||||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- Candidate abilities: top-level review seeds produced for one analysis run.
-- Both parent references are declared ON DELETE CASCADE.
CREATE TABLE IF NOT EXISTS candidate_abilities (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    repository_id   INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
    analysis_run_id INTEGER NOT NULL REFERENCES analysis_runs(id) ON DELETE CASCADE,
    name            TEXT NOT NULL,
    description     TEXT NOT NULL DEFAULT '',
    confidence      REAL NOT NULL DEFAULT 0.0,
    status          TEXT NOT NULL DEFAULT 'candidate',
    source_refs     TEXT NOT NULL DEFAULT '[]',  -- JSON-encoded list, empty by default
    created_at      TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
||||
-- Candidate capabilities: children of a candidate ability (ability_id).
-- inputs, outputs and source_refs hold JSON-encoded lists ('[]' when empty).
CREATE TABLE IF NOT EXISTS candidate_capabilities (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    repository_id   INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
    analysis_run_id INTEGER NOT NULL REFERENCES analysis_runs(id) ON DELETE CASCADE,
    ability_id      INTEGER NOT NULL REFERENCES candidate_abilities(id) ON DELETE CASCADE,
    name            TEXT NOT NULL,
    description     TEXT NOT NULL DEFAULT '',
    inputs          TEXT NOT NULL DEFAULT '[]',
    outputs         TEXT NOT NULL DEFAULT '[]',
    confidence      REAL NOT NULL DEFAULT 0.0,
    status          TEXT NOT NULL DEFAULT 'candidate',
    source_refs     TEXT NOT NULL DEFAULT '[]',
    created_at      TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
||||
-- Candidate features: concrete entry points attached to a candidate
-- capability (capability_id). source_refs holds a JSON-encoded list.
CREATE TABLE IF NOT EXISTS candidate_features (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    repository_id   INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
    analysis_run_id INTEGER NOT NULL REFERENCES analysis_runs(id) ON DELETE CASCADE,
    capability_id   INTEGER NOT NULL REFERENCES candidate_capabilities(id) ON DELETE CASCADE,
    name            TEXT NOT NULL,
    type            TEXT NOT NULL,
    location        TEXT NOT NULL DEFAULT '',
    confidence      REAL NOT NULL DEFAULT 0.0,
    status          TEXT NOT NULL DEFAULT 'candidate',
    source_refs     TEXT NOT NULL DEFAULT '[]',
    created_at      TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
||||
-- Candidate evidence: supporting references attached to a candidate
-- capability (capability_id). strength defaults to 'medium'.
CREATE TABLE IF NOT EXISTS candidate_evidence (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    repository_id   INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
    analysis_run_id INTEGER NOT NULL REFERENCES analysis_runs(id) ON DELETE CASCADE,
    capability_id   INTEGER NOT NULL REFERENCES candidate_capabilities(id) ON DELETE CASCADE,
    type            TEXT NOT NULL,
    reference       TEXT NOT NULL,
    strength        TEXT NOT NULL DEFAULT 'medium',
    status          TEXT NOT NULL DEFAULT 'candidate',
    source_refs     TEXT NOT NULL DEFAULT '[]',  -- JSON-encoded list, empty by default
    created_at      TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS approved_abilities (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
repository_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
|
||||
@@ -90,6 +144,10 @@ CREATE INDEX IF NOT EXISTS idx_snapshots_repository ON repository_snapshots(repo
|
||||
CREATE INDEX IF NOT EXISTS idx_analysis_runs_repository ON analysis_runs(repository_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_observed_facts_repository ON observed_facts(repository_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_observed_facts_run ON observed_facts(analysis_run_id);
|
||||
-- Candidate graph indexes.
-- The candidate tables are read and replaced per analysis run, and their rows
-- cascade through ability_id / capability_id, so index those columns as well
-- as repository_id (observed_facts is already indexed by both repository and run).
CREATE INDEX IF NOT EXISTS idx_candidate_abilities_repository ON candidate_abilities(repository_id);
CREATE INDEX IF NOT EXISTS idx_candidate_abilities_run ON candidate_abilities(analysis_run_id);
CREATE INDEX IF NOT EXISTS idx_candidate_capabilities_repository ON candidate_capabilities(repository_id);
CREATE INDEX IF NOT EXISTS idx_candidate_capabilities_run ON candidate_capabilities(analysis_run_id);
CREATE INDEX IF NOT EXISTS idx_candidate_capabilities_ability ON candidate_capabilities(ability_id);
CREATE INDEX IF NOT EXISTS idx_candidate_features_repository ON candidate_features(repository_id);
CREATE INDEX IF NOT EXISTS idx_candidate_features_run ON candidate_features(analysis_run_id);
CREATE INDEX IF NOT EXISTS idx_candidate_features_capability ON candidate_features(capability_id);
CREATE INDEX IF NOT EXISTS idx_candidate_evidence_repository ON candidate_evidence(repository_id);
CREATE INDEX IF NOT EXISTS idx_candidate_evidence_run ON candidate_evidence(analysis_run_id);
CREATE INDEX IF NOT EXISTS idx_candidate_evidence_capability ON candidate_evidence(capability_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_abilities_repository ON approved_abilities(repository_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_capabilities_repository ON approved_capabilities(repository_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_features_repository ON approved_features(repository_id);
|
||||
|
||||
1
src/repo_registry/candidate_graph/__init__.py
Normal file
1
src/repo_registry/candidate_graph/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Candidate ability graph generation."""
|
||||
194
src/repo_registry/candidate_graph/generator.py
Normal file
194
src/repo_registry/candidate_graph/generator.py
Normal file
@@ -0,0 +1,194 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from repo_registry.core.models import ObservedFact, Repository, SourceReference
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CandidateEvidenceDraft:
    """Unsaved evidence entry attached to a candidate capability draft."""

    # Evidence category, e.g. "test", "example" or "documentation".
    type: str
    # What the evidence points at; the generator fills in the fact's path.
    reference: str
    # Qualitative weight, e.g. "strong" or "medium".
    strength: str
    # Observed facts this evidence was derived from.
    source_refs: list[SourceReference]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CandidateFeatureDraft:
    """Unsaved feature entry describing one likely entry point."""

    # Display name of the feature.
    name: str
    # Feature category, e.g. "CLI", "API" or "interface".
    type: str
    # Where the feature was observed; the generator fills in the fact's path.
    location: str
    # Heuristic confidence score.
    confidence: float
    # Observed facts this feature was derived from.
    source_refs: list[SourceReference]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CandidateCapabilityDraft:
    """Unsaved capability entry together with its features and evidence."""

    # Display name and free-text explanation of the capability.
    name: str
    description: str
    # Named inputs and outputs of the capability (may be empty).
    inputs: list[str]
    outputs: list[str]
    # Heuristic confidence score.
    confidence: float
    # Observed facts this capability was derived from.
    source_refs: list[SourceReference]
    # Child drafts; each instance gets its own fresh list by default.
    features: list[CandidateFeatureDraft] = field(default_factory=list)
    evidence: list[CandidateEvidenceDraft] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CandidateAbilityDraft:
    """Unsaved top-level ability entry and the capabilities grouped under it."""

    # Display name and free-text explanation of the ability.
    name: str
    description: str
    # Heuristic confidence score.
    confidence: float
    # Observed facts this ability was derived from.
    source_refs: list[SourceReference]
    # Child capability drafts; each instance gets its own fresh list by default.
    capabilities: list[CandidateCapabilityDraft] = field(default_factory=list)
|
||||
|
||||
|
||||
class CandidateGraphGenerator:
    """Build conservative review candidates from observed facts.

    Generation only regroups the supplied facts: one ability draft per call,
    holding an interface capability (when interface facts exist) and a
    structure capability (when manifest, framework, or language facts exist).
    All confidence values are fixed heuristics.
    """

    # Whole words that mark an interface fact as a command-line entry point.
    _CLI_WORDS = frozenset({"cli", "command", "commands"})
    # Whole words that mark an interface fact as an HTTP/API entry point.
    _API_WORDS = frozenset({"api", "apis", "route", "routes", "router"})
    # Decorator prefixes that indicate a route registration.
    _API_DECORATOR_MARKERS = ("@app.", "@router.")

    def generate(
        self,
        repository: Repository,
        facts: list[ObservedFact],
    ) -> list[CandidateAbilityDraft]:
        """Return the candidate ability drafts for ``repository``.

        Args:
            repository: Repository whose ``name`` is used in the ability title.
            facts: Observed facts to derive candidates from.

        Returns:
            An empty list when ``facts`` is empty, otherwise a one-element
            list holding the review-seed ability and its capabilities.
        """
        if not facts:
            return []

        docs = self._facts(facts, "documentation")
        tests = self._facts(facts, "test")
        examples = self._facts(facts, "example")
        interfaces = self._facts(facts, "interface")
        manifests = self._facts(facts, "manifest")
        frameworks = self._facts(facts, "framework")
        languages = self._facts(facts, "language")

        # Prefer the most descriptive facts as the ability's sources, but never
        # emit an ability without a source link: fall back to the interface
        # facts and finally to every supplied fact.
        ability_sources = docs or manifests or languages or interfaces or facts

        capabilities: list[CandidateCapabilityDraft] = []
        if interfaces:
            capabilities.append(
                self._interface_capability(interfaces, tests, examples, docs)
            )
        if manifests or frameworks or languages:
            capabilities.append(
                CandidateCapabilityDraft(
                    name="Describe Repository Structure",
                    description=(
                        "Summarize detected languages, package manifests, and framework "
                        "hints as structural context for review."
                    ),
                    inputs=[],
                    outputs=["repository structure summary"],
                    confidence=0.6,
                    source_refs=self._source_refs(manifests + frameworks + languages),
                    evidence=self._evidence(tests, examples, docs),
                )
            )

        # The draft is frozen, so it is built once, after its children exist.
        return [
            CandidateAbilityDraft(
                name=f"Review {repository.name} Repository Usefulness",
                description=(
                    "Candidate usefulness summary generated from observed repository "
                    "documentation, manifests, languages, and interfaces. This is a "
                    "review seed, not an asserted domain ability."
                ),
                # Documentation makes the summary more trustworthy.
                confidence=0.55 if docs else 0.35,
                source_refs=self._source_refs(ability_sources),
                capabilities=capabilities,
            )
        ]

    def _interface_capability(
        self,
        interfaces: list[ObservedFact],
        tests: list[ObservedFact],
        examples: list[ObservedFact],
        docs: list[ObservedFact],
    ) -> CandidateCapabilityDraft:
        """Build the capability that lists every observed interface as a feature."""
        features = [
            CandidateFeatureDraft(
                # Prefer the concrete value (e.g. the decorator text) as the name.
                name=fact.value or fact.name,
                type=self._feature_type(fact),
                location=fact.path,
                confidence=0.65 if fact.value else 0.45,
                source_refs=self._source_refs([fact]),
            )
            for fact in interfaces
        ]
        return CandidateCapabilityDraft(
            name="Expose Repository Interface",
            description=(
                "Expose one or more likely user-facing API or CLI entry points. "
                "Review is required to name the concrete domain behavior."
            ),
            inputs=[],
            outputs=["callable interface"],
            confidence=0.65,
            source_refs=self._source_refs(interfaces),
            features=features,
            evidence=self._evidence(tests, examples, docs),
        )

    def _evidence(
        self,
        tests: list[ObservedFact],
        examples: list[ObservedFact],
        docs: list[ObservedFact],
    ) -> list[CandidateEvidenceDraft]:
        """Turn test, example, and documentation facts into evidence drafts.

        Tests and examples are rated "strong", documentation "medium". The
        result keeps the order tests, examples, docs.
        """
        rated_groups = (
            ("test", "strong", tests),
            ("example", "strong", examples),
            ("documentation", "medium", docs),
        )
        return [
            CandidateEvidenceDraft(
                type=evidence_type,
                reference=fact.path,
                strength=strength,
                source_refs=self._source_refs([fact]),
            )
            for evidence_type, strength, group in rated_groups
            for fact in group
        ]

    def _feature_type(self, fact: ObservedFact) -> str:
        """Classify an interface fact as "CLI", "API", or generic "interface".

        Matching is done on whole words taken from the fact's name, path, and
        value, so that e.g. "client" is not mistaken for "cli" and "rapid" is
        not mistaken for "api". CLI markers win over API markers, which keeps
        decorators such as ``@app.command()`` classified as CLI.
        """
        import re  # local import keeps this class independent of module imports

        raw = " ".join(
            str(part) for part in (fact.name, fact.path, fact.value) if part
        )
        # Split camelCase boundaries ("FastAPI" -> "Fast API") before tokenizing.
        spaced = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", " ", raw)
        words = set(re.findall(r"[a-z0-9]+", spaced.lower()))

        if words & self._CLI_WORDS:
            return "CLI"
        lowered = raw.lower()
        if words & self._API_WORDS or any(
            marker in lowered for marker in self._API_DECORATOR_MARKERS
        ):
            return "API"
        return "interface"

    def _facts(self, facts: list[ObservedFact], kind: str) -> list[ObservedFact]:
        """Return the facts whose ``kind`` equals ``kind``, in input order."""
        return [fact for fact in facts if fact.kind == kind]

    def _source_refs(self, facts: list[ObservedFact]) -> list[SourceReference]:
        """Create one source reference per fact, preserving order."""
        return [
            SourceReference(
                fact_id=fact.id,
                path=fact.path,
                kind=fact.kind,
                name=fact.name,
            )
            for fact in facts
        ]
|
||||
@@ -56,6 +56,67 @@ class ScanSummary:
|
||||
facts: list[ObservedFact]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SourceReference:
    """Pointer from a candidate entry back to the fact it was derived from."""

    # Identifier of the originating observed fact; may be None.
    fact_id: int | None
    # Path, kind, and name copied from the originating fact.
    path: str
    kind: str
    name: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CandidateEvidence:
    """Stored evidence entry belonging to a candidate capability."""

    # Row identifier.
    id: int
    # Evidence category and the reference it points at.
    type: str
    reference: str
    # Qualitative weight of the evidence.
    strength: str
    # Review status of the entry.
    status: str
    # Source references attached to this entry.
    source_refs: list[SourceReference]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CandidateFeature:
    """Stored feature entry belonging to a candidate capability."""

    # Row identifier.
    id: int
    # Display name, category, and location of the feature.
    name: str
    type: str
    location: str
    # Confidence score recorded for the feature.
    confidence: float
    # Review status of the entry.
    status: str
    # Source references attached to this entry.
    source_refs: list[SourceReference]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CandidateCapability:
    """Stored capability entry with its features and evidence."""

    # Row identifier.
    id: int
    # Display name and free-text explanation.
    name: str
    description: str
    # Named inputs and outputs of the capability.
    inputs: list[str]
    outputs: list[str]
    # Confidence score recorded for the capability.
    confidence: float
    # Review status of the entry.
    status: str
    # Source references attached to this entry.
    source_refs: list[SourceReference]
    # Child entries; each instance gets its own fresh list by default.
    features: list[CandidateFeature] = field(default_factory=list)
    evidence: list[CandidateEvidence] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CandidateAbility:
    """Stored ability entry and the capabilities grouped under it."""

    # Row identifier.
    id: int
    # Display name and free-text explanation.
    name: str
    description: str
    # Confidence score recorded for the ability.
    confidence: float
    # Review status of the entry.
    status: str
    # Source references attached to this entry.
    source_refs: list[SourceReference]
    # Child capabilities; each instance gets its own fresh list by default.
    capabilities: list[CandidateCapability] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CandidateGraph:
    """Candidate abilities of one analysis run, with their repository and run."""

    # Repository the graph belongs to.
    repository: Repository
    # Analysis run the graph was generated for.
    analysis_run: AnalysisRun
    # Top-level candidate abilities of the graph.
    abilities: list[CandidateAbility]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Evidence:
|
||||
id: int
|
||||
|
||||
@@ -4,12 +4,14 @@ from collections.abc import Sequence
|
||||
|
||||
from repo_registry.core.models import (
|
||||
AnalysisRun,
|
||||
CandidateGraph,
|
||||
ObservedFact,
|
||||
Repository,
|
||||
RepositoryAbilityMap,
|
||||
ScanSummary,
|
||||
SearchResult,
|
||||
)
|
||||
from repo_registry.candidate_graph.generator import CandidateGraphGenerator
|
||||
from repo_registry.repo_ingestion.git import GitIngestionService
|
||||
from repo_registry.repo_scanning.scanner import DeterministicScanner
|
||||
from repo_registry.storage.sqlite import RegistryStore
|
||||
@@ -26,6 +28,7 @@ class RegistryService:
|
||||
self.store = store
|
||||
self.scanner = DeterministicScanner()
|
||||
self.ingestion = ingestion or GitIngestionService()
|
||||
self.candidate_generator = CandidateGraphGenerator()
|
||||
|
||||
def register_repository(
|
||||
self,
|
||||
@@ -79,6 +82,8 @@ class RegistryService:
|
||||
else None
|
||||
)
|
||||
facts = self.store.list_observed_facts(repository_id, completed_run.id)
|
||||
candidates = self.candidate_generator.generate(repository, facts)
|
||||
self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
|
||||
return ScanSummary(
|
||||
analysis_run=completed_run,
|
||||
snapshot=snapshot,
|
||||
@@ -95,6 +100,9 @@ class RegistryService:
|
||||
) -> list[ObservedFact]:
|
||||
return self.store.list_observed_facts(repository_id, analysis_run_id)
|
||||
|
||||
def candidate_graph(self, repository_id: int, analysis_run_id: int) -> CandidateGraph:
    """Return the stored candidate graph for one analysis run of a repository.

    Delegates to ``self.store.get_candidate_graph``; any exception raised
    there propagates unchanged.
    """
    graph = self.store.get_candidate_graph(repository_id, analysis_run_id)
    return graph
|
||||
|
||||
def add_ability(
|
||||
self,
|
||||
repository_id: int,
|
||||
|
||||
@@ -7,6 +7,11 @@ from pathlib import Path
|
||||
from repo_registry.core.models import (
|
||||
Ability,
|
||||
AnalysisRun,
|
||||
CandidateAbility,
|
||||
CandidateCapability,
|
||||
CandidateEvidence,
|
||||
CandidateFeature,
|
||||
CandidateGraph,
|
||||
Capability,
|
||||
Evidence,
|
||||
Feature,
|
||||
@@ -15,7 +20,9 @@ from repo_registry.core.models import (
|
||||
RepositoryAbilityMap,
|
||||
RepositorySnapshot,
|
||||
SearchResult,
|
||||
SourceReference,
|
||||
)
|
||||
from repo_registry.candidate_graph.generator import CandidateAbilityDraft
|
||||
from repo_registry.repo_scanning.scanner import FactCandidate, ScanResult
|
||||
|
||||
|
||||
@@ -158,6 +165,202 @@ class RegistryStore:
|
||||
)
|
||||
return self.get_analysis_run(repository_id, analysis_run_id)
|
||||
|
||||
def replace_candidate_graph(
    self,
    repository_id: int,
    analysis_run_id: int,
    abilities: list[CandidateAbilityDraft],
) -> None:
    """Replace the stored candidate graph of one analysis run.

    All candidate rows of ``analysis_run_id`` are removed and the supplied
    drafts are inserted in their place, inside a single ``self.connect()``
    block.

    Args:
        repository_id: Repository the new rows are attributed to.
        analysis_run_id: Analysis run whose candidate graph is replaced.
        abilities: Ability drafts, each with nested capabilities, features,
            and evidence.
    """
    with self.connect() as connection:
        # Delete children explicitly, child tables first. SQLite only honours
        # ON DELETE CASCADE when foreign keys are enabled on the connection,
        # so relying on the cascade alone could leave orphaned rows behind.
        for table in (
            "candidate_evidence",
            "candidate_features",
            "candidate_capabilities",
            "candidate_abilities",
        ):
            # Table names come from the fixed tuple above, never from input.
            connection.execute(
                f"DELETE FROM {table} WHERE analysis_run_id = ?",
                (analysis_run_id,),
            )

        for ability in abilities:
            ability_cursor = connection.execute(
                """
                INSERT INTO candidate_abilities
                (repository_id, analysis_run_id, name, description, confidence, source_refs)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (
                    repository_id,
                    analysis_run_id,
                    ability.name,
                    ability.description,
                    ability.confidence,
                    self._source_refs_to_json(ability.source_refs),
                ),
            )
            ability_id = int(ability_cursor.lastrowid)

            for capability in ability.capabilities:
                capability_cursor = connection.execute(
                    """
                    INSERT INTO candidate_capabilities
                    (repository_id, analysis_run_id, ability_id, name, description,
                    inputs, outputs, confidence, source_refs)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        repository_id,
                        analysis_run_id,
                        ability_id,
                        capability.name,
                        capability.description,
                        json.dumps(capability.inputs),
                        json.dumps(capability.outputs),
                        capability.confidence,
                        self._source_refs_to_json(capability.source_refs),
                    ),
                )
                capability_id = int(capability_cursor.lastrowid)

                # Leaf rows need no generated ids, so insert them in batches.
                connection.executemany(
                    """
                    INSERT INTO candidate_features
                    (repository_id, analysis_run_id, capability_id, name, type,
                    location, confidence, source_refs)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    [
                        (
                            repository_id,
                            analysis_run_id,
                            capability_id,
                            feature.name,
                            feature.type,
                            feature.location,
                            feature.confidence,
                            self._source_refs_to_json(feature.source_refs),
                        )
                        for feature in capability.features
                    ],
                )
                connection.executemany(
                    """
                    INSERT INTO candidate_evidence
                    (repository_id, analysis_run_id, capability_id, type,
                    reference, strength, source_refs)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                    """,
                    [
                        (
                            repository_id,
                            analysis_run_id,
                            capability_id,
                            evidence.type,
                            evidence.reference,
                            evidence.strength,
                            self._source_refs_to_json(evidence.source_refs),
                        )
                        for evidence in capability.evidence
                    ],
                )
|
||||
|
||||
def get_candidate_graph(
    self,
    repository_id: int,
    analysis_run_id: int,
) -> CandidateGraph:
    """Load the candidate graph stored for one analysis run.

    The repository and analysis run are looked up first (any error from
    ``get_repository`` / ``get_analysis_run`` propagates), then the four
    candidate tables are read and assembled bottom-up: features and evidence
    are grouped per capability, capabilities per ability.

    Returns:
        A ``CandidateGraph`` whose abilities, capabilities, features, and
        evidence are all ordered by row id.
    """
    repository = self.get_repository(repository_id)
    analysis_run = self.get_analysis_run(repository_id, analysis_run_id)
    scope = (repository_id, analysis_run_id)

    with self.connect() as connection:
        ability_rows = connection.execute(
            """
            SELECT id, name, description, confidence, status, source_refs
            FROM candidate_abilities
            WHERE repository_id = ? AND analysis_run_id = ?
            ORDER BY id
            """,
            scope,
        ).fetchall()
        capability_rows = connection.execute(
            """
            SELECT id, ability_id, name, description, inputs, outputs,
            confidence, status, source_refs
            FROM candidate_capabilities
            WHERE repository_id = ? AND analysis_run_id = ?
            ORDER BY id
            """,
            scope,
        ).fetchall()
        feature_rows = connection.execute(
            """
            SELECT id, capability_id, name, type, location, confidence,
            status, source_refs
            FROM candidate_features
            WHERE repository_id = ? AND analysis_run_id = ?
            ORDER BY id
            """,
            scope,
        ).fetchall()
        evidence_rows = connection.execute(
            """
            SELECT id, capability_id, type, reference, strength, status, source_refs
            FROM candidate_evidence
            WHERE repository_id = ? AND analysis_run_id = ?
            ORDER BY id
            """,
            scope,
        ).fetchall()

    # Leaves first: group features and evidence under their capability id.
    features_by_capability: dict[int, list[CandidateFeature]] = {}
    for record in feature_rows:
        feature = CandidateFeature(
            id=record["id"],
            name=record["name"],
            type=record["type"],
            location=record["location"],
            confidence=record["confidence"],
            status=record["status"],
            source_refs=self._source_refs_from_json(record["source_refs"]),
        )
        features_by_capability.setdefault(record["capability_id"], []).append(feature)

    evidence_by_capability: dict[int, list[CandidateEvidence]] = {}
    for record in evidence_rows:
        evidence = CandidateEvidence(
            id=record["id"],
            type=record["type"],
            reference=record["reference"],
            strength=record["strength"],
            status=record["status"],
            source_refs=self._source_refs_from_json(record["source_refs"]),
        )
        evidence_by_capability.setdefault(record["capability_id"], []).append(evidence)

    # Then group capabilities under their ability id.
    capabilities_by_ability: dict[int, list[CandidateCapability]] = {}
    for record in capability_rows:
        capability_id = record["id"]
        capability = CandidateCapability(
            id=capability_id,
            name=record["name"],
            description=record["description"],
            inputs=json.loads(record["inputs"]),
            outputs=json.loads(record["outputs"]),
            confidence=record["confidence"],
            status=record["status"],
            source_refs=self._source_refs_from_json(record["source_refs"]),
            features=features_by_capability.get(capability_id, []),
            evidence=evidence_by_capability.get(capability_id, []),
        )
        capabilities_by_ability.setdefault(record["ability_id"], []).append(capability)

    abilities: list[CandidateAbility] = []
    for record in ability_rows:
        abilities.append(
            CandidateAbility(
                id=record["id"],
                name=record["name"],
                description=record["description"],
                confidence=record["confidence"],
                status=record["status"],
                source_refs=self._source_refs_from_json(record["source_refs"]),
                capabilities=capabilities_by_ability.get(record["id"], []),
            )
        )

    return CandidateGraph(
        repository=repository,
        analysis_run=analysis_run,
        abilities=abilities,
    )
|
||||
|
||||
def fail_analysis_run(
|
||||
self,
|
||||
repository_id: int,
|
||||
@@ -538,6 +741,30 @@ class RegistryStore:
|
||||
],
|
||||
)
|
||||
|
||||
def _source_refs_to_json(self, source_refs: list[SourceReference]) -> str:
|
||||
return json.dumps(
|
||||
[
|
||||
{
|
||||
"fact_id": source_ref.fact_id,
|
||||
"path": source_ref.path,
|
||||
"kind": source_ref.kind,
|
||||
"name": source_ref.name,
|
||||
}
|
||||
for source_ref in source_refs
|
||||
]
|
||||
)
|
||||
|
||||
def _source_refs_from_json(self, value: str) -> list[SourceReference]:
|
||||
return [
|
||||
SourceReference(
|
||||
fact_id=item.get("fact_id"),
|
||||
path=item.get("path", ""),
|
||||
kind=item.get("kind", ""),
|
||||
name=item.get("name", ""),
|
||||
)
|
||||
for item in json.loads(value)
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _repository_from_row(row: sqlite3.Row) -> Repository:
|
||||
return Repository(
|
||||
|
||||
@@ -149,6 +149,18 @@ def list_observed_facts(
|
||||
raise HTTPException(status_code=404, detail=str(exc)) from exc
|
||||
|
||||
|
||||
@app.get("/repos/{repository_id}/analysis-runs/{analysis_run_id}/candidate-graph")
def get_candidate_graph(
    repository_id: int,
    analysis_run_id: int,
    service: RegistryService = Depends(get_service),
) -> dict[str, object]:
    """Return the candidate graph of an analysis run as a plain dictionary.

    Responds with HTTP 404, using the exception text as detail, when the
    service raises ``NotFoundError``.
    """
    try:
        graph = service.candidate_graph(repository_id, analysis_run_id)
    except NotFoundError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    # Convert the nested dataclasses into dictionaries and lists.
    return asdict(graph)
|
||||
|
||||
|
||||
@app.post("/repos/{repository_id}/abilities", status_code=201)
|
||||
def create_ability(
|
||||
repository_id: int,
|
||||
|
||||
45
tests/test_candidate_graph.py
Normal file
45
tests/test_candidate_graph.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from repo_registry.candidate_graph.generator import CandidateGraphGenerator
|
||||
from repo_registry.core.models import ObservedFact, Repository
|
||||
|
||||
|
||||
def fact(id, kind, name, path="", value=""):
    """Build an ``ObservedFact`` fixture tied to repository, run, and snapshot 1."""
    fields = {
        "id": id,
        "repository_id": 1,
        "analysis_run_id": 1,
        "snapshot_id": 1,
        "kind": kind,
        "path": path,
        "name": name,
        "value": value,
        "metadata": {},
    }
    return ObservedFact(**fields)
|
||||
|
||||
|
||||
def test_candidate_generator_builds_review_seed_from_observed_facts():
    """A docs + interface + test + framework repo yields one sourced ability."""
    mail_router = Repository(
        id=1,
        name="MailRouter",
        url="/tmp/mail-router",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", '@app.post("/classify")'),
        fact(3, "test", "test_app.py", "tests/test_app.py"),
        fact(4, "framework", "FastAPI", "requirements.txt"),
    ]

    drafts = CandidateGraphGenerator().generate(mail_router, observed)

    # Exactly one ability, sourced from the documentation fact.
    assert len(drafts) == 1
    seed = drafts[0]
    assert seed.name == "Review MailRouter Repository Usefulness"
    assert seed.source_refs[0].path == "README.md"

    # The interface capability comes first and carries the route as a feature.
    interface_capability = seed.capabilities[0]
    assert interface_capability.name == "Expose Repository Interface"
    first_feature = interface_capability.features[0]
    assert first_feature.type == "API"
    assert first_feature.location == "app.py"
    # Test facts are listed first in the evidence and rated strong.
    assert interface_capability.evidence[0].strength == "strong"
|
||||
@@ -133,6 +133,16 @@ def test_analyze_repository_records_snapshot_and_observed_facts(tmp_path):
|
||||
assert ("framework", "FastAPI", "requirements.txt") in fact_names
|
||||
assert ("interface", "python route decorator", "app.py") in fact_names
|
||||
|
||||
candidate_graph = service.candidate_graph(repository.id, summary.analysis_run.id)
|
||||
assert candidate_graph.repository.name == "Example"
|
||||
assert candidate_graph.abilities
|
||||
capability_names = {
|
||||
capability.name
|
||||
for ability in candidate_graph.abilities
|
||||
for capability in ability.capabilities
|
||||
}
|
||||
assert "Expose Repository Interface" in capability_names
|
||||
|
||||
|
||||
def test_analyze_repository_failure_is_recorded(tmp_path):
|
||||
service = make_service(tmp_path)
|
||||
|
||||
@@ -102,6 +102,16 @@ def test_api_analysis_run_loop(tmp_path):
|
||||
assert run["analysis_run"]["status"] == "completed"
|
||||
assert run["snapshot"]["file_count"] == 2
|
||||
|
||||
candidate_response = client.get(
|
||||
f"/repos/{repository_id}/analysis-runs/"
|
||||
f"{run['analysis_run']['id']}/candidate-graph"
|
||||
)
|
||||
assert candidate_response.status_code == 200
|
||||
candidate_graph = candidate_response.json()
|
||||
assert candidate_graph["abilities"][0]["name"] == (
|
||||
"Review Frontend Repository Usefulness"
|
||||
)
|
||||
|
||||
facts_response = client.get(f"/repos/{repository_id}/observed-facts")
|
||||
assert facts_response.status_code == 200
|
||||
fact_names = {
|
||||
|
||||
Reference in New Issue
Block a user