generated from coulomb/repo-seed
Wired optional LLM extraction into the analysis path
This commit is contained in:
@@ -122,6 +122,11 @@ entries while preserving source paths where they match observed facts or
|
|||||||
content chunks. Tests use fake adapters, so the default test suite does not call
|
content chunks. Tests use fake adapters, so the default test suite does not call
|
||||||
external providers.
|
external providers.
|
||||||
|
|
||||||
|
Application code can inject an `LLMCandidateExtractor` into `RegistryService`.
|
||||||
|
When an extractor is present and returns candidates, analysis stores those
|
||||||
|
reviewable candidates; when it returns no candidates, the deterministic
|
||||||
|
heuristic generator remains the fallback.
|
||||||
|
|
||||||
## Agent-Facing Endpoints
|
## Agent-Facing Endpoints
|
||||||
|
|
||||||
The v0.1 API covers the main registration, analysis, review, search, and inspection loop:
|
The v0.1 API covers the main registration, analysis, review, search, and inspection loop:
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ from repo_registry.core.models import (
|
|||||||
)
|
)
|
||||||
from repo_registry.candidate_graph.generator import CandidateGraphGenerator
|
from repo_registry.candidate_graph.generator import CandidateGraphGenerator
|
||||||
from repo_registry.content_indexing.extractor import ContentExtractor
|
from repo_registry.content_indexing.extractor import ContentExtractor
|
||||||
|
from repo_registry.llm_extraction.extractor import LLMCandidateExtractor
|
||||||
|
from repo_registry.llm_extraction.mapper import LLMExtractionMapper
|
||||||
from repo_registry.repo_ingestion.git import GitIngestionService
|
from repo_registry.repo_ingestion.git import GitIngestionService
|
||||||
from repo_registry.repo_ingestion.metadata import RepositoryMetadataExtractor
|
from repo_registry.repo_ingestion.metadata import RepositoryMetadataExtractor
|
||||||
from repo_registry.repo_scanning.scanner import DeterministicScanner
|
from repo_registry.repo_scanning.scanner import DeterministicScanner
|
||||||
@@ -30,6 +32,7 @@ class RegistryService:
|
|||||||
self,
|
self,
|
||||||
store: RegistryStore,
|
store: RegistryStore,
|
||||||
ingestion: GitIngestionService | None = None,
|
ingestion: GitIngestionService | None = None,
|
||||||
|
llm_extractor: LLMCandidateExtractor | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.store = store
|
self.store = store
|
||||||
self.scanner = DeterministicScanner()
|
self.scanner = DeterministicScanner()
|
||||||
@@ -37,6 +40,8 @@ class RegistryService:
|
|||||||
self.metadata_extractor = RepositoryMetadataExtractor()
|
self.metadata_extractor = RepositoryMetadataExtractor()
|
||||||
self.candidate_generator = CandidateGraphGenerator()
|
self.candidate_generator = CandidateGraphGenerator()
|
||||||
self.content_extractor = ContentExtractor()
|
self.content_extractor = ContentExtractor()
|
||||||
|
self.llm_extractor = llm_extractor
|
||||||
|
self.llm_mapper = LLMExtractionMapper()
|
||||||
|
|
||||||
def register_repository(
|
def register_repository(
|
||||||
self,
|
self,
|
||||||
@@ -122,11 +127,7 @@ class RegistryService:
|
|||||||
chunks,
|
chunks,
|
||||||
)
|
)
|
||||||
stored_chunks = self.store.list_content_chunks(repository_id, completed_run.id)
|
stored_chunks = self.store.list_content_chunks(repository_id, completed_run.id)
|
||||||
candidates = self.candidate_generator.generate(
|
candidates = self._generate_candidates(repository, facts, stored_chunks)
|
||||||
repository,
|
|
||||||
facts,
|
|
||||||
stored_chunks,
|
|
||||||
)
|
|
||||||
self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
|
self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
|
||||||
return ScanSummary(
|
return ScanSummary(
|
||||||
analysis_run=completed_run,
|
analysis_run=completed_run,
|
||||||
@@ -134,6 +135,18 @@ class RegistryService:
|
|||||||
facts=facts,
|
facts=facts,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _generate_candidates(
    self,
    repository: Repository,
    facts: list[ObservedFact],
    chunks: list[ContentChunk],
):
    """Produce the candidates for one analysis run.

    If an LLM extractor was injected and its ``extract`` call returns a
    truthy result, that result is handed to ``self.llm_mapper.map`` together
    with the observed facts and content chunks. In every other case (no
    extractor configured, or a falsy extraction result) the work is
    delegated to ``self.candidate_generator.generate``.
    """
    llm_result = None
    if self.llm_extractor is not None:
        llm_result = self.llm_extractor.extract(repository, chunks)

    # A falsy extraction result is handled exactly like "no extractor".
    if not llm_result:
        return self.candidate_generator.generate(repository, facts, chunks)
    return self.llm_mapper.map(llm_result, facts, chunks)
|
||||||
|
|
||||||
def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]:
|
def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]:
|
||||||
return self.store.list_analysis_runs(repository_id)
|
return self.store.list_analysis_runs(repository_id)
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from repo_registry.core.service import RegistryService
|
from repo_registry.core.service import RegistryService
|
||||||
|
from repo_registry.llm_extraction import ExtractedAbility, ExtractedCapability
|
||||||
from repo_registry.repo_ingestion.git import GitIngestionService
|
from repo_registry.repo_ingestion.git import GitIngestionService
|
||||||
from repo_registry.storage.sqlite import NotFoundError, RegistryStore
|
from repo_registry.storage.sqlite import NotFoundError, RegistryStore
|
||||||
|
|
||||||
@@ -11,6 +12,16 @@ def make_service(tmp_path):
|
|||||||
return RegistryService(store, ingestion=GitIngestionService(tmp_path / "checkouts"))
|
return RegistryService(store, ingestion=GitIngestionService(tmp_path / "checkouts"))
|
||||||
|
|
||||||
|
|
||||||
|
class FakeLLMExtractor:
    """Test double that stands in for an LLM extractor.

    It hands back the abilities it was constructed with and records every
    ``extract`` invocation in ``calls`` so tests can inspect the arguments.
    """

    def __init__(self, abilities):
        # Each entry is a (repository, chunks) tuple, in call order.
        self.calls = []
        self.abilities = abilities

    def extract(self, repository, chunks):
        """Record the call and return the canned abilities unchanged."""
        invocation = (repository, chunks)
        self.calls.append(invocation)
        return self.abilities
|
||||||
|
|
||||||
|
|
||||||
def test_manual_registry_builds_ability_map(tmp_path):
|
def test_manual_registry_builds_ability_map(tmp_path):
|
||||||
service = make_service(tmp_path)
|
service = make_service(tmp_path)
|
||||||
|
|
||||||
@@ -363,6 +374,67 @@ def test_analyze_repository_records_snapshot_and_observed_facts(tmp_path):
|
|||||||
assert "Expose Repository Interface" in capability_names
|
assert "Expose Repository Interface" in capability_names
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_repository_can_use_optional_llm_extractor(tmp_path):
    """Analysis stores the extractor's candidates when an extractor is injected."""
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir()
    readme = repo_dir / "README.md"
    readme.write_text(
        "# Email Router\nRoutes incoming customer email.\n",
        encoding="utf-8",
    )

    registry_store = RegistryStore(tmp_path / "registry.sqlite3")
    registry_store.initialize()

    # Canned extraction result: one ability holding a single capability.
    classify_capability = ExtractedCapability(
        name="Classify Incoming Email",
        description="Classify messages by intent.",
        source_paths=["README.md"],
    )
    routing_ability = ExtractedAbility(
        name="Business Email Routing",
        description="Route incoming messages.",
        source_paths=["README.md"],
        capabilities=[classify_capability],
    )
    fake_extractor = FakeLLMExtractor([routing_ability])

    registry = RegistryService(
        registry_store,
        ingestion=GitIngestionService(tmp_path / "checkouts"),
        llm_extractor=fake_extractor,
    )
    registered = registry.register_repository(name="Email Router", url=str(repo_dir))

    scan = registry.analyze_repository(registered.id)
    candidate_graph = registry.candidate_graph(registered.id, scan.analysis_run.id)

    # The extractor must have been invoked, and with a non-empty chunk list.
    assert fake_extractor.calls
    _, chunks_passed = fake_extractor.calls[0]
    assert chunks_passed

    top_ability = candidate_graph.abilities[0]
    assert top_ability.name == "Business Email Routing"
    assert top_ability.capabilities[0].name == "Classify Incoming Email"
    assert top_ability.source_refs[0].path == "README.md"
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_candidates(tmp_path):
    """An extractor that yields nothing leaves the heuristic generator in charge.

    The fake extractor is kept in a local so the test can confirm it was
    actually consulted; otherwise the test would also pass if the extractor
    were silently ignored, which is not the fallback path being exercised.
    """
    source = tmp_path / "repo"
    source.mkdir()
    (source / "README.md").write_text("# Fallback\n", encoding="utf-8")
    store = RegistryStore(tmp_path / "registry.sqlite3")
    store.initialize()
    extractor = FakeLLMExtractor([])
    service = RegistryService(
        store,
        ingestion=GitIngestionService(tmp_path / "checkouts"),
        llm_extractor=extractor,
    )
    repository = service.register_repository(name="Fallback", url=str(source))

    summary = service.analyze_repository(repository.id)
    graph = service.candidate_graph(repository.id, summary.analysis_run.id)

    # Fallback only counts as fallback if the extractor was tried first.
    assert extractor.calls
    assert graph.abilities[0].name == "Review Fallback Repository Usefulness"
|
||||||
|
|
||||||
|
|
||||||
def test_approve_candidate_graph_publishes_ability_map_once(tmp_path):
|
def test_approve_candidate_graph_publishes_ability_map_once(tmp_path):
|
||||||
source = tmp_path / "repo"
|
source = tmp_path / "repo"
|
||||||
source.mkdir()
|
source.mkdir()
|
||||||
|
|||||||
Reference in New Issue
Block a user