generated from coulomb/repo-seed
Wired optional LLM extraction into the analysis path
This commit is contained in:
@@ -122,6 +122,11 @@ entries while preserving source paths where they match observed facts or
|
||||
content chunks. Tests use fake adapters, so the default test suite does not call
|
||||
external providers.
|
||||
|
||||
Application code can inject an `LLMCandidateExtractor` into `RegistryService`.
|
||||
When an extractor is present and returns candidates, analysis stores those
|
||||
reviewable candidates; when it returns no candidates, the deterministic
|
||||
heuristic generator remains the fallback.
|
||||
|
||||
## Agent-Facing Endpoints
|
||||
|
||||
The v0.1 API covers the main registration, analysis, review, search, and inspection loop:
|
||||
|
||||
@@ -17,6 +17,8 @@ from repo_registry.core.models import (
|
||||
)
|
||||
from repo_registry.candidate_graph.generator import CandidateGraphGenerator
|
||||
from repo_registry.content_indexing.extractor import ContentExtractor
|
||||
from repo_registry.llm_extraction.extractor import LLMCandidateExtractor
|
||||
from repo_registry.llm_extraction.mapper import LLMExtractionMapper
|
||||
from repo_registry.repo_ingestion.git import GitIngestionService
|
||||
from repo_registry.repo_ingestion.metadata import RepositoryMetadataExtractor
|
||||
from repo_registry.repo_scanning.scanner import DeterministicScanner
|
||||
@@ -30,6 +32,7 @@ class RegistryService:
|
||||
self,
|
||||
store: RegistryStore,
|
||||
ingestion: GitIngestionService | None = None,
|
||||
llm_extractor: LLMCandidateExtractor | None = None,
|
||||
) -> None:
|
||||
self.store = store
|
||||
self.scanner = DeterministicScanner()
|
||||
@@ -37,6 +40,8 @@ class RegistryService:
|
||||
self.metadata_extractor = RepositoryMetadataExtractor()
|
||||
self.candidate_generator = CandidateGraphGenerator()
|
||||
self.content_extractor = ContentExtractor()
|
||||
self.llm_extractor = llm_extractor
|
||||
self.llm_mapper = LLMExtractionMapper()
|
||||
|
||||
def register_repository(
|
||||
self,
|
||||
@@ -122,11 +127,7 @@ class RegistryService:
|
||||
chunks,
|
||||
)
|
||||
stored_chunks = self.store.list_content_chunks(repository_id, completed_run.id)
|
||||
candidates = self.candidate_generator.generate(
|
||||
repository,
|
||||
facts,
|
||||
stored_chunks,
|
||||
)
|
||||
candidates = self._generate_candidates(repository, facts, stored_chunks)
|
||||
self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
|
||||
return ScanSummary(
|
||||
analysis_run=completed_run,
|
||||
@@ -134,6 +135,18 @@ class RegistryService:
|
||||
facts=facts,
|
||||
)
|
||||
|
||||
def _generate_candidates(
|
||||
self,
|
||||
repository: Repository,
|
||||
facts: list[ObservedFact],
|
||||
chunks: list[ContentChunk],
|
||||
):
|
||||
if self.llm_extractor is not None:
|
||||
extracted = self.llm_extractor.extract(repository, chunks)
|
||||
if extracted:
|
||||
return self.llm_mapper.map(extracted, facts, chunks)
|
||||
return self.candidate_generator.generate(repository, facts, chunks)
|
||||
|
||||
def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]:
    """Return whatever the store lists as analysis runs for ``repository_id``."""
    runs = self.store.list_analysis_runs(repository_id)
    return runs
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import subprocess
|
||||
|
||||
from repo_registry.core.service import RegistryService
|
||||
from repo_registry.llm_extraction import ExtractedAbility, ExtractedCapability
|
||||
from repo_registry.repo_ingestion.git import GitIngestionService
|
||||
from repo_registry.storage.sqlite import NotFoundError, RegistryStore
|
||||
|
||||
@@ -11,6 +12,16 @@ def make_service(tmp_path):
|
||||
return RegistryService(store, ingestion=GitIngestionService(tmp_path / "checkouts"))
|
||||
|
||||
|
||||
class FakeLLMExtractor:
    """Test double for an LLM extractor: replays canned abilities and logs calls."""

    def __init__(self, abilities):
        """Keep the canned ``abilities`` and start with an empty call log."""
        self.calls = []
        self.abilities = abilities

    def extract(self, repository, chunks):
        """Record the ``(repository, chunks)`` pair, then return the canned abilities."""
        invocation = (repository, chunks)
        self.calls.append(invocation)
        return self.abilities
|
||||
|
||||
|
||||
def test_manual_registry_builds_ability_map(tmp_path):
|
||||
service = make_service(tmp_path)
|
||||
|
||||
@@ -363,6 +374,67 @@ def test_analyze_repository_records_snapshot_and_observed_facts(tmp_path):
|
||||
assert "Expose Repository Interface" in capability_names
|
||||
|
||||
|
||||
def test_analyze_repository_can_use_optional_llm_extractor(tmp_path):
    """Analysis stores the injected extractor's candidates in the candidate graph."""
    # Minimal source repository: a single README on disk.
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir()
    readme = repo_dir / "README.md"
    readme.write_text(
        "# Email Router\nRoutes incoming customer email.\n",
        encoding="utf-8",
    )

    store = RegistryStore(tmp_path / "registry.sqlite3")
    store.initialize()

    # Canned extraction result: one ability holding one capability.
    classify_capability = ExtractedCapability(
        name="Classify Incoming Email",
        description="Classify messages by intent.",
        source_paths=["README.md"],
    )
    routing_ability = ExtractedAbility(
        name="Business Email Routing",
        description="Route incoming messages.",
        source_paths=["README.md"],
        capabilities=[classify_capability],
    )
    extractor = FakeLLMExtractor([routing_ability])

    checkout_root = tmp_path / "checkouts"
    service = RegistryService(
        store,
        ingestion=GitIngestionService(checkout_root),
        llm_extractor=extractor,
    )
    repository = service.register_repository(name="Email Router", url=str(repo_dir))

    summary = service.analyze_repository(repository.id)
    graph = service.candidate_graph(repository.id, summary.analysis_run.id)

    # The extractor must have been called, and with a non-empty chunk list.
    assert extractor.calls
    assert extractor.calls[0][1]

    # The stored graph mirrors the canned extraction, not the heuristic output.
    first_ability = graph.abilities[0]
    assert first_ability.name == "Business Email Routing"
    assert first_ability.capabilities[0].name == "Classify Incoming Email"
    assert first_ability.source_refs[0].path == "README.md"
|
||||
|
||||
|
||||
def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_candidates(tmp_path):
    """An extractor that returns no candidates leaves the heuristic graph in place."""
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir()
    readme = repo_dir / "README.md"
    readme.write_text("# Fallback\n", encoding="utf-8")

    store = RegistryStore(tmp_path / "registry.sqlite3")
    store.initialize()

    # The extractor is wired in but yields nothing, so the fallback path runs.
    service = RegistryService(
        store,
        ingestion=GitIngestionService(tmp_path / "checkouts"),
        llm_extractor=FakeLLMExtractor([]),
    )
    repository = service.register_repository(name="Fallback", url=str(repo_dir))

    summary = service.analyze_repository(repository.id)
    run_id = summary.analysis_run.id
    graph = service.candidate_graph(repository.id, run_id)

    first_ability = graph.abilities[0]
    assert first_ability.name == "Review Fallback Repository Usefulness"
|
||||
|
||||
|
||||
def test_approve_candidate_graph_publishes_ability_map_once(tmp_path):
|
||||
source = tmp_path / "repo"
|
||||
source.mkdir()
|
||||
|
||||
Reference in New Issue
Block a user