Wired optional LLM extraction into the analysis path

This commit is contained in:
2026-04-26 03:11:45 +02:00
parent 3aa0c08ab9
commit 0f10ca6065
3 changed files with 95 additions and 5 deletions

View File

@@ -17,6 +17,8 @@ from repo_registry.core.models import (
)
from repo_registry.candidate_graph.generator import CandidateGraphGenerator
from repo_registry.content_indexing.extractor import ContentExtractor
from repo_registry.llm_extraction.extractor import LLMCandidateExtractor
from repo_registry.llm_extraction.mapper import LLMExtractionMapper
from repo_registry.repo_ingestion.git import GitIngestionService
from repo_registry.repo_ingestion.metadata import RepositoryMetadataExtractor
from repo_registry.repo_scanning.scanner import DeterministicScanner
@@ -30,6 +32,7 @@ class RegistryService:
self,
store: RegistryStore,
ingestion: GitIngestionService | None = None,
llm_extractor: LLMCandidateExtractor | None = None,
) -> None:
self.store = store
self.scanner = DeterministicScanner()
@@ -37,6 +40,8 @@ class RegistryService:
self.metadata_extractor = RepositoryMetadataExtractor()
self.candidate_generator = CandidateGraphGenerator()
self.content_extractor = ContentExtractor()
self.llm_extractor = llm_extractor
self.llm_mapper = LLMExtractionMapper()
def register_repository(
self,
@@ -122,11 +127,7 @@ class RegistryService:
chunks,
)
stored_chunks = self.store.list_content_chunks(repository_id, completed_run.id)
candidates = self.candidate_generator.generate(
repository,
facts,
stored_chunks,
)
candidates = self._generate_candidates(repository, facts, stored_chunks)
self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
return ScanSummary(
analysis_run=completed_run,
@@ -134,6 +135,18 @@ class RegistryService:
facts=facts,
)
def _generate_candidates(
    self,
    repository: Repository,
    facts: list[ObservedFact],
    chunks: list[ContentChunk],
):
    """Produce candidates for a repository, preferring LLM extraction.

    When ``self.llm_extractor`` is configured, it is asked to extract from
    ``repository`` and ``chunks``; a truthy result is handed to
    ``self.llm_mapper.map`` together with ``facts`` and ``chunks`` and that
    mapping is returned. If no extractor is configured, or the extraction
    result is falsy (e.g. empty), the result of
    ``self.candidate_generator.generate`` is returned instead.
    """
    extractor = self.llm_extractor
    # The LLM path is optional: skip the call entirely when no extractor is set.
    llm_output = (
        extractor.extract(repository, chunks) if extractor is not None else None
    )
    if llm_output:
        return self.llm_mapper.map(llm_output, facts, chunks)
    # No extractor, or nothing extracted: use the regular generator.
    return self.candidate_generator.generate(repository, facts, chunks)
def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]:
    """Return the analysis runs the store reports for ``repository_id``.

    This is a thin pass-through to ``self.store.list_analysis_runs``; the
    store's result is returned unchanged.
    """
    runs = self.store.list_analysis_runs(repository_id)
    return runs