Wired optional LLM extraction into the analysis path

This commit is contained in:
2026-04-26 03:11:45 +02:00
parent 3aa0c08ab9
commit 0f10ca6065
3 changed files with 95 additions and 5 deletions

View File

@@ -1,6 +1,7 @@
import subprocess
from repo_registry.core.service import RegistryService
from repo_registry.llm_extraction import ExtractedAbility, ExtractedCapability
from repo_registry.repo_ingestion.git import GitIngestionService
from repo_registry.storage.sqlite import NotFoundError, RegistryStore
@@ -11,6 +12,16 @@ def make_service(tmp_path):
return RegistryService(store, ingestion=GitIngestionService(tmp_path / "checkouts"))
class FakeLLMExtractor:
    """Test double for an LLM extractor.

    Every ``extract`` call is recorded in ``calls`` and answered with the
    canned ``abilities`` supplied at construction time.
    """

    def __init__(self, abilities):
        """Store the canned ``abilities`` and start with an empty call log."""
        # Canned candidates handed back by every extract() call.
        self.abilities = abilities
        # One (repository, chunks) tuple per extract() call, in call order.
        self.calls = []

    def extract(self, repository, chunks):
        """Record the call and return the canned abilities.

        Args:
            repository: Stored as-is in the call log.
            chunks: Stored as-is in the call log.

        Returns:
            A new list holding the canned abilities.
        """
        self.calls.append((repository, chunks))
        # Return a fresh list so a caller that mutates the result cannot
        # change what later extract() calls return.
        return list(self.abilities)
def test_manual_registry_builds_ability_map(tmp_path):
service = make_service(tmp_path)
@@ -363,6 +374,67 @@ def test_analyze_repository_records_snapshot_and_observed_facts(tmp_path):
assert "Expose Repository Interface" in capability_names
def test_analyze_repository_can_use_optional_llm_extractor(tmp_path):
    """Analysis with an injected extractor yields the extractor's candidates."""
    # Arrange: a minimal source tree containing only a README.
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir()
    readme = repo_dir / "README.md"
    readme.write_text(
        "# Email Router\nRoutes incoming customer email.\n",
        encoding="utf-8",
    )

    registry_store = RegistryStore(tmp_path / "registry.sqlite3")
    registry_store.initialize()

    # Canned extraction result: one ability with a single capability.
    classify_capability = ExtractedCapability(
        name="Classify Incoming Email",
        description="Classify messages by intent.",
        source_paths=["README.md"],
    )
    routing_ability = ExtractedAbility(
        name="Business Email Routing",
        description="Route incoming messages.",
        source_paths=["README.md"],
        capabilities=[classify_capability],
    )
    extractor = FakeLLMExtractor([routing_ability])

    checkout_root = tmp_path / "checkouts"
    service = RegistryService(
        registry_store,
        ingestion=GitIngestionService(checkout_root),
        llm_extractor=extractor,
    )

    # Act: register the local directory and run the analysis.
    repository = service.register_repository(name="Email Router", url=str(repo_dir))
    summary = service.analyze_repository(repository.id)
    graph = service.candidate_graph(repository.id, summary.analysis_run.id)

    # Assert: the extractor was called with a non-empty chunks argument.
    assert extractor.calls
    _, first_chunks = extractor.calls[0]
    assert first_chunks

    # Assert: the candidate graph mirrors the canned extraction result.
    first_ability = graph.abilities[0]
    assert first_ability.name == "Business Email Routing"
    assert first_ability.capabilities[0].name == "Classify Incoming Email"
    assert first_ability.source_refs[0].path == "README.md"
def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_candidates(tmp_path):
    """An extractor that returns no candidates still leaves a fallback ability."""
    # Arrange: a source tree whose README holds only a title.
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir()
    readme = repo_dir / "README.md"
    readme.write_text("# Fallback\n", encoding="utf-8")

    registry_store = RegistryStore(tmp_path / "registry.sqlite3")
    registry_store.initialize()

    # The fake extractor is configured to return an empty candidate list.
    empty_extractor = FakeLLMExtractor([])
    checkout_root = tmp_path / "checkouts"
    service = RegistryService(
        registry_store,
        ingestion=GitIngestionService(checkout_root),
        llm_extractor=empty_extractor,
    )

    # Act
    repository = service.register_repository(name="Fallback", url=str(repo_dir))
    summary = service.analyze_repository(repository.id)
    graph = service.candidate_graph(repository.id, summary.analysis_run.id)

    # Assert: the first ability is the fallback review candidate.
    first_ability = graph.abilities[0]
    assert first_ability.name == "Review Fallback Repository Usefulness"
def test_approve_candidate_graph_publishes_ability_map_once(tmp_path):
source = tmp_path / "repo"
source.mkdir()