Hardened the optional LLM extraction path

2026-04-26 03:18:58 +02:00
parent 1881daea32
commit cdaeefd4b2
3 changed files with 44 additions and 1 deletion
--- a/README.md
+++ b/README.md
@@ -126,6 +126,8 @@ Application code can inject an `LLMCandidateExtractor` into `RegistryService`.
 When an extractor is present and returns candidates, analysis stores those
 reviewable candidates; when it returns no candidates, the deterministic
 heuristic generator remains the fallback.
+If the extractor raises, the failure is recorded as an `llm_extraction_failed`
+review decision and analysis continues with deterministic candidates.

 The FastAPI settings object also accepts `llm_provider` and `llm_model`. By
 default `llm_provider` is unset, so analysis is fully offline and deterministic.
--- a/src/repo_registry/core/service.py
+++ b/src/repo_registry/core/service.py
@@ -127,7 +127,20 @@ class RegistryService:
            chunks,
        )
        stored_chunks = self.store.list_content_chunks(repository_id, completed_run.id)
-        candidates = self._generate_candidates(repository, facts, stored_chunks)
+        try:
+            candidates = self._generate_candidates(repository, facts, stored_chunks)
+        except Exception as exc:
+            self.store.create_review_decision(
+                repository_id,
+                completed_run.id,
+                action="llm_extraction_failed",
+                notes=str(exc),
+            )
+            candidates = self.candidate_generator.generate(
+                repository,
+                facts,
+                stored_chunks,
+            )
        self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
        return ScanSummary(
            analysis_run=completed_run,
--- a/tests/test_registry_service.py
+++ b/tests/test_registry_service.py
@@ -22,6 +22,11 @@ class FakeLLMExtractor:
        return self.abilities


+class FailingLLMExtractor:  # test double: extract() always raises
+    def extract(self, repository, chunks):
+        raise RuntimeError("provider unavailable")
+
+
 def test_manual_registry_builds_ability_map(tmp_path):
    service = make_service(tmp_path)

@@ -435,6 +440,29 @@ def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_ca
    assert graph.abilities[0].name == "Review Fallback Repository Usefulness"


+def test_analyze_repository_records_llm_failure_and_falls_back(tmp_path):
+    source = tmp_path / "repo"
+    source.mkdir()
+    (source / "README.md").write_text("# Failing LLM\n", encoding="utf-8")
+    store = RegistryStore(tmp_path / "registry.sqlite3")
+    store.initialize()
+    service = RegistryService(
+        store,
+        ingestion=GitIngestionService(tmp_path / "checkouts"),
+        llm_extractor=FailingLLMExtractor(),  # extract() always raises
+    )
+    repository = service.register_repository(name="Failing LLM", url=str(source))

+    summary = service.analyze_repository(repository.id)
+    graph = service.candidate_graph(repository.id, summary.analysis_run.id)
+    decisions = service.list_review_decisions(repository.id, summary.analysis_run.id)

+    assert summary.analysis_run.status == "completed"  # failure must not abort the run
+    assert graph.abilities[0].name == "Review Failing LLM Repository Usefulness"
+    assert decisions[0].action == "llm_extraction_failed"  # failure is recorded
+    assert "provider unavailable" in decisions[0].notes  # exception text is kept
+
+
 def test_approve_candidate_graph_publishes_ability_map_once(tmp_path):
    source = tmp_path / "repo"
    source.mkdir()