From cdaeefd4b2474d37cfbf2dd247227e0574f18783 Mon Sep 17 00:00:00 2001
From: tegwick <bernd.worsch@gmail.com>
Date: Sun, 26 Apr 2026 03:18:58 +0200
Subject: [PATCH] Harden the optional LLM extraction path

---
 README.md                         |  2 ++
 src/repo_registry/core/service.py | 15 ++++++++++++++-
 tests/test_registry_service.py    | 28 ++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 27442e4..c878937 100644
--- a/README.md
+++ b/README.md
@@ -126,6 +126,8 @@ Application code can inject an `LLMCandidateExtractor` into `RegistryService`.
 When an extractor is present and returns candidates, analysis stores those
 reviewable candidates; when it returns no candidates, the deterministic
 heuristic generator remains the fallback.
+If the extractor raises, the error is recorded as an `llm_extraction_failed`
+review decision and analysis continues with deterministic candidates.
 
 The FastAPI settings object also accepts `llm_provider` and `llm_model`. By
 default `llm_provider` is unset, so analysis is fully offline and deterministic.
diff --git a/src/repo_registry/core/service.py b/src/repo_registry/core/service.py
index e6adf95..c4a3f5c 100644
--- a/src/repo_registry/core/service.py
+++ b/src/repo_registry/core/service.py
@@ -127,7 +127,20 @@ class RegistryService:
             chunks,
         )
         stored_chunks = self.store.list_content_chunks(repository_id, completed_run.id)
-        candidates = self._generate_candidates(repository, facts, stored_chunks)
+        try:
+            candidates = self._generate_candidates(repository, facts, stored_chunks)
+        except Exception as exc:
+            self.store.create_review_decision(
+                repository_id,
+                completed_run.id,
+                action="llm_extraction_failed",
+                notes=str(exc),
+            )
+            candidates = self.candidate_generator.generate(
+                repository,
+                facts,
+                stored_chunks,
+            )
         self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
         return ScanSummary(
             analysis_run=completed_run,
diff --git a/tests/test_registry_service.py b/tests/test_registry_service.py
index f19676a..614bd16 100644
--- a/tests/test_registry_service.py
+++ b/tests/test_registry_service.py
@@ -22,6 +22,11 @@ class FakeLLMExtractor:
         return self.abilities
 
 
+class FailingLLMExtractor:  # test double whose extract() always raises
+    def extract(self, repository, chunks):  # both arguments are ignored
+        raise RuntimeError("provider unavailable")  # message is asserted on by the test
+
+
 def test_manual_registry_builds_ability_map(tmp_path):
     service = make_service(tmp_path)
 
@@ -435,6 +440,29 @@ def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_ca
     assert graph.abilities[0].name == "Review Fallback Repository Usefulness"
 
 
+def test_analyze_repository_records_llm_failure_and_falls_back(tmp_path):
+    source = tmp_path / "repo"  # local directory registered as the repository URL
+    source.mkdir()
+    (source / "README.md").write_text("# Failing LLM\n", encoding="utf-8")
+    store = RegistryStore(tmp_path / "registry.sqlite3")
+    store.initialize()
+    service = RegistryService(
+        store,
+        ingestion=GitIngestionService(tmp_path / "checkouts"),
+        llm_extractor=FailingLLMExtractor(),  # extract() always raises RuntimeError
+    )
+    repository = service.register_repository(name="Failing LLM", url=str(source))
+    # Run analysis with the raising extractor, then read back what was stored.
+    summary = service.analyze_repository(repository.id)
+    graph = service.candidate_graph(repository.id, summary.analysis_run.id)
+    decisions = service.list_review_decisions(repository.id, summary.analysis_run.id)
+    # Expect a completed run, fallback candidates, and a decision with the error text.
+    assert summary.analysis_run.status == "completed"
+    assert graph.abilities[0].name == "Review Failing LLM Repository Usefulness"
+    assert decisions[0].action == "llm_extraction_failed"
+    assert "provider unavailable" in decisions[0].notes  # text raised by the extractor
+
+
 def test_approve_candidate_graph_publishes_ability_map_once(tmp_path):
     source = tmp_path / "repo"
     source.mkdir()