From cdaeefd4b2474d37cfbf2dd247227e0574f18783 Mon Sep 17 00:00:00 2001 From: tegwick Date: Sun, 26 Apr 2026 03:18:58 +0200 Subject: [PATCH] Hardened the optional LLM extraction path --- README.md | 2 ++ src/repo_registry/core/service.py | 15 ++++++++++++++- tests/test_registry_service.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 27442e4..c878937 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,8 @@ Application code can inject an `LLMCandidateExtractor` into `RegistryService`. When an extractor is present and returns candidates, analysis stores those reviewable candidates; when it returns no candidates, the deterministic heuristic generator remains the fallback. +If extraction fails, the failure is recorded as a review decision and analysis +continues with deterministic candidates. The FastAPI settings object also accepts `llm_provider` and `llm_model`. By default `llm_provider` is unset, so analysis is fully offline and deterministic. diff --git a/src/repo_registry/core/service.py b/src/repo_registry/core/service.py index e6adf95..c4a3f5c 100644 --- a/src/repo_registry/core/service.py +++ b/src/repo_registry/core/service.py @@ -127,7 +127,20 @@ class RegistryService: chunks, ) stored_chunks = self.store.list_content_chunks(repository_id, completed_run.id) - candidates = self._generate_candidates(repository, facts, stored_chunks) + try: + candidates = self._generate_candidates(repository, facts, stored_chunks) + except Exception as exc: + self.store.create_review_decision( + repository_id, + completed_run.id, + action="llm_extraction_failed", + notes=str(exc), + ) + candidates = self.candidate_generator.generate( + repository, + facts, + stored_chunks, + ) self.store.replace_candidate_graph(repository_id, completed_run.id, candidates) return ScanSummary( analysis_run=completed_run, diff --git a/tests/test_registry_service.py b/tests/test_registry_service.py index f19676a..614bd16 100644 --- a/tests/test_registry_service.py +++ b/tests/test_registry_service.py @@ -22,6 +22,11 @@ class FakeLLMExtractor: return self.abilities +class FailingLLMExtractor: + def extract(self, repository, chunks): + raise RuntimeError("provider unavailable") + + def test_manual_registry_builds_ability_map(tmp_path): service = make_service(tmp_path) @@ -435,6 +440,29 @@ def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_ca assert graph.abilities[0].name == "Review Fallback Repository Usefulness" +def test_analyze_repository_records_llm_failure_and_falls_back(tmp_path): + source = tmp_path / "repo" + source.mkdir() + (source / "README.md").write_text("# Failing LLM\n", encoding="utf-8") + store = RegistryStore(tmp_path / "registry.sqlite3") + store.initialize() + service = RegistryService( + store, + ingestion=GitIngestionService(tmp_path / "checkouts"), + llm_extractor=FailingLLMExtractor(), + ) + repository = service.register_repository(name="Failing LLM", url=str(source)) + + summary = service.analyze_repository(repository.id) + graph = service.candidate_graph(repository.id, summary.analysis_run.id) + decisions = service.list_review_decisions(repository.id, summary.analysis_run.id) + + assert summary.analysis_run.status == "completed" + assert graph.abilities[0].name == "Review Failing LLM Repository Usefulness" + assert decisions[0].action == "llm_extraction_failed" + assert "provider unavailable" in decisions[0].notes + + def test_approve_candidate_graph_publishes_ability_map_once(tmp_path): source = tmp_path / "repo" source.mkdir()