From 2403accd06278e35bb8ff167fff4c7bca65fb26f Mon Sep 17 00:00:00 2001 From: tegwick Date: Sun, 26 Apr 2026 03:21:26 +0200 Subject: [PATCH] provenance for successful LLM-assisted candidate generation --- README.md | 3 +++ src/repo_registry/core/service.py | 18 +++++++++++++++--- tests/test_registry_service.py | 3 +++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c878937..9f65347 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,9 @@ reviewable candidates; when it returns no candidates, the deterministic heuristic generator remains the fallback. If extraction fails, the failure is recorded as a review decision and analysis continues with deterministic candidates. +Successful LLM candidate generation is also recorded as a review decision so +curators can see whether a graph came from deterministic heuristics or an LLM +draft. The FastAPI settings object also accepts `llm_provider` and `llm_model`. By default `llm_provider` is unset, so analysis is fully offline and deterministic. diff --git a/src/repo_registry/core/service.py b/src/repo_registry/core/service.py index c4a3f5c..34e8b6b 100644 --- a/src/repo_registry/core/service.py +++ b/src/repo_registry/core/service.py @@ -128,7 +128,11 @@ class RegistryService: ) stored_chunks = self.store.list_content_chunks(repository_id, completed_run.id) try: - candidates = self._generate_candidates(repository, facts, stored_chunks) + candidates, candidate_source = self._generate_candidates( + repository, + facts, + stored_chunks, + ) except Exception as exc: self.store.create_review_decision( repository_id, @@ -141,7 +145,15 @@ class RegistryService: facts, stored_chunks, ) + candidate_source = "deterministic" self.store.replace_candidate_graph(repository_id, completed_run.id, candidates) + if candidate_source == "llm": + self.store.create_review_decision( + repository_id, + completed_run.id, + action="llm_extraction_used", + notes=f"Generated {len(candidates)} candidate ability draft(s).", + ) return ScanSummary( analysis_run=completed_run, snapshot=snapshot, @@ -157,8 +169,8 @@ class RegistryService: if self.llm_extractor is not None: extracted = self.llm_extractor.extract(repository, chunks) if extracted: - return self.llm_mapper.map(extracted, facts, chunks) - return self.candidate_generator.generate(repository, facts, chunks) + return self.llm_mapper.map(extracted, facts, chunks), "llm" + return self.candidate_generator.generate(repository, facts, chunks), "deterministic" def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]: return self.store.list_analysis_runs(repository_id) diff --git a/tests/test_registry_service.py b/tests/test_registry_service.py index 614bd16..cd56321 100644 --- a/tests/test_registry_service.py +++ b/tests/test_registry_service.py @@ -413,12 +413,15 @@ def test_analyze_repository_can_use_optional_llm_extractor(tmp_path): summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) + decisions = service.list_review_decisions(repository.id, summary.analysis_run.id) assert extractor.calls assert extractor.calls[0][1] assert graph.abilities[0].name == "Business Email Routing" assert graph.abilities[0].capabilities[0].name == "Classify Incoming Email" assert graph.abilities[0].source_refs[0].path == "README.md" + assert decisions[0].action == "llm_extraction_used" + assert "1 candidate ability" in decisions[0].notes def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_candidates(tmp_path):