From 2403accd06278e35bb8ff167fff4c7bca65fb26f Mon Sep 17 00:00:00 2001
From: tegwick <bernd.worsch@gmail.com>
Date: Sun, 26 Apr 2026 03:21:26 +0200
Subject: [PATCH] Record provenance for successful LLM-assisted candidate generation

---
 README.md                         |  3 +++
 src/repo_registry/core/service.py | 18 +++++++++++++++---
 tests/test_registry_service.py    |  3 +++
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index c878937..9f65347 100644
--- a/README.md
+++ b/README.md
@@ -128,6 +128,9 @@ reviewable candidates; when it returns no candidates, the deterministic
 heuristic generator remains the fallback.
 If extraction fails, the failure is recorded as a review decision and analysis
 continues with deterministic candidates.
+Successful LLM candidate generation is also recorded as a review decision so
+curators can see whether a graph came from deterministic heuristics or an LLM
+draft.
 
 The FastAPI settings object also accepts `llm_provider` and `llm_model`. By
 default `llm_provider` is unset, so analysis is fully offline and deterministic.
diff --git a/src/repo_registry/core/service.py b/src/repo_registry/core/service.py
index c4a3f5c..34e8b6b 100644
--- a/src/repo_registry/core/service.py
+++ b/src/repo_registry/core/service.py
@@ -128,7 +128,11 @@ class RegistryService:
         )
         stored_chunks = self.store.list_content_chunks(repository_id, completed_run.id)
         try:
-            candidates = self._generate_candidates(repository, facts, stored_chunks)
+            candidates, candidate_source = self._generate_candidates(
+                repository,
+                facts,
+                stored_chunks,
+            )
         except Exception as exc:
             self.store.create_review_decision(
                 repository_id,
@@ -141,7 +145,15 @@ class RegistryService:
                 facts,
                 stored_chunks,
             )
+            candidate_source = "deterministic"
         self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
+        if candidate_source == "llm":
+            self.store.create_review_decision(
+                repository_id,
+                completed_run.id,
+                action="llm_extraction_used",
+                notes=f"Generated {len(candidates)} candidate ability draft(s).",
+            )
         return ScanSummary(
             analysis_run=completed_run,
             snapshot=snapshot,
@@ -157,8 +169,8 @@ class RegistryService:
         if self.llm_extractor is not None:
             extracted = self.llm_extractor.extract(repository, chunks)
             if extracted:
-                return self.llm_mapper.map(extracted, facts, chunks)
-        return self.candidate_generator.generate(repository, facts, chunks)
+                return self.llm_mapper.map(extracted, facts, chunks), "llm"
+        return self.candidate_generator.generate(repository, facts, chunks), "deterministic"
 
     def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]:
         return self.store.list_analysis_runs(repository_id)
diff --git a/tests/test_registry_service.py b/tests/test_registry_service.py
index 614bd16..cd56321 100644
--- a/tests/test_registry_service.py
+++ b/tests/test_registry_service.py
@@ -413,12 +413,15 @@ def test_analyze_repository_can_use_optional_llm_extractor(tmp_path):
 
     summary = service.analyze_repository(repository.id)
     graph = service.candidate_graph(repository.id, summary.analysis_run.id)
+    decisions = service.list_review_decisions(repository.id, summary.analysis_run.id)
 
     assert extractor.calls
     assert extractor.calls[0][1]
     assert graph.abilities[0].name == "Business Email Routing"
     assert graph.abilities[0].capabilities[0].name == "Classify Incoming Email"
     assert graph.abilities[0].source_refs[0].path == "README.md"
+    assert decisions[0].action == "llm_extraction_used"
+    assert "1 candidate ability" in decisions[0].notes
 
 
 def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_candidates(tmp_path):