Record provenance for successful LLM-assisted candidate generation

This commit is contained in:
2026-04-26 03:21:26 +02:00
parent cdaeefd4b2
commit 2403accd06
3 changed files with 21 additions and 3 deletions

View File

@@ -128,6 +128,9 @@ reviewable candidates; when it returns no candidates, the deterministic
heuristic generator remains the fallback.
If extraction fails, the failure is recorded as a review decision and analysis
continues with deterministic candidates.
Successful LLM candidate generation is also recorded as a review decision so
curators can see whether a graph came from deterministic heuristics or an LLM
draft.
The FastAPI settings object also accepts `llm_provider` and `llm_model`. By
default `llm_provider` is unset, so analysis is fully offline and deterministic.

View File

@@ -128,7 +128,11 @@ class RegistryService:
) )
stored_chunks = self.store.list_content_chunks(repository_id, completed_run.id) stored_chunks = self.store.list_content_chunks(repository_id, completed_run.id)
try: try:
candidates = self._generate_candidates(repository, facts, stored_chunks) candidates, candidate_source = self._generate_candidates(
repository,
facts,
stored_chunks,
)
except Exception as exc: except Exception as exc:
self.store.create_review_decision( self.store.create_review_decision(
repository_id, repository_id,
@@ -141,7 +145,15 @@ class RegistryService:
facts, facts,
stored_chunks, stored_chunks,
) )
candidate_source = "deterministic"
self.store.replace_candidate_graph(repository_id, completed_run.id, candidates) self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
if candidate_source == "llm":
self.store.create_review_decision(
repository_id,
completed_run.id,
action="llm_extraction_used",
notes=f"Generated {len(candidates)} candidate ability draft(s).",
)
return ScanSummary( return ScanSummary(
analysis_run=completed_run, analysis_run=completed_run,
snapshot=snapshot, snapshot=snapshot,
@@ -157,8 +169,8 @@ class RegistryService:
if self.llm_extractor is not None: if self.llm_extractor is not None:
extracted = self.llm_extractor.extract(repository, chunks) extracted = self.llm_extractor.extract(repository, chunks)
if extracted: if extracted:
return self.llm_mapper.map(extracted, facts, chunks) return self.llm_mapper.map(extracted, facts, chunks), "llm"
return self.candidate_generator.generate(repository, facts, chunks) return self.candidate_generator.generate(repository, facts, chunks), "deterministic"
def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]: def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]:
return self.store.list_analysis_runs(repository_id) return self.store.list_analysis_runs(repository_id)

View File

@@ -413,12 +413,15 @@ def test_analyze_repository_can_use_optional_llm_extractor(tmp_path):
summary = service.analyze_repository(repository.id) summary = service.analyze_repository(repository.id)
graph = service.candidate_graph(repository.id, summary.analysis_run.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id)
decisions = service.list_review_decisions(repository.id, summary.analysis_run.id)
assert extractor.calls assert extractor.calls
assert extractor.calls[0][1] assert extractor.calls[0][1]
assert graph.abilities[0].name == "Business Email Routing" assert graph.abilities[0].name == "Business Email Routing"
assert graph.abilities[0].capabilities[0].name == "Classify Incoming Email" assert graph.abilities[0].capabilities[0].name == "Classify Incoming Email"
assert graph.abilities[0].source_refs[0].path == "README.md" assert graph.abilities[0].source_refs[0].path == "README.md"
assert decisions[0].action == "llm_extraction_used"
assert "1 candidate ability" in decisions[0].notes
def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_candidates(tmp_path): def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_candidates(tmp_path):