llm_extraction integration by adding the bridge into candidate graph drafts

This commit is contained in:
2026-04-26 03:08:55 +02:00
parent 7e66c57350
commit 3aa0c08ab9
4 changed files with 261 additions and 2 deletions

View File

@@ -7,6 +7,7 @@ from repo_registry.llm_extraction.extractor import (
LLMExtractionError,
create_llm_connect_adapter,
)
from repo_registry.llm_extraction.mapper import LLMExtractionMapper
__all__ = [
"ExtractedAbility",
@@ -15,5 +16,6 @@ __all__ = [
"ExtractedFeature",
"LLMCandidateExtractor",
"LLMExtractionError",
"LLMExtractionMapper",
"create_llm_connect_adapter",
]

View File

@@ -0,0 +1,145 @@
from __future__ import annotations
from repo_registry.candidate_graph.generator import (
CandidateAbilityDraft,
CandidateCapabilityDraft,
CandidateEvidenceDraft,
CandidateFeatureDraft,
)
from repo_registry.core.models import ContentChunk, ObservedFact, SourceReference
from repo_registry.llm_extraction.extractor import ExtractedAbility
class LLMExtractionMapper:
    """Map structured LLM extraction drafts into reviewable candidate drafts.

    Every extracted ability, capability, feature and evidence item is copied
    into the matching ``Candidate*Draft`` and annotated with source references
    resolved from the observed facts, falling back to content chunks when no
    fact matches a path. Abilities, capabilities and features additionally
    receive a confidence score derived from the kinds of references found.
    """

    def map(
        self,
        abilities: list[ExtractedAbility],
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
    ) -> list[CandidateAbilityDraft]:
        """Convert extracted abilities into candidate ability drafts.

        Args:
            abilities: Abilities produced by the LLM extractor.
            facts: Observed facts used to resolve source references.
            chunks: Content chunks used as a fallback when no fact matches.

        Returns:
            One draft per ability, in input order, with nested capability,
            feature and evidence drafts.
        """
        drafts: list[CandidateAbilityDraft] = []
        for ability in abilities:
            # Resolve once; the same references feed both the score and the draft.
            refs = self._source_refs(ability.source_paths, facts, chunks)
            drafts.append(
                CandidateAbilityDraft(
                    name=ability.name,
                    description=ability.description,
                    confidence=self._score(refs, 0.45),
                    source_refs=refs,
                    capabilities=[
                        self._map_capability(capability, facts, chunks)
                        for capability in ability.capabilities
                    ],
                )
            )
        return drafts

    def _map_capability(
        self,
        capability,
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
    ) -> CandidateCapabilityDraft:
        """Build the draft for one extracted capability and its children."""
        refs = self._source_refs(capability.source_paths, facts, chunks)
        return CandidateCapabilityDraft(
            name=capability.name,
            description=capability.description,
            inputs=capability.inputs,
            outputs=capability.outputs,
            confidence=self._score(refs, 0.5),
            source_refs=refs,
            features=[
                self._map_feature(feature, facts, chunks)
                for feature in capability.features
            ],
            evidence=[
                self._map_evidence(evidence, facts, chunks)
                for evidence in capability.evidence
            ],
        )

    def _map_feature(
        self,
        feature,
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
    ) -> CandidateFeatureDraft:
        """Build the draft for one extracted feature."""
        # Without explicit source paths, the feature's location is used as
        # the path to look up.
        refs = self._source_refs(
            feature.source_paths or [feature.location],
            facts,
            chunks,
        )
        return CandidateFeatureDraft(
            name=feature.name,
            type=feature.type,
            location=feature.location,
            confidence=self._score(refs, 0.45),
            source_refs=refs,
        )

    def _map_evidence(
        self,
        evidence,
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
    ) -> CandidateEvidenceDraft:
        """Build the draft for one extracted evidence item (no confidence)."""
        # Without explicit source paths, the evidence reference is used as
        # the path to look up.
        refs = self._source_refs(
            evidence.source_paths or [evidence.reference],
            facts,
            chunks,
        )
        return CandidateEvidenceDraft(
            type=evidence.type,
            reference=evidence.reference,
            strength=evidence.strength,
            source_refs=refs,
        )

    def _confidence(
        self,
        source_paths: list[str],
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
        base: float,
    ) -> float:
        """Resolve ``source_paths`` and score the resulting references.

        Args:
            source_paths: Paths named by the extraction.
            facts: Observed facts used to resolve source references.
            chunks: Content chunks used as a fallback when no fact matches.
            base: Starting confidence, returned unchanged when nothing resolves.

        Returns:
            The confidence score for the resolved references.
        """
        return self._score(self._source_refs(source_paths, facts, chunks), base)

    @staticmethod
    def _score(refs: list[SourceReference], base: float) -> float:
        """Score already-resolved references against a base confidence.

        With no references the base is returned as-is (neither rounded nor
        capped). Otherwise the score gains 0.15 for having any reference and
        0.10 for each of these groups of kinds present: ``documentation``;
        ``test`` or ``example``; ``interface``. The result is rounded to two
        decimals and capped at 0.95.
        """
        if not refs:
            return base
        kinds = {ref.kind for ref in refs}
        score = base + 0.15
        if "documentation" in kinds:
            score += 0.10
        if "test" in kinds or "example" in kinds:
            score += 0.10
        if "interface" in kinds:
            score += 0.10
        return min(0.95, round(score, 2))

    def _source_refs(
        self,
        source_paths: list[str],
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
    ) -> list[SourceReference]:
        """Resolve extraction paths to de-duplicated source references.

        For each path, everything after the first ``:`` is dropped
        (presumably a ``path:line`` suffix -- confirm against the extractor
        output). Every fact whose path equals the remainder yields a
        reference. Only when no reference for that path exists yet does the
        first chunk with that path yield a fallback reference with
        ``fact_id=None``.

        Args:
            source_paths: Paths named by the extraction. A ``None`` list and
                empty or ``None`` entries are ignored rather than raising,
                because callers fall back to optional fields such as a
                feature location.
            facts: Observed facts to match by path.
            chunks: Content chunks to match by path when no fact matches.

        Returns:
            References in discovery order, unique by
            ``(fact_id, path, kind, line)``.
        """
        refs: list[SourceReference] = []
        seen: set[tuple[int | None, str, str, int | None]] = set()
        for path in source_paths or ():
            if not path:
                # Nothing to look up for a missing location/reference.
                continue
            normalized = path.split(":", 1)[0]
            for fact in facts:
                if fact.path != normalized:
                    continue
                ref = SourceReference(
                    fact_id=fact.id,
                    path=fact.path,
                    kind=fact.kind,
                    name=fact.name,
                    line=fact.metadata.get("line"),
                )
                key = (ref.fact_id, ref.path, ref.kind, ref.line)
                if key not in seen:
                    seen.add(key)
                    refs.append(ref)
            # A path that already has a reference does not need the chunk
            # fallback.
            if any(ref.path == normalized for ref in refs):
                continue
            for chunk in chunks:
                if chunk.path != normalized:
                    continue
                ref = SourceReference(
                    fact_id=None,
                    path=chunk.path,
                    kind=chunk.kind,
                    name=chunk.path,
                    line=chunk.start_line,
                )
                key = (ref.fact_id, ref.path, ref.kind, ref.line)
                if key not in seen:
                    seen.add(key)
                    refs.append(ref)
                # Only the first matching chunk is used as a fallback.
                break
        return refs