generated from coulomb/repo-seed
llm_extraction integration by adding the bridge into candidate graph drafts
This commit is contained in:
@@ -7,6 +7,7 @@ from repo_registry.llm_extraction.extractor import (
|
||||
LLMExtractionError,
|
||||
create_llm_connect_adapter,
|
||||
)
|
||||
from repo_registry.llm_extraction.mapper import LLMExtractionMapper
|
||||
|
||||
__all__ = [
|
||||
"ExtractedAbility",
|
||||
@@ -15,5 +16,6 @@ __all__ = [
|
||||
"ExtractedFeature",
|
||||
"LLMCandidateExtractor",
|
||||
"LLMExtractionError",
|
||||
"LLMExtractionMapper",
|
||||
"create_llm_connect_adapter",
|
||||
]
|
||||
|
||||
145
src/repo_registry/llm_extraction/mapper.py
Normal file
145
src/repo_registry/llm_extraction/mapper.py
Normal file
@@ -0,0 +1,145 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from repo_registry.candidate_graph.generator import (
|
||||
CandidateAbilityDraft,
|
||||
CandidateCapabilityDraft,
|
||||
CandidateEvidenceDraft,
|
||||
CandidateFeatureDraft,
|
||||
)
|
||||
from repo_registry.core.models import ContentChunk, ObservedFact, SourceReference
|
||||
from repo_registry.llm_extraction.extractor import ExtractedAbility
|
||||
|
||||
|
||||
class LLMExtractionMapper:
    """Map structured LLM extraction drafts into reviewable candidate drafts.

    Each extracted ability, capability, feature and evidence item is copied
    into the matching ``Candidate*Draft`` object. The source paths named by
    the LLM are resolved against the observed facts (and, as a fallback, the
    content chunks) to produce ``SourceReference`` lists, and those
    references drive a heuristic confidence score.
    """

    def map(
        self,
        abilities: list[ExtractedAbility],
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
    ) -> list[CandidateAbilityDraft]:
        """Convert extracted abilities into candidate ability drafts.

        Args:
            abilities: Extraction results to convert, in order.
            facts: Observed facts used to resolve source paths.
            chunks: Content chunks used when no fact matches a path.

        Returns:
            One ``CandidateAbilityDraft`` per input ability, in input order.
        """
        return [self._ability_draft(ability, facts, chunks) for ability in abilities]

    def _ability_draft(
        self,
        ability: ExtractedAbility,
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
    ) -> CandidateAbilityDraft:
        """Build the draft for one ability, including its capabilities."""
        # Resolve once and reuse for both the confidence and the draft itself.
        refs = self._source_refs(ability.source_paths, facts, chunks)
        return CandidateAbilityDraft(
            name=ability.name,
            description=ability.description,
            confidence=self._score_refs(refs, 0.45),
            source_refs=refs,
            capabilities=[
                self._capability_draft(capability, facts, chunks)
                for capability in ability.capabilities
            ],
        )

    def _capability_draft(
        self,
        capability,
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
    ) -> CandidateCapabilityDraft:
        """Build the draft for one capability with its features and evidence."""
        refs = self._source_refs(capability.source_paths, facts, chunks)
        return CandidateCapabilityDraft(
            name=capability.name,
            description=capability.description,
            inputs=capability.inputs,
            outputs=capability.outputs,
            confidence=self._score_refs(refs, 0.5),
            source_refs=refs,
            features=[
                self._feature_draft(feature, facts, chunks)
                for feature in capability.features
            ],
            evidence=[
                self._evidence_draft(evidence, facts, chunks)
                for evidence in capability.evidence
            ],
        )

    def _feature_draft(
        self,
        feature,
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
    ) -> CandidateFeatureDraft:
        """Build the draft for one feature."""
        # A feature without explicit source paths falls back to its location.
        refs = self._source_refs(
            feature.source_paths or [feature.location],
            facts,
            chunks,
        )
        return CandidateFeatureDraft(
            name=feature.name,
            type=feature.type,
            location=feature.location,
            confidence=self._score_refs(refs, 0.45),
            source_refs=refs,
        )

    def _evidence_draft(
        self,
        evidence,
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
    ) -> CandidateEvidenceDraft:
        """Build the draft for one evidence item."""
        # Evidence without explicit source paths falls back to its reference.
        return CandidateEvidenceDraft(
            type=evidence.type,
            reference=evidence.reference,
            strength=evidence.strength,
            source_refs=self._source_refs(
                evidence.source_paths or [evidence.reference],
                facts,
                chunks,
            ),
        )

    def _confidence(
        self,
        source_paths: list[str],
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
        base: float,
    ) -> float:
        """Return the confidence for ``source_paths`` starting from ``base``.

        Resolves the paths with ``_source_refs`` and scores the result; see
        ``_score_refs`` for the scoring rules.
        """
        return self._score_refs(self._source_refs(source_paths, facts, chunks), base)

    @staticmethod
    def _score_refs(refs: list[SourceReference], base: float) -> float:
        """Score already-resolved references.

        Returns ``base`` unchanged when ``refs`` is empty. Otherwise adds
        0.15 for having any reference, plus 0.10 each when the reference
        kinds include "documentation", "test" or "example", and "interface".
        The result is rounded to two decimals and capped at 0.95.
        """
        if not refs:
            return base
        kinds = {ref.kind for ref in refs}
        score = base + 0.15
        if "documentation" in kinds:
            score += 0.10
        if "test" in kinds or "example" in kinds:
            score += 0.10
        if "interface" in kinds:
            score += 0.10
        return min(0.95, round(score, 2))

    def _source_refs(
        self,
        source_paths: list[str],
        facts: list[ObservedFact],
        chunks: list[ContentChunk],
    ) -> list[SourceReference]:
        """Resolve source paths into de-duplicated source references.

        For each path, every fact whose ``path`` matches yields a reference.
        Only when no reference for that path exists is the first matching
        chunk used instead. Missing (``None``) or empty paths are skipped.

        Args:
            source_paths: Paths to resolve; text after the first ":" is
                ignored (presumably a ``path:line`` suffix - confirm against
                the extractor's output format).
            facts: Observed facts to match by ``path``.
            chunks: Content chunks to fall back on, matched by ``path``.

        Returns:
            References in discovery order, unique by
            ``(fact_id, path, kind, line)``.
        """
        refs: list[SourceReference] = []
        seen: set[tuple[int | None, str, str, int | None]] = set()
        # Paths of every reference appended so far; lets the chunk fallback
        # be skipped without rescanning ``refs`` for each source path.
        covered_paths: set[str] = set()

        for path in source_paths:
            if not path:
                # Fallback locations/references may be None or empty.
                continue
            normalized = path.split(":", 1)[0]

            for fact in facts:
                if fact.path != normalized:
                    continue
                ref = SourceReference(
                    fact_id=fact.id,
                    path=fact.path,
                    kind=fact.kind,
                    name=fact.name,
                    line=fact.metadata.get("line"),
                )
                key = (ref.fact_id, ref.path, ref.kind, ref.line)
                if key not in seen:
                    seen.add(key)
                    refs.append(ref)
                    covered_paths.add(ref.path)

            if normalized in covered_paths:
                continue

            for chunk in chunks:
                if chunk.path != normalized:
                    continue
                ref = SourceReference(
                    fact_id=None,
                    path=chunk.path,
                    kind=chunk.kind,
                    name=chunk.path,
                    line=chunk.start_line,
                )
                key = (ref.fact_id, ref.path, ref.kind, ref.line)
                if key not in seen:
                    seen.add(key)
                    refs.append(ref)
                    covered_paths.add(ref.path)
                # Only the first chunk of a file is used as the fallback.
                break

        return refs
|
||||
Reference in New Issue
Block a user