generated from coulomb/repo-seed
111 lines
3.3 KiB
Python
111 lines
3.3 KiB
Python
from repo_registry.core.models import ContentChunk, ObservedFact
|
|
from repo_registry.llm_extraction import (
|
|
ExtractedAbility,
|
|
ExtractedCapability,
|
|
ExtractedEvidence,
|
|
ExtractedFeature,
|
|
LLMExtractionMapper,
|
|
)
|
|
|
|
|
|
def fact(id, kind, name, path, line=None):
    """Create an ``ObservedFact`` fixture for the mapper tests.

    The repository, analysis-run and snapshot ids are fixed to 1 and
    ``value`` is always the empty string. When ``line`` is not None it is
    stored under the ``"line"`` key of the fact's metadata; otherwise the
    metadata dict is empty.
    """
    extra = {"line": line} if line is not None else {}
    return ObservedFact(
        id=id,
        kind=kind,
        name=name,
        path=path,
        value="",
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        metadata=extra,
    )
|
|
|
|
|
|
def chunk(id, kind, path, start_line=1):
    """Create a ``ContentChunk`` fixture for the mapper tests.

    The repository, analysis-run and snapshot ids are fixed to 1, the text
    is the placeholder ``"source text"``, and the chunk ends two lines
    after ``start_line``.
    """
    last_line = start_line + 2
    return ContentChunk(
        id=id,
        kind=kind,
        path=path,
        text="source text",
        start_line=start_line,
        end_line=last_line,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
    )
|
|
|
|
|
|
def test_llm_extraction_mapper_builds_candidate_drafts_with_source_refs():
    """Map a nested ability/capability/feature/evidence extraction.

    The extraction is mapped against three observed facts (README, route
    decorator, test file) and no chunks; the assertions check the name,
    confidence and source references of the resulting candidate drafts.
    """
    # Build the extraction from the innermost objects outwards.
    endpoint = ExtractedFeature(
        name="POST /classify",
        type="REST endpoint",
        location="app.py",
        source_paths=["app.py"],
    )
    test_evidence = ExtractedEvidence(
        type="test",
        reference="tests/test_app.py",
        strength="strong",
        source_paths=["tests/test_app.py"],
    )
    classify = ExtractedCapability(
        name="Classify Incoming Email",
        description="Classifies email.",
        inputs=["email body"],
        outputs=["intent"],
        source_paths=["README.md"],
        features=[endpoint],
        evidence=[test_evidence],
    )
    routing = ExtractedAbility(
        name="Business Email Routing",
        description="Routes email.",
        source_paths=["README.md"],
        capabilities=[classify],
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", line=4),
        fact(3, "test", "test_app.py", "tests/test_app.py"),
    ]

    drafts = LLMExtractionMapper().map([routing], observed, [])

    first_ability = drafts[0]
    first_capability = first_ability.capabilities[0]
    assert first_ability.name == "Business Email Routing"
    assert first_ability.confidence == 0.7
    assert first_ability.source_refs[0].fact_id == 1
    assert first_capability.inputs == ["email body"]
    assert first_capability.outputs == ["intent"]
    assert first_capability.features[0].source_refs[0].line == 4
    assert first_capability.evidence[0].source_refs[0].kind == "test"
|
|
|
|
|
|
def test_llm_extraction_mapper_can_use_chunk_refs_without_facts():
    """Map an ability whose only source is a content chunk.

    No facts are supplied, so the first source reference is expected to
    have no fact id while still carrying the chunk's path and start line.
    """
    readme_ability = ExtractedAbility(
        name="Readme Ability",
        source_paths=["README.md:1-3"],
    )
    mapper = LLMExtractionMapper()
    readme_chunk = chunk(1, "documentation", "README.md", start_line=1)

    candidates = mapper.map(
        [readme_ability],
        facts=[],
        chunks=[readme_chunk],
    )

    first_ref = candidates[0].source_refs[0]
    assert first_ref.fact_id is None
    assert first_ref.path == "README.md"
    assert first_ref.line == 1
|