Files
repo-scoping/tests/test_llm_extraction_mapper.py

111 lines
3.3 KiB
Python

from repo_scoping.core.models import ContentChunk, ObservedFact
from repo_scoping.llm_extraction import (
ExtractedAbility,
ExtractedCapability,
ExtractedEvidence,
ExtractedFeature,
LLMExtractionMapper,
)
def fact(id, kind, name, path, line=None):
    """Build an ``ObservedFact`` fixture for the mapper tests.

    The repository, analysis-run and snapshot ids are all pinned to 1 and
    ``value`` is an empty string. ``metadata`` contains a ``"line"`` entry
    only when ``line`` is given; otherwise it is an empty dict.
    """
    # Omit the key entirely instead of storing an explicit ``None`` line.
    extra = {} if line is None else {"line": line}
    fixed_ids = {
        "repository_id": 1,
        "analysis_run_id": 1,
        "snapshot_id": 1,
    }
    return ObservedFact(
        id=id,
        kind=kind,
        path=path,
        name=name,
        value="",
        metadata=extra,
        **fixed_ids,
    )
def chunk(id, kind, path, start_line=1):
    """Build a ``ContentChunk`` fixture for the mapper tests.

    The repository, analysis-run and snapshot ids are all pinned to 1, the
    text is the placeholder ``"source text"``, and ``end_line`` is always
    ``start_line + 2``.
    """
    last_line = start_line + 2
    fixed_ids = {
        "repository_id": 1,
        "analysis_run_id": 1,
        "snapshot_id": 1,
    }
    return ContentChunk(
        id=id,
        path=path,
        kind=kind,
        start_line=start_line,
        end_line=last_line,
        text="source text",
        **fixed_ids,
    )
def test_llm_extraction_mapper_builds_candidate_drafts_with_source_refs():
    """Map a nested ability/capability/feature/evidence extraction against facts.

    Checks that the first candidate keeps the extracted name, inputs and
    outputs, reports a confidence of 0.7, and that the source refs on the
    ability, feature and evidence point at the facts sharing their paths.
    """
    # Build the extraction from the leaves up so each level reads on its own.
    endpoint_feature = ExtractedFeature(
        name="POST /classify",
        type="REST endpoint",
        location="app.py",
        source_paths=["app.py"],
    )
    test_evidence = ExtractedEvidence(
        type="test",
        reference="tests/test_app.py",
        strength="strong",
        source_paths=["tests/test_app.py"],
    )
    classify_capability = ExtractedCapability(
        name="Classify Incoming Email",
        description="Classifies email.",
        inputs=["email body"],
        outputs=["intent"],
        source_paths=["README.md"],
        features=[endpoint_feature],
        evidence=[test_evidence],
    )
    routing_ability = ExtractedAbility(
        name="Business Email Routing",
        description="Routes email.",
        source_paths=["README.md"],
        capabilities=[classify_capability],
    )

    # One fact per path referenced above; only the interface fact has a line.
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", line=4),
        fact(3, "test", "test_app.py", "tests/test_app.py"),
    ]

    drafts = LLMExtractionMapper().map([routing_ability], observed, [])

    top = drafts[0]
    mapped_capability = top.capabilities[0]

    assert top.name == "Business Email Routing"
    assert top.confidence == 0.7
    assert top.source_refs[0].fact_id == 1
    assert mapped_capability.inputs == ["email body"]
    assert mapped_capability.outputs == ["intent"]
    assert mapped_capability.features[0].source_refs[0].line == 4
    assert mapped_capability.evidence[0].source_refs[0].kind == "test"
def test_llm_extraction_mapper_can_use_chunk_refs_without_facts():
    """Map an ability when no facts are supplied, only a content chunk.

    The ability's source path is ``"README.md:1-3"`` and the only chunk is
    for ``README.md`` starting at line 1. The first source ref is expected
    to have no fact id, the path ``README.md`` and line 1.
    """
    readme_ability = ExtractedAbility(
        name="Readme Ability",
        source_paths=["README.md:1-3"],
    )

    drafts = LLMExtractionMapper().map(
        [readme_ability],
        facts=[],
        chunks=[chunk(1, "documentation", "README.md", start_line=1)],
    )

    first_ref = drafts[0].source_refs[0]
    assert first_ref.fact_id is None
    assert first_ref.path == "README.md"
    assert first_ref.line == 1