# Source: repo-scoping/tests/test_llm_extraction.py
# (177 lines, 5.0 KiB, Python)

import pytest
from repo_scoping.core.models import ContentChunk, Repository
from repo_scoping.llm_extraction import (
LLMCandidateExtractor,
LLMExtractionError,
create_llm_connect_adapter,
)
class Response:
    """Minimal response object returned by ``FakeAdapter.execute_prompt``.

    It exposes a single ``content`` attribute holding whatever value was
    passed to the constructor.
    """

    def __init__(self, content):
        self.content = content
class FakeAdapter:
    """Test double that returns canned content and records its inputs.

    ``execute_prompt`` stores the most recent ``prompt`` and ``config`` on
    the instance so that tests can inspect what was passed in.
    """

    def __init__(self, content):
        self.content = content
        self.last_prompt = ""
        # Placeholder until the first execute_prompt call overwrites it.
        self.last_config = object()

    def execute_prompt(self, prompt, config):
        """Record ``prompt`` and ``config``; return a ``Response`` with the canned content."""
        self.last_prompt = prompt
        self.last_config = config
        return Response(self.content)
def repository():
    """Build the ``Repository`` instance passed to the extractor in these tests."""
    return Repository(
        id=1,
        name="MailRouter",
        url="/tmp/mail-router",
        description="Routes inbound email.",
        branch="main",
        status="analyzed",
    )
def chunk():
    """Build a ``ContentChunk`` for ``README.md`` (documentation, lines 1-2)."""
    return ContentChunk(
        id=1,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        path="README.md",
        kind="documentation",
        start_line=1,
        end_line=2,
        text="# MailRouter\nRoutes incoming customer email.",
    )
def test_llm_prompt_filters_derived_scope_and_labels_source_roles():
    """Check which chunks reach the prompt and how they are labelled.

    Three chunks with different ``source_role`` metadata are fed to the
    extractor; the assertions then inspect the prompt captured by the fake
    adapter.
    """
    adapter = FakeAdapter('{"abilities": []}')
    extractor = LLMCandidateExtractor(adapter)
    chunks = [
        # Expected to be absent from the prompt (see SCOPE.md assertions below).
        ContentChunk(
            id=1,
            repository_id=1,
            analysis_run_id=1,
            snapshot_id=1,
            path="SCOPE.md",
            kind="scope",
            start_line=1,
            end_line=3,
            text="# SCOPE\n\nOld approved LLM routing entry.",
            metadata={"source_role": "derived_scope"},
        ),
        # Expected to be present and labelled with its source role.
        ContentChunk(
            id=2,
            repository_id=1,
            analysis_run_id=1,
            snapshot_id=1,
            path="INTENT.md",
            kind="intent",
            start_line=1,
            end_line=3,
            text="# INTENT\n\nProvide lightweight IAM.",
            metadata={"source_role": "intent_summary"},
        ),
        # Expected to be absent from the prompt (see CLAUDE.md assertion below).
        ContentChunk(
            id=3,
            repository_id=1,
            analysis_run_id=1,
            snapshot_id=1,
            path="CLAUDE.md",
            kind="documentation",
            start_line=1,
            end_line=2,
            text="# CLAUDE\n\nAgent guidance.",
            metadata={"source_role": "agent_guidance"},
        ),
    ]

    extractor.extract(repository(), chunks)

    # The intent summary is included together with its role label.
    assert "Source: INTENT.md" in adapter.last_prompt
    assert "source_role=intent_summary" in adapter.last_prompt

    # Neither the derived scope header nor its text may appear.
    assert "Source: SCOPE.md" not in adapter.last_prompt
    assert "Old approved LLM routing entry" not in adapter.last_prompt

    # Agent guidance is not included as a source either.
    assert "Source: CLAUDE.md" not in adapter.last_prompt

    # The prompt carries an explicit instruction about SCOPE.md.
    assert "Do not use SCOPE.md" in adapter.last_prompt
def test_llm_candidate_extractor_parses_structured_response():
    """A nested abilities/capabilities/features/evidence payload is parsed.

    The fake adapter returns a JSON document with one ability; the test
    checks both the prompt that was sent and the objects built from the
    response.
    """
    # The payload literal is kept verbatim, hence the flush-left lines.
    adapter = FakeAdapter(
        """
{
"abilities": [
{
"name": "Business Email Routing",
"description": "Routes inbound customer email.",
"source_paths": ["README.md"],
"capabilities": [
{
"name": "Classify Incoming Email",
"description": "Classify messages.",
"inputs": ["email body"],
"outputs": ["intent"],
"source_paths": ["README.md"],
"features": [
{
"name": "POST /classify",
"type": "REST endpoint",
"location": "app.py",
"source_paths": ["app.py"]
}
],
"evidence": [
{
"type": "documentation",
"reference": "README.md",
"strength": "medium",
"source_paths": ["README.md"]
}
]
}
]
}
]
}
"""
    )
    extractor = LLMCandidateExtractor(adapter)

    abilities = extractor.extract(repository(), [chunk()])

    # Prompt side: the JSON-only instruction and the chunk's path:line range.
    assert "Return strict JSON only" in adapter.last_prompt
    assert "README.md:1-2" in adapter.last_prompt

    # Response side: each nesting level is reachable on the parsed result.
    assert abilities[0].name == "Business Email Routing"
    assert abilities[0].capabilities[0].features[0].name == "POST /classify"
    assert abilities[0].capabilities[0].evidence[0].reference == "README.md"
def test_llm_candidate_extractor_accepts_fenced_json():
    """A response wrapped in a Markdown ``json`` code fence is still parsed."""
    adapter = FakeAdapter(
        '```json\n{"abilities": [{"name": "A", "capabilities": []}]}\n```'
    )

    abilities = LLMCandidateExtractor(adapter).extract(repository(), [])

    assert abilities[0].name == "A"
def test_llm_candidate_extractor_rejects_invalid_json():
    """A non-JSON response makes ``extract`` raise ``LLMExtractionError``."""
    adapter = FakeAdapter("not json")

    with pytest.raises(LLMExtractionError):
        LLMCandidateExtractor(adapter).extract(repository(), [])
def test_llm_connect_factory_reports_missing_dependency():
    """The factory raises ``LLMExtractionError`` naming the missing package.

    NOTE(review): this presumably relies on llm-connect not being installed
    in the test environment; confirm against the factory implementation.
    """
    with pytest.raises(LLMExtractionError) as exc:
        create_llm_connect_adapter("mock")

    # Must stay outside the ``with`` block: statements after the raising
    # call inside the block are never executed.
    assert "llm-connect is not installed" in str(exc.value)