generated from coulomb/repo-seed
177 lines
5.0 KiB
Python
177 lines
5.0 KiB
Python
import pytest
|
|
|
|
from repo_registry.core.models import ContentChunk, Repository
|
|
from repo_registry.llm_extraction import (
|
|
LLMCandidateExtractor,
|
|
LLMExtractionError,
|
|
create_llm_connect_adapter,
|
|
)
|
|
|
|
|
|
class Response:
    """Bare-bones stand-in for an LLM response.

    Carries a single ``content`` attribute holding whatever was passed to
    the constructor.
    """

    def __init__(self, content):
        # Stored untouched: no copying, no validation.
        self.content = content
|
|
|
|
|
|
class FakeAdapter:
    """Recording test double for an LLM adapter.

    Every ``execute_prompt`` call answers with the canned ``content`` given
    at construction and remembers the prompt and config it was called with.
    """

    def __init__(self, content):
        self.content = content
        # Placeholders until the first execute_prompt call overwrites them.
        self.last_prompt = ""
        self.last_config = object()

    def execute_prompt(self, prompt, config):
        """Record ``prompt`` and ``config``; return the canned reply in a Response."""
        self.last_prompt, self.last_config = prompt, config
        return Response(self.content)
|
|
|
|
|
|
def repository():
    """Build the ``Repository`` fixture ("MailRouter") shared by these tests."""
    fields = {
        "id": 1,
        "name": "MailRouter",
        "url": "/tmp/mail-router",
        "description": "Routes inbound email.",
        "branch": "main",
        "status": "analyzed",
    }
    return Repository(**fields)
|
|
|
|
|
|
def chunk():
    """Build a two-line README.md documentation ``ContentChunk`` fixture."""
    fields = {
        "id": 1,
        "repository_id": 1,
        "analysis_run_id": 1,
        "snapshot_id": 1,
        "path": "README.md",
        "kind": "documentation",
        "start_line": 1,
        "end_line": 2,
        "text": "# MailRouter\nRoutes incoming customer email.",
    }
    return ContentChunk(**fields)
|
|
|
|
|
|
def test_llm_prompt_filters_derived_scope_and_labels_source_roles():
    """Check which chunks reach the prompt and how they are labelled.

    Three chunks are supplied (SCOPE.md, INTENT.md, CLAUDE.md). The prompt
    must include INTENT.md together with its ``source_role`` label, must
    omit both the SCOPE.md and CLAUDE.md chunks, and must carry the
    "Do not use SCOPE.md" instruction.
    """
    adapter = FakeAdapter('{"abilities": []}')
    extractor = LLMCandidateExtractor(adapter)

    # One row per chunk: (id, path, kind, end_line, text, source_role).
    # All remaining ContentChunk fields are identical across the three.
    chunk_specs = [
        (
            1,
            "SCOPE.md",
            "scope",
            3,
            "# SCOPE\n\nOld approved LLM routing entry.",
            "derived_scope",
        ),
        (
            2,
            "INTENT.md",
            "intent",
            3,
            "# INTENT\n\nProvide lightweight IAM.",
            "intent_summary",
        ),
        (
            3,
            "CLAUDE.md",
            "documentation",
            2,
            "# CLAUDE\n\nAgent guidance.",
            "agent_guidance",
        ),
    ]
    chunks = [
        ContentChunk(
            id=chunk_id,
            repository_id=1,
            analysis_run_id=1,
            snapshot_id=1,
            path=path,
            kind=kind,
            start_line=1,
            end_line=end_line,
            text=text,
            metadata={"source_role": role},
        )
        for chunk_id, path, kind, end_line, text, role in chunk_specs
    ]

    extractor.extract(repository(), chunks)

    prompt = adapter.last_prompt
    assert "Source: INTENT.md" in prompt
    assert "source_role=intent_summary" in prompt
    assert "Source: SCOPE.md" not in prompt
    assert "Old approved LLM routing entry" not in prompt
    assert "Source: CLAUDE.md" not in prompt
    assert "Do not use SCOPE.md" in prompt
|
|
|
|
|
|
def test_llm_candidate_extractor_parses_structured_response():
    """Check that a nested JSON reply is parsed into ability objects.

    The canned reply holds one ability with one capability, which in turn
    has one feature and one evidence entry. The test also checks that the
    prompt asks for strict JSON and cites the chunk as ``README.md:1-2``.
    """
    canned_reply = """
        {
          "abilities": [
            {
              "name": "Business Email Routing",
              "description": "Routes inbound customer email.",
              "source_paths": ["README.md"],
              "capabilities": [
                {
                  "name": "Classify Incoming Email",
                  "description": "Classify messages.",
                  "inputs": ["email body"],
                  "outputs": ["intent"],
                  "source_paths": ["README.md"],
                  "features": [
                    {
                      "name": "POST /classify",
                      "type": "REST endpoint",
                      "location": "app.py",
                      "source_paths": ["app.py"]
                    }
                  ],
                  "evidence": [
                    {
                      "type": "documentation",
                      "reference": "README.md",
                      "strength": "medium",
                      "source_paths": ["README.md"]
                    }
                  ]
                }
              ]
            }
          ]
        }
        """
    adapter = FakeAdapter(canned_reply)
    extractor = LLMCandidateExtractor(adapter)

    abilities = extractor.extract(repository(), [chunk()])

    prompt = adapter.last_prompt
    assert "Return strict JSON only" in prompt
    assert "README.md:1-2" in prompt

    first_ability = abilities[0]
    assert first_ability.name == "Business Email Routing"
    assert first_ability.capabilities[0].features[0].name == "POST /classify"
    assert first_ability.capabilities[0].evidence[0].reference == "README.md"
|
|
|
|
|
|
def test_llm_candidate_extractor_accepts_fenced_json():
    """A reply wrapped in a Markdown ```json fence must still be parsed."""
    fenced_reply = '```json\n{"abilities": [{"name": "A", "capabilities": []}]}\n```'
    adapter = FakeAdapter(fenced_reply)

    # No chunks are needed: only the reply parsing is under test here.
    extractor = LLMCandidateExtractor(adapter)
    abilities = extractor.extract(repository(), [])

    assert abilities[0].name == "A"
|
|
|
|
|
|
def test_llm_candidate_extractor_rejects_invalid_json():
    """A reply that is not JSON must raise ``LLMExtractionError``."""
    adapter = FakeAdapter("not json")

    with pytest.raises(LLMExtractionError):
        # Construction stays inside the block so an error raised by either
        # the constructor or extract() satisfies the expectation.
        extractor = LLMCandidateExtractor(adapter)
        extractor.extract(repository(), [])
|
|
|
|
|
|
def test_llm_connect_factory_reports_missing_dependency():
    """The factory must raise ``LLMExtractionError`` naming the missing package.

    NOTE(review): this presumably only passes where llm-connect is not
    installed in the test environment; confirm against the factory's
    import logic.
    """
    with pytest.raises(LLMExtractionError) as excinfo:
        create_llm_connect_adapter("mock")

    message = str(excinfo.value)
    assert "llm-connect is not installed" in message
|