"""Tests for ``LLMCandidateExtractor`` prompt building and response parsing."""

import pytest

from repo_registry.core.models import ContentChunk, Repository
from repo_registry.llm_extraction import (
    LLMCandidateExtractor,
    LLMExtractionError,
    create_llm_connect_adapter,
)


class Response:
    """Minimal adapter response object exposing only a ``content`` attribute."""

    def __init__(self, content: str) -> None:
        self.content = content


class FakeAdapter:
    """Test double that returns canned content and records what it was asked.

    ``last_prompt`` and ``last_config`` hold the arguments of the most recent
    ``execute_prompt`` call so tests can inspect the prompt that was built.
    """

    def __init__(self, content: str) -> None:
        self.content = content
        self.last_prompt = ""
        # Placeholder value until ``execute_prompt`` is called.
        self.last_config = object()

    def execute_prompt(self, prompt: str, config: object) -> Response:
        """Record ``prompt`` and ``config`` and return the canned content."""
        self.last_prompt = prompt
        self.last_config = config
        return Response(self.content)


def repository() -> Repository:
    """Build the ``MailRouter`` repository fixture shared by the tests."""
    return Repository(
        id=1,
        name="MailRouter",
        url="/tmp/mail-router",
        description="Routes inbound email.",
        branch="main",
        status="analyzed",
    )


def chunk() -> ContentChunk:
    """Build a README documentation chunk covering lines 1-2."""
    return ContentChunk(
        id=1,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        path="README.md",
        kind="documentation",
        start_line=1,
        end_line=2,
        text="# MailRouter\nRoutes incoming customer email.",
    )


def test_llm_prompt_filters_derived_scope_and_labels_source_roles():
    """The prompt keeps INTENT.md with its role and omits SCOPE.md and CLAUDE.md."""
    adapter = FakeAdapter('{"abilities": []}')
    extractor = LLMCandidateExtractor(adapter)
    chunks = [
        ContentChunk(
            id=1,
            repository_id=1,
            analysis_run_id=1,
            snapshot_id=1,
            path="SCOPE.md",
            kind="scope",
            start_line=1,
            end_line=3,
            text="# SCOPE\n\nOld approved LLM routing entry.",
            metadata={"source_role": "derived_scope"},
        ),
        ContentChunk(
            id=2,
            repository_id=1,
            analysis_run_id=1,
            snapshot_id=1,
            path="INTENT.md",
            kind="intent",
            start_line=1,
            end_line=3,
            text="# INTENT\n\nProvide lightweight IAM.",
            metadata={"source_role": "intent_summary"},
        ),
        ContentChunk(
            id=3,
            repository_id=1,
            analysis_run_id=1,
            snapshot_id=1,
            path="CLAUDE.md",
            kind="documentation",
            start_line=1,
            end_line=2,
            text="# CLAUDE\n\nAgent guidance.",
            metadata={"source_role": "agent_guidance"},
        ),
    ]

    extractor.extract(repository(), chunks)

    prompt = adapter.last_prompt
    # The intent summary is included and labelled with its source role.
    assert "Source: INTENT.md" in prompt
    assert "source_role=intent_summary" in prompt
    # Neither the derived-scope chunk's header nor its text reaches the prompt.
    assert "Source: SCOPE.md" not in prompt
    assert "Old approved LLM routing entry" not in prompt
    # The agent-guidance chunk is left out as well.
    assert "Source: CLAUDE.md" not in prompt
    assert "Do not use SCOPE.md" in prompt


def test_llm_candidate_extractor_parses_structured_response():
    """A nested JSON response is parsed into abilities, features and evidence."""
    adapter = FakeAdapter(
        """
        {
          "abilities": [
            {
              "name": "Business Email Routing",
              "description": "Routes inbound customer email.",
              "source_paths": ["README.md"],
              "capabilities": [
                {
                  "name": "Classify Incoming Email",
                  "description": "Classify messages.",
                  "inputs": ["email body"],
                  "outputs": ["intent"],
                  "source_paths": ["README.md"],
                  "features": [
                    {
                      "name": "POST /classify",
                      "type": "REST endpoint",
                      "location": "app.py",
                      "source_paths": ["app.py"]
                    }
                  ],
                  "evidence": [
                    {
                      "type": "documentation",
                      "reference": "README.md",
                      "strength": "medium",
                      "source_paths": ["README.md"]
                    }
                  ]
                }
              ]
            }
          ]
        }
        """
    )
    extractor = LLMCandidateExtractor(adapter)

    abilities = extractor.extract(repository(), [chunk()])

    assert "Return strict JSON only" in adapter.last_prompt
    assert "README.md:1-2" in adapter.last_prompt
    assert abilities[0].name == "Business Email Routing"
    assert abilities[0].capabilities[0].features[0].name == "POST /classify"
    assert abilities[0].capabilities[0].evidence[0].reference == "README.md"


def test_llm_candidate_extractor_accepts_fenced_json():
    """JSON wrapped in a Markdown ``json`` code fence is still parsed."""
    adapter = FakeAdapter(
        '```json\n{"abilities": [{"name": "A", "capabilities": []}]}\n```'
    )

    abilities = LLMCandidateExtractor(adapter).extract(repository(), [])

    assert abilities[0].name == "A"


def test_llm_candidate_extractor_rejects_invalid_json():
    """Non-JSON adapter output raises ``LLMExtractionError``."""
    adapter = FakeAdapter("not json")

    with pytest.raises(LLMExtractionError):
        LLMCandidateExtractor(adapter).extract(repository(), [])


def test_llm_connect_factory_reports_missing_dependency():
    """The factory raises ``LLMExtractionError`` naming the missing package.

    NOTE(review): presumably this relies on ``llm-connect`` being absent from
    the test environment -- confirm.
    """
    # ``match`` searches str(exception), so the message check cannot end up as
    # dead code after the raising call inside the ``with`` block.
    with pytest.raises(LLMExtractionError, match="llm-connect is not installed"):
        create_llm_connect_adapter("mock")