"""Tests for ``LLMExtractionMapper``."""

from repo_registry.core.models import ContentChunk, ObservedFact
from repo_registry.llm_extraction import (
    ExtractedAbility,
    ExtractedCapability,
    ExtractedEvidence,
    ExtractedFeature,
    LLMExtractionMapper,
)


def fact(id, kind, name, path, line=None):
    """Build an ``ObservedFact`` fixture.

    Repository, analysis-run and snapshot ids are all fixed to 1 and the
    value is an empty string. The metadata dict carries a ``"line"`` entry
    only when *line* is given; otherwise it is empty.
    """
    return ObservedFact(
        id=id,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        kind=kind,
        path=path,
        name=name,
        value="",
        metadata={} if line is None else {"line": line},
    )


def chunk(id, kind, path, start_line=1):
    """Build a ``ContentChunk`` fixture spanning three lines.

    The chunk ends two lines after *start_line*, uses fixed ids of 1 for
    repository, analysis run and snapshot, and has placeholder text.
    """
    last_line = start_line + 2
    return ContentChunk(
        id=id,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        path=path,
        kind=kind,
        start_line=start_line,
        end_line=last_line,
        text="source text",
    )


def test_llm_extraction_mapper_builds_candidate_drafts_with_source_refs():
    """Map a fully nested extraction against facts and check the result.

    The extraction is one ability holding one capability, which in turn
    holds one feature and one piece of evidence. Each level names a source
    path for which a fact fixture exists; no chunks are supplied.
    """
    endpoint_feature = ExtractedFeature(
        name="POST /classify",
        type="REST endpoint",
        location="app.py",
        source_paths=["app.py"],
    )
    test_evidence = ExtractedEvidence(
        type="test",
        reference="tests/test_app.py",
        strength="strong",
        source_paths=["tests/test_app.py"],
    )
    classify_capability = ExtractedCapability(
        name="Classify Incoming Email",
        description="Classifies email.",
        inputs=["email body"],
        outputs=["intent"],
        source_paths=["README.md"],
        features=[endpoint_feature],
        evidence=[test_evidence],
    )
    routing_ability = ExtractedAbility(
        name="Business Email Routing",
        description="Routes email.",
        source_paths=["README.md"],
        capabilities=[classify_capability],
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", line=4),
        fact(3, "test", "test_app.py", "tests/test_app.py"),
    ]

    drafts = LLMExtractionMapper().map([routing_ability], observed, [])

    top = drafts[0]
    first_capability = top.capabilities[0]

    # Ability-level fields and its reference to the README fact (id 1).
    assert top.name == "Business Email Routing"
    assert top.confidence == 0.7
    assert top.source_refs[0].fact_id == 1

    # Capability inputs/outputs are expected to come through unchanged.
    assert first_capability.inputs == ["email body"]
    assert first_capability.outputs == ["intent"]

    # The feature ref should carry the line from the app.py fact's metadata,
    # and the evidence ref should have the kind of the test fact.
    assert first_capability.features[0].source_refs[0].line == 4
    assert first_capability.evidence[0].source_refs[0].kind == "test"


def test_llm_extraction_mapper_can_use_chunk_refs_without_facts():
    """Map an ability when only a content chunk is available, no facts.

    The ability's source path is ``"README.md:1-3"`` and the single chunk
    fixture covers lines 1-3 of ``README.md``. The resulting source ref is
    expected to have no fact id and to point at that path and start line.
    """
    readme_ability = ExtractedAbility(
        name="Readme Ability",
        source_paths=["README.md:1-3"],
    )
    readme_chunks = [chunk(1, "documentation", "README.md", start_line=1)]

    drafts = LLMExtractionMapper().map(
        [readme_ability],
        facts=[],
        chunks=readme_chunks,
    )

    first_ref = drafts[0].source_refs[0]
    assert first_ref.fact_id is None
    assert first_ref.path == "README.md"
    assert first_ref.line == 1