from __future__ import annotations import json import sys import types from pathlib import Path from types import SimpleNamespace from railiance_fabric.llm_extraction import LLMExtractionConfig, PROMPT_VERSION from railiance_fabric.scanner import ScanOptions, scan_repo from railiance_fabric.schema_validation import draft202012_validator def test_llm_extraction_uses_llm_connect_boundary_with_mock_adapter(tmp_path: Path, monkeypatch) -> None: repo = _minimal_repo(tmp_path) response = json.dumps( { "nodes": [ { "kind": "CapabilityDeclaration", "label": "Fixture Operations", "confidence": 0.82, "evidence_refs": [], "aliases": ["fixture-ops"], "attributes": {"capability_type": "operations"}, "rationale": "README describes operational responsibility.", } ], "edges": [ { "edge_type": "suggests_capability", "source_label": "Fixture Repo", "target_label": "Fixture Operations", "confidence": 0.78, "evidence_refs": [], "rationale": "The repository appears to own this capability.", } ], "attributes": [ { "entity_label": "Fixture Operations", "name": "uncertainty", "value": "needs human review", "confidence": 0.75, "evidence_refs": [], "rationale": "LLM-only extraction should remain review-gated.", } ], } ) calls: dict[str, object] = {} fake_module = types.ModuleType("llm_connect") class RunConfig: def __init__(self, **kwargs: object) -> None: self.kwargs = kwargs self.model_name = str(kwargs["model_name"]) class MockLLMAdapter: def __init__(self, mock_response: str = response) -> None: self.mock_response = mock_response def execute_prompt(self, prompt: str, config: RunConfig) -> SimpleNamespace: calls["prompt"] = prompt calls["config"] = config return SimpleNamespace( content=self.mock_response, model=config.model_name, usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, metadata={"mock": True}, ) def create_adapter(provider: str, model: str | None = None, api_key: str | None = None) -> MockLLMAdapter: calls["provider"] = provider calls["model"] = model calls["api_key"] = api_key return MockLLMAdapter() fake_module.RunConfig = RunConfig fake_module.MockLLMAdapter = MockLLMAdapter fake_module.create_adapter = create_adapter monkeypatch.setitem(sys.modules, "llm_connect", fake_module) snapshot = scan_repo( ScanOptions( repo_path=repo, repo_slug="fixture-repo", repo_name="Fixture Repo", commit="abc123", llm_enabled=True, deterministic_only=False, llm_config=LLMExtractionConfig(provider="mock", model="mock-model", min_confidence=0.6), ) ) _validate_schema("discovery-snapshot.schema.yaml", snapshot) assert calls["provider"] == "mock" assert calls["model"] == "mock-model" assert isinstance(calls["config"], RunConfig) assert "Evidence bundle:" in str(calls["prompt"]) assert "Use only the JSON evidence bundle below" in str(calls["prompt"]) assert snapshot["scan"]["llm_enabled"] is True assert snapshot["scan"]["deterministic_only"] is False assert snapshot["scan"]["llm_budget"]["prompt_version"] == PROMPT_VERSION llm_node = next(node for node in snapshot["candidates"]["nodes"] if node["label"] == "Fixture Operations") assert llm_node["origin"] == "llm" assert llm_node["review_state"] == "needs_review" assert llm_node["confidence"] == 0.82 assert llm_node["provenance"][0]["provider"] == "mock" assert llm_node["provenance"][0]["model"] == "mock-model" assert llm_node["provenance"][0]["usage"]["total_tokens"] == 15 assert any(scope["source_kind"] == "llm" and scope["mode"] == "additive" for scope in snapshot["replacement_scopes"]) assert any(edge["edge_type"] == "suggests_capability" for edge in snapshot["candidates"]["edges"]) assert any(attribute["name"] == "uncertainty" for attribute in snapshot["candidates"]["attributes"]) def test_llm_extraction_fails_closed_for_bad_or_low_confidence_output(tmp_path: Path) -> None: repo = _minimal_repo(tmp_path) bad_snapshot = scan_repo( ScanOptions( repo_path=repo, repo_slug="fixture-repo", repo_name="Fixture Repo", commit="abc123", llm_enabled=True, deterministic_only=False, llm_config=LLMExtractionConfig(provider="mock", model="mock-model"), llm_adapter=_Adapter("not json"), ) ) _validate_schema("discovery-snapshot.schema.yaml", bad_snapshot) assert {artifact["artifact_type"] for artifact in bad_snapshot["review_artifacts"]} == {"llm_output_invalid"} low_confidence_snapshot = scan_repo( ScanOptions( repo_path=repo, repo_slug="fixture-repo", repo_name="Fixture Repo", commit="abc123", llm_enabled=True, deterministic_only=False, llm_config=LLMExtractionConfig(provider="mock", model="mock-model", min_confidence=0.6), llm_adapter=_Adapter( json.dumps( { "nodes": [ { "kind": "CapabilityDeclaration", "label": "Too Uncertain", "confidence": 0.2, "evidence_refs": [], "rationale": "Weak signal.", } ], "edges": [], "attributes": [], } ) ), ) ) _validate_schema("discovery-snapshot.schema.yaml", low_confidence_snapshot) labels = {node["label"] for node in low_confidence_snapshot["candidates"]["nodes"]} assert "Too Uncertain" not in labels assert {artifact["artifact_type"] for artifact in low_confidence_snapshot["review_artifacts"]} == {"llm_low_confidence"} class _Adapter: def __init__(self, response: str) -> None: self.response = response def execute_prompt(self, prompt: str, config: object) -> SimpleNamespace: return SimpleNamespace( content=self.response, model=getattr(config, "model_name", "mock-model"), usage={"total_tokens": 1}, metadata={"mock": True}, ) def _minimal_repo(tmp_path: Path) -> Path: repo = tmp_path / "fixture-repo" repo.mkdir() (repo / "README.md").write_text("# Fixture Repo\n\nOwns operational repo signals.\n", encoding="utf-8") return repo def _validate_schema(schema_name: str, payload: dict[str, object]) -> None: validator = draft202012_validator(Path("schemas") / schema_name) validator.validate(payload)