# railiance-fabric/tests/test_llm_extraction.py

from __future__ import annotations

import json
import sys
import types
from pathlib import Path
from types import SimpleNamespace

from railiance_fabric.llm_extraction import LLMExtractionConfig, PROMPT_VERSION
from railiance_fabric.scanner import ScanOptions, scan_repo
from railiance_fabric.schema_validation import draft202012_validator


def test_llm_extraction_uses_llm_connect_boundary_with_mock_adapter(tmp_path: Path, monkeypatch) -> None:
    """Scan with a stubbed ``llm_connect`` module and inspect the resulting snapshot.

    A fake ``llm_connect`` module (``RunConfig``, ``MockLLMAdapter``,
    ``create_adapter``) is placed in ``sys.modules``. The stub records what it
    is called with and answers with a canned JSON document; the assertions then
    check both the recorded calls and how the canned candidates appear in the
    snapshot returned by ``scan_repo``.
    """
    repo = _minimal_repo(tmp_path)

    # Canned model answer: one node, one edge and one attribute.
    capability_node = {
        "kind": "CapabilityDeclaration",
        "label": "Fixture Operations",
        "confidence": 0.82,
        "evidence_refs": [],
        "aliases": ["fixture-ops"],
        "attributes": {"capability_type": "operations"},
        "rationale": "README describes operational responsibility.",
    }
    ownership_edge = {
        "edge_type": "suggests_capability",
        "source_label": "Fixture Repo",
        "target_label": "Fixture Operations",
        "confidence": 0.78,
        "evidence_refs": [],
        "rationale": "The repository appears to own this capability.",
    }
    review_attribute = {
        "entity_label": "Fixture Operations",
        "name": "uncertainty",
        "value": "needs human review",
        "confidence": 0.75,
        "evidence_refs": [],
        "rationale": "LLM-only extraction should remain review-gated.",
    }
    response = json.dumps(
        {
            "nodes": [capability_node],
            "edges": [ownership_edge],
            "attributes": [review_attribute],
        }
    )

    # Everything the stub is called with ends up here for later assertions.
    seen: dict[str, object] = {}

    class RunConfig:
        def __init__(self, **kwargs: object) -> None:
            self.kwargs = kwargs
            self.model_name = str(kwargs["model_name"])

    class MockLLMAdapter:
        def __init__(self, mock_response: str = response) -> None:
            self.mock_response = mock_response

        def execute_prompt(self, prompt: str, config: RunConfig) -> SimpleNamespace:
            seen.update(prompt=prompt, config=config)
            reply_fields = {
                "content": self.mock_response,
                "model": config.model_name,
                "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
                "metadata": {"mock": True},
            }
            return SimpleNamespace(**reply_fields)

    def create_adapter(provider: str, model: str | None = None, api_key: str | None = None) -> MockLLMAdapter:
        seen.update(provider=provider, model=model, api_key=api_key)
        return MockLLMAdapter()

    # Expose the three stub objects under their own names on the fake module.
    stub_module = types.ModuleType("llm_connect")
    for exported in (RunConfig, MockLLMAdapter, create_adapter):
        setattr(stub_module, exported.__name__, exported)
    monkeypatch.setitem(sys.modules, "llm_connect", stub_module)

    options = ScanOptions(
        repo_path=repo,
        repo_slug="fixture-repo",
        repo_name="Fixture Repo",
        commit="abc123",
        llm_enabled=True,
        deterministic_only=False,
        llm_config=LLMExtractionConfig(provider="mock", model="mock-model", min_confidence=0.6),
    )
    snapshot = scan_repo(options)

    _validate_schema("discovery-snapshot.schema.yaml", snapshot)

    # The stub must have been reached with the configured provider and model.
    assert seen["provider"] == "mock"
    assert seen["model"] == "mock-model"
    assert isinstance(seen["config"], RunConfig)
    prompt_text = str(seen["prompt"])
    assert "Evidence bundle:" in prompt_text
    assert "Use only the JSON evidence bundle below" in prompt_text

    scan_meta = snapshot["scan"]
    assert scan_meta["llm_enabled"] is True
    assert scan_meta["deterministic_only"] is False
    assert scan_meta["llm_budget"]["prompt_version"] == PROMPT_VERSION

    candidates = snapshot["candidates"]
    llm_node = next(entry for entry in candidates["nodes"] if entry["label"] == "Fixture Operations")
    assert llm_node["origin"] == "llm"
    assert llm_node["review_state"] == "needs_review"
    assert llm_node["confidence"] == 0.82

    first_provenance = llm_node["provenance"][0]
    assert first_provenance["provider"] == "mock"
    assert first_provenance["model"] == "mock-model"
    assert first_provenance["usage"]["total_tokens"] == 15

    assert any(scope["source_kind"] == "llm" and scope["mode"] == "additive" for scope in snapshot["replacement_scopes"])
    assert any(edge["edge_type"] == "suggests_capability" for edge in candidates["edges"])
    assert any(attribute["name"] == "uncertainty" for attribute in candidates["attributes"])


def test_llm_extraction_fails_closed_for_bad_or_low_confidence_output(tmp_path: Path) -> None:
    """Check the snapshots produced for unparseable and low-confidence LLM replies.

    Two scans run against the same fixture repository with a canned adapter:
    one replying with non-JSON text, one replying with a single node whose
    confidence (0.2) is below the configured ``min_confidence`` (0.6). Each
    snapshot must validate against the schema and carry exactly the expected
    review artifact type.
    """
    repo = _minimal_repo(tmp_path)

    def run_scan(llm_config: LLMExtractionConfig, canned_reply: str) -> dict[str, object]:
        # Both scenarios differ only in the LLM config and the adapter's reply.
        return scan_repo(
            ScanOptions(
                repo_path=repo,
                repo_slug="fixture-repo",
                repo_name="Fixture Repo",
                commit="abc123",
                llm_enabled=True,
                deterministic_only=False,
                llm_config=llm_config,
                llm_adapter=_Adapter(canned_reply),
            )
        )

    # Scenario 1: the reply is not JSON at all.
    bad_snapshot = run_scan(LLMExtractionConfig(provider="mock", model="mock-model"), "not json")
    _validate_schema("discovery-snapshot.schema.yaml", bad_snapshot)
    bad_artifact_types = {artifact["artifact_type"] for artifact in bad_snapshot["review_artifacts"]}
    assert bad_artifact_types == {"llm_output_invalid"}

    # Scenario 2: well-formed reply whose only node is below the threshold.
    weak_node = {
        "kind": "CapabilityDeclaration",
        "label": "Too Uncertain",
        "confidence": 0.2,
        "evidence_refs": [],
        "rationale": "Weak signal.",
    }
    weak_reply = json.dumps({"nodes": [weak_node], "edges": [], "attributes": []})
    low_confidence_snapshot = run_scan(
        LLMExtractionConfig(provider="mock", model="mock-model", min_confidence=0.6),
        weak_reply,
    )
    _validate_schema("discovery-snapshot.schema.yaml", low_confidence_snapshot)

    labels = {node["label"] for node in low_confidence_snapshot["candidates"]["nodes"]}
    assert "Too Uncertain" not in labels
    low_artifact_types = {
        artifact["artifact_type"] for artifact in low_confidence_snapshot["review_artifacts"]
    }
    assert low_artifact_types == {"llm_low_confidence"}


class _Adapter:
    def __init__(self, response: str) -> None:
        self.response = response

    def execute_prompt(self, prompt: str, config: object) -> SimpleNamespace:
        return SimpleNamespace(
            content=self.response,
            model=getattr(config, "model_name", "mock-model"),
            usage={"total_tokens": 1},
            metadata={"mock": True},
        )


def _minimal_repo(tmp_path: Path) -> Path:
    repo = tmp_path / "fixture-repo"
    repo.mkdir()
    (repo / "README.md").write_text("# Fixture Repo\n\nOwns operational repo signals.\n", encoding="utf-8")
    return repo


def _validate_schema(schema_name: str, payload: dict[str, object]) -> None:
    """Validate *payload* against the schema file named *schema_name*.

    The schema is looked up as ``schemas/<schema_name>`` relative to the
    current working directory. If that file is missing but the same relative
    path exists two levels above this test file, that anchored path is used
    instead, so the tests do not depend on the directory pytest was started
    from.

    Args:
        schema_name: File name of the schema inside the ``schemas`` directory.
        payload: The document to validate.

    Raises:
        Whatever ``validator.validate`` raises for an invalid payload.
    """
    relative_path = Path("schemas") / schema_name
    # NOTE(review): assumes ``schemas/`` sits beside the directory holding this
    # test file -- confirm against the project layout.
    anchored_path = Path(__file__).resolve().parent.parent / relative_path
    # Only switch to the anchored path when it actually helps; otherwise keep
    # the original CWD-relative path so existing behaviour is unchanged.
    use_anchored = not relative_path.is_file() and anchored_path.is_file()
    schema_path = anchored_path if use_anchored else relative_path
    validator = draft202012_validator(schema_path)
    validator.validate(payload)