# infospace-bench/tests/test_routing_config.py

"""
Tests for the routing config schema (IB-WP-0020-T01).

Parser-only — no network calls, no llm-connect construction. T02 will
test the provider construction loader separately.
"""

from __future__ import annotations

from pathlib import Path

import pytest
import yaml

from infospace_bench.errors import InfospaceError
from infospace_bench.routing_config import (
    ROUTING_SCHEMA_VERSION,
    RoutingCandidateConfig,
    RoutingConfig,
    RoutingTaskTypeConfig,
    load_routing_config,
    parse_routing_config,
)


# Baseline routing payload shared by the tests below: schema version 1 and a
# single task type whose only candidate sets just id, provider and model.
_MINIMAL_CANDIDATE = {
    "id": "openrouter:gpt-4o-mini",
    "provider": "openrouter",
    "model": "openai/gpt-4o-mini",
}

MINIMAL = {
    "schema_version": 1,
    "task_types": {
        "summarize-source": {"candidates": [_MINIMAL_CANDIDATE]},
    },
}


def test_parses_minimal_config() -> None:
    """Parse the MINIMAL payload and check the values of every unset optional field."""
    parsed = parse_routing_config(MINIMAL)

    # Top-level settings that MINIMAL leaves out.
    assert parsed.schema_version == ROUTING_SCHEMA_VERSION
    assert parsed.default_quality_floor is None
    assert parsed.ledger_path is None
    assert parsed.stage_to_task_type == {}

    # Exactly one task type comes back, named after its mapping key.
    assert len(parsed.task_types) == 1
    only_task = parsed.task_types[0]
    assert only_task.task_type == "summarize-source"
    assert only_task.quality_floor is None

    # Its single candidate echoes the three supplied fields; the rest are unset.
    assert len(only_task.candidates) == 1
    only_candidate = only_task.candidates[0]
    assert only_candidate.id == "openrouter:gpt-4o-mini"
    assert only_candidate.provider == "openrouter"
    assert only_candidate.model == "openai/gpt-4o-mini"
    assert only_candidate.api_key_env == ""
    assert only_candidate.max_cost_per_1k is None


def test_parses_full_config_round_trip() -> None:
    """Parse a config that sets every optional key and check each value survives.

    Both task types are verified: ``cheap`` is the only place in this module
    where a non-default ``api_key_env`` is supplied, and ``smart`` covers a
    task type with several candidates, one of which omits the optional fields.
    """
    data = {
        "schema_version": 1,
        "default_quality_floor": 0.8,
        "ledger_path": "output/routing/quality.jsonl",
        "stage_to_task_type": {
            "extract-entities": "smart",
            "extract-relations": "smart",
        },
        "task_types": {
            "cheap": {
                "quality_floor": 0.7,
                "candidates": [
                    {
                        "id": "openrouter:gpt-4o-mini",
                        "provider": "openrouter",
                        "model": "openai/gpt-4o-mini",
                        "api_key_env": "OPENROUTER_API_KEY",
                        "max_cost_per_1k": 0.001,
                    },
                ],
            },
            "smart": {
                "quality_floor": 0.85,
                "candidates": [
                    {
                        "id": "openrouter:claude-haiku",
                        "provider": "openrouter",
                        "model": "anthropic/claude-3.5-haiku",
                    },
                    {
                        "id": "openrouter:claude-sonnet",
                        "provider": "openrouter",
                        "model": "anthropic/claude-3.5-sonnet",
                        "max_cost_per_1k": 0.003,
                    },
                ],
            },
        },
    }

    config = parse_routing_config(data)

    # Top-level optional settings.
    assert config.default_quality_floor == 0.8
    assert config.ledger_path == "output/routing/quality.jsonl"
    assert config.stage_to_task_type == {
        "extract-entities": "smart",
        "extract-relations": "smart",
    }
    assert len(config.task_types) == 2

    # "cheap": one candidate with every optional candidate field populated.
    cheap = next(t for t in config.task_types if t.task_type == "cheap")
    assert cheap.quality_floor == 0.7
    assert len(cheap.candidates) == 1
    cheap_candidate = cheap.candidates[0]
    assert cheap_candidate.id == "openrouter:gpt-4o-mini"
    assert cheap_candidate.api_key_env == "OPENROUTER_API_KEY"
    assert cheap_candidate.max_cost_per_1k == 0.001

    # "smart": two candidates, kept in the order they were declared.
    smart = next(t for t in config.task_types if t.task_type == "smart")
    assert smart.quality_floor == 0.85
    assert len(smart.candidates) == 2
    assert smart.candidates[0].id == "openrouter:claude-haiku"
    assert smart.candidates[0].max_cost_per_1k is None
    assert smart.candidates[1].id == "openrouter:claude-sonnet"
    assert smart.candidates[1].max_cost_per_1k == 0.003


def test_load_routing_config_reads_yaml_file(tmp_path: Path) -> None:
    """Dump MINIMAL to a YAML file and load it back through the file loader."""
    target = tmp_path / "routing.yaml"
    serialized = yaml.safe_dump(MINIMAL, sort_keys=False)
    target.write_text(serialized, encoding="utf-8")

    loaded = load_routing_config(target)

    assert isinstance(loaded, RoutingConfig)
    assert loaded.schema_version == 1


def test_load_routing_config_missing_file(tmp_path: Path) -> None:
    """Loading a path that does not exist raises ``missing_routing_config``."""
    absent = tmp_path / "missing.yaml"

    with pytest.raises(InfospaceError) as caught:
        load_routing_config(absent)

    assert caught.value.code == "missing_routing_config"


def test_load_routing_config_bad_yaml(tmp_path: Path) -> None:
    """A file holding malformed YAML raises ``invalid_routing_config_yaml``."""
    # Deliberately malformed: stray indentation followed by a run of bare colons.
    malformed = "schema_version: 1\n  bad: indent\n: : : :\n"
    target = tmp_path / "broken.yaml"
    target.write_text(malformed, encoding="utf-8")

    with pytest.raises(InfospaceError) as caught:
        load_routing_config(target)

    assert caught.value.code == "invalid_routing_config_yaml"


def test_rejects_wrong_schema_version() -> None:
    """MINIMAL with ``schema_version`` overridden to 2 is rejected."""
    bumped = {**MINIMAL, "schema_version": 2}

    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(bumped)

    assert caught.value.code == "unsupported_routing_schema"


def test_rejects_missing_schema_version() -> None:
    """A payload with no ``schema_version`` key gets the unsupported-schema error."""
    task_types = MINIMAL["task_types"]
    versionless = {"task_types": task_types}

    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(versionless)

    assert caught.value.code == "unsupported_routing_schema"


def test_rejects_empty_task_types() -> None:
    """An empty ``task_types`` mapping raises ``empty_routing_task_types``."""
    no_tasks = {"schema_version": 1, "task_types": {}}

    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(no_tasks)

    assert caught.value.code == "empty_routing_task_types"


def test_rejects_task_type_without_candidates() -> None:
    """A task type whose ``candidates`` list is empty raises ``empty_routing_candidates``."""
    candidate_free_task = {"candidates": []}
    bad_config = {
        "schema_version": 1,
        "task_types": {"foo": candidate_free_task},
    }

    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(bad_config)

    assert caught.value.code == "empty_routing_candidates"


def test_rejects_candidate_missing_required_field() -> None:
    """A candidate without ``id`` is rejected and the error detail names the field."""
    incomplete = {"provider": "openrouter", "model": "x"}  # missing id
    bad_config = {
        "schema_version": 1,
        "task_types": {"foo": {"candidates": [incomplete]}},
    }

    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(bad_config)

    failure = caught.value
    assert failure.code == "missing_routing_candidate_field"
    assert "id" in failure.detail["missing"]


def test_rejects_unsupported_provider() -> None:
    """A candidate whose provider is ``acme`` raises ``unsupported_routing_provider``."""
    unknown_provider = {"id": "x", "provider": "acme", "model": "acme/model"}
    bad_config = {
        "schema_version": 1,
        "task_types": {"foo": {"candidates": [unknown_provider]}},
    }

    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(bad_config)

    assert caught.value.code == "unsupported_routing_provider"


def test_rejects_negative_max_cost() -> None:
    """A candidate with ``max_cost_per_1k`` of -1 raises ``invalid_routing_max_cost``."""
    negative_cost = {
        "id": "x",
        "provider": "openrouter",
        "model": "openai/gpt-4o-mini",
        "max_cost_per_1k": -1,
    }
    bad_config = {
        "schema_version": 1,
        "task_types": {"foo": {"candidates": [negative_cost]}},
    }

    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(bad_config)

    assert caught.value.code == "invalid_routing_max_cost"


def test_rejects_quality_floor_out_of_range() -> None:
    """A ``default_quality_floor`` of 1.5 raises ``invalid_routing_quality_floor``."""
    task_types = MINIMAL["task_types"]
    bad_config = {
        "schema_version": 1,
        "default_quality_floor": 1.5,
        "task_types": task_types,
    }

    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(bad_config)

    assert caught.value.code == "invalid_routing_quality_floor"


def test_rejects_duplicate_candidate_ids_within_task_type() -> None:
    """Two candidates sharing an id in one task type raise ``duplicate_routing_candidate_id``."""
    first = {"id": "dupe", "provider": "openrouter", "model": "a"}
    second = {"id": "dupe", "provider": "openrouter", "model": "b"}
    bad_config = {
        "schema_version": 1,
        "task_types": {"foo": {"candidates": [first, second]}},
    }

    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(bad_config)

    assert caught.value.code == "duplicate_routing_candidate_id"


def test_rejects_non_mapping_stage_map() -> None:
    """A list given as ``stage_to_task_type`` raises ``invalid_routing_stage_map``."""
    task_types = MINIMAL["task_types"]
    wrong_shape = ["not", "a", "mapping"]
    bad_config = {
        "schema_version": 1,
        "task_types": task_types,
        "stage_to_task_type": wrong_shape,
    }

    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(bad_config)

    assert caught.value.code == "invalid_routing_stage_map"


def test_rejects_non_string_ledger_path() -> None:
    """An integer ``ledger_path`` raises ``invalid_routing_ledger_path``."""
    task_types = MINIMAL["task_types"]
    bad_config = {
        "schema_version": 1,
        "task_types": task_types,
        "ledger_path": 42,
    }

    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(bad_config)

    assert caught.value.code == "invalid_routing_ledger_path"