""" Tests for the routing config schema (IB-WP-0020-T01). Parser-only — no network calls, no llm-connect construction. T02 will test the provider construction loader separately. """ from __future__ import annotations from pathlib import Path import pytest import yaml from infospace_bench.errors import InfospaceError from infospace_bench.routing_config import ( ROUTING_SCHEMA_VERSION, RoutingCandidateConfig, RoutingConfig, RoutingTaskTypeConfig, load_routing_config, parse_routing_config, ) MINIMAL = { "schema_version": 1, "task_types": { "summarize-source": { "candidates": [ { "id": "openrouter:gpt-4o-mini", "provider": "openrouter", "model": "openai/gpt-4o-mini", }, ], }, }, } def test_parses_minimal_config() -> None: config = parse_routing_config(MINIMAL) assert config.schema_version == ROUTING_SCHEMA_VERSION assert config.default_quality_floor is None assert config.ledger_path is None assert config.stage_to_task_type == {} assert len(config.task_types) == 1 task = config.task_types[0] assert task.task_type == "summarize-source" assert task.quality_floor is None assert len(task.candidates) == 1 candidate = task.candidates[0] assert candidate.id == "openrouter:gpt-4o-mini" assert candidate.provider == "openrouter" assert candidate.model == "openai/gpt-4o-mini" assert candidate.api_key_env == "" assert candidate.max_cost_per_1k is None def test_parses_full_config_round_trip() -> None: data = { "schema_version": 1, "default_quality_floor": 0.8, "ledger_path": "output/routing/quality.jsonl", "stage_to_task_type": { "extract-entities": "smart", "extract-relations": "smart", }, "task_types": { "cheap": { "quality_floor": 0.7, "candidates": [ { "id": "openrouter:gpt-4o-mini", "provider": "openrouter", "model": "openai/gpt-4o-mini", "api_key_env": "OPENROUTER_API_KEY", "max_cost_per_1k": 0.001, }, ], }, "smart": { "quality_floor": 0.85, "candidates": [ { "id": "openrouter:claude-haiku", "provider": "openrouter", "model": "anthropic/claude-3.5-haiku", }, { "id": "openrouter:claude-sonnet", "provider": "openrouter", "model": "anthropic/claude-3.5-sonnet", "max_cost_per_1k": 0.003, }, ], }, }, } config = parse_routing_config(data) assert config.default_quality_floor == 0.8 assert config.ledger_path == "output/routing/quality.jsonl" assert config.stage_to_task_type == { "extract-entities": "smart", "extract-relations": "smart", } smart = next(t for t in config.task_types if t.task_type == "smart") assert smart.quality_floor == 0.85 assert len(smart.candidates) == 2 assert smart.candidates[1].max_cost_per_1k == 0.003 def test_load_routing_config_reads_yaml_file(tmp_path: Path) -> None: config_path = tmp_path / "routing.yaml" config_path.write_text(yaml.safe_dump(MINIMAL, sort_keys=False), encoding="utf-8") config = load_routing_config(config_path) assert isinstance(config, RoutingConfig) assert config.schema_version == 1 def test_load_routing_config_missing_file(tmp_path: Path) -> None: with pytest.raises(InfospaceError) as exc_info: load_routing_config(tmp_path / "missing.yaml") assert exc_info.value.code == "missing_routing_config" def test_load_routing_config_bad_yaml(tmp_path: Path) -> None: config_path = tmp_path / "broken.yaml" config_path.write_text("schema_version: 1\n bad: indent\n: : : :\n", encoding="utf-8") with pytest.raises(InfospaceError) as exc_info: load_routing_config(config_path) assert exc_info.value.code == "invalid_routing_config_yaml" def test_rejects_wrong_schema_version() -> None: payload = {**MINIMAL, "schema_version": 2} with pytest.raises(InfospaceError) as exc_info: parse_routing_config(payload) assert exc_info.value.code == "unsupported_routing_schema" def test_rejects_missing_schema_version() -> None: payload = {"task_types": MINIMAL["task_types"]} with pytest.raises(InfospaceError) as exc_info: parse_routing_config(payload) assert exc_info.value.code == "unsupported_routing_schema" def test_rejects_empty_task_types() -> None: payload = {"schema_version": 1, "task_types": {}} with pytest.raises(InfospaceError) as exc_info: parse_routing_config(payload) assert exc_info.value.code == "empty_routing_task_types" def test_rejects_task_type_without_candidates() -> None: payload = { "schema_version": 1, "task_types": {"foo": {"candidates": []}}, } with pytest.raises(InfospaceError) as exc_info: parse_routing_config(payload) assert exc_info.value.code == "empty_routing_candidates" def test_rejects_candidate_missing_required_field() -> None: payload = { "schema_version": 1, "task_types": { "foo": { "candidates": [{"provider": "openrouter", "model": "x"}], # missing id }, }, } with pytest.raises(InfospaceError) as exc_info: parse_routing_config(payload) assert exc_info.value.code == "missing_routing_candidate_field" assert "id" in exc_info.value.detail["missing"] def test_rejects_unsupported_provider() -> None: payload = { "schema_version": 1, "task_types": { "foo": { "candidates": [ {"id": "x", "provider": "acme", "model": "acme/model"}, ], }, }, } with pytest.raises(InfospaceError) as exc_info: parse_routing_config(payload) assert exc_info.value.code == "unsupported_routing_provider" def test_rejects_negative_max_cost() -> None: payload = { "schema_version": 1, "task_types": { "foo": { "candidates": [ { "id": "x", "provider": "openrouter", "model": "openai/gpt-4o-mini", "max_cost_per_1k": -1, }, ], }, }, } with pytest.raises(InfospaceError) as exc_info: parse_routing_config(payload) assert exc_info.value.code == "invalid_routing_max_cost" def test_rejects_quality_floor_out_of_range() -> None: payload = { "schema_version": 1, "default_quality_floor": 1.5, "task_types": MINIMAL["task_types"], } with pytest.raises(InfospaceError) as exc_info: parse_routing_config(payload) assert exc_info.value.code == "invalid_routing_quality_floor" def test_rejects_duplicate_candidate_ids_within_task_type() -> None: payload = { "schema_version": 1, "task_types": { "foo": { "candidates": [ {"id": "dupe", "provider": "openrouter", "model": "a"}, {"id": "dupe", "provider": "openrouter", "model": "b"}, ], }, }, } with pytest.raises(InfospaceError) as exc_info: parse_routing_config(payload) assert exc_info.value.code == "duplicate_routing_candidate_id" def test_rejects_non_mapping_stage_map() -> None: payload = { "schema_version": 1, "task_types": MINIMAL["task_types"], "stage_to_task_type": ["not", "a", "mapping"], } with pytest.raises(InfospaceError) as exc_info: parse_routing_config(payload) assert exc_info.value.code == "invalid_routing_stage_map" # --------------------------------------------------------------------------- # T02 — loader that materialises a config into a live llm-connect policy # --------------------------------------------------------------------------- def _fake_adapter_factory_record(record: list): """Return a factory that records calls and returns a sentinel string.""" def _factory(candidate, env): record.append({"id": candidate.id, "provider": candidate.provider, "model": candidate.model}) return f"adapter:{candidate.id}" return _factory def test_build_routing_policy_returns_static_when_no_adaptive_signals() -> None: from llm_connect.routing import RoutingPolicy from infospace_bench.routing_config import build_routing_policy_from_config config = parse_routing_config(MINIMAL) calls: list[dict] = [] policy = build_routing_policy_from_config( config, adapter_factory=_fake_adapter_factory_record(calls) ) assert isinstance(policy, RoutingPolicy) assert type(policy).__name__ == "RoutingPolicy", "no adaptive signals -> static policy" assert len(policy.rules) == 1 assert policy.rules[0].task_type == "summarize-source" assert policy.rules[0].prefer == "adapter:openrouter:gpt-4o-mini" assert policy.rules[0].fallback is None assert calls and calls[0]["provider"] == "openrouter" def test_build_routing_policy_returns_adaptive_when_ledger_path_set(tmp_path: Path) -> None: from llm_connect.routing import AdaptiveRoutingPolicy from infospace_bench.routing_config import build_routing_policy_from_config data = { **MINIMAL, "ledger_path": "output/routing/quality.jsonl", } config = parse_routing_config(data) policy = build_routing_policy_from_config( config, workspace=tmp_path, adapter_factory=_fake_adapter_factory_record([]), ) assert isinstance(policy, AdaptiveRoutingPolicy) assert policy.ledger is not None expected_path = tmp_path / "output" / "routing" / "quality.jsonl" assert Path(policy.ledger.path) == expected_path def test_build_routing_policy_returns_adaptive_when_quality_floor_set() -> None: from llm_connect.routing import AdaptiveRoutingPolicy from infospace_bench.routing_config import build_routing_policy_from_config data = { **MINIMAL, "default_quality_floor": 0.8, } config = parse_routing_config(data) policy = build_routing_policy_from_config( config, adapter_factory=_fake_adapter_factory_record([]) ) assert isinstance(policy, AdaptiveRoutingPolicy) assert policy.ledger is None # no ledger_path set def test_build_routing_policy_routes_fallback_for_multi_candidate_rule() -> None: from infospace_bench.routing_config import build_routing_policy_from_config data = { "schema_version": 1, "task_types": { "extract-entities": { "candidates": [ { "id": "openrouter:cheap", "provider": "openrouter", "model": "openai/gpt-4o-mini", "max_cost_per_1k": 0.001, }, { "id": "openrouter:smart", "provider": "openrouter", "model": "anthropic/claude-3.5-sonnet", }, ], }, }, } config = parse_routing_config(data) policy = build_routing_policy_from_config( config, adapter_factory=_fake_adapter_factory_record([]) ) rule = policy.rules[0] assert rule.prefer == "adapter:openrouter:cheap" assert rule.max_cost_per_1k == 0.001 assert rule.fallback == "adapter:openrouter:smart" def test_build_routing_policy_resolves_api_key_from_env() -> None: from infospace_bench.routing_config import ( build_routing_policy_from_config, _default_adapter_factory, ) config = parse_routing_config(MINIMAL) # Smoke: real factory with a fake env should construct an OpenRouterAdapter. env = {"OPENROUTER_API_KEY": "sk-fake-test-key"} policy = build_routing_policy_from_config(config, env=env) rule = policy.rules[0] # The constructed adapter is an OpenRouterAdapter from llm-connect. from llm_connect.openrouter import OpenRouterAdapter assert isinstance(rule.prefer, OpenRouterAdapter) def test_build_routing_policy_fails_fast_on_missing_api_key() -> None: from infospace_bench.routing_config import build_routing_policy_from_config config = parse_routing_config(MINIMAL) # Empty env — the candidate's required env var is unset. with pytest.raises(InfospaceError) as exc_info: build_routing_policy_from_config(config, env={}) assert exc_info.value.code == "missing_routing_api_key" assert exc_info.value.detail["provider"] == "openrouter" def test_build_routing_policy_claude_code_needs_no_api_key() -> None: from infospace_bench.routing_config import build_routing_policy_from_config from llm_connect.claude_code import ClaudeCodeAdapter data = { "schema_version": 1, "task_types": { "baseline": { "candidates": [ {"id": "claude-code", "provider": "claude_code", "model": "claude-opus-4-7"}, ], }, }, } config = parse_routing_config(data) policy = build_routing_policy_from_config(config, env={}) assert isinstance(policy.rules[0].prefer, ClaudeCodeAdapter) def test_example_trading_literature_config_parses() -> None: """Regression: the shipped example config must parse cleanly.""" from infospace_bench.routing_config import load_routing_config example_path = Path(__file__).resolve().parent.parent / "examples" / "routing" / "trading-literature.yaml" config = load_routing_config(example_path) task_type_names = {task.task_type for task in config.task_types} assert {"cheap", "smart", "judge", "baseline"} <= task_type_names assert config.default_quality_floor == 0.80 # Each shipped stage maps to a task type the config actually declares. for stage, task_type in config.stage_to_task_type.items(): assert task_type in task_type_names, f"stage {stage!r} maps to undeclared task type {task_type!r}" # baseline is included so a T05 ShadowingAdapter wiring can reference it. baseline = next(t for t in config.task_types if t.task_type == "baseline") assert baseline.candidates[0].provider == "claude_code" def test_build_routing_policy_honours_custom_api_key_env() -> None: from infospace_bench.routing_config import build_routing_policy_from_config from llm_connect.openrouter import OpenRouterAdapter data = { "schema_version": 1, "task_types": { "summarize-source": { "candidates": [ { "id": "openrouter:gpt-4o-mini", "provider": "openrouter", "model": "openai/gpt-4o-mini", "api_key_env": "ALT_OPENROUTER_KEY", }, ], }, }, } config = parse_routing_config(data) with pytest.raises(InfospaceError) as exc_info: build_routing_policy_from_config(config, env={"OPENROUTER_API_KEY": "wrong-default"}) assert exc_info.value.code == "missing_routing_api_key" assert exc_info.value.detail["api_key_env"] == "ALT_OPENROUTER_KEY" policy = build_routing_policy_from_config( config, env={"ALT_OPENROUTER_KEY": "sk-fake"} ) assert isinstance(policy.rules[0].prefer, OpenRouterAdapter) def test_shadow_rate_without_baseline_fails_fast() -> None: from infospace_bench.routing_config import build_routing_policy_from_config config = parse_routing_config(MINIMAL) with pytest.raises(InfospaceError) as exc_info: build_routing_policy_from_config( config, shadow_rate=0.5, adapter_factory=_fake_adapter_factory_record([]), ) assert exc_info.value.code == "shadow_rate_without_baseline" def test_shadow_baseline_without_ledger_path_fails_fast() -> None: """ShadowingAdapter needs a place to write observations; require ledger_path.""" from infospace_bench.routing_config import build_routing_policy_from_config config = parse_routing_config(MINIMAL) with pytest.raises(InfospaceError) as exc_info: build_routing_policy_from_config( config, shadow_baseline_id="openrouter:gpt-4o-mini", adapter_factory=_fake_adapter_factory_record([]), ) assert exc_info.value.code == "missing_routing_ledger_for_shadow" def test_shadow_baseline_not_in_config_fails_fast(tmp_path: Path) -> None: from infospace_bench.routing_config import build_routing_policy_from_config data = {**MINIMAL, "ledger_path": "quality.jsonl"} config = parse_routing_config(data) with pytest.raises(InfospaceError) as exc_info: build_routing_policy_from_config( config, workspace=tmp_path, shadow_baseline_id="not-in-config", adapter_factory=_fake_adapter_factory_record([]), ) assert exc_info.value.code == "missing_shadow_baseline" def test_shadow_wraps_candidates_excluding_baseline(tmp_path: Path) -> None: from llm_connect.adapter import LLMAdapter from llm_connect.models import LLMResponse, RunConfig from llm_connect.shadowing import ShadowingAdapter from infospace_bench.routing_config import build_routing_policy_from_config data = { "schema_version": 1, "ledger_path": "quality.jsonl", "task_types": { "extract-entities": { "candidates": [ {"id": "candidate-a", "provider": "openrouter", "model": "openai/gpt-4o-mini"}, {"id": "baseline-x", "provider": "claude_code", "model": "claude-opus-4-7"}, ], }, }, } config = parse_routing_config(data) class _Stub(LLMAdapter): def __init__(self, name): self.name = name self.calls = 0 def execute_prompt(self, prompt, config): self.calls += 1 return LLMResponse(content="match", model=self.name, usage={"prompt_tokens": 1, "completion_tokens": 1}) def validate_config(self, config): return True stubs: dict[str, _Stub] = {} def factory(candidate, env): stubs[candidate.id] = _Stub(candidate.id) return stubs[candidate.id] policy = build_routing_policy_from_config( config, workspace=tmp_path, adapter_factory=factory, shadow_baseline_id="baseline-x", shadow_rate=1.0, ) rule = policy.rules[0] # The prefer slot is now a ShadowingAdapter wrapping candidate-a. assert isinstance(rule.prefer, ShadowingAdapter) assert rule.prefer.candidate_adapter is stubs["candidate-a"] assert rule.prefer.baseline_adapter is stubs["baseline-x"] assert rule.prefer.task_type == "extract-entities" # The baseline candidate (fallback) is NOT wrapped. assert rule.fallback is stubs["baseline-x"] def test_shadow_rate_one_fires_per_call_and_zero_skips(tmp_path: Path) -> None: """ShadowingAdapter is best-effort and supplied by llm-connect. Spot-check the wiring: at rate=1.0 the baseline.execute_prompt runs on every call; at rate=0.0 it never runs. """ from llm_connect.adapter import LLMAdapter from llm_connect.models import LLMResponse, RunConfig from infospace_bench.routing_config import build_routing_policy_from_config data = { "schema_version": 1, "ledger_path": "quality.jsonl", "task_types": { "extract-entities": { "candidates": [ {"id": "candidate-a", "provider": "openrouter", "model": "openai/gpt-4o-mini"}, {"id": "baseline-x", "provider": "claude_code", "model": "claude-opus-4-7"}, ], }, }, } config = parse_routing_config(data) class _Counter(LLMAdapter): def __init__(self, name): self.name = name self.calls = 0 def execute_prompt(self, prompt, config): self.calls += 1 return LLMResponse(content="match", model=self.name, usage={"prompt_tokens": 1, "completion_tokens": 1}) def validate_config(self, config): return True def make_factory(): stubs: dict[str, _Counter] = {} def factory(candidate, env): stubs[candidate.id] = _Counter(candidate.id) return stubs[candidate.id] return factory, stubs factory, stubs = make_factory() policy_full = build_routing_policy_from_config( config, workspace=tmp_path, adapter_factory=factory, shadow_baseline_id="baseline-x", shadow_rate=1.0, ) # Drive the prefer adapter (synchronous shadow) and force any # background shadow work to drain before we count calls. shadow_adapter = policy_full.rules[0].prefer shadow_adapter.async_shadow = False # force sync grading for a deterministic count for _ in range(3): shadow_adapter.execute_prompt("hello", RunConfig(model_name="x")) assert stubs["candidate-a"].calls == 3 assert stubs["baseline-x"].calls == 3, "rate=1.0 should call baseline on every call" # Fresh factory + stubs for the zero-rate run so counters reset. factory2, stubs2 = make_factory() # Use a unique ledger path so the two policies do not share state. (tmp_path / "subdir").mkdir(exist_ok=True) data2 = {**data, "ledger_path": "subdir/quality.jsonl"} config2 = parse_routing_config(data2) policy_zero = build_routing_policy_from_config( config2, workspace=tmp_path, adapter_factory=factory2, shadow_baseline_id="baseline-x", shadow_rate=0.0, ) shadow_adapter2 = policy_zero.rules[0].prefer shadow_adapter2.async_shadow = False for _ in range(3): shadow_adapter2.execute_prompt("hello", RunConfig(model_name="x")) assert stubs2["candidate-a"].calls == 3 assert stubs2["baseline-x"].calls == 0, "rate=0.0 should skip baseline entirely" def test_rejects_non_string_ledger_path() -> None: payload = { "schema_version": 1, "task_types": MINIMAL["task_types"], "ledger_path": 42, } with pytest.raises(InfospaceError) as exc_info: parse_routing_config(payload) assert exc_info.value.code == "invalid_routing_ledger_path"