Files
infospace-bench/tests/test_routing_config.py
tegwick debd2b8e69 IB-WP-0020-T04: example routing config + live routing smoke
examples/routing/trading-literature.yaml is the checked-in starting
config for a Lefevre-style run. It applies the IB-WP-0018 task-type
taxonomy: cheap candidates for summary + evaluation, smart candidates
for entity + relation extraction, and a separate baseline rule wiring
claude_code for a follow-on T05 ShadowingAdapter step. Workspace-
relative ledger_path keeps adaptive observations with the workspace.

tests/test_routing_config.py gains a regression test that asserts the
shipped example parses cleanly, every stage in stage_to_task_type maps
to a declared task type, and the baseline candidate uses the
claude_code provider — so the example will not bit-rot silently.

tests/test_openrouter_live.py gains test_provider_routing_one_chapter_live_smoke
gated on the same INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER + OPENROUTER_API_KEY
opt-in as the existing static smoke. It builds a one-candidate routing
config, runs a single chapter through --provider routing, and asserts
the per-stage adapter-choices report section names the routed model
and the routed artifacts carry adapter_id provenance.

docs/generic-source-generator.md gains a "Live runs with --provider
routing" subsection that walks through the one-command routed run,
explains the --quality-floor override, and points at the parallel
live smoke test.

174 tests pass, 2 skipped (both live smokes, correctly gated).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-18 22:19:54 +02:00

475 lines
16 KiB
Python

"""
Tests for the routing config schema (IB-WP-0020-T01) and the policy loader (T02).
The schema tests are parser-only — no network calls, no llm-connect construction.
The T02 tests further down build llm-connect routing policies from parsed configs.
"""
from __future__ import annotations
from pathlib import Path
import pytest
import yaml
from infospace_bench.errors import InfospaceError
from infospace_bench.routing_config import (
ROUTING_SCHEMA_VERSION,
RoutingCandidateConfig,
RoutingConfig,
RoutingTaskTypeConfig,
load_routing_config,
parse_routing_config,
)
# Smallest payload used throughout these tests: one task type holding a single
# candidate that sets only ``id``, ``provider`` and ``model``.
MINIMAL = {
    "schema_version": 1,
    "task_types": {
        "summarize-source": {
            "candidates": [
                {
                    "id": "openrouter:gpt-4o-mini",
                    "provider": "openrouter",
                    "model": "openai/gpt-4o-mini",
                },
            ],
        },
    },
}
def test_parses_minimal_config() -> None:
    """The minimal payload parses and every optional field takes its default."""
    parsed = parse_routing_config(MINIMAL)

    # Top-level optional settings are absent from MINIMAL.
    assert parsed.schema_version == ROUTING_SCHEMA_VERSION
    assert parsed.default_quality_floor is None
    assert parsed.ledger_path is None
    assert parsed.stage_to_task_type == {}

    # Exactly one task type is declared ...
    assert len(parsed.task_types) == 1
    only_task = parsed.task_types[0]
    assert only_task.task_type == "summarize-source"
    assert only_task.quality_floor is None

    # ... and it carries exactly one candidate.
    assert len(only_task.candidates) == 1
    only_candidate = only_task.candidates[0]
    assert only_candidate.id == "openrouter:gpt-4o-mini"
    assert only_candidate.provider == "openrouter"
    assert only_candidate.model == "openai/gpt-4o-mini"
    assert only_candidate.api_key_env == ""
    assert only_candidate.max_cost_per_1k is None
def test_parses_full_config_round_trip() -> None:
    """A payload that sets every optional field keeps those values after parsing."""
    cheap_task = {
        "quality_floor": 0.7,
        "candidates": [
            {
                "id": "openrouter:gpt-4o-mini",
                "provider": "openrouter",
                "model": "openai/gpt-4o-mini",
                "api_key_env": "OPENROUTER_API_KEY",
                "max_cost_per_1k": 0.001,
            },
        ],
    }
    smart_task = {
        "quality_floor": 0.85,
        "candidates": [
            {
                "id": "openrouter:claude-haiku",
                "provider": "openrouter",
                "model": "anthropic/claude-3.5-haiku",
            },
            {
                "id": "openrouter:claude-sonnet",
                "provider": "openrouter",
                "model": "anthropic/claude-3.5-sonnet",
                "max_cost_per_1k": 0.003,
            },
        ],
    }
    payload = {
        "schema_version": 1,
        "default_quality_floor": 0.8,
        "ledger_path": "output/routing/quality.jsonl",
        "stage_to_task_type": {
            "extract-entities": "smart",
            "extract-relations": "smart",
        },
        "task_types": {"cheap": cheap_task, "smart": smart_task},
    }

    parsed = parse_routing_config(payload)

    assert parsed.default_quality_floor == 0.8
    assert parsed.ledger_path == "output/routing/quality.jsonl"
    assert parsed.stage_to_task_type == {
        "extract-entities": "smart",
        "extract-relations": "smart",
    }

    # Pick the first parsed task type named "smart" and check its details.
    parsed_smart = [task for task in parsed.task_types if task.task_type == "smart"][0]
    assert parsed_smart.quality_floor == 0.85
    assert len(parsed_smart.candidates) == 2
    assert parsed_smart.candidates[1].max_cost_per_1k == 0.003
def test_load_routing_config_reads_yaml_file(tmp_path: Path) -> None:
    """load_routing_config turns a YAML file on disk into a RoutingConfig."""
    yaml_text = yaml.safe_dump(MINIMAL, sort_keys=False)
    routing_file = tmp_path / "routing.yaml"
    routing_file.write_text(yaml_text, encoding="utf-8")

    loaded = load_routing_config(routing_file)

    assert isinstance(loaded, RoutingConfig)
    assert loaded.schema_version == 1
def test_load_routing_config_missing_file(tmp_path: Path) -> None:
    """Loading a path that does not exist raises ``missing_routing_config``."""
    absent = tmp_path / "missing.yaml"
    with pytest.raises(InfospaceError) as raised:
        load_routing_config(absent)
    assert raised.value.code == "missing_routing_config"
def test_load_routing_config_bad_yaml(tmp_path: Path) -> None:
    """A file that is not well-formed YAML raises ``invalid_routing_config_yaml``."""
    malformed = "schema_version: 1\n bad: indent\n: : : :\n"
    broken_file = tmp_path / "broken.yaml"
    broken_file.write_text(malformed, encoding="utf-8")

    with pytest.raises(InfospaceError) as raised:
        load_routing_config(broken_file)
    assert raised.value.code == "invalid_routing_config_yaml"
def test_rejects_wrong_schema_version() -> None:
    """A schema_version other than the supported one is rejected."""
    bumped = dict(MINIMAL)
    bumped["schema_version"] = 2
    with pytest.raises(InfospaceError) as raised:
        parse_routing_config(bumped)
    assert raised.value.code == "unsupported_routing_schema"
def test_rejects_missing_schema_version() -> None:
    """Omitting schema_version is reported the same way as an unsupported one."""
    without_version = {"task_types": MINIMAL["task_types"]}
    with pytest.raises(InfospaceError) as raised:
        parse_routing_config(without_version)
    assert raised.value.code == "unsupported_routing_schema"
def test_rejects_empty_task_types() -> None:
    """An empty task_types mapping raises ``empty_routing_task_types``."""
    no_tasks = {"schema_version": 1, "task_types": {}}
    with pytest.raises(InfospaceError) as raised:
        parse_routing_config(no_tasks)
    assert raised.value.code == "empty_routing_task_types"
def test_rejects_task_type_without_candidates() -> None:
    """A task type whose candidate list is empty raises ``empty_routing_candidates``."""
    candidate_free = {
        "schema_version": 1,
        "task_types": {"foo": {"candidates": []}},
    }
    with pytest.raises(InfospaceError) as raised:
        parse_routing_config(candidate_free)
    assert raised.value.code == "empty_routing_candidates"
def test_rejects_candidate_missing_required_field() -> None:
    """A candidate without ``id`` is rejected and the error names the missing field."""
    id_less_candidate = {"provider": "openrouter", "model": "x"}  # missing id
    payload = {
        "schema_version": 1,
        "task_types": {"foo": {"candidates": [id_less_candidate]}},
    }
    with pytest.raises(InfospaceError) as raised:
        parse_routing_config(payload)

    error = raised.value
    assert error.code == "missing_routing_candidate_field"
    assert "id" in error.detail["missing"]
def test_rejects_unsupported_provider() -> None:
    """The provider name "acme" raises ``unsupported_routing_provider``."""
    acme_candidate = {"id": "x", "provider": "acme", "model": "acme/model"}
    payload = {
        "schema_version": 1,
        "task_types": {"foo": {"candidates": [acme_candidate]}},
    }
    with pytest.raises(InfospaceError) as raised:
        parse_routing_config(payload)
    assert raised.value.code == "unsupported_routing_provider"
def test_rejects_negative_max_cost() -> None:
    """A negative max_cost_per_1k raises ``invalid_routing_max_cost``."""
    negative_cost_candidate = {
        "id": "x",
        "provider": "openrouter",
        "model": "openai/gpt-4o-mini",
        "max_cost_per_1k": -1,
    }
    payload = {
        "schema_version": 1,
        "task_types": {"foo": {"candidates": [negative_cost_candidate]}},
    }
    with pytest.raises(InfospaceError) as raised:
        parse_routing_config(payload)
    assert raised.value.code == "invalid_routing_max_cost"
def test_rejects_quality_floor_out_of_range() -> None:
    """A default_quality_floor of 1.5 raises ``invalid_routing_quality_floor``."""
    payload = {
        "schema_version": 1,
        "default_quality_floor": 1.5,
        "task_types": MINIMAL["task_types"],
    }
    with pytest.raises(InfospaceError) as raised:
        parse_routing_config(payload)
    assert raised.value.code == "invalid_routing_quality_floor"
def test_rejects_duplicate_candidate_ids_within_task_type() -> None:
    """Two candidates sharing an id inside one task type are rejected."""
    clashing_candidates = [
        {"id": "dupe", "provider": "openrouter", "model": "a"},
        {"id": "dupe", "provider": "openrouter", "model": "b"},
    ]
    payload = {
        "schema_version": 1,
        "task_types": {"foo": {"candidates": clashing_candidates}},
    }
    with pytest.raises(InfospaceError) as raised:
        parse_routing_config(payload)
    assert raised.value.code == "duplicate_routing_candidate_id"
def test_rejects_non_mapping_stage_map() -> None:
    """A list given for stage_to_task_type raises ``invalid_routing_stage_map``."""
    payload = {
        "schema_version": 1,
        "task_types": MINIMAL["task_types"],
        "stage_to_task_type": ["not", "a", "mapping"],
    }
    with pytest.raises(InfospaceError) as raised:
        parse_routing_config(payload)
    assert raised.value.code == "invalid_routing_stage_map"
# ---------------------------------------------------------------------------
# T02 — loader that materialises a config into a live llm-connect policy
# ---------------------------------------------------------------------------
def _fake_adapter_factory_record(record: list):
    """Build a stand-in adapter factory that logs each call into ``record``.

    The returned callable takes ``(candidate, env)``, appends a dict with the
    candidate's ``id``, ``provider`` and ``model`` to ``record``, and returns
    the sentinel string ``"adapter:<candidate id>"`` in place of an adapter.
    ``env`` is accepted but ignored.
    """

    def _recording_factory(candidate, env):
        seen = {
            "id": candidate.id,
            "provider": candidate.provider,
            "model": candidate.model,
        }
        record.append(seen)
        return f"adapter:{candidate.id}"

    return _recording_factory
def test_build_routing_policy_returns_static_when_no_adaptive_signals() -> None:
    """With no ledger path and no quality floor the loader returns a plain RoutingPolicy."""
    from llm_connect.routing import RoutingPolicy
    from infospace_bench.routing_config import build_routing_policy_from_config

    parsed = parse_routing_config(MINIMAL)
    factory_calls: list[dict] = []
    fake_factory = _fake_adapter_factory_record(factory_calls)

    policy = build_routing_policy_from_config(parsed, adapter_factory=fake_factory)

    assert isinstance(policy, RoutingPolicy)
    # The class name must be exactly RoutingPolicy, which a differently named subclass would fail.
    assert type(policy).__name__ == "RoutingPolicy", "no adaptive signals -> static policy"

    assert len(policy.rules) == 1
    only_rule = policy.rules[0]
    assert only_rule.task_type == "summarize-source"
    assert only_rule.prefer == "adapter:openrouter:gpt-4o-mini"
    assert only_rule.fallback is None

    # The fake factory was invoked for the openrouter candidate.
    assert factory_calls
    assert factory_calls[0]["provider"] == "openrouter"
def test_build_routing_policy_returns_adaptive_when_ledger_path_set(tmp_path: Path) -> None:
    """Setting ledger_path yields an adaptive policy whose ledger sits under the workspace."""
    from llm_connect.routing import AdaptiveRoutingPolicy
    from infospace_bench.routing_config import build_routing_policy_from_config

    payload = dict(MINIMAL)
    payload["ledger_path"] = "output/routing/quality.jsonl"
    parsed = parse_routing_config(payload)

    policy = build_routing_policy_from_config(
        parsed,
        workspace=tmp_path,
        adapter_factory=_fake_adapter_factory_record([]),
    )

    assert isinstance(policy, AdaptiveRoutingPolicy)
    assert policy.ledger is not None
    # The relative ledger_path is expected to resolve against the workspace.
    ledger_location = tmp_path.joinpath("output", "routing", "quality.jsonl")
    assert Path(policy.ledger.path) == ledger_location
def test_build_routing_policy_returns_adaptive_when_quality_floor_set() -> None:
    """A default_quality_floor alone yields an adaptive policy with no ledger."""
    from llm_connect.routing import AdaptiveRoutingPolicy
    from infospace_bench.routing_config import build_routing_policy_from_config

    payload = dict(MINIMAL)
    payload["default_quality_floor"] = 0.8
    parsed = parse_routing_config(payload)

    fake_factory = _fake_adapter_factory_record([])
    policy = build_routing_policy_from_config(parsed, adapter_factory=fake_factory)

    assert isinstance(policy, AdaptiveRoutingPolicy)
    assert policy.ledger is None  # no ledger_path set
def test_build_routing_policy_routes_fallback_for_multi_candidate_rule() -> None:
    """With two candidates the first becomes ``prefer`` and the second ``fallback``."""
    from infospace_bench.routing_config import build_routing_policy_from_config

    cheap_candidate = {
        "id": "openrouter:cheap",
        "provider": "openrouter",
        "model": "openai/gpt-4o-mini",
        "max_cost_per_1k": 0.001,
    }
    smart_candidate = {
        "id": "openrouter:smart",
        "provider": "openrouter",
        "model": "anthropic/claude-3.5-sonnet",
    }
    payload = {
        "schema_version": 1,
        "task_types": {
            "extract-entities": {
                "candidates": [cheap_candidate, smart_candidate],
            },
        },
    }
    parsed = parse_routing_config(payload)

    fake_factory = _fake_adapter_factory_record([])
    policy = build_routing_policy_from_config(parsed, adapter_factory=fake_factory)

    first_rule = policy.rules[0]
    assert first_rule.prefer == "adapter:openrouter:cheap"
    # The rule's cost cap equals the one set on the first candidate.
    assert first_rule.max_cost_per_1k == 0.001
    assert first_rule.fallback == "adapter:openrouter:smart"
def test_build_routing_policy_resolves_api_key_from_env() -> None:
    """Smoke test: the default factory builds an OpenRouterAdapter from a fake env.

    No ``adapter_factory`` is passed, so ``build_routing_policy_from_config``
    uses whatever factory it defaults to. The env mapping supplies a
    placeholder ``OPENROUTER_API_KEY``; the test only checks the type of the
    adapter wired into the rule's ``prefer`` slot.
    """
    # Function-scope imports, matching the other loader tests in this file.
    # The private ``_default_adapter_factory`` import was dropped because it
    # was never referenced in the test body.
    from infospace_bench.routing_config import build_routing_policy_from_config
    from llm_connect.openrouter import OpenRouterAdapter

    config = parse_routing_config(MINIMAL)
    env = {"OPENROUTER_API_KEY": "sk-fake-test-key"}

    policy = build_routing_policy_from_config(config, env=env)

    rule = policy.rules[0]
    assert isinstance(rule.prefer, OpenRouterAdapter)
def test_build_routing_policy_fails_fast_on_missing_api_key() -> None:
    """Building with an empty env raises ``missing_routing_api_key`` for openrouter."""
    from infospace_bench.routing_config import build_routing_policy_from_config

    parsed = parse_routing_config(MINIMAL)
    empty_env: dict = {}  # the candidate's required env var is unset

    with pytest.raises(InfospaceError) as raised:
        build_routing_policy_from_config(parsed, env=empty_env)

    error = raised.value
    assert error.code == "missing_routing_api_key"
    assert error.detail["provider"] == "openrouter"
def test_build_routing_policy_claude_code_needs_no_api_key() -> None:
    """A claude_code candidate builds a ClaudeCodeAdapter even with an empty env."""
    from infospace_bench.routing_config import build_routing_policy_from_config
    from llm_connect.claude_code import ClaudeCodeAdapter

    claude_candidate = {
        "id": "claude-code",
        "provider": "claude_code",
        "model": "claude-opus-4-7",
    }
    payload = {
        "schema_version": 1,
        "task_types": {"baseline": {"candidates": [claude_candidate]}},
    }
    parsed = parse_routing_config(payload)

    policy = build_routing_policy_from_config(parsed, env={})

    preferred = policy.rules[0].prefer
    assert isinstance(preferred, ClaudeCodeAdapter)
def test_example_trading_literature_config_parses() -> None:
    """Regression: the shipped example config must parse cleanly.

    Loads ``examples/routing/trading-literature.yaml``, located two directory
    levels above this test file, and checks that:

    * the ``cheap``, ``smart``, ``judge`` and ``baseline`` task types are declared;
    * ``default_quality_floor`` is 0.80;
    * every entry in ``stage_to_task_type`` points at a declared task type;
    * the first ``baseline`` candidate uses the ``claude_code`` provider.
    """
    # ``load_routing_config`` is already imported at module level, so the
    # function-local re-import that used to shadow it here was removed.
    repo_root = Path(__file__).resolve().parents[1]
    example_path = repo_root / "examples" / "routing" / "trading-literature.yaml"

    config = load_routing_config(example_path)

    task_type_names = {task.task_type for task in config.task_types}
    assert {"cheap", "smart", "judge", "baseline"} <= task_type_names
    assert config.default_quality_floor == 0.80

    # Each shipped stage maps to a task type the config actually declares.
    for stage, task_type in config.stage_to_task_type.items():
        assert task_type in task_type_names, (
            f"stage {stage!r} maps to undeclared task type {task_type!r}"
        )

    # baseline is included so a T05 ShadowingAdapter wiring can reference it.
    baseline = next(t for t in config.task_types if t.task_type == "baseline")
    assert baseline.candidates[0].provider == "claude_code"
def test_build_routing_policy_honours_custom_api_key_env() -> None:
    """A candidate's api_key_env names the variable the loader reads the key from."""
    from infospace_bench.routing_config import build_routing_policy_from_config
    from llm_connect.openrouter import OpenRouterAdapter

    candidate = {
        "id": "openrouter:gpt-4o-mini",
        "provider": "openrouter",
        "model": "openai/gpt-4o-mini",
        "api_key_env": "ALT_OPENROUTER_KEY",
    }
    payload = {
        "schema_version": 1,
        "task_types": {"summarize-source": {"candidates": [candidate]}},
    }
    parsed = parse_routing_config(payload)

    # Only OPENROUTER_API_KEY is set here, and the build must fail naming ALT_OPENROUTER_KEY.
    only_default_key = {"OPENROUTER_API_KEY": "wrong-default"}
    with pytest.raises(InfospaceError) as raised:
        build_routing_policy_from_config(parsed, env=only_default_key)
    error = raised.value
    assert error.code == "missing_routing_api_key"
    assert error.detail["api_key_env"] == "ALT_OPENROUTER_KEY"

    # With the custom variable present the adapter is constructed.
    custom_key_env = {"ALT_OPENROUTER_KEY": "sk-fake"}
    policy = build_routing_policy_from_config(parsed, env=custom_key_env)
    assert isinstance(policy.rules[0].prefer, OpenRouterAdapter)
def test_rejects_non_string_ledger_path() -> None:
    """An integer ledger_path raises ``invalid_routing_ledger_path``."""
    payload = {
        "schema_version": 1,
        "task_types": MINIMAL["task_types"],
        "ledger_path": 42,
    }
    with pytest.raises(InfospaceError) as raised:
        parse_routing_config(payload)
    assert raised.value.code == "invalid_routing_ledger_path"