generated from coulomb/repo-seed
Add --shadow-baseline <id> and --shadow-rate <float> opt-in flags to
generate run, generate resume, and generate from-source. When
--shadow-baseline names a candidate id from the routing config,
build_routing_policy_from_config wraps every other candidate in an
llm-connect ShadowingAdapter using that baseline plus a
PairedGrader(ExactMatchJudge()) and the workspace-resolved
QualityLedger. The baseline candidate itself is never wrapped — that
would shadow it against itself. --shadow-rate defaults to 0.1 when
--shadow-baseline is set; passing --shadow-rate without
--shadow-baseline fails fast with shadow_rate_without_baseline.
Setting --shadow-baseline without a ledger_path in the config fails
with missing_routing_ledger_for_shadow so observations have a place to
land before any call goes out.
run_generation grew shadow_baseline + shadow_rate kwargs and
_adapter_for("routing", ...) plumbs them into
build_routing_policy_from_config. The wrapped ShadowingAdapter slots
into the policy's prefer/fallback per task type via a
(candidate_id, task_type) reverse lookup, and adapters_by_id on the
adaptive policy gets the string-keyed entries.
Five new tests cover: shadow_rate without baseline fails fast, shadow
mode without a ledger fails fast, unknown shadow baseline id fails
fast, structural assertion that ShadowingAdapter wraps non-baseline
candidates and leaves the baseline raw, and a behavioural check that
shadow_rate=1.0 calls the baseline on every call while shadow_rate=0.0
skips it entirely. The test forces async_shadow=False so the call counter
is deterministic.
Closes IB-WP-0020: T01-T05 all done. Workplan status flips from active
to finished. 179 tests pass, 2 skipped (both live OpenRouter smokes).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
656 lines
22 KiB
Python
656 lines
22 KiB
Python
"""
|
|
Tests for the routing config schema (IB-WP-0020-T01).

The schema tests are parser-only — no network calls, no llm-connect
construction. The T02 section further down tests the loader that
materialises a config into a live llm-connect policy.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
from infospace_bench.errors import InfospaceError
|
|
from infospace_bench.routing_config import (
|
|
ROUTING_SCHEMA_VERSION,
|
|
RoutingCandidateConfig,
|
|
RoutingConfig,
|
|
RoutingTaskTypeConfig,
|
|
load_routing_config,
|
|
parse_routing_config,
|
|
)
|
|
|
|
|
|
# Baseline payload shared by the tests below: one task type with a single
# candidate that sets only the id, provider and model fields.
MINIMAL = {
    "schema_version": 1,
    "task_types": {
        "summarize-source": {
            "candidates": [
                {
                    "id": "openrouter:gpt-4o-mini",
                    "provider": "openrouter",
                    "model": "openai/gpt-4o-mini",
                },
            ],
        },
    },
}
|
|
|
|
|
|
def test_parses_minimal_config() -> None:
    """Parsing MINIMAL yields one task type with one candidate and unset optionals."""
    parsed = parse_routing_config(MINIMAL)

    # Top-level fields that MINIMAL omits come back as None / empty.
    assert parsed.schema_version == ROUTING_SCHEMA_VERSION
    assert parsed.default_quality_floor is None
    assert parsed.ledger_path is None
    assert parsed.stage_to_task_type == {}

    assert len(parsed.task_types) == 1
    only_task = parsed.task_types[0]
    assert only_task.task_type == "summarize-source"
    assert only_task.quality_floor is None

    assert len(only_task.candidates) == 1
    only_candidate = only_task.candidates[0]
    assert only_candidate.id == "openrouter:gpt-4o-mini"
    assert only_candidate.provider == "openrouter"
    assert only_candidate.model == "openai/gpt-4o-mini"
    # Optional candidate fields that MINIMAL leaves out.
    assert only_candidate.api_key_env == ""
    assert only_candidate.max_cost_per_1k is None
|
|
|
|
|
|
def test_parses_full_config_round_trip() -> None:
    """Optional fields supplied in the payload are readable on the parsed config."""
    cheap_task = {
        "quality_floor": 0.7,
        "candidates": [
            {
                "id": "openrouter:gpt-4o-mini",
                "provider": "openrouter",
                "model": "openai/gpt-4o-mini",
                "api_key_env": "OPENROUTER_API_KEY",
                "max_cost_per_1k": 0.001,
            },
        ],
    }
    smart_task = {
        "quality_floor": 0.85,
        "candidates": [
            {
                "id": "openrouter:claude-haiku",
                "provider": "openrouter",
                "model": "anthropic/claude-3.5-haiku",
            },
            {
                "id": "openrouter:claude-sonnet",
                "provider": "openrouter",
                "model": "anthropic/claude-3.5-sonnet",
                "max_cost_per_1k": 0.003,
            },
        ],
    }
    raw = {
        "schema_version": 1,
        "default_quality_floor": 0.8,
        "ledger_path": "output/routing/quality.jsonl",
        "stage_to_task_type": {
            "extract-entities": "smart",
            "extract-relations": "smart",
        },
        "task_types": {
            "cheap": cheap_task,
            "smart": smart_task,
        },
    }

    parsed = parse_routing_config(raw)

    assert parsed.default_quality_floor == 0.8
    assert parsed.ledger_path == "output/routing/quality.jsonl"
    assert parsed.stage_to_task_type == {
        "extract-entities": "smart",
        "extract-relations": "smart",
    }

    # Look the task up by name rather than by position.
    smart_parsed = next(
        task for task in parsed.task_types if task.task_type == "smart"
    )
    assert smart_parsed.quality_floor == 0.85
    assert len(smart_parsed.candidates) == 2
    assert smart_parsed.candidates[1].max_cost_per_1k == 0.003
|
|
|
|
|
|
def test_load_routing_config_reads_yaml_file(tmp_path: Path) -> None:
    """A YAML dump of MINIMAL written to disk loads into a RoutingConfig."""
    yaml_file = tmp_path / "routing.yaml"
    serialized = yaml.safe_dump(MINIMAL, sort_keys=False)
    yaml_file.write_text(serialized, encoding="utf-8")

    loaded = load_routing_config(yaml_file)

    assert isinstance(loaded, RoutingConfig)
    assert loaded.schema_version == 1
|
|
|
|
|
|
def test_load_routing_config_missing_file(tmp_path: Path) -> None:
    """Loading a path that was never written raises missing_routing_config."""
    absent = tmp_path / "missing.yaml"
    with pytest.raises(InfospaceError) as caught:
        load_routing_config(absent)
    assert caught.value.code == "missing_routing_config"
|
|
|
|
|
|
def test_load_routing_config_bad_yaml(tmp_path: Path) -> None:
    """A file holding malformed YAML raises invalid_routing_config_yaml."""
    broken_file = tmp_path / "broken.yaml"
    malformed = "schema_version: 1\n bad: indent\n: : : :\n"
    broken_file.write_text(malformed, encoding="utf-8")

    with pytest.raises(InfospaceError) as caught:
        load_routing_config(broken_file)
    assert caught.value.code == "invalid_routing_config_yaml"
|
|
|
|
|
|
def test_rejects_wrong_schema_version() -> None:
    """MINIMAL with schema_version 2 is rejected as unsupported_routing_schema."""
    bumped = dict(MINIMAL)
    bumped["schema_version"] = 2
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(bumped)
    assert caught.value.code == "unsupported_routing_schema"
|
|
|
|
|
|
def test_rejects_missing_schema_version() -> None:
    """A payload with no schema_version key is rejected as unsupported_routing_schema."""
    without_version = dict(task_types=MINIMAL["task_types"])
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(without_version)
    assert caught.value.code == "unsupported_routing_schema"
|
|
|
|
|
|
def test_rejects_empty_task_types() -> None:
    """An empty task_types mapping raises empty_routing_task_types."""
    raw = {
        "schema_version": 1,
        "task_types": {},
    }
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(raw)
    assert caught.value.code == "empty_routing_task_types"
|
|
|
|
|
|
def test_rejects_task_type_without_candidates() -> None:
    """A task type whose candidates list is empty raises empty_routing_candidates."""
    empty_task = {"candidates": []}
    raw = {
        "schema_version": 1,
        "task_types": {"foo": empty_task},
    }
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(raw)
    assert caught.value.code == "empty_routing_candidates"
|
|
|
|
|
|
def test_rejects_candidate_missing_required_field() -> None:
    """A candidate without an id is rejected and the error detail names the field."""
    candidate_without_id = {"provider": "openrouter", "model": "x"}  # missing id
    raw = {
        "schema_version": 1,
        "task_types": {
            "foo": {"candidates": [candidate_without_id]},
        },
    }
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(raw)

    error = caught.value
    assert error.code == "missing_routing_candidate_field"
    assert "id" in error.detail["missing"]
|
|
|
|
|
|
def test_rejects_unsupported_provider() -> None:
    """A candidate whose provider is "acme" raises unsupported_routing_provider."""
    acme_candidate = {"id": "x", "provider": "acme", "model": "acme/model"}
    raw = {
        "schema_version": 1,
        "task_types": {
            "foo": {"candidates": [acme_candidate]},
        },
    }
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(raw)
    assert caught.value.code == "unsupported_routing_provider"
|
|
|
|
|
|
def test_rejects_negative_max_cost() -> None:
    """A candidate with max_cost_per_1k of -1 raises invalid_routing_max_cost."""
    negative_cost_candidate = {
        "id": "x",
        "provider": "openrouter",
        "model": "openai/gpt-4o-mini",
        "max_cost_per_1k": -1,
    }
    raw = {
        "schema_version": 1,
        "task_types": {
            "foo": {"candidates": [negative_cost_candidate]},
        },
    }
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(raw)
    assert caught.value.code == "invalid_routing_max_cost"
|
|
|
|
|
|
def test_rejects_quality_floor_out_of_range() -> None:
    """A default_quality_floor of 1.5 raises invalid_routing_quality_floor."""
    raw = dict(
        schema_version=1,
        default_quality_floor=1.5,
        task_types=MINIMAL["task_types"],
    )
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(raw)
    assert caught.value.code == "invalid_routing_quality_floor"
|
|
|
|
|
|
def test_rejects_duplicate_candidate_ids_within_task_type() -> None:
    """Two candidates sharing an id in one task type raise duplicate_routing_candidate_id."""
    first = {"id": "dupe", "provider": "openrouter", "model": "a"}
    second = {"id": "dupe", "provider": "openrouter", "model": "b"}
    raw = {
        "schema_version": 1,
        "task_types": {
            "foo": {"candidates": [first, second]},
        },
    }
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(raw)
    assert caught.value.code == "duplicate_routing_candidate_id"
|
|
|
|
|
|
def test_rejects_non_mapping_stage_map() -> None:
    """A list given for stage_to_task_type raises invalid_routing_stage_map."""
    raw = dict(
        schema_version=1,
        task_types=MINIMAL["task_types"],
        stage_to_task_type=["not", "a", "mapping"],
    )
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(raw)
    assert caught.value.code == "invalid_routing_stage_map"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# T02 — loader that materialises a config into a live llm-connect policy
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _fake_adapter_factory_record(record: list):
|
|
"""Return a factory that records calls and returns a sentinel string."""
|
|
def _factory(candidate, env):
|
|
record.append({"id": candidate.id, "provider": candidate.provider, "model": candidate.model})
|
|
return f"adapter:{candidate.id}"
|
|
return _factory
|
|
|
|
|
|
def test_build_routing_policy_returns_static_when_no_adaptive_signals() -> None:
    """MINIMAL (no ledger, no quality floor) builds a plain RoutingPolicy with one rule."""
    from llm_connect.routing import RoutingPolicy
    from infospace_bench.routing_config import build_routing_policy_from_config

    factory_calls: list[dict] = []
    recording_factory = _fake_adapter_factory_record(factory_calls)
    parsed = parse_routing_config(MINIMAL)

    policy = build_routing_policy_from_config(
        parsed, adapter_factory=recording_factory
    )

    assert isinstance(policy, RoutingPolicy)
    assert type(policy).__name__ == "RoutingPolicy", "no adaptive signals -> static policy"

    assert len(policy.rules) == 1
    only_rule = policy.rules[0]
    assert only_rule.task_type == "summarize-source"
    # The recording factory returns "adapter:<id>" sentinels.
    assert only_rule.prefer == "adapter:openrouter:gpt-4o-mini"
    assert only_rule.fallback is None
    assert factory_calls and factory_calls[0]["provider"] == "openrouter"
|
|
|
|
|
|
def test_build_routing_policy_returns_adaptive_when_ledger_path_set(tmp_path: Path) -> None:
    """Setting ledger_path yields an AdaptiveRoutingPolicy whose ledger sits under the workspace."""
    from llm_connect.routing import AdaptiveRoutingPolicy
    from infospace_bench.routing_config import build_routing_policy_from_config

    raw = dict(MINIMAL, ledger_path="output/routing/quality.jsonl")
    parsed = parse_routing_config(raw)

    policy = build_routing_policy_from_config(
        parsed,
        workspace=tmp_path,
        adapter_factory=_fake_adapter_factory_record([]),
    )

    assert isinstance(policy, AdaptiveRoutingPolicy)
    assert policy.ledger is not None
    # The relative ledger_path is expected to resolve against the workspace.
    ledger_file = tmp_path.joinpath("output", "routing", "quality.jsonl")
    assert Path(policy.ledger.path) == ledger_file
|
|
|
|
|
|
def test_build_routing_policy_returns_adaptive_when_quality_floor_set() -> None:
    """A default_quality_floor alone yields an AdaptiveRoutingPolicy with no ledger."""
    from llm_connect.routing import AdaptiveRoutingPolicy
    from infospace_bench.routing_config import build_routing_policy_from_config

    raw = dict(MINIMAL, default_quality_floor=0.8)
    parsed = parse_routing_config(raw)

    policy = build_routing_policy_from_config(
        parsed, adapter_factory=_fake_adapter_factory_record([])
    )

    assert isinstance(policy, AdaptiveRoutingPolicy)
    assert policy.ledger is None  # no ledger_path set
|
|
|
|
|
|
def test_build_routing_policy_routes_fallback_for_multi_candidate_rule() -> None:
    """With two candidates, the first becomes prefer and the second becomes fallback."""
    from infospace_bench.routing_config import build_routing_policy_from_config

    cheap_candidate = {
        "id": "openrouter:cheap",
        "provider": "openrouter",
        "model": "openai/gpt-4o-mini",
        "max_cost_per_1k": 0.001,
    }
    smart_candidate = {
        "id": "openrouter:smart",
        "provider": "openrouter",
        "model": "anthropic/claude-3.5-sonnet",
    }
    raw = {
        "schema_version": 1,
        "task_types": {
            "extract-entities": {
                "candidates": [cheap_candidate, smart_candidate],
            },
        },
    }
    parsed = parse_routing_config(raw)

    policy = build_routing_policy_from_config(
        parsed, adapter_factory=_fake_adapter_factory_record([])
    )

    first_rule = policy.rules[0]
    assert first_rule.prefer == "adapter:openrouter:cheap"
    assert first_rule.max_cost_per_1k == 0.001
    assert first_rule.fallback == "adapter:openrouter:smart"
|
|
|
|
|
|
def test_build_routing_policy_resolves_api_key_from_env() -> None:
    """With OPENROUTER_API_KEY in env, the loader builds an OpenRouterAdapter."""
    from llm_connect.openrouter import OpenRouterAdapter
    from infospace_bench.routing_config import build_routing_policy_from_config

    config = parse_routing_config(MINIMAL)
    # Smoke: no adapter_factory is passed, so the loader's own factory runs
    # against a fake env and should construct an OpenRouterAdapter.
    env = {"OPENROUTER_API_KEY": "sk-fake-test-key"}
    policy = build_routing_policy_from_config(config, env=env)

    rule = policy.rules[0]
    # The constructed adapter is an OpenRouterAdapter from llm-connect.
    assert isinstance(rule.prefer, OpenRouterAdapter)
|
|
|
|
|
|
def test_build_routing_policy_fails_fast_on_missing_api_key() -> None:
    """Building MINIMAL's openrouter candidate with an empty env raises missing_routing_api_key."""
    from infospace_bench.routing_config import build_routing_policy_from_config

    parsed = parse_routing_config(MINIMAL)
    # Empty env — the candidate's required env var is unset.
    empty_env: dict = {}
    with pytest.raises(InfospaceError) as caught:
        build_routing_policy_from_config(parsed, env=empty_env)

    error = caught.value
    assert error.code == "missing_routing_api_key"
    assert error.detail["provider"] == "openrouter"
|
|
|
|
|
|
def test_build_routing_policy_claude_code_needs_no_api_key() -> None:
    """A claude_code candidate builds into a ClaudeCodeAdapter even with an empty env."""
    from infospace_bench.routing_config import build_routing_policy_from_config
    from llm_connect.claude_code import ClaudeCodeAdapter

    claude_candidate = {
        "id": "claude-code",
        "provider": "claude_code",
        "model": "claude-opus-4-7",
    }
    raw = {
        "schema_version": 1,
        "task_types": {
            "baseline": {"candidates": [claude_candidate]},
        },
    }
    parsed = parse_routing_config(raw)

    policy = build_routing_policy_from_config(parsed, env={})

    preferred = policy.rules[0].prefer
    assert isinstance(preferred, ClaudeCodeAdapter)
|
|
|
|
|
|
def test_example_trading_literature_config_parses() -> None:
    """Regression: the shipped example config must parse cleanly.

    Loads ``examples/routing/trading-literature.yaml`` (resolved relative to
    this test file's parent directory) and checks the task types, the default
    quality floor, and that every stage maps to a declared task type.
    """
    # load_routing_config is already imported at module level; no local
    # re-import is needed here.
    example_path = (
        Path(__file__).resolve().parent.parent
        / "examples"
        / "routing"
        / "trading-literature.yaml"
    )

    config = load_routing_config(example_path)

    task_type_names = {task.task_type for task in config.task_types}
    assert {"cheap", "smart", "judge", "baseline"} <= task_type_names
    assert config.default_quality_floor == 0.80
    # Each shipped stage maps to a task type the config actually declares.
    for stage, task_type in config.stage_to_task_type.items():
        assert task_type in task_type_names, f"stage {stage!r} maps to undeclared task type {task_type!r}"
    # baseline is included so a T05 ShadowingAdapter wiring can reference it.
    baseline = next(t for t in config.task_types if t.task_type == "baseline")
    assert baseline.candidates[0].provider == "claude_code"
|
|
|
|
|
|
def test_build_routing_policy_honours_custom_api_key_env() -> None:
    """A candidate's api_key_env names the variable that must be set, not the default one."""
    from infospace_bench.routing_config import build_routing_policy_from_config
    from llm_connect.openrouter import OpenRouterAdapter

    custom_env_candidate = {
        "id": "openrouter:gpt-4o-mini",
        "provider": "openrouter",
        "model": "openai/gpt-4o-mini",
        "api_key_env": "ALT_OPENROUTER_KEY",
    }
    raw = {
        "schema_version": 1,
        "task_types": {
            "summarize-source": {"candidates": [custom_env_candidate]},
        },
    }
    parsed = parse_routing_config(raw)

    # Only the default variable is set: the build must fail and name the
    # custom variable in the error detail.
    only_default_env = {"OPENROUTER_API_KEY": "wrong-default"}
    with pytest.raises(InfospaceError) as caught:
        build_routing_policy_from_config(parsed, env=only_default_env)
    error = caught.value
    assert error.code == "missing_routing_api_key"
    assert error.detail["api_key_env"] == "ALT_OPENROUTER_KEY"

    # With the custom variable set, the adapter is constructed.
    custom_env = {"ALT_OPENROUTER_KEY": "sk-fake"}
    policy = build_routing_policy_from_config(parsed, env=custom_env)
    assert isinstance(policy.rules[0].prefer, OpenRouterAdapter)
|
|
|
|
|
|
def test_shadow_rate_without_baseline_fails_fast() -> None:
    """Passing shadow_rate with no shadow_baseline_id raises shadow_rate_without_baseline."""
    from infospace_bench.routing_config import build_routing_policy_from_config

    parsed = parse_routing_config(MINIMAL)
    build_kwargs = dict(
        shadow_rate=0.5,
        adapter_factory=_fake_adapter_factory_record([]),
    )
    with pytest.raises(InfospaceError) as caught:
        build_routing_policy_from_config(parsed, **build_kwargs)
    assert caught.value.code == "shadow_rate_without_baseline"
|
|
|
|
|
|
def test_shadow_baseline_without_ledger_path_fails_fast() -> None:
    """Shadow mode on a config with no ledger_path raises missing_routing_ledger_for_shadow."""
    from infospace_bench.routing_config import build_routing_policy_from_config

    # MINIMAL declares no ledger_path.
    parsed = parse_routing_config(MINIMAL)
    build_kwargs = dict(
        shadow_baseline_id="openrouter:gpt-4o-mini",
        adapter_factory=_fake_adapter_factory_record([]),
    )
    with pytest.raises(InfospaceError) as caught:
        build_routing_policy_from_config(parsed, **build_kwargs)
    assert caught.value.code == "missing_routing_ledger_for_shadow"
|
|
|
|
|
|
def test_shadow_baseline_not_in_config_fails_fast(tmp_path: Path) -> None:
    """A shadow_baseline_id that matches no candidate raises missing_shadow_baseline."""
    from infospace_bench.routing_config import build_routing_policy_from_config

    # ledger_path is set so the unknown-baseline check is the one that fires.
    raw = dict(MINIMAL, ledger_path="quality.jsonl")
    parsed = parse_routing_config(raw)
    build_kwargs = dict(
        workspace=tmp_path,
        shadow_baseline_id="not-in-config",
        adapter_factory=_fake_adapter_factory_record([]),
    )
    with pytest.raises(InfospaceError) as caught:
        build_routing_policy_from_config(parsed, **build_kwargs)
    assert caught.value.code == "missing_shadow_baseline"
|
|
|
|
|
|
def test_shadow_wraps_candidates_excluding_baseline(tmp_path: Path) -> None:
    """Shadow mode wraps the non-baseline candidate and leaves the baseline raw.

    Structural check only: no prompt is executed. The prefer slot must be a
    ShadowingAdapter around candidate-a that points at baseline-x, and the
    fallback slot must be the baseline stub itself.
    """
    from llm_connect.adapter import LLMAdapter
    from llm_connect.models import LLMResponse
    from llm_connect.shadowing import ShadowingAdapter
    from infospace_bench.routing_config import build_routing_policy_from_config

    data = {
        "schema_version": 1,
        "ledger_path": "quality.jsonl",
        "task_types": {
            "extract-entities": {
                "candidates": [
                    {"id": "candidate-a", "provider": "openrouter", "model": "openai/gpt-4o-mini"},
                    {"id": "baseline-x", "provider": "claude_code", "model": "claude-opus-4-7"},
                ],
            },
        },
    }
    config = parse_routing_config(data)

    class _Stub(LLMAdapter):
        def __init__(self, name):
            self.name = name
            self.calls = 0

        def execute_prompt(self, prompt, config):
            self.calls += 1
            return LLMResponse(content="match", model=self.name, usage={"prompt_tokens": 1, "completion_tokens": 1})

        def validate_config(self, config):
            return True

    # Keyed by candidate id so the assertions can compare identities.
    stubs: dict[str, _Stub] = {}

    def factory(candidate, env):
        stubs[candidate.id] = _Stub(candidate.id)
        return stubs[candidate.id]

    policy = build_routing_policy_from_config(
        config,
        workspace=tmp_path,
        adapter_factory=factory,
        shadow_baseline_id="baseline-x",
        shadow_rate=1.0,
    )

    rule = policy.rules[0]
    # The prefer slot is now a ShadowingAdapter wrapping candidate-a.
    assert isinstance(rule.prefer, ShadowingAdapter)
    assert rule.prefer.candidate_adapter is stubs["candidate-a"]
    assert rule.prefer.baseline_adapter is stubs["baseline-x"]
    assert rule.prefer.task_type == "extract-entities"
    # The baseline candidate (fallback) is NOT wrapped.
    assert rule.fallback is stubs["baseline-x"]
|
|
|
|
|
|
def test_shadow_rate_one_fires_per_call_and_zero_skips(tmp_path: Path) -> None:
    """ShadowingAdapter is best-effort and supplied by llm-connect.

    Spot-check the wiring: at rate=1.0 the baseline.execute_prompt runs on
    every call; at rate=0.0 it never runs.
    """
    from llm_connect.adapter import LLMAdapter
    from llm_connect.models import LLMResponse, RunConfig
    from infospace_bench.routing_config import build_routing_policy_from_config

    class _Counter(LLMAdapter):
        def __init__(self, name):
            self.name = name
            self.calls = 0

        def execute_prompt(self, prompt, config):
            self.calls += 1
            return LLMResponse(content="match", model=self.name, usage={"prompt_tokens": 1, "completion_tokens": 1})

        def validate_config(self, config):
            return True

    def _drive_three_calls(ledger_path: str, rate: float) -> dict[str, _Counter]:
        """Build a shadowed policy, run three prompts, and return the stubs by id."""
        raw = {
            "schema_version": 1,
            "ledger_path": ledger_path,
            "task_types": {
                "extract-entities": {
                    "candidates": [
                        {"id": "candidate-a", "provider": "openrouter", "model": "openai/gpt-4o-mini"},
                        {"id": "baseline-x", "provider": "claude_code", "model": "claude-opus-4-7"},
                    ],
                },
            },
        }
        # Fresh stubs per run so the call counters start at zero.
        created: dict[str, _Counter] = {}

        def factory(candidate, env):
            created[candidate.id] = _Counter(candidate.id)
            return created[candidate.id]

        policy = build_routing_policy_from_config(
            parse_routing_config(raw),
            workspace=tmp_path,
            adapter_factory=factory,
            shadow_baseline_id="baseline-x",
            shadow_rate=rate,
        )
        wrapped = policy.rules[0].prefer
        wrapped.async_shadow = False  # force sync grading for a deterministic count
        for _ in range(3):
            wrapped.execute_prompt("hello", RunConfig(model_name="x"))
        return created

    full_rate_stubs = _drive_three_calls("quality.jsonl", 1.0)
    assert full_rate_stubs["candidate-a"].calls == 3
    assert full_rate_stubs["baseline-x"].calls == 3, "rate=1.0 should call baseline on every call"

    # Use a unique ledger path so the two policies do not share state.
    (tmp_path / "subdir").mkdir(exist_ok=True)
    zero_rate_stubs = _drive_three_calls("subdir/quality.jsonl", 0.0)
    assert zero_rate_stubs["candidate-a"].calls == 3
    assert zero_rate_stubs["baseline-x"].calls == 0, "rate=0.0 should skip baseline entirely"
|
|
|
|
|
|
def test_rejects_non_string_ledger_path() -> None:
    """An integer ledger_path raises invalid_routing_ledger_path."""
    raw = dict(
        schema_version=1,
        task_types=MINIMAL["task_types"],
        ledger_path=42,
    )
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(raw)
    assert caught.value.code == "invalid_routing_ledger_path"
|