diff --git a/docs/routing-config.md b/docs/routing-config.md new file mode 100644 index 0000000..cf4befd --- /dev/null +++ b/docs/routing-config.md @@ -0,0 +1,131 @@ +# Routing Config Schema + +Workplan: IB-WP-0020 (T01 schema, T02 loader) +Module: `src/infospace_bench/routing_config.py` + +A routing config is a small YAML file that names the candidate adapters +per task type and (optionally) the quality floor, the +`QualityLedger` path, and a stage-to-task-type override map. The file +is the consumer side of llm-connect `LLM-WP-0004`'s routing primitives: +it does not embed model selection logic, just declares the universe +the policy can choose from. + +The schema_version is pinned to `1`. Bump it (and the parser) before +making backward-incompatible changes. + +## Top-level fields + +| Field | Type | Notes | +|---|---|---| +| `schema_version` | int (required) | Currently `1`. Mismatch fails fast. | +| `task_types` | mapping (required) | At least one entry. Each entry has `candidates` and an optional `quality_floor`. | +| `default_quality_floor` | float (optional) | Falls back when a task type does not name its own. Must be 0..1. | +| `ledger_path` | string (optional) | Path to a `QualityLedger` JSONL. Relative paths resolve against the workspace by default. Required when any `quality_floor` is non-null. | +| `stage_to_task_type` | mapping (optional) | Caller-supplied mapping from infospace-bench stage ids to task types. Falls through to identity when omitted. | + +## Candidate fields + +Each entry under `task_types.<task_type>.candidates[]`: + +| Field | Type | Notes | +|---|---|---| +| `id` | string (required) | Stable adapter id used for the `QualityLedger` and the per-stage adapter-choice line of the generation report. | +| `provider` | string (required) | One of `openrouter`, `claude_code`, `openai`, `gemini`. | +| `model` | string (required) | Provider-specific model id, e.g. `openai/gpt-4o-mini`. 
| +| `api_key_env` | string (optional) | Env var that holds the API key. Defaults to a provider-specific name (`OPENROUTER_API_KEY` etc.) in the T02 loader. | +| `max_cost_per_1k` | float (optional) | Static cost cap. Static `RoutingPolicy` falls back to a cheaper candidate when the caller-supplied estimate exceeds this. | + +## Example A — OpenRouter-only, two-tier + +A pragmatic Lefevre-style config. Cheap model for summaries, mid model +for entities/relations, cheap again for evaluation. No adaptive +routing, no ledger. + +```yaml +schema_version: 1 + +stage_to_task_type: + summarize-source: cheap + extract-entities: smart + extract-relations: smart + evaluate-entity: cheap + synthesize-report: smart + +task_types: + cheap: + candidates: + - id: openrouter:gpt-4o-mini + provider: openrouter + model: openai/gpt-4o-mini + api_key_env: OPENROUTER_API_KEY + smart: + candidates: + - id: openrouter:claude-3.5-sonnet + provider: openrouter + model: anthropic/claude-3.5-sonnet + api_key_env: OPENROUTER_API_KEY +``` + +## Example B — Adaptive with a ClaudeCode baseline + +A two-candidate-per-stage adaptive config. The `QualityLedger` +accumulates observations; over time, the cheaper qualifying model is +preferred per stage. `ClaudeCodeAdapter` is wired into a separate +`task_types.baseline` rule so it can be referenced by a +`ShadowingAdapter` builder (T05). 
+ +```yaml +schema_version: 1 +default_quality_floor: 0.80 +ledger_path: output/routing/quality.jsonl + +task_types: + summarize-source: + quality_floor: 0.70 + candidates: + - id: openrouter:gpt-4o-mini + provider: openrouter + model: openai/gpt-4o-mini + api_key_env: OPENROUTER_API_KEY + max_cost_per_1k: 0.001 + - id: openrouter:claude-3.5-haiku + provider: openrouter + model: anthropic/claude-3.5-haiku + api_key_env: OPENROUTER_API_KEY + max_cost_per_1k: 0.003 + + extract-entities: + quality_floor: 0.85 + candidates: + - id: openrouter:claude-3.5-haiku + provider: openrouter + model: anthropic/claude-3.5-haiku + api_key_env: OPENROUTER_API_KEY + - id: openrouter:claude-3.5-sonnet + provider: openrouter + model: anthropic/claude-3.5-sonnet + api_key_env: OPENROUTER_API_KEY + + baseline: + candidates: + - id: claude-code + provider: claude_code + model: claude-opus-4-7 +``` + +## What fails fast + +The parser refuses, before any network or workspace work, when: + +- `schema_version` is missing or not `1` +- `task_types` is missing or empty +- Any `task_type` has no `candidates` +- A candidate is missing `id`, `provider`, or `model` +- A `provider` is not one of the supported names +- `max_cost_per_1k` is non-numeric or negative +- Any `quality_floor` (top-level or per-task) is outside 0..1 +- A `task_type` has duplicate candidate `id`s +- `ledger_path` or `stage_to_task_type` has the wrong YAML shape + +`api_key_env` resolution and live adapter construction happen in T02. +This file only validates the declarative shape. diff --git a/src/infospace_bench/routing_config.py b/src/infospace_bench/routing_config.py new file mode 100644 index 0000000..514267a --- /dev/null +++ b/src/infospace_bench/routing_config.py @@ -0,0 +1,265 @@ +""" +Routing config schema (IB-WP-0020-T01). + +Parser-only: this module reads a YAML file into validated dataclasses. 
+The follow-on task T02 takes a ``RoutingConfig`` and constructs the +actual llm-connect ``RoutingPolicy`` / ``AdaptiveRoutingPolicy`` plus +LLMAdapter instances (which involves API keys and provider-specific +construction). Keeping parsing separate lets T01 stay network-free and +deterministically testable. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +import yaml + +from .errors import InfospaceError + +ROUTING_SCHEMA_VERSION = 1 + +# Provider names that the T02 loader will know how to construct. +# Validation happens here so a config typo fails before any work begins. +SUPPORTED_PROVIDERS: frozenset[str] = frozenset( + {"openrouter", "claude_code", "openai", "gemini"} +) + + +@dataclass(frozen=True) +class RoutingCandidateConfig: + """One candidate adapter inside a task_type rule.""" + + id: str + provider: str + model: str + api_key_env: str = "" + max_cost_per_1k: float | None = None + + +@dataclass(frozen=True) +class RoutingTaskTypeConfig: + """All candidate adapters for one task_type, with an optional quality floor.""" + + task_type: str + candidates: tuple[RoutingCandidateConfig, ...] + quality_floor: float | None = None + + +@dataclass(frozen=True) +class RoutingConfig: + """Top-level routing config payload, parsed from YAML.""" + + schema_version: int + task_types: tuple[RoutingTaskTypeConfig, ...] 
+ default_quality_floor: float | None = None + ledger_path: str | None = None + stage_to_task_type: dict[str, str] = field(default_factory=dict) + + +def load_routing_config(path: str | Path) -> RoutingConfig: + """Read and validate a routing config YAML file.""" + config_path = Path(path) + if not config_path.is_file(): + raise InfospaceError( + "missing_routing_config", + f"Routing config does not exist: {config_path}", + {"path": str(config_path)}, + ) + raw_text = config_path.read_text(encoding="utf-8") + try: + data = yaml.safe_load(raw_text) + except yaml.YAMLError as exc: + raise InfospaceError( + "invalid_routing_config_yaml", + f"Routing config is not valid YAML: {exc}", + {"path": str(config_path)}, + ) from exc + if not isinstance(data, dict): + raise InfospaceError( + "invalid_routing_config", + "Routing config must be a YAML mapping at the top level", + {"path": str(config_path)}, + ) + return parse_routing_config(data, source=str(config_path)) + + +def parse_routing_config( + data: dict[str, Any], *, source: str = "" +) -> RoutingConfig: + """Validate a parsed routing config dict and return a frozen config.""" + schema_version = data.get("schema_version") + if not isinstance(schema_version, int) or schema_version != ROUTING_SCHEMA_VERSION: + raise InfospaceError( + "unsupported_routing_schema", + f"Routing config schema_version must be {ROUTING_SCHEMA_VERSION}", + {"source": source, "got": schema_version}, + ) + task_types_raw = data.get("task_types") or {} + if not isinstance(task_types_raw, dict) or not task_types_raw: + raise InfospaceError( + "empty_routing_task_types", + "Routing config must declare at least one task_type with candidates", + {"source": source}, + ) + + task_types: list[RoutingTaskTypeConfig] = [] + for task_type, entry in task_types_raw.items(): + task_types.append(_parse_task_type(str(task_type), entry, source=source)) + + default_floor = _optional_quality_floor( + data.get("default_quality_floor"), "default_quality_floor", 
source + ) + ledger_path_value = data.get("ledger_path") + if ledger_path_value is not None and not isinstance(ledger_path_value, str): + raise InfospaceError( + "invalid_routing_ledger_path", + "ledger_path must be a string when present", + {"source": source}, + ) + + stage_map_raw = data.get("stage_to_task_type") or {} + if not isinstance(stage_map_raw, dict): + raise InfospaceError( + "invalid_routing_stage_map", + "stage_to_task_type must be a mapping", + {"source": source}, + ) + stage_to_task_type = {str(key): str(value) for key, value in stage_map_raw.items()} + + return RoutingConfig( + schema_version=schema_version, + task_types=tuple(task_types), + default_quality_floor=default_floor, + ledger_path=ledger_path_value if isinstance(ledger_path_value, str) else None, + stage_to_task_type=stage_to_task_type, + ) + + +def _parse_task_type( + task_type: str, entry: Any, *, source: str +) -> RoutingTaskTypeConfig: + if not isinstance(entry, dict): + raise InfospaceError( + "invalid_routing_task_type", + f"task_types.{task_type} must be a mapping", + {"source": source, "task_type": task_type}, + ) + candidates_raw = entry.get("candidates") or [] + if not isinstance(candidates_raw, list) or not candidates_raw: + raise InfospaceError( + "empty_routing_candidates", + f"task_types.{task_type} must declare at least one candidate", + {"source": source, "task_type": task_type}, + ) + candidates: list[RoutingCandidateConfig] = [] + seen_ids: set[str] = set() + for index, candidate_raw in enumerate(candidates_raw): + candidate = _parse_candidate(task_type, index, candidate_raw, source=source) + if candidate.id in seen_ids: + raise InfospaceError( + "duplicate_routing_candidate_id", + f"task_types.{task_type} has duplicate candidate id {candidate.id!r}", + {"source": source, "task_type": task_type, "id": candidate.id}, + ) + seen_ids.add(candidate.id) + candidates.append(candidate) + quality_floor = _optional_quality_floor( + entry.get("quality_floor"), + 
f"task_types.{task_type}.quality_floor", + source, + ) + return RoutingTaskTypeConfig( + task_type=task_type, + candidates=tuple(candidates), + quality_floor=quality_floor, + ) + + +def _parse_candidate( + task_type: str, index: int, candidate_raw: Any, *, source: str +) -> RoutingCandidateConfig: + if not isinstance(candidate_raw, dict): + raise InfospaceError( + "invalid_routing_candidate", + f"task_types.{task_type}.candidates[{index}] must be a mapping", + {"source": source, "task_type": task_type, "index": index}, + ) + candidate_id = str(candidate_raw.get("id") or "").strip() + provider = str(candidate_raw.get("provider") or "").strip().lower() + model = str(candidate_raw.get("model") or "").strip() + missing = [ + field_name + for field_name, value in (("id", candidate_id), ("provider", provider), ("model", model)) + if not value + ] + if missing: + raise InfospaceError( + "missing_routing_candidate_field", + f"task_types.{task_type}.candidates[{index}] is missing required fields: " + f"{', '.join(missing)}", + { + "source": source, + "task_type": task_type, + "index": index, + "missing": missing, + }, + ) + if provider not in SUPPORTED_PROVIDERS: + raise InfospaceError( + "unsupported_routing_provider", + f"Unsupported provider {provider!r}; allowed: {sorted(SUPPORTED_PROVIDERS)}", + { + "source": source, + "task_type": task_type, + "index": index, + "provider": provider, + }, + ) + max_cost = _optional_float( + candidate_raw.get("max_cost_per_1k"), + f"task_types.{task_type}.candidates[{index}].max_cost_per_1k", + source, + ) + if max_cost is not None and max_cost < 0: + raise InfospaceError( + "invalid_routing_max_cost", + "max_cost_per_1k must be non-negative", + {"source": source, "task_type": task_type, "index": index, "value": max_cost}, + ) + api_key_env = str(candidate_raw.get("api_key_env") or "").strip() + return RoutingCandidateConfig( + id=candidate_id, + provider=provider, + model=model, + api_key_env=api_key_env, + max_cost_per_1k=max_cost, + 
) + + +def _optional_float(value: Any, name: str, source: str) -> float | None: + if value is None: + return None + try: + return float(value) + except (TypeError, ValueError) as exc: + raise InfospaceError( + "invalid_routing_float", + f"{name} must be numeric", + {"source": source, "value": value}, + ) from exc + + +def _optional_quality_floor(value: Any, name: str, source: str) -> float | None: + floor = _optional_float(value, name, source) + if floor is None: + return None + if not 0 <= floor <= 1: + raise InfospaceError( + "invalid_routing_quality_floor", + f"{name} must be between 0 and 1", + {"source": source, "name": name, "value": floor}, + ) + return floor diff --git a/tests/test_routing_config.py b/tests/test_routing_config.py new file mode 100644 index 0000000..bda0869 --- /dev/null +++ b/tests/test_routing_config.py @@ -0,0 +1,272 @@ +""" +Tests for the routing config schema (IB-WP-0020-T01). + +Parser-only — no network calls, no llm-connect construction. T02 will +test the provider construction loader separately. 
# Smallest payload the parser accepts: one task type with one candidate.
MINIMAL = {
    "schema_version": 1,
    "task_types": {
        "summarize-source": {
            "candidates": [
                {
                    "id": "openrouter:gpt-4o-mini",
                    "provider": "openrouter",
                    "model": "openai/gpt-4o-mini",
                },
            ],
        },
    },
}


def test_parses_minimal_config() -> None:
    """Optional fields fall back to their defaults for a minimal payload."""
    parsed = parse_routing_config(MINIMAL)

    assert parsed.schema_version == ROUTING_SCHEMA_VERSION
    assert parsed.default_quality_floor is None
    assert parsed.ledger_path is None
    assert parsed.stage_to_task_type == {}

    assert len(parsed.task_types) == 1
    only_task = parsed.task_types[0]
    assert only_task.task_type == "summarize-source"
    assert only_task.quality_floor is None

    assert len(only_task.candidates) == 1
    only_candidate = only_task.candidates[0]
    assert only_candidate.id == "openrouter:gpt-4o-mini"
    assert only_candidate.provider == "openrouter"
    assert only_candidate.model == "openai/gpt-4o-mini"
    assert only_candidate.api_key_env == ""
    assert only_candidate.max_cost_per_1k is None


def test_parses_full_config_round_trip() -> None:
    """Every optional field survives parsing unchanged."""
    cheap_rule = {
        "quality_floor": 0.7,
        "candidates": [
            {
                "id": "openrouter:gpt-4o-mini",
                "provider": "openrouter",
                "model": "openai/gpt-4o-mini",
                "api_key_env": "OPENROUTER_API_KEY",
                "max_cost_per_1k": 0.001,
            },
        ],
    }
    smart_rule = {
        "quality_floor": 0.85,
        "candidates": [
            {
                "id": "openrouter:claude-haiku",
                "provider": "openrouter",
                "model": "anthropic/claude-3.5-haiku",
            },
            {
                "id": "openrouter:claude-sonnet",
                "provider": "openrouter",
                "model": "anthropic/claude-3.5-sonnet",
                "max_cost_per_1k": 0.003,
            },
        ],
    }
    stage_map = {
        "extract-entities": "smart",
        "extract-relations": "smart",
    }
    payload = {
        "schema_version": 1,
        "default_quality_floor": 0.8,
        "ledger_path": "output/routing/quality.jsonl",
        "stage_to_task_type": stage_map,
        "task_types": {"cheap": cheap_rule, "smart": smart_rule},
    }

    parsed = parse_routing_config(payload)

    assert parsed.default_quality_floor == 0.8
    assert parsed.ledger_path == "output/routing/quality.jsonl"
    assert parsed.stage_to_task_type == {
        "extract-entities": "smart",
        "extract-relations": "smart",
    }
    rules_by_name = {rule.task_type: rule for rule in parsed.task_types}
    smart = rules_by_name["smart"]
    assert smart.quality_floor == 0.85
    assert len(smart.candidates) == 2
    assert smart.candidates[1].max_cost_per_1k == 0.003


def test_load_routing_config_reads_yaml_file(tmp_path: Path) -> None:
    """A YAML dump of the minimal payload loads from disk."""
    yaml_file = tmp_path / "routing.yaml"
    yaml_file.write_text(yaml.safe_dump(MINIMAL, sort_keys=False), encoding="utf-8")

    loaded = load_routing_config(yaml_file)

    assert isinstance(loaded, RoutingConfig)
    assert loaded.schema_version == 1


def test_load_routing_config_missing_file(tmp_path: Path) -> None:
    """A path that does not exist is reported with its own error code."""
    absent = tmp_path / "missing.yaml"
    with pytest.raises(InfospaceError) as caught:
        load_routing_config(absent)
    assert caught.value.code == "missing_routing_config"


def test_load_routing_config_bad_yaml(tmp_path: Path) -> None:
    """Malformed YAML text is rejected before any schema validation."""
    yaml_file = tmp_path / "broken.yaml"
    yaml_file.write_text("schema_version: 1\n bad: indent\n: : : :\n", encoding="utf-8")

    with pytest.raises(InfospaceError) as caught:
        load_routing_config(yaml_file)
    assert caught.value.code == "invalid_routing_config_yaml"


def test_rejects_wrong_schema_version() -> None:
    """Any schema_version other than the pinned one is refused."""
    future_payload = {**MINIMAL, "schema_version": 2}
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(future_payload)
    assert caught.value.code == "unsupported_routing_schema"


def test_rejects_missing_schema_version() -> None:
    """Omitting schema_version fails the same way as a wrong version."""
    unversioned = {"task_types": MINIMAL["task_types"]}
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(unversioned)
    assert caught.value.code == "unsupported_routing_schema"
def _rejection_code(payload: dict) -> str:
    """Parse *payload*, require an InfospaceError, and return its code."""
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(payload)
    return caught.value.code


def _single_task_payload(*candidates: dict) -> dict:
    """Build a schema-1 payload whose only task type ``foo`` lists *candidates*."""
    return {
        "schema_version": 1,
        "task_types": {"foo": {"candidates": list(candidates)}},
    }


def test_rejects_empty_task_types() -> None:
    """An empty task_types mapping is refused."""
    code = _rejection_code({"schema_version": 1, "task_types": {}})
    assert code == "empty_routing_task_types"


def test_rejects_task_type_without_candidates() -> None:
    """A task type with an empty candidate list is refused."""
    assert _rejection_code(_single_task_payload()) == "empty_routing_candidates"


def test_rejects_candidate_missing_required_field() -> None:
    """A candidate without an id is refused and the gap is named in detail."""
    payload = _single_task_payload(
        {"provider": "openrouter", "model": "x"},  # missing id
    )
    with pytest.raises(InfospaceError) as caught:
        parse_routing_config(payload)
    assert caught.value.code == "missing_routing_candidate_field"
    assert "id" in caught.value.detail["missing"]


def test_rejects_unsupported_provider() -> None:
    """A provider outside the supported set is refused."""
    payload = _single_task_payload(
        {"id": "x", "provider": "acme", "model": "acme/model"},
    )
    assert _rejection_code(payload) == "unsupported_routing_provider"


def test_rejects_negative_max_cost() -> None:
    """A negative max_cost_per_1k is refused."""
    payload = _single_task_payload(
        {
            "id": "x",
            "provider": "openrouter",
            "model": "openai/gpt-4o-mini",
            "max_cost_per_1k": -1,
        },
    )
    assert _rejection_code(payload) == "invalid_routing_max_cost"


def test_rejects_quality_floor_out_of_range() -> None:
    """A default_quality_floor above 1 is refused."""
    payload = {
        "schema_version": 1,
        "default_quality_floor": 1.5,
        "task_types": MINIMAL["task_types"],
    }
    assert _rejection_code(payload) == "invalid_routing_quality_floor"


def test_rejects_duplicate_candidate_ids_within_task_type() -> None:
    """Two candidates sharing an id inside one task type are refused."""
    payload = _single_task_payload(
        {"id": "dupe", "provider": "openrouter", "model": "a"},
        {"id": "dupe", "provider": "openrouter", "model": "b"},
    )
    assert _rejection_code(payload) == "duplicate_routing_candidate_id"


def test_rejects_non_mapping_stage_map() -> None:
    """A list where the stage map should be is refused."""
    payload = {
        "schema_version": 1,
        "task_types": MINIMAL["task_types"],
        "stage_to_task_type": ["not", "a", "mapping"],
    }
    assert _rejection_code(payload) == "invalid_routing_stage_map"


def test_rejects_non_string_ledger_path() -> None:
    """A numeric ledger_path is refused."""
    payload = {
        "schema_version": 1,
        "task_types": MINIMAL["task_types"],
        "ledger_path": 42,
    }
    assert _rejection_code(payload) == "invalid_routing_ledger_path"