IB-WP-0020-T04: example routing config + live routing smoke

examples/routing/trading-literature.yaml is the checked-in starting config for a Lefevre-style run. It applies the IB-WP-0018 task-type taxonomy: cheap candidates for summary + evaluation, smart candidates for entity + relation extraction, and a separate baseline rule wiring claude_code for a follow-on T05 ShadowingAdapter step. Workspace-relative ledger_path keeps adaptive observations with the workspace. tests/test_routing_config.py gains a regression test that asserts the shipped example parses cleanly, every stage in stage_to_task_type maps to a declared task type, and the baseline candidate uses the claude_code provider — so the example will not bit-rot silently. tests/test_openrouter_live.py gains test_provider_routing_one_chapter_live_smoke gated on the same INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER + OPENROUTER_API_KEY opt-in as the existing static smoke. It builds a one-candidate routing config, runs a single chapter through --provider routing, and asserts the per-stage adapter-choices report section names the routed model and the routed artifacts carry adapter_id provenance. docs/generic-source-generator.md gains a "Live runs with --provider routing" subsection that walks through the one-command routed run, explains the --quality-floor override, and points at the parallel live smoke test. 174 tests pass, 2 skipped (both live smokes, correctly gated). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-18 22:19:54 +02:00
parent d3562454d7
commit debd2b8e69
5 changed files with 221 additions and 1 deletions
--- a/tests/test_openrouter_live.py
+++ b/tests/test_openrouter_live.py
@@ -208,3 +208,87 @@ def test_openrouter_one_chapter_smoke(tmp_path: Path) -> None:
        and item.get("provenance", {}).get("provider_metadata", {}).get("request_id")
    ]
    assert generated_with_metadata, "generated artifacts should carry provider_metadata.request_id"
+
+
+_LIVE_ROUTING_REASON = (  # skipif reason for the live routing smoke test below
+    "set INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER=1 and OPENROUTER_API_KEY to run "
+    "the optional one-chapter routing smoke against OpenRouter"
+)
+
+
+@pytest.mark.skipif(not (_LIVE_OPT_IN and _LIVE_API_KEY), reason=_LIVE_ROUTING_REASON)
+def test_provider_routing_one_chapter_live_smoke(tmp_path: Path) -> None:
+    """Live smoke: one chapter through --provider routing against OpenRouter.
+
+    Writes a routing config with one "cheap" task type (a single OpenRouter
+    candidate) that all five stages map to, to keep the live run cheap.
+    Asserts the run completes with one source chunk and at least one entity,
+    the report contains a "Per-stage adapter choices" heading and the model
+    name, and some entity/relation/generated artifact has an adapter_id.
+    """
+    book = _build_fixture_epub(tmp_path / "lefevre.epub")
+    model = os.environ.get("INFOSPACE_BENCH_LIVE_MODEL", "openai/gpt-4o-mini")  # env override, else default model
+
+    routing_config = tmp_path / "routing.yaml"
+    routing_config.write_text(
+        yaml.safe_dump(
+            {
+                "schema_version": 1,
+                "stage_to_task_type": {
+                    "summarize-source": "cheap",
+                    "extract-entities": "cheap",
+                    "extract-relations": "cheap",
+                    "evaluate-entity": "cheap",
+                    "synthesize-report": "cheap",
+                },
+                "task_types": {
+                    "cheap": {
+                        "candidates": [
+                            {
+                                "id": f"openrouter:{model}",
+                                "provider": "openrouter",
+                                "model": model,
+                                "api_key_env": "OPENROUTER_API_KEY",
+                            },
+                        ],
+                    },
+                },
+            },
+            sort_keys=False,  # keep the key order written above in the emitted YAML
+        ),
+        encoding="utf-8",
+    )
+
+    infospace = init_generation_infospace(
+        tmp_path,
+        book,
+        "lefevre-live-routing",
+        name="Lefevre Live Routing",
+        profile="trading-literature",
+        chapter_filter=["I"],  # restrict the run to chapter "I" only
+    )
+    plan_generation(infospace.root, cost_per_1k_tokens=0.5)
+    result = run_generation(
+        infospace.root,
+        provider="routing",
+        routing_config=routing_config,
+    )
+    status = status_generation(infospace.root)
+
+    assert result.status == "completed"
+    assert status["source_chunk_count"] == 1
+    assert status["entity_count"] >= 1
+
+    report = (infospace.root / "reports" / "generation-summary.md").read_text(encoding="utf-8")
+    assert "## Per-stage adapter choices" in report
+    assert model in report, "report should name the routed model"
+
+    # Only adapter_id is checked here; task_type on provider_metadata is not asserted.
+    index = yaml.safe_load((infospace.root / "artifacts" / "index.yaml").read_text(encoding="utf-8"))
+    routed_artifacts = [
+        item
+        for item in index["artifacts"]
+        if item["kind"] in {"entity", "relation", "generated"}
+        and (item.get("provenance") or {}).get("provider_metadata", {}).get("adapter_id")
+    ]
+    assert routed_artifacts, "routed artifacts must carry adapter_id provenance"
--- a/tests/test_routing_config.py
+++ b/tests/test_routing_config.py
@@ -412,6 +412,25 @@ def test_build_routing_policy_claude_code_needs_no_api_key() -> None:
    assert isinstance(policy.rules[0].prefer, ClaudeCodeAdapter)


+def test_example_trading_literature_config_parses() -> None:
+    """Regression: the shipped example routing config loads and is self-consistent."""
+    from infospace_bench.routing_config import load_routing_config
+
+    example_path = Path(__file__).resolve().parent.parent / "examples" / "routing" / "trading-literature.yaml"  # two levels up from this test file
+
+    config = load_routing_config(example_path)
+
+    task_type_names = {task.task_type for task in config.task_types}
+    assert {"cheap", "smart", "judge", "baseline"} <= task_type_names  # subset check: extra task types are allowed
+    assert config.default_quality_floor == 0.80
+    # Every stage in stage_to_task_type must map to a task type the config declares.
+    for stage, task_type in config.stage_to_task_type.items():
+        assert task_type in task_type_names, f"stage {stage!r} maps to undeclared task type {task_type!r}"
+    # baseline is included so a T05 ShadowingAdapter wiring can reference it.
+    baseline = next(t for t in config.task_types if t.task_type == "baseline")
+    assert baseline.candidates[0].provider == "claude_code"  # only the first baseline candidate is checked
+
+
 def test_build_routing_policy_honours_custom_api_key_env() -> None:
    from infospace_bench.routing_config import build_routing_policy_from_config
    from llm_connect.openrouter import OpenRouterAdapter