From d3562454d7e277a83744a89ce3f8a6975f50866c Mon Sep 17 00:00:00 2001
From: tegwick <bernd.worsch@gmail.com>
Date: Mon, 18 May 2026 22:08:51 +0200
Subject: [PATCH] IB-WP-0020-T03: routing CLI flags

Add --provider routing, --routing-config <yaml>, and --quality-floor
<float> to generate run, generate resume, and generate from-source.
The CLI flag wiring constructs a RoutingAssistedGenerationAdapter from
the parsed config, with the workspace handed in so any ledger_path in
the config resolves relative to it. --quality-floor overrides the
config-level default_quality_floor for a single invocation.

run_generation gains routing_config + quality_floor kwargs and
_adapter_for gains a "routing" branch. Missing --routing-config with
--provider routing fails fast with InfospaceError("missing_routing_config");
missing API key for any candidate fails fast with
InfospaceError("missing_routing_api_key").

Two small bug fixes surfaced while writing T03:

- routing._identify_adapter now also reads ``_model`` from llm-connect
  adapters (they keep the model id in a private attribute only), so the
  per-stage adapter-choice line shows the model id rather than just the
  class name.
- budget.TOKEN_EVENTS_PATH corrected from /state/token-events to the
  state-hub HTTP endpoint /token-events/ that actually exists; the
  failure-isolation in emit_token_event already kept the prior typo
  from breaking runs, but the hub never saw the events.

Five new tests cover: _adapter_for refusal on missing config,
_adapter_for happy path, run_generation end-to-end through routing
with a stubbed OpenRouterAdapter.execute_prompt (no network),
workspace-relative ledger resolution, and a CLI subprocess smoke
asserting fast-fail on missing API key.

173 tests pass, 1 skipped.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 src/infospace_bench/budget.py                |   2 +-
 src/infospace_bench/cli.py                   |  18 +-
 src/infospace_bench/generator.py             |  41 ++-
 src/infospace_bench/routing.py               |   6 +-
 tests/test_budget_registry.py                |   2 +-
 tests/test_routing_cli.py                    | 286 +++++++++++++++++++
 workplans/IB-WP-0020-provider-routing-cli.md |   2 +-
 7 files changed, 348 insertions(+), 9 deletions(-)
 create mode 100644 tests/test_routing_cli.py
diff --git a/src/infospace_bench/budget.py b/src/infospace_bench/budget.py
index 163cb79..51454f1 100644
--- a/src/infospace_bench/budget.py
+++ b/src/infospace_bench/budget.py
@@ -29,7 +29,7 @@ _PACKAGE_RATES_PATH = Path(__file__).parent / "model_rates.yaml"
 HUB_URL_ENV = "INFOSPACE_BENCH_HUB_URL"
 HUB_DISABLE_ENV = "INFOSPACE_BENCH_DISABLE_HUB_TOKEN_EVENTS"
 DEFAULT_HUB_URL = "http://127.0.0.1:8000"
-TOKEN_EVENTS_PATH = "/state/token-events"
+TOKEN_EVENTS_PATH = "/token-events/"
 HUB_TIMEOUT_SECONDS = 3.0
 
 BUDGET_DIR = Path("output/budget")
diff --git a/src/infospace_bench/cli.py b/src/infospace_bench/cli.py
index 87aa744..f4c79dc 100644
--- a/src/infospace_bench/cli.py
+++ b/src/infospace_bench/cli.py
@@ -203,9 +203,11 @@ def build_parser() -> argparse.ArgumentParser:
     )
     generate_run.add_argument("root")
     generate_run.add_argument("--stage", default="all")
-    generate_run.add_argument("--provider", choices=["fixture", "openrouter"], default="fixture")
+    generate_run.add_argument("--provider", choices=["fixture", "openrouter", "routing"], default="fixture")
     generate_run.add_argument("--model", default="")
     generate_run.add_argument("--fixture-responses", default="")
+    generate_run.add_argument("--routing-config", default="", help="YAML routing config (required with --provider routing)")
+    generate_run.add_argument("--quality-floor", type=float, default=None, help="Override the config's default_quality_floor for this run")
     generate_run.add_argument("--resume", action="store_true")
     generate_run.add_argument("--force", action="store_true")
 
@@ -215,9 +217,11 @@ def build_parser() -> argparse.ArgumentParser:
     )
     generate_resume.add_argument("root")
     generate_resume.add_argument("--stage", default="all")
-    generate_resume.add_argument("--provider", choices=["fixture", "openrouter"], default="fixture")
+    generate_resume.add_argument("--provider", choices=["fixture", "openrouter", "routing"], default="fixture")
     generate_resume.add_argument("--model", default="")
     generate_resume.add_argument("--fixture-responses", default="")
+    generate_resume.add_argument("--routing-config", default="")
+    generate_resume.add_argument("--quality-floor", type=float, default=None)
     generate_resume.add_argument("--force", action="store_true")
 
     generate_status = generate_sub.add_parser(
@@ -236,9 +240,11 @@ def build_parser() -> argparse.ArgumentParser:
     generate_from_source.add_argument("--name", required=True)
     generate_from_source.add_argument("--profile", default="general-knowledge")
     generate_from_source.add_argument("--stage", default="all")
-    generate_from_source.add_argument("--provider", choices=["fixture", "openrouter"], default="fixture")
+    generate_from_source.add_argument("--provider", choices=["fixture", "openrouter", "routing"], default="fixture")
     generate_from_source.add_argument("--model", default="")
     generate_from_source.add_argument("--fixture-responses", default="")
+    generate_from_source.add_argument("--routing-config", default="", help="YAML routing config (required with --provider routing)")
+    generate_from_source.add_argument("--quality-floor", type=float, default=None)
     generate_from_source.add_argument("--max-chunks", type=int, default=0)
     generate_from_source.add_argument(
         "--chapter",
@@ -551,6 +557,8 @@ def main(argv: list[str] | None = None) -> int:
                         provider=args.provider,
                         model=args.model,
                         fixture_responses=args.fixture_responses or None,
+                        routing_config=args.routing_config or None,
+                        quality_floor=args.quality_floor,
                         resume=args.resume,
                         force=args.force,
                     ).to_dict()
@@ -563,6 +571,8 @@ def main(argv: list[str] | None = None) -> int:
                         provider=args.provider,
                         model=args.model,
                         fixture_responses=args.fixture_responses or None,
+                        routing_config=args.routing_config or None,
+                        quality_floor=args.quality_floor,
                         resume=True,
                         force=args.force,
                     ).to_dict()
@@ -589,6 +599,8 @@ def main(argv: list[str] | None = None) -> int:
                         provider=args.provider,
                         model=args.model,
                         fixture_responses=args.fixture_responses or None,
+                        routing_config=args.routing_config or None,
+                        quality_floor=args.quality_floor,
                     )
                     _write_json(result.to_dict())
                 else:
diff --git a/src/infospace_bench/generator.py b/src/infospace_bench/generator.py
index 21c8053..3815fc2 100644
--- a/src/infospace_bench/generator.py
+++ b/src/infospace_bench/generator.py
@@ -427,6 +427,8 @@ def run_generation(
     provider: str = "fixture",
     model: str = "",
     fixture_responses: str | Path | None = None,
+    routing_config: str | Path | None = None,
+    quality_floor: float | None = None,
     resume: bool = False,
     force: bool = False,
 ) -> GenerationRunResult:
@@ -449,7 +451,14 @@ def run_generation(
     started_wall = datetime.now(timezone.utc)
     monotonic_start = _monotonic()
     adapter = (
-        _adapter_for(provider, model=model, fixture_responses=fixture_responses)
+        _adapter_for(
+            provider,
+            model=model,
+            fixture_responses=fixture_responses,
+            routing_config=routing_config,
+            quality_floor=quality_floor,
+            workspace=_workspace_for(root_path),
+        )
         if workflow_ids
         else None
     )
@@ -551,14 +560,42 @@ def _adapter_for(
     *,
     model: str,
     fixture_responses: str | Path | None,
+    routing_config: str | Path | None = None,
+    quality_floor: float | None = None,
+    workspace: Path | None = None,
 ) -> AssistedGenerationAdapter:
     if fixture_responses:
         return FixtureAssistedGenerationAdapter.from_file(Path(fixture_responses))
     if provider == "openrouter":
         return OpenRouterAssistedGenerationAdapter(model=model)
+    if provider == "routing":
+        if not routing_config:
+            raise InfospaceError(
+                "missing_routing_config",
+                "--provider routing requires --routing-config <path>",
+                {"provider": provider},
+            )
+        from .routing import RoutingAssistedGenerationAdapter
+        from .routing_config import (
+            build_routing_policy_from_config,
+            load_routing_config,
+        )
+
+        config = load_routing_config(routing_config)
+        policy = build_routing_policy_from_config(config, workspace=workspace)
+        effective_floor = (
+            quality_floor
+            if quality_floor is not None
+            else config.default_quality_floor
+        )
+        return RoutingAssistedGenerationAdapter(
+            policy=policy,
+            stage_to_task_type=dict(config.stage_to_task_type),
+            quality_floor=effective_floor,
+        )
     raise InfospaceError(
         "missing_assisted_generation_adapter",
-        "Assisted generation requires --fixture-responses or --provider openrouter",
+        "Assisted generation requires --fixture-responses, --provider openrouter, or --provider routing",
         {"provider": provider},
     )
 
diff --git a/src/infospace_bench/routing.py b/src/infospace_bench/routing.py
index 74796fb..2ea2983 100644
--- a/src/infospace_bench/routing.py
+++ b/src/infospace_bench/routing.py
@@ -112,7 +112,11 @@ def _identify_adapter(adapter: LLMAdapter) -> str:
     adapter_id = getattr(adapter, "adapter_id", "")
     if adapter_id:
         return str(adapter_id)
-    model = getattr(adapter, "model", "") or getattr(adapter, "model_name", "")
+    model = (
+        getattr(adapter, "model", "")
+        or getattr(adapter, "model_name", "")
+        or getattr(adapter, "_model", "")
+    )
     name = type(adapter).__name__
     if model:
         return f"{name}:{model}"
diff --git a/tests/test_budget_registry.py b/tests/test_budget_registry.py
index 85453b5..1f5ed96 100644
--- a/tests/test_budget_registry.py
+++ b/tests/test_budget_registry.py
@@ -522,7 +522,7 @@ def test_emit_token_event_calls_poster_with_record_token_payload(tmp_path: Path)
     assert result["status"] == "emitted"
     assert len(calls) == 1
     url, payload, timeout = calls[0]
-    assert url == "http://hub.example/state/token-events"
+    assert url == "http://hub.example/token-events/"
     assert payload["tokens_in"] == 1200
     assert payload["tokens_out"] == 400
     assert payload["model"] == "openai/gpt-4o-mini"
diff --git a/tests/test_routing_cli.py b/tests/test_routing_cli.py
new file mode 100644
index 0000000..9b4868e
--- /dev/null
+++ b/tests/test_routing_cli.py
@@ -0,0 +1,286 @@
+"""
+Tests for the routing CLI flags (IB-WP-0020-T03).
+
+Three levels:
+- _adapter_for("routing") unit checks — missing config, happy path
+- run_generation end-to-end through --provider routing with a stubbed
+  OpenRouterAdapter.execute_prompt so no network is required
+- CLI subprocess smoke that proves the new flags are wired
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import subprocess
+import sys
+import zipfile
+from pathlib import Path
+
+import pytest
+import yaml
+
+from infospace_bench.errors import InfospaceError
+from infospace_bench.generator import (
+    _adapter_for,
+    init_generation_infospace,
+    run_generation,
+    status_generation,
+)
+from infospace_bench.routing import RoutingAssistedGenerationAdapter
+
+
+FIXTURE_ROOT = Path(__file__).parent / "fixtures" / "lefevre"
+
+
+def _build_fixture_epub(target: Path) -> Path:
+    """Zip the lefevre fixture sources into an EPUB at *target* and return it."""
+    src_dir = FIXTURE_ROOT / "sources"
+    members: dict[str, str] = {
+        "mimetype": "application/epub+zip",
+        "META-INF/container.xml": (src_dir / "container.xml").read_text(encoding="utf-8"),
+        **{f"OEBPS/{p.name}": p.read_text(encoding="utf-8") for p in sorted(src_dir.glob("*.xhtml"))},
+        "OEBPS/content.opf": (src_dir / "content.opf").read_text(encoding="utf-8"),
+    }
+    with zipfile.ZipFile(target, "w") as epub:
+        for arcname, text in members.items():
+            epub.writestr(arcname, text)
+    return target
+
+
+def _write_routing_config(path: Path, *, ledger_relpath: str | None = None) -> None:
+    """Dump a one-candidate routing config to *path* as YAML.
+
+    All five fixture stages map to the ``cheap`` task type; ``ledger_path``
+    is written only when *ledger_relpath* is given.
+    """
+    stages = (
+        "summarize-source",
+        "extract-entities",
+        "extract-relations",
+        "evaluate-entity",
+        "synthesize-report",
+    )
+    candidate = {
+        "id": "openrouter:gpt-4o-mini",
+        "provider": "openrouter",
+        "model": "openai/gpt-4o-mini",
+        "api_key_env": "OPENROUTER_API_KEY",
+    }
+    data: dict = {
+        "schema_version": 1,
+        "stage_to_task_type": dict.fromkeys(stages, "cheap"),
+        "task_types": {"cheap": {"candidates": [candidate]}},
+    }
+    if ledger_relpath is not None:
+        data["ledger_path"] = ledger_relpath
+    path.write_text(yaml.safe_dump(data, sort_keys=False), encoding="utf-8")
+
+
+def test_adapter_for_routing_missing_config_raises() -> None:
+    with pytest.raises(InfospaceError) as raised:  # no routing config supplied
+        _adapter_for("routing", routing_config=None, fixture_responses=None, model="")
+    assert raised.value.code == "missing_routing_config"
+
+
+def test_adapter_for_routing_returns_bridge(tmp_path: Path, monkeypatch) -> None:
+    """A valid config plus an API key yields the routing adapter."""
+    routing_yaml = tmp_path / "routing.yaml"
+    _write_routing_config(routing_yaml)
+    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-fake-test-key")
+
+    kwargs = {
+        "model": "",
+        "fixture_responses": None,
+        "routing_config": routing_yaml,
+        "workspace": tmp_path,
+    }
+    bridge = _adapter_for("routing", **kwargs)
+    assert isinstance(bridge, RoutingAssistedGenerationAdapter)
+    assert bridge.stage_to_task_type["summarize-source"] == "cheap"
+
+
+_FIXTURE_RESPONSES = {  # canned reply per stage, served by _stub_openrouter_execute
+    "summarize-source": "# Source Summary\n\nFixture summary content.\n",
+    "extract-entities": (
+        "# Stub Entity\n\n"
+        "## Category\n\nstrategy\n\n"
+        "## Definition\n\nA stub trading concept for the routing CLI smoke.\n"
+    ),
+    "extract-relations": (
+        "# Stub Entity Practices Tape Reading\n\n"
+        "## Subject\n\nStub Entity\n\n"
+        "## Predicate\n\npractices\n\n"
+        "## Object\n\nTape Reading\n\n"
+        "## Relation Type\n\nstrategy_outcome\n\n"
+        "## Evidence\n\nFixture evidence.\n"
+    ),
+    "evaluate-entity": (
+        "---\n"
+        "artifact_id: entity/stub-entity.md\n"
+        "evaluator: fixture\n"
+        "evaluated_at: '2026-05-18T00:00:00'\n"
+        "scores:\n"
+        "  - name: groundedness\n    value: 4.0\n    max_value: 5.0\n"
+        "  - name: lesson_clarity\n    value: 4.0\n    max_value: 5.0\n"
+        "  - name: historical_context\n    value: 4.0\n    max_value: 5.0\n"
+        "  - name: overgeneralization_risk\n    value: 4.0\n    max_value: 5.0\n"
+        "---\n\n"
+        "# Evaluation: entity/stub-entity.md\n"
+    ),
+    "synthesize-report": "# Routed Report\n\nFixture report.\n",
+}
+
+
+def _stub_openrouter_execute(self, prompt, config):
+    """Canned stand-in for ``OpenRouterAdapter.execute_prompt``.
+
+    The stage is inferred from the lowercased first line of the prompt:
+    ``# extract`` with ``entit`` gives entities, ``# extract`` with
+    ``relation`` gives relations, ``# evaluate`` gives the evaluation and
+    ``# synthesize`` the report. Anything else gets the summary response.
+    """
+    from llm_connect.models import LLMResponse
+
+    heading = prompt.lstrip().splitlines()[0].lower() if prompt.strip() else ""
+    stage = "summarize-source"
+    if heading.startswith("# extract"):
+        if "entit" in heading:
+            stage = "extract-entities"
+        elif "relation" in heading:
+            stage = "extract-relations"
+    elif heading.startswith("# evaluate"):
+        stage = "evaluate-entity"
+    elif heading.startswith("# synthesize"):
+        stage = "synthesize-report"
+    token_usage = {"prompt_tokens": len(prompt.split()), "completion_tokens": 40}
+    return LLMResponse(
+        content=_FIXTURE_RESPONSES[stage],
+        model=getattr(self, "_model", "openai/gpt-4o-mini"),
+        usage=token_usage,
+        finish_reason="stop",
+        metadata={"request_id": "or-stub-1"},
+    )
+
+
+def test_run_generation_via_routing_provider_completes_end_to_end(
+    tmp_path: Path, monkeypatch
+) -> None:
+    """End-to-end run through the routing provider with OpenRouter stubbed out."""
+    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-fake-test-key")
+    from llm_connect.openrouter import OpenRouterAdapter
+
+    monkeypatch.setattr(
+        OpenRouterAdapter, "execute_prompt", _stub_openrouter_execute, raising=True
+    )
+
+    book = _build_fixture_epub(tmp_path / "lefevre.epub")
+    config_path = tmp_path / "routing.yaml"
+    _write_routing_config(config_path)
+
+    infospace = init_generation_infospace(
+        tmp_path,
+        book,
+        "lefevre-routing-smoke",
+        name="Lefevre Routing Smoke",
+        profile="trading-literature",
+        chapter_filter=["I"],
+    )
+    result = run_generation(
+        infospace.root,
+        provider="routing",
+        routing_config=config_path,
+    )
+    status = status_generation(infospace.root)
+
+    assert result.status == "completed"
+    assert status["source_chunk_count"] == 1
+    assert status["entity_count"] >= 1
+    assert status["evaluation_count"] >= 1
+
+    report = (infospace.root / "reports" / "generation-summary.md").read_text(encoding="utf-8")
+    assert "## Per-stage adapter choices" in report
+    assert "openai/gpt-4o-mini" in report  # adapter_id ends with the model
+
+    # Budget usage rollup should bucket calls by the routed model.
+    usage_path = infospace.root / "output" / "budget" / "usage.yaml"
+    usage = yaml.safe_load(usage_path.read_text(encoding="utf-8"))
+    bucket_models = {b["model"] for b in usage["runs"][0]["per_bucket"]}
+    assert "openai/gpt-4o-mini" in bucket_models
+
+
+def test_from_source_cli_provider_routing(tmp_path: Path, monkeypatch) -> None:
+    """CLI smoke: ``--provider routing`` without an API key exits non-zero."""
+    book = _build_fixture_epub(tmp_path / "lefevre.epub")
+    config_path = tmp_path / "routing.yaml"
+    _write_routing_config(config_path)
+
+    # Hand the child interpreter this checkout's src/ plus the parent's own
+    # import path instead of machine-specific absolute directories, so the
+    # smoke test runs wherever this test module itself can be imported.
+    repo_src = Path(__file__).resolve().parents[1] / "src"
+    env = os.environ.copy()
+    env["PYTHONPATH"] = os.pathsep.join([str(repo_src), *filter(None, sys.path)])
+
+    # Missing API key → fast fail from the loader, no subprocess crash.
+    env.pop("OPENROUTER_API_KEY", None)
+    bad = subprocess.run(
+        [
+            sys.executable,
+            "-m",
+            "infospace_bench",
+            "generate",
+            "from-source",
+            str(book),
+            "--workspace", str(tmp_path),
+            "--slug", "routing-cli-missing-key",
+            "--name", "Routing CLI Missing Key",
+            "--profile", "trading-literature",
+            "--provider", "routing",
+            "--routing-config", str(config_path),
+            "--chapter", "I",
+            "--apply",
+        ],
+        check=False,
+        env=env,
+        text=True,
+        capture_output=True,
+    )
+
+    combined_output = bad.stdout + bad.stderr
+    assert bad.returncode != 0
+    assert "missing_routing_api_key" in combined_output
+
+
+def test_run_via_routing_resolves_workspace_relative_ledger(
+    tmp_path: Path, monkeypatch
+) -> None:
+    from llm_connect.openrouter import OpenRouterAdapter
+
+    # Offline run: fake credential plus canned completions.
+    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-fake-test-key")
+    monkeypatch.setattr(
+        OpenRouterAdapter, "execute_prompt", _stub_openrouter_execute, raising=True
+    )
+
+    relative_ledger = "output/routing/quality.jsonl"
+    routing_yaml = tmp_path / "routing.yaml"
+    _write_routing_config(routing_yaml, ledger_relpath=relative_ledger)
+    epub = _build_fixture_epub(tmp_path / "lefevre.epub")
+    infospace = init_generation_infospace(
+        tmp_path,
+        epub,
+        "lefevre-routing-ledger",
+        name="Lefevre Routing Ledger",
+        profile="trading-literature",
+        chapter_filter=["I"],
+    )
+    run_generation(
+        infospace.root,
+        provider="routing",
+        routing_config=routing_yaml,
+        quality_floor=0.7,
+    )
+
+    # ledger_path is relative to the workspace (tmp_path), not the infospace root.
+    assert (tmp_path / relative_ledger).parent.is_dir(), "loader must create the ledger parent dir"
diff --git a/workplans/IB-WP-0020-provider-routing-cli.md b/workplans/IB-WP-0020-provider-routing-cli.md
index 698f2f7..f7f8fac 100644
--- a/workplans/IB-WP-0020-provider-routing-cli.md
+++ b/workplans/IB-WP-0020-provider-routing-cli.md
@@ -117,7 +117,7 @@ state_hub_task_id: "5e38514b-ad6a-4d39-8716-f812f241d9fd"
 
 ```task
 id: IB-WP-0020-T03
-status: todo
+status: done
 priority: high
 state_hub_task_id: "fe5888e0-da33-413a-b026-71ed811b8c73"
 ```