From d3562454d7e277a83744a89ce3f8a6975f50866c Mon Sep 17 00:00:00 2001 From: tegwick Date: Mon, 18 May 2026 22:08:51 +0200 Subject: [PATCH] IB-WP-0020-T03: routing CLI flags Add --provider routing, --routing-config PATH, and --quality-floor FLOAT to generate run, generate resume, and generate from-source. The CLI flag wiring constructs a RoutingAssistedGenerationAdapter from the parsed config, with the workspace handed in so any ledger_path in the config resolves relative to it. --quality-floor overrides the config-level default_quality_floor for a single invocation. run_generation gains routing_config + quality_floor kwargs and _adapter_for gains a "routing" branch. Missing --routing-config with --provider routing fails fast with InfospaceError("missing_routing_config"); missing API key for any candidate fails fast with InfospaceError("missing_routing_api_key"). Two small bug fixes surfaced while writing T03: - routing._identify_adapter now also reads ``_model`` from llm-connect adapters (which expose the model id only through that private attribute), so the per-stage adapter-choice line shows the model id rather than just the class name. - budget.TOKEN_EVENTS_PATH corrected from /state/token-events to the state-hub HTTP endpoint /token-events/ that actually exists; the failure-isolation in emit_token_event already kept the wrong path from breaking runs, but the hub never saw the events. Five new tests cover: _adapter_for refusal on missing config, _adapter_for happy path, run_generation end-to-end through routing with a stubbed OpenRouterAdapter.execute_prompt (no network), workspace-relative ledger resolution, and a CLI subprocess smoke test asserting fast-fail on missing API key. 173 tests pass, 1 skipped. 
Co-Authored-By: Claude Opus 4.7 --- src/infospace_bench/budget.py | 2 +- src/infospace_bench/cli.py | 18 +- src/infospace_bench/generator.py | 41 ++- src/infospace_bench/routing.py | 6 +- tests/test_budget_registry.py | 2 +- tests/test_routing_cli.py | 286 +++++++++++++++++++ workplans/IB-WP-0020-provider-routing-cli.md | 2 +- 7 files changed, 348 insertions(+), 9 deletions(-) create mode 100644 tests/test_routing_cli.py diff --git a/src/infospace_bench/budget.py b/src/infospace_bench/budget.py index 163cb79..51454f1 100644 --- a/src/infospace_bench/budget.py +++ b/src/infospace_bench/budget.py @@ -29,7 +29,7 @@ _PACKAGE_RATES_PATH = Path(__file__).parent / "model_rates.yaml" HUB_URL_ENV = "INFOSPACE_BENCH_HUB_URL" HUB_DISABLE_ENV = "INFOSPACE_BENCH_DISABLE_HUB_TOKEN_EVENTS" DEFAULT_HUB_URL = "http://127.0.0.1:8000" -TOKEN_EVENTS_PATH = "/state/token-events" +TOKEN_EVENTS_PATH = "/token-events/" HUB_TIMEOUT_SECONDS = 3.0 BUDGET_DIR = Path("output/budget") diff --git a/src/infospace_bench/cli.py b/src/infospace_bench/cli.py index 87aa744..f4c79dc 100644 --- a/src/infospace_bench/cli.py +++ b/src/infospace_bench/cli.py @@ -203,9 +203,11 @@ def build_parser() -> argparse.ArgumentParser: ) generate_run.add_argument("root") generate_run.add_argument("--stage", default="all") - generate_run.add_argument("--provider", choices=["fixture", "openrouter"], default="fixture") + generate_run.add_argument("--provider", choices=["fixture", "openrouter", "routing"], default="fixture") generate_run.add_argument("--model", default="") generate_run.add_argument("--fixture-responses", default="") + generate_run.add_argument("--routing-config", default="", help="YAML routing config (required with --provider routing)") + generate_run.add_argument("--quality-floor", type=float, default=None, help="Override the config's default_quality_floor for this run") generate_run.add_argument("--resume", action="store_true") generate_run.add_argument("--force", action="store_true") @@ -215,9 +217,11 
@@ def build_parser() -> argparse.ArgumentParser: ) generate_resume.add_argument("root") generate_resume.add_argument("--stage", default="all") - generate_resume.add_argument("--provider", choices=["fixture", "openrouter"], default="fixture") + generate_resume.add_argument("--provider", choices=["fixture", "openrouter", "routing"], default="fixture") generate_resume.add_argument("--model", default="") generate_resume.add_argument("--fixture-responses", default="") + generate_resume.add_argument("--routing-config", default="") + generate_resume.add_argument("--quality-floor", type=float, default=None) generate_resume.add_argument("--force", action="store_true") generate_status = generate_sub.add_parser( @@ -236,9 +240,11 @@ def build_parser() -> argparse.ArgumentParser: generate_from_source.add_argument("--name", required=True) generate_from_source.add_argument("--profile", default="general-knowledge") generate_from_source.add_argument("--stage", default="all") - generate_from_source.add_argument("--provider", choices=["fixture", "openrouter"], default="fixture") + generate_from_source.add_argument("--provider", choices=["fixture", "openrouter", "routing"], default="fixture") generate_from_source.add_argument("--model", default="") generate_from_source.add_argument("--fixture-responses", default="") + generate_from_source.add_argument("--routing-config", default="", help="YAML routing config (required with --provider routing)") + generate_from_source.add_argument("--quality-floor", type=float, default=None) generate_from_source.add_argument("--max-chunks", type=int, default=0) generate_from_source.add_argument( "--chapter", @@ -551,6 +557,8 @@ def main(argv: list[str] | None = None) -> int: provider=args.provider, model=args.model, fixture_responses=args.fixture_responses or None, + routing_config=args.routing_config or None, + quality_floor=args.quality_floor, resume=args.resume, force=args.force, ).to_dict() @@ -563,6 +571,8 @@ def main(argv: list[str] | None = 
None) -> int: provider=args.provider, model=args.model, fixture_responses=args.fixture_responses or None, + routing_config=args.routing_config or None, + quality_floor=args.quality_floor, resume=True, force=args.force, ).to_dict() @@ -589,6 +599,8 @@ def main(argv: list[str] | None = None) -> int: provider=args.provider, model=args.model, fixture_responses=args.fixture_responses or None, + routing_config=args.routing_config or None, + quality_floor=args.quality_floor, ) _write_json(result.to_dict()) else: diff --git a/src/infospace_bench/generator.py b/src/infospace_bench/generator.py index 21c8053..3815fc2 100644 --- a/src/infospace_bench/generator.py +++ b/src/infospace_bench/generator.py @@ -427,6 +427,8 @@ def run_generation( provider: str = "fixture", model: str = "", fixture_responses: str | Path | None = None, + routing_config: str | Path | None = None, + quality_floor: float | None = None, resume: bool = False, force: bool = False, ) -> GenerationRunResult: @@ -449,7 +451,14 @@ def run_generation( started_wall = datetime.now(timezone.utc) monotonic_start = _monotonic() adapter = ( - _adapter_for(provider, model=model, fixture_responses=fixture_responses) + _adapter_for( + provider, + model=model, + fixture_responses=fixture_responses, + routing_config=routing_config, + quality_floor=quality_floor, + workspace=_workspace_for(root_path), + ) if workflow_ids else None ) @@ -551,14 +560,42 @@ def _adapter_for( *, model: str, fixture_responses: str | Path | None, + routing_config: str | Path | None = None, + quality_floor: float | None = None, + workspace: Path | None = None, ) -> AssistedGenerationAdapter: if fixture_responses: return FixtureAssistedGenerationAdapter.from_file(Path(fixture_responses)) if provider == "openrouter": return OpenRouterAssistedGenerationAdapter(model=model) + if provider == "routing": + if not routing_config: + raise InfospaceError( + "missing_routing_config", + "--provider routing requires --routing-config ", + {"provider": 
provider}, + ) + from .routing import RoutingAssistedGenerationAdapter + from .routing_config import ( + build_routing_policy_from_config, + load_routing_config, + ) + + config = load_routing_config(routing_config) + policy = build_routing_policy_from_config(config, workspace=workspace) + effective_floor = ( + quality_floor + if quality_floor is not None + else config.default_quality_floor + ) + return RoutingAssistedGenerationAdapter( + policy=policy, + stage_to_task_type=dict(config.stage_to_task_type), + quality_floor=effective_floor, + ) raise InfospaceError( "missing_assisted_generation_adapter", - "Assisted generation requires --fixture-responses or --provider openrouter", + "Assisted generation requires --fixture-responses, --provider openrouter, or --provider routing", {"provider": provider}, ) diff --git a/src/infospace_bench/routing.py b/src/infospace_bench/routing.py index 74796fb..2ea2983 100644 --- a/src/infospace_bench/routing.py +++ b/src/infospace_bench/routing.py @@ -112,7 +112,11 @@ def _identify_adapter(adapter: LLMAdapter) -> str: adapter_id = getattr(adapter, "adapter_id", "") if adapter_id: return str(adapter_id) - model = getattr(adapter, "model", "") or getattr(adapter, "model_name", "") + model = ( + getattr(adapter, "model", "") + or getattr(adapter, "model_name", "") + or getattr(adapter, "_model", "") + ) name = type(adapter).__name__ if model: return f"{name}:{model}" diff --git a/tests/test_budget_registry.py b/tests/test_budget_registry.py index 85453b5..1f5ed96 100644 --- a/tests/test_budget_registry.py +++ b/tests/test_budget_registry.py @@ -522,7 +522,7 @@ def test_emit_token_event_calls_poster_with_record_token_payload(tmp_path: Path) assert result["status"] == "emitted" assert len(calls) == 1 url, payload, timeout = calls[0] - assert url == "http://hub.example/state/token-events" + assert url == "http://hub.example/token-events/" assert payload["tokens_in"] == 1200 assert payload["tokens_out"] == 400 assert payload["model"] == 
"openai/gpt-4o-mini" diff --git a/tests/test_routing_cli.py b/tests/test_routing_cli.py new file mode 100644 index 0000000..9b4868e --- /dev/null +++ b/tests/test_routing_cli.py @@ -0,0 +1,286 @@ +""" +Tests for the routing CLI flags (IB-WP-0020-T03). + +Three levels: +- _adapter_for("routing") unit checks — missing config, happy path +- run_generation end-to-end through --provider routing with a stubbed + OpenRouterAdapter.execute_prompt so no network is required +- CLI subprocess smoke that proves the new flags are wired +""" + +from __future__ import annotations + +import json +import os +import subprocess +import sys +import zipfile +from pathlib import Path + +import pytest +import yaml + +from infospace_bench.errors import InfospaceError +from infospace_bench.generator import ( + _adapter_for, + init_generation_infospace, + run_generation, + status_generation, +) +from infospace_bench.routing import RoutingAssistedGenerationAdapter + + +FIXTURE_ROOT = Path(__file__).parent / "fixtures" / "lefevre" + + +def _build_fixture_epub(target: Path) -> Path: + sources = FIXTURE_ROOT / "sources" + layout: dict[str, str] = { + "mimetype": "application/epub+zip", + "META-INF/container.xml": (sources / "container.xml").read_text(encoding="utf-8"), + } + for source in sorted(sources.glob("*.xhtml")): + layout[f"OEBPS/{source.name}"] = source.read_text(encoding="utf-8") + layout["OEBPS/content.opf"] = (sources / "content.opf").read_text(encoding="utf-8") + with zipfile.ZipFile(target, "w") as archive: + for path_in_zip, contents in layout.items(): + archive.writestr(path_in_zip, contents) + return target + + +def _write_routing_config(path: Path, *, ledger_relpath: str | None = None) -> None: + """Minimal routing config that maps every fixture stage to one cheap candidate.""" + data: dict = { + "schema_version": 1, + "stage_to_task_type": { + "summarize-source": "cheap", + "extract-entities": "cheap", + "extract-relations": "cheap", + "evaluate-entity": "cheap", + 
"synthesize-report": "cheap", + }, + "task_types": { + "cheap": { + "candidates": [ + { + "id": "openrouter:gpt-4o-mini", + "provider": "openrouter", + "model": "openai/gpt-4o-mini", + "api_key_env": "OPENROUTER_API_KEY", + }, + ], + }, + }, + } + if ledger_relpath is not None: + data["ledger_path"] = ledger_relpath + path.write_text(yaml.safe_dump(data, sort_keys=False), encoding="utf-8") + + +def test_adapter_for_routing_missing_config_raises() -> None: + with pytest.raises(InfospaceError) as exc_info: + _adapter_for("routing", model="", fixture_responses=None, routing_config=None) + assert exc_info.value.code == "missing_routing_config" + + +def test_adapter_for_routing_returns_bridge(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setenv("OPENROUTER_API_KEY", "sk-fake-test-key") + config_path = tmp_path / "routing.yaml" + _write_routing_config(config_path) + + adapter = _adapter_for( + "routing", + model="", + fixture_responses=None, + routing_config=config_path, + workspace=tmp_path, + ) + + assert isinstance(adapter, RoutingAssistedGenerationAdapter) + assert adapter.stage_to_task_type["summarize-source"] == "cheap" + + +_FIXTURE_RESPONSES = { + "summarize-source": "# Source Summary\n\nFixture summary content.\n", + "extract-entities": ( + "# Stub Entity\n\n" + "## Category\n\nstrategy\n\n" + "## Definition\n\nA stub trading concept for the routing CLI smoke.\n" + ), + "extract-relations": ( + "# Stub Entity Practices Tape Reading\n\n" + "## Subject\n\nStub Entity\n\n" + "## Predicate\n\npractices\n\n" + "## Object\n\nTape Reading\n\n" + "## Relation Type\n\nstrategy_outcome\n\n" + "## Evidence\n\nFixture evidence.\n" + ), + "evaluate-entity": ( + "---\n" + "artifact_id: entity/stub-entity.md\n" + "evaluator: fixture\n" + "evaluated_at: '2026-05-18T00:00:00'\n" + "scores:\n" + " - name: groundedness\n value: 4.0\n max_value: 5.0\n" + " - name: lesson_clarity\n value: 4.0\n max_value: 5.0\n" + " - name: historical_context\n value: 4.0\n max_value: 5.0\n" + 
" - name: overgeneralization_risk\n value: 4.0\n max_value: 5.0\n" + "---\n\n" + "# Evaluation: entity/stub-entity.md\n" + ), + "synthesize-report": "# Routed Report\n\nFixture report.\n", +} + + +def _stub_openrouter_execute(self, prompt, config): + """Replacement for OpenRouterAdapter.execute_prompt that returns canned content. + + Identifies the stage from the rendered template's H1 line (templates + start with ``# Extract Entities`` / ``# Extract Relations`` / ``# Evaluate + ...`` / ``# Synthesize ...``; anything else is treated as the + summarize-source stage). + """ + from llm_connect.models import LLMResponse + + first_line = prompt.lstrip().splitlines()[0] if prompt.strip() else "" + lower = first_line.lower() + if lower.startswith("# extract") and "entit" in lower: + content = _FIXTURE_RESPONSES["extract-entities"] + elif lower.startswith("# extract") and "relation" in lower: + content = _FIXTURE_RESPONSES["extract-relations"] + elif lower.startswith("# evaluate"): + content = _FIXTURE_RESPONSES["evaluate-entity"] + elif lower.startswith("# synthesize"): + content = _FIXTURE_RESPONSES["synthesize-report"] + else: + content = _FIXTURE_RESPONSES["summarize-source"] + return LLMResponse( + content=content, + model=getattr(self, "_model", "openai/gpt-4o-mini"), + usage={"prompt_tokens": len(prompt.split()), "completion_tokens": 40}, + finish_reason="stop", + metadata={"request_id": "or-stub-1"}, + ) + + +def test_run_generation_via_routing_provider_completes_end_to_end( + tmp_path: Path, monkeypatch +) -> None: + monkeypatch.setenv("OPENROUTER_API_KEY", "sk-fake-test-key") + from llm_connect.openrouter import OpenRouterAdapter + + monkeypatch.setattr( + OpenRouterAdapter, "execute_prompt", _stub_openrouter_execute, raising=True + ) + + book = _build_fixture_epub(tmp_path / "lefevre.epub") + config_path = tmp_path / "routing.yaml" + _write_routing_config(config_path) + + infospace = init_generation_infospace( + tmp_path, + book, + "lefevre-routing-smoke", + 
name="Lefevre Routing Smoke", + profile="trading-literature", + chapter_filter=["I"], + ) + result = run_generation( + infospace.root, + provider="routing", + routing_config=config_path, + ) + status = status_generation(infospace.root) + + assert result.status == "completed" + assert status["source_chunk_count"] == 1 + assert status["entity_count"] >= 1 + assert status["evaluation_count"] >= 1 + + report = (infospace.root / "reports" / "generation-summary.md").read_text(encoding="utf-8") + assert "## Per-stage adapter choices" in report + assert "openai/gpt-4o-mini" in report # adapter_id ends with the model + + # Budget usage rollup should bucket calls by the routed model. + import yaml as _yaml + + usage = _yaml.safe_load((infospace.root / "output" / "budget" / "usage.yaml").read_text(encoding="utf-8")) + bucket_models = {b["model"] for b in usage["runs"][0]["per_bucket"]} + assert "openai/gpt-4o-mini" in bucket_models + + +def test_from_source_cli_provider_routing(tmp_path: Path, monkeypatch) -> None: + book = _build_fixture_epub(tmp_path / "lefevre.epub") + config_path = tmp_path / "routing.yaml" + _write_routing_config(config_path) + + env = os.environ.copy() + env["PYTHONPATH"] = "src:/home/worsch/markitect-tool/src:/home/worsch/llm-connect" + + # Missing API key → fast fail from the loader, no subprocess crash. 
+ env.pop("OPENROUTER_API_KEY", None) + bad = subprocess.run( + [ + sys.executable, + "-m", + "infospace_bench", + "generate", + "from-source", + str(book), + "--workspace", + str(tmp_path), + "--slug", + "routing-cli-missing-key", + "--name", + "Routing CLI Missing Key", + "--profile", + "trading-literature", + "--provider", + "routing", + "--routing-config", + str(config_path), + "--chapter", + "I", + "--apply", + ], + check=False, + env=env, + text=True, + capture_output=True, + ) + assert bad.returncode != 0 + assert "missing_routing_api_key" in (bad.stdout + bad.stderr) + + +def test_run_via_routing_resolves_workspace_relative_ledger( + tmp_path: Path, monkeypatch +) -> None: + monkeypatch.setenv("OPENROUTER_API_KEY", "sk-fake-test-key") + from llm_connect.openrouter import OpenRouterAdapter + + monkeypatch.setattr( + OpenRouterAdapter, "execute_prompt", _stub_openrouter_execute, raising=True + ) + + book = _build_fixture_epub(tmp_path / "lefevre.epub") + config_path = tmp_path / "routing.yaml" + _write_routing_config(config_path, ledger_relpath="output/routing/quality.jsonl") + + infospace = init_generation_infospace( + tmp_path, + book, + "lefevre-routing-ledger", + name="Lefevre Routing Ledger", + profile="trading-literature", + chapter_filter=["I"], + ) + run_generation( + infospace.root, + provider="routing", + routing_config=config_path, + quality_floor=0.7, + ) + + # ledger_path is relative to the workspace (tmp_path), not the infospace root. 
+ ledger_path = tmp_path / "output" / "routing" / "quality.jsonl" + assert ledger_path.parent.is_dir(), "loader must create the ledger parent dir" diff --git a/workplans/IB-WP-0020-provider-routing-cli.md b/workplans/IB-WP-0020-provider-routing-cli.md index 698f2f7..f7f8fac 100644 --- a/workplans/IB-WP-0020-provider-routing-cli.md +++ b/workplans/IB-WP-0020-provider-routing-cli.md @@ -117,7 +117,7 @@ state_hub_task_id: "5e38514b-ad6a-4d39-8716-f812f241d9fd" ```task id: IB-WP-0020-T03 -status: todo +status: done priority: high state_hub_task_id: "fe5888e0-da33-413a-b026-71ed811b8c73" ```