Files
infospace-bench/tests/test_openrouter_live.py
tegwick debd2b8e69 IB-WP-0020-T04: example routing config + live routing smoke
examples/routing/trading-literature.yaml is the checked-in starting
config for a Lefevre-style run. It applies the IB-WP-0018 task-type
taxonomy: cheap candidates for summary + evaluation, smart candidates
for entity + relation extraction, and a separate baseline rule wiring
claude_code for a follow-on T05 ShadowingAdapter step. Workspace-
relative ledger_path keeps adaptive observations with the workspace.

tests/test_routing_config.py gains a regression test that asserts the
shipped example parses cleanly, every stage in stage_to_task_type maps
to a declared task type, and the baseline candidate uses the
claude_code provider — so the example will not bit-rot silently.

tests/test_openrouter_live.py gains test_provider_routing_one_chapter_live_smoke
gated on the same INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER + OPENROUTER_API_KEY
opt-in as the existing static smoke. It builds a one-candidate routing
config, runs a single chapter through --provider routing, and asserts
the per-stage adapter-choices report section names the routed model
and the routed artifacts carry adapter_id provenance.

docs/generic-source-generator.md gains a "Live runs with --provider
routing" subsection that walks through the one-command routed run,
explains the --quality-floor override, and points at the parallel
live smoke test.

174 tests pass, 2 skipped (both live smokes, correctly gated).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-18 22:19:54 +02:00

295 lines
10 KiB
Python

"""
Optional live-run smoke against OpenRouter (IB-WP-0016-T06).
Skipped unless the caller has explicitly opted in by setting BOTH
`OPENROUTER_API_KEY` and `INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER=1`. The
smoke run touches the network and is billed; keep `--chapter`-style
selection bound to a single chapter and a cheap model.
Also includes a non-live regression test that exercises the new chapter
selection plumbing without hitting the network.
"""
import json
import os
import subprocess
import sys
import zipfile
from pathlib import Path
import pytest
import yaml
from infospace_bench.generator import (
init_generation_infospace,
plan_generation,
run_generation,
status_generation,
)
# Directory of the Lefevre fixture files, resolved relative to this test module.
FIXTURE_ROOT = Path(__file__).parent.joinpath("fixtures", "lefevre")
def _build_fixture_epub(target: Path) -> Path:
    """Assemble a small EPUB archive at ``target`` from the fixture sources.

    The archive receives, in this order: the ``mimetype`` entry,
    ``META-INF/container.xml``, every ``*.xhtml`` file found in the fixture
    ``sources`` directory (sorted, stored under ``OEBPS/``) and finally
    ``OEBPS/content.opf``.

    Returns ``target`` so the call can be used inline.
    """
    source_dir = FIXTURE_ROOT / "sources"

    def _read(file_name: str) -> str:
        # All fixture files are read as UTF-8 text.
        return (source_dir / file_name).read_text(encoding="utf-8")

    entries: list[tuple[str, str]] = [
        ("mimetype", "application/epub+zip"),
        ("META-INF/container.xml", _read("container.xml")),
    ]
    entries.extend(
        (f"OEBPS/{chapter.name}", chapter.read_text(encoding="utf-8"))
        for chapter in sorted(source_dir.glob("*.xhtml"))
    )
    entries.append(("OEBPS/content.opf", _read("content.opf")))

    with zipfile.ZipFile(target, "w") as epub:
        for member_name, text in entries:
            epub.writestr(member_name, text)
    return target
def test_init_chapter_filter_keeps_only_selected_chapters(tmp_path: Path) -> None:
    """Initialising with ``chapter_filter=["I"]`` leaves only ``chapter-01.md``."""
    epub_path = _build_fixture_epub(tmp_path / "lefevre.epub")
    workspace = init_generation_infospace(
        tmp_path,
        epub_path,
        "lefevre-chapter-1",
        name="Lefevre Chapter I",
        profile="trading-literature",
        chapter_filter=["I"],
    )

    # Only the markdown sources written under artifacts/sources are compared.
    sources_dir = workspace.root / "artifacts" / "sources"
    found = sorted(entry.name for entry in sources_dir.glob("*.md"))
    assert found == ["chapter-01.md"], found
def test_init_chapter_range_keeps_only_selected_range(tmp_path: Path) -> None:
    """A ``from_chapter=2`` / ``to_chapter=3`` range keeps chapters 02 and 03 only."""
    epub_path = _build_fixture_epub(tmp_path / "lefevre.epub")
    chapter_range = {"from_chapter": 2, "to_chapter": 3}
    workspace = init_generation_infospace(
        tmp_path,
        epub_path,
        "lefevre-chapter-range",
        name="Lefevre Chapters II-III",
        profile="trading-literature",
        **chapter_range,
    )

    sources_dir = workspace.root / "artifacts" / "sources"
    found = sorted(entry.name for entry in sources_dir.glob("*.md"))
    assert found == ["chapter-02.md", "chapter-03.md"], found
def test_init_empty_selection_raises(tmp_path: Path) -> None:
    """A chapter filter that matches nothing raises ``empty_chapter_selection``."""
    from infospace_bench.errors import InfospaceError

    epub_path = _build_fixture_epub(tmp_path / "lefevre.epub")
    init_kwargs = {
        "name": "None",
        "profile": "trading-literature",
        "chapter_filter": ["nope"],
    }

    with pytest.raises(InfospaceError) as caught:
        init_generation_infospace(tmp_path, epub_path, "lefevre-no-match", **init_kwargs)

    # The error is identified by its machine-readable code, not its message.
    assert caught.value.code == "empty_chapter_selection"
def test_from_source_cli_chapter_filter(tmp_path: Path) -> None:
    """Run ``generate from-source --chapter I`` through the CLI in a subprocess.

    The command is executed with ``--fixture-responses`` and ``--apply``. The
    test asserts a zero exit code, a JSON payload on stdout whose ``status``
    is ``"completed"``, and that the infospace at the reported ``root``
    contains only ``chapter-01.md`` among its markdown sources.
    """
    book = _build_fixture_epub(tmp_path / "lefevre.epub")

    env = os.environ.copy()
    # Put the required source trees first, but keep any PYTHONPATH the caller
    # already exported instead of overwriting it, and join with os.pathsep so
    # the value uses the platform's separator.
    # NOTE(review): the markitect-tool entry is a machine-specific absolute
    # path and "src" is resolved against the current working directory —
    # confirm whether these should come from configuration instead.
    search_path = ["src", "/home/worsch/markitect-tool/src"]
    inherited = env.get("PYTHONPATH")
    if inherited:
        search_path.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(search_path)

    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "infospace_bench",
            "generate",
            "from-source",
            str(book),
            "--workspace",
            str(tmp_path),
            "--slug",
            "lefevre-cli-ch1",
            "--name",
            "Lefevre CLI Chapter I",
            "--profile",
            "trading-literature",
            "--chapter",
            "I",
            "--fixture-responses",
            str(FIXTURE_ROOT / "responses.yaml"),
            "--apply",
        ],
        check=False,  # the return code is asserted below with stderr as the message
        env=env,
        text=True,
        capture_output=True,
    )
    assert result.returncode == 0, result.stderr

    payload = json.loads(result.stdout)
    assert payload["status"] == "completed"

    root = Path(payload["root"])
    sources = sorted(p.name for p in (root / "artifacts" / "sources").glob("*.md"))
    assert sources == ["chapter-01.md"]
# Flag values that explicitly mean "disabled". Without this check any
# non-empty value — including "0" or "false" — would enable the billed live
# smokes. Every other non-empty value still opts in.
_LIVE_OPT_OUT_VALUES = frozenset({"", "0", "false", "no", "off"})
_LIVE_OPT_IN = (
    os.environ.get("INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER", "").strip().lower()
    not in _LIVE_OPT_OUT_VALUES
)
# API key read from the environment; the live tests are skipped when it is unset or empty.
_LIVE_API_KEY = os.environ.get("OPENROUTER_API_KEY")
# Skip reason shown by pytest when the live smoke is not opted in.
_LIVE_REASON = (
    "set INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER=1 and OPENROUTER_API_KEY to run "
    "the optional one-chapter OpenRouter smoke"
)
@pytest.mark.skipif(not (_LIVE_OPT_IN and _LIVE_API_KEY), reason=_LIVE_REASON)
def test_openrouter_one_chapter_smoke(tmp_path: Path) -> None:
    """Live smoke: one Lefevre fixture chapter through OpenRouter end-to-end.

    Asserts the run completes, produces a manifest-backed infospace, and that
    the run record + generated artifact provenance carry the provider model,
    request_id, usage counts, and retry count fields T06 promises.

    The model defaults to ``openai/gpt-4o-mini`` and can be overridden with
    the ``INFOSPACE_BENCH_LIVE_MODEL`` environment variable.
    """
    book = _build_fixture_epub(tmp_path / "lefevre.epub")
    model = os.environ.get("INFOSPACE_BENCH_LIVE_MODEL", "openai/gpt-4o-mini")
    infospace = init_generation_infospace(
        tmp_path,
        book,
        "lefevre-live-smoke",
        name="Lefevre Live Smoke",
        profile="trading-literature",
        chapter_filter=["I"],
    )
    plan_generation(infospace.root, cost_per_1k_tokens=0.5)
    result = run_generation(
        infospace.root,
        provider="openrouter",
        model=model,
    )
    status = status_generation(infospace.root)
    assert result.status == "completed"
    assert status["source_chunk_count"] == 1
    assert status["entity_count"] >= 1
    assert status["evaluation_count"] >= 1

    # At least one run record must carry provider metadata for openrouter calls.
    run_records = sorted((infospace.root / "output" / "workflows" / "runs").glob("*.yaml"))
    assert run_records
    saw_openrouter_metadata = False
    for record_path in run_records:
        data = yaml.safe_load(record_path.read_text(encoding="utf-8"))
        for stage in data.get("stages") or []:
            if stage.get("provider") != "openrouter":
                continue
            metadata = stage.get("metadata") or {}
            assert metadata.get("model") == model
            assert metadata.get("request_id"), "expected non-empty OpenRouter request_id"
            usage = metadata.get("usage") or {}
            assert int(usage.get("prompt_tokens") or 0) > 0
            assert int(usage.get("completion_tokens") or 0) > 0
            # A retry count of 0 is valid, so only the field's presence is checked.
            assert metadata.get("retry_count") is not None
            assert float(metadata.get("duration_seconds") or 0) > 0
            saw_openrouter_metadata = True
    assert saw_openrouter_metadata, "no openrouter stage recorded provider metadata"

    # Provider metadata also reaches generated artifact provenance.
    index = yaml.safe_load((infospace.root / "artifacts" / "index.yaml").read_text(encoding="utf-8"))

    def _request_id(item: dict) -> object:
        # `or {}` guards against keys that are present but null in the YAML,
        # so a missing value yields a clean assertion failure below rather
        # than an AttributeError.
        provenance = item.get("provenance") or {}
        provider_metadata = provenance.get("provider_metadata") or {}
        return provider_metadata.get("request_id")

    generated_with_metadata = [
        item
        for item in index["artifacts"]
        if item["kind"] in {"entity", "relation", "generated"} and _request_id(item)
    ]
    assert generated_with_metadata, "generated artifacts should carry provider_metadata.request_id"
# Skip reason shown by pytest when the routing live smoke is not opted in.
_LIVE_ROUTING_REASON = " ".join(
    (
        "set INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER=1 and OPENROUTER_API_KEY to run",
        "the optional one-chapter routing smoke against OpenRouter",
    )
)
@pytest.mark.skipif(not (_LIVE_OPT_IN and _LIVE_API_KEY), reason=_LIVE_ROUTING_REASON)
def test_provider_routing_one_chapter_live_smoke(tmp_path: Path) -> None:
    """Live smoke: one chapter through --provider routing against OpenRouter.

    Uses a minimal routing config with a single task type ("cheap") and a
    single OpenRouter candidate, so the test spends roughly the same as the
    static OpenRouter smoke. Asserts the run completes, the generation
    summary report contains the per-stage adapter-choices section and names
    the routed model, and at least one generated artifact carries
    ``adapter_id`` in its ``provider_metadata`` provenance.

    The model defaults to ``openai/gpt-4o-mini`` and can be overridden with
    the ``INFOSPACE_BENCH_LIVE_MODEL`` environment variable.
    """
    book = _build_fixture_epub(tmp_path / "lefevre.epub")
    model = os.environ.get("INFOSPACE_BENCH_LIVE_MODEL", "openai/gpt-4o-mini")

    # Every stage maps to the same task type, which has exactly one candidate.
    routing_config = tmp_path / "routing.yaml"
    routing_config.write_text(
        yaml.safe_dump(
            {
                "schema_version": 1,
                "stage_to_task_type": {
                    "summarize-source": "cheap",
                    "extract-entities": "cheap",
                    "extract-relations": "cheap",
                    "evaluate-entity": "cheap",
                    "synthesize-report": "cheap",
                },
                "task_types": {
                    "cheap": {
                        "candidates": [
                            {
                                "id": f"openrouter:{model}",
                                "provider": "openrouter",
                                "model": model,
                                "api_key_env": "OPENROUTER_API_KEY",
                            },
                        ],
                    },
                },
            },
            sort_keys=False,
        ),
        encoding="utf-8",
    )

    infospace = init_generation_infospace(
        tmp_path,
        book,
        "lefevre-live-routing",
        name="Lefevre Live Routing",
        profile="trading-literature",
        chapter_filter=["I"],
    )
    plan_generation(infospace.root, cost_per_1k_tokens=0.5)
    result = run_generation(
        infospace.root,
        provider="routing",
        routing_config=routing_config,
    )
    status = status_generation(infospace.root)
    assert result.status == "completed"
    assert status["source_chunk_count"] == 1
    assert status["entity_count"] >= 1

    report = (infospace.root / "reports" / "generation-summary.md").read_text(encoding="utf-8")
    assert "## Per-stage adapter choices" in report
    assert model in report, "report should name the routed model"

    # Routed artifacts are expected to carry adapter_id on provider_metadata.
    # NOTE(review): task_type is presumably written alongside it but is not
    # asserted here — confirm whether it should be.
    index = yaml.safe_load((infospace.root / "artifacts" / "index.yaml").read_text(encoding="utf-8"))

    def _adapter_id(item: dict) -> object:
        # `or {}` guards against keys that are present but null in the YAML,
        # so a missing value yields a clean assertion failure below rather
        # than an AttributeError.
        provenance = item.get("provenance") or {}
        provider_metadata = provenance.get("provider_metadata") or {}
        return provider_metadata.get("adapter_id")

    routed_artifacts = [
        item
        for item in index["artifacts"]
        if item["kind"] in {"entity", "relation", "generated"} and _adapter_id(item)
    ]
    assert routed_artifacts, "routed artifacts must carry adapter_id provenance"