generated from coulomb/repo-seed
examples/routing/trading-literature.yaml is the checked-in starting config for a Lefevre-style run. It applies the IB-WP-0018 task-type taxonomy: cheap candidates for summary + evaluation, smart candidates for entity + relation extraction, and a separate baseline rule wiring claude_code for a follow-on T05 ShadowingAdapter step. Workspace- relative ledger_path keeps adaptive observations with the workspace. tests/test_routing_config.py gains a regression test that asserts the shipped example parses cleanly, every stage in stage_to_task_type maps to a declared task type, and the baseline candidate uses the claude_code provider — so the example will not bit-rot silently. tests/test_openrouter_live.py gains test_provider_routing_one_chapter_live_smoke gated on the same INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER + OPENROUTER_API_KEY opt-in as the existing static smoke. It builds a one-candidate routing config, runs a single chapter through --provider routing, and asserts the per-stage adapter-choices report section names the routed model and the routed artifacts carry adapter_id provenance. docs/generic-source-generator.md gains a "Live runs with --provider routing" subsection that walks through the one-command routed run, explains the --quality-floor override, and points at the parallel live smoke test. 174 tests pass, 2 skipped (both live smokes, correctly gated). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
295 lines
10 KiB
Python
295 lines
10 KiB
Python
"""
|
|
Optional live-run smoke against OpenRouter (IB-WP-0016-T06).
|
|
|
|
Skipped unless the caller has explicitly opted in by setting BOTH
|
|
`OPENROUTER_API_KEY` and `INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER=1`. The
|
|
smoke run touches the network and is billed; keep `--chapter`-style
|
|
selection bound to a single chapter and a cheap model.
|
|
|
|
Also includes a non-live regression test that exercises the new chapter
|
|
selection plumbing without hitting the network.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import zipfile
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
from infospace_bench.generator import (
|
|
init_generation_infospace,
|
|
plan_generation,
|
|
run_generation,
|
|
status_generation,
|
|
)
|
|
|
|
|
|
# Directory holding the Lefevre fixture data, located next to this test module.
FIXTURE_ROOT = Path(__file__).parent.joinpath("fixtures", "lefevre")
|
|
|
|
|
|
def _build_fixture_epub(target: Path) -> Path:
    """Assemble the Lefevre fixture sources into an EPUB archive at *target*.

    The archive holds a ``mimetype`` member, ``META-INF/container.xml``, every
    ``*.xhtml`` file from the fixture ``sources`` directory (name-sorted, under
    ``OEBPS/``) and finally ``OEBPS/content.opf``. Members are written in that
    order, so ``mimetype`` is always the first entry.

    Returns:
        The *target* path that was passed in, for call-site chaining.
    """
    source_dir = FIXTURE_ROOT / "sources"

    def _read(path: Path) -> str:
        return path.read_text(encoding="utf-8")

    members: dict[str, str] = {"mimetype": "application/epub+zip"}
    members["META-INF/container.xml"] = _read(source_dir / "container.xml")
    members.update(
        {f"OEBPS/{chapter.name}": _read(chapter) for chapter in sorted(source_dir.glob("*.xhtml"))}
    )
    members["OEBPS/content.opf"] = _read(source_dir / "content.opf")

    with zipfile.ZipFile(target, "w") as epub:
        for member_name, member_text in members.items():
            epub.writestr(member_name, member_text)
    return target
|
|
|
|
|
|
def test_init_chapter_filter_keeps_only_selected_chapters(tmp_path: Path) -> None:
    """Initialising with ``chapter_filter=["I"]`` leaves only ``chapter-01.md``."""
    epub_path = _build_fixture_epub(tmp_path / "lefevre.epub")

    created = init_generation_infospace(
        tmp_path,
        epub_path,
        "lefevre-chapter-1",
        name="Lefevre Chapter I",
        profile="trading-literature",
        chapter_filter=["I"],
    )

    # Only the markdown sources written under artifacts/sources are compared.
    sources_dir = created.root / "artifacts" / "sources"
    found = sorted(entry.name for entry in sources_dir.glob("*.md"))
    assert found == ["chapter-01.md"], found
|
|
|
|
|
|
def test_init_chapter_range_keeps_only_selected_range(tmp_path: Path) -> None:
    """Initialising with ``from_chapter=2, to_chapter=3`` keeps chapters 02 and 03."""
    epub_path = _build_fixture_epub(tmp_path / "lefevre.epub")
    chapter_bounds = {"from_chapter": 2, "to_chapter": 3}

    created = init_generation_infospace(
        tmp_path,
        epub_path,
        "lefevre-chapter-range",
        name="Lefevre Chapters II-III",
        profile="trading-literature",
        **chapter_bounds,
    )

    # Only the markdown sources written under artifacts/sources are compared.
    sources_dir = created.root / "artifacts" / "sources"
    found = sorted(entry.name for entry in sources_dir.glob("*.md"))
    assert found == ["chapter-02.md", "chapter-03.md"], found
|
|
|
|
|
|
def test_init_empty_selection_raises(tmp_path: Path) -> None:
    """A chapter filter that matches nothing raises ``empty_chapter_selection``."""
    from infospace_bench.errors import InfospaceError

    epub_path = _build_fixture_epub(tmp_path / "lefevre.epub")
    unmatched_selection = {
        "name": "None",
        "profile": "trading-literature",
        "chapter_filter": ["nope"],
    }

    with pytest.raises(InfospaceError) as raised:
        init_generation_infospace(tmp_path, epub_path, "lefevre-no-match", **unmatched_selection)

    # Checked after the context manager exits; inside it the line would never run.
    assert raised.value.code == "empty_chapter_selection"
|
|
|
|
|
|
def test_from_source_cli_chapter_filter(tmp_path: Path) -> None:
    """Run ``python -m infospace_bench generate from-source`` with ``--chapter I``.

    The CLI is executed in a subprocess against the fixture EPUB using the
    canned ``responses.yaml`` (no network). The test asserts a zero exit code,
    a JSON payload on stdout whose ``status`` is ``"completed"``, and that the
    infospace at ``payload["root"]`` contains only ``chapter-01.md`` under
    ``artifacts/sources``.
    """
    book = _build_fixture_epub(tmp_path / "lefevre.epub")

    env = os.environ.copy()
    # NOTE(review): the second entry is a developer-machine absolute path and
    # "src" is resolved against the pytest working directory; both are kept so
    # existing setups keep working, but they should become configurable.
    pythonpath_entries = ["src", "/home/worsch/markitect-tool/src"]
    # Keep whatever PYTHONPATH the caller already had instead of clobbering it,
    # and join with the platform separator (":" on POSIX, ";" on Windows).
    inherited_pythonpath = env.get("PYTHONPATH")
    if inherited_pythonpath:
        pythonpath_entries.append(inherited_pythonpath)
    env["PYTHONPATH"] = os.pathsep.join(pythonpath_entries)

    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "infospace_bench",
            "generate",
            "from-source",
            str(book),
            "--workspace",
            str(tmp_path),
            "--slug",
            "lefevre-cli-ch1",
            "--name",
            "Lefevre CLI Chapter I",
            "--profile",
            "trading-literature",
            "--chapter",
            "I",
            "--fixture-responses",
            str(FIXTURE_ROOT / "responses.yaml"),
            "--apply",
        ],
        check=False,  # the return code is asserted below with diagnostics attached
        env=env,
        text=True,
        capture_output=True,
    )

    # Fall back to stdout when stderr is empty so a failure is never message-less.
    assert result.returncode == 0, result.stderr or result.stdout
    payload = json.loads(result.stdout)
    assert payload["status"] == "completed"
    root = Path(payload["root"])
    sources = sorted(p.name for p in (root / "artifacts" / "sources").glob("*.md"))
    assert sources == ["chapter-01.md"]
|
|
|
|
|
|
# Opt-in switches for the live smokes, read once at import time. The skipif
# guards below only test truthiness, so any non-empty value enables them.
_LIVE_OPT_IN = os.getenv("INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER")
_LIVE_API_KEY = os.getenv("OPENROUTER_API_KEY")
# Skip reason shown by pytest when the static OpenRouter smoke is not enabled.
_LIVE_REASON = (
    "set INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER=1 "
    "and OPENROUTER_API_KEY to run the optional "
    "one-chapter OpenRouter smoke"
)
|
|
|
|
|
|
@pytest.mark.skipif(not (_LIVE_OPT_IN and _LIVE_API_KEY), reason=_LIVE_REASON)
def test_openrouter_one_chapter_smoke(tmp_path: Path) -> None:
    """Live smoke: one Lefevre fixture chapter through OpenRouter end-to-end.

    Asserts the run completes with exactly one source chunk and at least one
    entity and evaluation, that every ``openrouter`` stage in the run records
    carries the model, a request_id, non-zero token usage, a retry count and a
    positive duration, and that at least one generated artifact carries
    ``provider_metadata.request_id`` in its provenance.

    The model defaults to ``openai/gpt-4o-mini`` and can be overridden through
    the ``INFOSPACE_BENCH_LIVE_MODEL`` environment variable.
    """
    book = _build_fixture_epub(tmp_path / "lefevre.epub")
    model = os.environ.get("INFOSPACE_BENCH_LIVE_MODEL", "openai/gpt-4o-mini")

    infospace = init_generation_infospace(
        tmp_path,
        book,
        "lefevre-live-smoke",
        name="Lefevre Live Smoke",
        profile="trading-literature",
        chapter_filter=["I"],
    )
    plan_generation(infospace.root, cost_per_1k_tokens=0.5)
    result = run_generation(
        infospace.root,
        provider="openrouter",
        model=model,
    )
    status = status_generation(infospace.root)

    assert result.status == "completed"
    assert status["source_chunk_count"] == 1
    assert status["entity_count"] >= 1
    assert status["evaluation_count"] >= 1

    # At least one run record must carry provider metadata for openrouter calls.
    run_records = sorted((infospace.root / "output" / "workflows" / "runs").glob("*.yaml"))
    assert run_records
    saw_openrouter_metadata = False
    for record_path in run_records:
        # An empty YAML document loads as None; treat it as "no stages".
        data = yaml.safe_load(record_path.read_text(encoding="utf-8")) or {}
        for stage in data.get("stages") or []:
            if stage.get("provider") != "openrouter":
                continue
            metadata = stage.get("metadata") or {}
            assert metadata.get("model") == model
            assert metadata.get("request_id"), "expected non-empty OpenRouter request_id"
            usage = metadata.get("usage") or {}
            assert int(usage.get("prompt_tokens") or 0) > 0
            assert int(usage.get("completion_tokens") or 0) > 0
            assert metadata.get("retry_count") is not None
            assert float(metadata.get("duration_seconds") or 0) > 0
            saw_openrouter_metadata = True
    assert saw_openrouter_metadata, "no openrouter stage recorded provider metadata"

    # Provider metadata also reaches generated artifact provenance. Both
    # lookups use `or {}` so a key that is present but null counts as
    # "no metadata" instead of raising AttributeError.
    index = yaml.safe_load((infospace.root / "artifacts" / "index.yaml").read_text(encoding="utf-8"))
    generated_with_metadata = [
        item
        for item in index["artifacts"]
        if item["kind"] in {"entity", "relation", "generated"}
        and ((item.get("provenance") or {}).get("provider_metadata") or {}).get("request_id")
    ]
    assert generated_with_metadata, "generated artifacts should carry provider_metadata.request_id"
|
|
|
|
|
|
# Skip reason shown by pytest when the live routing smoke is not enabled.
_LIVE_ROUTING_REASON = (
    "set INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER=1 "
    "and OPENROUTER_API_KEY to run the optional "
    "one-chapter routing smoke against OpenRouter"
)
|
|
|
|
|
|
@pytest.mark.skipif(not (_LIVE_OPT_IN and _LIVE_API_KEY), reason=_LIVE_ROUTING_REASON)
def test_provider_routing_one_chapter_live_smoke(tmp_path: Path) -> None:
    """Live smoke: one chapter through --provider routing against OpenRouter.

    Writes a minimal routing config in which every stage maps to a single
    ``cheap`` task type with one OpenRouter candidate, so the test spends
    roughly the same as the static OpenRouter smoke. Asserts the run
    completes with one source chunk and at least one entity, that the
    generation summary has a "Per-stage adapter choices" section naming the
    routed model, and that at least one generated artifact carries
    ``provider_metadata.adapter_id`` in its provenance.

    The model defaults to ``openai/gpt-4o-mini`` and can be overridden through
    the ``INFOSPACE_BENCH_LIVE_MODEL`` environment variable.
    """
    book = _build_fixture_epub(tmp_path / "lefevre.epub")
    model = os.environ.get("INFOSPACE_BENCH_LIVE_MODEL", "openai/gpt-4o-mini")

    routing_config = tmp_path / "routing.yaml"
    routing_config.write_text(
        yaml.safe_dump(
            {
                "schema_version": 1,
                "stage_to_task_type": {
                    "summarize-source": "cheap",
                    "extract-entities": "cheap",
                    "extract-relations": "cheap",
                    "evaluate-entity": "cheap",
                    "synthesize-report": "cheap",
                },
                "task_types": {
                    "cheap": {
                        "candidates": [
                            {
                                "id": f"openrouter:{model}",
                                "provider": "openrouter",
                                "model": model,
                                "api_key_env": "OPENROUTER_API_KEY",
                            },
                        ],
                    },
                },
            },
            sort_keys=False,
        ),
        encoding="utf-8",
    )

    infospace = init_generation_infospace(
        tmp_path,
        book,
        "lefevre-live-routing",
        name="Lefevre Live Routing",
        profile="trading-literature",
        chapter_filter=["I"],
    )
    plan_generation(infospace.root, cost_per_1k_tokens=0.5)
    result = run_generation(
        infospace.root,
        provider="routing",
        routing_config=routing_config,
    )
    status = status_generation(infospace.root)

    assert result.status == "completed"
    assert status["source_chunk_count"] == 1
    assert status["entity_count"] >= 1

    report = (infospace.root / "reports" / "generation-summary.md").read_text(encoding="utf-8")
    assert "## Per-stage adapter choices" in report
    assert model in report, "report should name the routed model"

    # Routed artifacts are expected to carry adapter_id on provider_metadata.
    # Both lookups use `or {}` so a key that is present but null counts as
    # "no metadata" instead of raising AttributeError.
    index = yaml.safe_load((infospace.root / "artifacts" / "index.yaml").read_text(encoding="utf-8"))
    routed_artifacts = [
        item
        for item in index["artifacts"]
        if item["kind"] in {"entity", "relation", "generated"}
        and ((item.get("provenance") or {}).get("provider_metadata") or {}).get("adapter_id")
    ]
    assert routed_artifacts, "routed artifacts must carry adapter_id provenance"
|