""" Optional live-run smoke against OpenRouter (IB-WP-0016-T06). Skipped unless the caller has explicitly opted in by setting BOTH `OPENROUTER_API_KEY` and `INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER=1`. The smoke run touches the network and is billed; keep `--chapter`-style selection bound to a single chapter and a cheap model. Also includes a non-live regression test that exercises the new chapter selection plumbing without hitting the network. """ import json import os import subprocess import sys import zipfile from pathlib import Path import pytest import yaml from infospace_bench.generator import ( init_generation_infospace, plan_generation, run_generation, status_generation, ) FIXTURE_ROOT = Path(__file__).parent / "fixtures" / "lefevre" def _build_fixture_epub(target: Path) -> Path: sources = FIXTURE_ROOT / "sources" layout: dict[str, str] = { "mimetype": "application/epub+zip", "META-INF/container.xml": (sources / "container.xml").read_text(encoding="utf-8"), } for source in sorted(sources.glob("*.xhtml")): layout[f"OEBPS/{source.name}"] = source.read_text(encoding="utf-8") layout["OEBPS/content.opf"] = (sources / "content.opf").read_text(encoding="utf-8") with zipfile.ZipFile(target, "w") as archive: for path_in_zip, contents in layout.items(): archive.writestr(path_in_zip, contents) return target def test_init_chapter_filter_keeps_only_selected_chapters(tmp_path: Path) -> None: book = _build_fixture_epub(tmp_path / "lefevre.epub") infospace = init_generation_infospace( tmp_path, book, "lefevre-chapter-1", name="Lefevre Chapter I", profile="trading-literature", chapter_filter=["I"], ) sources = sorted(p.name for p in (infospace.root / "artifacts" / "sources").glob("*.md")) assert sources == ["chapter-01.md"], sources def test_init_chapter_range_keeps_only_selected_range(tmp_path: Path) -> None: book = _build_fixture_epub(tmp_path / "lefevre.epub") infospace = init_generation_infospace( tmp_path, book, "lefevre-chapter-range", name="Lefevre Chapters II-III", profile="trading-literature", from_chapter=2, to_chapter=3, ) sources = sorted(p.name for p in (infospace.root / "artifacts" / "sources").glob("*.md")) assert sources == ["chapter-02.md", "chapter-03.md"], sources def test_init_empty_selection_raises(tmp_path: Path) -> None: from infospace_bench.errors import InfospaceError book = _build_fixture_epub(tmp_path / "lefevre.epub") with pytest.raises(InfospaceError) as exc_info: init_generation_infospace( tmp_path, book, "lefevre-no-match", name="None", profile="trading-literature", chapter_filter=["nope"], ) assert exc_info.value.code == "empty_chapter_selection" def test_from_source_cli_chapter_filter(tmp_path: Path) -> None: book = _build_fixture_epub(tmp_path / "lefevre.epub") env = os.environ.copy() env["PYTHONPATH"] = "src:/home/worsch/markitect-tool/src" result = subprocess.run( [ sys.executable, "-m", "infospace_bench", "generate", "from-source", str(book), "--workspace", str(tmp_path), "--slug", "lefevre-cli-ch1", "--name", "Lefevre CLI Chapter I", "--profile", "trading-literature", "--chapter", "I", "--fixture-responses", str(FIXTURE_ROOT / "responses.yaml"), "--apply", ], check=False, env=env, text=True, capture_output=True, ) assert result.returncode == 0, result.stderr payload = json.loads(result.stdout) assert payload["status"] == "completed" root = Path(payload["root"]) sources = sorted(p.name for p in (root / "artifacts" / "sources").glob("*.md")) assert sources == ["chapter-01.md"] _LIVE_OPT_IN = os.environ.get("INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER") _LIVE_API_KEY = os.environ.get("OPENROUTER_API_KEY") _LIVE_REASON = ( "set INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER=1 and OPENROUTER_API_KEY to run " "the optional one-chapter OpenRouter smoke" ) @pytest.mark.skipif(not (_LIVE_OPT_IN and _LIVE_API_KEY), reason=_LIVE_REASON) def test_openrouter_one_chapter_smoke(tmp_path: Path) -> None: """Live smoke: one Lefevre fixture chapter through OpenRouter end-to-end. Asserts the run completes, produces a manifest-backed infospace, and that the run record + generated artifact provenance carry the provider model, request_id, usage counts, and retry count fields T06 promises. """ book = _build_fixture_epub(tmp_path / "lefevre.epub") model = os.environ.get("INFOSPACE_BENCH_LIVE_MODEL", "openai/gpt-4o-mini") infospace = init_generation_infospace( tmp_path, book, "lefevre-live-smoke", name="Lefevre Live Smoke", profile="trading-literature", chapter_filter=["I"], ) plan_generation(infospace.root, cost_per_1k_tokens=0.5) result = run_generation( infospace.root, provider="openrouter", model=model, ) status = status_generation(infospace.root) assert result.status == "completed" assert status["source_chunk_count"] == 1 assert status["entity_count"] >= 1 assert status["evaluation_count"] >= 1 # At least one run record must carry provider metadata for openrouter calls. run_records = sorted((infospace.root / "output" / "workflows" / "runs").glob("*.yaml")) assert run_records saw_openrouter_metadata = False for record_path in run_records: data = yaml.safe_load(record_path.read_text(encoding="utf-8")) for stage in data.get("stages") or []: if stage.get("provider") != "openrouter": continue metadata = stage.get("metadata") or {} assert metadata.get("model") == model assert metadata.get("request_id"), "expected non-empty OpenRouter request_id" usage = metadata.get("usage") or {} assert int(usage.get("prompt_tokens") or 0) > 0 assert int(usage.get("completion_tokens") or 0) > 0 assert metadata.get("retry_count") is not None assert float(metadata.get("duration_seconds") or 0) > 0 saw_openrouter_metadata = True assert saw_openrouter_metadata, "no openrouter stage recorded provider metadata" # Provider metadata also reaches generated artifact provenance. import yaml as _yaml index = _yaml.safe_load((infospace.root / "artifacts" / "index.yaml").read_text(encoding="utf-8")) generated_with_metadata = [ item for item in index["artifacts"] if item["kind"] in {"entity", "relation", "generated"} and item.get("provenance", {}).get("provider_metadata", {}).get("request_id") ] assert generated_with_metadata, "generated artifacts should carry provider_metadata.request_id"