diff --git a/docs/generic-source-generator.md b/docs/generic-source-generator.md index 0a917d8..f71b58d 100644 --- a/docs/generic-source-generator.md +++ b/docs/generic-source-generator.md @@ -48,6 +48,52 @@ infospace-bench generate status ./infospaces/book-space shows chunk counts, generated artifact counts, evaluations, metrics, history, and stale source/profile inputs. +### Live OpenRouter runs (handle with care) + +A single-chapter live run is the only OpenRouter shape the test suite +covers today. Use `--chapter` (or `--from-chapter` / `--to-chapter`) on +`generate init` or `generate from-source` to scope what gets registered +before any provider calls happen: + +```bash +export OPENROUTER_API_KEY=... + +# Preview the cost first +infospace-bench generate plan ./infospaces/foo --chapter I --cost-per-1k 0.30 + +# Run only Chapter I against a cheap model +infospace-bench generate from-source ./LEFEVRE.epub \ + --workspace ./infospaces \ + --slug reminiscences-ch1 \ + --name "Reminiscences (Ch I)" \ + --profile trading-literature \ + --provider openrouter \ + --model openai/gpt-4o-mini \ + --chapter I \ + --apply +``` + +`output/budget/plans.yaml`, `usage.yaml`, and `summary.yaml` record what +was estimated, what was actually spent, and the plan-vs-actual delta. +`output/workflows/runs/*.yaml` carry the OpenRouter request_id, model, +token usage, retry count, and per-call duration; the same metadata +reaches the entity/relation/evaluation artifacts via +`provenance.provider_metadata`. 
+ +Before scaling to the full book: + +- Inspect each chapter's outputs and `generation-summary.md` +- Multiply the per-chapter `total_provider_calls_estimate` and + `estimated_cost_usd` by the chapter count and compare to your budget +- Decide on a final model and confirm the rate-table entry exists in + `src/infospace_bench/model_rates.yaml` or your workspace override + +The optional live-smoke test in `tests/test_openrouter_live.py` is +skipped unless both `OPENROUTER_API_KEY` and +`INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER=1` are set. It runs a single +chapter through the same path and asserts the provider metadata +plumb-through. + ### Budget and usage registry Every `generate plan` invocation appends a compact snapshot to diff --git a/src/infospace_bench/cli.py b/src/infospace_bench/cli.py index d93adf5..55d2e83 100644 --- a/src/infospace_bench/cli.py +++ b/src/infospace_bench/cli.py @@ -148,6 +148,20 @@ def build_parser() -> argparse.ArgumentParser: generate_init.add_argument("--name", required=True) generate_init.add_argument("--profile", default="general-knowledge") generate_init.add_argument("--max-chunks", type=int, default=0) + generate_init.add_argument( + "--chapter", + action="append", + default=[], + help="Filter to chapter label or number (repeatable: --chapter I --chapter II)", + ) + generate_init.add_argument("--from-chapter", type=int, default=None) + generate_init.add_argument("--to-chapter", type=int, default=None) + generate_init.add_argument( + "--chunk", + action="append", + default=[], + help="Filter to source chunk id (repeatable)", + ) generate_plan = generate_sub.add_parser( "plan", @@ -226,6 +240,20 @@ def build_parser() -> argparse.ArgumentParser: generate_from_source.add_argument("--model", default="") generate_from_source.add_argument("--fixture-responses", default="") generate_from_source.add_argument("--max-chunks", type=int, default=0) + generate_from_source.add_argument( + "--chapter", + action="append", + default=[], + 
+ help="Filter to chapter label or number (repeatable)", + ) + generate_from_source.add_argument("--from-chapter", type=int, default=None) + generate_from_source.add_argument("--to-chapter", type=int, default=None) + generate_from_source.add_argument( + "--chunk", + action="append", + default=[], + help="Filter to source chunk id (repeatable)", + ) generate_from_source.add_argument("--apply", action="store_true") budget = sub.add_parser("budget", help="Inspect per-infospace budget and usage records") @@ -479,6 +507,10 @@ def main(argv: list[str] | None = None) -> int: name=args.name, profile=args.profile, max_chunks=_optional_positive(args.max_chunks), + chapter_filter=args.chapter or None, + chunk_filter=args.chunk or None, + from_chapter=args.from_chapter, + to_chapter=args.to_chapter, ) _write_json( { @@ -537,6 +569,10 @@ def main(argv: list[str] | None = None) -> int: name=args.name, profile=args.profile, max_chunks=_optional_positive(args.max_chunks), + chapter_filter=args.chapter or None, + chunk_filter=args.chunk or None, + from_chapter=args.from_chapter, + to_chapter=args.to_chapter, ) if args.apply: result = run_generation( diff --git a/src/infospace_bench/generator.py b/src/infospace_bench/generator.py index 14b6aec..5354923 100644 --- a/src/infospace_bench/generator.py +++ b/src/infospace_bench/generator.py @@ -79,8 +79,30 @@ def init_generation_infospace( name: str, profile: str = DEFAULT_PROFILE, max_chunks: int | None = None, + chapter_filter: list[str] | None = None, + chunk_filter: list[str] | None = None, + from_chapter: int | None = None, + to_chapter: int | None = None, ) -> Any: chunks = normalize_source(source, max_chunks=max_chunks) + chunks = _filter_chunks_by_chapter( + chunks, + chapter_filter=chapter_filter, + chunk_filter=chunk_filter, + from_chapter=from_chapter, + to_chapter=to_chapter, + ) + if not chunks: + raise InfospaceError( + "empty_chapter_selection", + "Selection filters excluded every chunk; nothing to register", + { 
+                "chapter_filter": chapter_filter,
+                "chunk_filter": chunk_filter,
+                "from_chapter": from_chapter,
+                "to_chapter": to_chapter,
+            },
+        )
     infospace = create_infospace(Path(workspace), slug, name=name)
     _install_profile(infospace.root, profile)
     _write_workflows(infospace.root, profile)
@@ -256,6 +278,50 @@ def plan_generation_summary(
     }
 
 
+def _filter_chunks_by_chapter(
+    chunks: list[SourceChunk],
+    *,
+    chapter_filter: list[str] | None,
+    chunk_filter: list[str] | None,
+    from_chapter: int | None,
+    to_chapter: int | None,
+) -> list[SourceChunk]:
+    """Return the chunks that pass every supplied selection filter.
+
+    Filters combine with AND. ``chunk_filter`` matches ``chunk_id`` exactly;
+    ``chapter_filter`` matches the chapter label case-insensitively or the
+    chapter number written in digits; ``from_chapter``/``to_chapter`` form an
+    inclusive numeric range and drop chunks that have no chapter number.
+    With no filters at all, a shallow copy of ``chunks`` is returned.
+    """
+    chunk_set = {value.strip() for value in (chunk_filter or []) if value.strip()}
+    label_set = {value.strip().lower() for value in (chapter_filter or []) if value.strip()}
+    if not chunk_set and not label_set and from_chapter is None and to_chapter is None:
+        return list(chunks)
+    out: list[SourceChunk] = []
+    for chunk in chunks:
+        if chunk_set and chunk.chunk_id not in chunk_set:
+            continue
+        if label_set:
+            label = (chunk.chapter_label or "").strip().lower()
+            number_match = (
+                chunk.chapter_number is not None
+                and str(chunk.chapter_number) in label_set
+            )
+            label_match = label in label_set if label else False
+            if not (number_match or label_match):
+                continue
+        if from_chapter is not None or to_chapter is not None:
+            if chunk.chapter_number is None:
+                continue
+            if from_chapter is not None and chunk.chapter_number < from_chapter:
+                continue
+            if to_chapter is not None and chunk.chapter_number > to_chapter:
+                continue
+        out.append(chunk)
+    return out
+
+
 def _select_source_chunks(
     sources: list[Any],
     *,
diff --git a/tests/test_openrouter_live.py b/tests/test_openrouter_live.py
new file mode 100644
index 0000000..a8c0f97
--- /dev/null
+++ b/tests/test_openrouter_live.py
@@ -0,0 +1,220 @@
+"""
+Optional live-run smoke against OpenRouter (IB-WP-0016-T06).
+
+Skipped unless the caller has explicitly opted in by setting BOTH
+`OPENROUTER_API_KEY` and `INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER=1`. The
+smoke run touches the network and is billed; keep `--chapter`-style
+selection bound to a single chapter and a cheap model.
+
+Also includes a non-live regression test that exercises the new chapter
+selection plumbing without hitting the network.
+"""
+
+import json
+import os
+import subprocess
+import sys
+import zipfile
+from pathlib import Path
+
+import pytest
+import yaml
+
+from infospace_bench.generator import (
+    init_generation_infospace,
+    plan_generation,
+    run_generation,
+    status_generation,
+)
+
+
+FIXTURE_ROOT = Path(__file__).parent / "fixtures" / "lefevre"
+
+
+def _build_fixture_epub(target: Path) -> Path:
+    """Zip the Lefevre fixture sources into an EPUB at ``target`` and return it."""
+    sources = FIXTURE_ROOT / "sources"
+    layout: dict[str, str] = {
+        "mimetype": "application/epub+zip",
+        "META-INF/container.xml": (sources / "container.xml").read_text(encoding="utf-8"),
+    }
+    for source in sorted(sources.glob("*.xhtml")):
+        layout[f"OEBPS/{source.name}"] = source.read_text(encoding="utf-8")
+    layout["OEBPS/content.opf"] = (sources / "content.opf").read_text(encoding="utf-8")
+    with zipfile.ZipFile(target, "w") as archive:
+        for path_in_zip, contents in layout.items():
+            archive.writestr(path_in_zip, contents)
+    return target
+
+
+def test_init_chapter_filter_keeps_only_selected_chapters(tmp_path: Path) -> None:
+    """Selecting chapter label ``I`` registers only ``chapter-01.md``."""
+    book = _build_fixture_epub(tmp_path / "lefevre.epub")
+
+    infospace = init_generation_infospace(
+        tmp_path,
+        book,
+        "lefevre-chapter-1",
+        name="Lefevre Chapter I",
+        profile="trading-literature",
+        chapter_filter=["I"],
+    )
+
+    sources = sorted(p.name for p in (infospace.root / "artifacts" / "sources").glob("*.md"))
+    assert sources == ["chapter-01.md"], sources
+
+
+def test_init_chapter_range_keeps_only_selected_range(tmp_path: Path) -> None:
+    """A 2..3 chapter range registers only chapters two and three."""
+    book = _build_fixture_epub(tmp_path / "lefevre.epub")
+
+    infospace = init_generation_infospace(
+        tmp_path,
+        book,
+        "lefevre-chapter-range",
+        name="Lefevre Chapters II-III",
+        profile="trading-literature",
+        from_chapter=2,
+        to_chapter=3,
+    )
+
+    sources = sorted(p.name for p in (infospace.root / "artifacts" / "sources").glob("*.md"))
+    assert sources == ["chapter-02.md", "chapter-03.md"], sources
+
+
+def test_init_empty_selection_raises(tmp_path: Path) -> None:
+    """A filter that matches nothing raises ``empty_chapter_selection``."""
+    from infospace_bench.errors import InfospaceError
+
+    book = _build_fixture_epub(tmp_path / "lefevre.epub")
+
+    with pytest.raises(InfospaceError) as exc_info:
+        init_generation_infospace(
+            tmp_path,
+            book,
+            "lefevre-no-match",
+            name="None",
+            profile="trading-literature",
+            chapter_filter=["nope"],
+        )
+    assert exc_info.value.code == "empty_chapter_selection"
+
+
+def test_from_source_cli_chapter_filter(tmp_path: Path) -> None:
+    """``generate from-source --chapter I`` registers only chapter one via the CLI."""
+    book = _build_fixture_epub(tmp_path / "lefevre.epub")
+    env = os.environ.copy()
+    # Use this checkout's src/ by absolute path and keep any inherited PYTHONPATH
+    # (for extra dependencies) instead of hard-coding a developer-local path.
+    src_dir = Path(__file__).resolve().parents[1] / "src"
+    env["PYTHONPATH"] = os.pathsep.join(
+        part for part in (str(src_dir), env.get("PYTHONPATH", "")) if part
+    )
+
+    result = subprocess.run(
+        [
+            sys.executable,
+            "-m",
+            "infospace_bench",
+            "generate",
+            "from-source",
+            str(book),
+            "--workspace",
+            str(tmp_path),
+            "--slug",
+            "lefevre-cli-ch1",
+            "--name",
+            "Lefevre CLI Chapter I",
+            "--profile",
+            "trading-literature",
+            "--chapter",
+            "I",
+            "--fixture-responses",
+            str(FIXTURE_ROOT / "responses.yaml"),
+            "--apply",
+        ],
+        check=False,
+        env=env,
+        text=True,
+        capture_output=True,
+    )
+
+    assert result.returncode == 0, result.stderr
+    payload = json.loads(result.stdout)
+    assert payload["status"] == "completed"
+    root = Path(payload["root"])
+    sources = sorted(p.name for p in (root / "artifacts" / "sources").glob("*.md"))
+    assert sources == ["chapter-01.md"]
+
+
+# Only the documented value "1" opts in; a plain truthiness check would also
+# enable the billed run for values such as "0" or "false".
+_LIVE_OPT_IN = os.environ.get("INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER") == "1"
+_LIVE_API_KEY = os.environ.get("OPENROUTER_API_KEY")
+_LIVE_REASON = (
+    "set INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER=1 and OPENROUTER_API_KEY to run "
+    "the optional one-chapter OpenRouter smoke"
+)
+
+
+@pytest.mark.skipif(not (_LIVE_OPT_IN and _LIVE_API_KEY), reason=_LIVE_REASON)
+def test_openrouter_one_chapter_smoke(tmp_path: Path) -> None:
+    """Live smoke: one Lefevre fixture chapter through OpenRouter end-to-end.
+
+    Asserts the run completes, produces a manifest-backed infospace, and that
+    the run record + generated artifact provenance carry the provider model,
+    request_id, usage counts, and retry count fields T06 promises.
+    """
+    book = _build_fixture_epub(tmp_path / "lefevre.epub")
+    model = os.environ.get("INFOSPACE_BENCH_LIVE_MODEL", "openai/gpt-4o-mini")
+
+    infospace = init_generation_infospace(
+        tmp_path,
+        book,
+        "lefevre-live-smoke",
+        name="Lefevre Live Smoke",
+        profile="trading-literature",
+        chapter_filter=["I"],
+    )
+    plan_generation(infospace.root, cost_per_1k_tokens=0.5)
+    result = run_generation(
+        infospace.root,
+        provider="openrouter",
+        model=model,
+    )
+    status = status_generation(infospace.root)
+
+    assert result.status == "completed"
+    assert status["source_chunk_count"] == 1
+    assert status["entity_count"] >= 1
+    assert status["evaluation_count"] >= 1
+
+    # At least one run record must carry provider metadata for openrouter calls.
+    run_records = sorted((infospace.root / "output" / "workflows" / "runs").glob("*.yaml"))
+    assert run_records
+    saw_openrouter_metadata = False
+    for record_path in run_records:
+        data = yaml.safe_load(record_path.read_text(encoding="utf-8")) or {}
+        for stage in data.get("stages") or []:
+            if stage.get("provider") != "openrouter":
+                continue
+            metadata = stage.get("metadata") or {}
+            assert metadata.get("model") == model
+            assert metadata.get("request_id"), "expected non-empty OpenRouter request_id"
+            usage = metadata.get("usage") or {}
+            assert int(usage.get("prompt_tokens") or 0) > 0
+            assert int(usage.get("completion_tokens") or 0) > 0
+            assert metadata.get("retry_count") is not None
+            assert float(metadata.get("duration_seconds") or 0) > 0
+            saw_openrouter_metadata = True
+    assert saw_openrouter_metadata, "no openrouter stage recorded provider metadata"
+
+    # Provider metadata also reaches generated artifact provenance.
+    index = yaml.safe_load((infospace.root / "artifacts" / "index.yaml").read_text(encoding="utf-8"))
+    generated_with_metadata = [
+        item
+        for item in index["artifacts"]
+        if item["kind"] in {"entity", "relation", "generated"}
+        and item.get("provenance", {}).get("provider_metadata", {}).get("request_id")
+    ]
+    assert generated_with_metadata, "generated artifacts should carry provider_metadata.request_id"
diff --git a/workplans/IB-WP-0016-lefevre-ebook-infospace-readiness.md b/workplans/IB-WP-0016-lefevre-ebook-infospace-readiness.md
index 5654e9b..3d55b87 100644
--- a/workplans/IB-WP-0016-lefevre-ebook-infospace-readiness.md
+++ b/workplans/IB-WP-0016-lefevre-ebook-infospace-readiness.md
@@ -192,7 +192,7 @@ state_hub_task_id: "c9bbc84e-691b-4530-a79a-6ecfa9c41fdd"
 
 ```task
 id: IB-WP-0016-T06
-status: todo
+status: done
 priority: high
 state_hub_task_id: "c6bf97c3-1c2c-4993-8f4f-97a48e01cce2"
 ```