IB-WP-0016-T06: OpenRouter live-run guardrails

Add --chapter / --from-chapter / --to-chapter / --chunk selection flags to generate init and generate from-source, plumb them into init_generation_infospace via a new _filter_chunks_by_chapter helper, and refuse to create an infospace when the filters reject every chunk (InfospaceError "empty_chapter_selection"). The flags use the same T03/T02 plumbing (chapter labels, roman numerals, chunk ids) so a single-chapter selection is a one-flag command.

OpenRouter run-record metadata (model, request_id, usage tokens, retry_count, duration_seconds) already lands in output/workflows/runs/*.yaml; this task just adds the smoke test that proves it stays there, plus the parallel guarantee that the same provider metadata reaches generated artifact provenance via provenance.provider_metadata.

tests/test_openrouter_live.py covers:
- chapter-filter, from/to-chapter range, and empty-selection failure on init (non-live, deterministic)
- CLI smoke through generate from-source with --chapter
- a pytest-skipped live OpenRouter one-chapter end-to-end gated by OPENROUTER_API_KEY + INFOSPACE_BENCH_ENABLE_LIVE_OPENROUTER, with INFOSPACE_BENCH_LIVE_MODEL override (default openai/gpt-4o-mini)

docs/generic-source-generator.md gains a "Live OpenRouter runs (handle with care)" section that walks plan-before-run, single-chapter live run, the budget/usage artifacts, and the checks a reviewer should run before scaling to the full book.

129 tests pass, 1 skipped (the live smoke, correctly gated).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-17 23:04:19 +02:00
parent 348deca9f2
commit ab23c5873e
5 changed files with 351 additions and 1 deletions
--- a/src/infospace_bench/cli.py
+++ b/src/infospace_bench/cli.py
@@ -148,6 +148,20 @@ def build_parser() -> argparse.ArgumentParser:
    generate_init.add_argument("--name", required=True)
    generate_init.add_argument("--profile", default="general-knowledge")
    generate_init.add_argument("--max-chunks", type=int, default=0)
+    generate_init.add_argument(
+        "--chapter",
+        action="append",
+        default=[],
+        help="Filter to chapter label or number (repeatable: --chapter I --chapter II)",
+    )
+    generate_init.add_argument("--from-chapter", type=int, default=None)
+    generate_init.add_argument("--to-chapter", type=int, default=None)
+    generate_init.add_argument(
+        "--chunk",
+        action="append",
+        default=[],
+        help="Filter to source chunk id (repeatable)",
+    )

    generate_plan = generate_sub.add_parser(
        "plan",
@@ -226,6 +240,20 @@ def build_parser() -> argparse.ArgumentParser:
    generate_from_source.add_argument("--model", default="")
    generate_from_source.add_argument("--fixture-responses", default="")
    generate_from_source.add_argument("--max-chunks", type=int, default=0)
+    generate_from_source.add_argument(
+        "--chapter",
+        action="append",
+        default=[],
+        help="Filter to chapter label or number (repeatable)",
+    )
+    generate_from_source.add_argument("--from-chapter", type=int, default=None)
+    generate_from_source.add_argument("--to-chapter", type=int, default=None)
+    generate_from_source.add_argument(
+        "--chunk",
+        action="append",
+        default=[],
+        help="Filter to source chunk id (repeatable)",
+    )
    generate_from_source.add_argument("--apply", action="store_true")

    budget = sub.add_parser("budget", help="Inspect per-infospace budget and usage records")
@@ -479,6 +507,10 @@ def main(argv: list[str] | None = None) -> int:
                    name=args.name,
                    profile=args.profile,
                    max_chunks=_optional_positive(args.max_chunks),
+                    chapter_filter=args.chapter or None,
+                    chunk_filter=args.chunk or None,
+                    from_chapter=args.from_chapter,
+                    to_chapter=args.to_chapter,
                )
                _write_json(
                    {
@@ -537,6 +569,10 @@ def main(argv: list[str] | None = None) -> int:
                    name=args.name,
                    profile=args.profile,
                    max_chunks=_optional_positive(args.max_chunks),
+                    chapter_filter=args.chapter or None,
+                    chunk_filter=args.chunk or None,
+                    from_chapter=args.from_chapter,
+                    to_chapter=args.to_chapter,
                )
                if args.apply:
                    result = run_generation(
--- a/src/infospace_bench/generator.py
+++ b/src/infospace_bench/generator.py
@@ -79,8 +79,30 @@ def init_generation_infospace(
    name: str,
    profile: str = DEFAULT_PROFILE,
    max_chunks: int | None = None,
+    chapter_filter: list[str] | None = None,
+    chunk_filter: list[str] | None = None,
+    from_chapter: int | None = None,
+    to_chapter: int | None = None,
 ) -> Any:
    chunks = normalize_source(source, max_chunks=max_chunks)
+    chunks = _filter_chunks_by_chapter(
+        chunks,
+        chapter_filter=chapter_filter,
+        chunk_filter=chunk_filter,
+        from_chapter=from_chapter,
+        to_chapter=to_chapter,
+    )
+    if not chunks:
+        raise InfospaceError(
+            "empty_chapter_selection",
+            "Selection filters excluded every chunk; nothing to register",
+            {
+                "chapter_filter": chapter_filter,
+                "chunk_filter": chunk_filter,
+                "from_chapter": from_chapter,
+                "to_chapter": to_chapter,
+            },
+        )
    infospace = create_infospace(Path(workspace), slug, name=name)
    _install_profile(infospace.root, profile)
    _write_workflows(infospace.root, profile)
@@ -256,6 +278,42 @@ def plan_generation_summary(
    }


+def _filter_chunks_by_chapter(  # Return, in input order, the chunks that pass every active selection filter.
+    chunks: list[SourceChunk],
+    *,
+    chapter_filter: list[str] | None,
+    chunk_filter: list[str] | None,
+    from_chapter: int | None,
+    to_chapter: int | None,
+) -> list[SourceChunk]:
+    chunk_set = {value.strip() for value in (chunk_filter or []) if value.strip()}  # blank / whitespace-only ids are dropped
+    label_set = {value.strip().lower() for value in (chapter_filter or []) if value.strip()}  # lowercased so label matching is case-insensitive
+    if not chunk_set and not label_set and from_chapter is None and to_chapter is None:
+        return list(chunks)  # no active filter: hand back a shallow copy of the input
+    out: list[SourceChunk] = []
+    for chunk in chunks:
+        if chunk_set and chunk.chunk_id not in chunk_set:  # chunk-id filter: exact match on chunk_id
+            continue
+        if label_set:  # chapter filter: passes on a number match OR a label match
+            label = (chunk.chapter_label or "").strip().lower()
+            number_match = (
+                chunk.chapter_number is not None
+                and str(chunk.chapter_number) in label_set  # the number is compared via its str() form
+            )
+            label_match = label in label_set if label else False  # a chunk with no label can only match by number
+            if not (number_match or label_match):
+                continue
+        if from_chapter is not None or to_chapter is not None:
+            if chunk.chapter_number is None:  # any range bound excludes chunks that have no chapter number
+                continue
+            if from_chapter is not None and chunk.chapter_number < from_chapter:  # lower bound is inclusive
+                continue
+            if to_chapter is not None and chunk.chapter_number > to_chapter:  # upper bound is inclusive
+                continue
+        out.append(chunk)  # survived every active filter (filters combine with AND)
+    return out
+
+
 def _select_source_chunks(
    sources: list[Any],
    *,