IB-WP-0016-T03: scale-aware planning

Replace `generate plan`'s full-prompt dump with a compact summary that reports selected-chunk counts, selected chapter numbers, per-workflow call counts, prompt-word and token estimates, and a rough USD cost when --cost-per-1k is supplied. The selection filters --chapter (label or number, repeatable), --from-chapter / --to-chapter (numeric range), and --chunk (repeatable id) shape the estimate. The budget caps --max-calls and --cost-cap are reported as exceeds_* booleans so callers can fail fast before a run. The old full per-workflow plan with prompts remains available behind --full, so deep inspection is opt-in rather than the default. Whole-Lefevre estimate at default max_words=800: 146 chunks, 730 calls, ~518k prompt tokens, ~$155 at $0.30/1k. Chapters 3-5 only: 19 chunks, 95 calls, ~64k tokens. 87 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-17 18:18:09 +02:00
parent f8289699e7
commit 13f9c1895c
6 changed files with 434 additions and 9 deletions
--- a/src/infospace_bench/cli.py
+++ b/src/infospace_bench/cli.py
@@ -155,6 +155,33 @@ def build_parser() -> argparse.ArgumentParser:
    )
    generate_plan.add_argument("root")
    generate_plan.add_argument("--stage", default="all")
+    generate_plan.add_argument(
+        "--chapter",
+        action="append",
+        default=[],
+        help="Filter to chapter label or number (repeatable: --chapter I --chapter II)",
+    )
+    generate_plan.add_argument("--from-chapter", type=int, default=None)
+    generate_plan.add_argument("--to-chapter", type=int, default=None)
+    generate_plan.add_argument(
+        "--chunk",
+        action="append",
+        default=[],
+        help="Filter to source chunk id (repeatable)",
+    )
+    generate_plan.add_argument("--max-calls", type=int, default=None)
+    generate_plan.add_argument("--cost-cap", type=float, default=None)
+    generate_plan.add_argument(
+        "--cost-per-1k", type=float, default=0.0, help="USD per 1k prompt tokens for rough cost estimate"
+    )
+    generate_plan.add_argument(
+        "--entities-per-chunk", type=int, default=2, help="Estimate of entities each chunk yields"
+    )
+    generate_plan.add_argument(
+        "--full",
+        action="store_true",
+        help="Include full per-stage prompts in the output (off by default for long corpora)",
+    )

    generate_run = generate_sub.add_parser(
        "run",
@@ -448,7 +475,21 @@ def main(argv: list[str] | None = None) -> int:
                    }
                )
            elif args.generate_command == "plan":
-                _write_json(plan_generation(Path(args.root), stage=args.stage))
+                _write_json(
+                    plan_generation(
+                        Path(args.root),
+                        stage=args.stage,
+                        chapter_filter=args.chapter or None,
+                        chunk_filter=args.chunk or None,
+                        from_chapter=args.from_chapter,
+                        to_chapter=args.to_chapter,
+                        max_calls=args.max_calls,
+                        cost_cap=args.cost_cap,
+                        cost_per_1k_tokens=args.cost_per_1k,
+                        entities_per_chunk=args.entities_per_chunk,
+                        full=args.full,
+                    )
+                )
            elif args.generate_command == "run":
                _write_json(
                    run_generation(