From 13f9c1895c0a3a9c197dc677424ef65e6de7858c Mon Sep 17 00:00:00 2001 From: tegwick Date: Sun, 17 May 2026 18:18:09 +0200 Subject: [PATCH] IB-WP-0016-T03: scale-aware planning Replace generate plan's full-prompt dump with a compact summary that reports selected-chunk counts, selected chapter numbers, per-workflow call counts, prompt-word and token estimates, and a rough USD cost when --cost-per-1k is supplied. Selection filters --chapter (label or number, repeatable), --from-chapter / --to-chapter (numeric range), and --chunk (repeatable id) shape the estimate. Budget caps --max-calls and --cost-cap are reported as exceeds_* booleans so callers can fail fast before run. The old full per-workflow plan with prompts remains available behind --full so deep inspection is opt-in instead of the default. Whole-Lefevre estimate at default max_words=800: 146 chunks, 730 calls, ~518k prompt tokens, ~$155 at $0.30/1k. Chapters 3-5 only: 19 chunks, 95 calls, ~64k tokens. 87 tests pass. Co-Authored-By: Claude Opus 4.7 --- docs/generic-source-generator.md | 28 +++ src/infospace_bench/cli.py | 43 +++- src/infospace_bench/generator.py | 208 +++++++++++++++++- tests/test_plan_scale.py | 161 ++++++++++++++ ...-0016-lefevre-ebook-infospace-readiness.md | 2 +- ...B-WP-0018-adaptive-llm-routing-consumer.md | 1 + 6 files changed, 434 insertions(+), 9 deletions(-) create mode 100644 tests/test_plan_scale.py diff --git a/docs/generic-source-generator.md b/docs/generic-source-generator.md index dd3d373..50dd3d8 100644 --- a/docs/generic-source-generator.md +++ b/docs/generic-source-generator.md @@ -48,6 +48,34 @@ infospace-bench generate status ./infospaces/book-space shows chunk counts, generated artifact counts, evaluations, metrics, history, and stale source/profile inputs. 
+### Scale-aware plan + +`generate plan` returns a compact estimate by default — counts of selected +chunks, calls per workflow, prompt-word and token estimates, and a rough +USD cost when `--cost-per-1k` is supplied. Long corpora no longer dump +hundreds of full prompts unless `--full` is set. + +```bash +infospace-bench generate plan ./infospaces/book-space \ + --from-chapter 1 --to-chapter 3 \ + --cost-per-1k 0.30 \ + --max-calls 50 \ + --cost-cap 2.00 +``` + +Selection filters: + +- `--chapter LABEL` (repeatable) — match a chapter by Roman/Arabic label + or numeric value (e.g. `--chapter I` or `--chapter 2`) +- `--from-chapter N` / `--to-chapter N` — numeric chapter range +- `--chunk ID` (repeatable) — exact source chunk id (e.g. + `chapter-01-part-002`) + +Budget flags `--max-calls` and `--cost-cap` are reported as +`exceeds_max_calls` / `exceeds_cost_cap` booleans in the summary, so a +caller can fail fast before invoking `run`; `exceeds_cost_cap` stays `false` unless `--cost-per-1k` is also supplied, since there is otherwise no cost estimate to compare against the cap. Use `--full` to opt back into +the full per-workflow plan with prompts for deep inspection.
+ ## OpenRouter Live model calls are explicit: diff --git a/src/infospace_bench/cli.py b/src/infospace_bench/cli.py index be0cfe4..8eab20b 100644 --- a/src/infospace_bench/cli.py +++ b/src/infospace_bench/cli.py @@ -155,6 +155,33 @@ def build_parser() -> argparse.ArgumentParser: ) generate_plan.add_argument("root") generate_plan.add_argument("--stage", default="all") + generate_plan.add_argument( + "--chapter", + action="append", + default=[], + help="Filter to chapter label or number (repeatable: --chapter I --chapter II)", + ) + generate_plan.add_argument("--from-chapter", type=int, default=None) + generate_plan.add_argument("--to-chapter", type=int, default=None) + generate_plan.add_argument( + "--chunk", + action="append", + default=[], + help="Filter to source chunk id (repeatable)", + ) + generate_plan.add_argument("--max-calls", type=int, default=None) + generate_plan.add_argument("--cost-cap", type=float, default=None) + generate_plan.add_argument( + "--cost-per-1k", type=float, default=0.0, help="USD per 1k prompt tokens for rough cost estimate" + ) + generate_plan.add_argument( + "--entities-per-chunk", type=int, default=2, help="Estimate of entities each chunk yields" + ) + generate_plan.add_argument( + "--full", + action="store_true", + help="Include full per-stage prompts in the output (off by default for long corpora)", + ) generate_run = generate_sub.add_parser( "run", @@ -448,7 +475,21 @@ def main(argv: list[str] | None = None) -> int: } ) elif args.generate_command == "plan": - _write_json(plan_generation(Path(args.root), stage=args.stage)) + _write_json( + plan_generation( + Path(args.root), + stage=args.stage, + chapter_filter=args.chapter or None, + chunk_filter=args.chunk or None, + from_chapter=args.from_chapter, + to_chapter=args.to_chapter, + max_calls=args.max_calls, + cost_cap=args.cost_cap, + cost_per_1k_tokens=args.cost_per_1k, + entities_per_chunk=args.entities_per_chunk, + full=args.full, + ) + ) elif args.generate_command == "run": 
_write_json( run_generation( diff --git a/src/infospace_bench/generator.py b/src/infospace_bench/generator.py index f296613..b997fe4 100644 --- a/src/infospace_bench/generator.py +++ b/src/infospace_bench/generator.py @@ -89,8 +89,51 @@ def init_generation_infospace( return load_infospace(infospace.root) -def plan_generation(root: str | Path, *, stage: str = "all") -> dict[str, Any]: +WORDS_PER_TOKEN_DEFAULT = 0.75 +ENTITIES_PER_CHUNK_ESTIMATE = 2 + +_CALLS_PER_CHUNK_BY_WORKFLOW = { + "generic-source-summary": 1, + "generic-source-entities": 1, + "generic-source-relations": 1, +} + + +def plan_generation( + root: str | Path, + *, + stage: str = "all", + chapter_filter: list[str] | None = None, + chunk_filter: list[str] | None = None, + from_chapter: int | None = None, + to_chapter: int | None = None, + max_calls: int | None = None, + cost_cap: float | None = None, + cost_per_1k_tokens: float = 0.0, + words_per_token: float = WORDS_PER_TOKEN_DEFAULT, + entities_per_chunk: int = ENTITIES_PER_CHUNK_ESTIMATE, + full: bool = False, +) -> dict[str, Any]: root_path = Path(root) + status = status_generation(root_path) + summary = plan_generation_summary( + root_path, + stage=stage, + chapter_filter=chapter_filter, + chunk_filter=chunk_filter, + from_chapter=from_chapter, + to_chapter=to_chapter, + max_calls=max_calls, + cost_cap=cost_cap, + cost_per_1k_tokens=cost_per_1k_tokens, + words_per_token=words_per_token, + entities_per_chunk=entities_per_chunk, + ) + summary["root"] = str(root_path) + summary["stale"] = status["stale"] + summary["status"] = "planned" + if not full: + return summary workflow_ids = _workflow_ids_for_stage(stage) plans: list[dict[str, Any]] = [] for workflow_id in workflow_ids: @@ -104,17 +147,168 @@ def plan_generation(root: str | Path, *, stage: str = "all") -> dict[str, Any]: "error": exc.to_dict(), } ) - status = status_generation(root_path) + summary["workflows"] = plans + return summary + + +def plan_generation_summary( + root: str | Path, + *, 
+ stage: str = "all", + chapter_filter: list[str] | None = None, + chunk_filter: list[str] | None = None, + from_chapter: int | None = None, + to_chapter: int | None = None, + max_calls: int | None = None, + cost_cap: float | None = None, + cost_per_1k_tokens: float = 0.0, + words_per_token: float = WORDS_PER_TOKEN_DEFAULT, + entities_per_chunk: int = ENTITIES_PER_CHUNK_ESTIMATE, +) -> dict[str, Any]: + root_path = Path(root) + infospace = load_infospace(root_path) + sources = [item for item in infospace.artifacts if item.kind == "source"] + selected = _select_source_chunks( + sources, + chapter_filter=chapter_filter, + chunk_filter=chunk_filter, + from_chapter=from_chapter, + to_chapter=to_chapter, + ) + workflow_ids = _workflow_ids_for_stage(stage) + profile_name = _read_profile_name(root_path) + template_words = _profile_template_words(root_path, profile_name) + chunk_word_total = sum(_source_word_count(root_path, item) for item in selected) + per_stage: list[dict[str, Any]] = [] + total_calls = 0 + total_prompt_words = 0 + for workflow_id in workflow_ids: + if workflow_id == "generic-source-evaluations": + calls = len(selected) * max(0, entities_per_chunk) + template_label = "evaluate-entity" + entity_words_estimate = 80 + prompt_words = calls * ( + template_words.get(template_label, 0) + entity_words_estimate + ) + else: + calls = len(selected) * _CALLS_PER_CHUNK_BY_WORKFLOW.get(workflow_id, 0) + template_label = _template_for_workflow(workflow_id) + prompt_words = calls * template_words.get(template_label, 0) + chunk_word_total * ( + 1 if calls else 0 + ) + per_stage.append( + { + "workflow_id": workflow_id, + "calls": calls, + "prompt_words_estimate": prompt_words, + } + ) + total_calls += calls + total_prompt_words += prompt_words + total_tokens = int(round(total_prompt_words / words_per_token)) if words_per_token > 0 else 0 + cost: float | None = None + if cost_per_1k_tokens > 0: + cost = round((total_tokens / 1000.0) * cost_per_1k_tokens, 4) + 
chapter_numbers = sorted( + { + int(item.provenance.get("chapter_number")) + for item in selected + if isinstance(item.provenance.get("chapter_number"), int) + } + ) return { - "root": str(root_path), "stage": stage, - "status": "planned", - "stale": status["stale"], - "source_chunk_count": status["source_chunk_count"], - "workflows": plans, + "source_chunk_count": len(sources), + "selected_chunk_count": len(selected), + "selected_chunk_ids": [item.id.split("/", 1)[-1].rsplit(".md", 1)[0] for item in selected], + "selected_chapter_numbers": chapter_numbers, + "per_workflow": per_stage, + "total_provider_calls_estimate": total_calls, + "total_prompt_words_estimate": total_prompt_words, + "total_prompt_tokens_estimate": total_tokens, + "estimated_cost_usd": cost, + "cost_per_1k_tokens": cost_per_1k_tokens or None, + "words_per_token": words_per_token, + "entities_per_chunk_estimate": entities_per_chunk, + "max_calls": max_calls, + "cost_cap": cost_cap, + "exceeds_max_calls": bool(max_calls is not None and total_calls > max_calls), + "exceeds_cost_cap": bool(cost_cap is not None and cost is not None and cost > cost_cap), } +def _select_source_chunks( + sources: list[Any], + *, + chapter_filter: list[str] | None, + chunk_filter: list[str] | None, + from_chapter: int | None, + to_chapter: int | None, +) -> list[Any]: + chunk_set = {value.strip() for value in (chunk_filter or []) if value.strip()} + label_set = {value.strip().lower() for value in (chapter_filter or []) if value.strip()} + out: list[Any] = [] + for item in sources: + chunk_id = item.provenance.get("chunk_id") or item.id.split("/", 1)[-1].rsplit(".md", 1)[0] + if chunk_set and chunk_id not in chunk_set: + continue + chapter_number = item.provenance.get("chapter_number") + chapter_label = (item.provenance.get("chapter_label") or "").strip().lower() + if label_set: + number_match = ( + isinstance(chapter_number, int) and str(chapter_number) in label_set + ) + label_match = chapter_label in label_set if 
chapter_label else False + if not (number_match or label_match): + continue + if from_chapter is not None or to_chapter is not None: + if not isinstance(chapter_number, int): + continue + if from_chapter is not None and chapter_number < from_chapter: + continue + if to_chapter is not None and chapter_number > to_chapter: + continue + out.append(item) + return out + + +def _template_for_workflow(workflow_id: str) -> str: + mapping = { + "generic-source-summary": "summarize-source", + "generic-source-entities": "extract-entities", + "generic-source-relations": "extract-relations", + "generic-source-evaluations": "evaluate-entity", + } + return mapping.get(workflow_id, "") + + +def _profile_template_words(root: Path, profile: str) -> dict[str, int]: + template_dir = Path(root) / "profiles" / profile / "templates" + counts: dict[str, int] = {} + if not template_dir.is_dir(): + return counts + for path in template_dir.glob("*.md"): + try: + text = path.read_text(encoding="utf-8") + except OSError: + continue + counts[path.stem] = len(text.split()) + return counts + + +def _source_word_count(root: Path, artifact: Any) -> int: + path = Path(root) / artifact.path + try: + return len(path.read_text(encoding="utf-8").split()) + except OSError: + return 0 + + +def _read_profile_name(root: Path) -> str: + state = _read_state(root) + return str(state.get("profile") or DEFAULT_PROFILE) + + def run_generation( root: str | Path, *, diff --git a/tests/test_plan_scale.py b/tests/test_plan_scale.py new file mode 100644 index 0000000..b2959ce --- /dev/null +++ b/tests/test_plan_scale.py @@ -0,0 +1,161 @@ +import json +import os +import subprocess +import sys +import zipfile +from pathlib import Path + +from infospace_bench.generator import ( + init_generation_infospace, + plan_generation, + plan_generation_summary, +) + + +CONTAINER_XML = """ + + + + + +""" + +PACKAGE_OPF = """ + + + urn:test:plan + Plan Test Book + Author + en + + + + + + + + + + + + + + +""" + + +def 
_write_four_chapter_epub(path: Path) -> None: + with zipfile.ZipFile(path, "w") as archive: + archive.writestr("mimetype", "application/epub+zip") + archive.writestr("META-INF/container.xml", CONTAINER_XML) + archive.writestr("OEBPS/content.opf", PACKAGE_OPF) + for idx, label in enumerate(("I", "II", "III", "IV"), start=1): + archive.writestr( + f"OEBPS/ch{idx}.xhtml", + f"Book" + f"

{label}

" + f"

The narrator describes chapter {label} events with stocks and traders. " + + " ".join(f"sentence{n}" for n in range(40)) + + "

", + ) + + +def _build_plan_infospace(tmp_path: Path) -> Path: + book = tmp_path / "book.epub" + _write_four_chapter_epub(book) + infospace = init_generation_infospace( + tmp_path, book, "plan-test", name="Plan Test", profile="general-knowledge" + ) + return infospace.root + + +def test_plan_summary_is_compact_and_does_not_dump_prompts(tmp_path: Path) -> None: + root = _build_plan_infospace(tmp_path) + + summary = plan_generation(root) + + serialized = json.dumps(summary) + assert '"prompt":' not in serialized, "compact plan must not embed full prompts" + assert summary["source_chunk_count"] == 4 + assert summary["selected_chunk_count"] == 4 + assert summary["selected_chapter_numbers"] == [1, 2, 3, 4] + assert summary["total_provider_calls_estimate"] > 0 + assert summary["total_prompt_tokens_estimate"] > 0 + assert summary["estimated_cost_usd"] is None + assert "workflows" not in summary + + +def test_plan_chapter_filter_selects_subset(tmp_path: Path) -> None: + root = _build_plan_infospace(tmp_path) + + by_label = plan_generation_summary(root, chapter_filter=["I"]) + by_number = plan_generation_summary(root, chapter_filter=["2"]) + by_range = plan_generation_summary(root, from_chapter=2, to_chapter=3) + by_chunk = plan_generation_summary(root, chunk_filter=["chapter-04"]) + + assert by_label["selected_chapter_numbers"] == [1] + assert by_number["selected_chapter_numbers"] == [2] + assert by_range["selected_chapter_numbers"] == [2, 3] + assert by_chunk["selected_chunk_ids"] == ["chapter-04"] + + +def test_plan_caps_flag_when_estimate_exceeds_budget(tmp_path: Path) -> None: + root = _build_plan_infospace(tmp_path) + + summary = plan_generation_summary( + root, + max_calls=2, + cost_cap=0.01, + cost_per_1k_tokens=1.0, + ) + + assert summary["total_provider_calls_estimate"] > 2 + assert summary["exceeds_max_calls"] is True + assert summary["estimated_cost_usd"] is not None and summary["estimated_cost_usd"] > 0.01 + assert summary["exceeds_cost_cap"] is True + + +def 
test_plan_full_mode_includes_workflow_plans(tmp_path: Path) -> None: + root = _build_plan_infospace(tmp_path) + + full_plan = plan_generation(root, full=True) + + assert "workflows" in full_plan + assert len(full_plan["workflows"]) >= 1 + + +def test_plan_cli_compact_default_and_filters(tmp_path: Path) -> None: + root = _build_plan_infospace(tmp_path) + env = os.environ.copy() + env["PYTHONPATH"] = "src:/home/worsch/markitect-tool/src" + + result = subprocess.run( + [ + sys.executable, + "-m", + "infospace_bench", + "generate", + "plan", + str(root), + "--from-chapter", + "2", + "--to-chapter", + "3", + "--cost-per-1k", + "0.5", + "--max-calls", + "1", + ], + check=False, + env=env, + text=True, + capture_output=True, + ) + + assert result.returncode == 0, result.stderr + payload = json.loads(result.stdout) + assert payload["selected_chapter_numbers"] == [2, 3] + assert payload["estimated_cost_usd"] is not None + assert payload["exceeds_max_calls"] is True + assert "workflows" not in payload + assert '"prompt":' not in result.stdout diff --git a/workplans/IB-WP-0016-lefevre-ebook-infospace-readiness.md b/workplans/IB-WP-0016-lefevre-ebook-infospace-readiness.md index dc18cad..126a0c8 100644 --- a/workplans/IB-WP-0016-lefevre-ebook-infospace-readiness.md +++ b/workplans/IB-WP-0016-lefevre-ebook-infospace-readiness.md @@ -139,7 +139,7 @@ state_hub_task_id: "47de1110-36d0-4d63-bf87-389746509e03" ```task id: IB-WP-0016-T03 -status: in_progress +status: done priority: high state_hub_task_id: "bee5c38a-f052-4edb-9313-b3a2ee5a6c26" ``` diff --git a/workplans/IB-WP-0018-adaptive-llm-routing-consumer.md b/workplans/IB-WP-0018-adaptive-llm-routing-consumer.md index f2799a2..c4ce0e8 100644 --- a/workplans/IB-WP-0018-adaptive-llm-routing-consumer.md +++ b/workplans/IB-WP-0018-adaptive-llm-routing-consumer.md @@ -13,6 +13,7 @@ depends_on_workplans: - LLM-WP-0004 related_workplans: - IB-WP-0016 +state_hub_workstream_id: "3d38642e-9d6d-4c7f-869f-b185a00bd0e6" --- # IB-WP-0018 — 
Adaptive LLM Routing — infospace-bench Consumer Wiring