generated from coulomb/repo-seed
IB-WP-0016-T03: scale-aware planning
Replace the full-prompt dump of `generate plan` with a compact summary that reports selected-chunk counts, selected chapter numbers, per-workflow call counts, prompt-word and token estimates, and a rough USD cost when `--cost-per-1k` is supplied. The selection filters `--chapter` (label or number, repeatable), `--from-chapter` / `--to-chapter` (numeric range), and `--chunk` (repeatable id) shape the estimate. The budget caps `--max-calls` and `--cost-cap` are reported as `exceeds_*` booleans so callers can fail fast before a run. The old full per-workflow plan with prompts remains available behind `--full`, so deep inspection is opt-in instead of the default. Whole-Lefevre estimate at the default max_words=800: 146 chunks, 730 calls, ~518k prompt tokens, ~$155 at $0.30/1k. Chapters 3-5 only: 19 chunks, 95 calls, ~64k tokens. 87 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
161
tests/test_plan_scale.py
Normal file
161
tests/test_plan_scale.py
Normal file
@@ -0,0 +1,161 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
from infospace_bench.generator import (
|
||||
init_generation_infospace,
|
||||
plan_generation,
|
||||
plan_generation_summary,
|
||||
)
|
||||
|
||||
|
||||
# OCF container descriptor for the fixture EPUB: it names OEBPS/content.opf as
# the package document. The literal must contain nothing but the XML itself;
# any stray text ahead of the root element makes it not well-formed.
CONTAINER_XML = """<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
"""
|
||||
|
||||
# OPF package document for the fixture EPUB: Dublin Core metadata plus a
# manifest and spine that list four XHTML chapters (ch1..ch4) in reading order.
# The literal must contain nothing but the XML itself so that it parses.
PACKAGE_OPF = """<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="bookid">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:identifier id="bookid">urn:test:plan</dc:identifier>
<dc:title>Plan Test Book</dc:title>
<dc:creator>Author</dc:creator>
<dc:language>en</dc:language>
</metadata>
<manifest>
<item id="ch1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
<item id="ch2" href="ch2.xhtml" media-type="application/xhtml+xml"/>
<item id="ch3" href="ch3.xhtml" media-type="application/xhtml+xml"/>
<item id="ch4" href="ch4.xhtml" media-type="application/xhtml+xml"/>
</manifest>
<spine>
<itemref idref="ch1"/>
<itemref idref="ch2"/>
<itemref idref="ch3"/>
<itemref idref="ch4"/>
</spine>
</package>
"""
|
||||
|
||||
|
||||
def _write_four_chapter_epub(path: Path) -> None:
    """Write a small four-chapter EPUB archive to *path*.

    The archive holds the ``mimetype`` entry, the container descriptor, the
    package document and one XHTML file per chapter. Each chapter has an
    ``<h2>`` heading ("I" to "IV") and a single paragraph made of a fixed
    sentence followed by forty filler words.
    """
    # The filler is the same for every chapter, so build it once.
    filler = " ".join(f"sentence{n}" for n in range(40))

    with zipfile.ZipFile(path, "w") as epub:
        epub.writestr("mimetype", "application/epub+zip")
        epub.writestr("META-INF/container.xml", CONTAINER_XML)
        epub.writestr("OEBPS/content.opf", PACKAGE_OPF)

        for number, heading in enumerate(("I", "II", "III", "IV"), start=1):
            markup = (
                "<html><head><title>Book</title></head>"
                f"<body><h2>{heading}</h2>"
                f"<p>The narrator describes chapter {heading} events with stocks and traders. "
                f"{filler}</p></body></html>"
            )
            epub.writestr(f"OEBPS/ch{number}.xhtml", markup)
|
||||
|
||||
|
||||
def _build_plan_infospace(tmp_path: Path) -> Path:
    """Build the fixture EPUB in *tmp_path* and initialise an infospace from it.

    Returns the ``root`` attribute of the object produced by
    ``init_generation_infospace``.
    """
    epub_path = tmp_path / "book.epub"
    _write_four_chapter_epub(epub_path)
    return init_generation_infospace(
        tmp_path,
        epub_path,
        "plan-test",
        name="Plan Test",
        profile="general-knowledge",
    ).root
|
||||
|
||||
|
||||
def test_plan_summary_is_compact_and_does_not_dump_prompts(tmp_path: Path) -> None:
    """Calling ``plan_generation`` without options yields the compact summary.

    The result must carry chunk/chapter selection and positive estimates, no
    cost when no price is given, and neither prompts nor a ``workflows`` key.
    """
    compact = plan_generation(_build_plan_infospace(tmp_path))

    # Inspect the JSON form so nested prompt fields would be caught as well.
    as_json = json.dumps(compact)
    assert '"prompt":' not in as_json, "compact plan must not embed full prompts"

    assert compact["source_chunk_count"] == 4
    assert compact["selected_chunk_count"] == 4
    assert compact["selected_chapter_numbers"] == [1, 2, 3, 4]
    assert compact["total_provider_calls_estimate"] > 0
    assert compact["total_prompt_tokens_estimate"] > 0
    assert compact["estimated_cost_usd"] is None
    assert "workflows" not in compact
|
||||
|
||||
|
||||
def test_plan_chapter_filter_selects_subset(tmp_path: Path) -> None:
    """Each selection filter narrows the summary to the expected chapters or chunks.

    Covers a chapter label, a chapter number given as a string, an inclusive
    numeric range, and an explicit chunk id.
    """
    infospace_root = _build_plan_infospace(tmp_path)

    label_pick = plan_generation_summary(infospace_root, chapter_filter=["I"])
    number_pick = plan_generation_summary(infospace_root, chapter_filter=["2"])
    range_pick = plan_generation_summary(infospace_root, from_chapter=2, to_chapter=3)
    chunk_pick = plan_generation_summary(infospace_root, chunk_filter=["chapter-04"])

    assert label_pick["selected_chapter_numbers"] == [1]
    assert number_pick["selected_chapter_numbers"] == [2]
    assert range_pick["selected_chapter_numbers"] == [2, 3]
    assert chunk_pick["selected_chunk_ids"] == ["chapter-04"]
|
||||
|
||||
|
||||
def test_plan_caps_flag_when_estimate_exceeds_budget(tmp_path: Path) -> None:
    """Budget caps below the estimate are reported through the ``exceeds_*`` flags."""
    # Caps chosen to be far below what the four-chapter fixture is expected to need.
    tight_budget = {
        "max_calls": 2,
        "cost_cap": 0.01,
        "cost_per_1k_tokens": 1.0,
    }
    report = plan_generation_summary(_build_plan_infospace(tmp_path), **tight_budget)

    assert report["total_provider_calls_estimate"] > 2
    assert report["exceeds_max_calls"] is True

    cost = report["estimated_cost_usd"]
    assert cost is not None and cost > 0.01
    assert report["exceeds_cost_cap"] is True
|
||||
|
||||
|
||||
def test_plan_full_mode_includes_workflow_plans(tmp_path: Path) -> None:
    """With ``full=True`` the plan exposes a non-empty ``workflows`` entry."""
    detailed = plan_generation(_build_plan_infospace(tmp_path), full=True)

    assert "workflows" in detailed
    assert len(detailed["workflows"]) >= 1
|
||||
|
||||
|
||||
def test_plan_cli_compact_default_and_filters(tmp_path: Path) -> None:
    """Run ``python -m infospace_bench generate plan`` and check its JSON output.

    The command is invoked with a chapter range, a per-1k price and a call cap
    of 1. It must exit with status 0 and print a compact JSON summary: chapters
    2 and 3 selected, a cost estimate present, the call cap flagged as
    exceeded, and neither a ``workflows`` key nor any prompt text.
    """
    root = _build_plan_infospace(tmp_path)

    env = os.environ.copy()
    # Put the source trees first, then keep whatever PYTHONPATH the caller
    # already had instead of discarding it. os.pathsep keeps the value valid on
    # platforms whose separator is not ":".
    # NOTE(review): the second entry is a machine-specific checkout path; it is
    # kept so existing setups keep working - confirm whether it can be replaced
    # by an installed dependency.
    search_path = ["src", "/home/worsch/markitect-tool/src"]
    inherited = env.get("PYTHONPATH")
    if inherited:
        search_path.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(search_path)

    command = [
        sys.executable,
        "-m",
        "infospace_bench",
        "generate",
        "plan",
        str(root),
        "--from-chapter",
        "2",
        "--to-chapter",
        "3",
        "--cost-per-1k",
        "0.5",
        "--max-calls",
        "1",
    ]
    # check=False: the exit status is asserted below so stderr appears in the
    # failure message.
    result = subprocess.run(
        command,
        check=False,
        env=env,
        text=True,
        capture_output=True,
    )

    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    assert payload["selected_chapter_numbers"] == [2, 3]
    assert payload["estimated_cost_usd"] is not None
    assert payload["exceeds_max_calls"] is True
    assert "workflows" not in payload
    assert '"prompt":' not in result.stdout
|
||||
Reference in New Issue
Block a user