generated from coulomb/repo-seed
Every generate plan invocation now appends its compact summary to output/budget/plans.yaml with a deterministic 12-char snapshot_id hashed over the selection filters and the estimated call/token/cost totals. Identical-fingerprint plans refresh the most recent entry's recorded_at instead of stacking duplicates. Retention defaults to the last 50 snapshots; older entries are pruned and counted on a top-level pruned_count field. The summary now echoes its input filters (chapter_filter, chunk_filter, from_chapter, to_chapter) so reviewers can read the snapshot without cross-referencing the CLI invocation. New module src/infospace_bench/budget.py owns layer 1 (per-infospace recording) of the IB-WP-0019 three-layer design; layer 2 still belongs in llm-connect LLM-WP-0004 and layer 3 in state-hub. 99 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
180 lines
6.0 KiB
Python
180 lines
6.0 KiB
Python
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import zipfile
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
|
|
from infospace_bench.budget import (
|
|
PLAN_RETENTION_DEFAULT,
|
|
PLANS_FILE,
|
|
PLANS_SCHEMA_VERSION,
|
|
read_plan_snapshots,
|
|
record_plan_snapshot,
|
|
)
|
|
from infospace_bench.generator import init_generation_infospace, plan_generation
|
|
|
|
|
|
# Text stored as META-INF/container.xml in the fixture EPUB; its single
# <rootfile> entry points at OEBPS/content.opf as the package document.
# NOTE(review): the lone `|` lines inside this literal look like paste or
# extraction residue rather than XML — confirm against the upstream file.
CONTAINER_XML = """<?xml version="1.0"?>
|
|
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
|
|
<rootfiles>
|
|
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
|
|
</rootfiles>
|
|
</container>
|
|
"""
|
|
|
|
# Text stored as OEBPS/content.opf in the fixture EPUB: Dublin Core metadata
# for "Budget Test Book" plus a manifest and spine that list three XHTML
# chapters (ch1, ch2, ch3) in reading order.
# NOTE(review): the lone `|` lines inside this literal look like paste or
# extraction residue rather than XML — confirm against the upstream file.
PACKAGE_OPF = """<?xml version="1.0" encoding="utf-8"?>
|
|
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="bookid">
|
|
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
|
|
<dc:identifier id="bookid">urn:test:budget</dc:identifier>
|
|
<dc:title>Budget Test Book</dc:title>
|
|
<dc:creator>Author</dc:creator>
|
|
<dc:language>en</dc:language>
|
|
</metadata>
|
|
<manifest>
|
|
<item id="ch1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
|
|
<item id="ch2" href="ch2.xhtml" media-type="application/xhtml+xml"/>
|
|
<item id="ch3" href="ch3.xhtml" media-type="application/xhtml+xml"/>
|
|
</manifest>
|
|
<spine>
|
|
<itemref idref="ch1"/>
|
|
<itemref idref="ch2"/>
|
|
<itemref idref="ch3"/>
|
|
</spine>
|
|
</package>
|
|
"""
|
|
|
|
|
|
def _write_three_chapter_epub(path: Path) -> None:
    """Create a minimal three-chapter EPUB archive at *path*.

    The archive receives the ``mimetype`` entry, the container and package
    documents held in ``CONTAINER_XML`` / ``PACKAGE_OPF``, and one XHTML file
    per chapter (headed ``I``, ``II`` and ``III``) whose single paragraph is
    padded with forty filler words.
    """
    filler = " ".join(f"word{n}" for n in range(40))
    scaffolding = (
        ("mimetype", "application/epub+zip"),
        ("META-INF/container.xml", CONTAINER_XML),
        ("OEBPS/content.opf", PACKAGE_OPF),
    )

    with zipfile.ZipFile(path, "w") as epub:
        # Fixed entries first, in the same order as before; chapters follow.
        for member, content in scaffolding:
            epub.writestr(member, content)

        for number, heading in enumerate(("I", "II", "III"), start=1):
            markup = (
                "<html><head><title>Book</title></head>"
                f"<body><h2>{heading}</h2>"
                f"<p>Body of chapter {heading} with {filler}.</p></body></html>"
            )
            epub.writestr(f"OEBPS/ch{number}.xhtml", markup)
|
|
|
|
|
|
def _build_infospace(tmp_path: Path) -> Path:
    """Write the fixture EPUB into *tmp_path* and initialise an infospace from it.

    Returns the ``root`` attribute of the object produced by
    ``init_generation_infospace``.
    """
    epub_path = tmp_path / "book.epub"
    _write_three_chapter_epub(epub_path)
    return init_generation_infospace(
        tmp_path,
        epub_path,
        "budget-test",
        name="Budget Test",
        profile="general-knowledge",
    ).root
|
|
|
|
|
|
def test_record_plan_snapshot_writes_yaml_with_stable_id(tmp_path: Path) -> None:
    """Recording one summary twice yields one stored entry and one stable id."""
    root = _build_infospace(tmp_path)
    summary = plan_generation(root, persist=False)

    # Two back-to-back writes of the very same summary.
    first_id = record_plan_snapshot(root, summary)
    second_id = record_plan_snapshot(root, summary)

    plans_path = root / PLANS_FILE
    document = yaml.safe_load(plans_path.read_text(encoding="utf-8"))

    assert document["schema_version"] == PLANS_SCHEMA_VERSION
    assert document["pruned_count"] == 0
    assert first_id == second_id, "same summary must yield same snapshot_id"

    # The duplicate write must not have stacked a second entry.
    entries = document["snapshots"]
    assert len(entries) == 1
    assert entries[0]["snapshot_id"] == first_id
|
|
|
|
|
|
def test_different_filters_produce_distinct_snapshots(tmp_path: Path) -> None:
    """A full plan and a chapter-2-only plan are stored as separate snapshots."""
    root = _build_infospace(tmp_path)

    # Build both plans up front, then record them in the same order.
    plans = (
        plan_generation(root, persist=False),
        plan_generation(root, from_chapter=2, to_chapter=2, persist=False),
    )
    for plan in plans:
        record_plan_snapshot(root, plan)

    recorded = read_plan_snapshots(root)
    assert len(recorded) == 2
    assert len({entry["snapshot_id"] for entry in recorded}) == 2

    # The chapter-limited snapshot echoes the filter values it was built with.
    single_chunk = next(
        entry for entry in recorded if entry["selected_chunk_count"] == 1
    )
    echoed = single_chunk["filters"]
    assert echoed["from_chapter"] == 2
    assert echoed["to_chapter"] == 2
|
|
|
|
|
|
def test_plan_generation_persists_snapshot_by_default(tmp_path: Path) -> None:
    """Without ``persist=False`` a plan call records exactly one snapshot."""
    root = _build_infospace(tmp_path)

    plan = plan_generation(root, from_chapter=1, to_chapter=2)

    assert "snapshot_id" in plan
    plans_path = root / PLANS_FILE
    assert plans_path.is_file()

    # The stored entry carries the id that the call itself reported.
    recorded = read_plan_snapshots(root)
    assert len(recorded) == 1
    assert recorded[0]["snapshot_id"] == plan["snapshot_id"]
|
|
|
|
|
|
def test_plan_generation_persist_false_skips_write(tmp_path: Path) -> None:
    """Planning with ``persist=False`` must leave no plans file behind."""
    root = _build_infospace(tmp_path)
    plans_path = root / PLANS_FILE

    plan_generation(root, persist=False)

    assert not plans_path.exists()
|
|
|
|
|
|
def test_plan_snapshot_retention_prunes_old_entries(tmp_path: Path) -> None:
    """Recording more distinct snapshots than ``retention`` prunes the excess.

    Five plans with five different filter selections are recorded with
    ``retention=3``; the plans file must then hold three snapshots and report
    a non-zero ``pruned_count``.
    """
    root = _build_infospace(tmp_path)

    # Each selection uses a different from/to chapter pair so that no two
    # plans are identical. The earlier version reused the unfiltered plan
    # twice with the same `max_calls` tweak, so its fifth write was a
    # duplicate of the fourth and only four distinct snapshots were produced.
    selections = (
        {"from_chapter": 1, "to_chapter": 1},
        {"from_chapter": 2, "to_chapter": 2},
        {"from_chapter": 3, "to_chapter": 3},
        {},
        {"from_chapter": 1, "to_chapter": 2},
    )

    snapshot_ids = []
    for selection in selections:
        summary = plan_generation(root, persist=False, **selection)
        snapshot_ids.append(record_plan_snapshot(root, summary, retention=3))

    # Guard the premise of the test: nothing was collapsed as a duplicate.
    assert len(set(snapshot_ids)) == len(selections), (
        "each filter selection must be recorded as its own snapshot"
    )

    data = yaml.safe_load((root / PLANS_FILE).read_text(encoding="utf-8"))
    assert len(data["snapshots"]) == 3
    assert data["pruned_count"] >= 1
|
|
|
|
|
|
def test_plan_cli_writes_snapshot(tmp_path: Path) -> None:
    """Run ``generate plan`` through the CLI and check the recorded snapshot.

    The command is executed as ``python -m infospace_bench`` in a subprocess;
    its stdout must be JSON containing ``snapshot_id`` and exactly one
    snapshot echoing the chapter range must be stored afterwards.
    """
    root = _build_infospace(tmp_path)

    env = os.environ.copy()
    # NOTE(review): the second entry is a machine-specific checkout path;
    # it is kept for compatibility but should come from configuration.
    search_path = ["src", "/home/worsch/markitect-tool/src"]
    # Keep whatever PYTHONPATH the caller already set instead of clobbering
    # it, and join with the platform separator rather than a literal ":".
    inherited = env.get("PYTHONPATH")
    if inherited:
        search_path.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(search_path)

    command = [
        sys.executable,
        "-m",
        "infospace_bench",
        "generate",
        "plan",
        str(root),
        "--from-chapter",
        "1",
        "--to-chapter",
        "2",
        "--cost-per-1k",
        "0.5",
    ]
    result = subprocess.run(
        command,
        check=False,  # the return code is asserted below with stderr attached
        env=env,
        text=True,
        capture_output=True,
    )

    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    assert "snapshot_id" in payload

    snapshots = read_plan_snapshots(root)
    assert len(snapshots) == 1
    assert snapshots[0]["filters"]["from_chapter"] == 1
    assert snapshots[0]["filters"]["to_chapter"] == 2
|