"""Tests for plan-snapshot persistence (``infospace_bench.budget``).

The tests build a tiny three-chapter EPUB, initialise a generation infospace
from it, and then check how ``plan_generation`` / ``record_plan_snapshot``
write, deduplicate and prune entries in ``PLANS_FILE``.
"""

import json
import os
import subprocess
import sys
import zipfile
from pathlib import Path

import yaml

from infospace_bench.budget import (
    PLAN_RETENTION_DEFAULT,
    PLANS_FILE,
    PLANS_SCHEMA_VERSION,
    read_plan_snapshots,
    record_plan_snapshot,
)
from infospace_bench.generator import init_generation_infospace, plan_generation

# NOTE(review): the markup of the three fixtures below was lost in the copy of
# this file under review (only the text nodes survived). They are rebuilt here
# from the standard EPUB layout; confirm against the repository version.
CONTAINER_XML = """\
<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>
"""

# NOTE(review): surviving text was "urn:test:budget Budget Test Book Author en";
# the title/creator split and the manifest/spine entries are reconstructed.
PACKAGE_OPF = """\
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="bookid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
    <dc:identifier id="bookid">urn:test:budget</dc:identifier>
    <dc:title>Budget Test Book</dc:title>
    <dc:creator>Author</dc:creator>
    <dc:language>en</dc:language>
  </metadata>
  <manifest>
    <item id="ch1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
    <item id="ch2" href="ch2.xhtml" media-type="application/xhtml+xml"/>
    <item id="ch3" href="ch3.xhtml" media-type="application/xhtml+xml"/>
  </manifest>
  <spine>
    <itemref idref="ch1"/>
    <itemref idref="ch2"/>
    <itemref idref="ch3"/>
  </spine>
</package>
"""

# Import-path entries the CLI test has always exported to the child process.
# Kept for backward compatibility; the second entry only exists on the
# original author's machine and is harmless elsewhere.
_LEGACY_PYTHONPATH = ("src", "/home/worsch/markitect-tool/src")


def _write_three_chapter_epub(path: Path) -> None:
    """Write a minimal EPUB with chapters ``I``, ``II`` and ``III`` to *path*.

    Each chapter file is ``OEBPS/ch<N>.xhtml`` and contains the chapter label
    plus a 40-word body sentence.
    """
    with zipfile.ZipFile(path, "w") as archive:
        archive.writestr("mimetype", "application/epub+zip")
        archive.writestr("META-INF/container.xml", CONTAINER_XML)
        archive.writestr("OEBPS/content.opf", PACKAGE_OPF)
        for idx, label in enumerate(("I", "II", "III"), start=1):
            filler = " ".join(f"word{n}" for n in range(40))
            # NOTE(review): tag names are reconstructed (see note above the
            # fixtures); the text content matches the surviving source.
            archive.writestr(
                f"OEBPS/ch{idx}.xhtml",
                "<html><head><title>Book</title></head><body>"
                f"<h1>{label}</h1>"
                f"<p>Body of chapter {label} with {filler}.</p>"
                "</body></html>",
            )


def _build_infospace(tmp_path: Path) -> Path:
    """Create the fixture EPUB under *tmp_path* and return the infospace root.

    The infospace is initialised with id ``budget-test``, name ``Budget Test``
    and the ``general-knowledge`` profile.
    """
    book = tmp_path / "book.epub"
    _write_three_chapter_epub(book)
    infospace = init_generation_infospace(
        tmp_path,
        book,
        "budget-test",
        name="Budget Test",
        profile="general-knowledge",
    )
    return infospace.root


def test_record_plan_snapshot_writes_yaml_with_stable_id(tmp_path: Path) -> None:
    """Recording the same summary twice yields one entry with a stable id."""
    root = _build_infospace(tmp_path)
    summary = plan_generation(root, persist=False)

    snapshot_id_1 = record_plan_snapshot(root, summary)
    snapshot_id_2 = record_plan_snapshot(root, summary)

    persisted = (root / PLANS_FILE).read_text(encoding="utf-8")
    data = yaml.safe_load(persisted)

    assert data["schema_version"] == PLANS_SCHEMA_VERSION
    assert data["pruned_count"] == 0
    assert snapshot_id_1 == snapshot_id_2, "same summary must yield same snapshot_id"
    # Duplicate writes refresh recorded_at instead of stacking
    assert len(data["snapshots"]) == 1
    assert data["snapshots"][0]["snapshot_id"] == snapshot_id_1


def test_different_filters_produce_distinct_snapshots(tmp_path: Path) -> None:
    """Plans built with different chapter filters are stored separately."""
    root = _build_infospace(tmp_path)
    full_plan = plan_generation(root, persist=False)
    chapter_only = plan_generation(root, from_chapter=2, to_chapter=2, persist=False)

    record_plan_snapshot(root, full_plan)
    record_plan_snapshot(root, chapter_only)

    snapshots = read_plan_snapshots(root)
    assert len(snapshots) == 2
    ids = {snap["snapshot_id"] for snap in snapshots}
    assert len(ids) == 2

    # Filter values are echoed back into the snapshot
    chapter_snapshot = next(s for s in snapshots if s["selected_chunk_count"] == 1)
    assert chapter_snapshot["filters"]["from_chapter"] == 2
    assert chapter_snapshot["filters"]["to_chapter"] == 2


def test_plan_generation_persists_snapshot_by_default(tmp_path: Path) -> None:
    """Without ``persist=False`` the plan is written and its id is returned."""
    root = _build_infospace(tmp_path)

    result = plan_generation(root, from_chapter=1, to_chapter=2)

    assert "snapshot_id" in result
    assert (root / PLANS_FILE).is_file()
    snapshots = read_plan_snapshots(root)
    assert len(snapshots) == 1
    assert snapshots[0]["snapshot_id"] == result["snapshot_id"]


def test_plan_generation_persist_false_skips_write(tmp_path: Path) -> None:
    """``persist=False`` must not create the plans file."""
    root = _build_infospace(tmp_path)

    plan_generation(root, persist=False)

    assert not (root / PLANS_FILE).exists()


def test_plan_snapshot_retention_prunes_old_entries(tmp_path: Path) -> None:
    """With ``retention=3`` only three snapshots survive and pruning is counted."""
    root = _build_infospace(tmp_path)

    # Produce 5 distinct snapshots and cap retention at 3.
    for step, chapter in enumerate((1, 2, 3, None, None), start=1):
        kwargs = {"from_chapter": chapter, "to_chapter": chapter} if chapter else {}
        summary = plan_generation(root, persist=False, **kwargs)
        if not chapter:
            # Vary another field to avoid a duplicate refresh. The bump is
            # per-iteration so the two unfiltered plans differ from each other
            # too (a constant +1 made them identical).
            # NOTE(review): assumes max_calls feeds the snapshot id - confirm.
            summary["max_calls"] = (summary.get("max_calls") or 0) + step
            summary["exceeds_max_calls"] = False
        record_plan_snapshot(root, summary, retention=3)

    data = yaml.safe_load((root / PLANS_FILE).read_text(encoding="utf-8"))
    assert len(data["snapshots"]) == 3
    assert data["pruned_count"] >= 1


def test_plan_cli_writes_snapshot(tmp_path: Path) -> None:
    """``python -m infospace_bench generate plan`` records a snapshot."""
    root = _build_infospace(tmp_path)

    env = os.environ.copy()
    # Hand the child the import path of this interpreter instead of replacing
    # PYTHONPATH with a machine-specific value. dict.fromkeys de-duplicates
    # while preserving order; empty sys.path entries are skipped.
    search_path = [*_LEGACY_PYTHONPATH, *(entry for entry in sys.path if entry)]
    env["PYTHONPATH"] = os.pathsep.join(dict.fromkeys(search_path))

    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "infospace_bench",
            "generate",
            "plan",
            str(root),
            "--from-chapter",
            "1",
            "--to-chapter",
            "2",
            "--cost-per-1k",
            "0.5",
        ],
        check=False,
        env=env,
        text=True,
        capture_output=True,
    )

    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    assert "snapshot_id" in payload

    snapshots = read_plan_snapshots(root)
    assert len(snapshots) == 1
    assert snapshots[0]["filters"]["from_chapter"] == 1
    assert snapshots[0]["filters"]["to_chapter"] == 2