"""Tests for plan snapshots, usage records and rate tables in ``infospace_bench.budget``.

Also covers the snapshot written by ``plan_generation``, the usage entry
written by ``run_generation`` and the ``generate plan`` CLI sub-command.
"""

import json
import os
import subprocess
import sys
import zipfile
from pathlib import Path

import yaml

from infospace_bench.budget import (
    PLAN_RETENTION_DEFAULT,
    PLANS_FILE,
    PLANS_SCHEMA_VERSION,
    read_plan_snapshots,
    record_plan_snapshot,
)
from infospace_bench.generator import init_generation_infospace, plan_generation

# NOTE(review): the XML markup of the two constants below was stripped in the
# copy under review (only the text nodes survived). It is reconstructed here
# as a minimal EPUB container/package pointing at the three chapter files that
# ``_write_three_chapter_epub`` writes — confirm against version control.
CONTAINER_XML = """<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>
"""

PACKAGE_OPF = """<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="bookid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
    <dc:identifier id="bookid">urn:test:budget</dc:identifier>
    <dc:title>Budget Test Book</dc:title>
    <dc:creator>Author</dc:creator>
    <dc:language>en</dc:language>
  </metadata>
  <manifest>
    <item id="ch1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
    <item id="ch2" href="ch2.xhtml" media-type="application/xhtml+xml"/>
    <item id="ch3" href="ch3.xhtml" media-type="application/xhtml+xml"/>
  </manifest>
  <spine>
    <itemref idref="ch1"/>
    <itemref idref="ch2"/>
    <itemref idref="ch3"/>
  </spine>
</package>
"""


def _write_three_chapter_epub(path: Path) -> None:
    """Write a small EPUB archive with chapters ``I``, ``II`` and ``III`` to *path*."""
    with zipfile.ZipFile(path, "w") as archive:
        archive.writestr("mimetype", "application/epub+zip")
        archive.writestr("META-INF/container.xml", CONTAINER_XML)
        archive.writestr("OEBPS/content.opf", PACKAGE_OPF)
        for idx, label in enumerate(("I", "II", "III"), start=1):
            # NOTE(review): the XHTML tags were stripped in the copy under
            # review; title / heading / paragraph structure is reconstructed
            # from the surviving text — confirm against version control.
            archive.writestr(
                f"OEBPS/ch{idx}.xhtml",
                '<html xmlns="http://www.w3.org/1999/xhtml">'
                "<head><title>Book</title></head>"
                f"<body><h1>{label}</h1>"
                f"<p>Body of chapter {label} with "
                + " ".join(f"word{n}" for n in range(40))
                + ".</p></body></html>",
            )


def _write_minimal_fixture(path: Path) -> None:
    """Dump a YAML file of canned stage responses to *path*.

    Every response uses ``"*"`` as its ``input_artifact_id``; one response is
    provided for each of the four stage ids listed below.
    """
    data = {
        "responses": [
            {
                "stage_id": "summarize-source",
                "input_artifact_id": "*",
                "markdown": "# Source Summary\n\nA stub summary.\n",
            },
            {
                "stage_id": "extract-entities",
                "input_artifact_id": "*",
                "markdown": (
                    "# Stub Entity\n\n## Definition\n\nA stub.\n\n"
                    "## Context\n\nFor a budget test.\n"
                ),
            },
            {
                "stage_id": "extract-relations",
                "input_artifact_id": "*",
                "markdown": (
                    "# Stub Entity Practices Something\n\n## Subject\n\nStub Entity\n\n"
                    "## Predicate\n\npractices\n\n## Object\n\nSomething\n\n"
                    "## Relation Type\n\nsupport\n\n"
                    "## Evidence\n\nA stub.\n"
                ),
            },
            {
                "stage_id": "evaluate-entity",
                "input_artifact_id": "*",
                # NOTE(review): the nested indentation of the ``scores`` list
                # was collapsed to single spaces in the copy under review,
                # which is not valid YAML; 2/4-space nesting is restored here
                # — confirm against version control.
                "markdown": (
                    "---\nartifact_id: entity/stub-entity.md\nevaluator: fixture\n"
                    "evaluated_at: '2026-05-17T00:00:00'\n"
                    "scores:\n  - name: groundedness\n    value: 4.0\n    max_value: 5.0\n"
                    "  - name: usefulness\n    value: 4.0\n    max_value: 5.0\n---\n\n"
                    "# Evaluation: entity/stub-entity.md\n"
                ),
            },
        ]
    }
    path.write_text(yaml.safe_dump(data, sort_keys=False), encoding="utf-8")


def _build_infospace(tmp_path: Path) -> Path:
    """Create the test EPUB under *tmp_path*, initialise an infospace from it, return its root."""
    book = tmp_path / "book.epub"
    _write_three_chapter_epub(book)
    infospace = init_generation_infospace(
        tmp_path, book, "budget-test", name="Budget Test", profile="general-knowledge"
    )
    return infospace.root


def test_record_plan_snapshot_writes_yaml_with_stable_id(tmp_path: Path) -> None:
    """Recording the same summary twice yields one snapshot and the same id."""
    root = _build_infospace(tmp_path)
    summary = plan_generation(root, persist=False)

    snapshot_id_1 = record_plan_snapshot(root, summary)
    snapshot_id_2 = record_plan_snapshot(root, summary)

    persisted = (root / PLANS_FILE).read_text(encoding="utf-8")
    data = yaml.safe_load(persisted)
    assert data["schema_version"] == PLANS_SCHEMA_VERSION
    assert data["pruned_count"] == 0
    assert snapshot_id_1 == snapshot_id_2, "same summary must yield same snapshot_id"
    # Duplicate writes refresh recorded_at instead of stacking
    assert len(data["snapshots"]) == 1
    assert data["snapshots"][0]["snapshot_id"] == snapshot_id_1


def test_different_filters_produce_distinct_snapshots(tmp_path: Path) -> None:
    """Plans with different chapter filters are stored as separate snapshots."""
    root = _build_infospace(tmp_path)
    full_plan = plan_generation(root, persist=False)
    chapter_only = plan_generation(root, from_chapter=2, to_chapter=2, persist=False)

    record_plan_snapshot(root, full_plan)
    record_plan_snapshot(root, chapter_only)

    snapshots = read_plan_snapshots(root)
    assert len(snapshots) == 2
    ids = {snap["snapshot_id"] for snap in snapshots}
    assert len(ids) == 2
    # Filter values are echoed back into the snapshot
    chapter_snapshot = next(s for s in snapshots if s["selected_chunk_count"] == 1)
    assert chapter_snapshot["filters"]["from_chapter"] == 2
    assert chapter_snapshot["filters"]["to_chapter"] == 2


def test_plan_generation_persists_snapshot_by_default(tmp_path: Path) -> None:
    """``plan_generation`` without ``persist`` writes a snapshot and returns its id."""
    root = _build_infospace(tmp_path)

    result = plan_generation(root, from_chapter=1, to_chapter=2)

    assert "snapshot_id" in result
    assert (root / PLANS_FILE).is_file()
    snapshots = read_plan_snapshots(root)
    assert len(snapshots) == 1
    assert snapshots[0]["snapshot_id"] == result["snapshot_id"]


def test_plan_generation_persist_false_skips_write(tmp_path: Path) -> None:
    """``persist=False`` leaves the plans file absent."""
    root = _build_infospace(tmp_path)

    plan_generation(root, persist=False)

    assert not (root / PLANS_FILE).exists()


def test_plan_snapshot_retention_prunes_old_entries(tmp_path: Path) -> None:
    """With ``retention=3`` only three snapshots are kept and pruning is counted."""
    root = _build_infospace(tmp_path)

    # Produce 5 distinct snapshots and cap retention at 3.
    unfiltered_seen = 0
    for chapter in (1, 2, 3, None, None):
        kwargs = {"from_chapter": chapter, "to_chapter": chapter} if chapter else {}
        summary = plan_generation(root, persist=False, **kwargs)
        if not chapter:
            # Vary another field to avoid a duplicate refresh. The bump grows
            # per unfiltered iteration so the two unfiltered summaries differ
            # from each other as well (a constant ``+ 1`` made them identical).
            # NOTE(review): presumes ``max_calls`` feeds the snapshot id —
            # confirm against ``record_plan_snapshot``.
            unfiltered_seen += 1
            summary["max_calls"] = (summary.get("max_calls") or 0) + unfiltered_seen
            summary["exceeds_max_calls"] = False
        record_plan_snapshot(root, summary, retention=3)

    data = yaml.safe_load((root / PLANS_FILE).read_text(encoding="utf-8"))
    assert len(data["snapshots"]) == 3
    assert data["pruned_count"] >= 1


def test_record_run_usage_aggregates_by_workflow_stage_provider_model(tmp_path: Path) -> None:
    """Two calls sharing workflow/stage/provider/model collapse into one bucket."""
    from infospace_bench.budget import read_usage_runs, record_run_usage

    root = _build_infospace(tmp_path)
    workflow_results = [
        {
            "run_id": "run-1",
            "workflow_id": "generic-source-entities",
            "status": "completed",
            "stages": [
                {
                    "stage_id": "extract-entities",
                    "provider": "openrouter",
                    "metadata": {
                        "model": "openai/gpt-4o-mini",
                        "usage": {
                            "prompt_tokens": 1000,
                            "completion_tokens": 200,
                            "total_tokens": 1200,
                        },
                    },
                },
                {
                    "stage_id": "extract-entities",
                    "provider": "openrouter",
                    "metadata": {
                        "model": "openai/gpt-4o-mini",
                        "usage": {
                            "prompt_tokens": 800,
                            "completion_tokens": 150,
                            "cost": 0.0012,
                        },
                    },
                },
                # No provider/metadata: the assertions below expect this stage
                # not to be counted as a call.
                {"stage_id": "split-entities", "message": "split 3 entities"},
            ],
        }
    ]

    entry = record_run_usage(root, workflow_results, snapshot_id="abc123", duration_seconds=4.2)

    assert entry["rollup"]["total_calls"] == 2
    assert entry["rollup"]["total_prompt_tokens"] == 1800
    assert entry["rollup"]["total_completion_tokens"] == 350
    assert entry["rollup"]["total_cost_usd_known"] == 0.0012
    assert entry["snapshot_id"] == "abc123"
    assert entry["duration_seconds"] == 4.2
    assert len(entry["per_bucket"]) == 1
    bucket = entry["per_bucket"][0]
    assert bucket["workflow_id"] == "generic-source-entities"
    assert bucket["stage_id"] == "extract-entities"
    assert bucket["provider"] == "openrouter"
    assert bucket["model"] == "openai/gpt-4o-mini"
    assert bucket["calls"] == 2

    runs = read_usage_runs(root)
    assert len(runs) == 1
    assert runs[0]["run_index"] == 1


def test_record_run_usage_handles_fixture_runs_without_aborting(tmp_path: Path) -> None:
    """Stages without usage metadata are counted with zero tokens and unknown cost."""
    from infospace_bench.budget import record_run_usage

    root = _build_infospace(tmp_path)
    workflow_results = [
        {
            "run_id": "fix-1",
            "workflow_id": "generic-source-summary",
            "stages": [
                {"stage_id": "summarize-source", "provider": "fixture"},
                {"stage_id": "summarize-source", "provider": "fixture"},
            ],
        }
    ]

    entry = record_run_usage(root, workflow_results)

    fixture_bucket = next(b for b in entry["per_bucket"] if b["provider"] == "fixture")
    assert fixture_bucket["calls"] == 2
    assert fixture_bucket["prompt_tokens"] == 0
    assert fixture_bucket["cost_status"] == "unknown"
    assert entry["rollup"]["total_cost_usd_known"] == 0.0


def test_run_generation_writes_usage_yaml_with_plan_snapshot_id(tmp_path: Path) -> None:
    """A generation run records one usage entry tagged with the prior plan's snapshot id."""
    from infospace_bench.budget import USAGE_FILE, read_usage_runs
    from infospace_bench.generator import run_generation

    root = _build_infospace(tmp_path)
    fixture = tmp_path / "responses.yaml"
    _write_minimal_fixture(fixture)

    plan_payload = plan_generation(root)
    run_generation(root, fixture_responses=fixture)

    runs = read_usage_runs(root)
    assert (root / USAGE_FILE).is_file()
    assert len(runs) == 1
    assert runs[0]["snapshot_id"] == plan_payload["snapshot_id"]
    assert runs[0]["duration_seconds"] is not None and runs[0]["duration_seconds"] >= 0
    assert runs[0]["rollup"]["total_calls"] >= 0
    # Fixture mode runs should not claim any known cost
    assert runs[0]["rollup"]["total_cost_usd_known"] == 0.0


def test_rate_table_known_model_resolves_cost(tmp_path: Path) -> None:
    """The default rate table prices ``openai/gpt-4o-mini``."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    rates = load_rate_table()
    assert "openai/gpt-4o-mini" in rates

    cost = estimate_cost_usd("openai/gpt-4o-mini", 1000, 500, rates)

    # gpt-4o-mini: prompt 0.00015/1k, completion 0.0006/1k → 0.00015 + 0.0003 = 0.00045
    assert cost is not None
    assert abs(cost - 0.00045) < 1e-9


def test_rate_table_unknown_model_returns_none(tmp_path: Path) -> None:
    """A model missing from the rate table yields ``None`` rather than a cost."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    rates = load_rate_table()

    assert estimate_cost_usd("acme/no-such-model", 1000, 500, rates) is None


def test_workspace_rate_table_overrides_package_default(tmp_path: Path) -> None:
    """A ``model-rates.yaml`` in the directory given to ``load_rate_table`` takes effect."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    override = tmp_path / "model-rates.yaml"
    override.write_text(
        yaml.safe_dump(
            {
                "schema_version": 1,
                "rates": {
                    "openai/gpt-4o-mini": {
                        "prompt_per_1k": 1.0,
                        "completion_per_1k": 2.0,
                    },
                    "acme/bespoke": {
                        "prompt_per_1k": 0.1,
                        "completion_per_1k": 0.2,
                    },
                },
            }
        ),
        encoding="utf-8",
    )

    rates = load_rate_table(tmp_path)
    overridden = estimate_cost_usd("openai/gpt-4o-mini", 1000, 1000, rates)
    bespoke = estimate_cost_usd("acme/bespoke", 1000, 1000, rates)

    assert overridden == round(1.0 + 2.0, 6)
    assert bespoke == round(0.1 + 0.2, 6)


def test_record_run_usage_fills_estimated_cost_via_resolver(tmp_path: Path) -> None:
    """Calls without an adapter-reported cost get an estimate from the resolver."""
    from infospace_bench.budget import make_cost_resolver, record_run_usage

    root = _build_infospace(tmp_path)
    workflow_results = [
        {
            "run_id": "run-cost",
            "workflow_id": "generic-source-entities",
            "stages": [
                {
                    "stage_id": "extract-entities",
                    "provider": "openrouter",
                    "metadata": {
                        "model": "openai/gpt-4o-mini",
                        "usage": {"prompt_tokens": 2000, "completion_tokens": 1000},
                    },
                },
                {
                    "stage_id": "extract-entities",
                    "provider": "openrouter",
                    "metadata": {
                        "model": "openai/gpt-4o-mini",
                        "usage": {
                            "prompt_tokens": 1000,
                            "completion_tokens": 500,
                            "cost": 0.123,
                        },
                    },
                },
            ],
        }
    ]

    entry = record_run_usage(
        root,
        workflow_results,
        cost_resolver=make_cost_resolver(tmp_path),
    )

    bucket = entry["per_bucket"][0]
    # The first call has no adapter cost so it gets estimated:
    # 2000/1000*0.00015 + 1000/1000*0.0006 = 0.0003 + 0.0006 = 0.0009
    assert bucket["cost_usd_estimated"] == round(0.0009, 6)
    assert bucket["cost_usd_known"] == 0.123
    assert bucket["cost_status"] == "known"  # at least one call returned cost
    assert entry["rollup"]["total_cost_usd_known"] == 0.123
    assert entry["rollup"]["total_cost_usd_estimated"] == round(0.0009, 6)


def test_plan_cli_writes_snapshot(tmp_path: Path) -> None:
    """``python -m infospace_bench generate plan`` prints JSON and persists a snapshot."""
    root = _build_infospace(tmp_path)

    env = os.environ.copy()
    # TODO(review): the second entry is a developer-machine path; replace it
    # with a repository-relative location once the layout is confirmed.
    search_path = ["src", "/home/worsch/markitect-tool/src"]
    inherited = env.get("PYTHONPATH")
    if inherited:
        # Keep whatever the parent process already had on its path instead of
        # clobbering it.
        search_path.append(inherited)
    # os.pathsep rather than a literal ":" so the value is well-formed on
    # every platform.
    env["PYTHONPATH"] = os.pathsep.join(search_path)

    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "infospace_bench",
            "generate",
            "plan",
            str(root),
            "--from-chapter",
            "1",
            "--to-chapter",
            "2",
            "--cost-per-1k",
            "0.5",
        ],
        check=False,
        env=env,
        text=True,
        capture_output=True,
    )

    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    assert "snapshot_id" in payload
    snapshots = read_plan_snapshots(root)
    assert len(snapshots) == 1
    assert snapshots[0]["filters"]["from_chapter"] == 1
    assert snapshots[0]["filters"]["to_chapter"] == 2