Files
infospace-bench/tests/test_budget_registry.py
tegwick 678508226a IB-WP-0019-T02: usage rollup from run records
Every completed generate run now aggregates per-call adapter usage from
the workflow-engine run records into output/budget/usage.yaml. Per-call
data is bucketed by (workflow_id, stage_id, provider, model) with
running totals for calls, prompt_tokens, completion_tokens,
total_tokens, and cost_usd_known (sum of adapter-reported cost when the
provider returns it; usually zero today). A run-level entry captures
run_index, started_at, completed_at, duration_seconds, the executing
plan snapshot_id (resolved from the latest plans.yaml entry), and the
workflow-level run_id / stage_count summaries.

cost_usd_estimated is left as None for this task; T03 wires the
rate-table resolver so the same bucket gets a model-priced fallback
when the adapter does not return cost directly.

Fixture-mode runs are recorded with provider='fixture', zero tokens,
and cost_status='unknown' rather than silently skipped, so the rollup
honestly reflects which stages actually ran.

102 tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-17 19:46:40 +02:00

317 lines
11 KiB
Python

import json
import os
import subprocess
import sys
import zipfile
from pathlib import Path
import yaml
from infospace_bench.budget import (
PLAN_RETENTION_DEFAULT,
PLANS_FILE,
PLANS_SCHEMA_VERSION,
read_plan_snapshots,
record_plan_snapshot,
)
from infospace_bench.generator import init_generation_infospace, plan_generation
# EPUB container descriptor written to META-INF/container.xml by the EPUB
# helper below; its single rootfile entry points at OEBPS/content.opf.
CONTAINER_XML = """<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
"""
# OPF package document for the test book: the manifest declares three XHTML
# items (ch1, ch2, ch3) and the spine lists them in that same order.
PACKAGE_OPF = """<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="bookid">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:identifier id="bookid">urn:test:budget</dc:identifier>
<dc:title>Budget Test Book</dc:title>
<dc:creator>Author</dc:creator>
<dc:language>en</dc:language>
</metadata>
<manifest>
<item id="ch1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
<item id="ch2" href="ch2.xhtml" media-type="application/xhtml+xml"/>
<item id="ch3" href="ch3.xhtml" media-type="application/xhtml+xml"/>
</manifest>
<spine>
<itemref idref="ch1"/>
<itemref idref="ch2"/>
<itemref idref="ch3"/>
</spine>
</package>
"""
def _write_three_chapter_epub(path: Path) -> None:
    """Create a small three-chapter EPUB archive at *path*.

    The archive contains, in this order: the ``mimetype`` entry, the
    container descriptor, the OPF package document, and one XHTML file per
    chapter (``ch1.xhtml`` .. ``ch3.xhtml``). Each chapter carries a
    roman-numeral heading and a paragraph padded with ``word0`` .. ``word39``.
    """
    filler = " ".join(f"word{n}" for n in range(40))

    # Collect every archive member up front so the write loop stays trivial.
    members = [
        ("mimetype", "application/epub+zip"),
        ("META-INF/container.xml", CONTAINER_XML),
        ("OEBPS/content.opf", PACKAGE_OPF),
    ]
    for number, numeral in enumerate(("I", "II", "III"), start=1):
        chapter_html = (
            "<html><head><title>Book</title></head>"
            f"<body><h2>{numeral}</h2>"
            f"<p>Body of chapter {numeral} with {filler}.</p></body></html>"
        )
        members.append((f"OEBPS/ch{number}.xhtml", chapter_html))

    with zipfile.ZipFile(path, "w") as epub:
        for member_name, content in members:
            epub.writestr(member_name, content)
def _write_minimal_fixture(path: Path) -> None:
    """Write a YAML fixture with one canned response per stage to *path*.

    The document has a single ``responses`` list. Every entry carries a
    ``stage_id``, an ``input_artifact_id`` of ``"*"`` and a ``markdown`` body.
    Stages covered, in order: ``summarize-source``, ``extract-entities``,
    ``extract-relations`` and ``evaluate-entity``.
    """
    # Insertion order of this mapping is the order of the emitted responses.
    markdown_by_stage = {
        "summarize-source": "# Source Summary\n\nA stub summary.\n",
        "extract-entities": (
            "# Stub Entity\n\n## Definition\n\nA stub.\n\n## Context\n\nFor a budget test.\n"
        ),
        "extract-relations": (
            "# Stub Entity Practices Something\n\n## Subject\n\nStub Entity\n\n"
            "## Predicate\n\npractices\n\n## Object\n\nSomething\n\n## Relation Type\n\nsupport\n\n"
            "## Evidence\n\nA stub.\n"
        ),
        "evaluate-entity": (
            "---\nartifact_id: entity/stub-entity.md\nevaluator: fixture\n"
            "evaluated_at: '2026-05-17T00:00:00'\n"
            "scores:\n - name: groundedness\n value: 4.0\n max_value: 5.0\n"
            " - name: usefulness\n value: 4.0\n max_value: 5.0\n---\n\n"
            "# Evaluation: entity/stub-entity.md\n"
        ),
    }
    responses = [
        {"stage_id": stage, "input_artifact_id": "*", "markdown": body}
        for stage, body in markdown_by_stage.items()
    ]
    # sort_keys=False keeps the key order written above in the YAML output.
    serialized = yaml.safe_dump({"responses": responses}, sort_keys=False)
    path.write_text(serialized, encoding="utf-8")
def _build_infospace(tmp_path: Path) -> Path:
    """Write the test EPUB into *tmp_path*, initialise an infospace from it, and return its root."""
    epub_path = tmp_path / "book.epub"
    _write_three_chapter_epub(epub_path)
    return init_generation_infospace(
        tmp_path,
        epub_path,
        "budget-test",
        name="Budget Test",
        profile="general-knowledge",
    ).root
def test_record_plan_snapshot_writes_yaml_with_stable_id(tmp_path: Path) -> None:
    """Recording the same plan summary twice yields one snapshot with one id."""
    infospace_root = _build_infospace(tmp_path)
    plan_summary = plan_generation(infospace_root, persist=False)

    # Record the identical summary twice and keep both returned ids.
    first_id, second_id = [
        record_plan_snapshot(infospace_root, plan_summary) for _ in range(2)
    ]

    registry = yaml.safe_load((infospace_root / PLANS_FILE).read_text(encoding="utf-8"))
    assert registry["schema_version"] == PLANS_SCHEMA_VERSION
    assert registry["pruned_count"] == 0
    assert first_id == second_id, "same summary must yield same snapshot_id"

    # A repeated write must not stack a second entry in the registry.
    stored = registry["snapshots"]
    assert len(stored) == 1
    assert stored[0]["snapshot_id"] == first_id
def test_different_filters_produce_distinct_snapshots(tmp_path: Path) -> None:
    """An unfiltered plan and a chapter-2-only plan are stored as separate snapshots."""
    infospace_root = _build_infospace(tmp_path)
    plans = (
        plan_generation(infospace_root, persist=False),
        plan_generation(infospace_root, from_chapter=2, to_chapter=2, persist=False),
    )
    for plan in plans:
        record_plan_snapshot(infospace_root, plan)

    stored = read_plan_snapshots(infospace_root)
    assert len(stored) == 2
    assert len({entry["snapshot_id"] for entry in stored}) == 2

    # The chapter-limited snapshot must echo the filter values it was built with.
    single_chunk = next(filter(lambda entry: entry["selected_chunk_count"] == 1, stored))
    echoed_filters = single_chunk["filters"]
    assert echoed_filters["from_chapter"] == 2
    assert echoed_filters["to_chapter"] == 2
def test_plan_generation_persists_snapshot_by_default(tmp_path: Path) -> None:
    """Without ``persist=False``, planning writes the plans file and returns the snapshot id."""
    infospace_root = _build_infospace(tmp_path)
    plan = plan_generation(infospace_root, from_chapter=1, to_chapter=2)

    assert "snapshot_id" in plan
    plans_path = infospace_root / PLANS_FILE
    assert plans_path.is_file()

    stored = read_plan_snapshots(infospace_root)
    assert len(stored) == 1
    (only_snapshot,) = stored
    assert only_snapshot["snapshot_id"] == plan["snapshot_id"]
def test_plan_generation_persist_false_skips_write(tmp_path: Path) -> None:
    """Planning with ``persist=False`` must leave no plans file behind."""
    infospace_root = _build_infospace(tmp_path)
    plans_path = infospace_root / PLANS_FILE

    plan_generation(infospace_root, persist=False)

    assert not plans_path.exists()
def test_plan_snapshot_retention_prunes_old_entries(tmp_path: Path) -> None:
    """Recording five different plans with ``retention=3`` keeps only three.

    Three single-chapter plans are followed by two unfiltered plans. After
    all five are recorded, the plans file must hold exactly three snapshots
    and report at least one pruned entry.
    """
    root = _build_infospace(tmp_path)
    # Produce 5 distinct snapshots and cap retention at 3.
    for offset, chapter in enumerate((1, 2, 3, None, None), start=1):
        if chapter is not None:
            summary = plan_generation(
                root, persist=False, from_chapter=chapter, to_chapter=chapter
            )
        else:
            summary = plan_generation(root, persist=False)
            # The two unfiltered plans are otherwise identical, so bump
            # max_calls by a per-iteration amount; a constant increment would
            # make them equal to each other and collapse into one snapshot.
            # NOTE(review): assumes max_calls feeds the snapshot identity —
            # confirm against record_plan_snapshot.
            summary["max_calls"] = (summary.get("max_calls") or 0) + offset
            summary["exceeds_max_calls"] = False
        record_plan_snapshot(root, summary, retention=3)
    data = yaml.safe_load((root / PLANS_FILE).read_text(encoding="utf-8"))
    assert len(data["snapshots"]) == 3
    assert data["pruned_count"] >= 1
def test_record_run_usage_aggregates_by_workflow_stage_provider_model(tmp_path: Path) -> None:
    """Two calls of the same stage/provider/model fold into one bucket.

    The third stage record has no provider or usage data; the expected
    rollup counts only the two ``extract-entities`` calls.
    """
    infospace_root = _build_infospace(tmp_path)
    from infospace_bench.budget import record_run_usage, read_usage_runs

    def llm_stage(usage: dict) -> dict:
        # Both adapter-backed stage records differ only in their usage payload.
        return {
            "stage_id": "extract-entities",
            "provider": "openrouter",
            "metadata": {"model": "openai/gpt-4o-mini", "usage": usage},
        }

    run_record = {
        "run_id": "run-1",
        "workflow_id": "generic-source-entities",
        "status": "completed",
        "stages": [
            llm_stage({"prompt_tokens": 1000, "completion_tokens": 200, "total_tokens": 1200}),
            llm_stage({"prompt_tokens": 800, "completion_tokens": 150, "cost": 0.0012}),
            {"stage_id": "split-entities", "message": "split 3 entities"},
        ],
    }

    entry = record_run_usage(
        infospace_root, [run_record], snapshot_id="abc123", duration_seconds=4.2
    )

    totals = entry["rollup"]
    assert totals["total_calls"] == 2
    assert totals["total_prompt_tokens"] == 1800
    assert totals["total_completion_tokens"] == 350
    assert totals["total_cost_usd_known"] == 0.0012
    assert entry["snapshot_id"] == "abc123"
    assert entry["duration_seconds"] == 4.2

    assert len(entry["per_bucket"]) == 1
    only_bucket = entry["per_bucket"][0]
    expected_bucket = {
        "workflow_id": "generic-source-entities",
        "stage_id": "extract-entities",
        "provider": "openrouter",
        "model": "openai/gpt-4o-mini",
        "calls": 2,
    }
    for field, expected in expected_bucket.items():
        assert only_bucket[field] == expected

    recorded_runs = read_usage_runs(infospace_root)
    assert len(recorded_runs) == 1
    assert recorded_runs[0]["run_index"] == 1
def test_record_run_usage_handles_fixture_runs_without_aborting(tmp_path: Path) -> None:
    """Fixture-provider stages are counted, with zero tokens and unknown cost."""
    infospace_root = _build_infospace(tmp_path)
    from infospace_bench.budget import record_run_usage

    # Two stage records that carry a provider but no metadata or usage block.
    fixture_stages = [
        {"stage_id": "summarize-source", "provider": "fixture"} for _ in range(2)
    ]
    run_record = {
        "run_id": "fix-1",
        "workflow_id": "generic-source-summary",
        "stages": fixture_stages,
    }

    entry = record_run_usage(infospace_root, [run_record])

    bucket = next(
        candidate for candidate in entry["per_bucket"] if candidate["provider"] == "fixture"
    )
    assert bucket["calls"] == 2
    assert bucket["prompt_tokens"] == 0
    assert bucket["cost_status"] == "unknown"
    assert entry["rollup"]["total_cost_usd_known"] == 0.0
def test_run_generation_writes_usage_yaml_with_plan_snapshot_id(tmp_path: Path) -> None:
    """A fixture-backed generate run records one usage entry tied to the latest plan."""
    infospace_root = _build_infospace(tmp_path)
    from infospace_bench.budget import USAGE_FILE, read_usage_runs
    from infospace_bench.generator import run_generation

    responses_path = tmp_path / "responses.yaml"
    _write_minimal_fixture(responses_path)

    # Plan first (persisting a snapshot), then run against the canned responses.
    plan = plan_generation(infospace_root)
    run_generation(infospace_root, fixture_responses=responses_path)

    recorded_runs = read_usage_runs(infospace_root)
    assert (infospace_root / USAGE_FILE).is_file()
    assert len(recorded_runs) == 1

    latest = recorded_runs[0]
    assert latest["snapshot_id"] == plan["snapshot_id"]
    elapsed = latest["duration_seconds"]
    assert elapsed is not None and elapsed >= 0
    totals = latest["rollup"]
    assert totals["total_calls"] >= 0
    # Fixture mode runs should not claim any known cost
    assert totals["total_cost_usd_known"] == 0.0
def test_plan_cli_writes_snapshot(tmp_path: Path) -> None:
    """``python -m infospace_bench generate plan`` prints and persists a snapshot.

    The CLI is run in a subprocess with chapter filters 1..2 and
    ``--cost-per-1k 0.5``. Its stdout must be JSON containing ``snapshot_id``,
    and the plans registry must afterwards hold one snapshot echoing the
    chapter filters.
    """
    root = _build_infospace(tmp_path)
    env = os.environ.copy()
    # Source trees go first so they win, but any PYTHONPATH inherited from the
    # caller (CI, virtualenv wrappers) is kept instead of being overwritten.
    # NOTE(review): the absolute path below is machine-specific and "src" is
    # resolved against the current working directory — consider deriving both
    # from configuration.
    search_path = ["src", "/home/worsch/markitect-tool/src"]
    inherited = env.get("PYTHONPATH")
    if inherited:
        search_path.append(inherited)
    # os.pathsep is ":" on POSIX and ";" on Windows.
    env["PYTHONPATH"] = os.pathsep.join(search_path)
    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "infospace_bench",
            "generate",
            "plan",
            str(root),
            "--from-chapter",
            "1",
            "--to-chapter",
            "2",
            "--cost-per-1k",
            "0.5",
        ],
        check=False,  # the return code is asserted below so stderr shows on failure
        env=env,
        text=True,
        capture_output=True,
    )
    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    assert "snapshot_id" in payload
    snapshots = read_plan_snapshots(root)
    assert len(snapshots) == 1
    assert snapshots[0]["filters"]["from_chapter"] == 1
    assert snapshots[0]["filters"]["to_chapter"] == 2