generated from coulomb/repo-seed
The default archive include set already pulls output/ in wholesale, so output/budget/ already lands inside the archive package with no code change. Add a budget_summary block to ArchiveRecord.metadata so catalog-level tools can see plans_count, runs_count, total_tokens, total_cost_usd_known, total_cost_usd_estimated, and the latest_snapshot_id without unpacking the archive. An infospace with no budget data still archives cleanly with an empty metadata dict. Closes IB-WP-0019 (Budget and Usage Registry): T01-T07 all done. Three-layer design landed end-to-end — layer 1 (per-infospace plans.yaml / usage.yaml / summary.yaml) and layer 3 (state-hub record_token_event emission with failure isolation) live here; layer 2 (cross-application QualityLedger for adaptive routing) is parked in llm-connect LLM-WP-0004 and infospace-bench IB-WP-0018 awaits it. 122 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
792 lines
28 KiB
Python
792 lines
28 KiB
Python
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import zipfile
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
|
|
from infospace_bench.budget import (
|
|
PLAN_RETENTION_DEFAULT,
|
|
PLANS_FILE,
|
|
PLANS_SCHEMA_VERSION,
|
|
read_plan_snapshots,
|
|
record_plan_snapshot,
|
|
)
|
|
from infospace_bench.generator import init_generation_infospace, plan_generation
|
|
|
|
|
|
# META-INF/container.xml payload for the test EPUB: it points the reader at
# the OPF package document stored under OEBPS/content.opf. The XML declaration
# must be the very first bytes of the document, so the literal opens directly
# with it and carries no stray text between elements.
CONTAINER_XML = """<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>
"""
|
|
|
|
# OPF package document for the test EPUB: Dublin Core metadata plus a manifest
# and spine that list three XHTML chapters (ch1..ch3) in reading order. The
# literal opens directly with the XML declaration and contains only markup so
# that it parses as a well-formed document.
PACKAGE_OPF = """<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="bookid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
    <dc:identifier id="bookid">urn:test:budget</dc:identifier>
    <dc:title>Budget Test Book</dc:title>
    <dc:creator>Author</dc:creator>
    <dc:language>en</dc:language>
  </metadata>
  <manifest>
    <item id="ch1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
    <item id="ch2" href="ch2.xhtml" media-type="application/xhtml+xml"/>
    <item id="ch3" href="ch3.xhtml" media-type="application/xhtml+xml"/>
  </manifest>
  <spine>
    <itemref idref="ch1"/>
    <itemref idref="ch2"/>
    <itemref idref="ch3"/>
  </spine>
</package>
"""
|
|
|
|
|
|
def _write_three_chapter_epub(path: Path) -> None:
    """Create a small EPUB at ``path`` with three XHTML chapters.

    The archive holds the ``mimetype`` entry, the container and package
    documents from the module constants, and chapters headed I, II and III,
    each with a 40-word filler paragraph.
    """
    filler = " ".join(f"word{n}" for n in range(40))
    with zipfile.ZipFile(path, "w") as epub:
        epub.writestr("mimetype", "application/epub+zip")
        epub.writestr("META-INF/container.xml", CONTAINER_XML)
        epub.writestr("OEBPS/content.opf", PACKAGE_OPF)
        for number, numeral in enumerate(("I", "II", "III"), start=1):
            chapter_html = (
                "<html><head><title>Book</title></head>"
                f"<body><h2>{numeral}</h2>"
                f"<p>Body of chapter {numeral} with {filler}.</p></body></html>"
            )
            epub.writestr(f"OEBPS/ch{number}.xhtml", chapter_html)
|
|
|
|
|
|
def _write_minimal_fixture(path: Path) -> None:
    """Dump a canned-responses YAML file to ``path``.

    One wildcard response (``input_artifact_id`` of ``"*"``) is written for
    each of four stages: summarize-source, extract-entities,
    extract-relations and evaluate-entity.
    """
    summary_md = "# Source Summary\n\nA stub summary.\n"
    entity_md = "# Stub Entity\n\n## Definition\n\nA stub.\n\n## Context\n\nFor a budget test.\n"
    relation_md = (
        "# Stub Entity Practices Something\n\n## Subject\n\nStub Entity\n\n"
        "## Predicate\n\npractices\n\n## Object\n\nSomething\n\n## Relation Type\n\nsupport\n\n"
        "## Evidence\n\nA stub.\n"
    )
    # NOTE(review): the nested ``scores`` entries below use single-space
    # indentation, which does not look like valid YAML front matter; the
    # whitespace may have been collapsed somewhere upstream -- confirm against
    # the canonical copy of this file before relying on it.
    evaluation_md = (
        "---\nartifact_id: entity/stub-entity.md\nevaluator: fixture\n"
        "evaluated_at: '2026-05-17T00:00:00'\n"
        "scores:\n - name: groundedness\n value: 4.0\n max_value: 5.0\n"
        " - name: usefulness\n value: 4.0\n max_value: 5.0\n---\n\n"
        "# Evaluation: entity/stub-entity.md\n"
    )
    stage_markdown = (
        ("summarize-source", summary_md),
        ("extract-entities", entity_md),
        ("extract-relations", relation_md),
        ("evaluate-entity", evaluation_md),
    )
    # Key order matters for the emitted document because sort_keys is off.
    payload = {
        "responses": [
            {"stage_id": stage_id, "input_artifact_id": "*", "markdown": markdown}
            for stage_id, markdown in stage_markdown
        ]
    }
    path.write_text(yaml.safe_dump(payload, sort_keys=False), encoding="utf-8")
|
|
|
|
|
|
def _build_infospace(tmp_path: Path) -> Path:
    """Initialise the ``budget-test`` infospace under ``tmp_path`` and return its root.

    A three-chapter EPUB is written to ``tmp_path / "book.epub"`` first and
    used as the source book.
    """
    epub_path = tmp_path / "book.epub"
    _write_three_chapter_epub(epub_path)
    created = init_generation_infospace(
        tmp_path,
        epub_path,
        "budget-test",
        name="Budget Test",
        profile="general-knowledge",
    )
    return created.root
|
|
|
|
|
|
def test_record_plan_snapshot_writes_yaml_with_stable_id(tmp_path: Path) -> None:
    """Recording one summary twice yields a single stored snapshot with one id."""
    root = _build_infospace(tmp_path)
    summary = plan_generation(root, persist=False)

    recorded_ids = [record_plan_snapshot(root, summary) for _ in range(2)]

    document = yaml.safe_load((root / PLANS_FILE).read_text(encoding="utf-8"))

    assert document["schema_version"] == PLANS_SCHEMA_VERSION
    assert document["pruned_count"] == 0
    assert recorded_ids[0] == recorded_ids[1], "same summary must yield same snapshot_id"
    # A duplicate write refreshes the existing entry rather than adding one.
    stored = document["snapshots"]
    assert len(stored) == 1
    assert stored[0]["snapshot_id"] == recorded_ids[0]
|
|
|
|
|
|
def test_different_filters_produce_distinct_snapshots(tmp_path: Path) -> None:
    """A full plan and a chapter-2-only plan are stored as two separate snapshots."""
    root = _build_infospace(tmp_path)

    plans = (
        plan_generation(root, persist=False),
        plan_generation(root, from_chapter=2, to_chapter=2, persist=False),
    )
    for plan in plans:
        record_plan_snapshot(root, plan)

    snapshots = read_plan_snapshots(root)
    assert len(snapshots) == 2
    assert len({entry["snapshot_id"] for entry in snapshots}) == 2
    # The chapter filter must be echoed back on the single-chunk snapshot.
    single_chunk = next(entry for entry in snapshots if entry["selected_chunk_count"] == 1)
    filters = single_chunk["filters"]
    assert filters["from_chapter"] == 2
    assert filters["to_chapter"] == 2
|
|
|
|
|
|
def test_plan_generation_persists_snapshot_by_default(tmp_path: Path) -> None:
    """Without ``persist=False`` a plan call writes the plans file and returns its id."""
    root = _build_infospace(tmp_path)

    plan = plan_generation(root, from_chapter=1, to_chapter=2)

    assert "snapshot_id" in plan
    assert (root / PLANS_FILE).is_file()
    stored = read_plan_snapshots(root)
    assert len(stored) == 1
    assert stored[0]["snapshot_id"] == plan["snapshot_id"]
|
|
|
|
|
|
def test_plan_generation_persist_false_skips_write(tmp_path: Path) -> None:
    """Planning with ``persist=False`` must leave no plans file behind."""
    root = _build_infospace(tmp_path)
    plans_path = root / PLANS_FILE

    plan_generation(root, persist=False)

    assert not plans_path.exists()
|
|
|
|
|
|
def test_plan_snapshot_retention_prunes_old_entries(tmp_path: Path) -> None:
    """Recording more snapshots than ``retention`` keeps only the newest three.

    Five plan summaries are recorded with ``retention=3``: one per chapter
    (1, 2, 3) followed by two unfiltered plans.
    """
    root = _build_infospace(tmp_path)

    # Produce 5 distinct snapshots and cap retention at 3.
    for position, chapter in enumerate((1, 2, 3, None, None), start=1):
        if chapter is not None:
            kwargs = {"from_chapter": chapter, "to_chapter": chapter}
        else:
            kwargs = {}
        summary = plan_generation(root, persist=False, **kwargs)
        if chapter is None:
            # Both unfiltered plans start from the same summary, so bump
            # max_calls by the loop position: a constant +1 would make the two
            # summaries identical and they would no longer be distinct inputs.
            summary["max_calls"] = (summary.get("max_calls") or 0) + position
            summary["exceeds_max_calls"] = False
        record_plan_snapshot(root, summary, retention=3)

    data = yaml.safe_load((root / PLANS_FILE).read_text(encoding="utf-8"))
    assert len(data["snapshots"]) == 3
    assert data["pruned_count"] >= 1
|
|
|
|
|
|
def test_record_run_usage_aggregates_by_workflow_stage_provider_model(tmp_path: Path) -> None:
    """Two calls sharing workflow/stage/provider/model collapse into one bucket.

    A third stage without provider data is present in the input; the
    assertions expect it not to count as a call.
    """
    root = _build_infospace(tmp_path)
    from infospace_bench.budget import record_run_usage, read_usage_runs

    def provider_stage(usage: dict) -> dict:
        # Both provider calls differ only in their usage payload.
        return {
            "stage_id": "extract-entities",
            "provider": "openrouter",
            "metadata": {"model": "openai/gpt-4o-mini", "usage": usage},
        }

    stages = [
        provider_stage({"prompt_tokens": 1000, "completion_tokens": 200, "total_tokens": 1200}),
        provider_stage({"prompt_tokens": 800, "completion_tokens": 150, "cost": 0.0012}),
        {"stage_id": "split-entities", "message": "split 3 entities"},
    ]
    workflow_results = [
        {
            "run_id": "run-1",
            "workflow_id": "generic-source-entities",
            "status": "completed",
            "stages": stages,
        }
    ]

    entry = record_run_usage(root, workflow_results, snapshot_id="abc123", duration_seconds=4.2)

    rollup = entry["rollup"]
    assert rollup["total_calls"] == 2
    assert rollup["total_prompt_tokens"] == 1800
    assert rollup["total_completion_tokens"] == 350
    assert rollup["total_cost_usd_known"] == 0.0012
    assert entry["snapshot_id"] == "abc123"
    assert entry["duration_seconds"] == 4.2

    buckets = entry["per_bucket"]
    assert len(buckets) == 1
    only_bucket = buckets[0]
    assert only_bucket["workflow_id"] == "generic-source-entities"
    assert only_bucket["stage_id"] == "extract-entities"
    assert only_bucket["provider"] == "openrouter"
    assert only_bucket["model"] == "openai/gpt-4o-mini"
    assert only_bucket["calls"] == 2

    persisted = read_usage_runs(root)
    assert len(persisted) == 1
    assert persisted[0]["run_index"] == 1
|
|
|
|
|
|
def test_record_run_usage_handles_fixture_runs_without_aborting(tmp_path: Path) -> None:
    """Fixture-provider stages with no usage metadata still produce a bucket."""
    root = _build_infospace(tmp_path)
    from infospace_bench.budget import record_run_usage

    fixture_stage = {"stage_id": "summarize-source", "provider": "fixture"}
    workflow_results = [
        {
            "run_id": "fix-1",
            "workflow_id": "generic-source-summary",
            "stages": [dict(fixture_stage), dict(fixture_stage)],
        }
    ]

    entry = record_run_usage(root, workflow_results)

    matching = (bucket for bucket in entry["per_bucket"] if bucket["provider"] == "fixture")
    fixture_bucket = next(matching)
    assert fixture_bucket["calls"] == 2
    assert fixture_bucket["prompt_tokens"] == 0
    assert fixture_bucket["cost_status"] == "unknown"
    assert entry["rollup"]["total_cost_usd_known"] == 0.0
|
|
|
|
|
|
def test_run_generation_writes_usage_yaml_with_plan_snapshot_id(tmp_path: Path) -> None:
    """A fixture-backed run records one usage entry tied to the prior plan snapshot."""
    root = _build_infospace(tmp_path)
    from infospace_bench.budget import USAGE_FILE, read_usage_runs
    from infospace_bench.generator import run_generation

    responses_path = tmp_path / "responses.yaml"
    _write_minimal_fixture(responses_path)

    plan_payload = plan_generation(root)
    run_generation(root, fixture_responses=responses_path)

    runs = read_usage_runs(root)
    assert (root / USAGE_FILE).is_file()
    assert len(runs) == 1
    recorded = runs[0]
    assert recorded["snapshot_id"] == plan_payload["snapshot_id"]
    duration = recorded["duration_seconds"]
    assert duration is not None and duration >= 0
    rollup = recorded["rollup"]
    assert rollup["total_calls"] >= 0
    # Fixture mode runs should not claim any known cost
    assert rollup["total_cost_usd_known"] == 0.0
|
|
|
|
|
|
def test_rate_table_known_model_resolves_cost(tmp_path: Path) -> None:
    """The default rate table prices 1000 prompt + 500 completion tokens of gpt-4o-mini."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    model = "openai/gpt-4o-mini"
    rates = load_rate_table()

    assert model in rates
    cost = estimate_cost_usd(model, 1000, 500, rates)
    # gpt-4o-mini: prompt 0.00015/1k, completion 0.0006/1k → 0.00015 + 0.0003 = 0.00045
    assert cost is not None
    assert abs(cost - 0.00045) < 1e-9
|
|
|
|
|
|
def test_rate_table_unknown_model_returns_none(tmp_path: Path) -> None:
    """A model absent from the rate table yields ``None`` instead of a cost."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    rates = load_rate_table()
    cost = estimate_cost_usd("acme/no-such-model", 1000, 500, rates)

    assert cost is None
|
|
|
|
|
|
def test_workspace_rate_table_overrides_package_default(tmp_path: Path) -> None:
    """A ``model-rates.yaml`` in the workspace replaces and extends the default rates."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    workspace_rates = {
        "openai/gpt-4o-mini": {"prompt_per_1k": 1.0, "completion_per_1k": 2.0},
        "acme/bespoke": {"prompt_per_1k": 0.1, "completion_per_1k": 0.2},
    }
    override_doc = yaml.safe_dump({"schema_version": 1, "rates": workspace_rates})
    (tmp_path / "model-rates.yaml").write_text(override_doc, encoding="utf-8")

    rates = load_rate_table(tmp_path)

    # 1000 prompt + 1000 completion tokens → prompt rate + completion rate.
    overridden = estimate_cost_usd("openai/gpt-4o-mini", 1000, 1000, rates)
    bespoke = estimate_cost_usd("acme/bespoke", 1000, 1000, rates)

    assert overridden == round(1.0 + 2.0, 6)
    assert bespoke == round(0.1 + 0.2, 6)
|
|
|
|
|
|
def test_record_run_usage_fills_estimated_cost_via_resolver(tmp_path: Path) -> None:
    """Calls lacking an adapter-reported cost get an estimate from the resolver."""
    root = _build_infospace(tmp_path)
    from infospace_bench.budget import make_cost_resolver, record_run_usage

    def provider_stage(usage: dict) -> dict:
        # Same stage/provider/model for both calls; only usage differs.
        return {
            "stage_id": "extract-entities",
            "provider": "openrouter",
            "metadata": {"model": "openai/gpt-4o-mini", "usage": usage},
        }

    uncosted_call = provider_stage({"prompt_tokens": 2000, "completion_tokens": 1000})
    costed_call = provider_stage(
        {"prompt_tokens": 1000, "completion_tokens": 500, "cost": 0.123}
    )
    workflow_results = [
        {
            "run_id": "run-cost",
            "workflow_id": "generic-source-entities",
            "stages": [uncosted_call, costed_call],
        }
    ]

    resolver = make_cost_resolver(tmp_path)
    entry = record_run_usage(root, workflow_results, cost_resolver=resolver)

    bucket = entry["per_bucket"][0]
    # The first call has no adapter cost so it gets estimated:
    # 2000/1000*0.00015 + 1000/1000*0.0006 = 0.0003 + 0.0006 = 0.0009
    assert bucket["cost_usd_estimated"] == round(0.0009, 6)
    assert bucket["cost_usd_known"] == 0.123
    assert bucket["cost_status"] == "known"  # at least one call returned cost
    rollup = entry["rollup"]
    assert rollup["total_cost_usd_known"] == 0.123
    assert rollup["total_cost_usd_estimated"] == round(0.0009, 6)
|
|
|
|
|
|
def test_record_run_variance_computes_plan_vs_actual(tmp_path: Path) -> None:
    """With no stored snapshot, estimates are null while actuals are reported."""
    root = _build_infospace(tmp_path)
    from infospace_bench.budget import record_run_variance

    rollup = {
        "total_calls": 10,
        "total_prompt_tokens": 1500,
        "total_completion_tokens": 500,
        "total_tokens": 2000,
        "total_cost_usd_known": 0.1,
        "total_cost_usd_estimated": 0.05,
    }
    buckets = [
        {"workflow_id": "generic-source-entities", "calls": 6, "prompt_tokens": 1200, "completion_tokens": 400},
        {"workflow_id": "generic-source-summary", "calls": 4, "prompt_tokens": 300, "completion_tokens": 100},
    ]
    run_entry = {
        "run_index": 1,
        "snapshot_id": "abc123",
        "rollup": rollup,
        "per_bucket": buckets,
        "duration_seconds": 3.5,
    }

    # No snapshot persisted yet — variance fields fall back to null
    summary = record_run_variance(root, run_entry)

    assert summary["snapshot_id"] == "abc123"
    assert summary["snapshot_resolved"] is False
    calls = summary["calls"]
    assert calls["estimated"] is None
    assert calls["actual"] == 10
    cost = summary["cost_usd"]
    assert cost["actual_known"] == 0.1
    assert cost["actual_estimated_from_rates"] == 0.05
    assert cost["actual_total"] == round(0.15, 6)
|
|
|
|
|
|
def test_record_run_variance_resolves_snapshot_and_computes_ratios(tmp_path: Path) -> None:
    """With a stored snapshot, deltas and ratios are computed against its estimates."""
    from infospace_bench.budget import record_plan_snapshot, record_run_variance

    root = _build_infospace(tmp_path)
    plan_summary = plan_generation(root, cost_per_1k_tokens=0.5, persist=False)
    # Pin the planned figures so the expected deltas/ratios are exact.
    pinned_estimates = (
        ("total_provider_calls_estimate", 8),
        ("total_prompt_tokens_estimate", 1000),
        ("estimated_cost_usd", 0.5),
    )
    for field, value in pinned_estimates:
        plan_summary[field] = value
    snapshot_id = record_plan_snapshot(root, plan_summary)

    actuals = {
        "total_calls": 10,
        "total_prompt_tokens": 1500,
        "total_completion_tokens": 500,
        "total_tokens": 2000,
        "total_cost_usd_known": 0.0,
        "total_cost_usd_estimated": 0.625,
    }
    run_entry = {
        "run_index": 1,
        "snapshot_id": snapshot_id,
        "rollup": actuals,
        "per_bucket": [],
    }

    summary = record_run_variance(root, run_entry)

    assert summary["snapshot_resolved"] is True
    calls = summary["calls"]
    assert calls["estimated"] == 8
    assert calls["actual"] == 10
    assert calls["delta"] == 2
    assert calls["ratio"] == 1.25
    assert summary["prompt_tokens"]["delta"] == 500
    cost = summary["cost_usd"]
    assert cost["estimated"] == 0.5
    assert cost["actual_total"] == 0.625
    assert cost["delta"] == 0.125
    assert cost["ratio"] == 1.25
|
|
|
|
|
|
def test_run_generation_persists_variance_and_status_surfaces_it(tmp_path: Path) -> None:
    """After a run, the summary file exists and status exposes ``budget_summary``."""
    from infospace_bench.budget import SUMMARY_FILE
    from infospace_bench.generator import run_generation, status_generation

    root = _build_infospace(tmp_path)
    responses_path = tmp_path / "responses.yaml"
    _write_minimal_fixture(responses_path)
    plan_payload = plan_generation(root)

    run_generation(root, fixture_responses=responses_path)
    status = status_generation(root)

    assert (root / SUMMARY_FILE).is_file()
    budget = status["budget_summary"]
    assert budget is not None
    assert budget["snapshot_id"] == plan_payload["snapshot_id"]
    assert budget["snapshot_resolved"] is True
    # Fixture runs report zero known cost; per_workflow variance is keyed by workflow_id
    by_workflow = {row["workflow_id"]: row for row in budget["per_workflow"]}
    assert "generic-source-entities" in by_workflow
|
|
|
|
|
|
def test_generation_report_includes_variance_line(tmp_path: Path) -> None:
    """The generation summary report carries a plan-variance section."""
    from infospace_bench.generator import run_generation

    root = _build_infospace(tmp_path)
    responses_path = tmp_path / "responses.yaml"
    _write_minimal_fixture(responses_path)
    plan_generation(root)
    run_generation(root, fixture_responses=responses_path)

    report_path = root / "reports" / "generation-summary.md"
    report_text = report_path.read_text(encoding="utf-8")

    assert "## Plan variance" in report_text
    assert "calls" in report_text.lower()
|
|
|
|
|
|
def test_emit_token_event_calls_poster_with_record_token_payload(tmp_path: Path) -> None:
    """A run with token usage is posted once to the hub's token-events endpoint."""
    from infospace_bench.budget import emit_token_event

    posted: list[tuple[str, dict, float]] = []

    def recording_poster(url: str, payload: dict, timeout: float) -> None:
        # Capture the call instead of doing any network I/O.
        posted.append((url, payload, timeout))

    rollup = {
        "total_prompt_tokens": 1200,
        "total_completion_tokens": 400,
        "total_cost_usd_known": 0.0,
        "total_cost_usd_estimated": 0.05,
    }
    run_entry = {
        "run_index": 2,
        "snapshot_id": "abc123",
        "rollup": rollup,
        "per_bucket": [{"model": "openai/gpt-4o-mini", "total_tokens": 1600}],
    }

    result = emit_token_event(
        run_entry,
        infospace_slug="lefevre",
        workspace="/tmp/workspaces/lefevre",
        hub_url="http://hub.example",
        poster=recording_poster,
    )

    assert result["status"] == "emitted"
    assert len(posted) == 1
    url, payload, timeout = posted[0]
    assert url == "http://hub.example/state/token-events"
    assert payload["tokens_in"] == 1200
    assert payload["tokens_out"] == 400
    assert payload["model"] == "openai/gpt-4o-mini"
    assert payload["agent"] == "infospace-bench"
    assert payload["ref_type"] == "session"
    assert payload["ref_id"] == "lefevre/run-2"
    note = payload["note"]
    assert "infospace=lefevre" in note
    assert "snapshot=abc123" in note
    assert timeout > 0
|
|
|
|
|
|
def test_emit_token_event_respects_disable_env(monkeypatch, tmp_path: Path) -> None:
    """Setting the disable variable to ``"1"`` suppresses the post entirely."""
    from infospace_bench.budget import HUB_DISABLE_ENV, emit_token_event

    monkeypatch.setenv(HUB_DISABLE_ENV, "1")
    calls: list = []

    def recording_poster(*args, **kwargs) -> None:
        # Would record positional args if the poster were ever invoked.
        calls.append(args)

    run_entry = {
        "run_index": 1,
        "rollup": {"total_prompt_tokens": 100, "total_completion_tokens": 50},
        "per_bucket": [],
    }
    result = emit_token_event(run_entry, infospace_slug="foo", poster=recording_poster)

    assert result["status"] == "disabled"
    assert calls == []
|
|
|
|
|
|
def test_emit_token_event_isolates_poster_failure(tmp_path: Path) -> None:
    """An exception raised by the poster is reported in the result, not propagated."""
    from infospace_bench.budget import emit_token_event

    def failing_poster(url: str, payload: dict, timeout: float) -> None:
        raise RuntimeError("hub down")

    run_entry = {
        "run_index": 1,
        "rollup": {"total_prompt_tokens": 50, "total_completion_tokens": 25},
        "per_bucket": [{"model": "openai/gpt-4o-mini", "total_tokens": 75}],
    }

    result = emit_token_event(run_entry, infospace_slug="foo", poster=failing_poster)

    assert result["status"] == "failed"
    assert "hub down" in result["reason"]
|
|
|
|
|
|
def test_emit_token_event_skips_when_no_token_usage() -> None:
    """A run whose prompt and completion totals are both zero is skipped."""
    from infospace_bench.budget import emit_token_event

    def silent_poster(*args, **kwargs) -> None:
        return None

    empty_run = {
        "run_index": 1,
        "rollup": {"total_prompt_tokens": 0, "total_completion_tokens": 0},
        "per_bucket": [],
    }
    result = emit_token_event(empty_run, infospace_slug="foo", poster=silent_poster)

    assert result["status"] == "skipped"
|
|
|
|
|
|
def test_emit_token_event_marks_multi_model_as_mixed() -> None:
    """When buckets name more than one model, the payload model is ``"mixed"``."""
    from infospace_bench.budget import emit_token_event

    payloads: list[dict] = []

    def capturing_poster(url: str, payload: dict, timeout: float) -> None:
        payloads.append(payload)

    models = ("openai/gpt-4o-mini", "anthropic/claude-3.5-haiku")
    run_entry = {
        "run_index": 1,
        "rollup": {"total_prompt_tokens": 200, "total_completion_tokens": 100},
        "per_bucket": [{"model": model, "total_tokens": 150} for model in models],
    }

    emit_token_event(run_entry, infospace_slug="foo", poster=capturing_poster)

    assert payloads[0]["model"] == "mixed"
|
|
|
|
|
|
def test_run_generation_never_fails_when_hub_is_down(tmp_path: Path, monkeypatch) -> None:
    """A run still completes when the configured hub URL cannot be reached."""
    from infospace_bench.budget import HUB_URL_ENV
    from infospace_bench.generator import run_generation, status_generation

    # Point the hub at loopback port 1, where no hub is expected to listen, so
    # the real poster is exercised and fails.
    monkeypatch.setenv(HUB_URL_ENV, "http://127.0.0.1:1")
    root = _build_infospace(tmp_path)
    responses_path = tmp_path / "responses.yaml"
    _write_minimal_fixture(responses_path)
    plan_generation(root)

    outcome = run_generation(root, fixture_responses=responses_path)
    status = status_generation(root)

    assert outcome.status == "completed"
    assert status["completed"] is True
|
|
|
|
|
|
def test_budget_show_returns_full_per_infospace_structure(tmp_path: Path) -> None:
    """``budget_show`` returns root, plans, usage and summary after a plan and run."""
    from infospace_bench.budget import budget_show
    from infospace_bench.generator import run_generation

    root = _build_infospace(tmp_path)
    responses_path = tmp_path / "responses.yaml"
    _write_minimal_fixture(responses_path)
    plan_generation(root)
    run_generation(root, fixture_responses=responses_path)

    shown = budget_show(root)

    assert shown["root"] == str(root)
    assert shown["plans"]["snapshots"], "plans must round-trip"
    assert shown["usage"]["runs"], "usage runs must round-trip"
    summary = shown["summary"]
    assert summary is not None
    assert summary["snapshot_resolved"] is True
|
|
|
|
|
|
def test_budget_list_workspace_rolls_up_multiple_infospaces(tmp_path: Path) -> None:
    """The workspace listing reports per-infospace counts, including a plan-only one."""
    from infospace_bench.budget import budget_list_workspace
    from infospace_bench.generator import init_generation_infospace, run_generation

    epub_path = tmp_path / "book.epub"
    _write_three_chapter_epub(epub_path)
    responses_path = tmp_path / "responses.yaml"
    _write_minimal_fixture(responses_path)

    alpha = init_generation_infospace(tmp_path, epub_path, "alpha", name="Alpha")
    beta = init_generation_infospace(tmp_path, epub_path, "beta", name="Beta")
    plan_generation(alpha.root, from_chapter=1, to_chapter=2)
    plan_generation(beta.root)
    # Only alpha gets a run; beta keeps a plan but no run to verify partial rollups.
    run_generation(alpha.root, fixture_responses=responses_path)

    by_slug = {entry["slug"]: entry for entry in budget_list_workspace(tmp_path)}

    assert {"alpha", "beta"} <= set(by_slug.keys())

    alpha_rollup = by_slug["alpha"]
    assert alpha_rollup["plans_count"] >= 1
    assert alpha_rollup["runs_count"] == 1
    assert alpha_rollup["last_run_at"] is not None

    beta_rollup = by_slug["beta"]
    assert beta_rollup["plans_count"] >= 1
    assert beta_rollup["runs_count"] == 0
    assert beta_rollup["last_run_at"] is None
|
|
|
|
|
|
def test_budget_list_workspace_handles_missing_or_empty_directories(tmp_path: Path) -> None:
    """An empty workspace lists nothing; an infospace without budget data lists zeros."""
    from infospace_bench.budget import budget_list_workspace
    from infospace_bench.generator import init_generation_infospace

    # No infospaces/ at all → empty list, not an error
    assert budget_list_workspace(tmp_path) == []

    # An infospace with no budget dir → rollup with zeros, not a crash
    epub_path = tmp_path / "book.epub"
    _write_three_chapter_epub(epub_path)
    init_generation_infospace(tmp_path, epub_path, "zero", name="Zero")

    rollups = budget_list_workspace(tmp_path)
    assert len(rollups) == 1
    only = rollups[0]
    assert only["slug"] == "zero"
    assert only["plans_count"] == 0
    assert only["runs_count"] == 0
    assert only["total_tokens"] == 0
    assert only["total_cost_usd_known"] == 0.0
|
|
|
|
|
|
def test_budget_cli_list_and_show(tmp_path: Path) -> None:
    """``budget list`` and ``budget show`` exit 0 and print JSON for a populated infospace.

    Both commands run as ``python -m infospace_bench`` subprocesses after a
    plan and a fixture-backed run have been recorded.
    """
    from infospace_bench.generator import run_generation

    root = _build_infospace(tmp_path)
    fixture = tmp_path / "responses.yaml"
    _write_minimal_fixture(fixture)
    plan_generation(root)
    run_generation(root, fixture_responses=fixture)

    env = os.environ.copy()
    # Build the search path with the platform separator and keep whatever
    # PYTHONPATH the caller already exported instead of overwriting it.
    # NOTE(review): the absolute entry is specific to one developer machine and
    # "src" is resolved against the subprocess working directory -- confirm
    # whether both are still needed.
    search_path = ["src", "/home/worsch/markitect-tool/src"]
    inherited = env.get("PYTHONPATH")
    if inherited:
        search_path.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(search_path)

    def run_cli(*cli_args: str) -> subprocess.CompletedProcess:
        # check=False so a non-zero exit surfaces through the assertions below
        # together with the captured stderr.
        return subprocess.run(
            [sys.executable, "-m", "infospace_bench", *cli_args],
            check=False,
            env=env,
            text=True,
            capture_output=True,
        )

    list_result = run_cli("budget", "list", str(tmp_path))
    show_result = run_cli("budget", "show", str(root))

    assert list_result.returncode == 0, list_result.stderr
    assert show_result.returncode == 0, show_result.stderr
    list_payload = json.loads(list_result.stdout)
    show_payload = json.loads(show_result.stdout)
    assert list_payload["workspace"] == str(tmp_path)
    assert any(item["slug"] == "budget-test" for item in list_payload["infospaces"])
    assert show_payload["plans"]["snapshots"]
    assert show_payload["usage"]["runs"]
|
|
|
|
|
|
def test_archive_includes_budget_dir_and_records_summary(tmp_path: Path) -> None:
    """Archiving after a plan and run stores a ``budget_summary`` in the record metadata."""
    from infospace_bench.archive import archive_infospace
    from infospace_bench.budget import PLANS_FILE, USAGE_FILE
    from infospace_bench.generator import run_generation

    root = _build_infospace(tmp_path)
    responses_path = tmp_path / "responses.yaml"
    _write_minimal_fixture(responses_path)
    plan_generation(root)
    run_generation(root, fixture_responses=responses_path)
    # Precondition: both budget files exist before archiving.
    for budget_file in (PLANS_FILE, USAGE_FILE):
        assert (root / budget_file).is_file()

    record = archive_infospace(root, retention_class="release-evidence")

    budget_summary = record.metadata.get("budget_summary")
    assert budget_summary is not None
    assert budget_summary["plans_count"] >= 1
    assert budget_summary["runs_count"] == 1
    assert "latest_snapshot_id" in budget_summary
    # The budget dir is implicitly included via output/, so the archive's
    # file_count should reflect that.
    assert record.file_count > 0
|
|
|
|
|
|
def test_archive_metadata_empty_when_no_budget_data(tmp_path: Path) -> None:
    """Archiving a fresh infospace yields no ``budget_summary`` in the metadata."""
    from infospace_bench.archive import archive_infospace

    root = _build_infospace(tmp_path)

    record = archive_infospace(root, retention_class="release-evidence")

    budget_summary = record.metadata.get("budget_summary")
    # Either the key is absent/None, or the metadata mapping is empty altogether.
    assert budget_summary is None or record.metadata == {}
|
|
|
|
|
|
def test_plan_cli_writes_snapshot(tmp_path: Path) -> None:
    """``generate plan`` run through the CLI prints a snapshot id and stores the snapshot.

    The command is run as a ``python -m infospace_bench`` subprocess with a
    chapter 1-2 filter, and the stored snapshot must echo that filter.
    """
    root = _build_infospace(tmp_path)

    env = os.environ.copy()
    # Build the search path with the platform separator and keep whatever
    # PYTHONPATH the caller already exported instead of overwriting it.
    # NOTE(review): the absolute entry is specific to one developer machine and
    # "src" is resolved against the subprocess working directory -- confirm
    # whether both are still needed.
    search_path = ["src", "/home/worsch/markitect-tool/src"]
    inherited = env.get("PYTHONPATH")
    if inherited:
        search_path.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(search_path)

    command = [
        sys.executable,
        "-m",
        "infospace_bench",
        "generate",
        "plan",
        str(root),
        "--from-chapter",
        "1",
        "--to-chapter",
        "2",
        "--cost-per-1k",
        "0.5",
    ]
    # check=False so a non-zero exit is reported with stderr by the assertion.
    result = subprocess.run(
        command,
        check=False,
        env=env,
        text=True,
        capture_output=True,
    )

    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    assert "snapshot_id" in payload
    snapshots = read_plan_snapshots(root)
    assert len(snapshots) == 1
    filters = snapshots[0]["filters"]
    assert filters["from_chapter"] == 1
    assert filters["to_chapter"] == 2
|