From bb70b2f4b9f86b4111329dde2f2a25b3ccf631d3 Mon Sep 17 00:00:00 2001 From: tegwick Date: Sun, 17 May 2026 21:53:28 +0200 Subject: [PATCH] IB-WP-0019-T07: archive integration; close IB-WP-0019 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default archive include set already pulls output/ in wholesale, so output/budget/ already lands inside the archive package with no code change. Add a budget_summary block to ArchiveRecord.metadata so catalog-level tools can see plans_count, runs_count, total_tokens, total_cost_usd_known, total_cost_usd_estimated, and the latest_snapshot_id without unpacking the archive. An infospace with no budget data still archives cleanly with an empty metadata dict. Closes IB-WP-0019 (Budget and Usage Registry): T01-T07 all done. Three-layer design landed end-to-end — layer 1 (per-infospace plans.yaml / usage.yaml / summary.yaml) and layer 3 (state-hub record_token_event emission with failure isolation) live here; layer 2 (cross-application QualityLedger for adaptive routing) is parked in llm-connect LLM-WP-0004 and infospace-bench IB-WP-0018 awaits it. 122 tests pass. Co-Authored-By: Claude Opus 4.7 --- src/infospace_bench/archive.py | 53 +++++++++++++++++++ tests/test_budget_registry.py | 34 ++++++++++++ .../IB-WP-0019-budget-and-usage-registry.md | 4 +- 3 files changed, 89 insertions(+), 2 deletions(-) diff --git a/src/infospace_bench/archive.py b/src/infospace_bench/archive.py index ee1da28..032851a 100644 --- a/src/infospace_bench/archive.py +++ b/src/infospace_bench/archive.py @@ -337,12 +337,65 @@ async def _archive_infospace_async( producer=PRODUCER, subject=subject, store_root=str(effective_store_root) if effective_store_root else None, + metadata=_archive_metadata(root), skipped_top_level=skipped_top_level, ) _append_index(root, record) return record +def _archive_metadata(root: Path) -> dict[str, Any]: + """Compute a small budget_summary for the archive manifest. + + Lets catalog-level tools find an archived infospace's cost shape without + unpacking it. Returns an empty dict when no budget data exists. + """ + from .budget import ( + read_plan_snapshots, + read_run_variance, + read_usage_runs, + ) + + try: + plans = read_plan_snapshots(root) + runs = read_usage_runs(root) + summary = read_run_variance(root) + except Exception: + return {} + if not plans and not runs and summary is None: + return {} + total_tokens = 0 + total_cost_known = 0.0 + total_cost_estimated = 0.0 + for run in runs: + rollup = run.get("rollup") or {} + total_tokens += int(rollup.get("total_tokens") or 0) + try: + total_cost_known += float(rollup.get("total_cost_usd_known") or 0.0) + except (TypeError, ValueError): + pass + estimated = rollup.get("total_cost_usd_estimated") + if estimated is not None: + try: + total_cost_estimated += float(estimated) + except (TypeError, ValueError): + pass + budget_summary: dict[str, Any] = { + "plans_count": len(plans), + "runs_count": len(runs), + "total_tokens": total_tokens, + "total_cost_usd_known": round(total_cost_known, 6), + "total_cost_usd_estimated": round(total_cost_estimated, 6) if total_cost_estimated else None, + } + if plans: + budget_summary["latest_snapshot_id"] = plans[-1].get("snapshot_id") + if summary is not None and isinstance(summary.get("snapshot_id"), str): + budget_summary["last_run_snapshot_id"] = summary.get("snapshot_id") + return {"budget_summary": budget_summary} + + + + def _collect_files( root: Path, *, diff --git a/tests/test_budget_registry.py b/tests/test_budget_registry.py index dd02690..85453b5 100644 --- a/tests/test_budget_registry.py +++ b/tests/test_budget_registry.py @@ -722,6 +722,40 @@ def test_budget_cli_list_and_show(tmp_path: Path) -> None: assert show_payload["usage"]["runs"] +def test_archive_includes_budget_dir_and_records_summary(tmp_path: Path) -> None: + from infospace_bench.archive import archive_infospace + from infospace_bench.budget import PLANS_FILE, USAGE_FILE + from infospace_bench.generator import run_generation + + root = _build_infospace(tmp_path) + fixture = tmp_path / "responses.yaml" + _write_minimal_fixture(fixture) + plan_generation(root) + run_generation(root, fixture_responses=fixture) + assert (root / PLANS_FILE).is_file() + assert (root / USAGE_FILE).is_file() + + record = archive_infospace(root, retention_class="release-evidence") + + summary = record.metadata.get("budget_summary") + assert summary is not None + assert summary["plans_count"] >= 1 + assert summary["runs_count"] == 1 + assert "latest_snapshot_id" in summary + # The budget dir is implicitly included via output/, so the archive's + # file_count should reflect that. + assert record.file_count > 0 + + +def test_archive_metadata_empty_when_no_budget_data(tmp_path: Path) -> None: + from infospace_bench.archive import archive_infospace + + root = _build_infospace(tmp_path) + + record = archive_infospace(root, retention_class="release-evidence") + assert record.metadata.get("budget_summary") is None or record.metadata == {} + + def test_plan_cli_writes_snapshot(tmp_path: Path) -> None: root = _build_infospace(tmp_path) env = os.environ.copy() diff --git a/workplans/IB-WP-0019-budget-and-usage-registry.md b/workplans/IB-WP-0019-budget-and-usage-registry.md index 60116e6..c86ea69 100644 --- a/workplans/IB-WP-0019-budget-and-usage-registry.md +++ b/workplans/IB-WP-0019-budget-and-usage-registry.md @@ -4,7 +4,7 @@ type: workplan title: "Budget and Usage Registry for Infospaces" domain: markitect repo: infospace-bench -status: todo +status: done owner: markitect topic_slug: markitect created: "2026-05-17" @@ -195,7 +195,7 @@ state_hub_task_id: "7cb34bfc-c562-4dda-a6d4-b44158644e19" ```task id: IB-WP-0019-T07 -status: todo +status: done priority: low state_hub_task_id: "b97906e0-2835-4246-9868-840c02d64fae" ```