IB-WP-0019-T02: usage rollup from run records

Every completed generate run now aggregates per-call adapter usage from the workflow-engine run records into output/budget/usage.yaml. Per-call data is bucketed by (workflow_id, stage_id, provider, model) with running totals for calls, prompt_tokens, completion_tokens, total_tokens, and cost_usd_known (sum of adapter-reported cost when the provider returns it; usually zero today). A run-level entry captures run_index, started_at, completed_at, duration_seconds, the executing plan snapshot_id (resolved from the latest plans.yaml entry), and the workflow-level run_id / stage_count summaries. cost_usd_estimated is left as None for this task; T03 will wire in the rate-table resolver so that the same bucket gets a model-priced fallback when the adapter does not return cost directly. Fixture-mode runs are recorded with provider='fixture', zero tokens, and cost_status='unknown' rather than silently skipped, so the rollup honestly reflects which stages actually ran. 102 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-17 19:46:40 +02:00
parent 37bbaf9fab
commit 678508226a
4 changed files with 315 additions and 2 deletions
--- a/src/infospace_bench/generator.py
+++ b/src/infospace_bench/generator.py
@@ -2,11 +2,14 @@ from __future__ import annotations

 import hashlib
 import shutil
+import time
 from dataclasses import asdict, dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any

+_monotonic = time.monotonic
+
 import yaml

 from .checks import run_collection_checks
@@ -15,7 +18,11 @@ from .evaluation_io import read_entity_evaluations
 from .history import get_history, read_metrics_file, record_check_results
 from .lifecycle import create_infospace, load_infospace, register_artifact
 from .openrouter import OpenRouterAssistedGenerationAdapter
-from .budget import record_plan_snapshot
+from .budget import (
+    latest_plan_snapshot_id,
+    record_plan_snapshot,
+    record_run_usage,
+)
 from .source_intake import SourceChunk, normalize_source
 from .workflow import (
    AssistedGenerationAdapter,
@@ -343,6 +350,8 @@ def run_generation(
            metrics=status.get("metrics", {}),
        )

+    started_wall = datetime.now(timezone.utc)
+    monotonic_start = _monotonic()
    adapter = (
        _adapter_for(provider, model=model, fixture_responses=fixture_responses)
        if workflow_ids
@@ -379,6 +388,15 @@ def run_generation(
        }
    )
    _write_state(root_path, state)
+    if workflow_results:
+        duration_seconds = round(_monotonic() - monotonic_start, 3)
+        record_run_usage(
+            root_path,
+            workflow_results,
+            snapshot_id=latest_plan_snapshot_id(root_path),
+            duration_seconds=duration_seconds,
+            started_at=started_wall.isoformat(),
+        )
    return GenerationRunResult(
        root=str(root_path),
        status="completed",