IB-WP-0019-T05: state-hub token-event emission with failure isolation

Emit one record_token_event payload per completed generate run, derived from the just-recorded usage rollup. tokens_in/out come from the rollup, model defaults to the dominant model used (or "mixed" when buckets disagree), agent="infospace-bench", ref_type="session", and ref_id="<slug>/run-<run_index>". The note carries the infospace slug, workspace, snapshot_id, and any known/estimated cost so the hub event is self-describing. Failure isolation: any exception from the HTTP poster (hub down, timeout, 5xx) is caught, logged to stderr, and reported as status=failed; the generate run still completes. INFOSPACE_BENCH_HUB_URL overrides the default http://127.0.0.1:8000 base; INFOSPACE_BENCH_DISABLE_HUB_TOKEN_EVENTS skips emission entirely. Tests cover the happy path, the disable env var, poster failure, the no-usage skip, multi-model coalescing to "mixed", and an end-to-end run_generation against an unbindable hub port to prove the run survives when the hub is unreachable. 116 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-17 20:33:29 +02:00
parent d4c9c56f5c
commit 110c78b9ad
4 changed files with 241 additions and 1 deletions
--- a/src/infospace_bench/budget.py
+++ b/src/infospace_bench/budget.py
@@ -13,6 +13,10 @@ from __future__ import annotations

 import hashlib
 import json
+import os
+import sys
+import urllib.error
+import urllib.request
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Callable
@@ -22,6 +26,12 @@ import yaml
 RATES_FILENAME = "model-rates.yaml"
 _PACKAGE_RATES_PATH = Path(__file__).parent / "model_rates.yaml"

+# Name of the environment variable that overrides the state-hub base URL.
+HUB_URL_ENV = "INFOSPACE_BENCH_HUB_URL"
+# Name of the environment variable that, when set to any non-empty value,
+# turns token-event emission off entirely.
+HUB_DISABLE_ENV = "INFOSPACE_BENCH_DISABLE_HUB_TOKEN_EVENTS"
+# Base URL used when neither an explicit hub_url nor HUB_URL_ENV is given.
+DEFAULT_HUB_URL = "http://127.0.0.1:8000"
+# Path appended to the base URL to form the token-event endpoint.
+TOKEN_EVENTS_PATH = "/state/token-events"
+# Timeout (seconds) handed to the HTTP poster for each token-event POST.
+HUB_TIMEOUT_SECONDS = 3.0
+
 BUDGET_DIR = Path("output/budget")
 PLANS_FILE = BUDGET_DIR / "plans.yaml"
 USAGE_FILE = BUDGET_DIR / "usage.yaml"
@@ -275,6 +285,96 @@ def record_run_variance(
    return summary


+def emit_token_event(
+    run_entry: dict[str, Any],
+    *,
+    infospace_slug: str,
+    workspace: str | None = None,
+    hub_url: str | None = None,
+    poster: Callable[[str, dict[str, Any], float], Any] | None = None,
+) -> dict[str, Any]:
+    """POST one record_token_event payload to state-hub.
+
+    Args:
+        run_entry: Usage entry for one generate run. Its ``rollup``,
+            ``per_bucket``, ``run_index`` and ``snapshot_id`` keys are read.
+        infospace_slug: Slug used in ``ref_id`` and in the event note.
+        workspace: Optional workspace label added to the event note.
+        hub_url: Base URL of the hub. Falls back to the environment
+            variable named by ``HUB_URL_ENV``, then ``DEFAULT_HUB_URL``.
+        poster: Optional replacement for the built-in HTTP poster, called
+            as ``poster(url, payload, timeout)``.
+
+    Returns:
+        A result dict with ``status`` in {emitted, disabled, skipped,
+        failed} and a ``reason`` field when not emitted.
+
+    Never raises for a state-hub outage or a malformed usage entry: both
+    are logged to stderr and reported as ``failed`` so the generate run
+    still completes.
+    """
+    if os.environ.get(HUB_DISABLE_ENV):
+        return {"status": "disabled", "reason": f"{HUB_DISABLE_ENV} set"}
+    # Payload construction is guarded too: a non-numeric token count or a
+    # non-dict bucket must not abort the run any more than a hub outage may.
+    try:
+        rollup = run_entry.get("rollup") or {}
+        tokens_in = int(rollup.get("total_prompt_tokens") or 0)
+        tokens_out = int(rollup.get("total_completion_tokens") or 0)
+        if tokens_in == 0 and tokens_out == 0:
+            return {"status": "skipped", "reason": "no token usage to report"}
+        model = _dominant_model(run_entry.get("per_bucket") or [])
+        payload = {
+            "tokens_in": tokens_in,
+            "tokens_out": tokens_out,
+            "model": model,
+            "agent": "infospace-bench",
+            "ref_type": "session",
+            "ref_id": f"{infospace_slug}/run-{run_entry.get('run_index')}",
+            "note": _token_event_note(run_entry, infospace_slug, workspace),
+        }
+    except (AttributeError, TypeError, ValueError) as exc:
+        sys.stderr.write(
+            f"[budget] state-hub token event not sent, malformed usage entry: {exc}\n"
+        )
+        return {"status": "failed", "reason": f"malformed usage entry: {exc}"}
+    url = (hub_url or os.environ.get(HUB_URL_ENV) or DEFAULT_HUB_URL).rstrip("/") + TOKEN_EVENTS_PATH
+    try:
+        if poster is not None:
+            poster(url, payload, HUB_TIMEOUT_SECONDS)
+        else:
+            _post_json(url, payload, HUB_TIMEOUT_SECONDS)
+    except Exception as exc:  # never let hub problems fail the run
+        sys.stderr.write(
+            f"[budget] state-hub token event failed ({url}): {exc}\n"
+        )
+        return {"status": "failed", "reason": str(exc), "url": url}
+    return {"status": "emitted", "url": url, "model": model, "tokens_in": tokens_in, "tokens_out": tokens_out}
+
+
+def _dominant_model(per_bucket: list[dict[str, Any]]) -> str:
+    """Return the one model named across *per_bucket*, or ``"mixed"``.
+
+    Buckets whose ``model`` is missing or empty are ignored. The result is
+    ``""`` when no bucket names a model, the model name when every naming
+    bucket agrees, and ``"mixed"`` as soon as two different names appear.
+    Token counts play no part in the decision, so they are not read.
+    """
+    names = {str(bucket.get("model") or "") for bucket in per_bucket}
+    names.discard("")
+    if not names:
+        return ""
+    if len(names) == 1:
+        return next(iter(names))
+    return "mixed"
+
+
+def _token_event_note(
+    run_entry: dict[str, Any], infospace_slug: str, workspace: str | None
+) -> str:
+    """Build the space-separated ``key=value`` note attached to a token event.
+
+    ``infospace=<slug>`` always comes first. ``workspace``, ``snapshot``,
+    ``cost_known_usd`` and ``cost_estimated_usd`` follow in that order, each
+    included only when its value is truthy.
+    """
+    totals = run_entry.get("rollup") or {}
+    optional_fields = (
+        ("workspace", workspace),
+        ("snapshot", run_entry.get("snapshot_id")),
+        ("cost_known_usd", totals.get("total_cost_usd_known")),
+        ("cost_estimated_usd", totals.get("total_cost_usd_estimated")),
+    )
+    fragments = [f"infospace={infospace_slug}"]
+    fragments.extend(
+        f"{label}={value}" for label, value in optional_fields if value
+    )
+    return " ".join(fragments)
+
+
+def _post_json(url: str, payload: dict[str, Any], timeout: float) -> None:
+    """Send *payload* to *url* as a JSON POST and drain the response.
+
+    Errors raised by ``urllib`` (connection failure, timeout, HTTP error
+    status) propagate to the caller.
+    """
+    encoded = json.dumps(payload).encode("utf-8")
+    req = urllib.request.Request(url, data=encoded, method="POST")
+    req.add_header("Content-Type", "application/json")
+    with urllib.request.urlopen(req, timeout=timeout) as reply:
+        reply.read()
+
+
 def read_run_variance(root: str | Path) -> dict[str, Any] | None:
    path = Path(root) / SUMMARY_FILE
    if not path.is_file():
--- a/src/infospace_bench/generator.py
+++ b/src/infospace_bench/generator.py
@@ -19,6 +19,7 @@ from .history import get_history, read_metrics_file, record_check_results
 from .lifecycle import create_infospace, load_infospace, register_artifact
 from .openrouter import OpenRouterAssistedGenerationAdapter
 from .budget import (
+    emit_token_event,
    latest_plan_snapshot_id,
    make_cost_resolver,
    read_run_variance,
@@ -411,6 +412,11 @@ def run_generation(
            cost_resolver=make_cost_resolver(_workspace_for(root_path)),
        )
        record_run_variance(root_path, usage_entry)
+        emit_token_event(
+            usage_entry,
+            infospace_slug=load_infospace(root_path).config.slug,
+            workspace=str(_workspace_for(root_path)),
+        )

    metrics: dict[str, Any] = {}
    snapshot_id = ""