IB-WP-0019-T05: state-hub token-event emission with failure isolation

Emit one record_token_event payload per completed generate run, derived
from the just-recorded usage rollup. tokens_in/out come from the
rollup, model defaults to the dominant model used (or "mixed" when
buckets disagree), agent="infospace-bench", ref_type="session", and
ref_id="<slug>/run-<run_index>". The note carries the infospace slug,
workspace, snapshot_id, and any known/estimated cost so the hub event
is self-describing.

Failure isolation: any exception from the HTTP poster (hub down,
timeout, 5xx) is caught, logged to stderr, and reported as
status=failed; the generate run still completes. INFOSPACE_BENCH_HUB_URL
overrides the default http://127.0.0.1:8000 base;
INFOSPACE_BENCH_DISABLE_HUB_TOKEN_EVENTS skips emission entirely.

Tests cover the happy path, the disable env var, poster failure, the
no-usage skip, multi-model coalescing to "mixed", and an end-to-end
run_generation against an unbindable hub port to prove the run survives
when the hub is unreachable. 116 tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-17 20:33:29 +02:00
parent d4c9c56f5c
commit 110c78b9ad
4 changed files with 241 additions and 1 deletions

View File

@@ -489,6 +489,140 @@ def test_generation_report_includes_variance_line(tmp_path: Path) -> None:
assert "calls" in report.lower()
def test_emit_token_event_calls_poster_with_record_token_payload(tmp_path: Path) -> None:
    """Happy path: one run entry yields exactly one POST to the hub.

    A recording poster stands in for the HTTP layer. The test checks the
    target URL, the token counts copied from the rollup, the single-bucket
    model name, the fixed agent/ref fields, and that the note mentions the
    infospace slug and snapshot id.
    """
    from infospace_bench.budget import emit_token_event

    recorded: list[tuple[str, dict, float]] = []

    def recording_poster(url: str, payload: dict, timeout: float) -> None:
        recorded.append((url, payload, timeout))

    usage_rollup = {
        "total_prompt_tokens": 1200,
        "total_completion_tokens": 400,
        "total_cost_usd_known": 0.0,
        "total_cost_usd_estimated": 0.05,
    }
    entry = {
        "run_index": 2,
        "snapshot_id": "abc123",
        "rollup": usage_rollup,
        "per_bucket": [
            {"model": "openai/gpt-4o-mini", "total_tokens": 1600},
        ],
    }

    outcome = emit_token_event(
        entry,
        infospace_slug="lefevre",
        workspace="/tmp/workspaces/lefevre",
        hub_url="http://hub.example",
        poster=recording_poster,
    )

    assert outcome["status"] == "emitted"
    assert len(recorded) == 1

    posted_url, body, posted_timeout = recorded[0]
    assert posted_url == "http://hub.example/state/token-events"

    # Exact-match fields of the record_token_event payload.
    expected_fields = {
        "tokens_in": 1200,
        "tokens_out": 400,
        "model": "openai/gpt-4o-mini",
        "agent": "infospace-bench",
        "ref_type": "session",
        "ref_id": "lefevre/run-2",
    }
    for field, wanted in expected_fields.items():
        assert body[field] == wanted

    # The note is free-form; only require the self-describing fragments.
    for fragment in ("infospace=lefevre", "snapshot=abc123"):
        assert fragment in body["note"]

    assert posted_timeout > 0
def test_emit_token_event_respects_disable_env(monkeypatch, tmp_path: Path) -> None:
    """With the disable env var set, nothing is posted and status is "disabled"."""
    from infospace_bench.budget import HUB_DISABLE_ENV, emit_token_event

    monkeypatch.setenv(HUB_DISABLE_ENV, "1")

    calls: list = []

    def spy_poster(*args, **kwargs):
        # Record positional args only; any invocation at all fails the test.
        return calls.append(args)

    entry = {
        "run_index": 1,
        "rollup": {"total_prompt_tokens": 100, "total_completion_tokens": 50},
        "per_bucket": [],
    }
    outcome = emit_token_event(entry, infospace_slug="foo", poster=spy_poster)

    assert outcome["status"] == "disabled"
    assert calls == []
def test_emit_token_event_isolates_poster_failure(tmp_path: Path) -> None:
    """A poster that raises must not propagate; the result reports the failure."""
    from infospace_bench.budget import emit_token_event

    def exploding_poster(url: str, payload: dict, timeout: float) -> None:
        raise RuntimeError("hub down")

    entry = {
        "run_index": 1,
        "rollup": {"total_prompt_tokens": 50, "total_completion_tokens": 25},
        "per_bucket": [{"model": "openai/gpt-4o-mini", "total_tokens": 75}],
    }
    outcome = emit_token_event(
        entry,
        infospace_slug="foo",
        poster=exploding_poster,
    )

    assert outcome["status"] == "failed"
    # The original exception text should be surfaced in the reason.
    assert "hub down" in outcome["reason"]
def test_emit_token_event_skips_when_no_token_usage() -> None:
    """A rollup with zero prompt and completion tokens is reported as "skipped"."""
    from infospace_bench.budget import emit_token_event

    def noop_poster(*args, **kwargs):
        return None

    empty_entry = {
        "run_index": 1,
        "rollup": {"total_prompt_tokens": 0, "total_completion_tokens": 0},
        "per_bucket": [],
    }
    outcome = emit_token_event(empty_entry, infospace_slug="foo", poster=noop_poster)

    assert outcome["status"] == "skipped"
def test_emit_token_event_marks_multi_model_as_mixed() -> None:
    """Two buckets naming different models produce model == "mixed"."""
    from infospace_bench.budget import emit_token_event

    posted_payloads: list[dict] = []

    def capturing_poster(url: str, payload: dict, timeout: float) -> None:
        posted_payloads.append(payload)

    buckets = [
        {"model": "openai/gpt-4o-mini", "total_tokens": 150},
        {"model": "anthropic/claude-3.5-haiku", "total_tokens": 150},
    ]
    entry = {
        "run_index": 1,
        "rollup": {"total_prompt_tokens": 200, "total_completion_tokens": 100},
        "per_bucket": buckets,
    }
    emit_token_event(entry, infospace_slug="foo", poster=capturing_poster)

    first_payload = posted_payloads[0]
    assert first_payload["model"] == "mixed"
def test_run_generation_never_fails_when_hub_is_down(tmp_path: Path, monkeypatch) -> None:
    """End-to-end: a generate run completes even though the hub is unreachable.

    The hub URL is pointed at a local port that is expected to have no
    listener, so the real poster fails. The run must still report
    ``completed`` both in its result and in the persisted status.
    """
    from infospace_bench.budget import HUB_DISABLE_ENV, HUB_URL_ENV
    from infospace_bench.generator import run_generation, status_generation

    # Make sure emission is actually attempted: if the ambient environment
    # disables hub events, the run would pass without ever touching the
    # failing poster and this test would prove nothing.
    monkeypatch.delenv(HUB_DISABLE_ENV, raising=False)

    # Port 1 (tcpmux) is a privileged port that normally has no listener, so
    # the connection should be refused quickly rather than hang until timeout.
    monkeypatch.setenv(HUB_URL_ENV, "http://127.0.0.1:1")

    root = _build_infospace(tmp_path)
    fixture = tmp_path / "responses.yaml"
    _write_minimal_fixture(fixture)
    plan_generation(root)

    result = run_generation(root, fixture_responses=fixture)
    status = status_generation(root)

    assert result.status == "completed"
    assert status["completed"] is True
def test_plan_cli_writes_snapshot(tmp_path: Path) -> None:
root = _build_infospace(tmp_path)
env = os.environ.copy()