IB-WP-0019-T03: rate-table cost computation

Ship a starter model rate table at src/infospace_bench/model_rates.yaml
(prompt_per_1k / completion_per_1k for the OpenRouter models we have
actually touched: gpt-4o, gpt-4o-mini, gpt-4-turbo, claude 3.5 sonnet
and haiku, claude 3 opus, gemini 1.5 flash/pro, llama 3.1 70b) and a
load_rate_table() / estimate_cost_usd() pair that overlays an optional
<workspace>/model-rates.yaml on top of the bundled defaults.

generate run now passes a workspace-aware cost_resolver into
record_run_usage, so cost_usd_estimated lands on every usage bucket
whose model matches the table. Adapter-returned cost still wins
(cost_status="known"); rate-table cost is reported under
cost_status="estimated"; unmatched models are recorded as
cost_status="unknown" rather than silently zeroed. Rate-table file is
listed in pyproject.toml package-data so pip-installed users keep the
defaults.

106 tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-17 19:54:30 +02:00
parent 678508226a
commit a4dde53fc3
7 changed files with 252 additions and 3 deletions

View File

@@ -48,6 +48,29 @@ infospace-bench generate status ./infospaces/book-space
shows chunk counts, generated artifact counts, evaluations, metrics, history, shows chunk counts, generated artifact counts, evaluations, metrics, history,
and stale source/profile inputs. and stale source/profile inputs.
### Budget and usage registry
Every `generate plan` invocation appends a compact snapshot to
`output/budget/plans.yaml` (deterministic 12-char `snapshot_id`, 50-entry
sliding retention). Every `generate run` invocation appends a usage
rollup to `output/budget/usage.yaml`, bucketed by `(workflow_id,
stage_id, provider, model)` with prompt and completion token counts,
known cost (when the adapter returned it), and estimated cost (when a
rate table entry matches the model).
The default rate table is bundled at
`src/infospace_bench/model_rates.yaml` and covers a handful of common
OpenRouter models at list price (see the `captured_at` date in the file).
A workspace can override or extend entries by placing
`model-rates.yaml` (hyphenated, unlike the bundled `model_rates.yaml`)
next to its `infospaces/` directory; the workspace file is overlaid on
top of the package default, so it only needs to list the models it
changes or adds.
Cost resolution order on each run: adapter-returned `cost` first, then
the rate table, then `cost_status="unknown"` (recorded explicitly,
never silently zeroed). The plan-vs-actual variance summary lands in
follow-on task T04.
### Profiles ### Profiles
Two profiles ship today: Two profiles ship today:

View File

@@ -13,7 +13,7 @@ dependencies = [
infospace-bench = "infospace_bench.cli:main" infospace-bench = "infospace_bench.cli:main"
[tool.setuptools.package-data] [tool.setuptools.package-data]
infospace_bench = ["profiles/**/*"] infospace_bench = ["profiles/**/*", "model_rates.yaml"]
[tool.pytest.ini_options] [tool.pytest.ini_options]
pythonpath = ["src", "../markitect-tool/src"] pythonpath = ["src", "../markitect-tool/src"]

View File

@@ -15,10 +15,13 @@ import hashlib
import json import json
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any, Callable
import yaml import yaml
RATES_FILENAME = "model-rates.yaml"
_PACKAGE_RATES_PATH = Path(__file__).parent / "model_rates.yaml"
BUDGET_DIR = Path("output/budget") BUDGET_DIR = Path("output/budget")
PLANS_FILE = BUDGET_DIR / "plans.yaml" PLANS_FILE = BUDGET_DIR / "plans.yaml"
USAGE_FILE = BUDGET_DIR / "usage.yaml" USAGE_FILE = BUDGET_DIR / "usage.yaml"
@@ -210,6 +213,76 @@ def read_usage_runs(root: str | Path) -> list[dict[str, Any]]:
return list(payload.get("runs") or []) return list(payload.get("runs") or [])
def load_rate_table(workspace: Path | str | None = None) -> dict[str, dict[str, float]]:
    """Load the model rate table, with optional workspace override.

    Returns a mapping ``model_id -> {prompt_per_1k, completion_per_1k}``. The
    workspace override (``<workspace>/model-rates.yaml``) is overlaid on top of
    the package default, so individual models can be tweaked without copying
    the whole table.

    Loading is best-effort and never raises for bad rate files: a missing or
    unreadable file, invalid YAML, a non-mapping document, a non-mapping
    ``rates`` section, or a non-mapping entry is skipped. Cost estimation is
    optional bookkeeping, so a broken rate file must not abort the caller.

    Within an entry, a rate for which ``_coerce_float`` returns ``None`` falls
    back to the value already loaded for that model by an earlier layer (the
    package default), and to ``0.0`` only when no earlier layer defined the
    model. Entries where both rates are unusable are ignored entirely.
    """
    rates: dict[str, dict[str, float]] = {}
    # Order matters: the package default is read first, the workspace file last,
    # so workspace values win.
    for path in (_PACKAGE_RATES_PATH, _workspace_rate_path(workspace)):
        if path is None or not path.is_file():
            continue
        try:
            data = yaml.safe_load(path.read_text(encoding="utf-8"))
        except (OSError, UnicodeError, yaml.YAMLError):
            # Unreadable, mis-encoded or malformed file: skip this layer.
            continue
        if not isinstance(data, dict):
            continue
        section = data.get("rates")
        if not isinstance(section, dict):
            # Covers a missing/empty section as well as e.g. a YAML list,
            # which would otherwise fail on ``.items()``.
            continue
        for model, entry in section.items():
            if not isinstance(entry, dict):
                continue
            prompt = _coerce_float(entry.get("prompt_per_1k"))
            completion = _coerce_float(entry.get("completion_per_1k"))
            if prompt is None and completion is None:
                continue
            model_id = str(model)
            # Keep the earlier layer's rate for any side this entry omits, so a
            # one-sided override does not silently price the other side at 0.
            inherited = rates.get(model_id, {})
            rates[model_id] = {
                "prompt_per_1k": (
                    prompt if prompt is not None else inherited.get("prompt_per_1k", 0.0)
                ),
                "completion_per_1k": (
                    completion
                    if completion is not None
                    else inherited.get("completion_per_1k", 0.0)
                ),
            }
    return rates
def estimate_cost_usd(
    model: str,
    prompt_tokens: int,
    completion_tokens: int,
    rate_table: dict[str, dict[str, float]],
) -> float | None:
    """Estimate the USD cost of one call from per-1k-token rates.

    Returns ``None`` when ``rate_table`` has no entry for ``model``; otherwise
    the prompt cost plus the completion cost, rounded to six decimals. A rate
    that is missing or falsy in the entry is treated as ``0.0``.
    """
    model_rates = rate_table.get(model)
    if model_rates is None:
        return None

    per_1k = {
        key: float(model_rates.get(key) or 0.0)
        for key in ("prompt_per_1k", "completion_per_1k")
    }
    prompt_cost = (prompt_tokens / 1000.0) * per_1k["prompt_per_1k"]
    completion_cost = (completion_tokens / 1000.0) * per_1k["completion_per_1k"]
    return round(prompt_cost + completion_cost, 6)
def make_cost_resolver(
    workspace: Path | str | None,
) -> Callable[[str, str, int, int], float | None]:
    """Build the callback passed as ``record_run_usage(..., cost_resolver=...)``.

    The rate table is loaded once, when the resolver is created, and reused for
    every lookup the returned callable performs.
    """
    table = load_rate_table(workspace)

    def _resolve(provider: str, model: str, prompt_tokens: int, completion_tokens: int) -> float | None:
        # ``provider`` is part of the resolver signature but is not consulted:
        # the table is keyed by model id alone.
        if model:
            return estimate_cost_usd(model, prompt_tokens, completion_tokens, table)
        return None

    return _resolve
def _workspace_rate_path(workspace: Path | str | None) -> Path | None:
    """Return ``<workspace>/<RATES_FILENAME>``, or ``None`` without a workspace.

    The path is only constructed here; whether the file exists is left to the
    caller.
    """
    return None if workspace is None else Path(workspace) / RATES_FILENAME
def _coerce_float(value: Any) -> float | None: def _coerce_float(value: Any) -> float | None:
if value is None: if value is None:
return None return None

View File

@@ -20,6 +20,7 @@ from .lifecycle import create_infospace, load_infospace, register_artifact
from .openrouter import OpenRouterAssistedGenerationAdapter from .openrouter import OpenRouterAssistedGenerationAdapter
from .budget import ( from .budget import (
latest_plan_snapshot_id, latest_plan_snapshot_id,
make_cost_resolver,
record_plan_snapshot, record_plan_snapshot,
record_run_usage, record_run_usage,
) )
@@ -324,6 +325,15 @@ def _read_profile_name(root: Path) -> str:
return str(state.get("profile") or DEFAULT_PROFILE) return str(state.get("profile") or DEFAULT_PROFILE)
def _workspace_for(root: Path) -> Path:
"""Resolve the workspace directory that contains this infospace.
The standard layout is ``<workspace>/infospaces/<slug>``, so the
workspace is two levels above the infospace root.
"""
return root.parent.parent
def run_generation( def run_generation(
root: str | Path, root: str | Path,
*, *,
@@ -396,6 +406,7 @@ def run_generation(
snapshot_id=latest_plan_snapshot_id(root_path), snapshot_id=latest_plan_snapshot_id(root_path),
duration_seconds=duration_seconds, duration_seconds=duration_seconds,
started_at=started_wall.isoformat(), started_at=started_wall.isoformat(),
cost_resolver=make_cost_resolver(_workspace_for(root_path)),
) )
return GenerationRunResult( return GenerationRunResult(
root=str(root_path), root=str(root_path),

View File

@@ -0,0 +1,41 @@
# Default model rate table for cost estimation.
#
# Rates are best-effort OpenRouter list prices in USD per 1 000 tokens. Provider
# rates drift; treat any cost computed from this table as an estimate
# (cost_status="estimated") and refresh the table when prices change. Adapter-
# returned cost always takes precedence over this table.
#
# Consumers can override entries by placing a `model-rates.yaml` with the same
# top-level shape in their workspace directory (alongside `infospaces/`).
schema_version: 1
currency: USD
source_url: https://openrouter.ai/models
captured_at: "2026-05-17"
rates:
openai/gpt-4o-mini:
prompt_per_1k: 0.00015
completion_per_1k: 0.00060
openai/gpt-4o:
prompt_per_1k: 0.0025
completion_per_1k: 0.01
openai/gpt-4-turbo:
prompt_per_1k: 0.01
completion_per_1k: 0.03
anthropic/claude-3.5-sonnet:
prompt_per_1k: 0.003
completion_per_1k: 0.015
anthropic/claude-3.5-haiku:
prompt_per_1k: 0.0008
completion_per_1k: 0.004
anthropic/claude-3-opus:
prompt_per_1k: 0.015
completion_per_1k: 0.075
google/gemini-1.5-flash:
prompt_per_1k: 0.000075
completion_per_1k: 0.0003
google/gemini-1.5-pro:
prompt_per_1k: 0.00125
completion_per_1k: 0.005
meta-llama/llama-3.1-70b-instruct:
prompt_per_1k: 0.00059
completion_per_1k: 0.00079

View File

@@ -281,6 +281,107 @@ def test_run_generation_writes_usage_yaml_with_plan_snapshot_id(tmp_path: Path)
assert runs[0]["rollup"]["total_cost_usd_known"] == 0.0 assert runs[0]["rollup"]["total_cost_usd_known"] == 0.0
def test_rate_table_known_model_resolves_cost(tmp_path: Path) -> None:
    """The bundled table prices a model it lists from its per-1k rates."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    table = load_rate_table()
    assert "openai/gpt-4o-mini" in table

    # 1000 prompt tokens at 0.00015/1k plus 500 completion tokens at
    # 0.0006/1k: 0.00015 + 0.0003 = 0.00045.
    estimate = estimate_cost_usd("openai/gpt-4o-mini", 1000, 500, table)
    assert estimate is not None
    assert abs(estimate - 0.00045) < 1e-9
def test_rate_table_unknown_model_returns_none(tmp_path: Path) -> None:
    """A model absent from the table yields ``None`` rather than a zero cost."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    table = load_rate_table()
    estimate = estimate_cost_usd("acme/no-such-model", 1000, 500, table)
    assert estimate is None
def test_workspace_rate_table_overrides_package_default(tmp_path: Path) -> None:
    """A workspace ``model-rates.yaml`` replaces bundled entries and adds new ones."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    workspace_rates = {
        "schema_version": 1,
        "rates": {
            # Shadows a model that the bundled table also defines.
            "openai/gpt-4o-mini": {
                "prompt_per_1k": 1.0,
                "completion_per_1k": 2.0,
            },
            # Exists only in the workspace file.
            "acme/bespoke": {
                "prompt_per_1k": 0.1,
                "completion_per_1k": 0.2,
            },
        },
    }
    rates_file = tmp_path / "model-rates.yaml"
    rates_file.write_text(yaml.safe_dump(workspace_rates), encoding="utf-8")

    table = load_rate_table(tmp_path)

    # With 1000 tokens on each side the cost is just the sum of both rates.
    overridden = estimate_cost_usd("openai/gpt-4o-mini", 1000, 1000, table)
    bespoke = estimate_cost_usd("acme/bespoke", 1000, 1000, table)
    assert overridden == round(1.0 + 2.0, 6)
    assert bespoke == round(0.1 + 0.2, 6)
def test_record_run_usage_fills_estimated_cost_via_resolver(tmp_path: Path) -> None:
    """Calls without adapter cost are estimated; adapter-reported cost stays known."""
    from infospace_bench.budget import make_cost_resolver, record_run_usage

    root = _build_infospace(tmp_path)

    # Same stage/provider/model twice: once without an adapter-reported cost,
    # once with one.
    call_without_cost = {
        "stage_id": "extract-entities",
        "provider": "openrouter",
        "metadata": {
            "model": "openai/gpt-4o-mini",
            "usage": {"prompt_tokens": 2000, "completion_tokens": 1000},
        },
    }
    call_with_cost = {
        "stage_id": "extract-entities",
        "provider": "openrouter",
        "metadata": {
            "model": "openai/gpt-4o-mini",
            "usage": {
                "prompt_tokens": 1000,
                "completion_tokens": 500,
                "cost": 0.123,
            },
        },
    }
    workflow_results = [
        {
            "run_id": "run-cost",
            "workflow_id": "generic-source-entities",
            "stages": [call_without_cost, call_with_cost],
        }
    ]

    entry = record_run_usage(
        root,
        workflow_results,
        cost_resolver=make_cost_resolver(tmp_path),
    )

    bucket = entry["per_bucket"][0]
    rollup = entry["rollup"]

    # Only the call without adapter cost is estimated:
    # 2000/1000*0.00015 + 1000/1000*0.0006 = 0.0003 + 0.0006 = 0.0009
    assert bucket["cost_usd_estimated"] == round(0.0009, 6)
    assert bucket["cost_usd_known"] == 0.123
    assert bucket["cost_status"] == "known"  # at least one call returned cost
    assert rollup["total_cost_usd_known"] == 0.123
    assert rollup["total_cost_usd_estimated"] == round(0.0009, 6)
def test_plan_cli_writes_snapshot(tmp_path: Path) -> None: def test_plan_cli_writes_snapshot(tmp_path: Path) -> None:
root = _build_infospace(tmp_path) root = _build_infospace(tmp_path)
env = os.environ.copy() env = os.environ.copy()

View File

@@ -117,7 +117,7 @@ state_hub_task_id: "a612f8d4-f96d-4fae-9aa6-66a7946414f5"
```task ```task
id: IB-WP-0019-T03 id: IB-WP-0019-T03
status: todo status: done
priority: high priority: high
state_hub_task_id: "688c590d-8885-455e-bcf6-61409a45e001" state_hub_task_id: "688c590d-8885-455e-bcf6-61409a45e001"
``` ```