generated from coulomb/repo-seed
IB-WP-0019-T03: rate-table cost computation
Ship a starter model rate table at src/infospace_bench/model_rates.yaml (prompt_per_1k / completion_per_1k for the OpenRouter models we have actually touched: gpt-4o, gpt-4o-mini, gpt-4-turbo, claude 3.5 sonnet and haiku, claude 3 opus, gemini 1.5 flash/pro, llama 3.1 70b) and a load_rate_table() / estimate_cost_usd() pair that overlays an optional <workspace>/model-rates.yaml on top of the bundled defaults. generate run now passes a workspace-aware cost_resolver into record_run_usage, so cost_usd_estimated lands on every usage bucket whose model matches the table. Adapter-returned cost still wins (cost_status="known"); rate-table cost is reported under cost_status="estimated"; unmatched models are recorded as cost_status="unknown" rather than silently zeroed. Rate-table file is listed in pyproject.toml package-data so pip-installed users keep the defaults. 106 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -15,10 +15,13 @@ import hashlib
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import Any, Callable
|
||||
|
||||
import yaml
|
||||
|
||||
# File name of the optional per-workspace rate override; it is joined onto the
# workspace directory, i.e. ``<workspace>/model-rates.yaml``.
RATES_FILENAME = "model-rates.yaml"
|
||||
# Rate table that sits next to this module; load_rate_table() reads it first
# so it acts as the base layer under any workspace override.
_PACKAGE_RATES_PATH = Path(__file__).parent / "model_rates.yaml"
|
||||
|
||||
# Relative directory for the budget files below.
# NOTE(review): presumably resolved against the infospace root -- confirm.
BUDGET_DIR = Path("output/budget")
|
||||
PLANS_FILE = BUDGET_DIR / "plans.yaml"
|
||||
USAGE_FILE = BUDGET_DIR / "usage.yaml"
|
||||
@@ -210,6 +213,76 @@ def read_usage_runs(root: str | Path) -> list[dict[str, Any]]:
|
||||
return list(payload.get("runs") or [])
|
||||
|
||||
|
||||
def load_rate_table(workspace: Path | str | None = None) -> dict[str, dict[str, float]]:
    """Load the model rate table, with optional workspace override.

    Returns a mapping ``model_id -> {prompt_per_1k, completion_per_1k}``. The
    workspace override (``<workspace>/model-rates.yaml``) is overlaid on top of
    the package default, so individual models can be tweaked without copying
    the whole table.

    Loading is best-effort: a layer that is missing, unreadable, not valid
    YAML, or not shaped like ``{"rates": {model: {...}}}`` is skipped rather
    than raising. An entry that sets only one of the two rates keeps the other
    rate from a lower layer when one exists, and falls back to ``0.0``
    otherwise.

    Args:
        workspace: Directory that may contain ``model-rates.yaml``; ``None``
            means "bundled defaults only".

    Returns:
        The merged rate table. Empty when no layer could be loaded.
    """
    rates: dict[str, dict[str, float]] = {}
    # Order matters: bundled defaults first, workspace override second.
    for path in (_PACKAGE_RATES_PATH, _workspace_rate_path(workspace)):
        if path is None or not path.is_file():
            continue
        try:
            data = yaml.safe_load(path.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError, yaml.YAMLError):
            # An unreadable or undecodable file is treated like a malformed
            # one: skip this layer instead of failing the whole run.
            continue
        if not isinstance(data, dict):
            continue
        table = data.get("rates")
        if not isinstance(table, dict):
            # Covers a missing/empty ``rates`` key and a wrongly typed one
            # (e.g. a list), which would otherwise crash on ``.items()``.
            continue
        for model, entry in table.items():
            if not isinstance(entry, dict):
                continue
            prompt = _coerce_float(entry.get("prompt_per_1k"))
            completion = _coerce_float(entry.get("completion_per_1k"))
            if prompt is None and completion is None:
                continue
            key = str(model)
            # Inherit the rate that this layer leaves unset so a one-sided
            # override does not silently zero the other side of the cost.
            inherited = rates.get(key, {})
            rates[key] = {
                "prompt_per_1k": (
                    prompt if prompt is not None else inherited.get("prompt_per_1k", 0.0)
                ),
                "completion_per_1k": (
                    completion
                    if completion is not None
                    else inherited.get("completion_per_1k", 0.0)
                ),
            }
    return rates
|
||||
|
||||
|
||||
def estimate_cost_usd(
    model: str,
    prompt_tokens: int,
    completion_tokens: int,
    rate_table: dict[str, dict[str, float]],
) -> float | None:
    """Estimate the USD cost of one usage bucket from a rate table.

    Args:
        model: Model id used as the exact lookup key into ``rate_table``.
        prompt_tokens: Number of prompt tokens; ``None`` is treated as 0.
        completion_tokens: Number of completion tokens; ``None`` is treated
            as 0.
        rate_table: Mapping ``model_id -> {prompt_per_1k, completion_per_1k}``
            with rates expressed per 1000 tokens.

    Returns:
        The estimated cost rounded to 6 decimal places, or ``None`` when
        ``model`` has no entry in ``rate_table`` (so callers can tell
        "unknown" apart from "free").
    """
    entry = rate_table.get(model)
    if entry is None:
        return None
    # A missing or null rate counts as zero for that side of the bill.
    prompt_rate = float(entry.get("prompt_per_1k") or 0.0)
    completion_rate = float(entry.get("completion_per_1k") or 0.0)
    # A usage record may lack one of the counts; treat it as zero tokens
    # instead of raising TypeError on ``None / 1000.0``.
    prompt_count = prompt_tokens or 0
    completion_count = completion_tokens or 0
    cost = (prompt_count / 1000.0) * prompt_rate + (
        completion_count / 1000.0
    ) * completion_rate
    return round(cost, 6)
|
||||
|
||||
|
||||
def make_cost_resolver(
    workspace: Path | str | None,
) -> Callable[[str, str, int, int], float | None]:
    """Build the callback passed as ``record_run_usage(..., cost_resolver=...)``.

    The rate table for ``workspace`` is loaded once, when this factory is
    called, and captured by the returned closure; the closure itself does no
    file access.

    Args:
        workspace: Workspace directory forwarded to ``load_rate_table``.

    Returns:
        A function ``(provider, model, prompt_tokens, completion_tokens)``
        yielding the estimated USD cost, or ``None`` when ``model`` is empty
        or absent from the rate table.
    """
    table = load_rate_table(workspace)

    def _resolve(
        provider: str,
        model: str,
        prompt_tokens: int,
        completion_tokens: int,
    ) -> float | None:
        # ``provider`` belongs to the resolver signature but is not consulted:
        # the lookup is keyed on the model id alone.
        if model:
            return estimate_cost_usd(model, prompt_tokens, completion_tokens, table)
        return None

    return _resolve
|
||||
|
||||
|
||||
def _workspace_rate_path(workspace: Path | str | None) -> Path | None:
    """Return the override file location for ``workspace``.

    Yields ``None`` when no workspace was given; otherwise the path of
    ``RATES_FILENAME`` inside the workspace directory. The file is not
    checked for existence here.
    """
    return None if workspace is None else Path(workspace) / RATES_FILENAME
|
||||
|
||||
|
||||
def _coerce_float(value: Any) -> float | None:
|
||||
if value is None:
|
||||
return None
|
||||
|
||||
@@ -20,6 +20,7 @@ from .lifecycle import create_infospace, load_infospace, register_artifact
|
||||
from .openrouter import OpenRouterAssistedGenerationAdapter
|
||||
from .budget import (
|
||||
latest_plan_snapshot_id,
|
||||
make_cost_resolver,
|
||||
record_plan_snapshot,
|
||||
record_run_usage,
|
||||
)
|
||||
@@ -324,6 +325,15 @@ def _read_profile_name(root: Path) -> str:
|
||||
return str(state.get("profile") or DEFAULT_PROFILE)
|
||||
|
||||
|
||||
def _workspace_for(root: Path) -> Path:
    """Resolve the workspace directory that contains this infospace.

    The standard layout is ``<workspace>/infospaces/<slug>``, so the
    workspace is two levels above the infospace root.

    The root is made absolute (lexically, without following symlinks) before
    walking up. ``Path.parent`` is purely textual, so a relative root such as
    ``Path(".")`` or ``Path("slug")`` would otherwise collapse to ``Path(".")``
    and point at the wrong directory.

    Args:
        root: Infospace root directory; may be relative to the current
            working directory.

    Returns:
        The absolute path two levels above ``root``.
    """
    import os  # local import: keeps this block independent of the file header

    return Path(os.path.abspath(root)).parent.parent
|
||||
|
||||
|
||||
def run_generation(
|
||||
root: str | Path,
|
||||
*,
|
||||
@@ -396,6 +406,7 @@ def run_generation(
|
||||
snapshot_id=latest_plan_snapshot_id(root_path),
|
||||
duration_seconds=duration_seconds,
|
||||
started_at=started_wall.isoformat(),
|
||||
cost_resolver=make_cost_resolver(_workspace_for(root_path)),
|
||||
)
|
||||
return GenerationRunResult(
|
||||
root=str(root_path),
|
||||
|
||||
41
src/infospace_bench/model_rates.yaml
Normal file
41
src/infospace_bench/model_rates.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
# Default model rate table for cost estimation.
|
||||
#
|
||||
# Rates are best-effort OpenRouter list prices in USD per 1 000 tokens. Provider
|
||||
# rates drift; treat any cost computed from this table as an estimate
|
||||
# (cost_status="estimated") and refresh the table when prices change. Adapter-
|
||||
# returned cost always takes precedence over this table.
|
||||
#
|
||||
# Consumers can override entries by placing a `model-rates.yaml` with the same
|
||||
# top-level shape in their workspace directory (alongside `infospaces/`).
|
||||
schema_version: 1
|
||||
currency: USD
|
||||
source_url: https://openrouter.ai/models
|
||||
captured_at: "2026-05-17"
|
||||
rates:
|
||||
openai/gpt-4o-mini:
|
||||
prompt_per_1k: 0.00015
|
||||
completion_per_1k: 0.00060
|
||||
openai/gpt-4o:
|
||||
prompt_per_1k: 0.0025
|
||||
completion_per_1k: 0.01
|
||||
openai/gpt-4-turbo:
|
||||
prompt_per_1k: 0.01
|
||||
completion_per_1k: 0.03
|
||||
anthropic/claude-3.5-sonnet:
|
||||
prompt_per_1k: 0.003
|
||||
completion_per_1k: 0.015
|
||||
anthropic/claude-3.5-haiku:
|
||||
prompt_per_1k: 0.0008
|
||||
completion_per_1k: 0.004
|
||||
anthropic/claude-3-opus:
|
||||
prompt_per_1k: 0.015
|
||||
completion_per_1k: 0.075
|
||||
google/gemini-1.5-flash:
|
||||
prompt_per_1k: 0.000075
|
||||
completion_per_1k: 0.0003
|
||||
google/gemini-1.5-pro:
|
||||
prompt_per_1k: 0.00125
|
||||
completion_per_1k: 0.005
|
||||
meta-llama/llama-3.1-70b-instruct:
|
||||
prompt_per_1k: 0.00059
|
||||
completion_per_1k: 0.00079
|
||||
Reference in New Issue
Block a user