IB-WP-0019-T03: rate-table cost computation

Ship a starter model rate table at src/infospace_bench/model_rates.yaml (prompt_per_1k / completion_per_1k for the OpenRouter models we have actually touched: gpt-4o, gpt-4o-mini, gpt-4-turbo, claude 3.5 sonnet and haiku, claude 3 opus, gemini 1.5 flash/pro, llama 3.1 70b) and a load_rate_table() / estimate_cost_usd() pair that overlays an optional <workspace>/model-rates.yaml on top of the bundled defaults. generate run now passes a workspace-aware cost_resolver into record_run_usage, so cost_usd_estimated lands on every usage bucket whose model matches the table. Adapter-returned cost still wins (cost_status="known"); rate-table cost is reported under cost_status="estimated"; unmatched models are recorded as cost_status="unknown" rather than silently zeroed. Rate-table file is listed in pyproject.toml package-data so pip-installed users keep the defaults. 106 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-17 19:54:30 +02:00
parent 678508226a
commit a4dde53fc3
7 changed files with 252 additions and 3 deletions
--- a/docs/generic-source-generator.md
+++ b/docs/generic-source-generator.md
@@ -48,6 +48,29 @@ infospace-bench generate status ./infospaces/book-space
 shows chunk counts, generated artifact counts, evaluations, metrics, history,
 and stale source/profile inputs.
 ### Budget and usage registry
 Every `generate plan` invocation appends a compact snapshot to
 `output/budget/plans.yaml` (deterministic 12-char `snapshot_id`, 50-entry
 sliding retention). Every `generate run` invocation appends a usage
 rollup to `output/budget/usage.yaml`, bucketed by `(workflow_id,
 stage_id, provider, model)` with prompt and completion token counts,
 known cost (when the adapter returned it), and estimated cost (when a
 rate table entry matches the model).
 The default rate table is bundled at
 `src/infospace_bench/model_rates.yaml` and covers a handful of common
 OpenRouter models at list price (see the file for the captured-at
 timestamp). A workspace can override or extend entries by placing
 `model-rates.yaml` next to its `infospaces/` directory; the workspace
 file is overlaid on top of the package default so partial overrides
 are fine.
 Cost resolution order on each run: adapter-returned `cost` first, then
 the rate table, then `cost_status="unknown"` (recorded explicitly,
 never silently zeroed). The plan-vs-actual variance summary lands in
 follow-on task T04.
 ### Profiles
 Two profiles ship today:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ dependencies = [
 infospace-bench = "infospace_bench.cli:main"
 [tool.setuptools.package-data]
-infospace_bench = ["profiles/**/*"]
+infospace_bench = ["profiles/**/*", "model_rates.yaml"]
 [tool.pytest.ini_options]
 pythonpath = ["src", "../markitect-tool/src"]
--- a/src/infospace_bench/budget.py
+++ b/src/infospace_bench/budget.py
@@ -15,10 +15,13 @@ import hashlib
 import json
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any
+from typing import Any, Callable
 import yaml
 RATES_FILENAME = "model-rates.yaml"
 _PACKAGE_RATES_PATH = Path(__file__).parent / "model_rates.yaml"
 BUDGET_DIR = Path("output/budget")
 PLANS_FILE = BUDGET_DIR / "plans.yaml"
 USAGE_FILE = BUDGET_DIR / "usage.yaml"
@@ -210,6 +213,76 @@ def read_usage_runs(root: str | Path) -> list[dict[str, Any]]:
    return list(payload.get("runs") or [])
 def load_rate_table(workspace: Path | str | None = None) -> dict[str, dict[str, float]]:
    """Load the model rate table, with optional workspace override.
    Returns a mapping ``model_id -> {prompt_per_1k, completion_per_1k}``. The
    workspace override (``<workspace>/model-rates.yaml``) is overlaid on top of
    the package default, so individual models can be tweaked without copying
    the whole table. The overlay is applied per field: a workspace entry that
    sets only one of the two rates keeps the other rate from the package
    default instead of resetting it to zero.
    Loading is best-effort. A file that is missing, unreadable, not valid
    YAML, or whose top level / ``rates`` value is not a mapping is skipped,
    and so is any entry for which ``_coerce_float`` yields ``None`` for both
    rates. The function therefore never raises because of a bad rate file;
    affected models simply stay unmatched.
    """
    rates: dict[str, dict[str, float]] = {}
    # Order matters: package defaults first, workspace file layered on top.
    for path in (_PACKAGE_RATES_PATH, _workspace_rate_path(workspace)):
        if path is None or not path.is_file():
            continue
        try:
            data = yaml.safe_load(path.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError, yaml.YAMLError):
            # Treat an unreadable file like a malformed one: skip it.
            continue
        if not isinstance(data, dict):
            continue
        table = data.get("rates")
        if not isinstance(table, dict):
            # Covers a missing/empty ``rates`` key as well as a wrong type.
            continue
        for model, entry in table.items():
            if not isinstance(entry, dict):
                continue
            prompt = _coerce_float(entry.get("prompt_per_1k"))
            completion = _coerce_float(entry.get("completion_per_1k"))
            if prompt is None and completion is None:
                continue
            key = str(model)
            # Fall back to a rate loaded earlier for this model (if any)
            # before defaulting a missing side to zero.
            inherited = rates.get(key, {})
            if prompt is None:
                prompt = inherited.get("prompt_per_1k", 0.0)
            if completion is None:
                completion = inherited.get("completion_per_1k", 0.0)
            rates[key] = {
                "prompt_per_1k": prompt,
                "completion_per_1k": completion,
            }
    return rates
 def estimate_cost_usd(
    model: str,
    prompt_tokens: int,
    completion_tokens: int,
    rate_table: dict[str, dict[str, float]],
 ) -> float | None:
    entry = rate_table.get(model)
    if entry is None:
        return None
    prompt_rate = float(entry.get("prompt_per_1k") or 0.0)
    completion_rate = float(entry.get("completion_per_1k") or 0.0)
    cost = (prompt_tokens / 1000.0) * prompt_rate + (
        completion_tokens / 1000.0
    ) * completion_rate
    return round(cost, 6)
 def make_cost_resolver(
    workspace: Path | str | None,
 ) -> Callable[[str, str, int, int], float | None]:
    """Build a ``cost_resolver`` callable for ``record_run_usage``.
    The rate table for ``workspace`` is loaded once, here, and captured by
    the returned closure, so every later lookup reuses the same table.
    """
    table = load_rate_table(workspace)
    def _estimate(provider: str, model: str, prompt_tokens: int, completion_tokens: int) -> float | None:
        # ``provider`` is accepted to fit the resolver signature but is not
        # consulted; the table is keyed by model id only.
        return estimate_cost_usd(model, prompt_tokens, completion_tokens, table) if model else None
    return _estimate
 def _workspace_rate_path(workspace: Path | str | None) -> Path | None:
    if workspace is None:
        return None
    candidate = Path(workspace) / RATES_FILENAME
    return candidate
 def _coerce_float(value: Any) -> float | None:
    if value is None:
        return None
--- a/src/infospace_bench/generator.py
+++ b/src/infospace_bench/generator.py
@@ -20,6 +20,7 @@ from .lifecycle import create_infospace, load_infospace, register_artifact
 from .openrouter import OpenRouterAssistedGenerationAdapter
 from .budget import (
    latest_plan_snapshot_id,
    make_cost_resolver,
    record_plan_snapshot,
    record_run_usage,
 )
@@ -324,6 +325,15 @@ def _read_profile_name(root: Path) -> str:
    return str(state.get("profile") or DEFAULT_PROFILE)
 def _workspace_for(root: Path) -> Path:
    """Resolve the workspace directory that contains this infospace.
    The standard layout is ``<workspace>/infospaces/<slug>``, so the
    workspace is two levels above the infospace root.
    """
    return root.parent.parent
 def run_generation(
    root: str | Path,
    *,
@@ -396,6 +406,7 @@ def run_generation(
            snapshot_id=latest_plan_snapshot_id(root_path),
            duration_seconds=duration_seconds,
            started_at=started_wall.isoformat(),
            cost_resolver=make_cost_resolver(_workspace_for(root_path)),
        )
    return GenerationRunResult(
        root=str(root_path),
--- a/src/infospace_bench/model_rates.yaml
+++ b/src/infospace_bench/model_rates.yaml
@@ -0,0 +1,41 @@
 # Default model rate table for cost estimation.
 #
 # Rates are best-effort OpenRouter list prices in USD per 1 000 tokens. Provider
 # rates drift; treat any cost computed from this table as an estimate
 # (cost_status="estimated") and refresh the table when prices change. Adapter-
 # returned cost always takes precedence over this table.
 #
 # Consumers can override entries by placing a `model-rates.yaml` with the same
 # top-level shape in their workspace directory (alongside `infospaces/`).
 schema_version: 1
 currency: USD
 source_url: https://openrouter.ai/models
 captured_at: "2026-05-17"
 rates:
  openai/gpt-4o-mini:
    prompt_per_1k: 0.00015
    completion_per_1k: 0.00060
  openai/gpt-4o:
    prompt_per_1k: 0.0025
    completion_per_1k: 0.01
  openai/gpt-4-turbo:
    prompt_per_1k: 0.01
    completion_per_1k: 0.03
  anthropic/claude-3.5-sonnet:
    prompt_per_1k: 0.003
    completion_per_1k: 0.015
  anthropic/claude-3.5-haiku:
    prompt_per_1k: 0.0008
    completion_per_1k: 0.004
  anthropic/claude-3-opus:
    prompt_per_1k: 0.015
    completion_per_1k: 0.075
  google/gemini-1.5-flash:
    prompt_per_1k: 0.000075
    completion_per_1k: 0.0003
  google/gemini-1.5-pro:
    prompt_per_1k: 0.00125
    completion_per_1k: 0.005
  meta-llama/llama-3.1-70b-instruct:
    prompt_per_1k: 0.00059
    completion_per_1k: 0.00079
--- a/tests/test_budget_registry.py
+++ b/tests/test_budget_registry.py
@@ -281,6 +281,107 @@ def test_run_generation_writes_usage_yaml_with_plan_snapshot_id(tmp_path: Path)
    assert runs[0]["rollup"]["total_cost_usd_known"] == 0.0
 def test_rate_table_known_model_resolves_cost(tmp_path: Path) -> None:
    """The bundled table prices a model it lists (no workspace override)."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table
    bundled = load_rate_table()
    assert "openai/gpt-4o-mini" in bundled
    # Expected: 1000 prompt tokens at 0.00015/1k plus 500 completion tokens
    # at 0.0006/1k, i.e. 0.00015 + 0.0003 = 0.00045.
    estimated = estimate_cost_usd("openai/gpt-4o-mini", 1000, 500, bundled)
    assert estimated is not None
    assert abs(estimated - 0.00045) < 1e-9
 def test_rate_table_unknown_model_returns_none(tmp_path: Path) -> None:
    """A model absent from the table yields ``None`` rather than a zero cost."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table
    bundled = load_rate_table()
    estimated = estimate_cost_usd("acme/no-such-model", 1000, 500, bundled)
    assert estimated is None
 def test_workspace_rate_table_overrides_package_default(tmp_path: Path) -> None:
    """A workspace ``model-rates.yaml`` can replace and add table entries."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table
    workspace_table = {
        "schema_version": 1,
        "rates": {
            # Replaces an entry asserted present in the bundled table.
            "openai/gpt-4o-mini": {
                "prompt_per_1k": 1.0,
                "completion_per_1k": 2.0,
            },
            # A model id introduced by the workspace file.
            "acme/bespoke": {
                "prompt_per_1k": 0.1,
                "completion_per_1k": 0.2,
            },
        },
    }
    (tmp_path / "model-rates.yaml").write_text(
        yaml.safe_dump(workspace_table),
        encoding="utf-8",
    )
    merged = load_rate_table(tmp_path)
    # 1000 tokens on each side, so the expected cost is the sum of both rates.
    assert estimate_cost_usd("openai/gpt-4o-mini", 1000, 1000, merged) == round(1.0 + 2.0, 6)
    assert estimate_cost_usd("acme/bespoke", 1000, 1000, merged) == round(0.1 + 0.2, 6)
 def test_record_run_usage_fills_estimated_cost_via_resolver(tmp_path: Path) -> None:
    """Adapter cost and rate-table cost are both recorded for one bucket.
    Two calls share the same workflow, stage, provider and model. One reports
    no cost and is priced from the rate table; the other carries an
    adapter-returned ``cost``.
    """
    from infospace_bench.budget import make_cost_resolver, record_run_usage
    root = _build_infospace(tmp_path)
    # No adapter cost: the resolver is expected to supply an estimate.
    call_without_cost = {
        "stage_id": "extract-entities",
        "provider": "openrouter",
        "metadata": {
            "model": "openai/gpt-4o-mini",
            "usage": {"prompt_tokens": 2000, "completion_tokens": 1000},
        },
    }
    # Adapter cost present: counted as known cost.
    call_with_cost = {
        "stage_id": "extract-entities",
        "provider": "openrouter",
        "metadata": {
            "model": "openai/gpt-4o-mini",
            "usage": {
                "prompt_tokens": 1000,
                "completion_tokens": 500,
                "cost": 0.123,
            },
        },
    }
    workflow_results = [
        {
            "run_id": "run-cost",
            "workflow_id": "generic-source-entities",
            "stages": [call_without_cost, call_with_cost],
        }
    ]
    entry = record_run_usage(
        root,
        workflow_results,
        cost_resolver=make_cost_resolver(tmp_path),
    )
    bucket = entry["per_bucket"][0]
    rollup = entry["rollup"]
    # Estimate for the first call:
    # 2000/1000*0.00015 + 1000/1000*0.0006 = 0.0003 + 0.0006 = 0.0009
    expected_estimate = round(0.0009, 6)
    assert bucket["cost_usd_estimated"] == expected_estimate
    assert bucket["cost_usd_known"] == 0.123
    assert bucket["cost_status"] == "known"  # at least one call returned cost
    assert rollup["total_cost_usd_known"] == 0.123
    assert rollup["total_cost_usd_estimated"] == expected_estimate
 def test_plan_cli_writes_snapshot(tmp_path: Path) -> None:
    root = _build_infospace(tmp_path)
    env = os.environ.copy()
--- a/workplans/IB-WP-0019-budget-and-usage-registry.md
+++ b/workplans/IB-WP-0019-budget-and-usage-registry.md
@@ -117,7 +117,7 @@ state_hub_task_id: "a612f8d4-f96d-4fae-9aa6-66a7946414f5"
 ```task
 id: IB-WP-0019-T03
-status: todo
+status: done
 priority: high
 state_hub_task_id: "688c590d-8885-455e-bcf6-61409a45e001"
 ```