From a4dde53fc3001315a305f113cc91cd71fdc64b82 Mon Sep 17 00:00:00 2001 From: tegwick Date: Sun, 17 May 2026 19:54:30 +0200 Subject: [PATCH] IB-WP-0019-T03: rate-table cost computation Ship a starter model rate table at src/infospace_bench/model_rates.yaml (prompt_per_1k / completion_per_1k for the OpenRouter models we have actually touched: gpt-4o, gpt-4o-mini, gpt-4-turbo, claude 3.5 sonnet and haiku, claude 3 opus, gemini 1.5 flash/pro, llama 3.1 70b) and a load_rate_table() / estimate_cost_usd() pair that overlays an optional <workspace>/model-rates.yaml on top of the bundled defaults. generate run now passes a workspace-aware cost_resolver into record_run_usage, so cost_usd_estimated lands on every usage bucket whose model matches the table. Adapter-returned cost still wins (cost_status="known"); rate-table cost is reported under cost_status="estimated"; unmatched models are recorded as cost_status="unknown" rather than silently zeroed. Rate-table file is listed in pyproject.toml package-data so pip-installed users keep the defaults. 106 tests pass. Co-Authored-By: Claude Opus 4.7 --- docs/generic-source-generator.md | 23 ++++ pyproject.toml | 2 +- src/infospace_bench/budget.py | 75 ++++++++++++- src/infospace_bench/generator.py | 11 ++ src/infospace_bench/model_rates.yaml | 41 +++++++ tests/test_budget_registry.py | 101 ++++++++++++++++++ .../IB-WP-0019-budget-and-usage-registry.md | 2 +- 7 files changed, 252 insertions(+), 3 deletions(-) create mode 100644 src/infospace_bench/model_rates.yaml diff --git a/docs/generic-source-generator.md b/docs/generic-source-generator.md index 78267fb..0a917d8 100644 --- a/docs/generic-source-generator.md +++ b/docs/generic-source-generator.md @@ -48,6 +48,29 @@ infospace-bench generate status ./infospaces/book-space shows chunk counts, generated artifact counts, evaluations, metrics, history, and stale source/profile inputs. 
+### Budget and usage registry + +Every `generate plan` invocation appends a compact snapshot to +`output/budget/plans.yaml` (deterministic 12-char `snapshot_id`, 50-entry +sliding retention). Every `generate run` invocation appends a usage +rollup to `output/budget/usage.yaml`, bucketed by `(workflow_id, +stage_id, provider, model)` with prompt and completion token counts, +known cost (when the adapter returned it), and estimated cost (when a +rate table entry matches the model). + +The default rate table is bundled at +`src/infospace_bench/model_rates.yaml` and covers a handful of common +OpenRouter models at list price (see the file for the captured-at +timestamp). A workspace can override or extend entries by placing +`model-rates.yaml` next to its `infospaces/` directory; the workspace +file is overlaid on top of the package default so partial overrides +are fine. + +Cost resolution order on each run: adapter-returned `cost` first, then +the rate table, then `cost_status="unknown"` (recorded explicitly, +never silently zeroed). The plan-vs-actual variance summary lands in +follow-on task T04. 
+ ### Profiles Two profiles ship today: diff --git a/pyproject.toml b/pyproject.toml index 415e819..5139848 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ infospace-bench = "infospace_bench.cli:main" [tool.setuptools.package-data] -infospace_bench = ["profiles/**/*"] +infospace_bench = ["profiles/**/*", "model_rates.yaml"] [tool.pytest.ini_options] pythonpath = ["src", "../markitect-tool/src"] diff --git a/src/infospace_bench/budget.py b/src/infospace_bench/budget.py index 90b53f0..764cd59 100644 --- a/src/infospace_bench/budget.py +++ b/src/infospace_bench/budget.py @@ -15,10 +15,13 @@ import hashlib import json from datetime import datetime, timezone from pathlib import Path -from typing import Any +from typing import Any, Callable import yaml +RATES_FILENAME = "model-rates.yaml" +_PACKAGE_RATES_PATH = Path(__file__).parent / "model_rates.yaml" + BUDGET_DIR = Path("output/budget") PLANS_FILE = BUDGET_DIR / "plans.yaml" USAGE_FILE = BUDGET_DIR / "usage.yaml" @@ -210,6 +213,76 @@ def read_usage_runs(root: str | Path) -> list[dict[str, Any]]: return list(payload.get("runs") or []) +def load_rate_table(workspace: Path | str | None = None) -> dict[str, dict[str, float]]: + """Load the model rate table, with optional workspace override. + + Returns a mapping ``model_id -> {prompt_per_1k, completion_per_1k}``. The + workspace override (``<workspace>/model-rates.yaml``) is overlaid on top of + the package default, so individual models can be tweaked without copying + the whole table. 
+ """ + rates: dict[str, dict[str, float]] = {} + for path in (_PACKAGE_RATES_PATH, _workspace_rate_path(workspace)): + if path is None or not path.is_file(): + continue + try: + data = yaml.safe_load(path.read_text(encoding="utf-8")) + except yaml.YAMLError: + continue + if not isinstance(data, dict): + continue + for model, entry in (data.get("rates") or {}).items(): + if not isinstance(entry, dict): + continue + prompt = _coerce_float(entry.get("prompt_per_1k")) + completion = _coerce_float(entry.get("completion_per_1k")) + if prompt is None and completion is None: + continue + rates[str(model)] = { + "prompt_per_1k": prompt if prompt is not None else 0.0, + "completion_per_1k": completion if completion is not None else 0.0, + } + return rates + + +def estimate_cost_usd( + model: str, + prompt_tokens: int, + completion_tokens: int, + rate_table: dict[str, dict[str, float]], +) -> float | None: + entry = rate_table.get(model) + if entry is None: + return None + prompt_rate = float(entry.get("prompt_per_1k") or 0.0) + completion_rate = float(entry.get("completion_per_1k") or 0.0) + cost = (prompt_tokens / 1000.0) * prompt_rate + ( + completion_tokens / 1000.0 + ) * completion_rate + return round(cost, 6) + + +def make_cost_resolver( + workspace: Path | str | None, +) -> Callable[[str, str, int, int], float | None]: + """Return a resolver suitable for ``record_run_usage(..., cost_resolver=...)``.""" + rates = load_rate_table(workspace) + + def _resolve(provider: str, model: str, prompt_tokens: int, completion_tokens: int) -> float | None: + if not model: + return None + return estimate_cost_usd(model, prompt_tokens, completion_tokens, rates) + + return _resolve + + +def _workspace_rate_path(workspace: Path | str | None) -> Path | None: + if workspace is None: + return None + candidate = Path(workspace) / RATES_FILENAME + return candidate + + def _coerce_float(value: Any) -> float | None: if value is None: return None diff --git a/src/infospace_bench/generator.py 
b/src/infospace_bench/generator.py index 8d30ba4..e73e57c 100644 --- a/src/infospace_bench/generator.py +++ b/src/infospace_bench/generator.py @@ -20,6 +20,7 @@ from .lifecycle import create_infospace, load_infospace, register_artifact from .openrouter import OpenRouterAssistedGenerationAdapter from .budget import ( latest_plan_snapshot_id, + make_cost_resolver, record_plan_snapshot, record_run_usage, ) @@ -324,6 +325,15 @@ def _read_profile_name(root: Path) -> str: return str(state.get("profile") or DEFAULT_PROFILE) +def _workspace_for(root: Path) -> Path: + """Resolve the workspace directory that contains this infospace. + + The standard layout is ``<workspace>/infospaces/<infospace>``, so the + workspace is two levels above the infospace root. + """ + return root.parent.parent + + def run_generation( root: str | Path, *, @@ -396,6 +406,7 @@ def run_generation( snapshot_id=latest_plan_snapshot_id(root_path), duration_seconds=duration_seconds, started_at=started_wall.isoformat(), + cost_resolver=make_cost_resolver(_workspace_for(root_path)), ) return GenerationRunResult( root=str(root_path), diff --git a/src/infospace_bench/model_rates.yaml b/src/infospace_bench/model_rates.yaml new file mode 100644 index 0000000..fad2ed3 --- /dev/null +++ b/src/infospace_bench/model_rates.yaml @@ -0,0 +1,41 @@ +# Default model rate table for cost estimation. +# +# Rates are best-effort OpenRouter list prices in USD per 1 000 tokens. Provider +# rates drift; treat any cost computed from this table as an estimate +# (cost_status="estimated") and refresh the table when prices change. Adapter- +# returned cost always takes precedence over this table. +# +# Consumers can override entries by placing a `model-rates.yaml` with the same +# top-level shape in their workspace directory (alongside `infospaces/`). 
+schema_version: 1 +currency: USD +source_url: https://openrouter.ai/models +captured_at: "2026-05-17" +rates: + openai/gpt-4o-mini: + prompt_per_1k: 0.00015 + completion_per_1k: 0.00060 + openai/gpt-4o: + prompt_per_1k: 0.0025 + completion_per_1k: 0.01 + openai/gpt-4-turbo: + prompt_per_1k: 0.01 + completion_per_1k: 0.03 + anthropic/claude-3.5-sonnet: + prompt_per_1k: 0.003 + completion_per_1k: 0.015 + anthropic/claude-3.5-haiku: + prompt_per_1k: 0.0008 + completion_per_1k: 0.004 + anthropic/claude-3-opus: + prompt_per_1k: 0.015 + completion_per_1k: 0.075 + google/gemini-1.5-flash: + prompt_per_1k: 0.000075 + completion_per_1k: 0.0003 + google/gemini-1.5-pro: + prompt_per_1k: 0.00125 + completion_per_1k: 0.005 + meta-llama/llama-3.1-70b-instruct: + prompt_per_1k: 0.00059 + completion_per_1k: 0.00079 diff --git a/tests/test_budget_registry.py b/tests/test_budget_registry.py index 8b3269b..4526d2e 100644 --- a/tests/test_budget_registry.py +++ b/tests/test_budget_registry.py @@ -281,6 +281,107 @@ def test_run_generation_writes_usage_yaml_with_plan_snapshot_id(tmp_path: Path) assert runs[0]["rollup"]["total_cost_usd_known"] == 0.0 +def test_rate_table_known_model_resolves_cost(tmp_path: Path) -> None: + from infospace_bench.budget import estimate_cost_usd, load_rate_table + + rates = load_rate_table() + + assert "openai/gpt-4o-mini" in rates + cost = estimate_cost_usd("openai/gpt-4o-mini", 1000, 500, rates) + # gpt-4o-mini: prompt 0.00015/1k, completion 0.0006/1k → 0.00015 + 0.0003 = 0.00045 + assert cost is not None + assert abs(cost - 0.00045) < 1e-9 + + +def test_rate_table_unknown_model_returns_none(tmp_path: Path) -> None: + from infospace_bench.budget import estimate_cost_usd, load_rate_table + + rates = load_rate_table() + + assert estimate_cost_usd("acme/no-such-model", 1000, 500, rates) is None + + +def test_workspace_rate_table_overrides_package_default(tmp_path: Path) -> None: + from infospace_bench.budget import estimate_cost_usd, load_rate_table + + 
override = tmp_path / "model-rates.yaml" + override.write_text( + yaml.safe_dump( + { + "schema_version": 1, + "rates": { + "openai/gpt-4o-mini": { + "prompt_per_1k": 1.0, + "completion_per_1k": 2.0, + }, + "acme/bespoke": { + "prompt_per_1k": 0.1, + "completion_per_1k": 0.2, + }, + }, + } + ), + encoding="utf-8", + ) + + rates = load_rate_table(tmp_path) + + overridden = estimate_cost_usd("openai/gpt-4o-mini", 1000, 1000, rates) + bespoke = estimate_cost_usd("acme/bespoke", 1000, 1000, rates) + + assert overridden == round(1.0 + 2.0, 6) + assert bespoke == round(0.1 + 0.2, 6) + + +def test_record_run_usage_fills_estimated_cost_via_resolver(tmp_path: Path) -> None: + root = _build_infospace(tmp_path) + from infospace_bench.budget import make_cost_resolver, record_run_usage + + workflow_results = [ + { + "run_id": "run-cost", + "workflow_id": "generic-source-entities", + "stages": [ + { + "stage_id": "extract-entities", + "provider": "openrouter", + "metadata": { + "model": "openai/gpt-4o-mini", + "usage": {"prompt_tokens": 2000, "completion_tokens": 1000}, + }, + }, + { + "stage_id": "extract-entities", + "provider": "openrouter", + "metadata": { + "model": "openai/gpt-4o-mini", + "usage": { + "prompt_tokens": 1000, + "completion_tokens": 500, + "cost": 0.123, + }, + }, + }, + ], + } + ] + + entry = record_run_usage( + root, + workflow_results, + cost_resolver=make_cost_resolver(tmp_path), + ) + + bucket = entry["per_bucket"][0] + # The first call has no adapter cost so it gets estimated: + # 2000/1000*0.00015 + 1000/1000*0.0006 = 0.0003 + 0.0006 = 0.0009 + assert bucket["cost_usd_estimated"] == round(0.0009, 6) + assert bucket["cost_usd_known"] == 0.123 + assert bucket["cost_status"] == "known" # at least one call returned cost + assert entry["rollup"]["total_cost_usd_known"] == 0.123 + assert entry["rollup"]["total_cost_usd_estimated"] == round(0.0009, 6) + + def test_plan_cli_writes_snapshot(tmp_path: Path) -> None: root = _build_infospace(tmp_path) env = 
os.environ.copy() diff --git a/workplans/IB-WP-0019-budget-and-usage-registry.md b/workplans/IB-WP-0019-budget-and-usage-registry.md index bfba44b..160edde 100644 --- a/workplans/IB-WP-0019-budget-and-usage-registry.md +++ b/workplans/IB-WP-0019-budget-and-usage-registry.md @@ -117,7 +117,7 @@ state_hub_task_id: "a612f8d4-f96d-4fae-9aa6-66a7946414f5" ```task id: IB-WP-0019-T03 -status: todo +status: done priority: high state_hub_task_id: "688c590d-8885-455e-bcf6-61409a45e001" ```