generated from coulomb/repo-seed
IB-WP-0019-T03: rate-table cost computation
Ship a starter model rate table at src/infospace_bench/model_rates.yaml (prompt_per_1k / completion_per_1k for the OpenRouter models we have actually touched: GPT-4o, GPT-4o-mini, GPT-4-turbo, Claude 3.5 Sonnet and Haiku, Claude 3 Opus, Gemini 1.5 Flash/Pro, Llama 3.1 70B) and a load_rate_table() / estimate_cost_usd() pair that overlays an optional <workspace>/model-rates.yaml on top of the bundled defaults. `generate run` now passes a workspace-aware cost_resolver into record_run_usage, so cost_usd_estimated lands on every usage bucket whose model matches the table. Adapter-returned cost still wins (cost_status="known"); rate-table cost is reported under cost_status="estimated"; unmatched models are recorded as cost_status="unknown" rather than silently zeroed. The rate-table file is listed in pyproject.toml package-data so pip-installed users keep the defaults. 106 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -281,6 +281,107 @@ def test_run_generation_writes_usage_yaml_with_plan_snapshot_id(tmp_path: Path)
|
||||
assert runs[0]["rollup"]["total_cost_usd_known"] == 0.0
|
||||
|
||||
|
||||
def test_rate_table_known_model_resolves_cost(tmp_path: Path) -> None:
    """The default rate table lists gpt-4o-mini and prices a call from it."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    model_id = "openai/gpt-4o-mini"
    table = load_rate_table()
    assert model_id in table

    # gpt-4o-mini: prompt 0.00015/1k, completion 0.0006/1k.
    # 1000 prompt + 500 completion tokens -> 0.00015 + 0.0003 = 0.00045
    estimated = estimate_cost_usd(model_id, 1000, 500, table)
    assert estimated is not None
    assert abs(estimated - 0.00045) < 1e-9
|
||||
|
||||
|
||||
def test_rate_table_unknown_model_returns_none(tmp_path: Path) -> None:
    """A model id absent from the rate table yields None, not a number."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    table = load_rate_table()
    estimate = estimate_cost_usd("acme/no-such-model", 1000, 500, table)
    assert estimate is None
|
||||
|
||||
|
||||
def test_workspace_rate_table_overrides_package_default(tmp_path: Path) -> None:
    """Rates in <tmp_path>/model-rates.yaml are used by load_rate_table(tmp_path).

    The override file re-prices a model that the default table also lists
    and adds one extra model; both must be priced from the override values.
    """
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    workspace_rates = {
        "openai/gpt-4o-mini": {
            "prompt_per_1k": 1.0,
            "completion_per_1k": 2.0,
        },
        "acme/bespoke": {
            "prompt_per_1k": 0.1,
            "completion_per_1k": 0.2,
        },
    }
    document = {"schema_version": 1, "rates": workspace_rates}
    rates_file = tmp_path / "model-rates.yaml"
    rates_file.write_text(yaml.safe_dump(document), encoding="utf-8")

    table = load_rate_table(tmp_path)

    # 1000 prompt + 1000 completion tokens: cost is the sum of both per-1k rates.
    overridden = estimate_cost_usd("openai/gpt-4o-mini", 1000, 1000, table)
    bespoke = estimate_cost_usd("acme/bespoke", 1000, 1000, table)

    assert overridden == round(1.0 + 2.0, 6)
    assert bespoke == round(0.1 + 0.2, 6)
|
||||
|
||||
|
||||
def test_record_run_usage_fills_estimated_cost_via_resolver(tmp_path: Path) -> None:
    """record_run_usage reports adapter cost and resolver-estimated cost separately.

    Two calls to the same stage/model are recorded: one without a cost in its
    usage payload and one carrying ``cost: 0.123``. The first bucket must show
    the estimated figure for the former and the known figure for the latter.
    """
    root = _build_infospace(tmp_path)
    from infospace_bench.budget import make_cost_resolver, record_run_usage

    def _stage(usage: dict) -> dict:
        # Both calls share stage, provider and model; only the usage differs.
        return {
            "stage_id": "extract-entities",
            "provider": "openrouter",
            "metadata": {
                "model": "openai/gpt-4o-mini",
                "usage": usage,
            },
        }

    call_without_cost = _stage({"prompt_tokens": 2000, "completion_tokens": 1000})
    call_with_cost = _stage(
        {
            "prompt_tokens": 1000,
            "completion_tokens": 500,
            "cost": 0.123,
        }
    )
    workflow_results = [
        {
            "run_id": "run-cost",
            "workflow_id": "generic-source-entities",
            "stages": [call_without_cost, call_with_cost],
        }
    ]

    resolver = make_cost_resolver(tmp_path)
    entry = record_run_usage(root, workflow_results, cost_resolver=resolver)

    first_bucket = entry["per_bucket"][0]
    rollup = entry["rollup"]

    # The first call has no adapter cost so it gets estimated:
    # 2000/1000*0.00015 + 1000/1000*0.0006 = 0.0003 + 0.0006 = 0.0009
    assert first_bucket["cost_usd_estimated"] == round(0.0009, 6)
    assert first_bucket["cost_usd_known"] == 0.123
    # At least one call returned a cost, so the bucket is marked "known".
    assert first_bucket["cost_status"] == "known"
    assert rollup["total_cost_usd_known"] == 0.123
    assert rollup["total_cost_usd_estimated"] == round(0.0009, 6)
|
||||
|
||||
|
||||
def test_plan_cli_writes_snapshot(tmp_path: Path) -> None:
|
||||
root = _build_infospace(tmp_path)
|
||||
env = os.environ.copy()
|
||||
|
||||
Reference in New Issue
Block a user