Implement-LLM-WP-0005-cost-model-estimators
Some checks failed
CI / test (3.10) (push) Has been cancelled
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled

This commit is contained in:
2026-05-19 05:02:20 +02:00
parent 0054afe689
commit c11c6afa3f
16 changed files with 1525 additions and 10 deletions

54
tests/test_cli.py Normal file
View File

@@ -0,0 +1,54 @@
import json
from datetime import datetime, timezone
from llm_connect.cli import main
from llm_connect.quality import QualityLedger, QualityObservation
def test_rates_show_json_outputs_default_registry(capsys):
    """`rates show --json` exits 0 and prints JSON with the gpt-4o-mini rate."""
    exit_code = main(["rates", "show", "--json"])
    assert exit_code == 0

    # The command's stdout must be a single parseable JSON document.
    rates = json.loads(capsys.readouterr().out)
    mini_rate = rates["openai/gpt-4o-mini"]
    assert mini_rate["prompt_per_1k"] == 0.00015
def test_classes_show_lists_builtins(capsys):
    """`classes show` exits 0 and names built-in problem classes on stdout."""
    exit_code = main(["classes", "show"])
    assert exit_code == 0

    printed = capsys.readouterr().out
    for class_name in ("chunk-summarization", "entity-extraction"):
        assert class_name in printed
def test_classes_fit_reads_quality_ledger(tmp_path, capsys):
    """`classes fit` reads a quality ledger file and reports fitted params.

    Three identical entity-extraction observations are appended to a ledger
    under ``tmp_path``; the CLI is then pointed at that file and its JSON
    output is checked for the fitted ``tokens_per_entity`` value.
    """
    ledger = QualityLedger(tmp_path / "quality.jsonl")
    for _ in range(3):
        # A fresh observation (and fresh tag dicts) per append.
        observation = QualityObservation(
            task_type="extract",
            adapter_id="openrouter",
            model_id="openai/gpt-4o-mini",
            cost_usd=0.001,
            quality_score=0.9,
            latency_ms=100,
            tokens_in=500,
            tokens_out=350,
            recorded_at=datetime(2026, 5, 19, tzinfo=timezone.utc),
            tags={
                "problem_class": "entity-extraction",
                "dimensions": {
                    "chunk_words": 300,
                    "template_words": 100,
                    "expected_entities": 5,
                },
            },
        )
        ledger.append(observation)

    argv = [
        "classes",
        "fit",
        str(ledger.path),
        "--class",
        "entity-extraction",
        "--json",
    ]
    assert main(argv) == 0

    report = json.loads(capsys.readouterr().out)
    fitted_params = report["entity-extraction"]["params"]
    assert fitted_params["tokens_per_entity"] == 70

49
tests/test_costs.py Normal file
View File

@@ -0,0 +1,49 @@
import pytest
from llm_connect.costs import CostEstimate, CostModel, estimate_cost
from llm_connect.rates import ModelRate, ModelRateRegistry
def test_known_model_cost_matches_lefevre_smoke_budget():
    """A known model is priced from the rate table at the expected budget."""
    prompt_tokens, completion_tokens = 28_000, 7_500
    estimate = estimate_cost("openai/gpt-4o-mini", prompt_tokens, completion_tokens)

    assert estimate.cost_source == "rate_table:openai/gpt-4o-mini"
    # Tight check on the computed figure, then a loose check against the
    # rounded budget number.
    assert estimate.cost_usd == pytest.approx(0.0087)
    assert estimate.cost_usd == pytest.approx(0.009, rel=0.2)
def test_unknown_model_returns_unknown_without_zeroing_cost():
    """An unrecognised model yields ``cost_usd=None``, not a zero cost."""
    expected = CostEstimate(cost_usd=None, cost_source="unknown")
    assert estimate_cost("unknown/model", 100, 50) == expected
def test_registry_override_controls_estimate():
    """An explicitly supplied registry determines the prices that are used."""
    custom_rate = ModelRate(
        "vendor/model",
        prompt_per_1k=1.0,
        completion_per_1k=2.0,
    )
    registry = ModelRateRegistry({"vendor/model": custom_rate})

    estimate = estimate_cost("vendor/model", 1_000, 500, registry=registry)

    # 1000 prompt tokens at 1.0/1k plus 500 completion tokens at 2.0/1k.
    assert estimate.cost_usd == pytest.approx(2.0)
    assert estimate.prompt_cost_usd == pytest.approx(1.0)
    assert estimate.completion_cost_usd == pytest.approx(1.0)
def test_zero_tokens_are_valid_and_cost_zero_for_known_model():
    """Zero prompt and completion tokens are accepted and cost nothing."""
    estimate = CostModel().estimate_cost("openai/gpt-4o-mini", 0, 0)
    for field_name in ("cost_usd", "prompt_cost_usd", "completion_cost_usd"):
        assert getattr(estimate, field_name) == 0
def test_negative_tokens_are_rejected():
    """A negative prompt token count raises ``ValueError`` naming the field."""
    expect_rejection = pytest.raises(ValueError, match="prompt_tokens")
    with expect_rejection:
        estimate_cost("openai/gpt-4o-mini", -1, 0)

View File

@@ -24,3 +24,27 @@ def test_wp_0004_primitives_are_exported_from_package_root():
for name in expected_names:
assert hasattr(llm_connect, name)
assert name in llm_connect.__all__
def test_wp_0005_primitives_are_exported_from_package_root():
    """Every WP-0005 name is an attribute of the package and is in ``__all__``."""
    expected_names = (
        # rates / costs
        "ModelRate",
        "ModelRateRegistry",
        "CostEstimate",
        "CostModel",
        "estimate_cost",
        # problem-class primitives
        "TokenEstimate",
        "Observation",
        "ProblemClass",
        "ProblemClassRegistry",
        "default_problem_class_registry",
        # built-in problem classes
        "ChunkSummarizationProblemClass",
        "EntityExtractionProblemClass",
        "RelationExtractionProblemClass",
        "JudgeEvalProblemClass",
        "ReportSynthesisProblemClass",
    )
    for exported_name in expected_names:
        assert hasattr(llm_connect, exported_name)
        assert exported_name in llm_connect.__all__

View File

@@ -0,0 +1,137 @@
from datetime import datetime, timezone
import pytest
from llm_connect.problem_classes import (
EntityExtractionProblemClass,
Observation,
ProblemClassRegistry,
TokenEstimate,
)
from llm_connect.quality import QualityObservation
# Sample estimator inputs keyed by problem-class name. Each class gets three
# dimension dicts; the parametrized tests below iterate over these items.
DIMENSIONS_BY_CLASS = {
    "chunk-summarization": [
        {"chunk_words": 900, "template_words": 150},
        {"chunk_words": 400, "template_words": 125},
        {"chunk_words": 1200, "template_words": 200},
    ],
    "entity-extraction": [
        {"chunk_words": 900, "template_words": 200, "expected_entities": 4},
        {"chunk_words": 450, "template_words": 180, "expected_entities": 6},
        {"chunk_words": 1200, "template_words": 220, "expected_entities": 8},
    ],
    "relation-extraction": [
        {"chunk_words": 900, "template_words": 200, "expected_relations": 3},
        {"chunk_words": 450, "template_words": 180, "expected_relations": 5},
        {"chunk_words": 1200, "template_words": 220, "expected_relations": 7},
    ],
    "judge-eval": [
        {"artifact_words": 700, "template_words": 180, "n_criteria": 4},
        {"artifact_words": 300, "template_words": 160, "n_criteria": 5},
        {"artifact_words": 1100, "template_words": 200, "n_criteria": 6},
    ],
    "report-synthesis": [
        {"n_chunks": 5, "n_entities": 20, "n_relations": 8, "template_words": 250},
        {"n_chunks": 8, "n_entities": 30, "n_relations": 12, "template_words": 250},
        {"n_chunks": 2, "n_entities": 10, "n_relations": 3, "template_words": 180},
    ],
}
def test_default_registry_exposes_builtin_classes():
    """The default registry holds exactly the classes in DIMENSIONS_BY_CLASS."""
    registry = ProblemClassRegistry.default()
    registered_names = set(registry.all())
    assert registered_names == set(DIMENSIONS_BY_CLASS)
    assert registry.schema_version == 1
@pytest.mark.parametrize("name,dimensions_list", DIMENSIONS_BY_CLASS.items())
def test_builtin_estimators_produce_token_estimates(name, dimensions_list):
    """Each built-in class returns a sane ``TokenEstimate`` for its first sample."""
    registry = ProblemClassRegistry.default()
    first_sample = dimensions_list[0]
    estimate = registry.get(name).estimate(first_sample)

    assert isinstance(estimate, TokenEstimate)
    assert estimate.prompt_tokens >= 0
    assert estimate.completion_tokens >= 0
    assert 0 <= estimate.confidence <= 1
@pytest.mark.parametrize("name,dimensions_list", DIMENSIONS_BY_CLASS.items())
def test_fit_recovers_seeded_params_from_synthetic_observations(name, dimensions_list):
    """Fitting on the seeded model's own estimates recovers its first param.

    Observations are synthesised from the default ("seeded") instance, then an
    instance whose first tunable parameter is doubled is fitted against them.
    The fitted value must land within 10% of the seeded value.
    """
    seeded = ProblemClassRegistry.default().get(name)
    param_name = seeded.tunable_params[0]
    doubled = seeded.params[param_name] * 2
    off_seed = type(seeded)(params={param_name: doubled})

    def synthesize(dimensions):
        # Turn the seeded estimate for these dimensions into an observation.
        estimate = seeded.estimate(dimensions)
        return Observation(
            dimensions=dimensions,
            prompt_tokens=estimate.prompt_tokens,
            completion_tokens=estimate.completion_tokens,
        )

    observations = [synthesize(dimensions) for dimensions in dimensions_list]
    fitted = off_seed.fit(observations, min_observations=3)

    assert fitted.params[param_name] == pytest.approx(seeded.params[param_name], rel=0.1)
def test_fit_uses_quality_ledger_observation_shape():
    """``fit`` accepts ``QualityObservation`` records carrying dimensions in tags."""
    problem_class = EntityExtractionProblemClass(params={"tokens_per_entity": 10})

    observations = []
    for _ in range(3):
        # Each record is built from scratch so no tag dicts are shared.
        observations.append(
            QualityObservation(
                task_type="extract",
                adapter_id="openrouter",
                model_id="openai/gpt-4o-mini",
                cost_usd=0.001,
                quality_score=0.9,
                latency_ms=100,
                tokens_in=500,
                tokens_out=350,
                recorded_at=datetime(2026, 5, 19, tzinfo=timezone.utc),
                tags={
                    "problem_class": "entity-extraction",
                    "dimensions": {
                        "chunk_words": 300,
                        "template_words": 100,
                        "expected_entities": 5,
                    },
                },
            )
        )

    fitted = problem_class.fit(observations)
    assert fitted.params["tokens_per_entity"] == pytest.approx(70)
def test_fit_keeps_seed_when_sample_is_too_small():
    """With fewer observations than ``min_observations``, ``fit`` returns self."""
    problem_class = EntityExtractionProblemClass()
    estimate = problem_class.estimate(
        {"chunk_words": 300, "template_words": 100, "expected_entities": 5}
    )
    lone_observation = Observation(
        dimensions={"chunk_words": 300, "template_words": 100, "expected_entities": 5},
        prompt_tokens=estimate.prompt_tokens,
        completion_tokens=estimate.completion_tokens,
    )

    # One observation against a required minimum of three.
    fitted = problem_class.fit([lone_observation], min_observations=3)

    assert fitted is problem_class
def test_missing_dimensions_are_rejected():
    """Estimating with an incomplete dimensions dict raises ``ValueError``."""
    registry = ProblemClassRegistry.default()
    problem_class = registry.get("chunk-summarization")
    incomplete = {"chunk_words": 100}
    with pytest.raises(ValueError, match="Missing dimensions"):
        problem_class.estimate(incomplete)

65
tests/test_rates.py Normal file
View File

@@ -0,0 +1,65 @@
import pytest
from llm_connect.rates import ModelRate, ModelRateRegistry
def test_default_registry_contains_openrouter_seed_models():
    """The default rate registry ships at least nine rates with provenance."""
    rates = ModelRateRegistry.default().all()
    assert len(rates) >= 9

    mini_rate = rates["openai/gpt-4o-mini"]
    assert mini_rate.captured_at == "2026-05-17"
    assert mini_rate.source_url == "https://openrouter.ai/models"
def test_from_yaml_loads_package_shape(tmp_path):
    """``ModelRateRegistry.from_yaml`` loads a file with file-level metadata.

    The fixture declares ``currency``, ``source_url`` and ``captured_at`` once
    at the top level and nests the per-model prices under ``rates:``. The
    loaded ``ModelRate`` is expected to carry both the nested prices and the
    file-level metadata.
    """
    # The document is assembled from explicit lines so the nesting under
    # ``rates:`` is unmistakable. Without that indentation ``rates`` would be
    # null and the model id and prices would become unrelated top-level keys,
    # which cannot produce the ``ModelRate`` asserted below.
    yaml_text = "\n".join(
        [
            "schema_version: 1",
            "currency: USD",
            "source_url: https://example.test/rates",
            'captured_at: "2026-05-19"',
            "rates:",
            "  vendor/model:",
            "    prompt_per_1k: 0.1",
            "    completion_per_1k: 0.2",
            "",  # trailing newline
        ]
    )
    path = tmp_path / "model-rates.yaml"
    path.write_text(yaml_text, encoding="utf-8")

    registry = ModelRateRegistry.from_yaml(path)
    rate = registry.get("vendor/model")

    assert rate == ModelRate(
        model_id="vendor/model",
        prompt_per_1k=0.1,
        completion_per_1k=0.2,
        currency="USD",
        source_url="https://example.test/rates",
        captured_at="2026-05-19",
    )
def test_merged_with_overrides_matching_model():
    """``merged_with`` lets the other registry's rate win for a shared model id."""
    model_id = "openai/gpt-4o-mini"
    replacement = ModelRate(
        model_id,
        prompt_per_1k=1,
        completion_per_1k=2,
        captured_at="override",
    )
    base = ModelRateRegistry.default()
    override = ModelRateRegistry({model_id: replacement})

    merged = base.merged_with(override)

    assert merged.get(model_id).prompt_per_1k == 1
    assert merged.get(model_id).captured_at == "override"
def test_negative_rates_are_rejected():
    """Constructing a ``ModelRate`` with a negative prompt price raises."""
    expect_rejection = pytest.raises(ValueError, match="prompt_per_1k")
    with expect_rejection:
        ModelRate("bad/model", prompt_per_1k=-1, completion_per_1k=0)