generated from coulomb/repo-seed
Implement-LLM-WP-0005-cost-model-estimators
This commit is contained in:
54
tests/test_cli.py
Normal file
54
tests/test_cli.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from llm_connect.cli import main
|
||||
from llm_connect.quality import QualityLedger, QualityObservation
|
||||
|
||||
|
||||
def test_rates_show_json_outputs_default_registry(capsys):
    """`rates show --json` exits 0 and prints JSON with the gpt-4o-mini prompt rate."""
    exit_code = main(["rates", "show", "--json"])
    assert exit_code == 0

    # Everything the command wrote to stdout must parse as one JSON document.
    captured = capsys.readouterr()
    rates_by_model = json.loads(captured.out)

    gpt_4o_mini = rates_by_model["openai/gpt-4o-mini"]
    assert gpt_4o_mini["prompt_per_1k"] == 0.00015
|
||||
|
||||
|
||||
def test_classes_show_lists_builtins(capsys):
    """`classes show` exits 0 and its output names two built-in problem classes."""
    exit_code = main(["classes", "show"])
    assert exit_code == 0

    printed = capsys.readouterr().out

    for class_name in ("chunk-summarization", "entity-extraction"):
        assert class_name in printed
|
||||
|
||||
|
||||
def test_classes_fit_reads_quality_ledger(tmp_path, capsys):
    """`classes fit` reads a quality ledger file and prints fitted params as JSON.

    Three identical observations (350 completion tokens for 5 expected
    entities) are appended to a ledger under ``tmp_path``; the CLI is then
    asked to fit ``entity-extraction`` with ``--json`` and must exit 0.
    """
    # Imported locally so this test does not rely on a module-level import.
    import pytest

    ledger = QualityLedger(tmp_path / "quality.jsonl")
    for _ in range(3):
        ledger.append(
            QualityObservation(
                task_type="extract",
                adapter_id="openrouter",
                model_id="openai/gpt-4o-mini",
                cost_usd=0.001,
                quality_score=0.9,
                latency_ms=100,
                tokens_in=500,
                tokens_out=350,
                recorded_at=datetime(2026, 5, 19, tzinfo=timezone.utc),
                tags={
                    "problem_class": "entity-extraction",
                    "dimensions": {
                        "chunk_words": 300,
                        "template_words": 100,
                        "expected_entities": 5,
                    },
                },
            )
        )

    assert main(["classes", "fit", str(ledger.path), "--class", "entity-extraction", "--json"]) == 0

    payload = json.loads(capsys.readouterr().out)

    # The fitted parameter is a float that went through a fit and a JSON
    # round trip; compare with a tolerance rather than bit-for-bit.
    fitted_params = payload["entity-extraction"]["params"]
    assert fitted_params["tokens_per_entity"] == pytest.approx(70)
|
||||
49
tests/test_costs.py
Normal file
49
tests/test_costs.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import pytest
|
||||
|
||||
from llm_connect.costs import CostEstimate, CostModel, estimate_cost
|
||||
from llm_connect.rates import ModelRate, ModelRateRegistry
|
||||
|
||||
|
||||
def test_known_model_cost_matches_lefevre_smoke_budget():
    """Price 28k prompt / 7.5k completion tokens on gpt-4o-mini."""
    priced = estimate_cost("openai/gpt-4o-mini", 28_000, 7_500)

    assert priced.cost_source == "rate_table:openai/gpt-4o-mini"

    # Tight check on the computed total first, then the looser (20% relative)
    # check against the rounded budget figure.
    total_usd = priced.cost_usd
    assert total_usd == pytest.approx(0.0087)
    assert total_usd == pytest.approx(0.009, rel=0.2)
|
||||
|
||||
|
||||
def test_unknown_model_returns_unknown_without_zeroing_cost():
    """An unrecognised model id yields a ``None`` cost with source "unknown"."""
    actual = estimate_cost("unknown/model", 100, 50)
    expected = CostEstimate(cost_usd=None, cost_source="unknown")

    # ``None`` (not 0) is the expected cost for a model that cannot be priced.
    assert actual == expected
|
||||
|
||||
|
||||
def test_registry_override_controls_estimate():
    """A registry passed via ``registry=`` supplies the rates used for pricing."""
    vendor_rate = ModelRate(
        "vendor/model",
        prompt_per_1k=1.0,
        completion_per_1k=2.0,
    )
    custom_registry = ModelRateRegistry({"vendor/model": vendor_rate})

    priced = estimate_cost("vendor/model", 1_000, 500, registry=custom_registry)

    # 1,000 prompt tokens at 1.0 per 1k plus 500 completion tokens at 2.0 per 1k.
    assert priced.cost_usd == pytest.approx(2.0)
    assert priced.prompt_cost_usd == pytest.approx(1.0)
    assert priced.completion_cost_usd == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_zero_tokens_are_valid_and_cost_zero_for_known_model():
    """Zero prompt and completion tokens are accepted and every cost field is 0."""
    model = CostModel()
    priced = model.estimate_cost("openai/gpt-4o-mini", 0, 0)

    assert priced.cost_usd == 0
    assert priced.prompt_cost_usd == 0
    assert priced.completion_cost_usd == 0
|
||||
|
||||
|
||||
def test_negative_tokens_are_rejected():
    """A negative prompt token count raises ``ValueError`` mentioning prompt_tokens."""
    rejects_prompt_tokens = pytest.raises(ValueError, match="prompt_tokens")

    with rejects_prompt_tokens:
        estimate_cost("openai/gpt-4o-mini", -1, 0)
|
||||
@@ -24,3 +24,27 @@ def test_wp_0004_primitives_are_exported_from_package_root():
|
||||
for name in expected_names:
|
||||
assert hasattr(llm_connect, name)
|
||||
assert name in llm_connect.__all__
|
||||
|
||||
|
||||
def test_wp_0005_primitives_are_exported_from_package_root():
    """Each listed name is an attribute of ``llm_connect`` and is in its ``__all__``."""
    # Grouped by area: rates, costs, then problem-class primitives and built-ins.
    expected_names = [
        "ModelRate", "ModelRateRegistry",
        "CostEstimate", "CostModel", "estimate_cost",
        "TokenEstimate", "Observation",
        "ProblemClass", "ProblemClassRegistry", "default_problem_class_registry",
        "ChunkSummarizationProblemClass",
        "EntityExtractionProblemClass",
        "RelationExtractionProblemClass",
        "JudgeEvalProblemClass",
        "ReportSynthesisProblemClass",
    ]

    for exported in expected_names:
        assert hasattr(llm_connect, exported)
        assert exported in llm_connect.__all__
|
||||
|
||||
137
tests/test_problem_classes.py
Normal file
137
tests/test_problem_classes.py
Normal file
@@ -0,0 +1,137 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from llm_connect.problem_classes import (
|
||||
EntityExtractionProblemClass,
|
||||
Observation,
|
||||
ProblemClassRegistry,
|
||||
TokenEstimate,
|
||||
)
|
||||
from llm_connect.quality import QualityObservation
|
||||
|
||||
|
||||
# Sample dimension dictionaries for each built-in problem class, keyed by class
# name. Every class has three samples; tests in this module index the first
# sample directly and iterate over all three, so ordering is significant.
DIMENSIONS_BY_CLASS = {
    # Summarization: sized by chunk and template word counts only.
    "chunk-summarization": [
        {"chunk_words": 900, "template_words": 150},
        {"chunk_words": 400, "template_words": 125},
        {"chunk_words": 1200, "template_words": 200},
    ],
    # Extraction classes add an expected item count.
    "entity-extraction": [
        {"chunk_words": 900, "template_words": 200, "expected_entities": 4},
        {"chunk_words": 450, "template_words": 180, "expected_entities": 6},
        {"chunk_words": 1200, "template_words": 220, "expected_entities": 8},
    ],
    "relation-extraction": [
        {"chunk_words": 900, "template_words": 200, "expected_relations": 3},
        {"chunk_words": 450, "template_words": 180, "expected_relations": 5},
        {"chunk_words": 1200, "template_words": 220, "expected_relations": 7},
    ],
    # Judge evaluation: sized by artifact length and number of criteria.
    "judge-eval": [
        {"artifact_words": 700, "template_words": 180, "n_criteria": 4},
        {"artifact_words": 300, "template_words": 160, "n_criteria": 5},
        {"artifact_words": 1100, "template_words": 200, "n_criteria": 6},
    ],
    # Report synthesis: sized by counts of chunks, entities, and relations.
    "report-synthesis": [
        {"n_chunks": 5, "n_entities": 20, "n_relations": 8, "template_words": 250},
        {"n_chunks": 8, "n_entities": 30, "n_relations": 12, "template_words": 250},
        {"n_chunks": 2, "n_entities": 10, "n_relations": 3, "template_words": 180},
    ],
}
|
||||
|
||||
|
||||
def test_default_registry_exposes_builtin_classes():
    """The default registry holds exactly the classes in ``DIMENSIONS_BY_CLASS``."""
    default_registry = ProblemClassRegistry.default()

    registered_names = set(default_registry.all())
    expected_names = set(DIMENSIONS_BY_CLASS)

    assert registered_names == expected_names
    assert default_registry.schema_version == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name,dimensions_list", DIMENSIONS_BY_CLASS.items())
def test_builtin_estimators_produce_token_estimates(name, dimensions_list):
    """Each built-in class returns a well-formed ``TokenEstimate`` for its first sample."""
    estimator = ProblemClassRegistry.default().get(name)
    first_sample = dimensions_list[0]

    result = estimator.estimate(first_sample)

    assert isinstance(result, TokenEstimate)
    # Token counts are non-negative and confidence lies in the closed unit interval.
    assert result.prompt_tokens >= 0
    assert result.completion_tokens >= 0
    assert 0 <= result.confidence <= 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name,dimensions_list", DIMENSIONS_BY_CLASS.items())
def test_fit_recovers_seeded_params_from_synthetic_observations(name, dimensions_list):
    """Fitting from a doubled first parameter lands within 10% of the seeded value.

    Observations are generated from the seeded class's own estimates, then fed
    to an instance whose first tunable parameter has been doubled.
    """
    reference = ProblemClassRegistry.default().get(name)
    tuned_key = reference.tunable_params[0]
    doubled_value = reference.params[tuned_key] * 2
    perturbed = type(reference)(params={tuned_key: doubled_value})

    def observe(dimensions):
        # Turn the reference estimate for these dimensions into an observation.
        predicted = reference.estimate(dimensions)
        return Observation(
            dimensions=dimensions,
            prompt_tokens=predicted.prompt_tokens,
            completion_tokens=predicted.completion_tokens,
        )

    synthetic = [observe(dimensions) for dimensions in dimensions_list]

    refit = perturbed.fit(synthetic, min_observations=3)

    assert refit.params[tuned_key] == pytest.approx(reference.params[tuned_key], rel=0.1)
|
||||
|
||||
|
||||
def test_fit_uses_quality_ledger_observation_shape():
    """``fit`` accepts ``QualityObservation`` records carrying dimensions in ``tags``.

    Three identical records report 350 output tokens for 5 expected entities,
    and the fitted ``tokens_per_entity`` is expected to be about 70.
    """
    starting_point = EntityExtractionProblemClass(params={"tokens_per_entity": 10})

    ledger_rows = []
    for _ in range(3):
        row = QualityObservation(
            task_type="extract",
            adapter_id="openrouter",
            model_id="openai/gpt-4o-mini",
            cost_usd=0.001,
            quality_score=0.9,
            latency_ms=100,
            tokens_in=500,
            tokens_out=350,
            recorded_at=datetime(2026, 5, 19, tzinfo=timezone.utc),
            tags={
                "problem_class": "entity-extraction",
                "dimensions": {
                    "chunk_words": 300,
                    "template_words": 100,
                    "expected_entities": 5,
                },
            },
        )
        ledger_rows.append(row)

    refit = starting_point.fit(ledger_rows)

    assert refit.params["tokens_per_entity"] == pytest.approx(70)
|
||||
|
||||
|
||||
def test_fit_keeps_seed_when_sample_is_too_small():
    """With one observation and ``min_observations=3``, ``fit`` returns the same object."""
    seeded = EntityExtractionProblemClass()
    predicted = seeded.estimate(
        {"chunk_words": 300, "template_words": 100, "expected_entities": 5}
    )

    lone_observation = Observation(
        dimensions={"chunk_words": 300, "template_words": 100, "expected_entities": 5},
        prompt_tokens=predicted.prompt_tokens,
        completion_tokens=predicted.completion_tokens,
    )

    refit = seeded.fit([lone_observation], min_observations=3)

    # Identity, not equality: the very same instance must be handed back.
    assert refit is seeded
|
||||
|
||||
|
||||
def test_missing_dimensions_are_rejected():
    """Estimating with only ``chunk_words`` raises ``ValueError`` ("Missing dimensions")."""
    summarizer = ProblemClassRegistry.default().get("chunk-summarization")
    incomplete = {"chunk_words": 100}

    rejects_missing = pytest.raises(ValueError, match="Missing dimensions")
    with rejects_missing:
        summarizer.estimate(incomplete)
|
||||
65
tests/test_rates.py
Normal file
65
tests/test_rates.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import pytest
|
||||
|
||||
from llm_connect.rates import ModelRate, ModelRateRegistry
|
||||
|
||||
|
||||
def test_default_registry_contains_openrouter_seed_models():
    """The default registry has at least nine rates and gpt-4o-mini carries provenance."""
    seeded_rates = ModelRateRegistry.default().all()

    assert len(seeded_rates) >= 9

    mini = seeded_rates["openai/gpt-4o-mini"]
    assert mini.captured_at == "2026-05-17"
    assert mini.source_url == "https://openrouter.ai/models"
|
||||
|
||||
|
||||
def test_from_yaml_loads_package_shape(tmp_path):
    """``from_yaml`` loads a rates file and applies file-level metadata to each rate.

    The fixture declares ``currency``, ``source_url`` and ``captured_at`` once
    at the top level and a single ``vendor/model`` entry under ``rates``; the
    loaded ``ModelRate`` is expected to carry all of those values.
    """
    path = tmp_path / "model-rates.yaml"
    # YAML nesting is significant: ``vendor/model`` must sit under ``rates`` and
    # the two per-1k prices under ``vendor/model``.
    path.write_text(
        """
schema_version: 1
currency: USD
source_url: https://example.test/rates
captured_at: "2026-05-19"
rates:
  vendor/model:
    prompt_per_1k: 0.1
    completion_per_1k: 0.2
""",
        encoding="utf-8",
    )

    registry = ModelRateRegistry.from_yaml(path)
    rate = registry.get("vendor/model")

    assert rate == ModelRate(
        model_id="vendor/model",
        prompt_per_1k=0.1,
        completion_per_1k=0.2,
        currency="USD",
        source_url="https://example.test/rates",
        captured_at="2026-05-19",
    )
|
||||
|
||||
|
||||
def test_merged_with_overrides_matching_model():
    """``merged_with`` lets the other registry's entry win for a shared model id."""
    defaults = ModelRateRegistry.default()
    replacement = ModelRate(
        "openai/gpt-4o-mini",
        prompt_per_1k=1,
        completion_per_1k=2,
        captured_at="override",
    )
    overrides = ModelRateRegistry({"openai/gpt-4o-mini": replacement})

    combined = defaults.merged_with(overrides)

    winner = combined.get("openai/gpt-4o-mini")
    assert winner.prompt_per_1k == 1
    assert winner.captured_at == "override"
|
||||
|
||||
|
||||
def test_negative_rates_are_rejected():
    """A negative ``prompt_per_1k`` raises ``ValueError`` mentioning prompt_per_1k."""
    rejects_prompt_rate = pytest.raises(ValueError, match="prompt_per_1k")

    with rejects_prompt_rate:
        ModelRate("bad/model", prompt_per_1k=-1, completion_per_1k=0)
|
||||
Reference in New Issue
Block a user