generated from coulomb/repo-seed
Implement-LLM-WP-0005-cost-model-estimators
This commit is contained in:
100
docs/infospace-bench-cost-model-migration.md
Normal file
100
docs/infospace-bench-cost-model-migration.md
Normal file
@@ -0,0 +1,100 @@
|
||||
# infospace-bench Cost Estimator Migration
|
||||
|
||||
`infospace-bench` can replace its local rate table and coarse word-count
|
||||
budget math with the primitives added in `LLM-WP-0005`.
|
||||
|
||||
## Rate Table
|
||||
|
||||
- Drop `src/infospace_bench/model_rates.yaml` after the dependency is bumped.
|
||||
- Load `ModelRateRegistry.default()` from `llm-connect`.
|
||||
- Keep the workspace-level `model-rates.yaml` override and merge it with
|
||||
`ModelRateRegistry.default().merged_with(ModelRateRegistry.from_yaml(path))`.
|
||||
- Preserve `--cost-per-1k` as an explicit blended-rate override. When supplied,
|
||||
it should win over the registry and report `cost_source="cost_per_1k_blended"`.
|
||||
|
||||
## Plan Summary Sketch
|
||||
|
||||
```python
|
||||
from llm_connect import (
|
||||
CostEstimate,
|
||||
ModelRateRegistry,
|
||||
ProblemClassRegistry,
|
||||
estimate_cost,
|
||||
)
|
||||
|
||||
|
||||
def plan_generation_summary(...):
|
||||
problem_classes = ProblemClassRegistry.default()
|
||||
rates = ModelRateRegistry.default()
|
||||
workspace_rates = _workspace_rate_path(root_path)
|
||||
if workspace_rates.exists():
|
||||
rates = rates.merged_with(ModelRateRegistry.from_yaml(workspace_rates))
|
||||
|
||||
total_prompt_tokens = 0
|
||||
total_completion_tokens = 0
|
||||
per_stage = []
|
||||
for workflow_id in workflow_ids:
|
||||
class_name, dimensions = _problem_class_for_workflow(
|
||||
workflow_id,
|
||||
selected_chunks=selected,
|
||||
template_words=template_words,
|
||||
entities_per_chunk=entities_per_chunk,
|
||||
)
|
||||
estimate = problem_classes.get(class_name).estimate(dimensions)
|
||||
calls = _calls_for_workflow(workflow_id, selected, entities_per_chunk)
|
||||
prompt_tokens = estimate.prompt_tokens * calls
|
||||
completion_tokens = estimate.completion_tokens * calls
|
||||
total_prompt_tokens += prompt_tokens
|
||||
total_completion_tokens += completion_tokens
|
||||
per_stage.append(
|
||||
{
|
||||
"workflow_id": workflow_id,
|
||||
"problem_class": class_name,
|
||||
"calls": calls,
|
||||
"prompt_tokens_estimate": prompt_tokens,
|
||||
"completion_tokens_estimate": completion_tokens,
|
||||
"confidence": estimate.confidence,
|
||||
}
|
||||
)
|
||||
|
||||
if cost_per_1k_tokens > 0:
|
||||
total_tokens = total_prompt_tokens + total_completion_tokens
|
||||
cost = (total_tokens / 1000.0) * cost_per_1k_tokens
|
||||
cost_source = "cost_per_1k_blended"
|
||||
elif model:
|
||||
cost_estimate = estimate_cost(
|
||||
model,
|
||||
total_prompt_tokens,
|
||||
total_completion_tokens,
|
||||
registry=rates,
|
||||
)
|
||||
cost = cost_estimate.cost_usd
|
||||
cost_source = cost_estimate.cost_source
|
||||
else:
|
||||
cost = None
|
||||
cost_source = None
|
||||
|
||||
return {
|
||||
"per_workflow": per_stage,
|
||||
"total_prompt_tokens_estimate": total_prompt_tokens,
|
||||
"estimated_completion_tokens": total_completion_tokens,
|
||||
"estimated_cost_usd": round(cost, 6) if cost is not None else None,
|
||||
"cost_source": cost_source,
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
## Workflow Mapping
|
||||
|
||||
The initial mapping can stay intentionally thin:
|
||||
|
||||
| infospace-bench workflow | llm-connect problem class |
|
||||
|---|---|
|
||||
| `summarize-source` | `chunk-summarization` |
|
||||
| entity extraction workflows | `entity-extraction` |
|
||||
| relation extraction workflows | `relation-extraction` |
|
||||
| `generic-source-evaluations` | `judge-eval` |
|
||||
| final report or rollup synthesis | `report-synthesis` |
|
||||
|
||||
The consumer still owns structure-specific dimensions such as selected chunk
|
||||
counts, profile template word counts, and expected entities per chunk.
|
||||
Reference in New Issue
Block a user