diff --git a/src/infospace_bench/cli.py b/src/infospace_bench/cli.py index db78c5c..dbfffe5 100644 --- a/src/infospace_bench/cli.py +++ b/src/infospace_bench/cli.py @@ -186,7 +186,10 @@ def build_parser() -> argparse.ArgumentParser: generate_plan.add_argument("--max-calls", type=int, default=None) generate_plan.add_argument("--cost-cap", type=float, default=None) generate_plan.add_argument( - "--cost-per-1k", type=float, default=0.0, help="USD per 1k prompt tokens for rough cost estimate" + "--cost-per-1k", type=float, default=0.0, help="USD per 1k prompt tokens for rough cost estimate (override; rate-table lookup via --model wins when present)" + ) + generate_plan.add_argument( + "--model", default="", help="Model id (e.g. openai/gpt-4o-mini); when set, the bundled rate table replaces --cost-per-1k for the estimate" ) generate_plan.add_argument( "--entities-per-chunk", type=int, default=2, help="Estimate of entities each chunk yields" @@ -551,6 +554,7 @@ def main(argv: list[str] | None = None) -> int: max_calls=args.max_calls, cost_cap=args.cost_cap, cost_per_1k_tokens=args.cost_per_1k, + model=args.model or None, entities_per_chunk=args.entities_per_chunk, full=args.full, ) diff --git a/src/infospace_bench/generator.py b/src/infospace_bench/generator.py index 354eaf4..8786f19 100644 --- a/src/infospace_bench/generator.py +++ b/src/infospace_bench/generator.py @@ -144,6 +144,7 @@ def plan_generation( max_calls: int | None = None, cost_cap: float | None = None, cost_per_1k_tokens: float = 0.0, + model: str | None = None, words_per_token: float = WORDS_PER_TOKEN_DEFAULT, entities_per_chunk: int = ENTITIES_PER_CHUNK_ESTIMATE, full: bool = False, @@ -161,6 +162,7 @@ def plan_generation( max_calls=max_calls, cost_cap=cost_cap, cost_per_1k_tokens=cost_per_1k_tokens, + model=model, words_per_token=words_per_token, entities_per_chunk=entities_per_chunk, ) @@ -203,6 +205,7 @@ def plan_generation_summary( max_calls: int | None = None, cost_cap: float | None = None, 
cost_per_1k_tokens: float = 0.0, + model: str | None = None, words_per_token: float = WORDS_PER_TOKEN_DEFAULT, entities_per_chunk: int = ENTITIES_PER_CHUNK_ESTIMATE, ) -> dict[str, Any]: @@ -247,9 +250,29 @@ def plan_generation_summary( total_calls += calls total_prompt_words += prompt_words total_tokens = int(round(total_prompt_words / words_per_token)) if words_per_token > 0 else 0 + # Estimate completion tokens as a rough fraction of prompt — most workflows + # write structured output that's ~20% of the prompt size. T03 of the + # cost-estimator workplan will replace this with problem-class estimators + # from llm-connect. + estimated_completion_tokens = int(round(total_tokens * 0.2)) cost: float | None = None - if cost_per_1k_tokens > 0: + cost_source: str | None = None + rate_table_entry: dict[str, float] | None = None + if model: + from .budget import load_rate_table + + rates = load_rate_table(_workspace_for(root_path)) + rate_table_entry = rates.get(model) + if rate_table_entry is not None: + cost = round( + (total_tokens / 1000.0) * rate_table_entry["prompt_per_1k"] + + (estimated_completion_tokens / 1000.0) * rate_table_entry["completion_per_1k"], + 6, + ) + cost_source = f"rate_table:{model}" + elif cost_per_1k_tokens > 0: cost = round((total_tokens / 1000.0) * cost_per_1k_tokens, 4) + cost_source = "cost_per_1k_blended" chapter_numbers = sorted( { int(item.provenance.get("chapter_number")) @@ -267,7 +290,10 @@ def plan_generation_summary( "total_provider_calls_estimate": total_calls, "total_prompt_words_estimate": total_prompt_words, "total_prompt_tokens_estimate": total_tokens, + "estimated_completion_tokens": estimated_completion_tokens, "estimated_cost_usd": cost, + "cost_source": cost_source, + "model": model, "cost_per_1k_tokens": cost_per_1k_tokens or None, "words_per_token": words_per_token, "entities_per_chunk_estimate": entities_per_chunk, diff --git a/src/infospace_bench/profiles/general-knowledge/templates/extract-entities.md 
b/src/infospace_bench/profiles/general-knowledge/templates/extract-entities.md index 33434f6..af28643 100644 --- a/src/infospace_bench/profiles/general-knowledge/templates/extract-entities.md +++ b/src/infospace_bench/profiles/general-knowledge/templates/extract-entities.md @@ -3,9 +3,11 @@ Profile: {{ macros.profile }} Extract reusable infospace entities from the source chunk. Return one Markdown -bundle where each entity starts with `# Entity Title` and contains at least a -`## Definition` section. Prefer durable concepts, claims, named methods, -people, places, works, and objects over sentence fragments. +bundle where each entity starts with a level-1 heading that is the entity's +own name (e.g. `# Knowledge Artifact`, `# Source Claim` — **not** the literal +string "Entity Title"). Each entity contains at least a `## Definition` +section. Prefer durable concepts, claims, named methods, people, places, +works, and objects over sentence fragments. Source title: {{ input.title }} Source artifact: {{ input.artifact_id }} diff --git a/src/infospace_bench/profiles/trading-literature/templates/extract-entities.md b/src/infospace_bench/profiles/trading-literature/templates/extract-entities.md index a3f7524..11dc825 100644 --- a/src/infospace_bench/profiles/trading-literature/templates/extract-entities.md +++ b/src/infospace_bench/profiles/trading-literature/templates/extract-entities.md @@ -3,8 +3,10 @@ Profile: {{ macros.profile }} Extract reusable infospace entities from the source chunk. Return one -Markdown bundle where each entity starts with `# Entity Title` and has a -`## Definition` section, plus a `## Category` line drawn from the list +Markdown bundle where each entity starts with a level-1 heading that is +the entity's name (e.g. `# Bucket Shop`, `# Tape Reading`, `# Larry +Livingston` — **not** the literal string "Entity Title"). Each entity has +a `## Definition` section and a `## Category` line drawn from the list below. 
Add `## Context` and `## Source Evidence` when the chunk gives enough material; leave them out rather than inventing detail. diff --git a/tests/test_plan_scale.py b/tests/test_plan_scale.py index b2959ce..27fbc5b 100644 --- a/tests/test_plan_scale.py +++ b/tests/test_plan_scale.py @@ -115,6 +115,46 @@ def test_plan_caps_flag_when_estimate_exceeds_budget(tmp_path: Path) -> None: assert summary["exceeds_cost_cap"] is True +def test_plan_with_model_uses_rate_table_instead_of_blended_per_1k(tmp_path: Path) -> None: + """--model openai/gpt-4o-mini should pull from bundled rate table. + + Stopgap until LLM-WP-0005 lands a proper cost model in llm-connect. + """ + root = _build_plan_infospace(tmp_path) + + # Baseline: the same plan priced at a flat $0.30/1k blended rate. + blended = plan_generation_summary(root, cost_per_1k_tokens=0.30) + rate_table = plan_generation_summary( + root, model="openai/gpt-4o-mini" + ) + + # gpt-4o-mini list price is ~0.00015/1k prompt + ~0.0006/1k completion, + # so the rate-table cost must be far below the $0.30/1k blended figure. + assert rate_table["cost_source"] == "rate_table:openai/gpt-4o-mini" + assert rate_table["estimated_cost_usd"] is not None + assert rate_table["estimated_cost_usd"] < blended["estimated_cost_usd"] + assert rate_table["estimated_cost_usd"] < 0.10, ( + "rate-table estimate must be far below a $0.30/1k blended rate" + ) + # The estimator now also returns a completion-token estimate. 
+ assert rate_table["estimated_completion_tokens"] > 0 + + +def test_plan_with_unknown_model_falls_back_to_blended_or_unknown(tmp_path: Path) -> None: + """A model id missing from the rate table uses --cost-per-1k, else yields no estimate.""" + root = _build_plan_infospace(tmp_path) + + no_signal = plan_generation_summary(root, model="acme/not-in-rate-table") + blended = plan_generation_summary( + root, model="acme/not-in-rate-table", cost_per_1k_tokens=0.5 + ) + + assert no_signal["estimated_cost_usd"] is None + assert no_signal["cost_source"] is None + assert blended["estimated_cost_usd"] is not None + assert blended["cost_source"] == "cost_per_1k_blended" + + def test_plan_full_mode_includes_workflow_plans(tmp_path: Path) -> None: root = _build_plan_infospace(tmp_path)