diff --git a/src/infospace_bench/cli.py b/src/infospace_bench/cli.py
index db78c5c..dbfffe5 100644
--- a/src/infospace_bench/cli.py
+++ b/src/infospace_bench/cli.py
@@ -186,7 +186,10 @@ def build_parser() -> argparse.ArgumentParser:
     generate_plan.add_argument("--max-calls", type=int, default=None)
     generate_plan.add_argument("--cost-cap", type=float, default=None)
     generate_plan.add_argument(
-        "--cost-per-1k", type=float, default=0.0, help="USD per 1k prompt tokens for rough cost estimate"
+        "--cost-per-1k", type=float, default=0.0, help="USD per 1k prompt tokens for rough cost estimate (override; rate-table lookup via --model wins when present)"
+    )
+    generate_plan.add_argument(
+        "--model", default="", help="Model id (e.g. openai/gpt-4o-mini); when set, the bundled rate table replaces --cost-per-1k for the estimate"
     )
     generate_plan.add_argument(
         "--entities-per-chunk", type=int, default=2, help="Estimate of entities each chunk yields"
@@ -551,6 +554,7 @@ def main(argv: list[str] | None = None) -> int:
                         max_calls=args.max_calls,
                         cost_cap=args.cost_cap,
                         cost_per_1k_tokens=args.cost_per_1k,
+                        model=args.model or None,
                         entities_per_chunk=args.entities_per_chunk,
                         full=args.full,
                     )
diff --git a/src/infospace_bench/generator.py b/src/infospace_bench/generator.py
index 354eaf4..8786f19 100644
--- a/src/infospace_bench/generator.py
+++ b/src/infospace_bench/generator.py
@@ -144,6 +144,7 @@ def plan_generation(
     max_calls: int | None = None,
     cost_cap: float | None = None,
     cost_per_1k_tokens: float = 0.0,
+    model: str | None = None,
     words_per_token: float = WORDS_PER_TOKEN_DEFAULT,
     entities_per_chunk: int = ENTITIES_PER_CHUNK_ESTIMATE,
     full: bool = False,
@@ -161,6 +162,7 @@ def plan_generation(
         max_calls=max_calls,
         cost_cap=cost_cap,
         cost_per_1k_tokens=cost_per_1k_tokens,
+        model=model,
         words_per_token=words_per_token,
         entities_per_chunk=entities_per_chunk,
     )
@@ -203,6 +205,7 @@ def plan_generation_summary(
     max_calls: int | None = None,
     cost_cap: float | None = None,
     cost_per_1k_tokens: float = 0.0,
+    model: str | None = None,
     words_per_token: float = WORDS_PER_TOKEN_DEFAULT,
     entities_per_chunk: int = ENTITIES_PER_CHUNK_ESTIMATE,
 ) -> dict[str, Any]:
@@ -247,9 +250,29 @@ def plan_generation_summary(
         total_calls += calls
         total_prompt_words += prompt_words
     total_tokens = int(round(total_prompt_words / words_per_token)) if words_per_token > 0 else 0
+    # Estimate completion tokens as a rough fraction of prompt — most workflows
+    # write structured output that's ~20% of the prompt size. T03 of the
+    # cost-estimator workplan will replace this with problem-class estimators
+    # from llm-connect.
+    estimated_completion_tokens = int(round(total_tokens * 0.2))
     cost: float | None = None
-    if cost_per_1k_tokens > 0:
+    cost_source: str | None = None
+    rate_table_entry: dict[str, float] | None = None
+    if model:
+        from .budget import load_rate_table
+
+        rates = load_rate_table(_workspace_for(root_path))
+        rate_table_entry = rates.get(model)
+    if rate_table_entry is not None:
+        cost = round(
+            (total_tokens / 1000.0) * rate_table_entry["prompt_per_1k"]
+            + (estimated_completion_tokens / 1000.0) * rate_table_entry["completion_per_1k"],
+            6,
+        )
+        cost_source = f"rate_table:{model}"
+    elif cost_per_1k_tokens > 0:
         cost = round((total_tokens / 1000.0) * cost_per_1k_tokens, 4)
+        cost_source = "cost_per_1k_blended"
     chapter_numbers = sorted(
         {
             int(item.provenance.get("chapter_number"))
@@ -267,7 +290,10 @@ def plan_generation_summary(
         "total_provider_calls_estimate": total_calls,
         "total_prompt_words_estimate": total_prompt_words,
         "total_prompt_tokens_estimate": total_tokens,
+        "estimated_completion_tokens": estimated_completion_tokens,
         "estimated_cost_usd": cost,
+        "cost_source": cost_source,
+        "model": model,
         "cost_per_1k_tokens": cost_per_1k_tokens or None,
         "words_per_token": words_per_token,
         "entities_per_chunk_estimate": entities_per_chunk,
diff --git a/src/infospace_bench/profiles/general-knowledge/templates/extract-entities.md b/src/infospace_bench/profiles/general-knowledge/templates/extract-entities.md
index 33434f6..af28643 100644
--- a/src/infospace_bench/profiles/general-knowledge/templates/extract-entities.md
+++ b/src/infospace_bench/profiles/general-knowledge/templates/extract-entities.md
@@ -3,9 +3,11 @@
 Profile: {{ macros.profile }}
 
 Extract reusable infospace entities from the source chunk. Return one Markdown
-bundle where each entity starts with `# Entity Title` and contains at least a
-`## Definition` section. Prefer durable concepts, claims, named methods,
-people, places, works, and objects over sentence fragments.
+bundle where each entity starts with a level-1 heading that is the entity's
+own name (e.g. `# Knowledge Artifact`, `# Source Claim` — **not** the literal
+string "Entity Title"). Each entity contains at least a `## Definition`
+section. Prefer durable concepts, claims, named methods, people, places,
+works, and objects over sentence fragments.
 
 Source title: {{ input.title }}
 Source artifact: {{ input.artifact_id }}
diff --git a/src/infospace_bench/profiles/trading-literature/templates/extract-entities.md b/src/infospace_bench/profiles/trading-literature/templates/extract-entities.md
index a3f7524..11dc825 100644
--- a/src/infospace_bench/profiles/trading-literature/templates/extract-entities.md
+++ b/src/infospace_bench/profiles/trading-literature/templates/extract-entities.md
@@ -3,8 +3,10 @@
 Profile: {{ macros.profile }}
 
 Extract reusable infospace entities from the source chunk. Return one
-Markdown bundle where each entity starts with `# Entity Title` and has a
-`## Definition` section, plus a `## Category` line drawn from the list
+Markdown bundle where each entity starts with a level-1 heading that is the
+entity's name (e.g. `# Bucket Shop`, `# Tape Reading`, `# Larry Livingston`
+— **not** the literal string "Entity Title"). Each entity has a
+`## Definition` section and a `## Category` line drawn from the list
 below. Add `## Context` and `## Source Evidence` when the chunk gives
 enough material; leave them out rather than inventing detail.
 
diff --git a/tests/test_plan_scale.py b/tests/test_plan_scale.py
index b2959ce..27fbc5b 100644
--- a/tests/test_plan_scale.py
+++ b/tests/test_plan_scale.py
@@ -115,6 +115,45 @@ def test_plan_caps_flag_when_estimate_exceeds_budget(tmp_path: Path) -> None:
     assert summary["exceeds_cost_cap"] is True
 
 
+def test_plan_with_model_uses_rate_table_instead_of_blended_per_1k(tmp_path: Path) -> None:
+    """--model openai/gpt-4o-mini should pull from bundled rate table.
+
+    Stopgap until LLM-WP-0005 lands a proper cost model in llm-connect.
+    """
+    root = _build_plan_infospace(tmp_path)
+
+    # Only the rate-table summary is computed. A blended-rate summary is not
+    # needed: the fixed 0.10 ceiling asserted below serves as the bound in
+    # place of a computed $0.30/1k blended estimate.
+    rate_table = plan_generation_summary(
+        root, model="openai/gpt-4o-mini"
+    )
+
+    # gpt-4o-mini list price is ~0.00015/1k prompt + ~0.0006/1k completion,
+    # so the rate-table cost must be far below the $0.30/1k blended figure.
+    assert rate_table["cost_source"] == "rate_table:openai/gpt-4o-mini"
+    assert rate_table["estimated_cost_usd"] is not None
+    assert rate_table["estimated_cost_usd"] < 0.10, (
+        "rate-table estimate must be far below a $0.30/1k blended rate"
+    )
+    # The estimator now also returns a completion-token estimate.
+    assert rate_table["estimated_completion_tokens"] > 0
+
+
+def test_plan_with_unknown_model_falls_back_to_blended_or_unknown(tmp_path: Path) -> None:
+    """A model absent from the rate table is costed only via the blended rate."""
+    root = _build_plan_infospace(tmp_path)
+    unknown_model = "acme/not-in-rate-table"
+
+    without_rate = plan_generation_summary(root, model=unknown_model)
+    with_rate = plan_generation_summary(root, model=unknown_model, cost_per_1k_tokens=0.5)
+
+    assert without_rate["estimated_cost_usd"] is None
+    assert without_rate["cost_source"] is None
+    assert with_rate["estimated_cost_usd"] is not None
+    assert with_rate["cost_source"] == "cost_per_1k_blended"
+
+
 def test_plan_full_mode_includes_workflow_plans(tmp_path: Path) -> None:
     root = _build_plan_infospace(tmp_path)