generated from coulomb/repo-seed
Two small fixes informed by the 2026-05-18 live OpenRouter chapter-I run.
1. extract-entities templates (trading-literature and general-knowledge):
the # Entity Title placeholder was interpreted by gpt-4o-mini as a
literal heading prefix, so every entity came back as "# Entity Title:
Bucket Shop" etc. The instruction now spells the placeholder out
with concrete examples and an explicit "not the literal string"
note, so smaller models hit the intended shape.
2. generate plan gains a --model <id> option. When supplied, the cost
estimate pulls separate prompt-token and completion-token rates from the
bundled model_rates.yaml instead of multiplying a single blended
--cost-per-1k value across all tokens. The summary now also returns
a separate estimated_completion_tokens field plus a cost_source tag
("rate_table:<model>" | "cost_per_1k_blended" | None).
This is a stopgap. LLM-WP-0005 (proposed in llm-connect this round)
will move the rate registry and token-shape problem classes upstream
so consumers stop re-implementing them.
The live smoke ran 28k prompt tokens / 7.5k completion / $0.0088
actual. With --model openai/gpt-4o-mini the plan estimate now lands at
$0.0076 (within 14% of actual) versus the prior $8.40 estimate at
--cost-per-1k 0.30.
181 tests pass, 2 skipped (both live OpenRouter smokes).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
201 lines
6.8 KiB
Python
201 lines
6.8 KiB
Python
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import zipfile
|
|
from pathlib import Path
|
|
|
|
from infospace_bench.generator import (
|
|
init_generation_infospace,
|
|
plan_generation,
|
|
plan_generation_summary,
|
|
)
|
|
|
|
|
|
CONTAINER_XML = """<?xml version="1.0"?>
|
|
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
|
|
<rootfiles>
|
|
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
|
|
</rootfiles>
|
|
</container>
|
|
"""
|
|
|
|
PACKAGE_OPF = """<?xml version="1.0" encoding="utf-8"?>
|
|
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="bookid">
|
|
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
|
|
<dc:identifier id="bookid">urn:test:plan</dc:identifier>
|
|
<dc:title>Plan Test Book</dc:title>
|
|
<dc:creator>Author</dc:creator>
|
|
<dc:language>en</dc:language>
|
|
</metadata>
|
|
<manifest>
|
|
<item id="ch1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
|
|
<item id="ch2" href="ch2.xhtml" media-type="application/xhtml+xml"/>
|
|
<item id="ch3" href="ch3.xhtml" media-type="application/xhtml+xml"/>
|
|
<item id="ch4" href="ch4.xhtml" media-type="application/xhtml+xml"/>
|
|
</manifest>
|
|
<spine>
|
|
<itemref idref="ch1"/>
|
|
<itemref idref="ch2"/>
|
|
<itemref idref="ch3"/>
|
|
<itemref idref="ch4"/>
|
|
</spine>
|
|
</package>
|
|
"""
|
|
|
|
|
|
def _write_four_chapter_epub(path: Path) -> None:
    """Write a small four-chapter EPUB archive to ``path``.

    The archive holds the ``mimetype`` entry, the container descriptor, the
    package document, and one XHTML file per chapter. Chapters are headed
    with the roman numerals I-IV and stored as ``OEBPS/ch1.xhtml`` ..
    ``OEBPS/ch4.xhtml``.
    """
    # The same 40-word filler is appended to every chapter paragraph.
    filler = " ".join(f"sentence{n}" for n in range(40))

    # Fixed members, written in this order ahead of the chapter files.
    fixed_members = (
        ("mimetype", "application/epub+zip"),
        ("META-INF/container.xml", CONTAINER_XML),
        ("OEBPS/content.opf", PACKAGE_OPF),
    )

    with zipfile.ZipFile(path, "w") as epub:
        for member_name, payload in fixed_members:
            epub.writestr(member_name, payload)

        for chapter_no, numeral in enumerate(("I", "II", "III", "IV"), start=1):
            markup = (
                "<html><head><title>Book</title></head>"
                f"<body><h2>{numeral}</h2>"
                f"<p>The narrator describes chapter {numeral} events with stocks and traders. "
                f"{filler}</p></body></html>"
            )
            epub.writestr(f"OEBPS/ch{chapter_no}.xhtml", markup)
|
|
|
|
|
|
def _build_plan_infospace(tmp_path: Path) -> Path:
    """Create the fixture EPUB under ``tmp_path`` and initialise an infospace from it.

    Returns the ``root`` attribute of the object returned by
    ``init_generation_infospace``.
    """
    epub_path = tmp_path / "book.epub"
    _write_four_chapter_epub(epub_path)
    return init_generation_infospace(
        tmp_path,
        epub_path,
        "plan-test",
        name="Plan Test",
        profile="general-knowledge",
    ).root
|
|
|
|
|
|
def test_plan_summary_is_compact_and_does_not_dump_prompts(tmp_path: Path) -> None:
    """The default plan carries counts and estimates but no prompt text or workflows."""
    compact = plan_generation(_build_plan_infospace(tmp_path))

    # Search the serialized form so a "prompt" key at any nesting depth is caught.
    as_json = json.dumps(compact)
    assert '"prompt":' not in as_json, "compact plan must not embed full prompts"

    exact_fields = {
        "source_chunk_count": 4,
        "selected_chunk_count": 4,
        "selected_chapter_numbers": [1, 2, 3, 4],
    }
    for field, expected in exact_fields.items():
        assert compact[field] == expected

    for field in ("total_provider_calls_estimate", "total_prompt_tokens_estimate"):
        assert compact[field] > 0

    # No rate or model was supplied, so no cost can be estimated.
    assert compact["estimated_cost_usd"] is None
    assert "workflows" not in compact
|
|
|
|
|
|
def test_plan_chapter_filter_selects_subset(tmp_path: Path) -> None:
    """Chapters can be selected by label, by number, by range, or by chunk id."""
    root = _build_plan_infospace(tmp_path)

    selectors = {
        "label": {"chapter_filter": ["I"]},
        "number": {"chapter_filter": ["2"]},
        "range": {"from_chapter": 2, "to_chapter": 3},
        "chunk": {"chunk_filter": ["chapter-04"]},
    }
    # Build every plan up front, in declaration order, before any assertion runs.
    plans = {
        kind: plan_generation_summary(root, **kwargs)
        for kind, kwargs in selectors.items()
    }

    assert plans["label"]["selected_chapter_numbers"] == [1]
    assert plans["number"]["selected_chapter_numbers"] == [2]
    assert plans["range"]["selected_chapter_numbers"] == [2, 3]
    assert plans["chunk"]["selected_chunk_ids"] == ["chapter-04"]
|
|
|
|
|
|
def test_plan_caps_flag_when_estimate_exceeds_budget(tmp_path: Path) -> None:
    """Both cap flags are raised when the estimate exceeds the call and cost budgets."""
    capped = plan_generation_summary(
        _build_plan_infospace(tmp_path),
        max_calls=2,
        cost_cap=0.01,
        cost_per_1k_tokens=1.0,
    )

    # Call budget: the estimate must exceed max_calls and be flagged.
    assert capped["total_provider_calls_estimate"] > 2
    assert capped["exceeds_max_calls"] is True

    # Cost budget: a cost must be produced, exceed cost_cap, and be flagged.
    assert capped["estimated_cost_usd"] is not None
    assert capped["estimated_cost_usd"] > 0.01
    assert capped["exceeds_cost_cap"] is True
|
|
|
|
|
|
def test_plan_with_model_uses_rate_table_instead_of_blended_per_1k(tmp_path: Path) -> None:
    """--model openai/gpt-4o-mini should pull from bundled rate table.

    Stopgap until LLM-WP-0005 lands a proper cost model in llm-connect.
    """
    root = _build_plan_infospace(tmp_path)

    rate_table = plan_generation_summary(root, model="openai/gpt-4o-mini")

    # gpt-4o-mini list price is ~0.00015/1k prompt + ~0.0006/1k completion,
    # so the rate-table cost must be far below the $0.30/1k blended figure.
    assert rate_table["cost_source"] == "rate_table:openai/gpt-4o-mini"
    assert rate_table["estimated_cost_usd"] is not None
    assert rate_table["estimated_cost_usd"] < 0.10, (
        "rate-table estimate must be far below a $0.30/1k blended rate"
    )
    # The estimator now also returns a completion-token estimate.
    assert rate_table["estimated_completion_tokens"] > 0
|
|
|
|
|
|
def test_plan_with_unknown_model_falls_back_to_blended_or_unknown(tmp_path: Path) -> None:
    """A model missing from the rate table yields no cost unless a blended rate is given."""
    root = _build_plan_infospace(tmp_path)
    unlisted_model = "acme/not-in-rate-table"

    without_rate = plan_generation_summary(root, model=unlisted_model)
    with_blended_rate = plan_generation_summary(
        root,
        model=unlisted_model,
        cost_per_1k_tokens=0.5,
    )

    # No pricing signal at all: cost and its source are both unknown.
    assert without_rate["estimated_cost_usd"] is None
    assert without_rate["cost_source"] is None

    # The explicit blended rate is used as the fallback.
    assert with_blended_rate["estimated_cost_usd"] is not None
    assert with_blended_rate["cost_source"] == "cost_per_1k_blended"
|
|
|
|
|
|
def test_plan_full_mode_includes_workflow_plans(tmp_path: Path) -> None:
    """With ``full=True`` the plan exposes a non-empty ``workflows`` entry."""
    detailed = plan_generation(_build_plan_infospace(tmp_path), full=True)

    assert "workflows" in detailed
    workflow_plans = detailed["workflows"]
    assert len(workflow_plans) >= 1
|
|
|
|
|
|
def test_plan_cli_compact_default_and_filters(tmp_path: Path) -> None:
    """Run ``generate plan`` via ``python -m infospace_bench`` and check its JSON output.

    The command is invoked in a subprocess with a chapter range (2-3), a
    blended ``--cost-per-1k`` rate and ``--max-calls 1``. Stdout must parse as
    JSON and be the compact summary: no ``workflows`` key and no embedded
    prompts.
    """
    root = _build_plan_infospace(tmp_path)

    env = os.environ.copy()
    # NOTE(review): "src" is resolved against the current working directory and
    # the second entry is an absolute path on one developer's machine; both are
    # kept first so module resolution order is unchanged where they exist.
    import_roots = ["src", "/home/worsch/markitect-tool/src"]
    # Keep any PYTHONPATH the caller already set instead of discarding it, and
    # join with os.pathsep so the value is also valid where ":" is not the
    # path-list separator.
    inherited = env.get("PYTHONPATH")
    if inherited:
        import_roots.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(import_roots)

    command = [
        sys.executable,
        "-m",
        "infospace_bench",
        "generate",
        "plan",
        str(root),
        "--from-chapter",
        "2",
        "--to-chapter",
        "3",
        "--cost-per-1k",
        "0.5",
        "--max-calls",
        "1",
    ]
    # check=False: the return code is asserted below so stderr can be shown on failure.
    result = subprocess.run(
        command,
        check=False,
        env=env,
        text=True,
        capture_output=True,
    )

    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    assert payload["selected_chapter_numbers"] == [2, 3]
    assert payload["estimated_cost_usd"] is not None
    assert payload["exceeds_max_calls"] is True
    assert "workflows" not in payload
    assert '"prompt":' not in result.stdout
|