# infospace-bench/tests/test_plan_scale.py

import json
import os
import subprocess
import sys
import zipfile
from pathlib import Path

from infospace_bench.generator import (
    init_generation_infospace,
    plan_generation,
    plan_generation_summary,
)


# Contents of the EPUB fixture's META-INF/container.xml entry; its single
# rootfile points at the package document stored as OEBPS/content.opf.
CONTAINER_XML = """<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>
"""

# Contents of the EPUB fixture's OEBPS/content.opf entry: minimal metadata
# plus a manifest and spine that list four XHTML chapters (ch1..ch4) in order.
PACKAGE_OPF = """<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="bookid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
    <dc:identifier id="bookid">urn:test:plan</dc:identifier>
    <dc:title>Plan Test Book</dc:title>
    <dc:creator>Author</dc:creator>
    <dc:language>en</dc:language>
  </metadata>
  <manifest>
    <item id="ch1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
    <item id="ch2" href="ch2.xhtml" media-type="application/xhtml+xml"/>
    <item id="ch3" href="ch3.xhtml" media-type="application/xhtml+xml"/>
    <item id="ch4" href="ch4.xhtml" media-type="application/xhtml+xml"/>
  </manifest>
  <spine>
    <itemref idref="ch1"/>
    <itemref idref="ch2"/>
    <itemref idref="ch3"/>
    <itemref idref="ch4"/>
  </spine>
</package>
"""


def _write_four_chapter_epub(path: Path) -> None:
    """Create a small EPUB archive at *path* containing four chapters.

    The archive holds the ``mimetype`` entry, the container and package
    documents, and one XHTML file per chapter headed "I" through "IV".
    Every chapter body carries the same 40-token filler sentence run.
    """
    filler_words = " ".join(f"sentence{n}" for n in range(40))
    headings = ("I", "II", "III", "IV")

    with zipfile.ZipFile(path, "w") as epub:
        epub.writestr("mimetype", "application/epub+zip")
        epub.writestr("META-INF/container.xml", CONTAINER_XML)
        epub.writestr("OEBPS/content.opf", PACKAGE_OPF)

        for number, heading in enumerate(headings, start=1):
            opening = (
                f"<html><head><title>Book</title></head>"
                f"<body><h2>{heading}</h2>"
                f"<p>The narrator describes chapter {heading} events with stocks and traders. "
            )
            closing = "</p></body></html>"
            epub.writestr(f"OEBPS/ch{number}.xhtml", opening + filler_words + closing)


def _build_plan_infospace(tmp_path: Path) -> Path:
    """Write the four-chapter EPUB into *tmp_path* and initialise an infospace.

    Returns the ``root`` attribute of the object produced by
    ``init_generation_infospace``.
    """
    epub_path = tmp_path / "book.epub"
    _write_four_chapter_epub(epub_path)
    return init_generation_infospace(
        tmp_path,
        epub_path,
        "plan-test",
        name="Plan Test",
        profile="general-knowledge",
    ).root


def test_plan_summary_is_compact_and_does_not_dump_prompts(tmp_path: Path) -> None:
    """The default plan reports counts and estimates without prompt bodies.

    With no pricing input the cost estimate is expected to be ``None``, and
    the per-workflow detail must be absent.
    """
    plan = plan_generation(_build_plan_infospace(tmp_path))

    as_json = json.dumps(plan)
    assert '"prompt":' not in as_json, "compact plan must not embed full prompts"
    for count_key in ("source_chunk_count", "selected_chunk_count"):
        assert plan[count_key] == 4
    assert plan["selected_chapter_numbers"] == [1, 2, 3, 4]
    for estimate_key in (
        "total_provider_calls_estimate",
        "total_prompt_tokens_estimate",
    ):
        assert plan[estimate_key] > 0
    assert plan["estimated_cost_usd"] is None
    assert "workflows" not in plan


def test_plan_chapter_filter_selects_subset(tmp_path: Path) -> None:
    """Label, number, range and chunk-id selectors each narrow the plan."""
    infospace_root = _build_plan_infospace(tmp_path)

    # All four plans are computed before any assertion runs.
    label_plan = plan_generation_summary(infospace_root, chapter_filter=["I"])
    number_plan = plan_generation_summary(infospace_root, chapter_filter=["2"])
    range_plan = plan_generation_summary(
        infospace_root, from_chapter=2, to_chapter=3
    )
    chunk_plan = plan_generation_summary(
        infospace_root, chunk_filter=["chapter-04"]
    )

    chapter_expectations = (
        (label_plan, [1]),
        (number_plan, [2]),
        (range_plan, [2, 3]),
    )
    for plan, expected_numbers in chapter_expectations:
        assert plan["selected_chapter_numbers"] == expected_numbers
    assert chunk_plan["selected_chunk_ids"] == ["chapter-04"]


def test_plan_caps_flag_when_estimate_exceeds_budget(tmp_path: Path) -> None:
    """Caps set below the estimates must raise both ``exceeds_*`` flags."""
    call_cap = 2
    budget_usd = 0.01

    plan = plan_generation_summary(
        _build_plan_infospace(tmp_path),
        max_calls=call_cap,
        cost_cap=budget_usd,
        cost_per_1k_tokens=1.0,
    )

    assert plan["total_provider_calls_estimate"] > call_cap
    assert plan["exceeds_max_calls"] is True

    estimated_cost = plan["estimated_cost_usd"]
    assert estimated_cost is not None
    assert estimated_cost > budget_usd
    assert plan["exceeds_cost_cap"] is True


def test_plan_with_model_uses_rate_table_instead_of_blended_per_1k(tmp_path: Path) -> None:
    """--model openai/gpt-4o-mini should pull from bundled rate table.

    Only the rate-table plan is computed; its cost is checked against a
    fixed ceiling rather than against a separately computed blended plan.

    Stopgap until LLM-WP-0005 lands a proper cost model in llm-connect.
    """
    root = _build_plan_infospace(tmp_path)

    rate_table = plan_generation_summary(root, model="openai/gpt-4o-mini")

    # gpt-4o-mini list price is ~0.00015/1k prompt + ~0.0006/1k completion,
    # so the rate-table cost must be far below what a $0.30/1k blended rate
    # would yield for the same plan.
    assert rate_table["cost_source"] == "rate_table:openai/gpt-4o-mini"
    assert rate_table["estimated_cost_usd"] is not None
    assert rate_table["estimated_cost_usd"] < 0.10, (
        "rate-table estimate must be far below a $0.30/1k blended rate"
    )
    # The plan must also carry a completion-token estimate.
    assert rate_table["estimated_completion_tokens"] > 0


def test_plan_with_unknown_model_falls_back_to_blended_or_unknown(tmp_path: Path) -> None:
    """A model missing from the rate table yields no cost unless a blended rate is given."""
    infospace_root = _build_plan_infospace(tmp_path)
    unlisted_model = "acme/not-in-rate-table"

    without_rate = plan_generation_summary(infospace_root, model=unlisted_model)
    with_blended_rate = plan_generation_summary(
        infospace_root,
        model=unlisted_model,
        cost_per_1k_tokens=0.5,
    )

    # No pricing signal at all: both the cost and its source stay unset.
    assert without_rate["estimated_cost_usd"] is None
    assert without_rate["cost_source"] is None
    # Explicit blended rate: a cost is produced and attributed to it.
    assert with_blended_rate["estimated_cost_usd"] is not None
    assert with_blended_rate["cost_source"] == "cost_per_1k_blended"


def test_plan_full_mode_includes_workflow_plans(tmp_path: Path) -> None:
    """Requesting ``full=True`` adds a non-empty ``workflows`` entry to the plan."""
    detailed_plan = plan_generation(_build_plan_infospace(tmp_path), full=True)

    assert "workflows" in detailed_plan
    workflow_plans = detailed_plan["workflows"]
    assert len(workflow_plans) >= 1


def test_plan_cli_compact_default_and_filters(tmp_path: Path) -> None:
    """Run ``generate plan`` via ``python -m infospace_bench`` and check its JSON.

    The command is invoked with a chapter range, a blended cost rate and a
    call cap of 1. The test expects a zero exit code and a compact JSON
    payload on stdout that reflects the range, carries a cost estimate,
    flags the exceeded call cap, and contains neither workflow detail nor
    prompt bodies.
    """
    root = _build_plan_infospace(tmp_path)
    env = os.environ.copy()
    # Give the child interpreter the import path this process is already
    # using (it imported ``infospace_bench`` successfully) instead of a
    # checkout-specific absolute path. ``os.pathsep`` keeps the value valid
    # on every platform; empty ``sys.path`` entries are skipped.
    env["PYTHONPATH"] = os.pathsep.join(
        str(entry) for entry in sys.path if entry
    )

    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "infospace_bench",
            "generate",
            "plan",
            str(root),
            "--from-chapter",
            "2",
            "--to-chapter",
            "3",
            "--cost-per-1k",
            "0.5",
            "--max-calls",
            "1",
        ],
        check=False,  # the exit code is asserted below so stderr can be shown
        env=env,
        text=True,
        capture_output=True,
    )

    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    assert payload["selected_chapter_numbers"] == [2, 3]
    assert payload["estimated_cost_usd"] is not None
    assert payload["exceeds_max_calls"] is True
    assert "workflows" not in payload
    assert '"prompt":' not in result.stdout