# infospace-bench/tests/test_budget_registry.py

import json
import os
import subprocess
import sys
import zipfile
from pathlib import Path

import yaml

from infospace_bench.budget import (
    PLAN_RETENTION_DEFAULT,
    PLANS_FILE,
    PLANS_SCHEMA_VERSION,
    read_plan_snapshots,
    record_plan_snapshot,
)
from infospace_bench.generator import init_generation_infospace, plan_generation


# Container document for the test EPUB (stored as META-INF/container.xml by
# _write_three_chapter_epub); its single rootfile entry points at
# OEBPS/content.opf.
CONTAINER_XML = """<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>
"""

# Package document for the test EPUB (stored as OEBPS/content.opf by
# _write_three_chapter_epub): title/creator/language metadata plus a manifest
# and spine that list ch1.xhtml, ch2.xhtml and ch3.xhtml in that order.
PACKAGE_OPF = """<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="bookid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
    <dc:identifier id="bookid">urn:test:budget</dc:identifier>
    <dc:title>Budget Test Book</dc:title>
    <dc:creator>Author</dc:creator>
    <dc:language>en</dc:language>
  </metadata>
  <manifest>
    <item id="ch1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
    <item id="ch2" href="ch2.xhtml" media-type="application/xhtml+xml"/>
    <item id="ch3" href="ch3.xhtml" media-type="application/xhtml+xml"/>
  </manifest>
  <spine>
    <itemref idref="ch1"/>
    <itemref idref="ch2"/>
    <itemref idref="ch3"/>
  </spine>
</package>
"""


def _write_three_chapter_epub(path: Path) -> None:
    """Write a small three-chapter EPUB archive to ``path``.

    The archive contains the ``mimetype`` entry, the module's container and
    package documents, and one XHTML file per chapter (headed ``I``, ``II``
    and ``III``), each carrying a 40-word filler paragraph.
    """
    filler = " ".join(f"word{n}" for n in range(40))
    chapter_labels = ("I", "II", "III")

    with zipfile.ZipFile(path, "w") as epub:
        epub.writestr("mimetype", "application/epub+zip")
        epub.writestr("META-INF/container.xml", CONTAINER_XML)
        epub.writestr("OEBPS/content.opf", PACKAGE_OPF)
        for number, label in enumerate(chapter_labels, start=1):
            markup = (
                "<html><head><title>Book</title></head>"
                f"<body><h2>{label}</h2>"
                f"<p>Body of chapter {label} with {filler}.</p></body></html>"
            )
            epub.writestr(f"OEBPS/ch{number}.xhtml", markup)


def _write_minimal_fixture(path: Path) -> None:
    """Dump a YAML fixture-responses file to ``path``.

    The file holds one stub markdown response per stage id, each registered
    with the wildcard ``"*"`` as its ``input_artifact_id``.
    """
    entity_markdown = (
        "# Stub Entity\n\n## Definition\n\nA stub.\n\n## Context\n\nFor a budget test.\n"
    )
    relation_markdown = (
        "# Stub Entity Practices Something\n\n## Subject\n\nStub Entity\n\n"
        "## Predicate\n\npractices\n\n## Object\n\nSomething\n\n## Relation Type\n\nsupport\n\n"
        "## Evidence\n\nA stub.\n"
    )
    evaluation_markdown = (
        "---\nartifact_id: entity/stub-entity.md\nevaluator: fixture\n"
        "evaluated_at: '2026-05-17T00:00:00'\n"
        "scores:\n  - name: groundedness\n    value: 4.0\n    max_value: 5.0\n"
        "  - name: usefulness\n    value: 4.0\n    max_value: 5.0\n---\n\n"
        "# Evaluation: entity/stub-entity.md\n"
    )
    stage_markdown = [
        ("summarize-source", "# Source Summary\n\nA stub summary.\n"),
        ("extract-entities", entity_markdown),
        ("extract-relations", relation_markdown),
        ("evaluate-entity", evaluation_markdown),
    ]

    # Key order is kept as written because the dump below disables key sorting.
    payload = {
        "responses": [
            {"stage_id": stage_id, "input_artifact_id": "*", "markdown": markdown}
            for stage_id, markdown in stage_markdown
        ]
    }
    serialized = yaml.safe_dump(payload, sort_keys=False)
    path.write_text(serialized, encoding="utf-8")


def _build_infospace(tmp_path: Path) -> Path:
    """Write the test EPUB into ``tmp_path`` and initialise an infospace from it.

    Returns the ``root`` attribute of the object produced by
    ``init_generation_infospace``.
    """
    epub_path = tmp_path / "book.epub"
    _write_three_chapter_epub(epub_path)
    return init_generation_infospace(
        tmp_path,
        epub_path,
        "budget-test",
        name="Budget Test",
        profile="general-knowledge",
    ).root


def test_record_plan_snapshot_writes_yaml_with_stable_id(tmp_path: Path) -> None:
    """Recording one plan summary twice yields one id and a single stored snapshot."""
    infospace_root = _build_infospace(tmp_path)
    plan = plan_generation(infospace_root, persist=False)

    first_id = record_plan_snapshot(infospace_root, plan)
    second_id = record_plan_snapshot(infospace_root, plan)

    plans_path = infospace_root / PLANS_FILE
    registry = yaml.safe_load(plans_path.read_text(encoding="utf-8"))

    assert registry["schema_version"] == PLANS_SCHEMA_VERSION
    assert registry["pruned_count"] == 0
    assert first_id == second_id, "same summary must yield same snapshot_id"
    # The repeated write must not append a second entry.
    stored = registry["snapshots"]
    assert len(stored) == 1
    assert stored[0]["snapshot_id"] == first_id


def test_different_filters_produce_distinct_snapshots(tmp_path: Path) -> None:
    """An unfiltered plan and a chapter-2-only plan are stored under different ids."""
    infospace_root = _build_infospace(tmp_path)

    plans = (
        plan_generation(infospace_root, persist=False),
        plan_generation(infospace_root, from_chapter=2, to_chapter=2, persist=False),
    )
    for plan in plans:
        record_plan_snapshot(infospace_root, plan)

    stored = read_plan_snapshots(infospace_root)
    assert len(stored) == 2
    assert len({entry["snapshot_id"] for entry in stored}) == 2

    # The snapshot selecting exactly one chunk is the chapter-2 plan; it must
    # carry the filter values it was planned with.
    single_chunk = next(entry for entry in stored if entry["selected_chunk_count"] == 1)
    recorded_filters = single_chunk["filters"]
    assert recorded_filters["from_chapter"] == 2
    assert recorded_filters["to_chapter"] == 2


def test_plan_generation_persists_snapshot_by_default(tmp_path: Path) -> None:
    """Calling ``plan_generation`` without ``persist`` stores the snapshot it reports."""
    infospace_root = _build_infospace(tmp_path)

    plan = plan_generation(infospace_root, from_chapter=1, to_chapter=2)

    assert "snapshot_id" in plan
    plans_path = infospace_root / PLANS_FILE
    assert plans_path.is_file()

    stored = read_plan_snapshots(infospace_root)
    assert len(stored) == 1
    assert stored[0]["snapshot_id"] == plan["snapshot_id"]


def test_plan_generation_persist_false_skips_write(tmp_path: Path) -> None:
    """With ``persist=False`` the plans file must not be created."""
    infospace_root = _build_infospace(tmp_path)
    plans_path = infospace_root / PLANS_FILE

    plan_generation(infospace_root, persist=False)

    assert not plans_path.exists()


def test_plan_snapshot_retention_prunes_old_entries(tmp_path: Path) -> None:
    """Recording more plan summaries than ``retention`` allows prunes the registry.

    Five summaries are recorded with ``retention=3``; the plans file must end
    up with exactly three snapshots and a ``pruned_count`` of at least one.
    """
    root = _build_infospace(tmp_path)

    # Three chapter-filtered plans followed by two unfiltered ones.
    unfiltered_seen = 0
    for chapter in (1, 2, 3, None, None):
        if chapter is not None:
            summary = plan_generation(
                root, from_chapter=chapter, to_chapter=chapter, persist=False
            )
        else:
            summary = plan_generation(root, persist=False)
            # Bump max_calls by a different amount on each unfiltered pass so
            # the two unfiltered summaries differ from each other as well as
            # from the filtered ones (a constant bump made them identical).
            unfiltered_seen += 1
            summary["max_calls"] = (summary.get("max_calls") or 0) + unfiltered_seen
            summary["exceeds_max_calls"] = False
        record_plan_snapshot(root, summary, retention=3)

    data = yaml.safe_load((root / PLANS_FILE).read_text(encoding="utf-8"))
    assert len(data["snapshots"]) == 3
    assert data["pruned_count"] >= 1


def test_record_run_usage_aggregates_by_workflow_stage_provider_model(tmp_path: Path) -> None:
    """Two calls sharing workflow, stage, provider and model collapse into one bucket.

    The third stage carries only a message (no provider or usage) and is
    expected not to count as a call.
    """
    from infospace_bench.budget import read_usage_runs, record_run_usage

    infospace_root = _build_infospace(tmp_path)

    first_call = {
        "stage_id": "extract-entities",
        "provider": "openrouter",
        "metadata": {
            "model": "openai/gpt-4o-mini",
            "usage": {"prompt_tokens": 1000, "completion_tokens": 200, "total_tokens": 1200},
        },
    }
    second_call = {
        "stage_id": "extract-entities",
        "provider": "openrouter",
        "metadata": {
            "model": "openai/gpt-4o-mini",
            "usage": {"prompt_tokens": 800, "completion_tokens": 150, "cost": 0.0012},
        },
    }
    message_only_stage = {"stage_id": "split-entities", "message": "split 3 entities"}
    workflow_results = [
        {
            "run_id": "run-1",
            "workflow_id": "generic-source-entities",
            "status": "completed",
            "stages": [first_call, second_call, message_only_stage],
        }
    ]

    usage = record_run_usage(
        infospace_root, workflow_results, snapshot_id="abc123", duration_seconds=4.2
    )

    totals = usage["rollup"]
    assert totals["total_calls"] == 2
    assert totals["total_prompt_tokens"] == 1800
    assert totals["total_completion_tokens"] == 350
    assert totals["total_cost_usd_known"] == 0.0012
    assert usage["snapshot_id"] == "abc123"
    assert usage["duration_seconds"] == 4.2

    assert len(usage["per_bucket"]) == 1
    only_bucket = usage["per_bucket"][0]
    expected_fields = {
        "workflow_id": "generic-source-entities",
        "stage_id": "extract-entities",
        "provider": "openrouter",
        "model": "openai/gpt-4o-mini",
        "calls": 2,
    }
    for field, expected in expected_fields.items():
        assert only_bucket[field] == expected

    recorded = read_usage_runs(infospace_root)
    assert len(recorded) == 1
    assert recorded[0]["run_index"] == 1


def test_record_run_usage_handles_fixture_runs_without_aborting(tmp_path: Path) -> None:
    """Fixture-provider stages without usage metadata are counted with zero tokens and unknown cost."""
    from infospace_bench.budget import record_run_usage

    infospace_root = _build_infospace(tmp_path)

    fixture_stage = {"stage_id": "summarize-source", "provider": "fixture"}
    workflow_results = [
        {
            "run_id": "fix-1",
            "workflow_id": "generic-source-summary",
            # Two separate dict objects, one per stage.
            "stages": [dict(fixture_stage), dict(fixture_stage)],
        }
    ]

    usage = record_run_usage(infospace_root, workflow_results)

    bucket = next(item for item in usage["per_bucket"] if item["provider"] == "fixture")
    assert bucket["calls"] == 2
    assert bucket["prompt_tokens"] == 0
    assert bucket["cost_status"] == "unknown"
    assert usage["rollup"]["total_cost_usd_known"] == 0.0


def test_run_generation_writes_usage_yaml_with_plan_snapshot_id(tmp_path: Path) -> None:
    """A fixture-backed run records one usage entry carrying the preceding plan's snapshot id."""
    from infospace_bench.budget import USAGE_FILE, read_usage_runs
    from infospace_bench.generator import run_generation

    infospace_root = _build_infospace(tmp_path)
    responses_path = tmp_path / "responses.yaml"
    _write_minimal_fixture(responses_path)

    plan = plan_generation(infospace_root)
    run_generation(infospace_root, fixture_responses=responses_path)

    recorded = read_usage_runs(infospace_root)
    assert (infospace_root / USAGE_FILE).is_file()
    assert len(recorded) == 1

    usage = recorded[0]
    assert usage["snapshot_id"] == plan["snapshot_id"]
    elapsed = usage["duration_seconds"]
    assert elapsed is not None and elapsed >= 0
    assert usage["rollup"]["total_calls"] >= 0
    # A run served from fixture responses must not report any known cost.
    assert usage["rollup"]["total_cost_usd_known"] == 0.0


def test_rate_table_known_model_resolves_cost(tmp_path: Path) -> None:
    """The default rate table prices 1000 prompt + 500 completion tokens of gpt-4o-mini at 0.00045."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    model = "openai/gpt-4o-mini"
    rate_table = load_rate_table()

    assert model in rate_table
    estimate = estimate_cost_usd(model, 1000, 500, rate_table)
    # Expected from per-1k rates of 0.00015 (prompt) and 0.0006 (completion):
    # 1.0 * 0.00015 + 0.5 * 0.0006 = 0.00045
    expected = 0.00045
    assert estimate is not None
    assert abs(estimate - expected) < 1e-9


def test_rate_table_unknown_model_returns_none(tmp_path: Path) -> None:
    """A model id missing from the default rate table produces no estimate."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    rate_table = load_rate_table()
    estimate = estimate_cost_usd("acme/no-such-model", 1000, 500, rate_table)

    assert estimate is None


def test_workspace_rate_table_overrides_package_default(tmp_path: Path) -> None:
    """Rates from a ``model-rates.yaml`` in the directory given to ``load_rate_table`` drive the estimates."""
    from infospace_bench.budget import estimate_cost_usd, load_rate_table

    workspace_rates = {
        "schema_version": 1,
        "rates": {
            "openai/gpt-4o-mini": {
                "prompt_per_1k": 1.0,
                "completion_per_1k": 2.0,
            },
            "acme/bespoke": {
                "prompt_per_1k": 0.1,
                "completion_per_1k": 0.2,
            },
        },
    }
    rates_path = tmp_path / "model-rates.yaml"
    rates_path.write_text(yaml.safe_dump(workspace_rates), encoding="utf-8")

    rate_table = load_rate_table(tmp_path)

    # 1000 tokens on each side, so the expected cost is the sum of both per-1k rates.
    mini_cost = estimate_cost_usd("openai/gpt-4o-mini", 1000, 1000, rate_table)
    bespoke_cost = estimate_cost_usd("acme/bespoke", 1000, 1000, rate_table)

    assert mini_cost == round(1.0 + 2.0, 6)
    assert bespoke_cost == round(0.1 + 0.2, 6)


def test_record_run_usage_fills_estimated_cost_via_resolver(tmp_path: Path) -> None:
    """A call without a reported cost is estimated by the resolver; a reported cost stays "known"."""
    from infospace_bench.budget import make_cost_resolver, record_run_usage

    infospace_root = _build_infospace(tmp_path)

    call_without_cost = {
        "stage_id": "extract-entities",
        "provider": "openrouter",
        "metadata": {
            "model": "openai/gpt-4o-mini",
            "usage": {"prompt_tokens": 2000, "completion_tokens": 1000},
        },
    }
    call_with_cost = {
        "stage_id": "extract-entities",
        "provider": "openrouter",
        "metadata": {
            "model": "openai/gpt-4o-mini",
            "usage": {
                "prompt_tokens": 1000,
                "completion_tokens": 500,
                "cost": 0.123,
            },
        },
    }
    workflow_results = [
        {
            "run_id": "run-cost",
            "workflow_id": "generic-source-entities",
            "stages": [call_without_cost, call_with_cost],
        }
    ]

    resolver = make_cost_resolver(tmp_path)
    usage = record_run_usage(infospace_root, workflow_results, cost_resolver=resolver)

    # Only the first call lacks a reported cost, so only it is estimated:
    # 2000/1000 * 0.00015 + 1000/1000 * 0.0006 = 0.0003 + 0.0006 = 0.0009
    expected_estimate = round(0.0009, 6)
    reported_cost = 0.123

    bucket = usage["per_bucket"][0]
    assert bucket["cost_usd_estimated"] == expected_estimate
    assert bucket["cost_usd_known"] == reported_cost
    # One call reporting a cost is enough for the bucket status to be "known".
    assert bucket["cost_status"] == "known"

    totals = usage["rollup"]
    assert totals["total_cost_usd_known"] == reported_cost
    assert totals["total_cost_usd_estimated"] == expected_estimate


def test_plan_cli_writes_snapshot(tmp_path: Path) -> None:
    """``python -m infospace_bench generate plan`` prints a JSON plan and stores its snapshot.

    The command runs in a child interpreter with chapter filters 1..2; the
    test checks the exit code, that the JSON written to stdout has a
    ``snapshot_id``, and that the one stored snapshot carries those filters.
    """
    root = _build_infospace(tmp_path)

    # Give the child interpreter the import path this process already
    # resolved, so it can import whatever this test module imported. This
    # replaces a cwd-relative "src" plus a developer-specific absolute path
    # joined with a literal ":"; os.pathsep keeps the value valid on every
    # platform, and dict.fromkeys drops duplicates while preserving order.
    env = os.environ.copy()
    env["PYTHONPATH"] = os.pathsep.join(
        dict.fromkeys(entry for entry in sys.path if entry)
    )

    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "infospace_bench",
            "generate",
            "plan",
            str(root),
            "--from-chapter",
            "1",
            "--to-chapter",
            "2",
            "--cost-per-1k",
            "0.5",
        ],
        check=False,  # the return code is asserted below so stderr shows up in the failure message
        env=env,
        text=True,
        capture_output=True,
    )

    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    assert "snapshot_id" in payload
    snapshots = read_plan_snapshots(root)
    assert len(snapshots) == 1
    assert snapshots[0]["filters"]["from_chapter"] == 1
    assert snapshots[0]["filters"]["to_chapter"] == 2