Files
infospace-bench/tests/test_lefevre_fixture.py
tegwick 348deca9f2 IB-WP-0016-T05: deterministic Lefevre acceptance fixture
Check in a small Lefevre-shaped EPUB fixture as separate source files
under tests/fixtures/lefevre/sources/ (container.xml, OPF, nav, cover,
PG header, three roman-numeral chapters with page anchors,
transcriber notes, license, PG footer). The test helper assembles
these into an EPUB at test time so the inputs stay inspectable in git.

Fixture responses tuned to the trading-literature profile (T04) live
at tests/fixtures/lefevre/responses.yaml: trader / institution /
strategy categories on entities, strategy_outcome / actor_venue
relation types, and all four trading-tuned evaluation criteria.

Three tests cover the acceptance:
- end-to-end Python pipeline: stable chapter-NN source slugs, full
  artifact tree (entities, relations, evaluations, metrics, history,
  generation report), budget registry persisted, chapter_number
  provenance round-trips through artifacts/index.yaml
- regression: PG boilerplate (cover, nav, header, notes, license,
  footer) is excluded by default and only appears under
  include_non_body=True
- CLI smoke through generate from-source --profile trading-literature
  --fixture-responses ...

125 tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-17 22:31:17 +02:00

161 lines
5.7 KiB
Python

import json
import os
import subprocess
import sys
import zipfile
from pathlib import Path
from infospace_bench.budget import (
read_plan_snapshots,
read_usage_runs,
)
from infospace_bench.generator import (
init_generation_infospace,
plan_generation,
run_generation,
status_generation,
)
from infospace_bench.source_intake import (
SECTION_ROLE_BODY,
SECTION_ROLE_COVER,
SECTION_ROLE_FOOTER,
SECTION_ROLE_HEADER,
SECTION_ROLE_LICENSE,
SECTION_ROLE_NAV,
SECTION_ROLE_NOTES,
normalize_source,
)
# Checked-in Lefevre fixture tree, located relative to this test module.
FIXTURE_ROOT = Path(__file__).parent.joinpath("fixtures", "lefevre")
# Individual EPUB member files; they are zipped into an EPUB at test time.
FIXTURE_SOURCES = FIXTURE_ROOT.joinpath("sources")
# Responses file passed to the generator as ``fixture_responses``.
FIXTURE_RESPONSES = FIXTURE_ROOT.joinpath("responses.yaml")
def _build_fixture_epub(target: Path) -> Path:
    """Zip the checked-in Lefevre fixture sources into an EPUB at ``target``.

    Archive members, in write order: the ``mimetype`` entry, then
    ``META-INF/container.xml``, then every ``*.xhtml`` source (sorted by path)
    under ``OEBPS/``, and finally ``OEBPS/content.opf``.

    Args:
        target: Path of the EPUB file to create.

    Returns:
        ``target``, so the call can be used inline.
    """

    def read_fixture(path: Path) -> str:
        return path.read_text(encoding="utf-8")

    # All sources are read before the archive is opened, so a missing fixture
    # file fails before anything is written to ``target``.
    members = [
        ("mimetype", "application/epub+zip"),
        ("META-INF/container.xml", read_fixture(FIXTURE_SOURCES / "container.xml")),
    ]
    members.extend(
        (f"OEBPS/{page.name}", read_fixture(page))
        for page in sorted(FIXTURE_SOURCES.glob("*.xhtml"))
    )
    members.append(("OEBPS/content.opf", read_fixture(FIXTURE_SOURCES / "content.opf")))

    with zipfile.ZipFile(target, "w") as epub:
        for member_name, text in members:
            epub.writestr(member_name, text)
    return target
def test_lefevre_fixture_builds_a_complete_infospace(tmp_path: Path) -> None:
    """Run init -> plan -> run on the fixture EPUB and check the resulting tree.

    Covers the reported status counts, the chapter-NN source filenames, the
    presence of the entity/relation/evaluation/metrics/report artifacts, the
    persisted budget registry, and the chapter_number provenance recorded in
    ``artifacts/index.yaml``.
    """
    epub_path = _build_fixture_epub(tmp_path / "lefevre.epub")
    infospace = init_generation_infospace(
        tmp_path,
        epub_path,
        "lefevre-fixture",
        name="Reminiscences of a Stock Operator (Fixture)",
        profile="trading-literature",
    )
    root = infospace.root

    plan_generation(root)
    result = run_generation(root, fixture_responses=FIXTURE_RESPONSES)
    status = status_generation(root)

    assert result.status == "completed"
    assert status["profile"] == "trading-literature"
    # Three body chapters in the fixture spine; cover/nav/header/notes/license/footer
    # are excluded by default.
    assert status["source_chunk_count"] == 3
    for counter in (
        "entity_count",
        "relation_count",
        "evaluation_count",
        "history_snapshot_count",
    ):
        assert status[counter] >= 1

    artifacts_dir = root / "artifacts"
    output_dir = root / "output"

    # Stable chapter-NN source filenames from the IB-WP-0016 T02 work.
    source_names = {md.name for md in (artifacts_dir / "sources").glob("*.md")}
    assert {"chapter-01.md", "chapter-02.md", "chapter-03.md"} == source_names

    # Manifest-backed artifacts: entities, relations, evaluations, metrics, history, report.
    assert (artifacts_dir / "entities").is_dir()
    assert (artifacts_dir / "relations").is_dir()
    assert any((output_dir / "evaluations").glob("*.md"))
    metrics_dir = output_dir / "metrics"
    assert (metrics_dir / "metrics.yaml").is_file()
    assert (metrics_dir / "history.yaml").is_file()
    assert (root / "reports" / "generation-summary.md").is_file()

    # Budget registry artifacts (IB-WP-0019) should land alongside the run:
    # the first usage run must reference the most recent plan snapshot.
    assert read_plan_snapshots(root), "plan snapshot must persist"
    runs = read_usage_runs(root)
    assert runs
    assert runs[0]["snapshot_id"] == read_plan_snapshots(root)[-1]["snapshot_id"]

    # Book provenance: every source artifact records the chapter it came from.
    import yaml as _yaml

    index_text = (artifacts_dir / "index.yaml").read_text(encoding="utf-8")
    index = _yaml.safe_load(index_text)
    chapter_numbers = [
        entry["provenance"]["chapter_number"]
        for entry in index["artifacts"]
        if entry["kind"] == "source"
    ]
    chapter_numbers.sort()
    assert chapter_numbers == [1, 2, 3]
def test_lefevre_fixture_excludes_gutenberg_boilerplate_by_default(tmp_path: Path) -> None:
    """Check that non-body sections only appear with ``include_non_body=True``.

    With default arguments ``normalize_source`` must yield exactly the three
    body chapters (labels I, II, III); with ``include_non_body=True`` the
    cover, nav, header, notes, license and footer roles must all be present.
    """
    epub_path = _build_fixture_epub(tmp_path / "lefevre.epub")
    body_only = normalize_source(epub_path)
    everything = normalize_source(epub_path, include_non_body=True)

    # Default: only the three body chapters survive.
    labels = [chunk.chapter_label for chunk in body_only]
    assert labels == ["I", "II", "III"]
    default_roles = {chunk.section_role for chunk in body_only}
    assert default_roles == {SECTION_ROLE_BODY}

    # include_non_body: cover, nav, PG header, notes, license, footer all appear.
    seen_roles = {chunk.section_role for chunk in everything}
    for expected_role in (
        SECTION_ROLE_COVER,
        SECTION_ROLE_NAV,
        SECTION_ROLE_HEADER,
        SECTION_ROLE_NOTES,
        SECTION_ROLE_LICENSE,
        SECTION_ROLE_FOOTER,
    ):
        assert expected_role in seen_roles
def test_lefevre_fixture_cli_end_to_end(tmp_path: Path) -> None:
    """Smoke-test ``generate from-source`` through ``python -m infospace_bench``.

    The CLI runs in a child interpreter against the fixture EPUB, using the
    trading-literature profile and the checked-in fixture responses. The test
    expects exit code 0 and a JSON document on stdout whose ``status`` is
    ``"completed"`` and whose ``root`` contains the requested slug.
    """
    book = _build_fixture_epub(tmp_path / "lefevre.epub")

    # Build the child's import path so it does not depend on the working
    # directory, the platform's path separator, or one developer's checkout:
    #   1. the repository's own src/ directory, resolved from this file;
    #   2. everything the current interpreter already imports from (this also
    #      carries over any PYTHONPATH the test run was started with);
    #   3. the legacy hard-coded markitect-tool checkout as a last resort.
    # NOTE(review): assumes the package lives in <repo>/src next to tests/,
    # matching the relative "src" entry this replaces — confirm.
    repo_src = Path(__file__).resolve().parents[1] / "src"
    search_path = [
        str(repo_src),
        *(entry for entry in sys.path if entry),
        # NOTE(review): machine-specific fallback kept for backward
        # compatibility; drop once markitect-tool is importable in the test
        # environment by other means.
        "/home/worsch/markitect-tool/src",
    ]
    env = os.environ.copy()
    # dict.fromkeys removes duplicates while keeping first-seen order.
    env["PYTHONPATH"] = os.pathsep.join(dict.fromkeys(search_path))

    command = [
        sys.executable,
        "-m",
        "infospace_bench",
        "generate",
        "from-source",
        str(book),
        "--workspace",
        str(tmp_path),
        "--slug",
        "lefevre-fixture-cli",
        "--name",
        "Lefevre Fixture (CLI)",
        "--profile",
        "trading-literature",
        "--fixture-responses",
        str(FIXTURE_RESPONSES),
        "--apply",
    ]
    # check=False: the return code is asserted below so that a failure
    # reports the child's stderr instead of a bare CalledProcessError.
    result = subprocess.run(
        command,
        check=False,
        env=env,
        text=True,
        capture_output=True,
    )

    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    assert payload["status"] == "completed"
    assert "lefevre-fixture-cli" in payload["root"]