generated from coulomb/repo-seed
Check in a small Lefevre-shaped EPUB fixture as separate source files under tests/fixtures/lefevre/sources/ (container.xml, OPF, nav, cover, PG header, three roman-numeral chapters with page anchors, transcriber notes, license, PG footer). The test helper assembles these into an EPUB at test time so the inputs stay inspectable in git.

Fixture responses tuned to the trading-literature profile (T04) live at tests/fixtures/lefevre/responses.yaml: trader / institution / strategy categories on entities, strategy_outcome / actor_venue relation types, and all four trading-tuned evaluation criteria.

Three tests cover the acceptance:
- end-to-end Python pipeline: stable chapter-NN source slugs, full artifact tree (entities, relations, evaluations, metrics, history, generation report), budget registry persisted, chapter_number provenance round-trips through artifacts/index.yaml
- regression: PG boilerplate (cover, nav, header, notes, license, footer) is excluded by default and only appears under include_non_body=True
- CLI smoke through generate from-source --profile trading-literature --fixture-responses ...

125 tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
161 lines
5.7 KiB
Python
161 lines
5.7 KiB
Python
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import zipfile
|
|
from pathlib import Path
|
|
|
|
from infospace_bench.budget import (
|
|
read_plan_snapshots,
|
|
read_usage_runs,
|
|
)
|
|
from infospace_bench.generator import (
|
|
init_generation_infospace,
|
|
plan_generation,
|
|
run_generation,
|
|
status_generation,
|
|
)
|
|
from infospace_bench.source_intake import (
|
|
SECTION_ROLE_BODY,
|
|
SECTION_ROLE_COVER,
|
|
SECTION_ROLE_FOOTER,
|
|
SECTION_ROLE_HEADER,
|
|
SECTION_ROLE_LICENSE,
|
|
SECTION_ROLE_NAV,
|
|
SECTION_ROLE_NOTES,
|
|
normalize_source,
|
|
)
|
|
|
|
|
|
# Root of the checked-in Lefevre fixture, resolved relative to this test module
# so the tests do not depend on the current working directory.
FIXTURE_ROOT = Path(__file__).parent.joinpath("fixtures", "lefevre")
# Individual EPUB member files (container.xml, content.opf, *.xhtml) that
# _build_fixture_epub zips together at test time.
FIXTURE_SOURCES = FIXTURE_ROOT.joinpath("sources")
# Canned responses handed to the generator via fixture_responses /
# --fixture-responses.
FIXTURE_RESPONSES = FIXTURE_ROOT.joinpath("responses.yaml")
|
|
|
|
|
|
def _build_fixture_epub(target: Path) -> Path:
    """Zip the checked-in Lefevre fixture sources into one EPUB at *target*.

    Members are written in a fixed order: the ``mimetype`` entry, then
    ``META-INF/container.xml``, then every ``*.xhtml`` source (sorted by path)
    under ``OEBPS/``, and finally ``OEBPS/content.opf``.

    Args:
        target: Path of the EPUB file to create.

    Returns:
        The same *target* path, for call-site convenience.
    """
    # Read every source up front so a missing fixture file fails before the
    # archive is opened for writing.
    members = [
        ("mimetype", "application/epub+zip"),
        (
            "META-INF/container.xml",
            (FIXTURE_SOURCES / "container.xml").read_text(encoding="utf-8"),
        ),
    ]
    members.extend(
        (f"OEBPS/{page.name}", page.read_text(encoding="utf-8"))
        for page in sorted(FIXTURE_SOURCES.glob("*.xhtml"))
    )
    members.append(
        (
            "OEBPS/content.opf",
            (FIXTURE_SOURCES / "content.opf").read_text(encoding="utf-8"),
        )
    )

    with zipfile.ZipFile(target, "w") as epub:
        for member_name, text in members:
            epub.writestr(member_name, text)
    return target
|
|
|
|
|
|
def test_lefevre_fixture_builds_a_complete_infospace(tmp_path: Path) -> None:
    """Run init -> plan -> run on the Lefevre fixture and check the artifact tree.

    Covers the reported status counters, the stable ``chapter-NN.md`` source
    filenames, the presence of entity/relation/evaluation/metrics/report
    outputs, the persisted budget registry, and the ``chapter_number``
    provenance recorded in ``artifacts/index.yaml``.
    """
    epub_path = _build_fixture_epub(tmp_path / "lefevre.epub")

    infospace = init_generation_infospace(
        tmp_path,
        epub_path,
        "lefevre-fixture",
        name="Reminiscences of a Stock Operator (Fixture)",
        profile="trading-literature",
    )
    root = infospace.root
    plan_generation(root)
    result = run_generation(root, fixture_responses=FIXTURE_RESPONSES)
    status = status_generation(root)

    assert result.status == "completed"
    assert status["profile"] == "trading-literature"
    # Three body chapters in the fixture spine; cover/nav/header/notes/license/footer
    # are excluded by default.
    assert status["source_chunk_count"] == 3
    for counter in (
        "entity_count",
        "relation_count",
        "evaluation_count",
        "history_snapshot_count",
    ):
        assert status[counter] >= 1

    artifacts_dir = root / "artifacts"
    output_dir = root / "output"

    # Stable chapter-NN source filenames from the IB-WP-0016 T02 work.
    source_names = {md.name for md in (artifacts_dir / "sources").glob("*.md")}
    assert {"chapter-01.md", "chapter-02.md", "chapter-03.md"} == source_names

    # Manifest-backed artifacts: entities, relations, evaluations, metrics, history, report.
    for directory in (artifacts_dir / "entities", artifacts_dir / "relations"):
        assert directory.is_dir()
    assert any((output_dir / "evaluations").glob("*.md"))
    for expected_file in (
        output_dir / "metrics" / "metrics.yaml",
        output_dir / "metrics" / "history.yaml",
        root / "reports" / "generation-summary.md",
    ):
        assert expected_file.is_file()

    # Budget registry artifacts (IB-WP-0019) should land alongside the run.
    assert read_plan_snapshots(root), "plan snapshot must persist"
    usage_runs = read_usage_runs(root)
    assert usage_runs
    # The first recorded run must point at the most recent plan snapshot.
    run_snapshot_id = usage_runs[0]["snapshot_id"]
    latest_plan_snapshot_id = read_plan_snapshots(root)[-1]["snapshot_id"]
    assert run_snapshot_id == latest_plan_snapshot_id

    # Book provenance plumb-through: every source artifact knows the chapter it came from.
    import yaml as _yaml

    index_text = (artifacts_dir / "index.yaml").read_text(encoding="utf-8")
    index = _yaml.safe_load(index_text)
    chapter_numbers = [
        entry["provenance"]["chapter_number"]
        for entry in index["artifacts"]
        if entry["kind"] == "source"
    ]
    chapter_numbers.sort()
    assert chapter_numbers == [1, 2, 3]
|
|
|
|
|
|
def test_lefevre_fixture_excludes_gutenberg_boilerplate_by_default(tmp_path: Path) -> None:
    """Check that non-body sections only surface when ``include_non_body=True``.

    With default arguments ``normalize_source`` must yield exactly the three
    body chapters (labels I, II, III). With ``include_non_body=True`` the
    cover, nav, header, notes, license and footer roles must all be present.
    """
    epub_path = _build_fixture_epub(tmp_path / "lefevre.epub")

    body_only = normalize_source(epub_path)
    everything = normalize_source(epub_path, include_non_body=True)

    # Default: only the three body chapters survive.
    labels = [chunk.chapter_label for chunk in body_only]
    assert labels == ["I", "II", "III"]
    default_roles = {chunk.section_role for chunk in body_only}
    assert default_roles == {SECTION_ROLE_BODY}

    # include_non_body: cover, nav, PG header, notes, license, footer all appear.
    seen_roles = {chunk.section_role for chunk in everything}
    for boilerplate_role in (
        SECTION_ROLE_COVER,
        SECTION_ROLE_NAV,
        SECTION_ROLE_HEADER,
        SECTION_ROLE_NOTES,
        SECTION_ROLE_LICENSE,
        SECTION_ROLE_FOOTER,
    ):
        assert boilerplate_role in seen_roles
|
|
|
|
|
|
def test_lefevre_fixture_cli_end_to_end(tmp_path: Path) -> None:
    """Smoke-test ``generate from-source`` through the real CLI in a subprocess.

    Builds the fixture EPUB, runs ``python -m infospace_bench generate
    from-source ... --apply`` with the trading-literature profile and the
    checked-in fixture responses, then asserts a zero exit code and a JSON
    payload on stdout reporting ``status == "completed"`` and a ``root`` that
    contains the requested slug.
    """
    book = _build_fixture_epub(tmp_path / "lefevre.epub")

    # Anchor the package source tree on this file rather than the current
    # working directory, so the test also passes when pytest is started
    # outside the repository root.
    # NOTE(review): assumes this module lives in <repo>/tests/ next to
    # <repo>/src/ - confirm against the repository layout.
    repo_src = Path(__file__).resolve().parent.parent / "src"
    search_path = [
        str(repo_src),
        # Legacy entries kept so existing setups keep working unchanged.
        "src",
        # NOTE(review): machine-specific checkout of markitect-tool; prefer
        # installing that dependency or supplying it via PYTHONPATH.
        "/home/worsch/markitect-tool/src",
    ]

    env = os.environ.copy()
    # Extend, rather than replace, whatever PYTHONPATH the caller configured.
    inherited = env.get("PYTHONPATH")
    if inherited:
        search_path.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(search_path)

    command = [
        sys.executable,
        "-m",
        "infospace_bench",
        "generate",
        "from-source",
        str(book),
        "--workspace",
        str(tmp_path),
        "--slug",
        "lefevre-fixture-cli",
        "--name",
        "Lefevre Fixture (CLI)",
        "--profile",
        "trading-literature",
        "--fixture-responses",
        str(FIXTURE_RESPONSES),
        "--apply",
    ]
    # check=False so a failure surfaces through the assertion below with the
    # captured stderr attached, instead of a bare CalledProcessError.
    result = subprocess.run(
        command,
        check=False,
        env=env,
        text=True,
        capture_output=True,
    )

    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    assert payload["status"] == "completed"
    assert "lefevre-fixture-cli" in payload["root"]
|