Files
marki-docx/tests/regression/test_level3_roundtrip.py
Bernd Worsch ac442ea41f feat: WP-0003 complete — LEVEL3 advanced features + error framework
Implements full LEVEL3 feature set: cross-references (xref.py), numbered
figures (figures.py), auto-diagrams (diagrams.py), bibliography/citations
(bibliography.py), LEVEL3 capability detection (level3.py), and structured
error/warning records (errors.py). Builder, importer, and differ updated for
LEVEL3 round-trip support. REST and MCP interfaces updated with structured
warning records. 259 tests passing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-16 10:51:38 +00:00

262 lines
9.7 KiB
Python

"""LEVEL3 end-to-end round-trip regression tests (FR-1100, MRKD-WP-0003 T07).
Tests the full build → import → compare cycle for each corpus file in
tests/regression/level3/, using feature_level: level3.
All LEVEL1 regression tests must remain green (non-regression gate).
"""
from __future__ import annotations
from pathlib import Path
import pytest
import yaml
from markidocx.builder import build_document
from markidocx.differ import compare
from markidocx.importer import import_document
from markidocx.manifest import load_manifest
# Directory holding the LEVEL3 corpus documents (tests/regression/level3/),
# resolved relative to this test module.
CORPUS_DIR = Path(__file__).parent.joinpath("level3")

# Markdown corpus files exercised by the parametrized round-trip tests below.
CORPUS_FILES = [
    "xref_document.md",          # cross-references
    "figures_document.md",       # numbered figures
    "diagrams_document.md",      # diagrams
    "bibliography_document.md",  # bibliography / citations
    "combined_document.md",      # all LEVEL3 constructs together
]
def _make_level3_project(tmp_path: Path, markdown: str, name: str = "test") -> Path:
    """Create a minimal LEVEL3 project under *tmp_path* and return its manifest path.

    Writes ``doc.md`` containing *markdown*, writes a ``manifest.yaml`` that
    declares ``feature_level: level3`` with ``doc.md`` as its only source, and
    creates the ``dist`` output directory.

    Args:
        tmp_path: Directory in which the project files are created.
        markdown: Markdown text written to ``doc.md``.
        name: Project name recorded in the manifest.

    Returns:
        Path to the written ``manifest.yaml``.
    """
    (tmp_path / "doc.md").write_text(markdown, encoding="utf-8")

    manifest_data = {
        "project": {"name": name, "feature_level": "level3", "family": "article"},
        "sources": [{"path": "doc.md"}],
        "output": {"dir": "./dist"},
    }
    manifest_path = tmp_path / "manifest.yaml"
    # Explicit UTF-8 (matching the doc.md write above) keeps the manifest
    # independent of the platform's default encoding.
    manifest_path.write_text(yaml.dump(manifest_data), encoding="utf-8")

    # exist_ok: do not fail if the output directory is already present.
    (tmp_path / "dist").mkdir(exist_ok=True)
    return manifest_path
# ---------------------------------------------------------------------------
# Corpus round-trip tests
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("corpus_file", CORPUS_FILES)
def test_level3_corpus_builds(tmp_path: Path, corpus_file: str) -> None:
    """Each corpus file builds successfully under LEVEL3."""
    md = (CORPUS_DIR / corpus_file).read_text(encoding="utf-8")
    # Path.stem drops only the trailing extension; str.replace(".md", "")
    # would also remove any ".md" occurring elsewhere in the file name.
    project_name = Path(corpus_file).stem
    manifest_path = _make_level3_project(tmp_path, md, name=project_name)
    manifest = load_manifest(manifest_path)

    result = build_document(manifest)

    assert result.success, f"Build failed for {corpus_file}: {result.errors}"
    assert result.output_path.exists(), (
        f"Build reported success for {corpus_file} but {result.output_path} is missing"
    )
    assert result.feature_level == "level3", (
        f"Unexpected feature level for {corpus_file}: {result.feature_level}"
    )
@pytest.mark.parametrize("corpus_file", CORPUS_FILES)
def test_level3_corpus_imports(tmp_path: Path, corpus_file: str) -> None:
    """Each corpus file imports successfully after build."""
    md = (CORPUS_DIR / corpus_file).read_text(encoding="utf-8")
    # Path.stem drops only the trailing extension; str.replace(".md", "")
    # would also remove any ".md" occurring elsewhere in the file name.
    project_name = Path(corpus_file).stem
    manifest_path = _make_level3_project(tmp_path, md, name=project_name)
    manifest = load_manifest(manifest_path)

    build_result = build_document(manifest)
    # Include the build errors so a failing precondition is diagnosable.
    assert build_result.success, f"Build failed for {corpus_file}: {build_result.errors}"

    import_result = import_document(manifest, build_result.output_path)
    assert import_result.success, f"Import failed for {corpus_file}: {import_result.warnings}"
@pytest.mark.parametrize("corpus_file", CORPUS_FILES)
def test_level3_corpus_no_unexpected_breakage(tmp_path: Path, corpus_file: str) -> None:
    """Round-trip diff for each corpus file has no broken headings."""
    md = (CORPUS_DIR / corpus_file).read_text(encoding="utf-8")
    # Path.stem drops only the trailing extension; str.replace(".md", "")
    # would also remove any ".md" occurring elsewhere in the file name.
    project_name = Path(corpus_file).stem
    manifest_path = _make_level3_project(tmp_path, md, name=project_name)
    manifest = load_manifest(manifest_path)

    build_result = build_document(manifest)
    assert build_result.success, f"Build failed for {corpus_file}: {build_result.errors}"

    import_result = import_document(manifest, build_result.output_path)
    assert import_result.success, f"Import failed for {corpus_file}: {import_result.warnings}"
    # Fail with a clear message instead of an IndexError on the lookup below.
    assert import_result.output_files, f"Import produced no output files for {corpus_file}"

    reimported = import_result.output_files[0].read_text(encoding="utf-8")
    report = compare(md, reimported)

    # Only entries tagged "heading:" are treated as breakage by this test;
    # other entries in report.broken are not checked here.
    broken_headings = [b for b in report.broken if b.startswith("heading:")]
    assert not broken_headings, (
        f"Broken headings in {corpus_file}: {broken_headings}"
    )
# ---------------------------------------------------------------------------
# Specific corpus: xref_document — cross-ref anchors preserved
# ---------------------------------------------------------------------------
def test_xref_document_anchors_preserved(tmp_path: Path) -> None:
    """Anchors of xref_document.md are still present after build + import."""
    source_text = (CORPUS_DIR / "xref_document.md").read_text(encoding="utf-8")
    manifest = load_manifest(_make_level3_project(tmp_path, source_text, name="xref"))

    built = build_document(manifest)
    assert built.success

    imported = import_document(manifest, built.output_path)
    assert imported.success

    roundtripped = imported.output_files[0].read_text(encoding="utf-8")

    # Core anchors must survive the round trip.
    for anchor in ("{#intro}", "{#bg}", "{#method}", "{#results}"):
        assert anchor in roundtripped
# ---------------------------------------------------------------------------
# Specific corpus: figures_document — figure labels preserved
# ---------------------------------------------------------------------------
def test_figures_document_labels_preserved(tmp_path: Path) -> None:
    """Figure labels of figures_document.md are still present after build + import."""
    source_text = (CORPUS_DIR / "figures_document.md").read_text(encoding="utf-8")
    manifest = load_manifest(_make_level3_project(tmp_path, source_text, name="figures"))

    built = build_document(manifest)
    assert built.success

    imported = import_document(manifest, built.output_path)
    assert imported.success

    roundtripped = imported.output_files[0].read_text(encoding="utf-8")

    # Every figure label must appear somewhere in the re-imported Markdown.
    for label in ("fig:arch", "fig:dataflow", "fig:results"):
        assert label in roundtripped
# ---------------------------------------------------------------------------
# Specific corpus: diagrams_document — diagram sources preserved
# ---------------------------------------------------------------------------
def test_diagrams_document_sources_preserved(tmp_path: Path, monkeypatch) -> None:
    """Diagram sources survive round-trip in source-only path."""
    import shutil

    # Make every executable lookup via shutil.which report "not found".
    # NOTE(review): presumably this is how the builder locates diagram
    # renderers, forcing the source-only path — confirm against the builder.
    # The stub accepts the full shutil.which signature (cmd, mode=, path=) so a
    # caller passing extra arguments gets None instead of a TypeError.
    monkeypatch.setattr(shutil, "which", lambda *_args, **_kwargs: None)

    md = (CORPUS_DIR / "diagrams_document.md").read_text(encoding="utf-8")
    manifest_path = _make_level3_project(tmp_path, md, name="diagrams")
    manifest = load_manifest(manifest_path)

    build_result = build_document(manifest)
    assert build_result.success

    import_result = import_document(manifest, build_result.output_path)
    assert import_result.success

    reimported = import_result.output_files[0].read_text(encoding="utf-8")

    # At least one diagram type must appear in the re-imported Markdown.
    diagram_kinds = ("mermaid", "graphviz", "plantuml")
    assert any(kind in reimported for kind in diagram_kinds), (
        f"None of {diagram_kinds} found in re-imported diagrams_document.md"
    )
# ---------------------------------------------------------------------------
# Specific corpus: bibliography_document — citation keys preserved
# ---------------------------------------------------------------------------
def test_bibliography_document_citations_preserved(tmp_path: Path) -> None:
    """Citation keys of bibliography_document.md are still present after build + import."""
    source_text = (CORPUS_DIR / "bibliography_document.md").read_text(encoding="utf-8")
    manifest = load_manifest(
        _make_level3_project(tmp_path, source_text, name="bibliography")
    )

    built = build_document(manifest)
    assert built.success

    imported = import_document(manifest, built.output_path)
    assert imported.success

    roundtripped = imported.output_files[0].read_text(encoding="utf-8")

    # Every citation key must appear somewhere in the re-imported Markdown.
    for citation_key in ("smith2020", "jones2021", "brown2019"):
        assert citation_key in roundtripped
# ---------------------------------------------------------------------------
# Specific corpus: combined_document — all LEVEL3 constructs
# ---------------------------------------------------------------------------
def test_combined_document_roundtrip(tmp_path: Path, monkeypatch) -> None:
    """Combined document with all LEVEL3 constructs survives build+import."""
    import shutil

    # Make every executable lookup via shutil.which report "not found".
    # NOTE(review): presumably this keeps the build independent of locally
    # installed diagram tools — confirm against the builder.
    # The stub accepts the full shutil.which signature (cmd, mode=, path=) so a
    # caller passing extra arguments gets None instead of a TypeError.
    monkeypatch.setattr(shutil, "which", lambda *_args, **_kwargs: None)

    md = (CORPUS_DIR / "combined_document.md").read_text(encoding="utf-8")
    manifest_path = _make_level3_project(tmp_path, md, name="combined")
    manifest = load_manifest(manifest_path)

    build_result = build_document(manifest)
    assert build_result.success

    import_result = import_document(manifest, build_result.output_path)
    assert import_result.success

    reimported = import_result.output_files[0].read_text(encoding="utf-8")

    # Anchors preserved
    assert "{#intro}" in reimported
    # Figures preserved (at least the label)
    assert "fig:arch" in reimported
    # Citations preserved
    assert "smith2020" in reimported
# ---------------------------------------------------------------------------
# CLI: markidocx test executes LEVEL1 + LEVEL3 corpus (non-regression gate)
# ---------------------------------------------------------------------------
def test_level1_regression_still_passes(tmp_path: Path) -> None:
    """Non-regression gate: the LEVEL1 round-trip stays green after LEVEL3 changes."""
    from tests.regression.test_roundtrip import LEVEL1_MARKDOWN

    # Lay out a minimal LEVEL1 project: one source file, a manifest, and dist/.
    source_file = tmp_path / "doc.md"
    source_file.write_text(LEVEL1_MARKDOWN, encoding="utf-8")

    project_config = {
        "project": {"name": "l1-nonreg", "feature_level": "level1", "family": "article"},
        "sources": [{"path": "doc.md"}],
        "output": {"dir": "./dist"},
    }
    manifest_file = tmp_path / "manifest.yaml"
    manifest_file.write_text(yaml.dump(project_config))
    (tmp_path / "dist").mkdir()

    manifest = load_manifest(manifest_file)

    # Build must succeed and report no errors at all.
    built = build_document(manifest)
    assert built.success
    assert not built.errors

    imported = import_document(manifest, built.output_path)
    assert imported.success

    # Compare the original Markdown with what came back from the import;
    # no entry tagged "heading:" may be reported as broken.
    roundtripped = imported.output_files[0].read_text(encoding="utf-8")
    report = compare(LEVEL1_MARKDOWN, roundtripped)
    assert not any(entry.startswith("heading:") for entry in report.broken)