"""LEVEL3 end-to-end round-trip regression tests (FR-1100, MRKD-WP-0003 T07).

Tests the full build → import → compare cycle for each corpus file in
tests/regression/level3/, using feature_level: level3.

All LEVEL1 regression tests must remain green (non-regression gate).
"""

from __future__ import annotations

from pathlib import Path

import pytest
import yaml

from markidocx.builder import build_document
from markidocx.differ import compare
from markidocx.importer import import_document
from markidocx.manifest import load_manifest

# Corpus files in tests/regression/level3/
CORPUS_DIR = Path(__file__).parent / "level3"
CORPUS_FILES = [
    "xref_document.md",
    "figures_document.md",
    "diagrams_document.md",
    "bibliography_document.md",
    "combined_document.md",
]


# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------


def _make_level3_project(
    tmp_path: Path,
    markdown: str,
    name: str = "test",
    *,
    feature_level: str = "level3",
) -> Path:
    """Scaffold a minimal one-source project under *tmp_path*.

    Writes ``doc.md`` (the given markdown), a ``manifest.yaml`` that points at
    it, and an empty ``dist`` output directory.

    Args:
        tmp_path: Directory that receives the project files.
        markdown: Markdown text written to ``doc.md``.
        name: Value stored under ``project.name`` in the manifest.
        feature_level: Value stored under ``project.feature_level``. Defaults
            to ``"level3"``; the LEVEL1 non-regression test passes ``"level1"``.

    Returns:
        Path of the written ``manifest.yaml``.
    """
    (tmp_path / "doc.md").write_text(markdown, encoding="utf-8")

    manifest_path = tmp_path / "manifest.yaml"
    manifest_path.write_text(
        yaml.dump(
            {
                "project": {
                    "name": name,
                    "feature_level": feature_level,
                    "family": "article",
                },
                "sources": [{"path": "doc.md"}],
                "output": {"dir": "./dist"},
            }
        ),
        # Same explicit encoding as doc.md, so the result does not depend on
        # the platform's default locale encoding.
        encoding="utf-8",
    )

    (tmp_path / "dist").mkdir()
    return manifest_path


def _read_corpus(corpus_file: str) -> str:
    """Return the UTF-8 text of *corpus_file* from ``CORPUS_DIR``."""
    return (CORPUS_DIR / corpus_file).read_text(encoding="utf-8")


def _build_and_import(
    tmp_path: Path,
    markdown: str,
    name: str,
    *,
    feature_level: str = "level3",
):
    """Build *markdown* as a project and import the built document back.

    Asserts that both the build and the import report success, including the
    reported errors/warnings in the failure message.

    Returns:
        A ``(build_result, import_result)`` tuple.
    """
    manifest_path = _make_level3_project(
        tmp_path, markdown, name=name, feature_level=feature_level
    )
    manifest = load_manifest(manifest_path)

    build_result = build_document(manifest)
    assert build_result.success, f"Build failed for {name}: {build_result.errors}"

    import_result = import_document(manifest, build_result.output_path)
    assert import_result.success, (
        f"Import failed for {name}: {import_result.warnings}"
    )
    return build_result, import_result


def _read_reimported(import_result) -> str:
    """Return the text of the first file listed in ``import_result.output_files``."""
    # Fail with a readable message instead of a bare IndexError.
    assert import_result.output_files, "Import produced no output files"
    return import_result.output_files[0].read_text(encoding="utf-8")


def _roundtrip(tmp_path: Path, markdown: str, name: str) -> str:
    """Build and re-import *markdown* under LEVEL3; return the re-imported text."""
    _, import_result = _build_and_import(tmp_path, markdown, name)
    return _read_reimported(import_result)


def _broken_headings(original: str, reimported: str) -> list[str]:
    """Return the ``heading:`` entries of the diff report's ``broken`` list."""
    report = compare(original, reimported)
    return [entry for entry in report.broken if entry.startswith("heading:")]


# ---------------------------------------------------------------------------
# Corpus round-trip tests
# ---------------------------------------------------------------------------


@pytest.mark.parametrize("corpus_file", CORPUS_FILES)
def test_level3_corpus_builds(tmp_path: Path, corpus_file: str) -> None:
    """Each corpus file builds successfully under LEVEL3."""
    md = _read_corpus(corpus_file)
    manifest_path = _make_level3_project(tmp_path, md, name=Path(corpus_file).stem)
    manifest = load_manifest(manifest_path)

    result = build_document(manifest)

    assert result.success, f"Build failed for {corpus_file}: {result.errors}"
    assert result.output_path.exists()
    assert result.feature_level == "level3"


@pytest.mark.parametrize("corpus_file", CORPUS_FILES)
def test_level3_corpus_imports(tmp_path: Path, corpus_file: str) -> None:
    """Each corpus file imports successfully after build."""
    md = _read_corpus(corpus_file)
    # The helper asserts build and import success; nothing else is checked here.
    _build_and_import(tmp_path, md, Path(corpus_file).stem)


@pytest.mark.parametrize("corpus_file", CORPUS_FILES)
def test_level3_corpus_no_unexpected_breakage(tmp_path: Path, corpus_file: str) -> None:
    """Round-trip diff for each corpus file has no broken headings."""
    md = _read_corpus(corpus_file)
    reimported = _roundtrip(tmp_path, md, Path(corpus_file).stem)

    # Headings must not be broken
    broken_headings = _broken_headings(md, reimported)
    assert not broken_headings, (
        f"Broken headings in {corpus_file}: {broken_headings}"
    )


# ---------------------------------------------------------------------------
# Specific corpus: xref_document — cross-ref anchors preserved
# ---------------------------------------------------------------------------


def test_xref_document_anchors_preserved(tmp_path: Path) -> None:
    """Cross-reference anchors of xref_document.md survive build + import."""
    md = _read_corpus("xref_document.md")
    reimported = _roundtrip(tmp_path, md, "xref")

    # Core anchors must survive
    for anchor in ("{#intro}", "{#bg}", "{#method}", "{#results}"):
        assert anchor in reimported


# ---------------------------------------------------------------------------
# Specific corpus: figures_document — figure labels preserved
# ---------------------------------------------------------------------------


def test_figures_document_labels_preserved(tmp_path: Path) -> None:
    """Figure labels of figures_document.md survive build + import."""
    md = _read_corpus("figures_document.md")
    reimported = _roundtrip(tmp_path, md, "figures")

    for label in ("fig:arch", "fig:dataflow", "fig:results"):
        assert label in reimported


# ---------------------------------------------------------------------------
# Specific corpus: diagrams_document — diagram sources preserved
# ---------------------------------------------------------------------------


def test_diagrams_document_sources_preserved(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Diagram sources survive round-trip in source-only path."""
    import shutil

    # Make shutil.which report every command as missing.
    # NOTE(review): presumably this forces the source-only path by hiding any
    # installed diagram renderer — confirm against the builder.
    monkeypatch.setattr(shutil, "which", lambda _cmd: None)

    md = _read_corpus("diagrams_document.md")
    reimported = _roundtrip(tmp_path, md, "diagrams")

    # At least one diagram type must appear in reimported
    assert any(kind in reimported for kind in ("mermaid", "graphviz", "plantuml"))


# ---------------------------------------------------------------------------
# Specific corpus: bibliography_document — citation keys preserved
# ---------------------------------------------------------------------------


def test_bibliography_document_citations_preserved(tmp_path: Path) -> None:
    """Citation keys of bibliography_document.md survive build + import."""
    md = _read_corpus("bibliography_document.md")
    reimported = _roundtrip(tmp_path, md, "bibliography")

    for key in ("smith2020", "jones2021", "brown2019"):
        assert key in reimported


# ---------------------------------------------------------------------------
# Specific corpus: combined_document — all LEVEL3 constructs
# ---------------------------------------------------------------------------


def test_combined_document_roundtrip(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Combined document with all LEVEL3 constructs survives build+import."""
    import shutil

    # Same stub as the diagrams test: every command lookup returns None.
    monkeypatch.setattr(shutil, "which", lambda _cmd: None)

    md = _read_corpus("combined_document.md")
    reimported = _roundtrip(tmp_path, md, "combined")

    # Anchors preserved
    assert "{#intro}" in reimported
    # Figures preserved (at least the label)
    assert "fig:arch" in reimported
    # Citations preserved
    assert "smith2020" in reimported


# ---------------------------------------------------------------------------
# CLI: markidocx test executes LEVEL1 + LEVEL3 corpus (non-regression gate)
# ---------------------------------------------------------------------------


def test_level1_regression_still_passes(tmp_path: Path) -> None:
    """LEVEL1 round-trip must remain green after LEVEL3 changes (non-regression)."""
    from tests.regression.test_roundtrip import LEVEL1_MARKDOWN

    build_result, import_result = _build_and_import(
        tmp_path, LEVEL1_MARKDOWN, "l1-nonreg", feature_level="level1"
    )
    # A successful build must also be free of reported errors.
    assert not build_result.errors

    reimported = _read_reimported(import_result)
    assert not _broken_headings(LEVEL1_MARKDOWN, reimported)