"""Tests for LEVEL3 cross-reference support (FR-531, FR-540)."""

from __future__ import annotations

import textwrap
from pathlib import Path
from typing import Any

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# NOTE(review): this file was restored from a whitespace-collapsed copy. The
# YAML nesting in the manifests and the blank lines inside the Markdown
# literals are reconstructed -- confirm them against the manifest schema.
LEVEL3_MANIFEST = textwrap.dedent("""\
    project:
      name: xref-test
      feature_level: level3
      family: article
    sources:
      - path: doc.md
    output:
      dir: ./dist
""")


def _make_project(tmp_path: Path, markdown: str, manifest_yaml: str = LEVEL3_MANIFEST) -> Path:
    """Write ``doc.md`` and ``manifest.yaml`` into *tmp_path* and return it.

    Args:
        tmp_path: Directory that receives both files.
        markdown: Content written to ``doc.md``.
        manifest_yaml: Content written to ``manifest.yaml``; defaults to the
            LEVEL3 manifest defined in this module.
    """
    (tmp_path / "doc.md").write_text(markdown, encoding="utf-8")
    (tmp_path / "manifest.yaml").write_text(manifest_yaml, encoding="utf-8")
    return tmp_path


def _build_project(
    tmp_path: Path, markdown: str, manifest_yaml: str = LEVEL3_MANIFEST
) -> tuple[Any, Any]:
    """Create a project in *tmp_path*, build it, and return ``(manifest, result)``.

    The build result is returned unchecked so each test asserts on it itself.
    """
    # Imported lazily, like every other markidocx import in this module.
    from markidocx.builder import build_document
    from markidocx.manifest import load_manifest

    _make_project(tmp_path, markdown, manifest_yaml)
    manifest = load_manifest(tmp_path / "manifest.yaml")
    return manifest, build_document(manifest)


def _roundtrip_markdown(tmp_path: Path, markdown: str) -> str:
    """Build *markdown* with the LEVEL3 manifest, import it back, return the text.

    Asserts that the build and the import both succeed and that the import
    produced at least one output file; the first output file is read back.
    """
    from markidocx.importer import import_document

    manifest, build_result = _build_project(tmp_path, markdown)
    assert build_result.success
    import_result = import_document(manifest, build_result.output_path)
    assert import_result.success
    # Fail with a readable message rather than an IndexError on an empty list.
    assert import_result.output_files, "import produced no output files"
    return import_result.output_files[0].read_text(encoding="utf-8")


# ---------------------------------------------------------------------------
# xref module helpers
# ---------------------------------------------------------------------------


class TestXrefHelpers:
    """Unit tests for the helper functions in ``markidocx.xref``."""

    def test_extract_anchor_from_heading_plain(self) -> None:
        from markidocx.xref import extract_anchor_from_heading

        clean, anchor = extract_anchor_from_heading("Introduction {#intro}")
        assert clean == "Introduction"
        assert anchor == "intro"

    def test_extract_anchor_from_heading_no_anchor(self) -> None:
        from markidocx.xref import extract_anchor_from_heading

        clean, anchor = extract_anchor_from_heading("Introduction")
        assert clean == "Introduction"
        assert anchor is None

    def test_extract_anchors_from_text(self) -> None:
        from markidocx.xref import extract_anchors

        text = "# Section {#sec1}\n\n## Subsection {#sec2}\n\nNormal."
        anchors = extract_anchors(text)
        assert anchors == {"sec1", "sec2"}

    def test_extract_xref_links(self) -> None:
        from markidocx.xref import extract_xref_links

        text = "See [Section One][sec1] and [Section Two][sec2]."
        links = extract_xref_links(text)
        assert ("Section One", "sec1") in links
        assert ("Section Two", "sec2") in links

    def test_has_xref_links_true(self) -> None:
        from markidocx.xref import has_xref_links

        assert has_xref_links("See [Intro][intro] for details.")

    def test_has_xref_links_false(self) -> None:
        from markidocx.xref import has_xref_links

        assert not has_xref_links("Normal paragraph without refs.")


# ---------------------------------------------------------------------------
# Builder: headings with anchors → DOCX bookmarks (FR-531)
# ---------------------------------------------------------------------------


class TestBuilderXref:
    """Build-side tests: anchors and cross-ref links in the generated DOCX."""

    def test_build_with_anchor_succeeds(self, tmp_path: Path) -> None:
        md = "# Introduction {#intro}\n\nSome text.\n\n## Section One {#sec1}\n\nContent."
        _, result = _build_project(tmp_path, md)
        assert result.success
        assert result.output_path.exists()

    def test_build_docx_contains_bookmark(self, tmp_path: Path) -> None:
        """The built DOCX XML should contain a bookmarkStart for {#intro}."""
        from docx import Document as DocxReader

        md = "# Introduction {#intro}\n\nContent."
        _, result = _build_project(tmp_path, md)
        assert result.success

        doc = DocxReader(str(result.output_path))
        w_ns = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
        name_attr = f"{{{w_ns}}}name"
        # Read each bookmark name once; skip unnamed bookmarks and those whose
        # name starts with an underscore.
        names = (
            elem.get(name_attr)
            for elem in doc.element.body.iter(f"{{{w_ns}}}bookmarkStart")
        )
        bookmarks = [name for name in names if name and not name.startswith("_")]
        assert "intro" in bookmarks

    def test_build_with_cross_ref_link(self, tmp_path: Path) -> None:
        """Cross-ref links [text][anchor] render without errors."""
        md = textwrap.dedent("""\
            # Introduction {#intro}

            Some text.

            # Methodology {#method}

            See [Introduction][intro] for background.
        """)
        _, result = _build_project(tmp_path, md)
        assert result.success
        assert result.output_path.exists()

    def test_build_xref_not_activated_for_level1(self, tmp_path: Path) -> None:
        """Level1 build: {#anchor} syntax is treated as literal heading text."""
        manifest_yaml = textwrap.dedent("""\
            project:
              name: l1-test
              feature_level: level1
              family: article
            sources:
              - path: doc.md
            output:
              dir: ./dist
        """)
        # In LEVEL1, {#anchor} is not stripped and no bookmark is added
        md = "# Introduction {#intro}\n\nContent."
        _, result = _build_project(tmp_path, md, manifest_yaml)
        assert result.success
        # No cross-ref warnings
        xref_warnings = [w for w in result.warning_records if "xref" in w.reason.lower()]
        assert not xref_warnings


# ---------------------------------------------------------------------------
# Importer: DOCX bookmarks → {#anchor} labels (FR-531)
# ---------------------------------------------------------------------------


class TestImporterXref:
    """Import-side tests: anchors and links survive build followed by import."""

    def test_roundtrip_preserves_anchor(self, tmp_path: Path) -> None:
        """Build LEVEL3 doc with {#anchor}, import back → heading has {#anchor}."""
        md = "# Introduction {#intro}\n\nSome text."
        reimported = _roundtrip_markdown(tmp_path, md)
        assert "{#intro}" in reimported

    def test_roundtrip_preserves_cross_ref_link(self, tmp_path: Path) -> None:
        """Cross-ref link [text][anchor] survives a round trip."""
        md = textwrap.dedent("""\
            # Introduction {#intro}

            Some intro text.

            # Methodology {#method}

            See [Introduction][intro] for background.
        """)
        reimported = _roundtrip_markdown(tmp_path, md)
        assert "{#intro}" in reimported
        assert "[Introduction][intro]" in reimported


# ---------------------------------------------------------------------------
# Differ: cross-ref detection (FR-540)
# ---------------------------------------------------------------------------


class TestDifferXref:
    """Tests for how ``markidocx.differ.compare`` reports anchors and links."""

    def test_preserved_anchor_reported(self) -> None:
        from markidocx.differ import compare

        original = "# Introduction {#intro}\n\nText."
        reimported = "# Introduction {#intro}\n\nText."
        report = compare(original, reimported)
        assert any("xref-anchor:intro" in p for p in report.preserved)
        assert not any("xref-anchor" in b for b in report.broken)

    def test_missing_anchor_reported_as_broken(self) -> None:
        from markidocx.differ import compare

        original = "# Introduction {#intro}\n\nText."
        reimported = "# Introduction\n\nText."
        report = compare(original, reimported)
        assert any("xref-anchor:missing 'intro'" in b for b in report.broken)
        assert report.has_drift

    def test_preserved_xref_link(self) -> None:
        from markidocx.differ import compare

        text = "# Intro {#intro}\n\nSee [Intro][intro]."
        report = compare(text, text)
        assert any("xref-link" in p for p in report.preserved)

    def test_broken_xref_link_target_missing(self) -> None:
        from markidocx.differ import compare

        original = "# Intro {#intro}\n\nSee [Intro][intro]."
        reimported = "# Intro\n\nSee something."
        report = compare(original, reimported)
        # anchor missing → broken xref link
        broken_xref = [b for b in report.broken if "xref" in b]
        assert broken_xref


# ---------------------------------------------------------------------------
# Full single-file xref round-trip
# ---------------------------------------------------------------------------


class TestXrefRoundTrip:
    """End-to-end tests: build, import, then inspect the re-imported Markdown."""

    def test_single_file_xref_roundtrip(self, tmp_path: Path) -> None:
        from markidocx.differ import compare

        md = textwrap.dedent("""\
            # Introduction {#intro}

            Welcome.

            # Background {#bg}

            See [Introduction][intro] and [Background][bg].
        """)
        reimported = _roundtrip_markdown(tmp_path, md)
        report = compare(md, reimported)
        # No broken cross-refs
        broken_xrefs = [b for b in report.broken if "xref" in b]
        assert not broken_xrefs, f"Broken xrefs found: {broken_xrefs}"

    def test_multi_ref_document(self, tmp_path: Path) -> None:
        """Every anchor of a document with multiple anchors and refs survives a round trip."""
        md = textwrap.dedent("""\
            # Chapter One {#ch1}

            Opening.

            # Chapter Two {#ch2}

            See [Chapter One][ch1].

            # Chapter Three {#ch3}

            Refers to [Chapter One][ch1] and [Chapter Two][ch2].
        """)
        reimported = _roundtrip_markdown(tmp_path, md)
        # All three anchors should be in reimported
        assert "{#ch1}" in reimported
        assert "{#ch2}" in reimported
        assert "{#ch3}" in reimported