Files
marki-docx/tests/test_level3_xref.py
Bernd Worsch ac442ea41f feat: WP-0003 complete — LEVEL3 advanced features + error framework
Implements full LEVEL3 feature set: cross-references (xref.py), numbered
figures (figures.py), auto-diagrams (diagrams.py), bibliography/citations
(bibliography.py), LEVEL3 capability detection (level3.py), and structured
error/warning records (errors.py). Builder, importer, and differ updated for
LEVEL3 round-trip support. REST and MCP interfaces updated with structured
warning records. 259 tests passing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-16 10:51:38 +00:00

327 lines
11 KiB
Python

"""Tests for LEVEL3 cross-reference support (FR-531, FR-540)."""
from __future__ import annotations
import textwrap
from pathlib import Path
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Manifest for a minimal LEVEL3 "article" project with one Markdown source
# (doc.md) that builds into ./dist.  Used as the default manifest by
# _make_project().
#
# The nesting matters: in YAML, un-indented `name:` / `feature_level:` /
# `dir:` lines would be top-level keys and `project:` / `output:` would be
# empty values.  NOTE(review): the nested layout is assumed to be what
# load_manifest() expects -- confirm against the manifest schema.
LEVEL3_MANIFEST = textwrap.dedent("""\
    project:
      name: xref-test
      feature_level: level3
      family: article
    sources:
      - path: doc.md
    output:
      dir: ./dist
""")
def _make_project(tmp_path: Path, markdown: str, manifest_yaml: str = LEVEL3_MANIFEST) -> Path:
    """Populate *tmp_path* with a one-document project and return *tmp_path*.

    Writes *markdown* to ``doc.md`` and *manifest_yaml* to ``manifest.yaml``,
    both encoded as UTF-8.  *manifest_yaml* defaults to ``LEVEL3_MANIFEST``.
    """
    project_files = (
        ("doc.md", markdown),
        ("manifest.yaml", manifest_yaml),
    )
    for filename, content in project_files:
        tmp_path.joinpath(filename).write_text(content, encoding="utf-8")
    return tmp_path
# ---------------------------------------------------------------------------
# xref module helpers
# ---------------------------------------------------------------------------
class TestXrefHelpers:
    """Unit tests for the text-level helper functions in ``markidocx.xref``."""

    def test_extract_anchor_from_heading_plain(self) -> None:
        """A heading ending in ``{#id}`` yields the clean text and the id."""
        from markidocx.xref import extract_anchor_from_heading

        heading_text, anchor_id = extract_anchor_from_heading("Introduction {#intro}")
        assert heading_text == "Introduction"
        assert anchor_id == "intro"

    def test_extract_anchor_from_heading_no_anchor(self) -> None:
        """A heading without ``{#id}`` is returned unchanged with a None anchor."""
        from markidocx.xref import extract_anchor_from_heading

        heading_text, anchor_id = extract_anchor_from_heading("Introduction")
        assert heading_text == "Introduction"
        assert anchor_id is None

    def test_extract_anchors_from_text(self) -> None:
        """All heading anchors in a document are collected."""
        from markidocx.xref import extract_anchors

        document = "# Section {#sec1}\n\n## Subsection {#sec2}\n\nNormal."
        found = extract_anchors(document)
        assert found == {"sec1", "sec2"}

    def test_extract_xref_links(self) -> None:
        """Each ``[text][anchor]`` reference is reported as a (text, anchor) pair."""
        from markidocx.xref import extract_xref_links

        found = extract_xref_links("See [Section One][sec1] and [Section Two][sec2].")
        for expected_pair in (("Section One", "sec1"), ("Section Two", "sec2")):
            assert expected_pair in found

    def test_has_xref_links_true(self) -> None:
        """Text containing a ``[text][anchor]`` reference is detected."""
        from markidocx.xref import has_xref_links

        sample = "See [Intro][intro] for details."
        assert has_xref_links(sample)

    def test_has_xref_links_false(self) -> None:
        """Text without any reference is not flagged."""
        from markidocx.xref import has_xref_links

        sample = "Normal paragraph without refs."
        assert not has_xref_links(sample)
# ---------------------------------------------------------------------------
# Builder: headings with anchors → DOCX bookmarks (FR-531)
# ---------------------------------------------------------------------------
class TestBuilderXref:
    """Builder tests: ``{#anchor}`` headings become DOCX bookmarks at LEVEL3 (FR-531)."""

    # WordprocessingML main namespace; lxml needs it to qualify the
    # ``w:bookmarkStart`` tag and its ``w:name`` attribute.
    _W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"

    @classmethod
    def _bookmark_names(cls, docx_path: Path) -> list[str]:
        """Return the names of the ``w:bookmarkStart`` elements in *docx_path*.

        Elements without a name and names starting with ``_`` are skipped
        (presumably Word-internal bookmarks such as ``_GoBack`` -- the tests
        only care about bookmarks derived from heading anchors).
        """
        from docx import Document as DocxReader

        name_attr = f"{{{cls._W_NS}}}name"
        start_tag = f"{{{cls._W_NS}}}bookmarkStart"
        body = DocxReader(str(docx_path)).element.body
        raw_names = (elem.get(name_attr) for elem in body.iter(start_tag))
        return [name for name in raw_names if name and not name.startswith("_")]

    def test_build_with_anchor_succeeds(self, tmp_path: Path) -> None:
        """A LEVEL3 document whose headings carry anchors builds successfully."""
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        md = "# Introduction {#intro}\n\nSome text.\n\n## Section One {#sec1}\n\nContent."
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success
        assert result.output_path.exists()

    def test_build_docx_contains_bookmark(self, tmp_path: Path) -> None:
        """The built DOCX XML should contain a bookmarkStart for {#intro}."""
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        md = "# Introduction {#intro}\n\nContent."
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success
        assert "intro" in self._bookmark_names(result.output_path)

    def test_build_with_cross_ref_link(self, tmp_path: Path) -> None:
        """Cross-ref links [text][anchor] render without errors."""
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        md = textwrap.dedent("""\
            # Introduction {#intro}
            Some text.
            # Methodology {#method}
            See [Introduction][intro] for background.
        """)
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success
        assert result.output_path.exists()

    def test_build_xref_not_activated_for_level1(self, tmp_path: Path) -> None:
        """Level1 build: {#anchor} syntax is treated as literal heading text."""
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        # Keys under `project:` / `output:` must be indented; un-indented they
        # would be top-level YAML keys and `feature_level` would not belong to
        # the project.  NOTE(review): nested layout assumed from the key names.
        manifest_yaml = textwrap.dedent("""\
            project:
              name: l1-test
              feature_level: level1
              family: article
            sources:
              - path: doc.md
            output:
              dir: ./dist
        """)
        # In LEVEL1, {#anchor} is not stripped and no bookmark is added
        md = "# Introduction {#intro}\n\nContent."
        _make_project(tmp_path, md, manifest_yaml)
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success
        # No cross-ref warnings
        xref_warnings = [w for w in result.warning_records if "xref" in w.reason.lower()]
        assert not xref_warnings
        # Verify the "no bookmark is added" part of the contract as well.
        assert "intro" not in self._bookmark_names(result.output_path)
# ---------------------------------------------------------------------------
# Importer: DOCX bookmarks → {#anchor} labels (FR-531)
# ---------------------------------------------------------------------------
class TestImporterXref:
    """Importer tests: DOCX bookmarks come back as ``{#anchor}`` labels (FR-531)."""

    @staticmethod
    def _build_and_reimport(tmp_path: Path, markdown: str) -> str:
        """Build *markdown* as a LEVEL3 project, import the DOCX, return the text.

        Asserts that both the build and the import report success, then reads
        the first imported output file as UTF-8.
        """
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        project_dir = _make_project(tmp_path, markdown)
        manifest = load_manifest(project_dir / "manifest.yaml")
        built = build_document(manifest)
        assert built.success
        imported = import_document(manifest, built.output_path)
        assert imported.success
        first_output = imported.output_files[0]
        return first_output.read_text(encoding="utf-8")

    def test_roundtrip_preserves_anchor(self, tmp_path: Path) -> None:
        """Build LEVEL3 doc with {#anchor}, import back → heading has {#anchor}."""
        source_md = "# Introduction {#intro}\n\nSome text."
        roundtripped = self._build_and_reimport(tmp_path, source_md)
        assert "{#intro}" in roundtripped

    def test_roundtrip_preserves_cross_ref_link(self, tmp_path: Path) -> None:
        """Cross-ref link [text][anchor] survives a round trip."""
        source_md = textwrap.dedent("""\
            # Introduction {#intro}
            Some intro text.
            # Methodology {#method}
            See [Introduction][intro] for background.
        """)
        roundtripped = self._build_and_reimport(tmp_path, source_md)
        assert "{#intro}" in roundtripped
        assert "[Introduction][intro]" in roundtripped
# ---------------------------------------------------------------------------
# Differ: cross-ref detection (FR-540)
# ---------------------------------------------------------------------------
class TestDifferXref:
    """Differ tests: anchors and cross-ref links are reported as preserved or broken (FR-540)."""

    def test_preserved_anchor_reported(self) -> None:
        """Identical texts: the anchor is listed as preserved and not as broken."""
        from markidocx.differ import compare

        source = "# Introduction {#intro}\n\nText."
        roundtripped = "# Introduction {#intro}\n\nText."
        report = compare(source, roundtripped)
        preserved_hits = [entry for entry in report.preserved if "xref-anchor:intro" in entry]
        assert preserved_hits
        broken_hits = [entry for entry in report.broken if "xref-anchor" in entry]
        assert not broken_hits

    def test_missing_anchor_reported_as_broken(self) -> None:
        """An anchor dropped from the reimported text is reported as broken drift."""
        from markidocx.differ import compare

        source = "# Introduction {#intro}\n\nText."
        roundtripped = "# Introduction\n\nText."
        report = compare(source, roundtripped)
        missing_hits = [entry for entry in report.broken if "xref-anchor:missing 'intro'" in entry]
        assert missing_hits
        assert report.has_drift

    def test_preserved_xref_link(self) -> None:
        """A cross-ref link present on both sides is listed as preserved."""
        from markidocx.differ import compare

        text = "# Intro {#intro}\n\nSee [Intro][intro]."
        report = compare(text, text)
        link_hits = [entry for entry in report.preserved if "xref-link" in entry]
        assert link_hits

    def test_broken_xref_link_target_missing(self) -> None:
        """Losing both anchor and link yields at least one broken xref entry."""
        from markidocx.differ import compare

        source = "# Intro {#intro}\n\nSee [Intro][intro]."
        roundtripped = "# Intro\n\nSee something."
        report = compare(source, roundtripped)
        # anchor missing → broken xref link
        assert any("xref" in entry for entry in report.broken)
# ---------------------------------------------------------------------------
# Full single-file xref round-trip
# ---------------------------------------------------------------------------
class TestXrefRoundTrip:
    """End-to-end tests: build a LEVEL3 DOCX, import it back, diff the cross-refs."""

    @staticmethod
    def _roundtrip(tmp_path: Path, markdown: str) -> str:
        """Build *markdown* as a LEVEL3 project, import the DOCX, return the text.

        Asserts that both the build and the import report success, then reads
        the first imported output file as UTF-8.
        """
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        _make_project(tmp_path, markdown)
        m = load_manifest(tmp_path / "manifest.yaml")
        build_result = build_document(m)
        assert build_result.success
        import_result = import_document(m, build_result.output_path)
        assert import_result.success
        return import_result.output_files[0].read_text(encoding="utf-8")

    @staticmethod
    def _broken_xrefs(original: str, reimported: str) -> list[str]:
        """Return the differ's ``broken`` entries that mention ``xref``."""
        from markidocx.differ import compare

        report = compare(original, reimported)
        return [b for b in report.broken if "xref" in b]

    def test_single_file_xref_roundtrip(self, tmp_path: Path) -> None:
        """Two anchors and two links survive a round trip without broken xrefs."""
        md = textwrap.dedent("""\
            # Introduction {#intro}
            Welcome.
            # Background {#bg}
            See [Introduction][intro] and [Background][bg].
        """)
        reimported = self._roundtrip(tmp_path, md)
        # No broken cross-refs
        broken_xrefs = self._broken_xrefs(md, reimported)
        assert not broken_xrefs, f"Broken xrefs found: {broken_xrefs}"

    def test_multi_ref_document(self, tmp_path: Path) -> None:
        """Document with multiple anchors and refs doesn't produce broken xrefs."""
        md = textwrap.dedent("""\
            # Chapter One {#ch1}
            Opening.
            # Chapter Two {#ch2}
            See [Chapter One][ch1].
            # Chapter Three {#ch3}
            Refers to [Chapter One][ch1] and [Chapter Two][ch2].
        """)
        reimported = self._roundtrip(tmp_path, md)
        # All three anchors should be in reimported
        for anchor in ("ch1", "ch2", "ch3"):
            assert f"{{#{anchor}}}" in reimported
        # The docstring promises no broken xrefs, so actually run the differ
        # instead of only checking that the anchors are present.
        broken_xrefs = self._broken_xrefs(md, reimported)
        assert not broken_xrefs, f"Broken xrefs found: {broken_xrefs}"