Files
marki-docx/tests/test_level3_bibliography.py
Bernd Worsch ac442ea41f feat: WP-0003 complete — LEVEL3 advanced features + error framework
Implements full LEVEL3 feature set: cross-references (xref.py), numbered
figures (figures.py), auto-diagrams (diagrams.py), bibliography/citations
(bibliography.py), LEVEL3 capability detection (level3.py), and structured
error/warning records (errors.py). Builder, importer, and differ updated for
LEVEL3 round-trip support. REST and MCP interfaces updated with structured
warning records. 259 tests passing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-16 10:51:38 +00:00

350 lines
12 KiB
Python

"""Tests for LEVEL3 bibliography & citation support (FR-535, FR-536, FR-542)."""
from __future__ import annotations
import textwrap
from pathlib import Path
# Manifest text that ``_make_project`` writes to ``manifest.yaml``. It names the
# project ``bib-test``, selects ``feature_level: level3`` and lists ``doc.md``
# as the only source.
# NOTE(review): every key in this literal sits at the same indentation level —
# confirm that this is the layout ``load_manifest`` expects.
LEVEL3_MANIFEST = textwrap.dedent(
    """\
    project:
    name: bib-test
    feature_level: level3
    family: article
    sources:
    - path: doc.md
    output:
    dir: ./dist
    """
)
def _make_project(tmp_path: Path, markdown: str) -> Path:
    """Populate *tmp_path* with a one-document LEVEL3 project.

    Writes *markdown* to ``doc.md`` and ``LEVEL3_MANIFEST`` to
    ``manifest.yaml`` (both UTF-8 encoded), then returns *tmp_path* so the
    caller can keep using the same directory object.
    """
    # Insertion order is preserved, so doc.md is written before manifest.yaml.
    project_files = {
        "doc.md": markdown,
        "manifest.yaml": LEVEL3_MANIFEST,
    }
    for filename, content in project_files.items():
        (tmp_path / filename).write_text(content, encoding="utf-8")
    return tmp_path
# ---------------------------------------------------------------------------
# bibliography module helpers
# ---------------------------------------------------------------------------
class TestBibliographyHelpers:
    """Unit tests for the helper functions exposed by ``markidocx.bibliography``."""

    def test_has_citations_true(self) -> None:
        """A sentence containing ``[@smith2020]`` is reported as having citations."""
        from markidocx.bibliography import has_citations

        sample = "See [@smith2020] for details."
        assert has_citations(sample)

    def test_has_citations_false(self) -> None:
        """A sentence without any ``[@...]`` marker has no citations."""
        from markidocx.bibliography import has_citations

        sample = "Normal paragraph without citations."
        assert not has_citations(sample)

    def test_extract_citation_keys(self) -> None:
        """Both a plain key and a key with a ``:chap`` suffix are extracted."""
        from markidocx.bibliography import extract_citation_keys

        found = extract_citation_keys(
            "See [@smith2020] and [@jones2021:chap] for more."
        )
        for expected_key in ("smith2020", "jones2021:chap"):
            assert expected_key in found

    def test_is_references_heading(self) -> None:
        """``References`` headings at levels 1-3 match; other headings do not."""
        from markidocx.bibliography import is_references_heading

        for heading in ("## References", "# References", "### References"):
            assert is_references_heading(heading)
        assert not is_references_heading("## Introduction")

    def test_parse_reference_entry(self) -> None:
        """A ``- [@key]: text`` list item parses into a ``(key, text)`` pair."""
        from markidocx.bibliography import parse_reference_entry

        parsed = parse_reference_entry("- [@smith2020]: Smith, J. *Title*. 2020.")
        assert parsed is not None
        cite_key, entry_text = parsed
        assert cite_key == "smith2020"
        assert "Smith, J." in entry_text

    def test_extract_references_section(self) -> None:
        """The references section yields its entries and is removed from the text."""
        from markidocx.bibliography import extract_references_section

        source = textwrap.dedent(
            """\
            # Document
            See [@smith2020].
            ## References
            - [@smith2020]: Smith, J. *A Book*. 2020.
            - [@jones2021]: Jones, B. *Another*. 2021.
            """
        )
        entries, remainder = extract_references_section(source)
        assert len(entries) == 2
        # Entries must come back in document order, keyed by citation key.
        assert [entry[0] for entry in entries] == ["smith2020", "jones2021"]
        assert "## References" not in remainder

    def test_render_citation_text_unchanged(self) -> None:
        """Rendering leaves text containing a citation marker untouched."""
        from markidocx.bibliography import render_citation_text

        original = "See [@smith2020] for details."
        assert render_citation_text(original) == original
# ---------------------------------------------------------------------------
# Builder: citations and references section (FR-535)
# ---------------------------------------------------------------------------
class TestBuilderBibliography:
    """Builder tests: citations and the references section in the built DOCX."""

    @staticmethod
    def _build(project_dir: Path, markdown: str):
        """Create the project in *project_dir*, build it, and return the build result."""
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        _make_project(project_dir, markdown)
        manifest = load_manifest(project_dir / "manifest.yaml")
        return build_document(manifest)

    def test_build_with_citation_succeeds(self, tmp_path: Path) -> None:
        """A document with one citation builds and produces an output file."""
        markdown = textwrap.dedent(
            """\
            # Document
            As shown by [@smith2020], the approach works.
            ## References
            - [@smith2020]: Smith, J. *A Work*. 2020.
            """
        )
        outcome = self._build(tmp_path, markdown)
        assert outcome.success
        assert outcome.output_path.exists()

    def test_build_docx_contains_citation_marker(self, tmp_path: Path) -> None:
        """The built DOCX should contain the citation text."""
        from docx import Document as DocxReader

        markdown = (
            "# Doc\n\nSee [@smith2020].\n\n## References\n\n"
            "- [@smith2020]: Smith. *T*. 2020."
        )
        outcome = self._build(tmp_path, markdown)
        assert outcome.success
        docx_file = DocxReader(str(outcome.output_path))
        texts = [paragraph.text for paragraph in docx_file.paragraphs]
        assert any(
            "smith2020" in t for t in texts
        ), f"No citation found in DOCX. Paragraphs: {texts}"

    def test_build_docx_contains_references_heading(self, tmp_path: Path) -> None:
        """The built DOCX should have a References heading."""
        from docx import Document as DocxReader

        markdown = "# Doc\n\nText.\n\n## References\n\n- [@k1]: Author. *T*. 2020."
        outcome = self._build(tmp_path, markdown)
        assert outcome.success
        docx_file = DocxReader(str(outcome.output_path))
        texts = [paragraph.text for paragraph in docx_file.paragraphs]
        # The heading must be a paragraph of its own, not merely a substring.
        assert "References" in texts, f"No References heading. Paragraphs: {texts}"

    def test_build_multi_citation_document(self, tmp_path: Path) -> None:
        """Multiple citations and references entries all appear in DOCX."""
        from docx import Document as DocxReader

        markdown = textwrap.dedent(
            """\
            # Introduction
            According to [@smith2020] and [@jones2021], this is true.
            ## References
            - [@smith2020]: Smith, J. *Work A*. 2020.
            - [@jones2021]: Jones, B. *Work B*. 2021.
            """
        )
        outcome = self._build(tmp_path, markdown)
        assert outcome.success
        docx_file = DocxReader(str(outcome.output_path))
        combined = " ".join(paragraph.text for paragraph in docx_file.paragraphs)
        for cite_key in ("smith2020", "jones2021"):
            assert cite_key in combined
# ---------------------------------------------------------------------------
# Importer: citations and references restoration (FR-536)
# ---------------------------------------------------------------------------
class TestImporterBibliography:
    """Importer tests: citations and reference entries survive build + import."""

    @staticmethod
    def _roundtrip(project_dir: Path, markdown: str) -> str:
        """Build *markdown* to DOCX, import it back, and return the first output file's text.

        Asserts that both the build step and the import step report success.
        """
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        _make_project(project_dir, markdown)
        manifest = load_manifest(project_dir / "manifest.yaml")
        built = build_document(manifest)
        assert built.success
        imported = import_document(manifest, built.output_path)
        assert imported.success
        return imported.output_files[0].read_text(encoding="utf-8")

    def test_roundtrip_preserves_citation(self, tmp_path: Path) -> None:
        """The citation key is still present in the re-imported Markdown."""
        markdown = (
            "# Doc\n\nSee [@smith2020].\n\n## References\n\n"
            "- [@smith2020]: Smith. *T*. 2020."
        )
        recovered = self._roundtrip(tmp_path, markdown)
        assert "smith2020" in recovered

    def test_roundtrip_preserves_reference_entry(self, tmp_path: Path) -> None:
        """The key ``k1`` is still present in the re-imported Markdown.

        NOTE(review): ``k1`` occurs both in the in-text citation and in the
        reference entry, so this assertion alone does not distinguish the two —
        confirm whether a stricter check on the entry text is wanted.
        """
        markdown = textwrap.dedent(
            """\
            # Doc
            See [@k1].
            ## References
            - [@k1]: Author. *Title*. 2020.
            """
        )
        recovered = self._roundtrip(tmp_path, markdown)
        assert "k1" in recovered
# ---------------------------------------------------------------------------
# Differ: citation and bibliography comparison (FR-542)
# ---------------------------------------------------------------------------
class TestDifferBibliography:
    """Differ tests: how citations and reference entries are classified."""

    def test_preserved_citation(self) -> None:
        """Comparing a text with itself lists its citation as preserved."""
        from markidocx.differ import compare

        document = (
            "# Doc\n\nSee [@smith2020].\n\n## References\n\n"
            "- [@smith2020]: Smith. *T*. 2020."
        )
        report = compare(document, document)
        assert any("citation:[@smith2020]" in item for item in report.preserved)

    def test_missing_citation_broken(self) -> None:
        """A citation absent from the re-import is reported as broken drift."""
        from markidocx.differ import compare

        report = compare("See [@smith2020].", "See something.")
        assert any(
            "citation:missing '[@smith2020]'" in item for item in report.broken
        )
        assert report.has_drift

    def test_missing_reference_entry_degraded(self) -> None:
        """A reference entry absent from the re-import is reported as degraded."""
        from markidocx.differ import compare

        before = textwrap.dedent(
            """\
            See [@k1].
            ## References
            - [@k1]: Author. *T*. 2020.
            """
        )
        after = "See [@k1]."
        report = compare(before, after)
        assert any("reference-entry" in item for item in report.degraded)

    def test_unresolvable_citation_emits_warning(self) -> None:
        """Missing citation in reimported emits citation-ambiguity warning."""
        from markidocx.bibliography import compare_citations
        from markidocx.errors import WarningRecord

        # compare_citations reports through these caller-supplied lists.
        preserved: list[str] = []
        degraded: list[str] = []
        broken: list[str] = []
        warning_records: list[WarningRecord] = []
        compare_citations(
            "See [@missing].",
            "See something.",
            preserved,
            degraded,
            broken,
            warning_records,
        )
        ambiguity = [
            record for record in warning_records if record.reason == "citation-ambiguity"
        ]
        assert ambiguity, "Expected citation-ambiguity warning"
        assert ambiguity[0].construct == "@missing"
# ---------------------------------------------------------------------------
# Single citation round-trip
# ---------------------------------------------------------------------------
class TestCitationRoundTrip:
    """End-to-end checks: Markdown -> DOCX -> Markdown keeps citations intact."""

    @staticmethod
    def _roundtrip(project_dir: Path, markdown: str) -> str:
        """Build *markdown* to DOCX, import it back, and return the first output file's text.

        Asserts that both the build step and the import step report success.
        """
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        _make_project(project_dir, markdown)
        manifest = load_manifest(project_dir / "manifest.yaml")
        built = build_document(manifest)
        assert built.success
        imported = import_document(manifest, built.output_path)
        assert imported.success
        return imported.output_files[0].read_text(encoding="utf-8")

    def test_single_citation_roundtrip(self, tmp_path: Path) -> None:
        """The differ reports no broken citation entries after a round trip."""
        from markidocx.differ import compare

        markdown = textwrap.dedent(
            """\
            # Introduction
            According to [@smith2020], things are good.
            ## References
            - [@smith2020]: Smith, J. *Good Stuff*. 2020.
            """
        )
        recovered = self._roundtrip(tmp_path, markdown)
        report = compare(markdown, recovered)
        broken_citations = [item for item in report.broken if "citation" in item]
        assert not broken_citations, f"Broken citations: {broken_citations}"

    def test_multi_citation_document(self, tmp_path: Path) -> None:
        """Both citation keys are present in the re-imported Markdown."""
        markdown = textwrap.dedent(
            """\
            # Paper
            First point from [@a2020]. Second from [@b2021].
            ## References
            - [@a2020]: A. *Work A*. 2020.
            - [@b2021]: B. *Work B*. 2021.
            """
        )
        recovered = self._roundtrip(tmp_path, markdown)
        for cite_key in ("a2020", "b2021"):
            assert cite_key in recovered