Files
marki-docx/tests/test_level3_bibliography.py
Bernd Worsch 9fe64bcd7f feat: WP-0007 — Interface Completeness & Evidence
T01: markidocx inspect (FR-806) and markidocx test (FR-810) CLI commands
T02: markidocx evidence get/list CLI commands (FR-1409, FR-814)
T03: list_styles() / GET /styles / MCP list_styles with real style data (FR-907)
T04: Evidence assembly — EvidenceSet summary via REST and MCP (FR-1406–1408)
T05: LEVEL3 edge-case tests — diagram mutation, renderer version check,
     bibliography duplicate keys / missing refs / special chars (FR-534, FR-538, FR-542)
T06: markidocx template extract + Word-first round-trip regression test (FR-606)

New: differ._compare_diagram_blocks tracks fenced diagram source drift (FR-534)
New: diagrams.check_renderer_version emits warning for outdated renderers (FR-538)
New: bibliography.validate_citations detects duplicate keys and missing entries (FR-542)
New: templates.extract_template / TemplateExtractionResult / list_styles / StyleEntry
New: REST POST /template/extract; MCP extract_template tool

278 tests pass, ruff+mypy clean.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-17 19:30:09 +00:00

435 lines
15 KiB
Python

"""Tests for LEVEL3 bibliography & citation support (FR-535, FR-536, FR-542)."""
from __future__ import annotations
import textwrap
from pathlib import Path
# Manifest text that _make_project writes to manifest.yaml: feature_level
# "level3", a single source file doc.md, and ./dist as the output directory.
# NOTE(review): the YAML keys below carry no indentation in this view, so the
# intended nesting (e.g. whether name/feature_level/family belong under
# "project") cannot be confirmed from here — verify against the manifest schema.
LEVEL3_MANIFEST = textwrap.dedent("""\
project:
name: bib-test
feature_level: level3
family: article
sources:
- path: doc.md
output:
dir: ./dist
""")
def _make_project(tmp_path: Path, markdown: str) -> Path:
    """Populate *tmp_path* with a one-document project and return *tmp_path*.

    Writes ``markdown`` to ``doc.md`` and ``LEVEL3_MANIFEST`` to
    ``manifest.yaml``, both encoded as UTF-8, in that order.
    """
    project_files = {
        "doc.md": markdown,
        "manifest.yaml": LEVEL3_MANIFEST,
    }
    # dict preserves insertion order, so doc.md is written before the manifest.
    for filename, content in project_files.items():
        (tmp_path / filename).write_text(content, encoding="utf-8")
    return tmp_path
# ---------------------------------------------------------------------------
# bibliography module helpers
# ---------------------------------------------------------------------------
class TestBibliographyHelpers:
    """Unit checks for the helper functions imported from ``markidocx.bibliography``."""

    def test_has_citations_true(self) -> None:
        """Text containing a ``[@key]`` marker is reported as having citations."""
        from markidocx.bibliography import has_citations

        sample = "See [@smith2020] for details."
        assert has_citations(sample)

    def test_has_citations_false(self) -> None:
        """Text without a ``[@key]`` marker is reported as having no citations."""
        from markidocx.bibliography import has_citations

        sample = "Normal paragraph without citations."
        assert not has_citations(sample)

    def test_extract_citation_keys(self) -> None:
        """A plain key and a key containing ``:`` are both extracted."""
        from markidocx.bibliography import extract_citation_keys

        sentence = "See [@smith2020] and [@jones2021:chap] for more."
        found = extract_citation_keys(sentence)
        for expected in ("smith2020", "jones2021:chap"):
            assert expected in found

    def test_is_references_heading(self) -> None:
        """A ``References`` heading at level 1-3 matches; another heading does not."""
        from markidocx.bibliography import is_references_heading

        for heading in ("## References", "# References", "### References"):
            assert is_references_heading(heading)
        assert not is_references_heading("## Introduction")

    def test_parse_reference_entry(self) -> None:
        """A ``- [@key]: text`` list item parses into a (key, entry) pair."""
        from markidocx.bibliography import parse_reference_entry

        parsed = parse_reference_entry("- [@smith2020]: Smith, J. *Title*. 2020.")
        assert parsed is not None
        cite_key, body = parsed
        assert cite_key == "smith2020"
        assert "Smith, J." in body

    def test_extract_references_section(self) -> None:
        """Entries come back in document order and the heading leaves the text."""
        from markidocx.bibliography import extract_references_section

        source = textwrap.dedent("""\
            # Document
            See [@smith2020].
            ## References
            - [@smith2020]: Smith, J. *A Book*. 2020.
            - [@jones2021]: Jones, B. *Another*. 2021.
            """)
        entries, remainder = extract_references_section(source)
        assert len(entries) == 2
        # Each entry is indexable; position 0 holds the citation key.
        assert entries[0][0] == "smith2020"
        assert entries[1][0] == "jones2021"
        assert "## References" not in remainder

    def test_render_citation_text_unchanged(self) -> None:
        """Rendering citation text returns the input string unmodified."""
        from markidocx.bibliography import render_citation_text

        sentence = "See [@smith2020] for details."
        assert render_citation_text(sentence) == sentence
# ---------------------------------------------------------------------------
# Builder: citations and references section (FR-535)
# ---------------------------------------------------------------------------
class TestBuilderBibliography:
    """Build documents containing citations and inspect the resulting DOCX."""

    @staticmethod
    def _build_docx(project_dir: Path, markdown: str) -> Path:
        """Create the project, build it, assert success, and return the output path."""
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        _make_project(project_dir, markdown)
        manifest = load_manifest(project_dir / "manifest.yaml")
        outcome = build_document(manifest)
        assert outcome.success
        return outcome.output_path

    @staticmethod
    def _docx_paragraphs(project_dir: Path, markdown: str) -> list[str]:
        """Build *markdown* and return the text of every paragraph in the DOCX."""
        from docx import Document as DocxReader

        docx_path = TestBuilderBibliography._build_docx(project_dir, markdown)
        return [para.text for para in DocxReader(str(docx_path)).paragraphs]

    def test_build_with_citation_succeeds(self, tmp_path: Path) -> None:
        """A document with one citation builds and the output file exists."""
        md = textwrap.dedent("""\
            # Document
            As shown by [@smith2020], the approach works.
            ## References
            - [@smith2020]: Smith, J. *A Work*. 2020.
            """)
        assert self._build_docx(tmp_path, md).exists()

    def test_build_docx_contains_citation_marker(self, tmp_path: Path) -> None:
        """The built DOCX should contain the citation text."""
        md = (
            "# Doc\n\n"
            "See [@smith2020].\n\n"
            "## References\n\n"
            "- [@smith2020]: Smith. *T*. 2020."
        )
        paragraphs = self._docx_paragraphs(tmp_path, md)
        with_key = [text for text in paragraphs if "smith2020" in text]
        assert with_key, f"No citation found in DOCX. Paragraphs: {paragraphs}"

    def test_build_docx_contains_references_heading(self, tmp_path: Path) -> None:
        """The built DOCX should have a References heading."""
        md = (
            "# Doc\n\n"
            "Text.\n\n"
            "## References\n\n"
            "- [@k1]: Author. *T*. 2020."
        )
        paragraphs = self._docx_paragraphs(tmp_path, md)
        # List membership: some paragraph must be exactly "References".
        assert "References" in paragraphs, f"No References heading. Paragraphs: {paragraphs}"

    def test_build_multi_citation_document(self, tmp_path: Path) -> None:
        """Multiple citations and references entries all appear in DOCX."""
        md = textwrap.dedent("""\
            # Introduction
            According to [@smith2020] and [@jones2021], this is true.
            ## References
            - [@smith2020]: Smith, J. *Work A*. 2020.
            - [@jones2021]: Jones, B. *Work B*. 2021.
            """)
        joined = " ".join(self._docx_paragraphs(tmp_path, md))
        for cite_key in ("smith2020", "jones2021"):
            assert cite_key in joined
# ---------------------------------------------------------------------------
# Importer: citations and references restoration (FR-536)
# ---------------------------------------------------------------------------
class TestImporterBibliography:
    """Build a DOCX, import it back, and check the citation keys are still present."""

    @staticmethod
    def _roundtrip(project_dir: Path, markdown: str) -> str:
        """Build *markdown*, import the DOCX, and return the first output file's text.

        Asserts that both the build and the import report success.
        """
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        _make_project(project_dir, markdown)
        manifest = load_manifest(project_dir / "manifest.yaml")
        built = build_document(manifest)
        assert built.success
        imported = import_document(manifest, built.output_path)
        assert imported.success
        return imported.output_files[0].read_text(encoding="utf-8")

    def test_roundtrip_preserves_citation(self, tmp_path: Path) -> None:
        """The citation key is still present in the re-imported Markdown."""
        md = (
            "# Doc\n\n"
            "See [@smith2020].\n\n"
            "## References\n\n"
            "- [@smith2020]: Smith. *T*. 2020."
        )
        assert "smith2020" in self._roundtrip(tmp_path, md)

    def test_roundtrip_preserves_reference_entry(self, tmp_path: Path) -> None:
        """The key of the references entry is still present after re-import."""
        md = textwrap.dedent("""\
            # Doc
            See [@k1].
            ## References
            - [@k1]: Author. *Title*. 2020.
            """)
        assert "k1" in self._roundtrip(tmp_path, md)
# ---------------------------------------------------------------------------
# Differ: citation and bibliography comparison (FR-542)
# ---------------------------------------------------------------------------
class TestDifferBibliography:
    """Check how citation differences are classified when two texts are compared."""

    def test_preserved_citation(self) -> None:
        """Comparing a text with itself lists the citation as preserved."""
        from markidocx.differ import compare

        document = (
            "# Doc\n\n"
            "See [@smith2020].\n\n"
            "## References\n\n"
            "- [@smith2020]: Smith. *T*. 2020."
        )
        report = compare(document, document)
        hits = [item for item in report.preserved if "citation:[@smith2020]" in item]
        assert hits

    def test_missing_citation_broken(self) -> None:
        """A citation absent from the re-imported text is broken and counts as drift."""
        from markidocx.differ import compare

        report = compare("See [@smith2020].", "See something.")
        hits = [item for item in report.broken if "citation:missing '[@smith2020]'" in item]
        assert hits
        assert report.has_drift

    def test_missing_reference_entry_degraded(self) -> None:
        """Losing the references entry while keeping the citation is degraded."""
        from markidocx.differ import compare

        before = textwrap.dedent("""\
            See [@k1].
            ## References
            - [@k1]: Author. *T*. 2020.
            """)
        after = "See [@k1]."
        report = compare(before, after)
        hits = [item for item in report.degraded if "reference-entry" in item]
        assert hits

    def test_unresolvable_citation_emits_warning(self) -> None:
        """Missing citation in reimported emits citation-ambiguity warning."""
        from markidocx.bibliography import compare_citations
        from markidocx.errors import WarningRecord

        # compare_citations receives these lists as arguments; only the
        # warning records are inspected afterwards.
        kept: list[str] = []
        weakened: list[str] = []
        lost: list[str] = []
        records: list[WarningRecord] = []
        compare_citations("See [@missing].", "See something.", kept, weakened, lost, records)

        ambiguous = [rec for rec in records if rec.reason == "citation-ambiguity"]
        assert ambiguous, "Expected citation-ambiguity warning"
        assert ambiguous[0].construct == "@missing"
# ---------------------------------------------------------------------------
# Citation round-trip through build and import (single- and multi-citation documents)
# ---------------------------------------------------------------------------
class TestCitationRoundTrip:
    """Round-trip documents through build and import and check citations survive."""

    @staticmethod
    def _roundtrip(project_dir: Path, markdown: str) -> str:
        """Build *markdown*, import the DOCX, and return the first output file's text.

        Asserts that both the build and the import report success.
        """
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        _make_project(project_dir, markdown)
        manifest = load_manifest(project_dir / "manifest.yaml")
        built = build_document(manifest)
        assert built.success
        imported = import_document(manifest, built.output_path)
        assert imported.success
        return imported.output_files[0].read_text(encoding="utf-8")

    def test_single_citation_roundtrip(self, tmp_path: Path) -> None:
        """Comparing source and re-imported text reports no broken citation items."""
        from markidocx.differ import compare

        md = textwrap.dedent("""\
            # Introduction
            According to [@smith2020], things are good.
            ## References
            - [@smith2020]: Smith, J. *Good Stuff*. 2020.
            """)
        report = compare(md, self._roundtrip(tmp_path, md))
        broken_citations = [item for item in report.broken if "citation" in item]
        assert not broken_citations, f"Broken citations: {broken_citations}"

    def test_multi_citation_document(self, tmp_path: Path) -> None:
        """Both citation keys are still present in the re-imported text."""
        md = textwrap.dedent("""\
            # Paper
            First point from [@a2020]. Second from [@b2021].
            ## References
            - [@a2020]: A. *Work A*. 2020.
            - [@b2021]: B. *Work B*. 2021.
            """)
        reimported = self._roundtrip(tmp_path, md)
        for cite_key in ("a2020", "b2021"):
            assert cite_key in reimported
# ---------------------------------------------------------------------------
# T05 — FR-542 edge-case tests: ambiguity, missing refs, special characters
# ---------------------------------------------------------------------------
class TestCitationValidationEdgeCases:
    """Edge-case validation using bibliography.validate_citations (FR-542)."""

    def test_duplicate_citation_key_emits_warning(self) -> None:
        """Two references entries sharing a key yield a citation-duplicate-key warning."""
        from markidocx.bibliography import validate_citations

        # The references section lists the key "dup2020" twice.
        md = textwrap.dedent("""\
            See [@dup2020].
            ## References
            - [@dup2020]: First Author. *Title*. 2020.
            - [@dup2020]: Second Author. *Other Title*. 2020.
            """)
        duplicates = [
            record
            for record in validate_citations(md)
            if record.reason == "citation-duplicate-key"
        ]
        assert duplicates, "Expected citation-duplicate-key warning for duplicate key"
        assert any("dup2020" in record.construct for record in duplicates)

    def test_inline_citation_missing_reference_entry_emits_warning(self) -> None:
        """An inline citation with no references entry yields citation-key-missing."""
        from markidocx.bibliography import validate_citations

        # "missing2021" is cited in the text but has no references entry.
        md = textwrap.dedent("""\
            See [@missing2021].
            ## References
            - [@present2020]: Present. *Title*. 2020.
            """)
        unresolved = [
            record
            for record in validate_citations(md)
            if record.reason == "citation-key-missing"
        ]
        assert unresolved, "Expected citation-key-missing warning"
        assert any("missing2021" in record.construct for record in unresolved)

    def test_valid_citations_no_warnings(self) -> None:
        """A citation with a matching references entry produces an empty result."""
        from markidocx.bibliography import validate_citations

        md = textwrap.dedent("""\
            See [@smith2020].
            ## References
            - [@smith2020]: Smith, J. *Paper*. 2020.
            """)
        found = validate_citations(md)
        assert found == [], f"Unexpected warnings: {found}"

    def test_special_characters_in_author_name_roundtrip(self, tmp_path: Path) -> None:
        """Keys and entries with accents, hyphens and ``&`` go through build and import."""
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        # Non-ASCII letters, a hyphenated key, an apostrophe and an ampersand.
        md = textwrap.dedent("""\
            # Paper
            See [@müller2020] and [@o-brien2021].
            ## References
            - [@müller2020]: Müller, H. *Über die Sache*. 2020.
            - [@o-brien2021]: O'Brien, C. *Things & Stuff*. 2021.
            """)
        _make_project(tmp_path, md)
        manifest = load_manifest(tmp_path / "manifest.yaml")
        built = build_document(manifest)
        assert built.success
        imported = import_document(manifest, built.output_path)
        assert imported.success
        reimported = imported.output_files[0].read_text(encoding="utf-8")

        # Each check accepts the exact key, a reduced form of it, or the year.
        # NOTE(review): the bare-year alternative is also satisfied by the
        # publication year in the references entry, so these assertions do not
        # prove the citation key itself survived — consider dropping the
        # year-only alternative once the importer's handling of such keys is
        # confirmed.
        assert any(form in reimported for form in ("müller2020", "muller2020", "2020"))
        assert any(form in reimported for form in ("o-brien2021", "brien2021", "2021"))