"""Tests for LEVEL3 bibliography & citation support (FR-535, FR-536, FR-542).""" from __future__ import annotations import textwrap from pathlib import Path LEVEL3_MANIFEST = textwrap.dedent("""\ project: name: bib-test feature_level: level3 family: article sources: - path: doc.md output: dir: ./dist """) def _make_project(tmp_path: Path, markdown: str) -> Path: (tmp_path / "doc.md").write_text(markdown, encoding="utf-8") (tmp_path / "manifest.yaml").write_text(LEVEL3_MANIFEST, encoding="utf-8") return tmp_path # --------------------------------------------------------------------------- # bibliography module helpers # --------------------------------------------------------------------------- class TestBibliographyHelpers: def test_has_citations_true(self) -> None: from markidocx.bibliography import has_citations assert has_citations("See [@smith2020] for details.") def test_has_citations_false(self) -> None: from markidocx.bibliography import has_citations assert not has_citations("Normal paragraph without citations.") def test_extract_citation_keys(self) -> None: from markidocx.bibliography import extract_citation_keys text = "See [@smith2020] and [@jones2021:chap] for more." keys = extract_citation_keys(text) assert "smith2020" in keys assert "jones2021:chap" in keys def test_is_references_heading(self) -> None: from markidocx.bibliography import is_references_heading assert is_references_heading("## References") assert is_references_heading("# References") assert is_references_heading("### References") assert not is_references_heading("## Introduction") def test_parse_reference_entry(self) -> None: from markidocx.bibliography import parse_reference_entry result = parse_reference_entry("- [@smith2020]: Smith, J. *Title*. 2020.") assert result is not None key, entry = result assert key == "smith2020" assert "Smith, J." in entry def test_extract_references_section(self) -> None: from markidocx.bibliography import extract_references_section md = textwrap.dedent("""\ # Document See [@smith2020]. ## References - [@smith2020]: Smith, J. *A Book*. 2020. - [@jones2021]: Jones, B. *Another*. 2021. """) entries, text_without = extract_references_section(md) assert len(entries) == 2 assert entries[0][0] == "smith2020" assert entries[1][0] == "jones2021" assert "## References" not in text_without def test_render_citation_text_unchanged(self) -> None: from markidocx.bibliography import render_citation_text text = "See [@smith2020] for details." assert render_citation_text(text) == text # --------------------------------------------------------------------------- # Builder: citations and references section (FR-535) # --------------------------------------------------------------------------- class TestBuilderBibliography: def test_build_with_citation_succeeds(self, tmp_path: Path) -> None: from markidocx.builder import build_document from markidocx.manifest import load_manifest md = textwrap.dedent("""\ # Document As shown by [@smith2020], the approach works. ## References - [@smith2020]: Smith, J. *A Work*. 2020. """) _make_project(tmp_path, md) m = load_manifest(tmp_path / "manifest.yaml") result = build_document(m) assert result.success assert result.output_path.exists() def test_build_docx_contains_citation_marker(self, tmp_path: Path) -> None: """The built DOCX should contain the citation text.""" from docx import Document as DocxReader from markidocx.builder import build_document from markidocx.manifest import load_manifest md = "# Doc\n\nSee [@smith2020].\n\n## References\n\n- [@smith2020]: Smith. *T*. 2020." _make_project(tmp_path, md) m = load_manifest(tmp_path / "manifest.yaml") result = build_document(m) assert result.success doc = DocxReader(str(result.output_path)) texts = [p.text for p in doc.paragraphs] citation_paras = [t for t in texts if "smith2020" in t] assert citation_paras, f"No citation found in DOCX. Paragraphs: {texts}" def test_build_docx_contains_references_heading(self, tmp_path: Path) -> None: """The built DOCX should have a References heading.""" from docx import Document as DocxReader from markidocx.builder import build_document from markidocx.manifest import load_manifest md = "# Doc\n\nText.\n\n## References\n\n- [@k1]: Author. *T*. 2020." _make_project(tmp_path, md) m = load_manifest(tmp_path / "manifest.yaml") result = build_document(m) assert result.success doc = DocxReader(str(result.output_path)) texts = [p.text for p in doc.paragraphs] assert "References" in texts, f"No References heading. Paragraphs: {texts}" def test_build_multi_citation_document(self, tmp_path: Path) -> None: """Multiple citations and references entries all appear in DOCX.""" from docx import Document as DocxReader from markidocx.builder import build_document from markidocx.manifest import load_manifest md = textwrap.dedent("""\ # Introduction According to [@smith2020] and [@jones2021], this is true. ## References - [@smith2020]: Smith, J. *Work A*. 2020. - [@jones2021]: Jones, B. *Work B*. 2021. """) _make_project(tmp_path, md) m = load_manifest(tmp_path / "manifest.yaml") result = build_document(m) assert result.success doc = DocxReader(str(result.output_path)) all_text = " ".join(p.text for p in doc.paragraphs) assert "smith2020" in all_text assert "jones2021" in all_text # --------------------------------------------------------------------------- # Importer: citations and references restoration (FR-536) # --------------------------------------------------------------------------- class TestImporterBibliography: def test_roundtrip_preserves_citation(self, tmp_path: Path) -> None: from markidocx.builder import build_document from markidocx.importer import import_document from markidocx.manifest import load_manifest md = "# Doc\n\nSee [@smith2020].\n\n## References\n\n- [@smith2020]: Smith. *T*. 2020." _make_project(tmp_path, md) m = load_manifest(tmp_path / "manifest.yaml") build_result = build_document(m) assert build_result.success import_result = import_document(m, build_result.output_path) assert import_result.success reimported = import_result.output_files[0].read_text(encoding="utf-8") assert "smith2020" in reimported def test_roundtrip_preserves_reference_entry(self, tmp_path: Path) -> None: from markidocx.builder import build_document from markidocx.importer import import_document from markidocx.manifest import load_manifest md = textwrap.dedent("""\ # Doc See [@k1]. ## References - [@k1]: Author. *Title*. 2020. """) _make_project(tmp_path, md) m = load_manifest(tmp_path / "manifest.yaml") build_result = build_document(m) assert build_result.success import_result = import_document(m, build_result.output_path) assert import_result.success reimported = import_result.output_files[0].read_text(encoding="utf-8") assert "k1" in reimported # --------------------------------------------------------------------------- # Differ: citation and bibliography comparison (FR-542) # --------------------------------------------------------------------------- class TestDifferBibliography: def test_preserved_citation(self) -> None: from markidocx.differ import compare text = "# Doc\n\nSee [@smith2020].\n\n## References\n\n- [@smith2020]: Smith. *T*. 2020." report = compare(text, text) assert any("citation:[@smith2020]" in p for p in report.preserved) def test_missing_citation_broken(self) -> None: from markidocx.differ import compare original = "See [@smith2020]." reimported = "See something." report = compare(original, reimported) assert any("citation:missing '[@smith2020]'" in b for b in report.broken) assert report.has_drift def test_missing_reference_entry_degraded(self) -> None: from markidocx.differ import compare original = textwrap.dedent("""\ See [@k1]. ## References - [@k1]: Author. *T*. 2020. """) reimported = "See [@k1]." report = compare(original, reimported) assert any("reference-entry" in d for d in report.degraded) def test_unresolvable_citation_emits_warning(self) -> None: """Missing citation in reimported emits citation-ambiguity warning.""" from markidocx.bibliography import compare_citations from markidocx.errors import WarningRecord original = "See [@missing]." reimported = "See something." preserved: list[str] = [] degraded: list[str] = [] broken: list[str] = [] warning_records: list[WarningRecord] = [] compare_citations(original, reimported, preserved, degraded, broken, warning_records) ambiguity = [w for w in warning_records if w.reason == "citation-ambiguity"] assert ambiguity, "Expected citation-ambiguity warning" assert ambiguity[0].construct == "@missing" # --------------------------------------------------------------------------- # Single citation round-trip # --------------------------------------------------------------------------- class TestCitationRoundTrip: def test_single_citation_roundtrip(self, tmp_path: Path) -> None: from markidocx.builder import build_document from markidocx.differ import compare from markidocx.importer import import_document from markidocx.manifest import load_manifest md = textwrap.dedent("""\ # Introduction According to [@smith2020], things are good. ## References - [@smith2020]: Smith, J. *Good Stuff*. 2020. """) _make_project(tmp_path, md) m = load_manifest(tmp_path / "manifest.yaml") build_result = build_document(m) assert build_result.success import_result = import_document(m, build_result.output_path) assert import_result.success reimported = import_result.output_files[0].read_text(encoding="utf-8") report = compare(md, reimported) broken_citations = [b for b in report.broken if "citation" in b] assert not broken_citations, f"Broken citations: {broken_citations}" def test_multi_citation_document(self, tmp_path: Path) -> None: from markidocx.builder import build_document from markidocx.importer import import_document from markidocx.manifest import load_manifest md = textwrap.dedent("""\ # Paper First point from [@a2020]. Second from [@b2021]. ## References - [@a2020]: A. *Work A*. 2020. - [@b2021]: B. *Work B*. 2021. """) _make_project(tmp_path, md) m = load_manifest(tmp_path / "manifest.yaml") build_result = build_document(m) assert build_result.success import_result = import_document(m, build_result.output_path) assert import_result.success reimported = import_result.output_files[0].read_text(encoding="utf-8") assert "a2020" in reimported assert "b2021" in reimported