generated from coulomb/repo-seed
Implements full LEVEL3 feature set: cross-references (xref.py), numbered figures (figures.py), auto-diagrams (diagrams.py), bibliography/citations (bibliography.py), LEVEL3 capability detection (level3.py), and structured error/warning records (errors.py). Builder, importer, and differ updated for LEVEL3 round-trip support. REST and MCP interfaces updated with structured warning records. 259 tests passing. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
350 lines
12 KiB
Python
350 lines
12 KiB
Python
"""Tests for LEVEL3 bibliography & citation support (FR-535, FR-536, FR-542)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import textwrap
|
|
from pathlib import Path
|
|
|
|
# Minimal LEVEL3 project manifest; `_make_project` writes it verbatim to
# ``manifest.yaml`` next to the generated ``doc.md``.
# NOTE(review): the two-space nesting below was reconstructed from the key
# order (``project``/``sources``/``output`` carry no inline value, so the keys
# that follow them must be their children) — confirm against the manifest
# schema accepted by ``load_manifest``.
LEVEL3_MANIFEST = textwrap.dedent("""\
    project:
      name: bib-test
      feature_level: level3
      family: article
    sources:
      - path: doc.md
    output:
      dir: ./dist
""")
|
|
|
|
|
|
def _make_project(tmp_path: Path, markdown: str) -> Path:
    """Populate *tmp_path* with a one-document project and return it.

    Two UTF-8 files are written, in this order: ``doc.md`` holding
    *markdown*, then ``manifest.yaml`` holding ``LEVEL3_MANIFEST``.
    """
    project_files = {
        "doc.md": markdown,
        "manifest.yaml": LEVEL3_MANIFEST,
    }
    for filename, content in project_files.items():
        tmp_path.joinpath(filename).write_text(content, encoding="utf-8")
    return tmp_path
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# bibliography module helpers
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestBibliographyHelpers:
    """Checks for the text helpers imported from ``markidocx.bibliography``."""

    def test_has_citations_true(self) -> None:
        """Text containing a ``[@key]`` marker is reported as having citations."""
        from markidocx.bibliography import has_citations

        sample = "See [@smith2020] for details."
        assert has_citations(sample)

    def test_has_citations_false(self) -> None:
        """Text without any ``[@key]`` marker is reported as citation-free."""
        from markidocx.bibliography import has_citations

        sample = "Normal paragraph without citations."
        assert not has_citations(sample)

    def test_extract_citation_keys(self) -> None:
        """Both a plain key and a key containing ``:`` are extracted."""
        from markidocx.bibliography import extract_citation_keys

        found = extract_citation_keys(
            "See [@smith2020] and [@jones2021:chap] for more."
        )
        for expected in ("smith2020", "jones2021:chap"):
            assert expected in found

    def test_is_references_heading(self) -> None:
        """``References`` headings of level 1-3 match; other headings do not."""
        from markidocx.bibliography import is_references_heading

        for heading in ("## References", "# References", "### References"):
            assert is_references_heading(heading)
        assert not is_references_heading("## Introduction")

    def test_parse_reference_entry(self) -> None:
        """A ``- [@key]: text`` line parses into a ``(key, entry)`` pair."""
        from markidocx.bibliography import parse_reference_entry

        parsed = parse_reference_entry("- [@smith2020]: Smith, J. *Title*. 2020.")
        assert parsed is not None
        cite_key, entry_text = parsed
        assert cite_key == "smith2020"
        assert "Smith, J." in entry_text

    def test_extract_references_section(self) -> None:
        """Entries come back in document order and the heading is stripped."""
        from markidocx.bibliography import extract_references_section

        document = textwrap.dedent("""\
            # Document

            See [@smith2020].

            ## References

            - [@smith2020]: Smith, J. *A Book*. 2020.
            - [@jones2021]: Jones, B. *Another*. 2021.
        """)
        ref_entries, remaining_text = extract_references_section(document)
        assert len(ref_entries) == 2
        assert ref_entries[0][0] == "smith2020"
        assert ref_entries[1][0] == "jones2021"
        assert "## References" not in remaining_text

    def test_render_citation_text_unchanged(self) -> None:
        """Rendering returns text equal to the input sentence."""
        from markidocx.bibliography import render_citation_text

        sentence = "See [@smith2020] for details."
        rendered = render_citation_text(sentence)
        assert rendered == sentence
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Builder: citations and references section (FR-535)
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestBuilderBibliography:
    """Build a LEVEL3 project and inspect the resulting DOCX paragraphs."""

    @staticmethod
    def _build(project_dir: Path, markdown: str):
        """Write the project into *project_dir*, build it, return the result."""
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        _make_project(project_dir, markdown)
        manifest = load_manifest(project_dir / "manifest.yaml")
        return build_document(manifest)

    @staticmethod
    def _paragraph_texts(docx_path: Path) -> list[str]:
        """Return the text of every paragraph in the DOCX at *docx_path*."""
        from docx import Document as DocxReader

        return [para.text for para in DocxReader(str(docx_path)).paragraphs]

    def test_build_with_citation_succeeds(self, tmp_path: Path) -> None:
        """A cited document builds successfully and the output file exists."""
        markdown = textwrap.dedent("""\
            # Document

            As shown by [@smith2020], the approach works.

            ## References

            - [@smith2020]: Smith, J. *A Work*. 2020.
        """)
        outcome = self._build(tmp_path, markdown)
        assert outcome.success
        assert outcome.output_path.exists()

    def test_build_docx_contains_citation_marker(self, tmp_path: Path) -> None:
        """The built DOCX should contain the citation text."""
        markdown = (
            "# Doc\n\n"
            "See [@smith2020].\n\n"
            "## References\n\n"
            "- [@smith2020]: Smith. *T*. 2020."
        )
        outcome = self._build(tmp_path, markdown)
        assert outcome.success

        texts = self._paragraph_texts(outcome.output_path)
        has_citation = any("smith2020" in text for text in texts)
        assert has_citation, f"No citation found in DOCX. Paragraphs: {texts}"

    def test_build_docx_contains_references_heading(self, tmp_path: Path) -> None:
        """The built DOCX should have a References heading."""
        markdown = (
            "# Doc\n\n"
            "Text.\n\n"
            "## References\n\n"
            "- [@k1]: Author. *T*. 2020."
        )
        outcome = self._build(tmp_path, markdown)
        assert outcome.success

        texts = self._paragraph_texts(outcome.output_path)
        # Exact match: some paragraph's full text must be "References".
        assert "References" in texts, f"No References heading. Paragraphs: {texts}"

    def test_build_multi_citation_document(self, tmp_path: Path) -> None:
        """Multiple citations and references entries all appear in DOCX."""
        markdown = textwrap.dedent("""\
            # Introduction

            According to [@smith2020] and [@jones2021], this is true.

            ## References

            - [@smith2020]: Smith, J. *Work A*. 2020.
            - [@jones2021]: Jones, B. *Work B*. 2021.
        """)
        outcome = self._build(tmp_path, markdown)
        assert outcome.success

        joined = " ".join(self._paragraph_texts(outcome.output_path))
        for cite_key in ("smith2020", "jones2021"):
            assert cite_key in joined
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Importer: citations and references restoration (FR-536)
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestImporterBibliography:
    """Build a DOCX, import it back, and inspect the re-imported Markdown."""

    @staticmethod
    def _roundtrip(project_dir: Path, markdown: str) -> str:
        """Build then re-import *markdown*; return the first imported file's text.

        Both the build step and the import step are asserted to succeed.
        """
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        _make_project(project_dir, markdown)
        manifest = load_manifest(project_dir / "manifest.yaml")

        built = build_document(manifest)
        assert built.success

        imported = import_document(manifest, built.output_path)
        assert imported.success

        return imported.output_files[0].read_text(encoding="utf-8")

    def test_roundtrip_preserves_citation(self, tmp_path: Path) -> None:
        """The citation key is still present after build + import."""
        markdown = (
            "# Doc\n\n"
            "See [@smith2020].\n\n"
            "## References\n\n"
            "- [@smith2020]: Smith. *T*. 2020."
        )
        recovered = self._roundtrip(tmp_path, markdown)
        assert "smith2020" in recovered

    def test_roundtrip_preserves_reference_entry(self, tmp_path: Path) -> None:
        """The key ``k1`` is still present after build + import."""
        markdown = textwrap.dedent("""\
            # Doc

            See [@k1].

            ## References

            - [@k1]: Author. *Title*. 2020.
        """)
        recovered = self._roundtrip(tmp_path, markdown)
        # NOTE(review): "k1" also occurs in the in-text citation "See [@k1].",
        # so this check passes even if only the citation survives — consider
        # asserting on the entry text once the importer's output format is
        # confirmed.
        assert "k1" in recovered
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Differ: citation and bibliography comparison (FR-542)
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestDifferBibliography:
    """Checks on how the differ classifies citations and reference entries."""

    def test_preserved_citation(self) -> None:
        """Comparing a document with itself lists the citation as preserved."""
        from markidocx.differ import compare

        document = (
            "# Doc\n\n"
            "See [@smith2020].\n\n"
            "## References\n\n"
            "- [@smith2020]: Smith. *T*. 2020."
        )
        report = compare(document, document)
        assert any("citation:[@smith2020]" in item for item in report.preserved)

    def test_missing_citation_broken(self) -> None:
        """A citation absent from the re-import is broken and flags drift."""
        from markidocx.differ import compare

        before = "See [@smith2020]."
        after = "See something."
        report = compare(before, after)
        assert any(
            "citation:missing '[@smith2020]'" in item for item in report.broken
        )
        assert report.has_drift

    def test_missing_reference_entry_degraded(self) -> None:
        """Losing the references list is recorded under ``degraded``."""
        from markidocx.differ import compare

        before = textwrap.dedent("""\
            See [@k1].

            ## References

            - [@k1]: Author. *T*. 2020.
        """)
        after = "See [@k1]."
        report = compare(before, after)
        assert any("reference-entry" in item for item in report.degraded)

    def test_unresolvable_citation_emits_warning(self) -> None:
        """Missing citation in reimported emits citation-ambiguity warning."""
        from markidocx.bibliography import compare_citations
        from markidocx.errors import WarningRecord

        before = "See [@missing]."
        after = "See something."

        # Output collectors handed to compare_citations; only the warning
        # records are inspected below.
        preserved: list[str] = []
        degraded: list[str] = []
        broken: list[str] = []
        records: list[WarningRecord] = []

        compare_citations(before, after, preserved, degraded, broken, records)

        hits = [rec for rec in records if rec.reason == "citation-ambiguity"]
        assert hits, "Expected citation-ambiguity warning"
        assert hits[0].construct == "@missing"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Single citation round-trip
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestCitationRoundTrip:
    """Build → import round trips for documents that contain citations."""

    @staticmethod
    def _roundtrip(project_dir: Path, markdown: str) -> str:
        """Build then re-import *markdown*; return the first imported file's text.

        Both the build step and the import step are asserted to succeed.
        """
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        _make_project(project_dir, markdown)
        manifest = load_manifest(project_dir / "manifest.yaml")

        built = build_document(manifest)
        assert built.success

        imported = import_document(manifest, built.output_path)
        assert imported.success

        return imported.output_files[0].read_text(encoding="utf-8")

    def test_single_citation_roundtrip(self, tmp_path: Path) -> None:
        """After a round trip the differ reports no broken citation items."""
        from markidocx.differ import compare

        markdown = textwrap.dedent("""\
            # Introduction

            According to [@smith2020], things are good.

            ## References

            - [@smith2020]: Smith, J. *Good Stuff*. 2020.
        """)
        recovered = self._roundtrip(tmp_path, markdown)
        report = compare(markdown, recovered)

        broken_citations = [item for item in report.broken if "citation" in item]
        assert not broken_citations, f"Broken citations: {broken_citations}"

    def test_multi_citation_document(self, tmp_path: Path) -> None:
        """Both citation keys are still present after a round trip."""
        markdown = textwrap.dedent("""\
            # Paper

            First point from [@a2020]. Second from [@b2021].

            ## References

            - [@a2020]: A. *Work A*. 2020.
            - [@b2021]: B. *Work B*. 2021.
        """)
        recovered = self._roundtrip(tmp_path, markdown)
        for cite_key in ("a2020", "b2021"):
            assert cite_key in recovered
|