# marki-docx/tests/test_level3_figures.py

"""Tests for LEVEL3 numbered figure support (FR-532, FR-541)."""

from __future__ import annotations

import textwrap
from pathlib import Path

# Manifest YAML shared by the LEVEL3 tests in this module: project "fig-test"
# at feature level "level3", family "article", a single source file doc.md,
# and ./dist as the output directory. The trailing empty entry yields the
# final newline.
LEVEL3_MANIFEST = "\n".join(
    (
        "project:",
        "  name: fig-test",
        "  feature_level: level3",
        "  family: article",
        "sources:",
        "  - path: doc.md",
        "output:",
        "  dir: ./dist",
        "",
    )
)


def _make_project(tmp_path: Path, markdown: str) -> Path:
    (tmp_path / "doc.md").write_text(markdown, encoding="utf-8")
    (tmp_path / "manifest.yaml").write_text(LEVEL3_MANIFEST, encoding="utf-8")
    return tmp_path


# ---------------------------------------------------------------------------
# figures module helpers
# ---------------------------------------------------------------------------


class TestFigureHelpers:
    """Unit tests for the helper functions imported from ``markidocx.figures``."""

    def test_is_figure_paragraph_true(self) -> None:
        """An image carrying a ``{#fig:...}`` label counts as a figure paragraph."""
        from markidocx.figures import is_figure_paragraph

        labelled_image = "![My Caption](img/photo.png){#fig:photo}"
        assert is_figure_paragraph(labelled_image)

    def test_is_figure_paragraph_false(self) -> None:
        """Plain text and an unlabelled image are both rejected."""
        from markidocx.figures import is_figure_paragraph

        non_figures = (
            "Normal paragraph text.",
            "![alt](img.png)",  # no {#fig:} label
        )
        for candidate in non_figures:
            assert not is_figure_paragraph(candidate)

    def test_parse_figure(self) -> None:
        """A figure declaration parses into caption, path and label."""
        from markidocx.figures import parse_figure

        parsed = parse_figure("![Architecture Diagram](arch.png){#fig:arch}")
        assert parsed is not None
        caption, path, label = parsed
        expected = ("Architecture Diagram", "arch.png", "fig:arch")
        assert (caption, path, label) == expected

    def test_extract_figures_from_md(self) -> None:
        """Every labelled figure in a document is extracted, in source order."""
        from markidocx.figures import extract_figures_from_md

        source = textwrap.dedent("""\
            # Title

            Some text.

            ![Figure One](fig1.png){#fig:f1}

            More text.

            ![Figure Two](fig2.png){#fig:f2}
        """)
        expected = [
            ("Figure One", "fig1.png", "fig:f1"),
            ("Figure Two", "fig2.png", "fig:f2"),
        ]
        found = extract_figures_from_md(source)
        assert len(found) == 2
        for position, figure in enumerate(expected):
            assert found[position] == figure

    def test_extract_figure_labels(self) -> None:
        """Label extraction yields the set of ``fig:`` labels in the markdown."""
        from markidocx.figures import extract_figure_labels

        source = "\n\n".join(
            ["![Cap1](a.png){#fig:f1}", "![Cap2](b.png){#fig:f2}"]
        )
        assert extract_figure_labels(source) == {"fig:f1", "fig:f2"}

    def test_is_caption_paragraph(self) -> None:
        """'Figure N' lines with a dash separator are captions; other text is not."""
        from markidocx.figures import is_caption_paragraph

        for caption_line in ("Figure 1 — My Caption", "Figure 3 - Another Caption"):
            assert is_caption_paragraph(caption_line)
        assert not is_caption_paragraph("Some normal text")

    def test_reconstruct_figure_md(self) -> None:
        """Caption, path and label are reassembled into the figure markdown syntax."""
        from markidocx.figures import reconstruct_figure_md

        rebuilt = reconstruct_figure_md("My Caption", "img/photo.png", "fig:photo")
        expected = "![My Caption](img/photo.png){#fig:photo}"
        assert rebuilt == expected


# ---------------------------------------------------------------------------
# Builder: figure declaration → DOCX caption paragraph (FR-532)
# ---------------------------------------------------------------------------


class TestBuilderFigures:
    """Build projects containing figure declarations and inspect the DOCX output."""

    def test_build_with_figure_succeeds(self, tmp_path: Path) -> None:
        """A LEVEL3 document with one figure builds and the output file exists."""
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        md = textwrap.dedent("""\
            # Document {#doc}

            Introduction.

            ![Architecture Diagram](arch.png){#fig:arch}

            More text.
        """)
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success
        assert result.output_path.exists()

    def test_build_docx_contains_figure_caption(self, tmp_path: Path) -> None:
        """The built DOCX should contain a caption paragraph with 'Figure 1'."""
        from docx import Document as DocxReader

        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        md = "![My Diagram](diag.png){#fig:diag}\n\nSome text."
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success

        doc = DocxReader(str(result.output_path))
        texts = [p.text for p in doc.paragraphs]
        caption_paras = [t for t in texts if t.startswith("Figure 1")]
        assert caption_paras, f"No 'Figure 1' caption found. Paragraphs: {texts}"

    def test_multiple_figures_numbered_sequentially(self, tmp_path: Path) -> None:
        """Multiple figures get Figure 1, Figure 2, Figure 3, in document order."""
        from docx import Document as DocxReader

        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        md = textwrap.dedent("""\
            # Doc

            ![First](a.png){#fig:a}

            Some text.

            ![Second](b.png){#fig:b}

            More text.

            ![Third](c.png){#fig:c}
        """)
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success

        doc = DocxReader(str(result.output_path))
        texts = [p.text for p in doc.paragraphs]

        # Anchor on the caption prefix, like the sibling caption tests, so a
        # paragraph that merely mentions "Figure 1" cannot satisfy the check.
        # Record where each caption first appears to verify the ordering.
        first_positions = []
        for number in (1, 2, 3):
            prefix = f"Figure {number}"
            position = next(
                (i for i, text in enumerate(texts) if text.startswith(prefix)),
                None,
            )
            assert position is not None, (
                f"No '{prefix}' caption found. Paragraphs: {texts}"
            )
            first_positions.append(position)

        in_order = all(a < b for a, b in zip(first_positions, first_positions[1:]))
        assert in_order, f"Captions out of order. Paragraphs: {texts}"

    def test_figure_not_activated_for_level1(self, tmp_path: Path) -> None:
        """LEVEL1: figure syntax is not stripped (no caption paragraphs added)."""
        from docx import Document as DocxReader

        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        # Same project layout as the LEVEL3 manifest, but at feature level 1.
        manifest_yaml = textwrap.dedent("""\
            project:
              name: l1-fig
              feature_level: level1
              family: article
            sources:
              - path: doc.md
            output:
              dir: ./dist
        """)
        (tmp_path / "doc.md").write_text(
            "# Title\n\n![My Diagram](diag.png){#fig:diag}", encoding="utf-8"
        )
        (tmp_path / "manifest.yaml").write_text(manifest_yaml, encoding="utf-8")
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success
        doc = DocxReader(str(result.output_path))
        texts = [p.text for p in doc.paragraphs]
        # No "Figure N" captions in LEVEL1 output
        assert not any(t.startswith("Figure ") for t in texts)


# ---------------------------------------------------------------------------
# Importer: DOCX caption paragraphs → figure markdown (FR-532)
# ---------------------------------------------------------------------------


class TestImporterFigures:
    """Build a LEVEL3 DOCX, import it back, and inspect the resulting markdown."""

    @staticmethod
    def _build_then_import(project_dir: Path, markdown: str) -> str:
        """Build *markdown* to DOCX, import it, and return the first output's text."""
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        _make_project(project_dir, markdown)
        manifest = load_manifest(project_dir / "manifest.yaml")

        built = build_document(manifest)
        assert built.success

        imported = import_document(manifest, built.output_path)
        assert imported.success

        first_output = imported.output_files[0]
        return first_output.read_text(encoding="utf-8")

    def test_roundtrip_preserves_figure_caption(self, tmp_path: Path) -> None:
        """Caption text and the figure label both appear in the re-imported markdown."""
        source = "# Title\n\n![Architecture](arch.png){#fig:arch}\n\nText."
        reimported = self._build_then_import(tmp_path, source)
        for fragment in ("Architecture", "fig:arch"):
            assert fragment in reimported

    def test_roundtrip_preserves_figure_label(self, tmp_path: Path) -> None:
        """The full ``{#fig:...}`` attribute appears in the re-imported markdown."""
        source = "![Cap](img.png){#fig:myimg}\n\nText."
        reimported = self._build_then_import(tmp_path, source)
        assert "{#fig:myimg}" in reimported


# ---------------------------------------------------------------------------
# Differ: figure identity coherence (FR-541)
# ---------------------------------------------------------------------------


class TestDifferFigures:
    """Checks on how ``markidocx.differ.compare`` classifies figure entries."""

    def test_preserved_figure_label(self) -> None:
        """Comparing a document with itself lists its figure label as preserved."""
        from markidocx.differ import compare

        document = "# Title\n\n![Cap](img.png){#fig:img}\n\nText."
        report = compare(document, document)
        hits = [e for e in report.preserved if "figure-label:fig:img" in e]
        assert hits

    def test_missing_figure_label_broken(self) -> None:
        """A figure absent from the re-import is listed as broken and flags drift."""
        from markidocx.differ import compare

        before = "![Cap](img.png){#fig:img}\n\nText."
        after = "Text."
        report = compare(before, after)
        hits = [e for e in report.broken if "figure-label:missing 'fig:img'" in e]
        assert hits
        assert report.has_drift

    def test_missing_caption_degraded(self) -> None:
        """A caption that differs after re-import is listed as degraded."""
        from markidocx.differ import compare

        before = "![My Caption](img.png){#fig:img}"
        after = "![Different Caption](img.png){#fig:img}"
        report = compare(before, after)
        hits = [e for e in report.degraded if "figure-caption" in e]
        assert hits

    def test_preserved_caption(self) -> None:
        """An unchanged caption is listed as preserved."""
        from markidocx.differ import compare

        document = "![Same Caption](img.png){#fig:img}"
        report = compare(document, document)
        hits = [e for e in report.preserved if "figure-caption" in e]
        assert hits


# ---------------------------------------------------------------------------
# Full figure round-trip
# ---------------------------------------------------------------------------


class TestFigureRoundTrip:
    """End-to-end build → import checks for documents containing figures."""

    @staticmethod
    def _roundtrip(project_dir: Path, markdown: str) -> str:
        """Build *markdown*, import the DOCX, and return the first output's text."""
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        _make_project(project_dir, markdown)
        manifest = load_manifest(project_dir / "manifest.yaml")

        built = build_document(manifest)
        assert built.success

        imported = import_document(manifest, built.output_path)
        assert imported.success

        first_output = imported.output_files[0]
        return first_output.read_text(encoding="utf-8")

    def test_single_figure_roundtrip(self, tmp_path: Path) -> None:
        """A single figure round-trips without any broken figure entries."""
        from markidocx.differ import compare

        md = textwrap.dedent("""\
            # Document

            Introduction.

            ![System Architecture](arch.png){#fig:arch}

            Conclusion.
        """)
        reimported = self._roundtrip(tmp_path, md)
        report = compare(md, reimported)

        # Only figure-related breakage is of interest here.
        broken_figs = [b for b in report.broken if "figure" in b]
        assert not broken_figs, f"Broken figures found: {broken_figs}"

    def test_multiple_figures_identity_coherent(self, tmp_path: Path) -> None:
        """Multiple figures survive round-trip with correct labels."""
        md = textwrap.dedent("""\
            # Doc

            ![Figure One Caption](fig1.png){#fig:f1}

            Text between figures.

            ![Figure Two Caption](fig2.png){#fig:f2}
        """)
        reimported = self._roundtrip(tmp_path, md)
        for label_attr in ("{#fig:f1}", "{#fig:f2}"):
            assert label_attr in reimported