generated from coulomb/repo-seed
Implements full LEVEL3 feature set: cross-references (xref.py), numbered figures (figures.py), auto-diagrams (diagrams.py), bibliography/citations (bibliography.py), LEVEL3 capability detection (level3.py), and structured error/warning records (errors.py). Builder, importer, and differ updated for LEVEL3 round-trip support. REST and MCP interfaces updated with structured warning records. 259 tests passing. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
148 lines
4.7 KiB
Python
148 lines
4.7 KiB
Python
"""Numbered figure support for LEVEL3 markidocx (FR-532, FR-541).
|
||
|
||
Handles round-trip of captioned numbered figures between Markdown and DOCX.
|
||
|
||
Markdown syntax:
|
||
![Caption text](path/to/image.png){#fig:label}
|
||
|
||
DOCX representation:
|
||
[image paragraph or placeholder]
|
||
[caption paragraph: "Figure N — Caption text"]
|
||
(with alt-text marker: "figure-source:path/to/image.png#fig:label")
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import re
|
||
from typing import TYPE_CHECKING
|
||
|
||
if TYPE_CHECKING:
|
||
from docx.document import Document as DocxDocument
|
||
|
||
# Markdown figure declaration on a line of its own:
#     ![Caption text](path/to/image.png){#fig:label}
# Groups: 1 = caption (may be empty), 2 = image path, 3 = label including its
# "fig:" prefix. MULTILINE makes ^/$ anchor at every line, so finditer() can
# pick declarations out of a whole document.
FIGURE_RE = re.compile(
    r"^!\[([^\]]*)\]\(([^)]+)\)\{#(fig:[\w:-]+)\}$",
    re.MULTILINE,
)

# Caption paragraph pattern in imported DOCX: "Figure N <dash> Caption", where
# the dash is an em dash, hyphen or en dash. Groups: 1 = number, 2 = caption.
# The caption may be empty: render_figure() writes "Figure N — " for a figure
# without caption text and is_caption_paragraph() strips that down to
# "Figure N —", so the dash must be allowed to end the paragraph. Otherwise
# the dash still has to be followed by whitespace.
CAPTION_RE = re.compile(r"^Figure\s+(\d+)\s+[—\-–](?:\s+|$)(.*)$")

# Alt-text marker embedded in images to preserve source intent (FR-534)
ALT_TEXT_MARKER_PREFIX = "figure-source:"
|
||
|
||
|
||
def is_figure_paragraph(text: str) -> bool:
    """Tell whether *text* starts with a Markdown figure declaration.

    Leading and trailing whitespace is ignored. The test is
    ``FIGURE_RE.match`` on the stripped text; because that pattern is
    compiled with ``re.MULTILINE``, a multi-line text whose first line is a
    declaration is accepted as well.
    """
    candidate = text.strip()
    return FIGURE_RE.match(candidate) is not None
|
||
|
||
|
||
def parse_figure(text: str) -> tuple[str, str, str] | None:
    """Split a figure declaration into its three components.

    Surrounding whitespace is stripped before matching against
    ``FIGURE_RE``.

    Returns:
        ``(caption, path, label)`` on a match, where *label* keeps its
        ``fig:`` prefix, or ``None`` when *text* does not start with a
        figure declaration.
    """
    found = FIGURE_RE.match(text.strip())
    if found is None:
        return None
    caption, path, label = found.group(1, 2, 3)
    return caption, path, label
|
||
|
||
|
||
def render_figure(
    doc: DocxDocument,
    caption: str,
    path: str,
    label: str,
    figure_number: int,
) -> None:
    """Render a figure declaration into *doc* (FR-532).

    Three ``Normal``-styled paragraphs are appended, in this order:

    1. An italic placeholder ``[Figure: <path>]``. The image itself is not
       embedded (that would require the file to exist, which is avoided
       here for portability).
    2. A marker paragraph ``figure-source:<path>#<label>`` set in 1 pt type,
       which lets the importer reconstruct the figure (FR-534).
    3. The caption paragraph ``Figure N — Caption``.

    Args:
        doc: Document to append to; only ``add_paragraph`` is called on it.
        caption: Caption text shown after the figure number.
        path: Image path as written in the Markdown source.
        label: Figure label as written in the source (e.g. ``fig:label``).
        figure_number: Number to show in the caption.
    """
    # python-docx is only imported under TYPE_CHECKING at module level, so
    # the runtime import stays local to this function.
    from docx.shared import Pt

    # Visible stand-in for the image.
    placeholder = doc.add_paragraph(style="Normal")
    placeholder_run = placeholder.add_run(f"[Figure: {path}]")
    placeholder_run.italic = True

    # Source-intent marker for the round-trip (FR-534). It is ordinary
    # paragraph text, shrunk to 1 pt so that it is visually negligible.
    alt_marker = f"{ALT_TEXT_MARKER_PREFIX}{path}#{label}"
    marker_para = doc.add_paragraph(style="Normal")
    marker_run = marker_para.add_run(alt_marker)
    marker_run.font.size = Pt(1)
    marker_run.font.color.rgb = None  # default color

    # Caption paragraph
    caption_para = doc.add_paragraph(style="Normal")
    caption_para.add_run(f"Figure {figure_number} — {caption}")
|
||
|
||
|
||
def extract_figures_from_md(text: str) -> list[tuple[str, str, str]]:
    """Collect every figure declaration found in Markdown *text*.

    The document is scanned with ``FIGURE_RE.finditer``.

    Returns:
        One ``(caption, path, label)`` tuple per declaration, in document
        order; an empty list when there are none.
    """
    figures: list[tuple[str, str, str]] = []
    for found in FIGURE_RE.finditer(text):
        caption, path, label = found.group(1, 2, 3)
        figures.append((caption, path, label))
    return figures
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Importer helpers
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def is_caption_paragraph(text: str) -> bool:
    """Tell whether *text*, ignoring surrounding whitespace, matches ``CAPTION_RE``."""
    candidate = text.strip()
    return CAPTION_RE.match(candidate) is not None
|
||
|
||
|
||
def is_alt_text_marker(text: str) -> bool:
    """Tell whether *text*, once stripped, begins with the figure-source prefix."""
    candidate = text.strip()
    return candidate.startswith(ALT_TEXT_MARKER_PREFIX)
|
||
|
||
|
||
def parse_alt_text_marker(text: str) -> tuple[str, str] | None:
    """Split a figure-source marker into ``(path, label)``.

    Surrounding whitespace is ignored. Everything after the marker prefix is
    divided at its last ``#``: the part before it is the path, the part after
    it the label. Without any ``#`` the whole remainder is the path and the
    label is the empty string.

    Returns:
        ``(path, label)``, or ``None`` when *text* does not start with the
        marker prefix.
    """
    candidate = text.strip()
    if candidate.startswith(ALT_TEXT_MARKER_PREFIX):
        payload = candidate[len(ALT_TEXT_MARKER_PREFIX):]
        path, separator, label = payload.rpartition("#")
        if not separator:
            # No "#": rpartition put the whole payload into the last slot.
            return payload, ""
        return path, label
    return None
|
||
|
||
|
||
def reconstruct_figure_md(caption: str, path: str, label: str) -> str:
    """Reconstruct a Markdown figure declaration from its parts.

    Args:
        caption: Caption text, used as the image alt text (may be empty).
        path: Image path placed in the link target.
        label: Figure label including its ``fig:`` prefix. May be empty, as
            ``parse_alt_text_marker`` yields ``""`` for a marker that
            carries no label.

    Returns:
        ``![caption](path){#label}``, the form ``FIGURE_RE`` parses. When
        *label* is empty the attribute block is left out and a plain
        ``![caption](path)`` image is returned, rather than an invalid
        ``{#}`` attribute.
    """
    image = f"![{caption}]({path})"
    if not label:
        return image
    return f"{image}{{#{label}}}"
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Differ helpers
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Both patterns are unanchored, so they find figure declarations anywhere in
# a text rather than only on lines of their own.

# Captures the label (with its "fig:" prefix) of each {#fig:...} attribute.
FIGURE_LABEL_RE = re.compile(
    r"\{#(fig:[\w:-]+)\}",
)

# Captures the caption (alt text) of each labelled image.
FIGURE_CAPTION_MD_RE = re.compile(
    r"!\[([^\]]*)\]\([^)]+\)\{#fig:[\w:-]+\}",
)
|
||
|
||
|
||
def extract_figure_labels(text: str) -> set[str]:
    """Collect the distinct figure labels declared in Markdown *text*.

    Every ``{#fig:...}`` attribute matched by ``FIGURE_LABEL_RE`` contributes
    its label, ``fig:`` prefix included; duplicates collapse in the set.
    """
    return {found.group(1) for found in FIGURE_LABEL_RE.finditer(text)}
|
||
|
||
|
||
def extract_figure_captions(text: str) -> list[str]:
    """List the captions of all figure declarations in Markdown *text*.

    Captions are returned in document order, one per match of
    ``FIGURE_CAPTION_MD_RE``; empty captions are kept as empty strings.
    """
    captions: list[str] = []
    for found in FIGURE_CAPTION_MD_RE.finditer(text):
        captions.append(found.group(1))
    return captions
|