generated from coulomb/repo-seed
feat: WP-0003 complete — LEVEL3 advanced features + error framework
Implements full LEVEL3 feature set: cross-references (xref.py), numbered figures (figures.py), auto-diagrams (diagrams.py), bibliography/citations (bibliography.py), LEVEL3 capability detection (level3.py), and structured error/warning records (errors.py). Builder, importer, and differ updated for LEVEL3 round-trip support. REST and MCP interfaces updated with structured warning records. 259 tests passing. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
147
src/markidocx/figures.py
Normal file
147
src/markidocx/figures.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""Numbered figure support for LEVEL3 markidocx (FR-532, FR-541).
|
||||
|
||||
Handles round-trip of captioned numbered figures between Markdown and DOCX.
|
||||
|
||||
Markdown syntax:
|
||||
![Caption text](path/to/image.png){#fig:label}
|
||||
|
||||
DOCX representation:
|
||||
[image paragraph or placeholder]
|
||||
[caption paragraph: "Figure N — Caption text"]
|
||||
(with alt-text marker: "figure-source:path/to/image.png#fig:label")
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.document import Document as DocxDocument
|
||||
|
||||
# Markdown figure declaration occupying a whole line:
#     ![caption](path){#fig:label}
# Groups: 1 = caption, 2 = image path, 3 = label (including the "fig:" prefix).
# MULTILINE lets ^/$ anchor on every line so finditer() can scan a document.
FIGURE_RE = re.compile(
    r"^!\[([^\]]*)\]\(([^)]+)\)\{#(fig:[\w:-]+)\}$",
    flags=re.MULTILINE,
)
|
||||
|
||||
# Caption paragraph pattern in imported DOCX
|
||||
# Groups: 1 = figure number (digits), 2 = caption text. The separator between
# them may be an em dash, a hyphen, or an en dash, surrounded by whitespace.
CAPTION_RE = re.compile(r"^Figure\s+(\d+)\s+[—\-–]\s+(.+)$")
|
||||
|
||||
# Alt-text marker embedded in images to preserve source intent (FR-534)
|
||||
# Full marker layout as written by render_figure(): "figure-source:<path>#<label>"
ALT_TEXT_MARKER_PREFIX = "figure-source:"
|
||||
|
||||
|
||||
def is_figure_paragraph(text: str) -> bool:
    """Report whether *text* starts with a figure declaration.

    Surrounding whitespace is ignored. The check is anchored at the start of
    the stripped text via ``FIGURE_RE.match``; because the pattern is compiled
    with ``re.MULTILINE``, the declaration only has to fill the first line.
    """
    candidate = text.strip()
    return FIGURE_RE.match(candidate) is not None
|
||||
|
||||
|
||||
def parse_figure(text: str) -> tuple[str, str, str] | None:
    """Split a figure declaration into its components.

    Surrounding whitespace in *text* is ignored.

    Returns:
        ``(caption, path, label)`` when the stripped text matches
        ``FIGURE_RE`` from its start, otherwise ``None``.
    """
    found = FIGURE_RE.match(text.strip())
    return found.group(1, 2, 3) if found else None
|
||||
|
||||
|
||||
def render_figure(
    doc: DocxDocument,
    caption: str,
    path: str,
    label: str,
    figure_number: int,
) -> None:
    """Render a figure declaration into *doc* (FR-532).

    Appends three "Normal"-style paragraphs, in this order:

    1. An italic placeholder ``[Figure: <path>]``. The image itself is not
       embedded (that would require the file to exist; omitted for
       portability).
    2. A source-intent marker ``figure-source:<path>#<label>`` set in 1 pt
       type, so the importer can reconstruct the figure (FR-534).
    3. The caption ``Figure N — Caption``.

    Args:
        doc: Document the paragraphs are appended to.
        caption: Caption text shown after the figure number.
        path: Image path as written in the Markdown source.
        label: Figure label, inserted verbatim after ``#`` in the marker.
        figure_number: Number shown in the caption.
    """
    # Kept function-local so importing this module does not import docx.
    from docx.shared import Pt

    # 1. Visible placeholder standing in for the image.
    placeholder = doc.add_paragraph(style="Normal")
    placeholder.add_run(f"[Figure: {path}]").italic = True

    # 2. Round-trip marker. It is shrunk to 1 pt to be unobtrusive; it is
    #    still ordinary paragraph text, not real image alt-text.
    marker_para = doc.add_paragraph(style="Normal")
    marker_run = marker_para.add_run(f"{ALT_TEXT_MARKER_PREFIX}{path}#{label}")
    marker_run.font.size = Pt(1)
    marker_run.font.color.rgb = None  # no explicit color: use the default

    # 3. Caption paragraph.
    caption_para = doc.add_paragraph(style="Normal")
    caption_para.add_run(f"Figure {figure_number} — {caption}")
|
||||
|
||||
|
||||
def extract_figures_from_md(text: str) -> list[tuple[str, str, str]]:
    """Collect every figure declaration found in Markdown *text*.

    Returns:
        One ``(caption, path, label)`` tuple per ``FIGURE_RE`` match, in
        document order. The list is empty when there are no figures.
    """
    # The pattern has exactly three groups, so findall() yields 3-tuples.
    return FIGURE_RE.findall(text)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Importer helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def is_caption_paragraph(text: str) -> bool:
    """Report whether *text*, ignoring surrounding whitespace, matches ``CAPTION_RE``."""
    candidate = text.strip()
    return CAPTION_RE.match(candidate) is not None
|
||||
|
||||
|
||||
def is_alt_text_marker(text: str) -> bool:
    """Report whether *text*, once stripped, begins with ``ALT_TEXT_MARKER_PREFIX``."""
    candidate = text.strip()
    return candidate.startswith(ALT_TEXT_MARKER_PREFIX)
|
||||
|
||||
|
||||
def parse_alt_text_marker(text: str) -> tuple[str, str] | None:
    """Split a figure-source marker into ``(path, label)``.

    Surrounding whitespace is ignored. The text after the prefix is split at
    its *last* ``#``, so a path that itself contains ``#`` stays intact.

    Returns:
        ``(path, label)``; ``(rest, "")`` when there is no ``#`` after the
        prefix; ``None`` when *text* does not start with the marker prefix.
    """
    marker = text.strip()
    if not marker.startswith(ALT_TEXT_MARKER_PREFIX):
        return None

    payload = marker[len(ALT_TEXT_MARKER_PREFIX):]
    path, separator, label = payload.rpartition("#")
    if not separator:
        # No '#': the whole payload is the path and there is no label.
        return payload, ""
    return path, label
|
||||
|
||||
|
||||
def reconstruct_figure_md(caption: str, path: str, label: str) -> str:
    """Reconstruct a Markdown figure declaration from its parts.

    Args:
        caption: Caption text placed inside ``![...]``.
        path: Image path placed inside ``(...)``.
        label: Figure label placed verbatim inside ``{#...}``
            (e.g. ``fig:overview``).

    Returns:
        The declaration ``![caption](path){#label}``.
    """
    # Doubled braces produce the literal { } around the label.
    return f"![{caption}]({path}){{#{label}}}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Differ helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Matches a {#fig:label} attribute anywhere in the text; group 1 is the label
# including its "fig:" prefix.
FIGURE_LABEL_RE = re.compile(r"\{#(fig:[\w:-]+)\}")
|
||||
# Matches a full ![caption](path){#fig:label} declaration anywhere in the text
# (not anchored to line boundaries); group 1 is the caption.
FIGURE_CAPTION_MD_RE = re.compile(r"!\[([^\]]*)\]\([^)]+\)\{#fig:[\w:-]+\}")
|
||||
|
||||
|
||||
def extract_figure_labels(text: str) -> set[str]:
|
||||
"""Extract {#fig:label} declarations from Markdown text."""
|
||||
return set(FIGURE_LABEL_RE.findall(text))
|
||||
|
||||
|
||||
def extract_figure_captions(text: str) -> list[str]:
|
||||
"""Extract captions from figure declarations in Markdown text."""
|
||||
return [m.group(1) for m in FIGURE_CAPTION_MD_RE.finditer(text)]
|
||||
Reference in New Issue
Block a user