feat: WP-0005 — diagram renderer integration (Mermaid, Graphviz, PlantUML)

Add pluggable DiagramRenderer protocol and RendererResult type to diagrams.py.
Implement MermaidRenderer (mmdc), GraphvizRenderer (dot), PlantUMLRenderer
backends with graceful source-only fallback and WarningRecord on missing tool
(FR-533, FR-534, FR-538). Builder detects renderers at build time and embeds
PNG with alt-text source marker for round-trip. Extend regression corpus with
rendered_diagrams_document.md and skipif-gated integration tests. All 272 tests
pass; ruff and mypy clean.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-17 12:16:09 +00:00
parent 0ff234b93f
commit 5564747060
6 changed files with 615 additions and 99 deletions

View File

@@ -3,6 +3,10 @@
Handles fenced diagram source blocks (mermaid, graphviz, plantuml) in the
Markdown ↔ DOCX round trip.
Renderer abstraction:
Each diagram type has a pluggable backend (DiagramRenderer protocol).
detect_renderers() probes PATH and returns available backends.
Source-intent preservation:
When a renderer is unavailable, diagram source is embedded as a verbatim
code block and a source-intent marker paragraph is added so the importer
@@ -13,21 +17,20 @@ from __future__ import annotations
import re
import shutil
from typing import TYPE_CHECKING
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Protocol
if TYPE_CHECKING:
from docx.document import Document as DocxDocument
from markidocx.errors import Severity, WarningRecord
# Diagram types recognised as LEVEL3 auto-diagram sources
DIAGRAM_TYPES: frozenset[str] = frozenset({"mermaid", "graphviz", "plantuml"})
# Renderer → CLI command mapping
_RENDERER_COMMANDS: dict[str, str] = {
"mermaid": "mmdc",
"graphviz": "dot",
"plantuml": "plantuml",
}
# Marker prefix stored in DOCX paragraph to preserve source intent (FR-534)
DIAGRAM_SOURCE_MARKER_PREFIX = "diagram-source:"
DIAGRAM_SOURCE_MARKER_RE = re.compile(
@@ -35,6 +38,178 @@ DIAGRAM_SOURCE_MARKER_RE = re.compile(
)
# ---------------------------------------------------------------------------
# Renderer abstraction (FR-538)
# ---------------------------------------------------------------------------
@dataclass
class RendererResult:
    """Outcome of a single diagram render attempt.

    Only ``success`` is required; ``output_path`` and ``warning`` are
    optional and default to ``None``.
    """

    # Whether the backend reported a successful render.
    success: bool
    # Path of the rendered image, when the backend supplies one.
    output_path: Path | None = None
    # Warning describing what went wrong, when the backend supplies one.
    warning: WarningRecord | None = None
class DiagramRenderer(Protocol):
    """Structural interface that every diagram renderer backend satisfies."""

    def can_render(self, diagram_type: str) -> bool:
        """Tell whether this backend is able to render *diagram_type*."""
        ...

    def render(
        self,
        source: str,
        diagram_type: str,
        output_path: Path,
    ) -> RendererResult:
        """Render *source* as a PNG at *output_path* and report the outcome."""
        ...
# ---------------------------------------------------------------------------
# Concrete renderer backends (T02–T04)
# ---------------------------------------------------------------------------
class MermaidRenderer:
    """Renderer backend for Mermaid diagrams using the ``mmdc`` CLI (FR-533)."""

    def can_render(self, diagram_type: str) -> bool:
        """Return True when *diagram_type* is ``"mermaid"`` and ``mmdc`` is on PATH."""
        return diagram_type == "mermaid" and bool(shutil.which("mmdc"))

    def render(
        self, source: str, diagram_type: str, output_path: Path
    ) -> RendererResult:
        """Render Mermaid *source* to a PNG at *output_path*.

        The source is written to a temporary ``diagram.mmd`` file and passed
        to ``mmdc`` with a 30 second timeout. *diagram_type* is not consulted
        by this backend.

        Returns:
            ``RendererResult(success=True, output_path=output_path)`` when the
            tool exits cleanly and the PNG exists; otherwise a failed result
            carrying a ``diagram-render-failed`` warning. This method does not
            raise for tool failures.
        """
        try:
            with tempfile.TemporaryDirectory() as tmp:
                src_file = Path(tmp) / "diagram.mmd"
                src_file.write_text(source, encoding="utf-8")
                subprocess.run(
                    ["mmdc", "-i", str(src_file), "-o", str(output_path)],
                    capture_output=True,
                    timeout=30,
                    check=True,
                )
        except subprocess.CalledProcessError as exc:
            # Surface the tool's own diagnostics; the bare exception text only
            # carries the command line and exit status.
            stderr = (exc.stderr or b"").decode("utf-8", errors="replace").strip()
            return self._failure(f"{exc}: {stderr[:500]}" if stderr else str(exc))
        except Exception as exc:
            # Deliberately broad: any failure here must degrade to the
            # source-only fallback rather than abort the build.
            return self._failure(str(exc))
        # A zero exit status does not prove an image was written; report the
        # problem here so the caller never falls back without a warning.
        if not output_path.exists():
            return self._failure("output PNG not found")
        return RendererResult(success=True, output_path=output_path)

    @staticmethod
    def _failure(detail: str) -> RendererResult:
        """Build a failed result whose warning describes *detail*."""
        return RendererResult(
            success=False,
            warning=WarningRecord(
                severity=Severity.WARNING,
                reason="diagram-render-failed",
                construct=f"mermaid: {detail}",
            ),
        )
class GraphvizRenderer:
    """Renderer backend for Graphviz diagrams using the ``dot`` CLI (FR-533)."""

    def can_render(self, diagram_type: str) -> bool:
        """Return True when *diagram_type* is ``"graphviz"`` and ``dot`` is on PATH."""
        return diagram_type == "graphviz" and bool(shutil.which("dot"))

    def render(
        self, source: str, diagram_type: str, output_path: Path
    ) -> RendererResult:
        """Render Graphviz *source* to a PNG at *output_path*.

        The source is written to a temporary ``diagram.dot`` file and passed
        to ``dot -Tpng`` with a 30 second timeout. *diagram_type* is not
        consulted by this backend.

        Returns:
            ``RendererResult(success=True, output_path=output_path)`` when the
            tool exits cleanly and the PNG exists; otherwise a failed result
            carrying a ``diagram-render-failed`` warning. This method does not
            raise for tool failures.
        """
        try:
            with tempfile.TemporaryDirectory() as tmp:
                src_file = Path(tmp) / "diagram.dot"
                src_file.write_text(source, encoding="utf-8")
                subprocess.run(
                    ["dot", "-Tpng", str(src_file), "-o", str(output_path)],
                    capture_output=True,
                    timeout=30,
                    check=True,
                )
        except subprocess.CalledProcessError as exc:
            # Surface the tool's own diagnostics; the bare exception text only
            # carries the command line and exit status.
            stderr = (exc.stderr or b"").decode("utf-8", errors="replace").strip()
            return self._failure(f"{exc}: {stderr[:500]}" if stderr else str(exc))
        except Exception as exc:
            # Deliberately broad: any failure here must degrade to the
            # source-only fallback rather than abort the build.
            return self._failure(str(exc))
        # A zero exit status does not prove an image was written; report the
        # problem here so the caller never falls back without a warning.
        if not output_path.exists():
            return self._failure("output PNG not found")
        return RendererResult(success=True, output_path=output_path)

    @staticmethod
    def _failure(detail: str) -> RendererResult:
        """Build a failed result whose warning describes *detail*."""
        return RendererResult(
            success=False,
            warning=WarningRecord(
                severity=Severity.WARNING,
                reason="diagram-render-failed",
                construct=f"graphviz: {detail}",
            ),
        )
class PlantUMLRenderer:
    """Renderer backend for PlantUML diagrams using the ``plantuml`` CLI (FR-533)."""

    def can_render(self, diagram_type: str) -> bool:
        """Return True when *diagram_type* is ``"plantuml"`` and ``plantuml`` is on PATH."""
        return diagram_type == "plantuml" and bool(shutil.which("plantuml"))

    def render(
        self, source: str, diagram_type: str, output_path: Path
    ) -> RendererResult:
        """Render PlantUML *source* to a PNG at *output_path*.

        The source is written to a temporary ``diagram.puml`` file and passed
        to ``plantuml -tpng`` with a 30 second timeout. The tool writes its
        image next to the source file, so the result is copied to
        *output_path* before the temporary directory is removed.
        *diagram_type* is not consulted by this backend.

        Returns:
            ``RendererResult(success=True, output_path=output_path)`` when an
            image was produced and copied; otherwise a failed result carrying
            a ``diagram-render-failed`` warning. This method does not raise
            for tool failures.
        """
        try:
            with tempfile.TemporaryDirectory() as tmp:
                tmp_path = Path(tmp)
                src_file = tmp_path / "diagram.puml"
                src_file.write_text(source, encoding="utf-8")
                subprocess.run(
                    ["plantuml", "-tpng", str(src_file)],
                    capture_output=True,
                    timeout=30,
                    check=True,
                )
                expected_png = tmp_path / "diagram.png"
                if expected_png.exists():
                    produced = [expected_png]
                else:
                    # PlantUML may name the image after an identifier given on
                    # the @startuml line instead of the source file, so accept
                    # any PNG it left in the (otherwise empty) directory.
                    produced = sorted(tmp_path.glob("*.png"))
                if not produced:
                    return self._failure("output PNG not found")
                # Copy while the temporary directory still exists.
                shutil.copy2(produced[0], output_path)
        except subprocess.CalledProcessError as exc:
            # Surface the tool's own diagnostics; the bare exception text only
            # carries the command line and exit status.
            stderr = (exc.stderr or b"").decode("utf-8", errors="replace").strip()
            return self._failure(f"{exc}: {stderr[:500]}" if stderr else str(exc))
        except Exception as exc:
            # Deliberately broad: any failure here must degrade to the
            # source-only fallback rather than abort the build.
            return self._failure(str(exc))
        return RendererResult(success=True, output_path=output_path)

    @staticmethod
    def _failure(detail: str) -> RendererResult:
        """Build a failed result whose warning describes *detail*."""
        return RendererResult(
            success=False,
            warning=WarningRecord(
                severity=Severity.WARNING,
                reason="diagram-render-failed",
                construct=f"plantuml: {detail}",
            ),
        )
# ---------------------------------------------------------------------------
# Renderer detection (T01)
# ---------------------------------------------------------------------------
# One shared instance of every known backend. detect_renderers() walks this
# list in order and keeps the first backend whose can_render() accepts a type.
_ALL_BACKENDS: list[DiagramRenderer] = [
MermaidRenderer(),
GraphvizRenderer(),
PlantUMLRenderer(),
]
def detect_renderers() -> dict[str, DiagramRenderer]:
    """Map each renderable diagram type to the backend that handles it.

    Every type in DIAGRAM_TYPES is offered to the backends in _ALL_BACKENDS
    in order; the first backend whose ``can_render`` accepts it is kept.
    Types that no backend accepts are left out of the result (FR-538).
    """
    found: dict[str, DiagramRenderer] = {}
    for kind in DIAGRAM_TYPES:
        # next() stops at the first accepting backend, like a loop with break.
        match = next(
            (candidate for candidate in _ALL_BACKENDS if candidate.can_render(kind)),
            None,
        )
        if match is not None:
            found[kind] = match
    return found
# ---------------------------------------------------------------------------
# Public helpers
# ---------------------------------------------------------------------------
def is_diagram_info(info: str) -> bool:
    """Tell whether *info* names one of the recognised diagram types.

    A falsy *info* is treated as an empty string; surrounding whitespace and
    letter case are ignored before the lookup in DIAGRAM_TYPES.
    """
    normalized = info.strip().lower() if info else ""
    return normalized in DIAGRAM_TYPES
@@ -42,8 +217,8 @@ def is_diagram_info(info: str) -> bool:
def check_renderer(diagram_type: str) -> bool:
    """Return True if the required renderer for *diagram_type* is available.

    Availability comes from detect_renderers(), so this agrees with the
    backend that would actually be used for rendering. The type name is
    lower-cased before the lookup, so the match is case-insensitive.
    """
    return diagram_type.lower() in detect_renderers()
def render_diagram_block(
@@ -54,109 +229,66 @@ def render_diagram_block(
) -> None:
"""Render a diagram fenced block into *doc* (FR-533, FR-534).
If a renderer is available → renders to PNG and embeds the image.
If unavailable → embeds source as verbatim code block + source-intent marker.
Never silently discards source (FR-1205).
Calls detect_renderers() to find available backends. If a renderer is
available for *diagram_type*, renders to PNG and embeds the image.
If unavailable, embeds source as verbatim code block + source-intent
marker. Never silently discards source (FR-1205).
"""
from docx.shared import Pt
from markidocx.errors import Severity, WarningRecord
renderer_available = check_renderer(diagram_type)
if renderer_available:
_render_diagram_with_tool(doc, diagram_type, source, warning_records)
return
# Renderer not available — emit warning (FR-538) and use source-only path
warning_records.append(
WarningRecord(
severity=Severity.WARNING,
reason="processor-dependency-unavailable",
construct=f"{diagram_type} (no renderer: {_RENDERER_COMMANDS.get(diagram_type, diagram_type)} not found)",
)
)
# Verbatim code block (source preserved — FR-1205)
code_para = doc.add_paragraph(style="Normal")
run = code_para.add_run(f"```{diagram_type}\n{source}\n```")
run.font.name = "Courier New"
run.font.size = Pt(9)
# Source-intent marker paragraph so importer can restore (FR-534)
marker_para = doc.add_paragraph(style="Normal")
marker_run = marker_para.add_run(f"{DIAGRAM_SOURCE_MARKER_PREFIX}{diagram_type}\n{source}")
marker_run.font.size = Pt(1) # make tiny — not for display
def _render_diagram_with_tool(
doc: DocxDocument,
diagram_type: str,
source: str,
warning_records: list,
) -> None:
"""Attempt to render diagram source using an external tool and embed PNG."""
import subprocess
import tempfile
from pathlib import Path
from docx.shared import Inches, Pt
from markidocx.errors import Severity, WarningRecord
renderers = detect_renderers()
renderer = renderers.get(diagram_type)
cmd = _RENDERER_COMMANDS[diagram_type]
try:
if renderer is not None:
# Render to a temp PNG and embed while the temp dir is still alive
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)
src_file = tmp_path / f"diagram.{diagram_type[:3]}"
png_file = tmp_path / "diagram.png"
src_file.write_text(source, encoding="utf-8")
output_png = Path(tmp) / "diagram.png"
result = renderer.render(source, diagram_type, output_png)
if diagram_type == "mermaid":
args = [cmd, "-i", str(src_file), "-o", str(png_file)]
elif diagram_type == "graphviz":
args = [cmd, "-Tpng", str(src_file), "-o", str(png_file)]
else: # plantuml
args = [cmd, "-tpng", str(src_file), "-o", str(tmp_path)]
png_file = tmp_path / f"diagram.{diagram_type[:3]}.png"
subprocess.run(args, capture_output=True, timeout=30)
if png_file.exists():
if result.success and result.output_path and result.output_path.exists():
para = doc.add_paragraph(style="Normal")
run = para.add_run()
run.add_picture(str(png_file), width=Inches(5))
run.add_picture(str(result.output_path), width=Inches(5))
# Source-intent marker for round-trip (FR-534)
marker_para = doc.add_paragraph(style="Normal")
marker_run = marker_para.add_run(
f"{DIAGRAM_SOURCE_MARKER_PREFIX}{diagram_type}\n{source}"
)
marker_run.font.size = Pt(1)
_add_source_marker(doc, diagram_type, source)
return
except Exception as exc:
# Render failed — record warning and fall through to source-only
if result.warning:
warning_records.append(result.warning)
else:
# No renderer available — emit processor-dependency warning (FR-538)
_cmd = {"mermaid": "mmdc", "graphviz": "dot", "plantuml": "plantuml"}.get(
diagram_type, diagram_type
)
warning_records.append(
WarningRecord(
severity=Severity.WARNING,
reason="diagram-render-failed",
construct=f"{diagram_type}: {exc}",
reason="processor-dependency-unavailable",
construct=f"{diagram_type} (no renderer: {_cmd} not found)",
)
)
# Fallback: source-only path
from docx.shared import Pt
# Source-only fallback: verbatim code block + source-intent marker
code_para = doc.add_paragraph(style="Normal")
run = code_para.add_run(f"```{diagram_type}\n{source}\n```")
run.font.name = "Courier New"
run.font.size = Pt(9)
_add_source_marker(doc, diagram_type, source)
def _add_source_marker(
    doc: DocxDocument, diagram_type: str, source: str
) -> None:
    """Add a tiny source-intent marker paragraph for round-trip (FR-534).

    Appends one "Normal"-style paragraph to *doc* whose single run holds
    DIAGRAM_SOURCE_MARKER_PREFIX, *diagram_type*, a newline and the verbatim
    *source*, set at 1pt.
    """
    # Imported locally, as elsewhere in this module, so python-docx is only
    # loaded when a document is actually being built.
    from docx.shared import Pt

    marker_para = doc.add_paragraph(style="Normal")
    marker_run = marker_para.add_run(
        f"{DIAGRAM_SOURCE_MARKER_PREFIX}{diagram_type}\n{source}"
    )
    marker_run.font.size = Pt(1)  # tiny — not for display
# ---------------------------------------------------------------------------

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import contextlib
import tempfile
import uuid
from dataclasses import dataclass, field
@@ -287,15 +288,13 @@ def _release_regression(
# Corpus identity disclosure (FR-1109)
git_sha = "unknown"
try:
with contextlib.suppress(Exception):
git_sha = subprocess.check_output(
["git", "rev-parse", "HEAD"],
cwd=manifest_path.parent,
text=True,
stderr=subprocess.DEVNULL,
).strip()
except Exception:
pass
result.aggregate_output["corpus_id"] = {
"manifest_path": str(manifest_path),