generated from coulomb/repo-seed
feat: WP-0005 — diagram renderer integration (Mermaid, Graphviz, PlantUML)
Add pluggable DiagramRenderer protocol and RendererResult type to diagrams.py. Implement MermaidRenderer (mmdc), GraphvizRenderer (dot), PlantUMLRenderer backends with graceful source-only fallback and WarningRecord on missing tool (FR-533, FR-534, FR-538). Builder detects renderers at build time and embeds PNG with alt-text source marker for round-trip. Extend regression corpus with rendered_diagrams_document.md and skipif-gated integration tests. All 272 tests pass; ruff and mypy clean. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,10 @@
|
||||
Handles fenced diagram source blocks (mermaid, graphviz, plantuml) in the
|
||||
Markdown ↔ DOCX round trip.
|
||||
|
||||
Renderer abstraction:
|
||||
Each diagram type has a pluggable backend (DiagramRenderer protocol).
|
||||
detect_renderers() probes PATH and returns available backends.
|
||||
|
||||
Source-intent preservation:
|
||||
When a renderer is unavailable, diagram source is embedded as a verbatim
|
||||
code block and a source-intent marker paragraph is added so the importer
|
||||
@@ -13,21 +17,20 @@ from __future__ import annotations
|
||||
|
||||
import re
|
||||
import shutil
|
||||
from typing import TYPE_CHECKING
|
||||
import subprocess
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Protocol
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.document import Document as DocxDocument
|
||||
|
||||
from markidocx.errors import Severity, WarningRecord
|
||||
|
||||
# Fence info strings recognised as LEVEL3 auto-diagram sources.
DIAGRAM_TYPES: frozenset[str] = frozenset(("mermaid", "graphviz", "plantuml"))

# Diagram type -> name of the CLI executable that renders it.
_RENDERER_COMMANDS: dict[str, str] = dict(
    mermaid="mmdc",
    graphviz="dot",
    plantuml="plantuml",
)

# Prefix of the marker text stored in a DOCX paragraph to preserve
# source intent (FR-534).
DIAGRAM_SOURCE_MARKER_PREFIX = "diagram-source:"
|
||||
DIAGRAM_SOURCE_MARKER_RE = re.compile(
|
||||
@@ -35,6 +38,178 @@ DIAGRAM_SOURCE_MARKER_RE = re.compile(
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Renderer abstraction (FR-538)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class RendererResult:
    """Outcome of one attempt to render a diagram."""

    # Whether the backend reports the render as successful.
    success: bool
    # Location of the rendered image; defaults to None when none is reported.
    output_path: Path | None = None
    # Warning attached to the attempt; defaults to None.
    warning: WarningRecord | None = None
|
||||
|
||||
|
||||
class DiagramRenderer(Protocol):
    """Structural interface that every pluggable diagram backend satisfies."""

    def can_render(self, diagram_type: str) -> bool:
        """Report whether this backend is able to render *diagram_type*."""
        ...

    def render(
        self,
        source: str,
        diagram_type: str,
        output_path: Path,
    ) -> RendererResult:
        """Render *source* as a PNG at *output_path* and return a RendererResult."""
        ...
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Concrete renderer backends (T02–T04)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class MermaidRenderer:
    """Renderer backend for Mermaid diagrams using the ``mmdc`` CLI (FR-533)."""

    def can_render(self, diagram_type: str) -> bool:
        """Return True if *diagram_type* is ``"mermaid"`` and ``mmdc`` is on PATH."""
        return diagram_type == "mermaid" and bool(shutil.which("mmdc"))

    def render(
        self, source: str, diagram_type: str, output_path: Path
    ) -> RendererResult:
        """Render Mermaid *source* to a PNG at *output_path*.

        *diagram_type* is accepted for protocol compatibility and is not used.

        Returns a successful RendererResult only when ``mmdc`` exits with
        status 0 and *output_path* exists afterwards. Any exception raised
        while running ``mmdc`` (missing tool, non-zero exit, timeout) is
        converted into a failed result carrying a ``diagram-render-failed``
        WarningRecord instead of propagating.
        """
        # Launch the path resolved by shutil.which so that PATHEXT shims
        # (e.g. an npm-installed ``mmdc.cmd``) found by can_render() also start.
        executable = shutil.which("mmdc") or "mmdc"
        with tempfile.TemporaryDirectory() as tmp:
            src_file = Path(tmp) / "diagram.mmd"
            src_file.write_text(source, encoding="utf-8")
            try:
                subprocess.run(
                    [executable, "-i", str(src_file), "-o", str(output_path)],
                    capture_output=True,
                    timeout=30,
                    check=True,
                )
            except Exception as exc:  # best-effort: every failure becomes a warning
                return self._failure(self._describe(exc))
        # A clean exit without an output file must not be reported as success,
        # otherwise the caller falls back to source-only without any warning.
        if not Path(output_path).exists():
            return self._failure("output PNG not found")
        return RendererResult(success=True, output_path=output_path)

    @staticmethod
    def _describe(exc: Exception) -> str:
        """Return ``str(exc)`` with any captured stderr text appended."""
        detail = str(exc)
        captured = getattr(exc, "stderr", None)
        if isinstance(captured, bytes):
            captured = captured.decode("utf-8", errors="replace")
        captured = str(captured or "").strip()
        return f"{detail} ({captured})" if captured else detail

    @staticmethod
    def _failure(detail: str) -> RendererResult:
        """Build a failed RendererResult with a ``diagram-render-failed`` warning."""
        return RendererResult(
            success=False,
            warning=WarningRecord(
                severity=Severity.WARNING,
                reason="diagram-render-failed",
                construct=f"mermaid: {detail}",
            ),
        )
|
||||
|
||||
|
||||
class GraphvizRenderer:
    """Renderer backend for Graphviz diagrams using the ``dot`` CLI (FR-533)."""

    def can_render(self, diagram_type: str) -> bool:
        """Return True if *diagram_type* is ``"graphviz"`` and ``dot`` is on PATH."""
        return diagram_type == "graphviz" and bool(shutil.which("dot"))

    def render(
        self, source: str, diagram_type: str, output_path: Path
    ) -> RendererResult:
        """Render Graphviz *source* to a PNG at *output_path*.

        *diagram_type* is accepted for protocol compatibility and is not used.

        Returns a successful RendererResult only when ``dot`` exits with
        status 0 and *output_path* exists afterwards. Any exception raised
        while running ``dot`` (missing tool, non-zero exit, timeout) is
        converted into a failed result carrying a ``diagram-render-failed``
        WarningRecord instead of propagating.
        """
        # Launch the path resolved by shutil.which so the executable that
        # can_render() detected is the one that is actually started.
        executable = shutil.which("dot") or "dot"
        with tempfile.TemporaryDirectory() as tmp:
            src_file = Path(tmp) / "diagram.dot"
            src_file.write_text(source, encoding="utf-8")
            try:
                subprocess.run(
                    [executable, "-Tpng", str(src_file), "-o", str(output_path)],
                    capture_output=True,
                    timeout=30,
                    check=True,
                )
            except Exception as exc:  # best-effort: every failure becomes a warning
                return self._failure(self._describe(exc))
        # A clean exit without an output file must not be reported as success,
        # otherwise the caller falls back to source-only without any warning.
        if not Path(output_path).exists():
            return self._failure("output PNG not found")
        return RendererResult(success=True, output_path=output_path)

    @staticmethod
    def _describe(exc: Exception) -> str:
        """Return ``str(exc)`` with any captured stderr text appended."""
        detail = str(exc)
        captured = getattr(exc, "stderr", None)
        if isinstance(captured, bytes):
            captured = captured.decode("utf-8", errors="replace")
        captured = str(captured or "").strip()
        return f"{detail} ({captured})" if captured else detail

    @staticmethod
    def _failure(detail: str) -> RendererResult:
        """Build a failed RendererResult with a ``diagram-render-failed`` warning."""
        return RendererResult(
            success=False,
            warning=WarningRecord(
                severity=Severity.WARNING,
                reason="diagram-render-failed",
                construct=f"graphviz: {detail}",
            ),
        )
|
||||
|
||||
|
||||
class PlantUMLRenderer:
    """Renderer backend for PlantUML diagrams using the ``plantuml`` CLI (FR-533)."""

    def can_render(self, diagram_type: str) -> bool:
        """Return True if *diagram_type* is ``"plantuml"`` and ``plantuml`` is on PATH."""
        return diagram_type == "plantuml" and bool(shutil.which("plantuml"))

    def render(
        self, source: str, diagram_type: str, output_path: Path
    ) -> RendererResult:
        """Render PlantUML *source* to a PNG and copy it to *output_path*.

        *diagram_type* is accepted for protocol compatibility and is not used.

        The ``plantuml`` CLI is run on a temporary source file and the PNG it
        produces is copied to *output_path*. Any exception raised while
        running the tool or copying the image, or a run that produces no PNG,
        yields a failed result carrying a ``diagram-render-failed``
        WarningRecord instead of propagating.
        """
        # Launch the path resolved by shutil.which so the executable that
        # can_render() detected is the one that is actually started.
        executable = shutil.which("plantuml") or "plantuml"
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            src_file = tmp_path / "diagram.puml"
            src_file.write_text(source, encoding="utf-8")
            try:
                subprocess.run(
                    # The source file is written as UTF-8, so tell PlantUML
                    # rather than relying on the platform default charset.
                    [executable, "-tpng", "-charset", "UTF-8", str(src_file)],
                    capture_output=True,
                    timeout=30,
                    check=True,
                )
                produced = self._find_png(tmp_path)
                if produced is None:
                    return self._failure("output PNG not found")
                shutil.copy2(str(produced), str(output_path))
            except Exception as exc:  # best-effort: every failure becomes a warning
                return self._failure(self._describe(exc))
        return RendererResult(success=True, output_path=output_path)

    @staticmethod
    def _find_png(directory: Path) -> Path | None:
        """Return the PNG written into *directory*, or None if there is none.

        ``diagram.png`` is preferred. NOTE(review): PlantUML appears to name
        the output after a diagram name given on ``@startuml <name>`` — confirm;
        for that case the first PNG in sorted order is used instead.
        """
        expected = directory / "diagram.png"
        if expected.exists():
            return expected
        return next(iter(sorted(directory.glob("*.png"))), None)

    @staticmethod
    def _describe(exc: Exception) -> str:
        """Return ``str(exc)`` with any captured stderr text appended."""
        detail = str(exc)
        captured = getattr(exc, "stderr", None)
        if isinstance(captured, bytes):
            captured = captured.decode("utf-8", errors="replace")
        captured = str(captured or "").strip()
        return f"{detail} ({captured})" if captured else detail

    @staticmethod
    def _failure(detail: str) -> RendererResult:
        """Build a failed RendererResult with a ``diagram-render-failed`` warning."""
        return RendererResult(
            success=False,
            warning=WarningRecord(
                severity=Severity.WARNING,
                reason="diagram-render-failed",
                construct=f"plantuml: {detail}",
            ),
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Renderer detection (T01)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# One instance of every known backend, consulted in this order.
_ALL_BACKENDS: list[DiagramRenderer] = [
    MermaidRenderer(),
    GraphvizRenderer(),
    PlantUMLRenderer(),
]


def detect_renderers() -> dict[str, DiagramRenderer]:
    """Map each renderable diagram type to the first backend that accepts it.

    Every type in DIAGRAM_TYPES is offered to the backends in _ALL_BACKENDS
    order; types that no backend can render are left out of the result
    (FR-538).
    """
    found: dict[str, DiagramRenderer] = {}
    for kind in DIAGRAM_TYPES:
        chosen = next(
            (candidate for candidate in _ALL_BACKENDS if candidate.can_render(kind)),
            None,
        )
        if chosen is not None:
            found[kind] = chosen
    return found
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def is_diagram_info(info: str) -> bool:
    """Tell whether the fence info string *info* names a known diagram type.

    Surrounding whitespace and letter case are ignored; a falsy *info* is
    treated as the empty string.
    """
    normalized = info.strip().lower() if info else ""
    return normalized in DIAGRAM_TYPES
|
||||
@@ -42,8 +217,8 @@ def is_diagram_info(info: str) -> bool:
|
||||
|
||||
def check_renderer(diagram_type: str) -> bool:
    """Return True if a renderer backend for *diagram_type* is available.

    Availability is whatever detect_renderers() reports at call time. The
    lookup lower-cases *diagram_type* first, so it is case-insensitive.
    """
    return diagram_type.lower() in detect_renderers()
|
||||
|
||||
|
||||
def render_diagram_block(
|
||||
@@ -54,109 +229,66 @@ def render_diagram_block(
|
||||
) -> None:
|
||||
"""Render a diagram fenced block into *doc* (FR-533, FR-534).
|
||||
|
||||
If a renderer is available → renders to PNG and embeds the image.
|
||||
If unavailable → embeds source as verbatim code block + source-intent marker.
|
||||
Never silently discards source (FR-1205).
|
||||
Calls detect_renderers() to find available backends. If a renderer is
|
||||
available for *diagram_type*, renders to PNG and embeds the image.
|
||||
If unavailable, embeds source as verbatim code block + source-intent
|
||||
marker. Never silently discards source (FR-1205).
|
||||
"""
|
||||
from docx.shared import Pt
|
||||
|
||||
from markidocx.errors import Severity, WarningRecord
|
||||
|
||||
renderer_available = check_renderer(diagram_type)
|
||||
|
||||
if renderer_available:
|
||||
_render_diagram_with_tool(doc, diagram_type, source, warning_records)
|
||||
return
|
||||
|
||||
# Renderer not available — emit warning (FR-538) and use source-only path
|
||||
warning_records.append(
|
||||
WarningRecord(
|
||||
severity=Severity.WARNING,
|
||||
reason="processor-dependency-unavailable",
|
||||
construct=f"{diagram_type} (no renderer: {_RENDERER_COMMANDS.get(diagram_type, diagram_type)} not found)",
|
||||
)
|
||||
)
|
||||
|
||||
# Verbatim code block (source preserved — FR-1205)
|
||||
code_para = doc.add_paragraph(style="Normal")
|
||||
run = code_para.add_run(f"```{diagram_type}\n{source}\n```")
|
||||
run.font.name = "Courier New"
|
||||
run.font.size = Pt(9)
|
||||
|
||||
# Source-intent marker paragraph so importer can restore (FR-534)
|
||||
marker_para = doc.add_paragraph(style="Normal")
|
||||
marker_run = marker_para.add_run(f"{DIAGRAM_SOURCE_MARKER_PREFIX}{diagram_type}\n{source}")
|
||||
marker_run.font.size = Pt(1) # make tiny — not for display
|
||||
|
||||
|
||||
def _render_diagram_with_tool(
    doc: DocxDocument,
    diagram_type: str,
    source: str,
    warning_records: list,
) -> None:
    """Render *source* with an available backend and embed the PNG in *doc*.

    The backend for *diagram_type* is looked up via detect_renderers(). On a
    successful render the image is added to *doc* followed by a source-intent
    marker (FR-534). If rendering fails, or no backend is available (FR-538),
    a WarningRecord is appended to *warning_records* and the source is written
    as a verbatim code block plus the marker, so it is never dropped.
    """
    import tempfile
    from pathlib import Path

    from docx.shared import Inches, Pt

    from markidocx.errors import Severity, WarningRecord

    renderer = detect_renderers().get(diagram_type)

    if renderer is not None:
        # Render to a temp PNG and embed it while the temp dir is still alive.
        with tempfile.TemporaryDirectory() as tmp:
            result = renderer.render(source, diagram_type, Path(tmp) / "diagram.png")
            if result.success and result.output_path and result.output_path.exists():
                para = doc.add_paragraph(style="Normal")
                para.add_run().add_picture(str(result.output_path), width=Inches(5))
                # Source-intent marker for round-trip (FR-534)
                _add_source_marker(doc, diagram_type, source)
                return

        # Render failed: always leave a warning, even if the backend gave none,
        # so the fall-through to source-only is never silent.
        warning_records.append(
            result.warning
            or WarningRecord(
                severity=Severity.WARNING,
                reason="diagram-render-failed",
                construct=f"{diagram_type}: renderer produced no output",
            )
        )
    else:
        # No renderer available: emit processor-dependency warning (FR-538)
        missing_cmd = _RENDERER_COMMANDS.get(diagram_type, diagram_type)
        warning_records.append(
            WarningRecord(
                severity=Severity.WARNING,
                reason="processor-dependency-unavailable",
                construct=f"{diagram_type} (no renderer: {missing_cmd} not found)",
            )
        )

    # Source-only fallback: verbatim code block + source-intent marker
    code_para = doc.add_paragraph(style="Normal")
    run = code_para.add_run(f"```{diagram_type}\n{source}\n```")
    run.font.name = "Courier New"
    run.font.size = Pt(9)
    _add_source_marker(doc, diagram_type, source)
|
||||
|
||||
|
||||
def _add_source_marker(
    doc: DocxDocument, diagram_type: str, source: str
) -> None:
    """Add a tiny source-intent marker paragraph for round-trip (FR-534).

    The paragraph text is DIAGRAM_SOURCE_MARKER_PREFIX followed by
    *diagram_type*, a newline, and the verbatim *source*.
    """
    # Imported here because docx is not imported at module level at runtime.
    from docx.shared import Pt

    marker_para = doc.add_paragraph(style="Normal")
    marker_run = marker_para.add_run(
        f"{DIAGRAM_SOURCE_MARKER_PREFIX}{diagram_type}\n{source}"
    )
    marker_run.font.size = Pt(1)  # tiny — not for display
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import tempfile
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
@@ -287,15 +288,13 @@ def _release_regression(
|
||||
|
||||
# Corpus identity disclosure (FR-1109)
|
||||
git_sha = "unknown"
|
||||
try:
|
||||
with contextlib.suppress(Exception):
|
||||
git_sha = subprocess.check_output(
|
||||
["git", "rev-parse", "HEAD"],
|
||||
cwd=manifest_path.parent,
|
||||
text=True,
|
||||
stderr=subprocess.DEVNULL,
|
||||
).strip()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
result.aggregate_output["corpus_id"] = {
|
||||
"manifest_path": str(manifest_path),
|
||||
|
||||
Reference in New Issue
Block a user