generated from coulomb/repo-seed
feat: WP-0003 complete — LEVEL3 advanced features + error framework
Implements full LEVEL3 feature set: cross-references (xref.py), numbered figures (figures.py), auto-diagrams (diagrams.py), bibliography/citations (bibliography.py), LEVEL3 capability detection (level3.py), and structured error/warning records (errors.py). Builder, importer, and differ updated for LEVEL3 round-trip support. REST and MCP interfaces updated with structured warning records. 259 tests passing. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
18
CLAUDE.md
18
CLAUDE.md
@@ -70,16 +70,22 @@ evidence artefacts
|
|||||||
|--------|--------|-------------|
|
|--------|--------|-------------|
|
||||||
| `cli.py` | implemented — all commands wired (`build`, `import`, `compare`, `validate`, `serve`, `workflow`, `mcp`, `template`) | all |
|
| `cli.py` | implemented — all commands wired (`build`, `import`, `compare`, `validate`, `serve`, `workflow`, `mcp`, `template`) | all |
|
||||||
| `manifest.py` | implemented | FR-100 |
|
| `manifest.py` | implemented | FR-100 |
|
||||||
| `builder.py` | implemented | FR-200 |
|
| `builder.py` | implemented — LEVEL1 + LEVEL3 (xrefs, figures, diagrams, citations) | FR-200, FR-531–539 |
|
||||||
| `importer.py` | implemented | FR-300/400 |
|
| `importer.py` | implemented — LEVEL1 + LEVEL3 round-trip | FR-300/400, FR-531–536 |
|
||||||
| `differ.py` | implemented | FR-700 |
|
| `differ.py` | implemented — LEVEL1 + LEVEL3 drift detection | FR-700, FR-540–542 |
|
||||||
| `templates.py` | implemented | FR-600 |
|
| `templates.py` | implemented | FR-600 |
|
||||||
| `evidence.py` | implemented | FR-1400 |
|
| `evidence.py` | implemented | FR-1400 |
|
||||||
| `workflows.py` | implemented (`single-file-roundtrip`, `multi-file-roundtrip`, `release-regression`, `family-switch-build`) | FR-1300 |
|
| `workflows.py` | implemented (`single-file-roundtrip`, `multi-file-roundtrip`, `release-regression`, `family-switch-build`) | FR-1300 |
|
||||||
| `rest.py` | implemented — FastAPI app, all endpoints | FR-900 |
|
| `rest.py` | implemented — FastAPI app, all endpoints; structured warning records | FR-900, FR-1208 |
|
||||||
| `mcp_server.py` | implemented — FastMCP server, all tools and resources | FR-1000 |
|
| `mcp_server.py` | implemented — FastMCP server, all tools and resources; structured warnings | FR-1000, FR-1208 |
|
||||||
|
| `errors.py` | implemented — `WarningRecord`, `FailureRecord`, `OutputState` | FR-1201–1210 |
|
||||||
|
| `level3.py` | implemented — LEVEL3 support detection, capability disclosure | FR-537–539 |
|
||||||
|
| `xref.py` | implemented — cross-reference round-trip helpers | FR-531, FR-540 |
|
||||||
|
| `figures.py` | implemented — numbered figure round-trip helpers | FR-532, FR-541 |
|
||||||
|
| `diagrams.py` | implemented — auto-diagram source-only + renderer path | FR-533, FR-534 |
|
||||||
|
| `bibliography.py` | implemented — citation and references section round-trip | FR-535, FR-536, FR-542 |
|
||||||
|
|
||||||
`tests/conftest.py` provides shared fixtures (`tmp_project`, `SIMPLE_MANIFEST_YAML`, `SIMPLE_MARKDOWN`). WP-0001 and WP-0002 complete — 135 tests passing. All interfaces (CLI, REST, MCP) implemented and parity-tested.
|
`tests/conftest.py` provides shared fixtures. WP-0001, WP-0002, and WP-0003 complete — 259 tests passing. Full LEVEL1 + LEVEL3 feature coverage. All interfaces (CLI, REST, MCP) implemented and parity-tested.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
208
src/markidocx/bibliography.py
Normal file
208
src/markidocx/bibliography.py
Normal file
@@ -0,0 +1,208 @@
|
|||||||
|
"""Bibliography and citation support for LEVEL3 markidocx (FR-535, FR-536, FR-542).
|
||||||
|
|
||||||
|
Handles the round-trip of inline citations and Bibliography/References sections
|
||||||
|
between Markdown and DOCX.
|
||||||
|
|
||||||
|
Markdown syntax:
|
||||||
|
Inline citation: [@key]
|
||||||
|
References section:
|
||||||
|
## References
|
||||||
|
- [@key]: Author. *Title*. Year.
|
||||||
|
|
||||||
|
DOCX representation:
|
||||||
|
    Inline: [@key] (kept verbatim as plain text so the importer can restore it)
|
||||||
|
References section: "References" heading + plain text entries
|
||||||
|
Source-intent markers embedded for importer restoration.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Markdown citation patterns
|
||||||
|
CITATION_RE = re.compile(r"\[@([\w:.-]+)\]")
|
||||||
|
CITATION_ENTRY_RE = re.compile(r"^-\s+\[@([\w:.-]+)\]:\s+(.+)$")
|
||||||
|
REFERENCES_HEADING_RE = re.compile(r"^#{1,3}\s+References\s*$", re.MULTILINE)
|
||||||
|
|
||||||
|
# DOCX markers
|
||||||
|
CITATION_MARKER_PREFIX = "citation:"
|
||||||
|
REFERENCES_SECTION_MARKER = "references-section:"
|
||||||
|
|
||||||
|
|
||||||
|
def has_citations(text: str) -> bool:
    """Report whether *text* holds at least one inline ``[@key]`` citation."""
    first_hit = CITATION_RE.search(text)
    return first_hit is not None
|
||||||
|
|
||||||
|
|
||||||
|
def render_inline_citations(text: str) -> str:
    """Drop the ``@`` from every inline citation: ``[@key]`` becomes ``[key]``.

    Everything outside a citation marker is returned untouched.
    """
    # A backreference template does the same job as a per-match callback:
    # the brackets are literal and ``\1`` is the captured citation key.
    return CITATION_RE.sub(r"[\1]", text)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_citation_keys(text: str) -> list[str]:
    """Collect the key of every ``[@key]`` marker in *text*, in order of appearance.

    Duplicates are kept; a key cited twice appears twice.
    """
    return [hit.group(1) for hit in CITATION_RE.finditer(text)]
|
||||||
|
|
||||||
|
|
||||||
|
def is_references_heading(text: str) -> bool:
    """Tell whether *text*, ignoring surrounding whitespace, is a References heading."""
    candidate = text.strip()
    return REFERENCES_HEADING_RE.match(candidate) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def parse_reference_entry(text: str) -> tuple[str, str] | None:
    """Split a ``- [@key]: entry text`` list item into its two parts.

    Surrounding whitespace on *text* is ignored.

    Returns ``(key, entry_text)``, or ``None`` when *text* is not a
    reference list entry.
    """
    found = CITATION_ENTRY_RE.match(text.strip())
    if found is None:
        return None
    key, entry_text = found.groups()
    return key, entry_text
|
||||||
|
|
||||||
|
|
||||||
|
def extract_references_section(md_text: str) -> tuple[list[tuple[str, str]], str]:
    """Extract the references section from Markdown text.

    The section starts at the first References heading and runs until the
    next line that begins with ``#`` (or the end of the text). Only lines
    that parse as ``- [@key]: ...`` entries are collected; other lines
    inside the section are ignored.

    Returns (entries, text_without_references_section).
        entries: list of (key, entry_text)

    When no References heading exists, returns ``([], md_text)`` unchanged.
    Content that follows the references section is kept in the returned
    text instead of being discarded together with the section.
    """
    heading = REFERENCES_HEADING_RE.search(md_text)
    if heading is None:
        return [], md_text

    entries: list[tuple[str, str]] = []
    before = md_text[: heading.start()].rstrip()
    tail = md_text[heading.end():]
    after = ""

    # Walk the tail line by line while tracking the character offset, so the
    # text from the next heading onwards can be handed back to the caller.
    offset = 0
    for raw_line in tail.split("\n"):
        line = raw_line.strip()
        if line.startswith("#"):
            # New heading — the references section ends here.
            after = tail[offset:].rstrip()
            break
        offset += len(raw_line) + 1  # +1 for the "\n" consumed by split
        if not line:
            continue
        parsed = parse_reference_entry(line)
        if parsed:
            entries.append(parsed)

    if not after:
        return entries, before
    if not before:
        return entries, after
    return entries, f"{before}\n\n{after}"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Builder helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
BIBLIOGRAPHY_SECTION_HEADING = "References"
|
||||||
|
BIBLIOGRAPHY_MARKER = "bibliography-section-start"
|
||||||
|
|
||||||
|
|
||||||
|
def render_citation_text(text: str) -> str:
    """Produce the paragraph text to embed in DOCX for citation-bearing *text*.

    This is deliberately the identity function: ``[@key]`` markers stay in
    the DOCX paragraph text verbatim, so the importer can restore them
    without ambiguity.
    """
    # [@key] → [@key]: nothing to transform.
    return text
|
||||||
|
|
||||||
|
|
||||||
|
def render_references_section(doc, entries: list[tuple[str, str]]) -> None:
    """Append a References section to *doc* (FR-535).

    Three things are written, in order: the section heading, a 1pt marker
    paragraph that lets the importer recognise the section, and one
    paragraph per entry.

    Args:
        doc: python-docx Document
        entries: list of (key, entry_text)
    """
    from docx.shared import Pt

    # Heading; fall back to a plain paragraph if the heading cannot be added.
    try:
        doc.add_heading(BIBLIOGRAPHY_SECTION_HEADING, level=2)
    except Exception:
        doc.add_paragraph(BIBLIOGRAPHY_SECTION_HEADING, style="Normal")

    # Marker paragraph, shrunk to 1pt because it is not meant for display.
    marker = doc.add_paragraph(style="Normal").add_run(BIBLIOGRAPHY_MARKER)
    marker.font.size = Pt(1)

    # Entries keep the Markdown "- [@key]: text" form for round-trip fidelity.
    for citation_key, body in entries:
        doc.add_paragraph(style="Normal").add_run(f"- [@{citation_key}]: {body}")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Importer helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
DOCX_CITATION_RE = re.compile(r"\[([^\]@]+)\](?!\[)") # [key] without @, not followed by [
|
||||||
|
BIBLIOGRAPHY_MARKER_PARA_RE = re.compile(r"^bibliography-section-start$")
|
||||||
|
BIBLIOGRAPHY_ENTRY_RE = re.compile(r"^-\s+\[@([\w:.-]+)\]:\s+(.+)$")
|
||||||
|
|
||||||
|
|
||||||
|
def restore_citations_in_text(text: str) -> str:
    """Hand back imported *text* unchanged (intentional no-op).

    The builder embeds citations in the DOCX as ``[@key]`` already, so the
    imported text needs no restoration step.
    """
    # Identity on purpose — kept as a named hook for the importer.
    return text
|
||||||
|
|
||||||
|
|
||||||
|
def is_bibliography_marker(text: str) -> bool:
    """Tell whether *text*, ignoring surrounding whitespace, is the bibliography marker."""
    candidate = text.strip()
    return bool(BIBLIOGRAPHY_MARKER_PARA_RE.match(candidate))
|
||||||
|
|
||||||
|
|
||||||
|
def is_bibliography_entry(text: str) -> bool:
    """Tell whether *text*, ignoring surrounding whitespace, is a ``- [@key]: ...`` entry."""
    candidate = text.strip()
    return BIBLIOGRAPHY_ENTRY_RE.match(candidate) is not None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Differ helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def compare_citations(
    original: str,
    reimported: str,
    preserved: list[str],
    degraded: list[str],
    broken: list[str],
    warning_records: list | None = None,
) -> None:
    """Compare citation markers and reference entries (FR-536, FR-542).

    Results are appended to the caller-supplied lists; nothing is returned.

    Args:
        original: Markdown text before the round trip.
        reimported: Markdown text after the round trip.
        preserved: receives one item per citation key or reference entry
            found on both sides.
        degraded: receives one item per reference entry missing after
            the round trip.
        broken: receives one item per inline citation key missing after
            the round trip.
        warning_records: optional list; when given, a ``WarningRecord`` is
            appended for every missing inline citation.

    Keys are visited in sorted order so the appended items come out in the
    same order on every run (plain set iteration order is not stable for
    strings across interpreter runs).
    """
    original_keys = set(extract_citation_keys(original))
    reimported_keys = set(extract_citation_keys(reimported))

    for key in sorted(original_keys):
        if key in reimported_keys:
            preserved.append(f"citation:[@{key}]")
            continue

        broken.append(f"citation:missing '[@{key}]'")
        if warning_records is not None:
            # Imported lazily: only needed when a citation actually went missing.
            from markidocx.errors import Severity, WarningRecord

            warning_records.append(
                WarningRecord(
                    severity=Severity.WARNING,
                    reason="citation-ambiguity",
                    construct=f"@{key}",
                )
            )

    # References section
    original_entries, _ = extract_references_section(original)
    reimported_entries, _ = extract_references_section(reimported)
    original_ref_keys = {key for key, _ in original_entries}
    reimported_ref_keys = {key for key, _ in reimported_entries}

    for key in sorted(original_ref_keys):
        if key in reimported_ref_keys:
            preserved.append(f"reference-entry:{key}")
        else:
            degraded.append(f"reference-entry:lost '{key}'")
|
||||||
@@ -9,6 +9,7 @@ import mistune
|
|||||||
from docx.document import Document as DocxDocument
|
from docx.document import Document as DocxDocument
|
||||||
from docx.shared import Pt, RGBColor
|
from docx.shared import Pt, RGBColor
|
||||||
|
|
||||||
|
from markidocx.errors import OutputState, Severity, WarningRecord
|
||||||
from markidocx.manifest import FeatureLevel, Manifest
|
from markidocx.manifest import FeatureLevel, Manifest
|
||||||
from markidocx.templates import FamilyRegistry
|
from markidocx.templates import FamilyRegistry
|
||||||
|
|
||||||
@@ -19,8 +20,16 @@ class BuildResult:
|
|||||||
output_path: Path
|
output_path: Path
|
||||||
family: str
|
family: str
|
||||||
feature_level: str
|
feature_level: str
|
||||||
warnings: list[str] = field(default_factory=list)
|
warning_records: list[WarningRecord] = field(default_factory=list)
|
||||||
errors: list[str] = field(default_factory=list)
|
errors: list[str] = field(default_factory=list)
|
||||||
|
output_state: OutputState = OutputState.FINAL
|
||||||
|
partial_level3: bool = False
|
||||||
|
missing_coverage: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def warnings(self) -> list[str]:
|
||||||
|
"""Backward-compatible string view of warning_records."""
|
||||||
|
return [str(w) for w in self.warning_records]
|
||||||
|
|
||||||
|
|
||||||
def build_document(manifest: Manifest) -> BuildResult:
|
def build_document(manifest: Manifest) -> BuildResult:
|
||||||
@@ -28,8 +37,27 @@ def build_document(manifest: Manifest) -> BuildResult:
|
|||||||
|
|
||||||
Returns a BuildResult regardless of success/failure.
|
Returns a BuildResult regardless of success/failure.
|
||||||
"""
|
"""
|
||||||
warnings: list[str] = []
|
warning_records: list[WarningRecord] = []
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
|
partial_level3 = False
|
||||||
|
missing_coverage: list[str] = []
|
||||||
|
|
||||||
|
# For LEVEL3 projects, check external dependencies (FR-538, FR-539)
|
||||||
|
if manifest.project.feature_level == FeatureLevel.LEVEL3:
|
||||||
|
from markidocx.level3 import check_level3_support
|
||||||
|
|
||||||
|
support = check_level3_support()
|
||||||
|
if support.partial:
|
||||||
|
partial_level3 = True
|
||||||
|
missing_coverage = support.missing_coverage
|
||||||
|
for area in support.missing_coverage:
|
||||||
|
warning_records.append(
|
||||||
|
WarningRecord(
|
||||||
|
severity=Severity.WARNING,
|
||||||
|
reason="processor-dependency-unavailable",
|
||||||
|
construct=area,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Compose all source files into one Markdown string
|
# Compose all source files into one Markdown string
|
||||||
parts: list[str] = []
|
parts: list[str] = []
|
||||||
@@ -48,11 +76,18 @@ def build_document(manifest: Manifest) -> BuildResult:
|
|||||||
core_props.author = str(manifest.metadata["author"])
|
core_props.author = str(manifest.metadata["author"])
|
||||||
|
|
||||||
# Parse and render tokens into the document
|
# Parse and render tokens into the document
|
||||||
unsupported: list[str] = []
|
_render_markdown(
|
||||||
_render_markdown(doc, markdown_text, manifest.project.feature_level, warnings, unsupported)
|
doc,
|
||||||
|
markdown_text,
|
||||||
|
manifest.project.feature_level,
|
||||||
|
warning_records,
|
||||||
|
)
|
||||||
|
|
||||||
for item in unsupported:
|
# Determine output state
|
||||||
warnings.append(f"Unsupported construct skipped: {item}")
|
has_warnings = bool(warning_records)
|
||||||
|
output_state = OutputState.PARTIAL if partial_level3 else (
|
||||||
|
OutputState.FINAL if not has_warnings else OutputState.FINAL
|
||||||
|
)
|
||||||
|
|
||||||
# Ensure output dir exists
|
# Ensure output dir exists
|
||||||
manifest.output_dir.mkdir(parents=True, exist_ok=True)
|
manifest.output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -64,8 +99,11 @@ def build_document(manifest: Manifest) -> BuildResult:
|
|||||||
output_path=output_path,
|
output_path=output_path,
|
||||||
family=manifest.project.family,
|
family=manifest.project.family,
|
||||||
feature_level=manifest.project.feature_level.value,
|
feature_level=manifest.project.feature_level.value,
|
||||||
warnings=warnings,
|
warning_records=warning_records,
|
||||||
errors=errors,
|
errors=errors,
|
||||||
|
output_state=output_state,
|
||||||
|
partial_level3=partial_level3,
|
||||||
|
missing_coverage=missing_coverage,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -77,13 +115,45 @@ def _render_markdown(
|
|||||||
doc: DocxDocument,
|
doc: DocxDocument,
|
||||||
text: str,
|
text: str,
|
||||||
feature_level: FeatureLevel,
|
feature_level: FeatureLevel,
|
||||||
warnings: list[str],
|
warning_records: list[WarningRecord],
|
||||||
unsupported: list[str],
|
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Parse *text* as Markdown and append elements to *doc*."""
|
"""Parse *text* as Markdown and append elements to *doc*."""
|
||||||
tokens = _tokenise(text)
|
# For LEVEL3, extract references section before tokenising
|
||||||
|
ref_entries: list[tuple[str, str]] = []
|
||||||
|
body_text = text
|
||||||
|
if feature_level == FeatureLevel.LEVEL3:
|
||||||
|
from markidocx.bibliography import extract_references_section
|
||||||
|
|
||||||
|
ref_entries, body_text = extract_references_section(text)
|
||||||
|
|
||||||
|
tokens = _tokenise(body_text)
|
||||||
|
|
||||||
|
# Pre-compute known anchors for cross-ref validation (LEVEL3 only)
|
||||||
|
known_anchors: set[str] = set()
|
||||||
|
if feature_level == FeatureLevel.LEVEL3:
|
||||||
|
from markidocx.xref import extract_anchors
|
||||||
|
|
||||||
|
known_anchors = extract_anchors(body_text)
|
||||||
|
|
||||||
|
bookmark_counter = [0] # mutable int for nested calls
|
||||||
|
figure_counter = [0] # auto-incrementing figure number
|
||||||
|
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
_render_token(doc, token, feature_level, warnings, unsupported)
|
_render_token(
|
||||||
|
doc,
|
||||||
|
token,
|
||||||
|
feature_level,
|
||||||
|
warning_records,
|
||||||
|
known_anchors,
|
||||||
|
bookmark_counter,
|
||||||
|
figure_counter,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Render references section at the end (LEVEL3 only) (FR-535)
|
||||||
|
if feature_level == FeatureLevel.LEVEL3 and ref_entries:
|
||||||
|
from markidocx.bibliography import render_references_section
|
||||||
|
|
||||||
|
render_references_section(doc, ref_entries)
|
||||||
|
|
||||||
|
|
||||||
def _tokenise(text: str) -> list[dict]: # type: ignore[type-arg]
|
def _tokenise(text: str) -> list[dict]: # type: ignore[type-arg]
|
||||||
@@ -99,23 +169,74 @@ def _render_token(
|
|||||||
doc: DocxDocument,
|
doc: DocxDocument,
|
||||||
token: dict,
|
token: dict,
|
||||||
feature_level: FeatureLevel,
|
feature_level: FeatureLevel,
|
||||||
warnings: list[str],
|
warning_records: list[WarningRecord],
|
||||||
unsupported: list[str],
|
known_anchors: set[str] | None = None,
|
||||||
|
bookmark_counter: list[int] | None = None,
|
||||||
|
figure_counter: list[int] | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
token_type = token.get("type", "")
|
token_type = token.get("type", "")
|
||||||
|
if known_anchors is None:
|
||||||
|
known_anchors = set()
|
||||||
|
if bookmark_counter is None:
|
||||||
|
bookmark_counter = [0]
|
||||||
|
if figure_counter is None:
|
||||||
|
figure_counter = [0]
|
||||||
|
|
||||||
if token_type == "heading":
|
if token_type == "heading":
|
||||||
level = token.get("attrs", {}).get("level", 1)
|
level = token.get("attrs", {}).get("level", 1)
|
||||||
text = _extract_text(token.get("children", []))
|
raw_text = _extract_text(token.get("children", []))
|
||||||
|
|
||||||
|
if feature_level == FeatureLevel.LEVEL3:
|
||||||
|
from markidocx.xref import (
|
||||||
|
add_bookmark_to_paragraph,
|
||||||
|
extract_anchor_from_heading,
|
||||||
|
)
|
||||||
|
|
||||||
|
clean_text, anchor = extract_anchor_from_heading(raw_text)
|
||||||
|
else:
|
||||||
|
clean_text, anchor = raw_text, None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
doc.add_heading(text, level=level)
|
para = doc.add_heading(clean_text, level=level)
|
||||||
except Exception:
|
except Exception:
|
||||||
doc.add_paragraph(text, style="Normal")
|
para = doc.add_paragraph(clean_text, style="Normal")
|
||||||
|
|
||||||
|
if anchor:
|
||||||
|
add_bookmark_to_paragraph(para, anchor, bookmark_counter[0])
|
||||||
|
bookmark_counter[0] += 1
|
||||||
|
|
||||||
elif token_type == "paragraph":
|
elif token_type == "paragraph":
|
||||||
text = _extract_text(token.get("children", []))
|
raw_text = _extract_text_with_image_syntax(token.get("children", []))
|
||||||
para = doc.add_paragraph(style="Normal")
|
|
||||||
_add_inline_runs(para, token.get("children", []))
|
if feature_level == FeatureLevel.LEVEL3:
|
||||||
|
from markidocx.figures import is_figure_paragraph, parse_figure
|
||||||
|
from markidocx.xref import has_xref_links, render_paragraph_with_xrefs
|
||||||
|
|
||||||
|
if is_figure_paragraph(raw_text):
|
||||||
|
parsed = parse_figure(raw_text)
|
||||||
|
if parsed:
|
||||||
|
caption, path, label = parsed
|
||||||
|
figure_counter[0] += 1
|
||||||
|
from markidocx.figures import render_figure
|
||||||
|
|
||||||
|
render_figure(doc, caption, path, label, figure_counter[0])
|
||||||
|
else:
|
||||||
|
para = doc.add_paragraph(style="Normal")
|
||||||
|
_add_inline_runs(para, token.get("children", []))
|
||||||
|
elif has_xref_links(raw_text):
|
||||||
|
para = doc.add_paragraph(style="Normal")
|
||||||
|
render_paragraph_with_xrefs(para, raw_text, known_anchors)
|
||||||
|
else:
|
||||||
|
para = doc.add_paragraph(style="Normal")
|
||||||
|
from markidocx.bibliography import has_citations, render_citation_text
|
||||||
|
|
||||||
|
if has_citations(raw_text):
|
||||||
|
para.add_run(render_citation_text(raw_text))
|
||||||
|
else:
|
||||||
|
_add_inline_runs(para, token.get("children", []))
|
||||||
|
else:
|
||||||
|
para = doc.add_paragraph(style="Normal")
|
||||||
|
_add_inline_runs(para, token.get("children", []))
|
||||||
|
|
||||||
elif token_type == "list":
|
elif token_type == "list":
|
||||||
ordered = token.get("attrs", {}).get("ordered", False)
|
ordered = token.get("attrs", {}).get("ordered", False)
|
||||||
@@ -135,10 +256,23 @@ def _render_token(
|
|||||||
|
|
||||||
elif token_type == "block_code":
|
elif token_type == "block_code":
|
||||||
code = token.get("raw", "")
|
code = token.get("raw", "")
|
||||||
para = doc.add_paragraph(style="Normal")
|
info = (token.get("attrs", {}) or {}).get("info", "") or ""
|
||||||
run = para.add_run(code)
|
|
||||||
run.font.name = "Courier New"
|
if feature_level == FeatureLevel.LEVEL3:
|
||||||
run.font.size = Pt(9)
|
from markidocx.diagrams import is_diagram_info, render_diagram_block
|
||||||
|
|
||||||
|
if is_diagram_info(info):
|
||||||
|
render_diagram_block(doc, info.strip().lower(), code, warning_records)
|
||||||
|
else:
|
||||||
|
para = doc.add_paragraph(style="Normal")
|
||||||
|
run = para.add_run(code)
|
||||||
|
run.font.name = "Courier New"
|
||||||
|
run.font.size = Pt(9)
|
||||||
|
else:
|
||||||
|
para = doc.add_paragraph(style="Normal")
|
||||||
|
run = para.add_run(code)
|
||||||
|
run.font.name = "Courier New"
|
||||||
|
run.font.size = Pt(9)
|
||||||
|
|
||||||
elif token_type == "block_quote":
|
elif token_type == "block_quote":
|
||||||
children = token.get("children", [])
|
children = token.get("children", [])
|
||||||
@@ -151,14 +285,27 @@ def _render_token(
|
|||||||
doc.add_paragraph("—" * 20, style="Normal")
|
doc.add_paragraph("—" * 20, style="Normal")
|
||||||
|
|
||||||
elif token_type in ("html_block", "raw_html"):
|
elif token_type in ("html_block", "raw_html"):
|
||||||
unsupported.append(f"html ({token_type})")
|
warning_records.append(
|
||||||
|
WarningRecord(
|
||||||
|
severity=Severity.WARNING,
|
||||||
|
reason="unsupported-construct",
|
||||||
|
construct=f"html ({token_type})",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
elif token_type == "blank_line":
|
elif token_type == "blank_line":
|
||||||
pass # ignore blank lines
|
pass # ignore blank lines
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Unknown token — surface as unsupported (FR-508)
|
# Unknown token — surface as unsupported (FR-508, FR-1203)
|
||||||
unsupported.append(token_type)
|
if token_type:
|
||||||
|
warning_records.append(
|
||||||
|
WarningRecord(
|
||||||
|
severity=Severity.WARNING,
|
||||||
|
reason="unsupported-construct",
|
||||||
|
construct=token_type,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _render_table(doc: DocxDocument, token: dict) -> None:
|
def _render_table(doc: DocxDocument, token: dict) -> None:
|
||||||
@@ -186,6 +333,26 @@ def _render_table(doc: DocxDocument, token: dict) -> None:
|
|||||||
run.bold = True
|
run.bold = True
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_text_with_image_syntax(children: list[dict]) -> str:
    """Extract text from token children, reconstructing image MD syntax for figures.

    Image tokens are turned back into ``![caption](url)`` so the result can
    be checked for figure syntax. Text tokens contribute their ``raw``
    value, inline formatting tokens contribute their plain text, and any
    other token contributes its ``raw`` value or, failing that, the text
    of its children.
    """
    parts: list[str] = []
    for child in children:
        child_type = child.get("type", "")
        if child_type == "image":
            caption = _extract_text(child.get("children", []))
            url = child.get("attrs", {}).get("url", "")
            # Rebuild the Markdown image form; an empty string here would
            # drop the image from the extracted text entirely.
            parts.append(f"![{caption}]({url})")
        elif child_type == "text":
            parts.append(child.get("raw", ""))
        elif child_type in ("strong", "emphasis", "codespan", "link"):
            # NOTE(review): these tokens are read through "children" only;
            # confirm _extract_text copes with tokens that carry just "raw".
            parts.append(_extract_text(child.get("children", [])))
        elif child.get("raw"):
            parts.append(child["raw"])
        elif child.get("children"):
            parts.append(_extract_text_with_image_syntax(child["children"]))
    return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
def _extract_text(children: list[dict]) -> str:
|
def _extract_text(children: list[dict]) -> str:
|
||||||
"""Recursively extract plain text from a token children list."""
|
"""Recursively extract plain text from a token children list."""
|
||||||
parts: list[str] = []
|
parts: list[str] = []
|
||||||
|
|||||||
190
src/markidocx/diagrams.py
Normal file
190
src/markidocx/diagrams.py
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
"""Auto-diagram support for LEVEL3 markidocx (FR-533, FR-534).
|
||||||
|
|
||||||
|
Handles fenced diagram source blocks (mermaid, graphviz, plantuml) in the
|
||||||
|
Markdown ↔ DOCX round trip.
|
||||||
|
|
||||||
|
Source-intent preservation:
|
||||||
|
When a renderer is unavailable, diagram source is embedded as a verbatim
|
||||||
|
code block and a source-intent marker paragraph is added so the importer
|
||||||
|
can restore the fenced block. No source is silently discarded (FR-1205).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from docx.document import Document as DocxDocument
|
||||||
|
|
||||||
|
# Diagram types recognised as LEVEL3 auto-diagram sources
|
||||||
|
DIAGRAM_TYPES: frozenset[str] = frozenset({"mermaid", "graphviz", "plantuml"})
|
||||||
|
|
||||||
|
# Renderer → CLI command mapping
|
||||||
|
_RENDERER_COMMANDS: dict[str, str] = {
|
||||||
|
"mermaid": "mmdc",
|
||||||
|
"graphviz": "dot",
|
||||||
|
"plantuml": "plantuml",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Marker prefix stored in DOCX paragraph to preserve source intent (FR-534)
|
||||||
|
DIAGRAM_SOURCE_MARKER_PREFIX = "diagram-source:"
|
||||||
|
DIAGRAM_SOURCE_MARKER_RE = re.compile(
|
||||||
|
r"^diagram-source:(\w+)\n(.*)", re.DOTALL
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def is_diagram_info(info: str) -> bool:
    """Tell whether a fence info string names a recognised diagram type.

    Matching ignores case and surrounding whitespace; an empty or missing
    *info* is never a diagram type.
    """
    normalised = info.strip().lower() if info else ""
    return normalised in DIAGRAM_TYPES
|
||||||
|
|
||||||
|
|
||||||
|
def check_renderer(diagram_type: str) -> bool:
    """Tell whether the renderer command for *diagram_type* is on ``PATH``.

    Returns False when the type has no renderer command mapped to it.
    """
    executable = _RENDERER_COMMANDS.get(diagram_type.lower())
    if not executable:
        return False
    return shutil.which(executable) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def render_diagram_block(
    doc: DocxDocument,
    diagram_type: str,
    source: str,
    warning_records: list,
) -> None:
    """Write a fenced diagram block into *doc* (FR-533, FR-534).

    With a renderer available, the work is delegated to
    ``_render_diagram_with_tool``. Without one, a warning record is
    appended (FR-538) and the source goes into the document twice: as a
    visible fenced code paragraph and as a 1pt source-intent marker the
    importer can restore from. The source is never dropped (FR-1205).
    """
    from docx.shared import Pt

    from markidocx.errors import Severity, WarningRecord

    if check_renderer(diagram_type):
        _render_diagram_with_tool(doc, diagram_type, source, warning_records)
        return

    # No renderer on this machine: record why, then take the source-only path.
    missing_cmd = _RENDERER_COMMANDS.get(diagram_type, diagram_type)
    warning_records.append(
        WarningRecord(
            severity=Severity.WARNING,
            reason="processor-dependency-unavailable",
            construct=f"{diagram_type} (no renderer: {missing_cmd} not found)",
        )
    )

    # Visible copy of the source, styled as code.
    fenced = doc.add_paragraph(style="Normal").add_run(
        f"```{diagram_type}\n{source}\n```"
    )
    fenced.font.name = "Courier New"
    fenced.font.size = Pt(9)

    # Machine-readable copy for the importer; tiny because it is not for display.
    hidden = doc.add_paragraph(style="Normal").add_run(
        f"{DIAGRAM_SOURCE_MARKER_PREFIX}{diagram_type}\n{source}"
    )
    hidden.font.size = Pt(1)
|
||||||
|
|
||||||
|
|
||||||
|
def _render_diagram_with_tool(
    doc: DocxDocument,
    diagram_type: str,
    source: str,
    warning_records: list,
) -> None:
    """Attempt to render diagram source using an external tool and embed PNG.

    The source is written to a temporary directory, the renderer command
    for *diagram_type* is run with a 30 second timeout, and the resulting
    PNG is embedded followed by a 1pt source-intent marker paragraph
    (FR-534).

    If the tool raises, times out, or finishes without producing a PNG, a
    ``diagram-render-failed`` warning record is appended and the source is
    embedded as a fenced code paragraph plus marker instead, so the source
    is never lost and the failure is never silent.

    Raises:
        KeyError: if *diagram_type* has no entry in the renderer mapping.
    """
    import subprocess
    import tempfile
    from pathlib import Path

    from docx.shared import Inches, Pt

    from markidocx.errors import Severity, WarningRecord

    marker_text = f"{DIAGRAM_SOURCE_MARKER_PREFIX}{diagram_type}\n{source}"
    cmd = _RENDERER_COMMANDS[diagram_type]
    failure = ""
    try:
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            src_file = tmp_path / f"diagram.{diagram_type[:3]}"
            png_file = tmp_path / "diagram.png"
            src_file.write_text(source, encoding="utf-8")

            if diagram_type == "mermaid":
                args = [cmd, "-i", str(src_file), "-o", str(png_file)]
            elif diagram_type == "graphviz":
                args = [cmd, "-Tpng", str(src_file), "-o", str(png_file)]
            else:  # plantuml: -o names an output directory, not a file
                args = [cmd, "-tpng", str(src_file), "-o", str(tmp_path)]

            completed = subprocess.run(args, capture_output=True, timeout=30)

            if not png_file.exists():
                # The tool chooses the output name in directory mode, so
                # accept any PNG it left in the scratch directory.
                png_file = next(iter(sorted(tmp_path.glob("*.png"))), png_file)

            if png_file.exists():
                # Embed while the temporary directory still exists.
                para = doc.add_paragraph(style="Normal")
                para.add_run().add_picture(str(png_file), width=Inches(5))
                # Source-intent marker for round-trip (FR-534)
                marker_run = doc.add_paragraph(style="Normal").add_run(marker_text)
                marker_run.font.size = Pt(1)
                return

            failure = f"{cmd} exited with code {completed.returncode} and produced no PNG"
    except Exception as exc:
        # Best-effort boundary: any failure is recorded, then the
        # source-only fallback below runs.
        failure = str(exc)

    warning_records.append(
        WarningRecord(
            severity=Severity.WARNING,
            reason="diagram-render-failed",
            construct=f"{diagram_type}: {failure}",
        )
    )

    # Fallback: source-only path
    code_run = doc.add_paragraph(style="Normal").add_run(
        f"```{diagram_type}\n{source}\n```"
    )
    code_run.font.name = "Courier New"
    code_run.font.size = Pt(9)

    marker_run = doc.add_paragraph(style="Normal").add_run(marker_text)
    marker_run.font.size = Pt(1)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Importer helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def is_diagram_source_marker(text: str) -> bool:
    """Tell whether *text* begins with the diagram source-intent prefix.

    Surrounding whitespace is ignored before the prefix check.
    """
    candidate = text.strip()
    return candidate.startswith(DIAGRAM_SOURCE_MARKER_PREFIX)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_diagram_source_marker(text: str) -> tuple[str, str] | None:
    """Split a diagram source-intent marker into ``(diagram_type, source)``.

    After the prefix the marker body reads ``type<newline>source...``. A body
    without a newline yields an empty source. Text that does not start with
    the marker prefix (once surrounding whitespace is stripped) gives ``None``.
    """
    candidate = text.strip()
    if not candidate.startswith(DIAGRAM_SOURCE_MARKER_PREFIX):
        return None

    payload = candidate[len(DIAGRAM_SOURCE_MARKER_PREFIX):]
    # partition() leaves the tail empty when no newline is present, which is
    # exactly the "type only" case.
    kind, _, body = payload.partition("\n")
    return kind.strip(), body
|
||||||
|
|
||||||
|
|
||||||
|
def reconstruct_diagram_md(diagram_type: str, source: str) -> str:
    """Wrap *source* in a fenced code block whose info string is *diagram_type*."""
    opening_fence = f"```{diagram_type}"
    return "\n".join((opening_fence, source, "```"))
|
||||||
@@ -5,6 +5,8 @@ from __future__ import annotations
|
|||||||
import re
|
import re
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from markidocx.errors import OutputState
|
||||||
|
|
||||||
HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
|
HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
|
||||||
LIST_ITEM_RE = re.compile(r"^(\s*[-*+]|\s*\d+\.)\s+(.+)$", re.MULTILINE)
|
LIST_ITEM_RE = re.compile(r"^(\s*[-*+]|\s*\d+\.)\s+(.+)$", re.MULTILINE)
|
||||||
TABLE_ROW_RE = re.compile(r"^\|.+\|$", re.MULTILINE)
|
TABLE_ROW_RE = re.compile(r"^\|.+\|$", re.MULTILINE)
|
||||||
@@ -19,6 +21,7 @@ class DriftReport:
|
|||||||
degraded: list[str] = field(default_factory=list)
|
degraded: list[str] = field(default_factory=list)
|
||||||
broken: list[str] = field(default_factory=list)
|
broken: list[str] = field(default_factory=list)
|
||||||
unsupported: list[str] = field(default_factory=list)
|
unsupported: list[str] = field(default_factory=list)
|
||||||
|
output_state: OutputState = OutputState.FINAL
|
||||||
|
|
||||||
|
|
||||||
def compare(original: str, reimported: str) -> DriftReport:
|
def compare(original: str, reimported: str) -> DriftReport:
|
||||||
@@ -76,13 +79,29 @@ def compare(original: str, reimported: str) -> DriftReport:
|
|||||||
else:
|
else:
|
||||||
degraded.append(f"link:lost {link[:40]}")
|
degraded.append(f"link:lost {link[:40]}")
|
||||||
|
|
||||||
|
# --- Cross-references (FR-531, FR-540) ---
|
||||||
|
_compare_xrefs(original, reimported, preserved, degraded, broken)
|
||||||
|
|
||||||
|
# --- Figures (FR-532, FR-541) ---
|
||||||
|
_compare_figures(original, reimported, preserved, degraded, broken)
|
||||||
|
|
||||||
|
# --- Citations & Bibliography (FR-535, FR-542) ---
|
||||||
|
from markidocx.bibliography import compare_citations
|
||||||
|
|
||||||
|
compare_citations(original, reimported, preserved, degraded, broken)
|
||||||
|
|
||||||
has_drift = bool(degraded or broken)
|
has_drift = bool(degraded or broken)
|
||||||
|
output_state = (
|
||||||
|
OutputState.FINAL if not has_drift
|
||||||
|
else (OutputState.DEGRADED if not broken else OutputState.PARTIAL)
|
||||||
|
)
|
||||||
return DriftReport(
|
return DriftReport(
|
||||||
has_drift=has_drift,
|
has_drift=has_drift,
|
||||||
preserved=preserved,
|
preserved=preserved,
|
||||||
degraded=degraded,
|
degraded=degraded,
|
||||||
broken=broken,
|
broken=broken,
|
||||||
unsupported=unsupported,
|
unsupported=unsupported,
|
||||||
|
output_state=output_state,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -104,6 +123,64 @@ def _count_tables(text: str) -> int:
|
|||||||
return count
|
return count
|
||||||
|
|
||||||
|
|
||||||
|
def _compare_figures(
    original: str,
    reimported: str,
    preserved: list[str],
    degraded: list[str],
    broken: list[str],
) -> None:
    """Compare figure labels and captions (FR-532, FR-541).

    Appends one entry per original figure label and per distinct original
    caption to the caller's lists:

    * label present after re-import  -> ``preserved``
    * label missing after re-import  -> ``broken``
    * caption present after re-import -> ``preserved``
    * caption missing after re-import -> ``degraded``

    Entries are emitted in a stable order (labels sorted, captions in
    document order) so that repeated runs produce identical reports.
    """
    from markidocx.figures import extract_figure_captions, extract_figure_labels

    reim_labels = extract_figure_labels(reimported)
    # extract_figure_labels returns a set; iterating it directly would make the
    # report order depend on string hash randomisation.
    for label in sorted(extract_figure_labels(original)):
        if label in reim_labels:
            preserved.append(f"figure-label:{label}")
        else:
            broken.append(f"figure-label:missing '{label}'")

    reim_captions = set(extract_figure_captions(reimported))
    # dict.fromkeys de-duplicates while keeping first-seen (document) order.
    for caption in dict.fromkeys(extract_figure_captions(original)):
        if caption in reim_captions:
            preserved.append(f"figure-caption:{caption[:40]}")
        else:
            degraded.append(f"figure-caption:lost '{caption[:40]}'")
|
||||||
|
|
||||||
|
|
||||||
|
def _compare_xrefs(
    original: str,
    reimported: str,
    preserved: list[str],
    degraded: list[str],
    broken: list[str],
) -> None:
    """Classify cross-reference anchors and links by round-trip outcome (FR-531, FR-540).

    Results are appended to the caller's *preserved*, *degraded* and *broken*
    lists; nothing is returned.
    """
    from markidocx.xref import extract_anchors, extract_xref_links

    source_anchors = extract_anchors(original)
    imported_anchors = extract_anchors(reimported)
    for name in source_anchors:
        if name not in imported_anchors:
            broken.append(f"xref-anchor:missing '{name}'")
            continue
        preserved.append(f"xref-anchor:{name}")

    source_links = extract_xref_links(original)
    imported_links = extract_xref_links(reimported)
    for label, target in source_links:
        if (label, target) in imported_links:
            preserved.append(f"xref-link:[{label}][{target}]")
        elif target in imported_anchors:
            # The target anchor survived but this exact link did not.
            degraded.append(f"xref-link:degraded [{label}][{target}]")
        else:
            broken.append(f"xref-link:broken-target [{label}][{target}]")
|
||||||
|
|
||||||
|
|
||||||
def _compare_sets(
|
def _compare_sets(
|
||||||
kind: str,
|
kind: str,
|
||||||
orig: list[str],
|
orig: list[str],
|
||||||
|
|||||||
80
src/markidocx/errors.py
Normal file
80
src/markidocx/errors.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
"""Structured error and warning types for markidocx (FR-1201–1210)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from enum import StrEnum
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
class Severity(StrEnum):
    """Severity levels carried by structured warning and failure records."""

    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
|
||||||
|
|
||||||
|
|
||||||
|
class OutputState(StrEnum):
    """Lifecycle state of a build/import/workflow result (FR-1210)."""

    # Default state; the differ reports it when nothing degraded or broke.
    FINAL = "final"
    # The differ reports this when at least one construct is broken.
    PARTIAL = "partial"
    # The importer reports this when it writes a single merged file instead of
    # redistributing content to the original sources.
    FALLBACK = "fallback"
    # The differ reports this when constructs degraded but none broke.
    DEGRADED = "degraded"
    # The importer reports this when the DOCX is missing or cannot be opened.
    UNRESOLVED = "unresolved"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class WarningRecord:
    """Structured warning record (FR-1208).

    Attributes:
        severity: one of ``info``, ``warning`` or ``error``.
        reason: FR-code-aligned description.
        construct: the token/element that triggered the warning; may be empty.
    """

    severity: str
    reason: str
    construct: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the record's three fields as a plain dictionary."""
        payload: dict[str, Any] = {
            "severity": self.severity,
            "reason": self.reason,
            "construct": self.construct,
        }
        return payload

    def __str__(self) -> str:
        """Render ``[severity] reason``, followed by ``: construct`` when one is set."""
        if not self.construct:
            return f"[{self.severity}] {self.reason}"
        return f"[{self.severity}] {self.reason}: {self.construct}"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class FailureRecord:
    """Structured failure record (FR-1209).

    Attributes:
        severity: one of ``info``, ``warning`` or ``error``.
        reason: FR-code-aligned description.
        construct: the element that caused the failure; may be empty.
    """

    severity: str
    reason: str
    construct: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the record's three fields as a plain dictionary."""
        payload: dict[str, Any] = {
            "severity": self.severity,
            "reason": self.reason,
            "construct": self.construct,
        }
        return payload

    def __str__(self) -> str:
        """Render ``[severity] reason``, followed by ``: construct`` when one is set."""
        if not self.construct:
            return f"[{self.severity}] {self.reason}"
        return f"[{self.severity}] {self.reason}: {self.construct}"
|
||||||
|
|
||||||
|
|
||||||
|
def warning_records_to_strings(records: list[WarningRecord]) -> list[str]:
    """Render every record with ``str()`` and return the results in order (backward compat helper)."""
    return list(map(str, records))
|
||||||
147
src/markidocx/figures.py
Normal file
147
src/markidocx/figures.py
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
"""Numbered figure support for LEVEL3 markidocx (FR-532, FR-541).
|
||||||
|
|
||||||
|
Handles round-trip of captioned numbered figures between Markdown and DOCX.
|
||||||
|
|
||||||
|
Markdown syntax:
|
||||||
|
![Caption text](path/to/image.png){#fig:label}
|
||||||
|
|
||||||
|
DOCX representation:
|
||||||
|
[image paragraph or placeholder]
|
||||||
|
[caption paragraph: "Figure N — Caption text"]
|
||||||
|
(with alt-text marker: "figure-source:path/to/image.png#fig:label")
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from docx.document import Document as DocxDocument
|
||||||
|
|
||||||
|
# Markdown figure pattern: ![Caption text](path/to/image.png){#fig:label}
# Must occupy a whole line. Groups: 1 = caption (alt text), 2 = image path,
# 3 = label including its "fig:" prefix.
FIGURE_RE = re.compile(
    r"^!\[([^\]]*)\]\(([^)]+)\)\{#(fig:[\w:-]+)\}$",
    re.MULTILINE,
)

# Caption paragraph pattern in imported DOCX
# Groups: 1 = figure number, 2 = caption text. The separator may be an em
# dash, a hyphen or an en dash.
CAPTION_RE = re.compile(r"^Figure\s+(\d+)\s+[—\-–]\s+(.+)$")

# Alt-text marker embedded in images to preserve source intent (FR-534)
ALT_TEXT_MARKER_PREFIX = "figure-source:"
|
||||||
|
|
||||||
|
|
||||||
|
def is_figure_paragraph(text: str) -> bool:
    """Tell whether *text*, stripped of surrounding whitespace, is a standalone figure declaration."""
    candidate = text.strip()
    return FIGURE_RE.match(candidate) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def parse_figure(text: str) -> tuple[str, str, str] | None:
    """Break a figure declaration into ``(caption, path, label)``.

    Gives ``None`` when *text* (stripped) does not match the figure pattern.
    """
    found = FIGURE_RE.match(text.strip())
    if found is None:
        return None
    return found.group(1, 2, 3)
|
||||||
|
|
||||||
|
|
||||||
|
def render_figure(
    doc: DocxDocument,
    caption: str,
    path: str,
    label: str,
    figure_number: int,
) -> None:
    """Render a figure declaration into *doc* (FR-532).

    Appends three "Normal"-styled paragraphs, in this order:
      1. An italic placeholder "[Figure: <path>]" (the image itself is not
         embedded — that requires the file to exist and is omitted here for
         portability).
      2. A 1pt marker paragraph "figure-source:<path>#<label>" that carries
         the source intent for the importer.
      3. A caption paragraph: "Figure N — Caption"
    """
    # Alt-text marker so importer can reconstruct the figure (FR-534)
    alt_marker = f"{ALT_TEXT_MARKER_PREFIX}{path}#{label}"

    # Visible placeholder paragraph standing in for the image
    placeholder = doc.add_paragraph(style="Normal")
    run = placeholder.add_run(f"[Figure: {path}]")
    run.italic = True

    # Separate marker paragraph holding the source-intent data
    marker_para = doc.add_paragraph(style="Normal")
    marker_run = marker_para.add_run(alt_marker)
    # NOTE(review): this assignment is overwritten by Pt(1) just below.
    marker_run.font.size = None  # inherit
    # Hide the marker by making it very small (conceptual; keeps round-trip info)
    from docx.shared import Pt

    marker_run.font.size = Pt(1)
    marker_run.font.color.rgb = None  # default color

    # Caption paragraph
    caption_para = doc.add_paragraph(style="Normal")
    caption_para.add_run(f"Figure {figure_number} — {caption}")
|
||||||
|
|
||||||
|
|
||||||
|
def extract_figures_from_md(text: str) -> list[tuple[str, str, str]]:
    """Collect every figure declaration found in Markdown *text*.

    Each entry is a ``(caption, path, label)`` tuple, in order of appearance.
    """
    # With three capture groups, findall yields one 3-tuple per match.
    return FIGURE_RE.findall(text)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Importer helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def is_caption_paragraph(text: str) -> bool:
    """Tell whether *text*, stripped of surrounding whitespace, matches the figure caption pattern."""
    candidate = text.strip()
    return CAPTION_RE.match(candidate) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def is_alt_text_marker(text: str) -> bool:
    """Tell whether *text* begins with the figure-source marker prefix.

    Surrounding whitespace is ignored before the prefix check.
    """
    candidate = text.strip()
    return candidate.startswith(ALT_TEXT_MARKER_PREFIX)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_alt_text_marker(text: str) -> tuple[str, str] | None:
    """Split a figure-source marker into ``(path, label)``.

    A marker without any ``#`` yields an empty label. Text that does not
    start with the marker prefix (once stripped) gives ``None``.
    """
    candidate = text.strip()
    if not candidate.startswith(ALT_TEXT_MARKER_PREFIX):
        return None

    payload = candidate[len(ALT_TEXT_MARKER_PREFIX):]
    # Split on the LAST '#', so a '#' inside the path stays part of the path.
    path, hash_sign, label = payload.rpartition("#")
    if not hash_sign:
        return payload, ""
    return path, label
|
||||||
|
|
||||||
|
|
||||||
|
def reconstruct_figure_md(caption: str, path: str, label: str) -> str:
    """Reconstruct a Markdown figure declaration from its parts.

    Produces ``![caption](path){#label}``, the form the figure pattern in this
    module parses, so that a figure survives a build/import round trip.

    Args:
        caption: caption text, placed in the image alt-text position.
        path: image path or URL.
        label: figure label as stored in the marker, e.g. ``fig:overview``.

    Returns:
        The single-line Markdown figure declaration.
    """
    return f"![{caption}]({path}){{#{label}}}"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Differ helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Matches a {#fig:...} attribute anywhere in the text; group 1 is the label
# including its "fig:" prefix.
FIGURE_LABEL_RE = re.compile(r"\{#(fig:[\w:-]+)\}")
# Matches a full figure declaration anywhere in the text (not anchored to line
# boundaries); group 1 is the caption.
FIGURE_CAPTION_MD_RE = re.compile(r"!\[([^\]]*)\]\([^)]+\)\{#fig:[\w:-]+\}")
|
||||||
|
|
||||||
|
|
||||||
|
def extract_figure_labels(text: str) -> set[str]:
    """Collect the distinct labels of all ``{#fig:label}`` attributes in Markdown *text*."""
    return {hit.group(1) for hit in FIGURE_LABEL_RE.finditer(text)}
|
||||||
|
|
||||||
|
|
||||||
|
def extract_figure_captions(text: str) -> list[str]:
    """List the caption of every figure declaration in Markdown *text*, in order of appearance."""
    # The pattern has a single capture group, so findall yields plain strings.
    return FIGURE_CAPTION_MD_RE.findall(text)
|
||||||
@@ -11,6 +11,7 @@ from docx.document import Document as DocxDocument
|
|||||||
from docx.table import Table
|
from docx.table import Table
|
||||||
from docx.text.paragraph import Paragraph
|
from docx.text.paragraph import Paragraph
|
||||||
|
|
||||||
|
from markidocx.errors import OutputState, Severity, WarningRecord
|
||||||
from markidocx.manifest import Manifest
|
from markidocx.manifest import Manifest
|
||||||
|
|
||||||
HEADING_STYLE_RE = re.compile(r"^Heading (\d+)$", re.IGNORECASE)
|
HEADING_STYLE_RE = re.compile(r"^Heading (\d+)$", re.IGNORECASE)
|
||||||
@@ -23,7 +24,13 @@ class ImportResult:
|
|||||||
success: bool
|
success: bool
|
||||||
output_files: list[Path]
|
output_files: list[Path]
|
||||||
mapping_status: str # "redistributed" | "merged" | "failed"
|
mapping_status: str # "redistributed" | "merged" | "failed"
|
||||||
warnings: list[str] = field(default_factory=list)
|
warning_records: list[WarningRecord] = field(default_factory=list)
|
||||||
|
output_state: OutputState = OutputState.FINAL
|
||||||
|
|
||||||
|
@property
|
||||||
|
def warnings(self) -> list[str]:
|
||||||
|
"""Backward-compatible string view of warning_records."""
|
||||||
|
return [str(w) for w in self.warning_records]
|
||||||
|
|
||||||
|
|
||||||
def import_document(manifest: Manifest, docx_path: Path) -> ImportResult:
|
def import_document(manifest: Manifest, docx_path: Path) -> ImportResult:
|
||||||
@@ -33,14 +40,21 @@ def import_document(manifest: Manifest, docx_path: Path) -> ImportResult:
|
|||||||
content is redistributed to the original files. Otherwise a single
|
content is redistributed to the original files. Otherwise a single
|
||||||
merged file is produced.
|
merged file is produced.
|
||||||
"""
|
"""
|
||||||
warnings: list[str] = []
|
warning_records: list[WarningRecord] = []
|
||||||
|
|
||||||
if not docx_path.exists():
|
if not docx_path.exists():
|
||||||
return ImportResult(
|
return ImportResult(
|
||||||
success=False,
|
success=False,
|
||||||
output_files=[],
|
output_files=[],
|
||||||
mapping_status="failed",
|
mapping_status="failed",
|
||||||
warnings=[f"DOCX file not found: {docx_path}"],
|
warning_records=[
|
||||||
|
WarningRecord(
|
||||||
|
severity=Severity.ERROR,
|
||||||
|
reason="docx-not-found",
|
||||||
|
construct=str(docx_path),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
output_state=OutputState.UNRESOLVED,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -50,10 +64,17 @@ def import_document(manifest: Manifest, docx_path: Path) -> ImportResult:
|
|||||||
success=False,
|
success=False,
|
||||||
output_files=[],
|
output_files=[],
|
||||||
mapping_status="failed",
|
mapping_status="failed",
|
||||||
warnings=[f"Could not open DOCX: {exc}"],
|
warning_records=[
|
||||||
|
WarningRecord(
|
||||||
|
severity=Severity.ERROR,
|
||||||
|
reason="docx-open-failed",
|
||||||
|
construct=str(exc),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
output_state=OutputState.UNRESOLVED,
|
||||||
)
|
)
|
||||||
|
|
||||||
md_text = _docx_to_markdown(doc, warnings)
|
md_text = _docx_to_markdown(doc, warning_records)
|
||||||
|
|
||||||
manifest.output_dir.mkdir(parents=True, exist_ok=True)
|
manifest.output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
@@ -65,7 +86,8 @@ def import_document(manifest: Manifest, docx_path: Path) -> ImportResult:
|
|||||||
success=True,
|
success=True,
|
||||||
output_files=[out_path],
|
output_files=[out_path],
|
||||||
mapping_status="redistributed",
|
mapping_status="redistributed",
|
||||||
warnings=warnings,
|
warning_records=warning_records,
|
||||||
|
output_state=OutputState.FINAL,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Multi-file: attempt redistribution by H1 boundary
|
# Multi-file: attempt redistribution by H1 boundary
|
||||||
@@ -79,13 +101,20 @@ def import_document(manifest: Manifest, docx_path: Path) -> ImportResult:
|
|||||||
success=True,
|
success=True,
|
||||||
output_files=output_files,
|
output_files=output_files,
|
||||||
mapping_status="redistributed",
|
mapping_status="redistributed",
|
||||||
warnings=warnings,
|
warning_records=warning_records,
|
||||||
|
output_state=OutputState.FINAL,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Fallback: merged single output (FR-406)
|
# Fallback: merged single output (FR-406, FR-1207)
|
||||||
warnings.append(
|
warning_records.append(
|
||||||
f"Could not redistribute to {len(manifest.sources)} source files "
|
WarningRecord(
|
||||||
f"(found {len(sections)} H1 sections); writing merged output"
|
severity=Severity.WARNING,
|
||||||
|
reason="fallback",
|
||||||
|
construct=(
|
||||||
|
f"could not redistribute to {len(manifest.sources)} source files "
|
||||||
|
f"(found {len(sections)} H1 sections); writing merged output"
|
||||||
|
),
|
||||||
|
)
|
||||||
)
|
)
|
||||||
merged_path = manifest.output_dir / "imported_merged.md"
|
merged_path = manifest.output_dir / "imported_merged.md"
|
||||||
merged_path.write_text(md_text, encoding="utf-8")
|
merged_path.write_text(md_text, encoding="utf-8")
|
||||||
@@ -93,7 +122,8 @@ def import_document(manifest: Manifest, docx_path: Path) -> ImportResult:
|
|||||||
success=True,
|
success=True,
|
||||||
output_files=[merged_path],
|
output_files=[merged_path],
|
||||||
mapping_status="merged",
|
mapping_status="merged",
|
||||||
warnings=warnings,
|
warning_records=warning_records,
|
||||||
|
output_state=OutputState.FALLBACK,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -101,17 +131,95 @@ def import_document(manifest: Manifest, docx_path: Path) -> ImportResult:
|
|||||||
# DOCX → Markdown conversion
|
# DOCX → Markdown conversion
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def _docx_to_markdown(doc: DocxDocument, warnings: list[str]) -> str:
|
def _docx_to_markdown(doc: DocxDocument, warning_records: list[WarningRecord]) -> str:
|
||||||
"""Convert a python-docx Document to a Markdown string."""
|
"""Convert a python-docx Document to a Markdown string."""
|
||||||
|
from markidocx.bibliography import (
|
||||||
|
is_bibliography_entry,
|
||||||
|
is_bibliography_marker,
|
||||||
|
restore_citations_in_text,
|
||||||
|
)
|
||||||
|
from markidocx.diagrams import (
|
||||||
|
is_diagram_source_marker,
|
||||||
|
parse_diagram_source_marker,
|
||||||
|
reconstruct_diagram_md,
|
||||||
|
)
|
||||||
|
from markidocx.figures import (
|
||||||
|
CAPTION_RE,
|
||||||
|
is_alt_text_marker,
|
||||||
|
parse_alt_text_marker,
|
||||||
|
reconstruct_figure_md,
|
||||||
|
)
|
||||||
|
|
||||||
lines: list[str] = []
|
lines: list[str] = []
|
||||||
|
|
||||||
# Walk python-docx's block-level items
|
# Walk python-docx's block-level items
|
||||||
for block in _iter_blocks(doc):
|
blocks = list(_iter_blocks(doc))
|
||||||
|
idx = 0
|
||||||
|
while idx < len(blocks):
|
||||||
|
block = blocks[idx]
|
||||||
if isinstance(block, Paragraph):
|
if isinstance(block, Paragraph):
|
||||||
md = _paragraph_to_md(block, warnings)
|
text = block.text.strip()
|
||||||
|
|
||||||
|
# Detect diagram source-intent marker (tiny font) → restore fenced block (FR-534)
|
||||||
|
if is_diagram_source_marker(text):
|
||||||
|
parsed = parse_diagram_source_marker(text)
|
||||||
|
if parsed:
|
||||||
|
diagram_type, source = parsed
|
||||||
|
from markidocx.diagrams import reconstruct_diagram_md
|
||||||
|
lines.append(reconstruct_diagram_md(diagram_type, source))
|
||||||
|
idx += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Detect alt-text marker (figure source intent) — skip it; consumed by caption
|
||||||
|
if is_alt_text_marker(text):
|
||||||
|
caption_text = ""
|
||||||
|
path = ""
|
||||||
|
label = ""
|
||||||
|
marker_parsed = parse_alt_text_marker(text)
|
||||||
|
if marker_parsed:
|
||||||
|
path, label = marker_parsed
|
||||||
|
|
||||||
|
if idx + 1 < len(blocks) and isinstance(blocks[idx + 1], Paragraph):
|
||||||
|
next_text = blocks[idx + 1].text.strip()
|
||||||
|
cm = CAPTION_RE.match(next_text)
|
||||||
|
if cm:
|
||||||
|
caption_text = cm.group(2)
|
||||||
|
idx += 1 # consume caption paragraph
|
||||||
|
|
||||||
|
if caption_text:
|
||||||
|
lines.append(reconstruct_figure_md(caption_text, path, label))
|
||||||
|
idx += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Detect placeholder + alt-text marker pattern: "[Figure: path]"
|
||||||
|
if text.startswith("[Figure:") and text.endswith("]"):
|
||||||
|
idx += 1
|
||||||
|
continue # skip placeholder; handled via alt-text marker
|
||||||
|
|
||||||
|
# Detect bibliography section marker (tiny invisible paragraph)
|
||||||
|
if is_bibliography_marker(text):
|
||||||
|
idx += 1
|
||||||
|
continue # skip; section already started by heading
|
||||||
|
|
||||||
|
# Detect bibliography reference entry ([@key]: ...) — already in correct format
|
||||||
|
if is_bibliography_entry(text):
|
||||||
|
lines.append(text)
|
||||||
|
idx += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
md = _paragraph_to_md(block, warning_records)
|
||||||
if md is not None:
|
if md is not None:
|
||||||
lines.append(md)
|
lines.append(md)
|
||||||
elif isinstance(block, Table):
|
elif isinstance(block, Table):
|
||||||
lines.append(_table_to_md(block))
|
lines.append(_table_to_md(block))
|
||||||
|
idx += 1
|
||||||
|
|
||||||
|
# Bibliography entries are already inline after heading; no extra work needed
|
||||||
|
result_text = "\n\n".join(line for line in lines if line is not None)
|
||||||
|
|
||||||
|
# Restore citations in the text ([@key] markers)
|
||||||
|
result_text = restore_citations_in_text(result_text)
|
||||||
|
return result_text
|
||||||
|
|
||||||
return "\n\n".join(line for line in lines if line is not None)
|
return "\n\n".join(line for line in lines if line is not None)
|
||||||
|
|
||||||
@@ -128,7 +236,7 @@ def _iter_blocks(doc: DocxDocument):
|
|||||||
yield Table(child, doc)
|
yield Table(child, doc)
|
||||||
|
|
||||||
|
|
||||||
def _paragraph_to_md(para: Paragraph, warnings: list[str]) -> str | None:
|
def _paragraph_to_md(para: Paragraph, warning_records: list[WarningRecord]) -> str | None:
|
||||||
"""Convert a paragraph to a Markdown line."""
|
"""Convert a paragraph to a Markdown line."""
|
||||||
style_name = para.style.name if para.style else "Normal"
|
style_name = para.style.name if para.style else "Normal"
|
||||||
text = para.text.strip()
|
text = para.text.strip()
|
||||||
@@ -137,7 +245,14 @@ def _paragraph_to_md(para: Paragraph, warnings: list[str]) -> str | None:
|
|||||||
m = HEADING_STYLE_RE.match(style_name)
|
m = HEADING_STYLE_RE.match(style_name)
|
||||||
if m:
|
if m:
|
||||||
level = int(m.group(1))
|
level = int(m.group(1))
|
||||||
return f"{'#' * level} {text}"
|
# Check for bookmarks → restore {#anchor} labels (FR-531)
|
||||||
|
from markidocx.xref import extract_bookmarks_from_paragraph
|
||||||
|
|
||||||
|
bookmarks = extract_bookmarks_from_paragraph(para)
|
||||||
|
anchor_suffix = ""
|
||||||
|
if bookmarks:
|
||||||
|
anchor_suffix = " " + " ".join(f"{{#{b}}}" for b in bookmarks)
|
||||||
|
return f"{'#' * level} {text}{anchor_suffix}"
|
||||||
|
|
||||||
# Lists
|
# Lists
|
||||||
if LIST_BULLET_RE.match(style_name):
|
if LIST_BULLET_RE.match(style_name):
|
||||||
@@ -145,13 +260,33 @@ def _paragraph_to_md(para: Paragraph, warnings: list[str]) -> str | None:
|
|||||||
if LIST_NUMBER_RE.match(style_name):
|
if LIST_NUMBER_RE.match(style_name):
|
||||||
return f"1. {text}"
|
return f"1. {text}"
|
||||||
|
|
||||||
# Normal text — preserve inline markup
|
# Normal text — check for internal hyperlinks (cross-refs) → [text][anchor]
|
||||||
|
from markidocx.xref import extract_internal_hyperlinks_from_paragraph
|
||||||
|
|
||||||
|
internal_links = extract_internal_hyperlinks_from_paragraph(para)
|
||||||
|
if internal_links:
|
||||||
|
return _runs_to_md_with_xrefs(para, internal_links)
|
||||||
|
|
||||||
if not text:
|
if not text:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return _runs_to_md(para)
|
return _runs_to_md(para)
|
||||||
|
|
||||||
|
|
||||||
|
def _runs_to_md_with_xrefs(
|
||||||
|
para: Paragraph, internal_links: list[tuple[str, str]]
|
||||||
|
) -> str:
|
||||||
|
"""Convert paragraph with internal hyperlinks to Markdown with [text][anchor].
|
||||||
|
|
||||||
|
para.text includes text from nested hyperlink elements, so we use it as
|
||||||
|
the base and replace each hyperlink text with [text][anchor] syntax.
|
||||||
|
"""
|
||||||
|
result = para.text
|
||||||
|
for link_text, anchor in internal_links:
|
||||||
|
result = result.replace(link_text, f"[{link_text}][{anchor}]", 1)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def _runs_to_md(para: Paragraph) -> str:
|
def _runs_to_md(para: Paragraph) -> str:
|
||||||
"""Convert paragraph runs to Markdown with inline formatting."""
|
"""Convert paragraph runs to Markdown with inline formatting."""
|
||||||
parts: list[str] = []
|
parts: list[str] = []
|
||||||
|
|||||||
83
src/markidocx/level3.py
Normal file
83
src/markidocx/level3.py
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
"""LEVEL3 feature gating, processor-dependency disclosure, and support detection (FR-537–539)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
# Diagram renderers recognised by LEVEL3 auto-diagram support.
# Maps the executable name looked up on PATH to a human-readable description.
_DIAGRAM_TOOLS: dict[str, str] = {
    "mmdc": "Mermaid CLI (mermaid diagrams)",
    "dot": "Graphviz dot (graphviz diagrams)",
    "plantuml": "PlantUML (plantuml diagrams)",
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ProcessorDependency:
    """An external tool required for a LEVEL3 construct (FR-538)."""

    # Executable name, as passed to shutil.which by check_level3_support.
    name: str
    # Human-readable description of the tool and what it renders.
    description: str
    # True when the executable was found on PATH at detection time.
    available: bool
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Level3Support:
    """Summary of LEVEL3 processing capability on the current host (FR-537, FR-538).

    available: True if *any* LEVEL3 processing is possible (always True —
               core features like cross-refs / figures / bibliography work
               without external tools).
    dependencies: per-tool availability for diagram rendering.
    partial: True when some LEVEL3 features are unavailable due to missing tools.
    missing_coverage: human-readable list of unavailable feature areas.
    """

    available: bool = True
    dependencies: list[ProcessorDependency] = field(default_factory=list)
    # NOTE(review): check_level3_support sets this only when no diagram
    # renderer at all is found, not when a single renderer is missing.
    partial: bool = False
    missing_coverage: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def check_level3_support() -> Level3Support:
    """Probe PATH for diagram renderers and summarise LEVEL3 capability (FR-537, FR-538).

    Core LEVEL3 features (cross-refs, figures, bibliography) need no external
    tools, so ``available`` is always True. Diagram rendering is reported as
    missing coverage only when none of mmdc / dot / plantuml is found.
    """
    deps: list[ProcessorDependency] = [
        ProcessorDependency(
            name=tool,
            description=blurb,
            available=shutil.which(tool) is not None,
        )
        for tool, blurb in _DIAGRAM_TOOLS.items()
    ]

    missing: list[str] = []
    if not any(dep.available for dep in deps):
        missing.append("auto-diagrams (no renderer: mmdc/dot/plantuml not found)")

    return Level3Support(
        available=True,
        dependencies=deps,
        partial=bool(missing),
        missing_coverage=missing,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def capabilities_entry() -> dict:
    """Build the LEVEL3 fragment of a capabilities payload (FR-537).

    Runs ``check_level3_support()`` and flattens the result into plain
    dicts, lists, strings and booleans.

    Returns:
        A dict with keys ``level``, ``available``, ``partial``,
        ``missing_coverage`` and ``dependencies``; each dependency is a dict
        with ``name``, ``description`` and ``available``.
    """
    support = check_level3_support()

    dependency_rows = []
    for dep in support.dependencies:
        dependency_rows.append(
            {
                "name": dep.name,
                "description": dep.description,
                "available": dep.available,
            }
        )

    return {
        "level": "level3",
        "available": support.available,
        "partial": support.partial,
        "missing_coverage": support.missing_coverage,
        "dependencies": dependency_rows,
    }
|
||||||
@@ -68,6 +68,8 @@ def validate_project(manifest_yaml: str) -> dict[str, Any]:
|
|||||||
except Exception:
|
except Exception:
|
||||||
(tmp_path / "dist").mkdir(exist_ok=True)
|
(tmp_path / "dist").mkdir(exist_ok=True)
|
||||||
try:
|
try:
|
||||||
|
from markidocx.level3 import capabilities_entry as level3_capabilities
|
||||||
|
|
||||||
m = load_manifest(mp)
|
m = load_manifest(mp)
|
||||||
return {
|
return {
|
||||||
"status": "ok",
|
"status": "ok",
|
||||||
@@ -79,6 +81,7 @@ def validate_project(manifest_yaml: str) -> dict[str, Any]:
|
|||||||
"context": {
|
"context": {
|
||||||
"supported_families": sorted(SUPPORTED_FAMILIES),
|
"supported_families": sorted(SUPPORTED_FAMILIES),
|
||||||
"supported_feature_levels": [e.value for e in FeatureLevel],
|
"supported_feature_levels": [e.value for e in FeatureLevel],
|
||||||
|
"level3": level3_capabilities(),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
except ManifestError as exc:
|
except ManifestError as exc:
|
||||||
@@ -123,15 +126,24 @@ def build(manifest_yaml: str, sources: list[dict[str, str]]) -> dict[str, Any]:
|
|||||||
result = build_document(m)
|
result = build_document(m)
|
||||||
if result.success:
|
if result.success:
|
||||||
docx_b64 = base64.b64encode(Path(result.output_path).read_bytes()).decode()
|
docx_b64 = base64.b64encode(Path(result.output_path).read_bytes()).decode()
|
||||||
return {
|
out: dict[str, Any] = {
|
||||||
"status": "ok",
|
"status": "ok",
|
||||||
"docx_base64": docx_b64,
|
"docx_base64": docx_b64,
|
||||||
"family": result.family,
|
"family": result.family,
|
||||||
"feature_level": result.feature_level,
|
"feature_level": result.feature_level,
|
||||||
"warnings": result.warnings,
|
"output_state": result.output_state,
|
||||||
|
"warnings": [w.to_dict() for w in result.warning_records],
|
||||||
"errors": [],
|
"errors": [],
|
||||||
}
|
}
|
||||||
return {"status": "error", "errors": result.errors, "warnings": result.warnings}
|
if result.partial_level3:
|
||||||
|
out["partial_level3"] = True
|
||||||
|
out["missing_coverage"] = result.missing_coverage
|
||||||
|
return out
|
||||||
|
return {
|
||||||
|
"status": "error",
|
||||||
|
"errors": result.errors,
|
||||||
|
"warnings": [w.to_dict() for w in result.warning_records],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
@@ -182,10 +194,15 @@ def import_docx(manifest_yaml: str, docx_base64: str) -> dict[str, Any]:
|
|||||||
"status": "ok",
|
"status": "ok",
|
||||||
"files": files_md,
|
"files": files_md,
|
||||||
"mapping_status": result.mapping_status,
|
"mapping_status": result.mapping_status,
|
||||||
"warnings": result.warnings,
|
"output_state": result.output_state,
|
||||||
|
"warnings": [w.to_dict() for w in result.warning_records],
|
||||||
"errors": [],
|
"errors": [],
|
||||||
}
|
}
|
||||||
return {"status": "error", "errors": ["Import failed"], "warnings": result.warnings}
|
return {
|
||||||
|
"status": "error",
|
||||||
|
"errors": ["Import failed"],
|
||||||
|
"warnings": [w.to_dict() for w in result.warning_records],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
@@ -329,14 +346,17 @@ def get_evidence(run_id: str) -> dict[str, Any]:
|
|||||||
|
|
||||||
@mcp.resource("markidocx://capabilities")
|
@mcp.resource("markidocx://capabilities")
|
||||||
def resource_capabilities() -> str:
|
def resource_capabilities() -> str:
|
||||||
"""Capabilities: supported feature levels and families."""
|
"""Capabilities: supported feature levels and families (FR-537)."""
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
from markidocx.level3 import capabilities_entry as level3_capabilities
|
||||||
|
|
||||||
return json.dumps(
|
return json.dumps(
|
||||||
{
|
{
|
||||||
"version": __version__,
|
"version": __version__,
|
||||||
"feature_levels": [e.value for e in FeatureLevel],
|
"feature_levels": [e.value for e in FeatureLevel],
|
||||||
"families": sorted(SUPPORTED_FAMILIES),
|
"families": sorted(SUPPORTED_FAMILIES),
|
||||||
|
"level3": level3_capabilities(),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -23,14 +23,14 @@ from markidocx.templates import FamilyRegistry
|
|||||||
class ResponseEnvelope(BaseModel):
|
class ResponseEnvelope(BaseModel):
|
||||||
status: str
|
status: str
|
||||||
outputs: Any = None
|
outputs: Any = None
|
||||||
warnings: list[str] = []
|
warnings: list[Any] = [] # list[WarningRecord.to_dict()] or list[str] (FR-1208)
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
context: dict[str, Any] = {}
|
context: dict[str, Any] = {}
|
||||||
|
|
||||||
|
|
||||||
def _ok(
|
def _ok(
|
||||||
outputs: Any = None,
|
outputs: Any = None,
|
||||||
warnings: list[str] | None = None,
|
warnings: list[Any] | None = None,
|
||||||
context: dict[str, Any] | None = None,
|
context: dict[str, Any] | None = None,
|
||||||
) -> ResponseEnvelope:
|
) -> ResponseEnvelope:
|
||||||
return ResponseEnvelope(
|
return ResponseEnvelope(
|
||||||
@@ -44,7 +44,7 @@ def _ok(
|
|||||||
|
|
||||||
def _error(
|
def _error(
|
||||||
errors: list[str],
|
errors: list[str],
|
||||||
warnings: list[str] | None = None,
|
warnings: list[Any] | None = None,
|
||||||
context: dict[str, Any] | None = None,
|
context: dict[str, Any] | None = None,
|
||||||
) -> ResponseEnvelope:
|
) -> ResponseEnvelope:
|
||||||
return ResponseEnvelope(
|
return ResponseEnvelope(
|
||||||
@@ -158,11 +158,14 @@ def create_app() -> FastAPI:
|
|||||||
|
|
||||||
@app.get("/capabilities", response_model=ResponseEnvelope)
|
@app.get("/capabilities", response_model=ResponseEnvelope)
|
||||||
def capabilities() -> ResponseEnvelope:
|
def capabilities() -> ResponseEnvelope:
|
||||||
"""Capability inspection — feature levels and families (FR-909)."""
|
"""Capability inspection — feature levels and families (FR-909, FR-537)."""
|
||||||
|
from markidocx.level3 import capabilities_entry as level3_capabilities
|
||||||
|
|
||||||
return _ok(
|
return _ok(
|
||||||
outputs={
|
outputs={
|
||||||
"feature_levels": [e.value for e in FeatureLevel],
|
"feature_levels": [e.value for e in FeatureLevel],
|
||||||
"families": sorted(SUPPORTED_FAMILIES),
|
"families": sorted(SUPPORTED_FAMILIES),
|
||||||
|
"level3": level3_capabilities(),
|
||||||
},
|
},
|
||||||
context={"version": __version__},
|
context={"version": __version__},
|
||||||
)
|
)
|
||||||
@@ -227,17 +230,29 @@ def create_app() -> FastAPI:
|
|||||||
**req.context,
|
**req.context,
|
||||||
"family": result.family,
|
"family": result.family,
|
||||||
"feature_level": result.feature_level,
|
"feature_level": result.feature_level,
|
||||||
|
"output_state": result.output_state,
|
||||||
}
|
}
|
||||||
if result.success:
|
if result.success:
|
||||||
docx_b64 = base64.b64encode(Path(result.output_path).read_bytes()).decode()
|
docx_b64 = base64.b64encode(Path(result.output_path).read_bytes()).decode()
|
||||||
|
outputs: dict[str, Any] = {
|
||||||
|
"docx_base64": docx_b64,
|
||||||
|
"output_path": str(result.output_path),
|
||||||
|
}
|
||||||
|
if result.partial_level3:
|
||||||
|
outputs["partial_level3"] = True
|
||||||
|
outputs["missing_coverage"] = result.missing_coverage
|
||||||
return ResponseEnvelope(
|
return ResponseEnvelope(
|
||||||
status="ok",
|
status="ok",
|
||||||
outputs={"docx_base64": docx_b64, "output_path": str(result.output_path)},
|
outputs=outputs,
|
||||||
warnings=result.warnings,
|
warnings=[w.to_dict() for w in result.warning_records],
|
||||||
errors=[],
|
errors=[],
|
||||||
context=ctx,
|
context=ctx,
|
||||||
)
|
)
|
||||||
return _error(errors=result.errors, warnings=result.warnings, context=ctx)
|
return _error(
|
||||||
|
errors=result.errors,
|
||||||
|
warnings=[w.to_dict() for w in result.warning_records],
|
||||||
|
context=ctx,
|
||||||
|
)
|
||||||
|
|
||||||
@app.post("/import", response_model=ResponseEnvelope)
|
@app.post("/import", response_model=ResponseEnvelope)
|
||||||
def import_docx(req: ImportRequest) -> ResponseEnvelope:
|
def import_docx(req: ImportRequest) -> ResponseEnvelope:
|
||||||
@@ -255,7 +270,7 @@ def create_app() -> FastAPI:
|
|||||||
except ManifestError as exc:
|
except ManifestError as exc:
|
||||||
return _error(errors=[str(exc)], context=req.context)
|
return _error(errors=[str(exc)], context=req.context)
|
||||||
result = import_document(m, docx_path)
|
result = import_document(m, docx_path)
|
||||||
ctx = {**req.context}
|
ctx = {**req.context, "output_state": result.output_state}
|
||||||
if result.success:
|
if result.success:
|
||||||
import contextlib
|
import contextlib
|
||||||
|
|
||||||
@@ -266,14 +281,14 @@ def create_app() -> FastAPI:
|
|||||||
return ResponseEnvelope(
|
return ResponseEnvelope(
|
||||||
status="ok",
|
status="ok",
|
||||||
outputs={"files": files_md, "mapping_status": result.mapping_status},
|
outputs={"files": files_md, "mapping_status": result.mapping_status},
|
||||||
warnings=result.warnings,
|
warnings=[w.to_dict() for w in result.warning_records],
|
||||||
errors=[],
|
errors=[],
|
||||||
context=ctx,
|
context=ctx,
|
||||||
)
|
)
|
||||||
return ResponseEnvelope(
|
return ResponseEnvelope(
|
||||||
status="error",
|
status="error",
|
||||||
outputs=None,
|
outputs=None,
|
||||||
warnings=result.warnings,
|
warnings=[w.to_dict() for w in result.warning_records],
|
||||||
errors=["Import failed"],
|
errors=["Import failed"],
|
||||||
context=ctx,
|
context=ctx,
|
||||||
)
|
)
|
||||||
|
|||||||
159
src/markidocx/xref.py
Normal file
159
src/markidocx/xref.py
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
"""Cross-reference support for LEVEL3 markidocx (FR-531, FR-540).
|
||||||
|
|
||||||
|
Handles the round-trip of heading anchors ({#anchor}) and cross-reference
|
||||||
|
links ([text][anchor]) between Markdown and DOCX bookmarks/hyperlinks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from docx.text.paragraph import Paragraph as DocxParagraph
|
||||||
|
|
||||||
|
# Markdown patterns.
# Trailing ``{#anchor}`` label at the end of a heading; group 1 is the anchor
# name. The surrounding whitespace is part of the match.
ANCHOR_LABEL_RE = re.compile(r"\s*\{#([\w-]+)\}\s*$")
# Reference-style link ``[text][anchor]``; group 1 is the link text and
# group 2 the anchor name.
XREF_LINK_RE = re.compile(r"\[([^\]]+)\]\[([\w-]+)\]")

# DOCX XML namespaces: WordprocessingML main (``w:``) and
# officeDocument relationships (``r:``).
_W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
_R = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
|
||||||
|
|
||||||
|
|
||||||
|
def extract_anchor_from_heading(text: str) -> tuple[str, str | None]:
    """Split a trailing ``{#anchor}`` label off a heading.

    Args:
        text: heading text, optionally ending in ``{#anchor}``.

    Returns:
        ``(clean_text, anchor_name)``. When a label is found, ``clean_text``
        is everything before the label, with the whitespace directly in
        front of the label dropped too. When no label is found, the text is
        returned unchanged and ``anchor_name`` is None.
    """
    match = ANCHOR_LABEL_RE.search(text)
    if match is None:
        return text, None
    return text[: match.start()], match.group(1)
|
||||||
|
|
||||||
|
|
||||||
|
def add_bookmark_to_paragraph(para: DocxParagraph, bookmark_name: str, bookmark_id: int) -> None:
    """Append a bookmark start/end pair to a paragraph's XML (FR-531).

    Both markers are appended as the last children of the paragraph element,
    in start-then-end order, and share the same ``w:id``.

    Args:
        para: paragraph whose underlying ``_p`` element is modified in place.
        bookmark_name: value written to the start marker's ``w:name``.
        bookmark_id: numeric id written (as text) to both markers.
    """
    paragraph_xml = para._p  # lxml element
    id_text = str(bookmark_id)

    # <w:bookmarkStart w:id="N" w:name="anchor"/>
    start_marker = etree.SubElement(paragraph_xml, f"{{{_W}}}bookmarkStart")
    start_marker.set(f"{{{_W}}}id", id_text)
    start_marker.set(f"{{{_W}}}name", bookmark_name)

    # <w:bookmarkEnd w:id="N"/>
    end_marker = etree.SubElement(paragraph_xml, f"{{{_W}}}bookmarkEnd")
    end_marker.set(f"{{{_W}}}id", id_text)
|
||||||
|
|
||||||
|
|
||||||
|
def add_internal_hyperlink(para: DocxParagraph, text: str, anchor: str) -> None:
    """Add an internal hyperlink run pointing to a bookmark anchor (FR-531).

    Appends a ``<w:hyperlink w:anchor="anchor">`` element to the paragraph.
    It contains a single run styled with the ``Hyperlink`` character style,
    and that run carries *text*.

    Args:
        para: paragraph whose underlying ``_p`` element is modified in place.
        text: visible link text.
        anchor: name of the bookmark the link points to.
    """
    p_elem = para._p

    hyperlink = etree.SubElement(p_elem, f"{{{_W}}}hyperlink")
    hyperlink.set(f"{{{_W}}}anchor", anchor)

    run = etree.SubElement(hyperlink, f"{{{_W}}}r")
    rpr = etree.SubElement(run, f"{{{_W}}}rPr")
    style = etree.SubElement(rpr, f"{{{_W}}}rStyle")
    style.set(f"{{{_W}}}val", "Hyperlink")

    t = etree.SubElement(run, f"{{{_W}}}t")
    t.text = text
    # Mark the text as whitespace-significant whenever it starts or ends with
    # any whitespace (space, tab, newline, ...), not only a literal space.
    # NOTE(review): without this attribute, consumers may trim edge
    # whitespace of <w:t> — confirm against the target Word versions.
    if text and text != text.strip():
        t.set("{http://www.w3.org/XML/1998/namespace}space", "preserve")
|
||||||
|
|
||||||
|
|
||||||
|
def render_paragraph_with_xrefs(
    para: DocxParagraph,
    text: str,
    known_anchors: set[str],
) -> None:
    """Write *text* into *para*, turning ``[text][anchor]`` into internal links (FR-531).

    The text is walked left to right. Plain stretches become ordinary runs.
    A reference whose anchor is in *known_anchors* becomes an internal
    hyperlink; any other reference is written as plain text followed by a
    ``[→anchor]`` note.

    Args:
        para: paragraph that receives the runs and hyperlinks.
        text: Markdown paragraph text to render.
        known_anchors: anchor names that may be linked to.
    """
    cursor = 0
    for match in XREF_LINK_RE.finditer(text):
        start, stop = match.span()
        label, target = match.groups()

        # Flush the plain text sitting between the previous match and this one.
        if start > cursor:
            para.add_run(text[cursor:start])

        if target not in known_anchors:
            # Unknown anchor — render as plain text with a note
            para.add_run(f"{label} [→{target}]")
        else:
            add_internal_hyperlink(para, label, target)

        cursor = stop

    # Whatever follows the last reference (or the whole text if there was none).
    if cursor < len(text):
        para.add_run(text[cursor:])
|
||||||
|
|
||||||
|
|
||||||
|
def has_xref_links(text: str) -> bool:
    """Tell whether *text* contains at least one ``[text][anchor]`` reference."""
    return XREF_LINK_RE.search(text) is not None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Importer helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def extract_bookmarks_from_paragraph(para: DocxParagraph) -> list[str]:
    """Collect the bookmark names declared inside a paragraph's XML.

    Names are returned in document order. Empty names and names starting
    with ``_`` (Word-internal bookmarks such as ``_GoBack``) are skipped.
    """
    declared = (
        start.get(f"{{{_W}}}name", "")
        for start in para._p.iter(f"{{{_W}}}bookmarkStart")
    )
    return [name for name in declared if name and not name.startswith("_")]
|
||||||
|
|
||||||
|
|
||||||
|
def extract_internal_hyperlinks_from_paragraph(
    para: DocxParagraph,
) -> list[tuple[str, str]]:
    """List the ``(text, anchor)`` pairs of internal hyperlinks in *para*.

    A hyperlink counts as internal when it has a ``w:anchor`` attribute and
    no ``r:id`` relationship attribute. Its text is the concatenation of all
    non-empty ``<w:t>`` nodes beneath it; hyperlinks with no text are left out.
    """
    pairs: list[tuple[str, str]] = []
    for link in para._p.iter(f"{{{_W}}}hyperlink"):
        target = link.get(f"{{{_W}}}anchor")
        # Skip anything that is not a pure bookmark link (no anchor, or has r:id).
        if not target or link.get(f"{{{_R}}}id"):
            continue

        label = "".join(
            node.text for node in link.iter(f"{{{_W}}}t") if node.text
        )
        if label:
            pairs.append((label, target))
    return pairs
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Differ helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# ``{#anchor}`` declaration anywhere in Markdown text; group 1 is the name.
XREF_ANCHOR_RE = re.compile(r"\{#([\w-]+)\}")
# Same pattern as the builder's XREF_LINK_RE (defined near the top of this
# module). Kept as an alias under the existing name so the two can never drift.
XREF_LINK_PATTERN = XREF_LINK_RE
|
||||||
|
|
||||||
|
|
||||||
|
def extract_anchors(text: str) -> set[str]:
    """Return the set of anchor names declared as ``{#anchor}`` in Markdown *text*."""
    return {match.group(1) for match in XREF_ANCHOR_RE.finditer(text)}
|
||||||
|
|
||||||
|
|
||||||
|
def extract_xref_links(text: str) -> set[tuple[str, str]]:
    """Return the set of ``(text, anchor)`` pairs for every ``[text][anchor]`` link in *text*."""
    # With two capture groups, findall yields (group 1, group 2) tuples.
    return set(XREF_LINK_PATTERN.findall(text))
|
||||||
0
tests/regression/level3/__init__.py
Normal file
0
tests/regression/level3/__init__.py
Normal file
35
tests/regression/level3/bibliography_document.md
Normal file
35
tests/regression/level3/bibliography_document.md
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Research Document with Citations
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
Prior work by [@smith2020] established the foundation. The approach was later
|
||||||
|
refined by [@jones2021], building on the original insights of [@smith2020].
|
||||||
|
|
||||||
|
## Related Work
|
||||||
|
|
||||||
|
Several key contributions inform this work. The landmark paper [@brown2019]
|
||||||
|
introduced the core technique. Further development appeared in [@davis2022]
|
||||||
|
and [@wilson2023].
|
||||||
|
|
||||||
|
## Methodology
|
||||||
|
|
||||||
|
Based on [@smith2020] and the refinements of [@jones2021], our methodology
|
||||||
|
proceeds as follows.
|
||||||
|
|
||||||
|
## Results
|
||||||
|
|
||||||
|
Our results confirm the predictions of [@brown2019] and extend the findings
|
||||||
|
of [@davis2022].
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
This work synthesises [@smith2020], [@jones2021], [@brown2019], [@davis2022],
|
||||||
|
and [@wilson2023].
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
- [@smith2020]: Smith, J. *Foundational Work*. Journal of Research, 2020.
|
||||||
|
- [@jones2021]: Jones, B. *Refinements and Extensions*. Proceedings, 2021.
|
||||||
|
- [@brown2019]: Brown, C. *The Core Technique*. Nature, 2019.
|
||||||
|
- [@davis2022]: Davis, A. *Further Development*. Science, 2022.
|
||||||
|
- [@wilson2023]: Wilson, E. *Recent Advances*. Review, 2023.
|
||||||
63
tests/regression/level3/combined_document.md
Normal file
63
tests/regression/level3/combined_document.md
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
# Combined LEVEL3 Feature Document {#combined}
|
||||||
|
|
||||||
|
This document exercises all LEVEL3 constructs in a single file.
|
||||||
|
|
||||||
|
## Introduction {#intro}
|
||||||
|
|
||||||
|
This document demonstrates the full LEVEL3 feature set as described by [@smith2020].
|
||||||
|
See [Background][bg] for context.
|
||||||
|
|
||||||
|
## Background {#bg}
|
||||||
|
|
||||||
|
Context and prerequisites are discussed here. Refer to [Introduction][intro]
|
||||||
|
for the problem statement.
|
||||||
|
|
||||||
|
## Architecture {#arch-section}
|
||||||
|
|
||||||
|
The system architecture is shown below.
|
||||||
|
|
||||||
|
{#fig:arch}
|
||||||
|
|
||||||
|
The architecture overview in [Architecture][arch-section] establishes the
|
||||||
|
baseline from which the data flow is derived.
|
||||||
|
|
||||||
|
## Data Flow
|
||||||
|
|
||||||
|
The data flow diagram illustrates message routing.
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
graph LR
|
||||||
|
A[Input] --> B[Processor]
|
||||||
|
B --> C[Output]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Algorithm {#algo}
|
||||||
|
|
||||||
|
The algorithm formalises the approach described in [@jones2021].
|
||||||
|
|
||||||
|
```graphviz
|
||||||
|
digraph algorithm {
|
||||||
|
start -> step1 -> step2 -> end;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Results {#results}
|
||||||
|
|
||||||
|
Experimental results confirm the algorithm in [Algorithm][algo].
|
||||||
|
|
||||||
|
{#fig:results}
|
||||||
|
|
||||||
|
The results align with predictions from [@brown2019] and the architectural
|
||||||
|
choices described in [Architecture][arch-section].
|
||||||
|
|
||||||
|
## Conclusion {#conclusion}
|
||||||
|
|
||||||
|
All LEVEL3 constructs — cross-references, figures, diagrams, and citations —
|
||||||
|
have been demonstrated. See [Introduction][intro] through [Results][results]
|
||||||
|
for the complete narrative.
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
- [@smith2020]: Smith, J. *LEVEL3 Design Principles*. 2020.
|
||||||
|
- [@jones2021]: Jones, B. *Algorithm Formalisation*. 2021.
|
||||||
|
- [@brown2019]: Brown, C. *Experimental Validation*. 2019.
|
||||||
44
tests/regression/level3/diagrams_document.md
Normal file
44
tests/regression/level3/diagrams_document.md
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
# Document with Diagram Sources
|
||||||
|
|
||||||
|
## State Machine
|
||||||
|
|
||||||
|
The following Mermaid diagram describes the state machine:
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
stateDiagram-v2
|
||||||
|
[*] --> Idle
|
||||||
|
Idle --> Processing: start
|
||||||
|
Processing --> Done: complete
|
||||||
|
Processing --> Error: fail
|
||||||
|
Done --> [*]
|
||||||
|
Error --> Idle: reset
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dependency Graph
|
||||||
|
|
||||||
|
The Graphviz diagram shows dependencies:
|
||||||
|
|
||||||
|
```graphviz
|
||||||
|
digraph G {
|
||||||
|
A -> B;
|
||||||
|
A -> C;
|
||||||
|
B -> D;
|
||||||
|
C -> D;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Sequence
|
||||||
|
|
||||||
|
The PlantUML sequence diagram:
|
||||||
|
|
||||||
|
```plantuml
|
||||||
|
@startuml
|
||||||
|
Alice -> Bob: Request
|
||||||
|
Bob --> Alice: Response
|
||||||
|
Alice -> Carol: Forward
|
||||||
|
@enduml
|
||||||
|
```
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
All three diagram types are supported in LEVEL3 source-only mode.
|
||||||
29
tests/regression/level3/figures_document.md
Normal file
29
tests/regression/level3/figures_document.md
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
# Technical Report with Figures
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This document contains multiple numbered figures for LEVEL3 round-trip testing.
|
||||||
|
|
||||||
|
## System Architecture
|
||||||
|
|
||||||
|
The overall architecture is illustrated below.
|
||||||
|
|
||||||
|
{#fig:arch}
|
||||||
|
|
||||||
|
The architecture shows the main components and their interactions.
|
||||||
|
|
||||||
|
## Data Flow
|
||||||
|
|
||||||
|
The data flow is shown in the following figure.
|
||||||
|
|
||||||
|
{#fig:dataflow}
|
||||||
|
|
||||||
|
Compare the architecture in [fig:arch] with the data flow above.
|
||||||
|
|
||||||
|
## Results
|
||||||
|
|
||||||
|
Final results are captured in this chart.
|
||||||
|
|
||||||
|
{#fig:results}
|
||||||
|
|
||||||
|
The chart confirms the findings from the data flow in Figure 2.
|
||||||
21
tests/regression/level3/xref_document.md
Normal file
21
tests/regression/level3/xref_document.md
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# Introduction {#intro}
|
||||||
|
|
||||||
|
This document demonstrates cross-reference support for LEVEL3 processing.
|
||||||
|
|
||||||
|
## Background {#bg}
|
||||||
|
|
||||||
|
The background section provides context. See [Introduction][intro] for the overview.
|
||||||
|
|
||||||
|
## Methodology {#method}
|
||||||
|
|
||||||
|
This section describes the approach. Refer to [Background][bg] for prerequisites,
|
||||||
|
and see [Introduction][intro] for the original problem statement.
|
||||||
|
|
||||||
|
## Results {#results}
|
||||||
|
|
||||||
|
Results are discussed here. The methodology in [Methodology][method] led to these findings.
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
This concludes the document. All sections from [Introduction][intro] through
|
||||||
|
[Results][results] have been covered.
|
||||||
261
tests/regression/test_level3_roundtrip.py
Normal file
261
tests/regression/test_level3_roundtrip.py
Normal file
@@ -0,0 +1,261 @@
|
|||||||
|
"""LEVEL3 end-to-end round-trip regression tests (FR-1100, MRKD-WP-0003 T07).
|
||||||
|
|
||||||
|
Tests the full build → import → compare cycle for each corpus file in
|
||||||
|
tests/regression/level3/, using feature_level: level3.
|
||||||
|
|
||||||
|
All LEVEL1 regression tests must remain green (non-regression gate).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from markidocx.builder import build_document
|
||||||
|
from markidocx.differ import compare
|
||||||
|
from markidocx.importer import import_document
|
||||||
|
from markidocx.manifest import load_manifest
|
||||||
|
|
||||||
|
# The LEVEL3 corpus lives in the ``level3`` directory next to this module
# (tests/regression/level3/).
CORPUS_DIR = Path(__file__).with_name("level3")
# File names inside CORPUS_DIR; each one parametrises the corpus tests below.
CORPUS_FILES = [
    "xref_document.md",
    "figures_document.md",
    "diagrams_document.md",
    "bibliography_document.md",
    "combined_document.md",
]
|
||||||
|
|
||||||
|
|
||||||
|
def _make_level3_project(tmp_path: Path, markdown: str, name: str = "test") -> Path:
    """Create a minimal single-source LEVEL3 project under *tmp_path*.

    Writes ``doc.md`` with the given Markdown, a ``manifest.yaml`` declaring
    ``feature_level: level3`` and the ``article`` family, and an empty
    ``dist`` output directory.

    Args:
        tmp_path: directory to populate (normally pytest's ``tmp_path``).
        markdown: content of the single source file.
        name: project name recorded in the manifest.

    Returns:
        Path to the written ``manifest.yaml``.
    """
    (tmp_path / "doc.md").write_text(markdown, encoding="utf-8")

    manifest = {
        "project": {"name": name, "feature_level": "level3", "family": "article"},
        "sources": [{"path": "doc.md"}],
        "output": {"dir": "./dist"},
    }
    manifest_path = tmp_path / "manifest.yaml"
    # Explicit UTF-8, matching doc.md above, so the fixture does not depend on
    # the locale's default encoding.
    manifest_path.write_text(yaml.dump(manifest), encoding="utf-8")

    (tmp_path / "dist").mkdir()
    return manifest_path
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Corpus round-trip tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("corpus_file", CORPUS_FILES)
def test_level3_corpus_builds(tmp_path: Path, corpus_file: str) -> None:
    """Building any corpus file at LEVEL3 succeeds and produces an output file."""
    source_md = (CORPUS_DIR / corpus_file).read_text(encoding="utf-8")
    project_name = corpus_file.replace(".md", "")
    manifest = load_manifest(_make_level3_project(tmp_path, source_md, name=project_name))

    result = build_document(manifest)

    assert result.success, f"Build failed for {corpus_file}: {result.errors}"
    assert result.output_path.exists()
    assert result.feature_level == "level3"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("corpus_file", CORPUS_FILES)
def test_level3_corpus_imports(tmp_path: Path, corpus_file: str) -> None:
    """The DOCX built from any corpus file can be imported again."""
    source_md = (CORPUS_DIR / corpus_file).read_text(encoding="utf-8")
    project_name = corpus_file.replace(".md", "")
    manifest = load_manifest(_make_level3_project(tmp_path, source_md, name=project_name))

    built = build_document(manifest)
    assert built.success, f"Build failed for {corpus_file}"

    imported = import_document(manifest, built.output_path)
    assert imported.success, f"Import failed for {corpus_file}: {imported.warnings}"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("corpus_file", CORPUS_FILES)
def test_level3_corpus_no_unexpected_breakage(tmp_path: Path, corpus_file: str) -> None:
    """After build and import, the diff against the source reports no broken headings."""
    source_md = (CORPUS_DIR / corpus_file).read_text(encoding="utf-8")
    project_name = corpus_file.replace(".md", "")
    manifest = load_manifest(_make_level3_project(tmp_path, source_md, name=project_name))

    built = build_document(manifest)
    assert built.success

    imported = import_document(manifest, built.output_path)
    assert imported.success

    # Compare the original Markdown with the first re-imported file.
    reimported = imported.output_files[0].read_text(encoding="utf-8")
    report = compare(source_md, reimported)

    # Headings must not be broken
    broken_headings = [entry for entry in report.broken if entry.startswith("heading:")]
    assert not broken_headings, (
        f"Broken headings in {corpus_file}: {broken_headings}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Specific corpus: xref_document — cross-ref anchors preserved
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_xref_document_anchors_preserved(tmp_path: Path) -> None:
    """Heading anchors declared in xref_document.md reappear after build and import."""
    source_md = (CORPUS_DIR / "xref_document.md").read_text(encoding="utf-8")
    manifest = load_manifest(_make_level3_project(tmp_path, source_md, name="xref"))

    built = build_document(manifest)
    assert built.success

    imported = import_document(manifest, built.output_path)
    assert imported.success

    reimported = imported.output_files[0].read_text(encoding="utf-8")
    # Core anchors must survive
    for anchor_label in ("{#intro}", "{#bg}", "{#method}", "{#results}"):
        assert anchor_label in reimported
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Specific corpus: figures_document — figure labels preserved
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_figures_document_labels_preserved(tmp_path: Path) -> None:
    """Figure labels from figures_document.md reappear after build and import."""
    source_md = (CORPUS_DIR / "figures_document.md").read_text(encoding="utf-8")
    manifest = load_manifest(_make_level3_project(tmp_path, source_md, name="figures"))

    built = build_document(manifest)
    assert built.success

    imported = import_document(manifest, built.output_path)
    assert imported.success

    reimported = imported.output_files[0].read_text(encoding="utf-8")
    for figure_label in ("fig:arch", "fig:dataflow", "fig:results"):
        assert figure_label in reimported
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Specific corpus: diagrams_document — diagram sources preserved
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_diagrams_document_sources_preserved(tmp_path: Path, monkeypatch) -> None:
    """Diagram sources survive round-trip in source-only path.

    ``shutil.which`` is patched to report every command as missing, so the
    test does not depend on which diagram renderers the host has installed.
    """
    import shutil

    monkeypatch.setattr(shutil, "which", lambda _cmd: None)

    source_md = (CORPUS_DIR / "diagrams_document.md").read_text(encoding="utf-8")
    manifest = load_manifest(_make_level3_project(tmp_path, source_md, name="diagrams"))

    built = build_document(manifest)
    assert built.success

    imported = import_document(manifest, built.output_path)
    assert imported.success

    reimported = imported.output_files[0].read_text(encoding="utf-8")
    # At least one diagram type must appear in reimported
    diagram_kinds = ("mermaid", "graphviz", "plantuml")
    assert any(kind in reimported for kind in diagram_kinds)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Specific corpus: bibliography_document — citation keys preserved
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_bibliography_document_citations_preserved(tmp_path: Path) -> None:
    """Build and re-import the bibliography corpus document; every citation key must survive."""
    corpus_text = (CORPUS_DIR / "bibliography_document.md").read_text(encoding="utf-8")
    manifest = load_manifest(_make_level3_project(tmp_path, corpus_text, name="bibliography"))

    built = build_document(manifest)
    assert built.success

    imported = import_document(manifest, built.output_path)
    assert imported.success

    roundtripped = imported.output_files[0].read_text(encoding="utf-8")
    # Checked in the same order as before so the first missing key is the one reported.
    for citation_key in ("smith2020", "jones2021", "brown2019"):
        assert citation_key in roundtripped
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Specific corpus: combined_document — all LEVEL3 constructs
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_combined_document_roundtrip(tmp_path: Path, monkeypatch) -> None:
    """Round-trip the combined corpus document and spot-check one marker per construct.

    ``shutil.which`` is patched to return ``None`` for every command while the
    test runs.
    """
    import shutil

    monkeypatch.setattr(shutil, "which", lambda _cmd: None)

    corpus_text = (CORPUS_DIR / "combined_document.md").read_text(encoding="utf-8")
    manifest = load_manifest(_make_level3_project(tmp_path, corpus_text, name="combined"))

    built = build_document(manifest)
    assert built.success

    imported = import_document(manifest, built.output_path)
    assert imported.success

    roundtripped = imported.output_files[0].read_text(encoding="utf-8")

    # One representative marker each: heading anchor, figure label, citation key.
    expected_markers = (
        "{#intro}",
        "fig:arch",
        "smith2020",
    )
    for marker in expected_markers:
        assert marker in roundtripped
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI: markidocx test executes LEVEL1 + LEVEL3 corpus (non-regression gate)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_level1_regression_still_passes(tmp_path: Path) -> None:
    """Non-regression gate: a LEVEL1 project must still build, import and keep its headings."""
    from tests.regression.test_roundtrip import LEVEL1_MARKDOWN

    (tmp_path / "doc.md").write_text(LEVEL1_MARKDOWN, encoding="utf-8")

    manifest_data = {
        "project": {"name": "l1-nonreg", "feature_level": "level1", "family": "article"},
        "sources": [{"path": "doc.md"}],
        "output": {"dir": "./dist"},
    }
    manifest_path = tmp_path / "manifest.yaml"
    manifest_path.write_text(yaml.dump(manifest_data))
    (tmp_path / "dist").mkdir()
    manifest = load_manifest(manifest_path)

    built = build_document(manifest)
    assert built.success
    assert not built.errors

    imported = import_document(manifest, built.output_path)
    assert imported.success

    roundtripped = imported.output_files[0].read_text(encoding="utf-8")
    drift = compare(LEVEL1_MARKDOWN, roundtripped)
    # Only heading breakage is gated here; other kinds of drift are not checked.
    assert not any(item.startswith("heading:") for item in drift.broken)
|
||||||
380
tests/test_error_framework.py
Normal file
380
tests/test_error_framework.py
Normal file
@@ -0,0 +1,380 @@
|
|||||||
|
"""Tests for structured error & warning framework (FR-1201–1210)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import textwrap
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# WarningRecord / FailureRecord / OutputState types (FR-1208–1210)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestWarningRecord:
    """Checks for ``WarningRecord.to_dict()`` and its ``str()`` rendering."""

    def test_to_dict(self) -> None:
        from markidocx.errors import Severity, WarningRecord

        record = WarningRecord(
            severity=Severity.WARNING,
            reason="unsupported-construct",
            construct="html_block",
        )
        as_dict = record.to_dict()
        assert as_dict["severity"] == "warning"
        assert as_dict["reason"] == "unsupported-construct"
        assert as_dict["construct"] == "html_block"

    def test_str_with_construct(self) -> None:
        from markidocx.errors import WarningRecord

        record = WarningRecord(severity="warning", reason="test-reason", construct="my-token")
        # Severity, reason and construct must each appear in the rendered string.
        for fragment in ("warning", "test-reason", "my-token"):
            assert fragment in str(record)

    def test_str_without_construct(self) -> None:
        from markidocx.errors import WarningRecord

        rendered = str(WarningRecord(severity="info", reason="test-reason"))
        assert "info" in rendered
        assert "test-reason" in rendered
|
||||||
|
|
||||||
|
|
||||||
|
class TestFailureRecord:
    """Checks for ``FailureRecord.to_dict()``."""

    def test_to_dict(self) -> None:
        from markidocx.errors import FailureRecord, Severity

        failure = FailureRecord(
            severity=Severity.ERROR,
            reason="docx-not-found",
            construct="some.docx",
        )
        as_dict = failure.to_dict()
        assert as_dict["severity"] == "error"
        assert as_dict["reason"] == "docx-not-found"
|
||||||
|
|
||||||
|
|
||||||
|
class TestOutputState:
    """Checks that every expected ``OutputState`` member exists with its string value."""

    def test_all_states_defined(self) -> None:
        from markidocx.errors import OutputState

        expected_values = (
            ("FINAL", "final"),
            ("PARTIAL", "partial"),
            ("FALLBACK", "fallback"),
            ("DEGRADED", "degraded"),
            ("UNRESOLVED", "unresolved"),
        )
        for member_name, value in expected_values:
            assert getattr(OutputState, member_name) == value
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Builder emits WarningRecord for unsupported constructs (FR-1203, FR-1205)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuilderWarningRecords:
    """Builder-level checks for structured warning records and the output state."""

    @staticmethod
    def _build(tmp_path: Path, markdown: str, name: str = "test"):
        """Write a single-source LEVEL1 project into *tmp_path*, build it, return the result.

        The manifest nests ``name``/``feature_level``/``family`` under
        ``project`` and ``dir`` under ``output``.
        """
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        (tmp_path / "doc.md").write_text(markdown, encoding="utf-8")
        (tmp_path / "manifest.yaml").write_text(
            textwrap.dedent(f"""\
                project:
                  name: {name}
                  feature_level: level1
                  family: article
                sources:
                  - path: doc.md
                output:
                  dir: ./dist
                """),
            encoding="utf-8",
        )
        return build_document(load_manifest(tmp_path / "manifest.yaml"))

    def test_unsupported_html_emits_warning_record(self, tmp_path: Path) -> None:
        """A raw HTML block yields at least one WARNING-severity record naming an html construct."""
        from markidocx.errors import Severity

        result = self._build(
            tmp_path,
            "# Hello\n\n<div>raw html</div>\n\nNormal paragraph.",
        )
        assert result.success
        assert len(result.warning_records) > 0
        html_warnings = [w for w in result.warning_records if "html" in w.construct]
        assert html_warnings, "Expected warning for html construct"
        assert all(w.severity == Severity.WARNING for w in html_warnings)

    def test_warning_records_have_reason(self, tmp_path: Path) -> None:
        """Every warning record produced for an HTML block carries a non-empty reason."""
        result = self._build(tmp_path, "# Hello\n\n<div>raw html</div>")
        # Guard against a vacuous pass: the loop below checks nothing if the list is empty.
        assert result.warning_records, "Expected at least one WarningRecord for raw HTML"
        for w in result.warning_records:
            assert w.reason, "WarningRecord must have a non-empty reason"

    def test_warnings_property_returns_strings(self, tmp_path: Path) -> None:
        """``result.warnings`` exposes plain strings."""
        result = self._build(tmp_path, "# Hello\n\n<div>html</div>")
        assert all(isinstance(w, str) for w in result.warnings)

    def test_output_state_on_clean_build(self, tmp_path: Path) -> None:
        """A document with only a heading and a paragraph builds to ``OutputState.FINAL``."""
        from markidocx.errors import OutputState

        result = self._build(tmp_path, "# Hello\n\nContent.", name="clean")
        assert result.output_state == OutputState.FINAL
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Importer emits WarningRecord for errors and fallback paths (FR-1206, FR-1207)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestImporterWarningRecords:
    """Importer-level checks for structured warning records and the output state."""

    @staticmethod
    def _write_manifest(
        tmp_path: Path,
        sources: tuple[str, ...] = ("doc.md",),
        name: str = "test",
        filename: str = "manifest.yaml",
    ) -> Path:
        """Write a LEVEL1 manifest listing *sources* into *tmp_path* and return its path.

        The manifest nests ``name``/``feature_level``/``family`` under
        ``project`` and ``dir`` under ``output``.
        """
        source_lines = "\n".join(f"  - path: {source}" for source in sources)
        manifest_text = (
            "project:\n"
            f"  name: {name}\n"
            "  feature_level: level1\n"
            "  family: article\n"
            "sources:\n"
            f"{source_lines}\n"
            "output:\n"
            "  dir: ./dist\n"
        )
        manifest_path = tmp_path / filename
        manifest_path.write_text(manifest_text, encoding="utf-8")
        return manifest_path

    def test_not_found_emits_error_warning_record(self, tmp_path: Path) -> None:
        """Importing a missing DOCX fails with an ERROR record whose reason is ``docx-not-found``."""
        from markidocx.errors import OutputState, Severity
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        (tmp_path / "doc.md").write_text("# Hello", encoding="utf-8")
        m = load_manifest(self._write_manifest(tmp_path))

        result = import_document(m, tmp_path / "missing.docx")
        assert not result.success
        assert result.output_state == OutputState.UNRESOLVED
        assert len(result.warning_records) > 0
        assert result.warning_records[0].severity == Severity.ERROR
        assert result.warning_records[0].reason == "docx-not-found"

    def test_warnings_property_returns_strings(self, tmp_path: Path) -> None:
        """``result.warnings`` exposes plain strings even when the import fails."""
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        (tmp_path / "doc.md").write_text("# Hello", encoding="utf-8")
        m = load_manifest(self._write_manifest(tmp_path))

        result = import_document(m, tmp_path / "missing.docx")
        assert all(isinstance(w, str) for w in result.warnings)

    def test_fallback_emits_fallback_warning(self, tmp_path: Path) -> None:
        """Multi-source import that can't redistribute produces fallback WarningRecord."""
        from markidocx.builder import build_document
        from markidocx.errors import OutputState
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        # Build the DOCX from two sources, each with its own H1.
        (tmp_path / "a.md").write_text("# Alpha\n\nContent.", encoding="utf-8")
        (tmp_path / "b.md").write_text("# Beta\n\nContent.", encoding="utf-8")
        m = load_manifest(
            self._write_manifest(tmp_path, sources=("a.md", "b.md"), name="multi")
        )

        build_result = build_document(m)
        assert build_result.success

        # Import that DOCX with a manifest listing three sources: the counts no
        # longer match, which is the mismatch this test relies on.
        (tmp_path / "c.md").write_text("# Gamma\n\nContent.", encoding="utf-8")
        m3 = load_manifest(
            self._write_manifest(
                tmp_path,
                sources=("a.md", "b.md", "c.md"),
                name="multi",
                filename="manifest3.yaml",
            )
        )

        result = import_document(m3, build_result.output_path)
        assert result.success
        assert result.mapping_status == "merged"
        assert result.output_state == OutputState.FALLBACK
        fallback_warnings = [w for w in result.warning_records if w.reason == "fallback"]
        assert fallback_warnings, "Expected fallback WarningRecord"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Differ output_state (FR-1204)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestDifferOutputState:
    """Checks the ``output_state`` that ``compare`` reports for clean, reduced and empty re-imports."""

    def test_final_state_on_clean_diff(self) -> None:
        from markidocx.differ import compare
        from markidocx.errors import OutputState

        same_text = "# Hello\n\nSome paragraph.\n\n- item one\n- item two"
        report = compare(same_text, same_text)
        assert not report.has_drift
        assert report.output_state == OutputState.FINAL

    def test_degraded_state_on_degraded_diff(self) -> None:
        from markidocx.differ import compare
        from markidocx.errors import OutputState

        # The re-imported text keeps the heading but loses two of the three list items.
        report = compare(
            "# Hello\n\n- item one\n- item two\n- item three",
            "# Hello\n\n- item one",
        )
        assert report.has_drift
        # Either state is accepted here.
        acceptable_states = (OutputState.DEGRADED, OutputState.PARTIAL)
        assert report.output_state in acceptable_states

    def test_partial_state_on_broken_diff(self) -> None:
        from markidocx.differ import compare
        from markidocx.errors import OutputState

        # Comparing against an empty re-import.
        report = compare("# Section A\n\n## Sub\n\nParagraph.", "")
        assert report.has_drift
        assert report.output_state == OutputState.PARTIAL
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# REST response envelope warnings are WarningRecord dicts (FR-1208)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRestWarningRecords:
    """REST-level checks that ``warnings`` in response bodies are dicts, not bare strings."""

    # Single-source LEVEL1 manifest shared by both tests; ``name``/``feature_level``/
    # ``family`` are nested under ``project`` and ``dir`` under ``output``.
    _MANIFEST_YAML = textwrap.dedent("""\
        project:
          name: test
          feature_level: level1
          family: article
        sources:
          - path: doc.md
        output:
          dir: ./dist
        """)

    def test_build_warnings_are_dicts(self, tmp_path: Path) -> None:
        """When build produces warnings, REST response warnings are dicts, not bare strings."""

        from fastapi.testclient import TestClient

        from markidocx.rest import create_app

        # HTML in source will produce warnings
        sources = [{"name": "doc.md", "content": "# Hello\n\n<div>html</div>"}]
        client = TestClient(create_app())
        resp = client.post(
            "/build",
            json={"manifest_yaml": self._MANIFEST_YAML, "sources": sources},
        )
        assert resp.status_code == 200
        body = resp.json()
        warnings = body.get("warnings", [])
        # Each warning should be a dict with severity/reason/construct keys.
        # NOTE(review): an absent or empty "warnings" list passes this loop without checks.
        for w in warnings:
            assert isinstance(w, dict), f"Expected dict warning, got {type(w)}: {w}"
            assert "severity" in w
            assert "reason" in w

    def test_import_warnings_are_dicts_on_failure(self) -> None:
        """Import failure warns with WarningRecord dict, not bare string."""
        import base64

        from fastapi.testclient import TestClient

        from markidocx.rest import create_app

        # Send bytes that are not a valid DOCX (non-empty garbage, base64-encoded).
        bogus_docx = base64.b64encode(b"not-a-docx").decode()
        client = TestClient(create_app())
        resp = client.post(
            "/import",
            json={"manifest_yaml": self._MANIFEST_YAML, "docx_base64": bogus_docx},
        )
        body = resp.json()
        warnings = body.get("warnings", [])
        # The status code is deliberately not asserted; only the warning shape is checked.
        for w in warnings:
            assert isinstance(w, dict), f"Expected dict warning, got {type(w)}: {w}"
|
||||||
349
tests/test_level3_bibliography.py
Normal file
349
tests/test_level3_bibliography.py
Normal file
@@ -0,0 +1,349 @@
|
|||||||
|
"""Tests for LEVEL3 bibliography & citation support (FR-535, FR-536, FR-542)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import textwrap
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
LEVEL3_MANIFEST = textwrap.dedent("""\
|
||||||
|
project:
|
||||||
|
name: bib-test
|
||||||
|
feature_level: level3
|
||||||
|
family: article
|
||||||
|
sources:
|
||||||
|
- path: doc.md
|
||||||
|
output:
|
||||||
|
dir: ./dist
|
||||||
|
""")
|
||||||
|
|
||||||
|
|
||||||
|
def _make_project(tmp_path: Path, markdown: str) -> Path:
|
||||||
|
(tmp_path / "doc.md").write_text(markdown, encoding="utf-8")
|
||||||
|
(tmp_path / "manifest.yaml").write_text(LEVEL3_MANIFEST, encoding="utf-8")
|
||||||
|
return tmp_path
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# bibliography module helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestBibliographyHelpers:
    """Unit checks for the helper functions exported by ``markidocx.bibliography``."""

    def test_has_citations_true(self) -> None:
        from markidocx.bibliography import has_citations

        sample = "See [@smith2020] for details."
        assert has_citations(sample)

    def test_has_citations_false(self) -> None:
        from markidocx.bibliography import has_citations

        sample = "Normal paragraph without citations."
        assert not has_citations(sample)

    def test_extract_citation_keys(self) -> None:
        from markidocx.bibliography import extract_citation_keys

        found = extract_citation_keys("See [@smith2020] and [@jones2021:chap] for more.")
        # A key containing a colon must be returned whole.
        for expected_key in ("smith2020", "jones2021:chap"):
            assert expected_key in found

    def test_is_references_heading(self) -> None:
        from markidocx.bibliography import is_references_heading

        # Heading levels 2, 1 and 3 are all recognised.
        for heading in ("## References", "# References", "### References"):
            assert is_references_heading(heading)
        assert not is_references_heading("## Introduction")

    def test_parse_reference_entry(self) -> None:
        from markidocx.bibliography import parse_reference_entry

        parsed = parse_reference_entry("- [@smith2020]: Smith, J. *Title*. 2020.")
        assert parsed is not None
        key, entry = parsed
        assert key == "smith2020"
        assert "Smith, J." in entry

    def test_extract_references_section(self) -> None:
        from markidocx.bibliography import extract_references_section

        document = textwrap.dedent("""\
            # Document

            See [@smith2020].

            ## References

            - [@smith2020]: Smith, J. *A Book*. 2020.
            - [@jones2021]: Jones, B. *Another*. 2021.
            """)
        entries, remainder = extract_references_section(document)
        assert len(entries) == 2
        first_entry, second_entry = entries[0], entries[1]
        assert first_entry[0] == "smith2020"
        assert second_entry[0] == "jones2021"
        # The heading is removed from the text returned alongside the entries.
        assert "## References" not in remainder

    def test_render_citation_text_unchanged(self) -> None:
        from markidocx.bibliography import render_citation_text

        sample = "See [@smith2020] for details."
        assert render_citation_text(sample) == sample
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Builder: citations and references section (FR-535)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuilderBibliography:
    """Builder checks: citations and the References section end up in the DOCX."""

    def test_build_with_citation_succeeds(self, tmp_path: Path) -> None:
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        source = textwrap.dedent("""\
            # Document

            As shown by [@smith2020], the approach works.

            ## References

            - [@smith2020]: Smith, J. *A Work*. 2020.
            """)
        _make_project(tmp_path, source)
        outcome = build_document(load_manifest(tmp_path / "manifest.yaml"))
        assert outcome.success
        assert outcome.output_path.exists()

    def test_build_docx_contains_citation_marker(self, tmp_path: Path) -> None:
        """The built DOCX should contain the citation text."""
        from docx import Document as DocxReader

        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        source = "# Doc\n\nSee [@smith2020].\n\n## References\n\n- [@smith2020]: Smith. *T*. 2020."
        _make_project(tmp_path, source)
        outcome = build_document(load_manifest(tmp_path / "manifest.yaml"))
        assert outcome.success

        texts = [para.text for para in DocxReader(str(outcome.output_path)).paragraphs]
        citation_paras = [text for text in texts if "smith2020" in text]
        assert citation_paras, f"No citation found in DOCX. Paragraphs: {texts}"

    def test_build_docx_contains_references_heading(self, tmp_path: Path) -> None:
        """The built DOCX should have a References heading."""
        from docx import Document as DocxReader

        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        source = "# Doc\n\nText.\n\n## References\n\n- [@k1]: Author. *T*. 2020."
        _make_project(tmp_path, source)
        outcome = build_document(load_manifest(tmp_path / "manifest.yaml"))
        assert outcome.success

        texts = [para.text for para in DocxReader(str(outcome.output_path)).paragraphs]
        # Exact match: some paragraph's text must be precisely "References".
        assert "References" in texts, f"No References heading. Paragraphs: {texts}"

    def test_build_multi_citation_document(self, tmp_path: Path) -> None:
        """Multiple citations and references entries all appear in DOCX."""
        from docx import Document as DocxReader

        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        source = textwrap.dedent("""\
            # Introduction

            According to [@smith2020] and [@jones2021], this is true.

            ## References

            - [@smith2020]: Smith, J. *Work A*. 2020.
            - [@jones2021]: Jones, B. *Work B*. 2021.
            """)
        _make_project(tmp_path, source)
        outcome = build_document(load_manifest(tmp_path / "manifest.yaml"))
        assert outcome.success

        paragraphs = DocxReader(str(outcome.output_path)).paragraphs
        joined = " ".join(para.text for para in paragraphs)
        for citation_key in ("smith2020", "jones2021"):
            assert citation_key in joined
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Importer: citations and references restoration (FR-536)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestImporterBibliography:
    """Importer checks: citation keys are still present after build + import."""

    def test_roundtrip_preserves_citation(self, tmp_path: Path) -> None:
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        source = "# Doc\n\nSee [@smith2020].\n\n## References\n\n- [@smith2020]: Smith. *T*. 2020."
        _make_project(tmp_path, source)
        manifest = load_manifest(tmp_path / "manifest.yaml")

        built = build_document(manifest)
        assert built.success

        imported = import_document(manifest, built.output_path)
        assert imported.success

        roundtripped = imported.output_files[0].read_text(encoding="utf-8")
        assert "smith2020" in roundtripped

    def test_roundtrip_preserves_reference_entry(self, tmp_path: Path) -> None:
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        source = textwrap.dedent("""\
            # Doc

            See [@k1].

            ## References

            - [@k1]: Author. *Title*. 2020.
            """)
        _make_project(tmp_path, source)
        manifest = load_manifest(tmp_path / "manifest.yaml")

        built = build_document(manifest)
        assert built.success

        imported = import_document(manifest, built.output_path)
        assert imported.success

        roundtripped = imported.output_files[0].read_text(encoding="utf-8")
        # Only checks that the key text survives somewhere in the output.
        assert "k1" in roundtripped
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Differ: citation and bibliography comparison (FR-542)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestDifferBibliography:
    """Differ checks: how citations and reference entries are classified by ``compare``."""

    def test_preserved_citation(self) -> None:
        from markidocx.differ import compare

        same_text = "# Doc\n\nSee [@smith2020].\n\n## References\n\n- [@smith2020]: Smith. *T*. 2020."
        report = compare(same_text, same_text)
        assert any("citation:[@smith2020]" in item for item in report.preserved)

    def test_missing_citation_broken(self) -> None:
        from markidocx.differ import compare

        report = compare("See [@smith2020].", "See something.")
        assert any("citation:missing '[@smith2020]'" in item for item in report.broken)
        assert report.has_drift

    def test_missing_reference_entry_degraded(self) -> None:
        from markidocx.differ import compare

        with_references = textwrap.dedent("""\
            See [@k1].

            ## References

            - [@k1]: Author. *T*. 2020.
            """)
        # The re-imported text keeps the inline citation but drops the References section.
        report = compare(with_references, "See [@k1].")
        assert any("reference-entry" in item for item in report.degraded)

    def test_unresolvable_citation_emits_warning(self) -> None:
        """Missing citation in reimported emits citation-ambiguity warning."""
        from markidocx.bibliography import compare_citations
        from markidocx.errors import WarningRecord

        # compare_citations receives these lists as arguments; the warning
        # list is inspected after the call.
        preserved: list[str] = []
        degraded: list[str] = []
        broken: list[str] = []
        warning_records: list[WarningRecord] = []

        compare_citations(
            "See [@missing].",
            "See something.",
            preserved,
            degraded,
            broken,
            warning_records,
        )

        ambiguity = [rec for rec in warning_records if rec.reason == "citation-ambiguity"]
        assert ambiguity, "Expected citation-ambiguity warning"
        assert ambiguity[0].construct == "@missing"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Single citation round-trip
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestCitationRoundTrip:
    """End-to-end build -> import checks for documents containing citations."""

    def test_single_citation_roundtrip(self, tmp_path: Path) -> None:
        from markidocx.builder import build_document
        from markidocx.differ import compare
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        source = textwrap.dedent("""\
            # Introduction

            According to [@smith2020], things are good.

            ## References

            - [@smith2020]: Smith, J. *Good Stuff*. 2020.
            """)
        _make_project(tmp_path, source)
        manifest = load_manifest(tmp_path / "manifest.yaml")

        built = build_document(manifest)
        assert built.success

        imported = import_document(manifest, built.output_path)
        assert imported.success

        roundtripped = imported.output_files[0].read_text(encoding="utf-8")
        report = compare(source, roundtripped)

        # Only citation-related breakage is gated here.
        broken_citations = [item for item in report.broken if "citation" in item]
        assert not broken_citations, f"Broken citations: {broken_citations}"

    def test_multi_citation_document(self, tmp_path: Path) -> None:
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        source = textwrap.dedent("""\
            # Paper

            First point from [@a2020]. Second from [@b2021].

            ## References

            - [@a2020]: A. *Work A*. 2020.
            - [@b2021]: B. *Work B*. 2021.
            """)
        _make_project(tmp_path, source)
        manifest = load_manifest(tmp_path / "manifest.yaml")

        built = build_document(manifest)
        assert built.success

        imported = import_document(manifest, built.output_path)
        assert imported.success

        roundtripped = imported.output_files[0].read_text(encoding="utf-8")
        for citation_key in ("a2020", "b2021"):
            assert citation_key in roundtripped
|
||||||
231
tests/test_level3_diagrams.py
Normal file
231
tests/test_level3_diagrams.py
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
"""Tests for LEVEL3 auto-diagram support (FR-533, FR-534)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import textwrap
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Minimal LEVEL3 project manifest written next to ``doc.md`` by ``_make_project``.
# The mapping keys must be nested: with every key at column 0, ``project:``
# parses as an empty value and the feature level is never seen.
# NOTE(review): ``family`` is assumed to belong to ``project`` — confirm
# against the manifest schema.
LEVEL3_MANIFEST = textwrap.dedent("""\
    project:
      name: diag-test
      feature_level: level3
      family: article
    sources:
      - path: doc.md
    output:
      dir: ./dist
""")
|
||||||
|
|
||||||
|
|
||||||
|
def _make_project(tmp_path: Path, markdown: str) -> Path:
    """Create a one-document LEVEL3 project inside *tmp_path*.

    Writes *markdown* to ``doc.md`` and ``LEVEL3_MANIFEST`` to
    ``manifest.yaml`` (both UTF-8), then returns *tmp_path*.
    """
    files = (("doc.md", markdown), ("manifest.yaml", LEVEL3_MANIFEST))
    for filename, content in files:
        (tmp_path / filename).write_text(content, encoding="utf-8")
    return tmp_path
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# diagrams module helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestDiagramHelpers:
    """Unit checks for the helper functions exposed by ``markidocx.diagrams``."""

    def test_is_diagram_info_mermaid(self) -> None:
        """``mermaid`` is accepted as a diagram info string."""
        from markidocx.diagrams import is_diagram_info as recognises

        assert recognises("mermaid")

    def test_is_diagram_info_graphviz(self) -> None:
        """``graphviz`` is accepted as a diagram info string."""
        from markidocx.diagrams import is_diagram_info as recognises

        assert recognises("graphviz")

    def test_is_diagram_info_plantuml(self) -> None:
        """``plantuml`` is accepted as a diagram info string."""
        from markidocx.diagrams import is_diagram_info as recognises

        assert recognises("plantuml")

    def test_is_diagram_info_python_false(self) -> None:
        """A code language, the empty string and ``None`` are all rejected."""
        from markidocx.diagrams import is_diagram_info as recognises

        for info in ("python", "", None):
            assert not recognises(info)

    def test_is_diagram_source_marker(self) -> None:
        """Only text starting with the source marker is detected."""
        from markidocx.diagrams import is_diagram_source_marker

        marker_text = "diagram-source:mermaid\ngraph TD\nA-->B"
        assert is_diagram_source_marker(marker_text)
        assert not is_diagram_source_marker("normal text")

    def test_parse_diagram_source_marker(self) -> None:
        """Parsing a marker gives back the diagram type and the source."""
        from markidocx.diagrams import parse_diagram_source_marker

        body = "graph TD\nA-->B"
        parsed = parse_diagram_source_marker("diagram-source:mermaid\n" + body)
        assert parsed is not None
        kind, recovered = parsed
        assert kind == "mermaid"
        assert recovered == body

    def test_reconstruct_diagram_md(self) -> None:
        """Reconstruction wraps the source in a fence tagged with the type."""
        from markidocx.diagrams import reconstruct_diagram_md

        fenced = reconstruct_diagram_md("mermaid", "graph TD\nA-->B")
        assert fenced.startswith("```mermaid")
        assert "graph TD" in fenced
        assert fenced.endswith("```")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Builder: diagram blocks → source-only path (no renderer in test env) (FR-533)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuilderDiagrams:
    """Builder behaviour for fenced diagram blocks in a LEVEL3 project."""

    def test_build_with_mermaid_block_succeeds(self, tmp_path: Path) -> None:
        """Mermaid block builds without error (source-only path)."""
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        source_md = "\n".join(
            (
                "# Document",
                "",
                "```mermaid",
                "graph TD",
                "A --> B --> C",
                "```",
                "",
                "Some text.",
                "",
            )
        )
        _make_project(tmp_path, source_md)
        manifest = load_manifest(tmp_path / "manifest.yaml")
        outcome = build_document(manifest)
        assert outcome.success

    def test_build_emits_warning_for_unavailable_renderer(
        self, tmp_path: Path, monkeypatch
    ) -> None:
        """Warns about missing diagram renderer (FR-538)."""
        import shutil

        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        # Pretend no external tool is installed.
        monkeypatch.setattr(shutil, "which", lambda _cmd: None)
        _make_project(tmp_path, "```mermaid\ngraph TD\nA-->B\n```")
        manifest = load_manifest(tmp_path / "manifest.yaml")
        outcome = build_document(manifest)
        assert outcome.success

        unavailable = []
        for record in outcome.warning_records:
            if record.reason == "processor-dependency-unavailable":
                unavailable.append(record)
        assert unavailable

    def test_build_docx_contains_source_marker(
        self, tmp_path: Path, monkeypatch
    ) -> None:
        """DOCX contains diagram-source marker for round-trip."""
        import shutil

        from docx import Document as DocxReader

        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        monkeypatch.setattr(shutil, "which", lambda _cmd: None)
        _make_project(tmp_path, "```mermaid\ngraph TD\nA-->B\n```")
        manifest = load_manifest(tmp_path / "manifest.yaml")
        outcome = build_document(manifest)
        assert outcome.success

        texts = [para.text for para in DocxReader(str(outcome.output_path)).paragraphs]
        marker_texts = list(filter(lambda t: t.startswith("diagram-source:"), texts))
        assert marker_texts, f"No diagram-source marker found. Paragraphs: {texts}"

    def test_build_graphviz_block_succeeds(self, tmp_path: Path) -> None:
        """A graphviz fenced block builds successfully."""
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        _make_project(tmp_path, "```graphviz\ndigraph G { A -> B }\n```")
        manifest = load_manifest(tmp_path / "manifest.yaml")
        outcome = build_document(manifest)
        assert outcome.success

    def test_non_diagram_code_block_not_warned(
        self, tmp_path: Path
    ) -> None:
        """Python code blocks don't trigger diagram warnings."""
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        _make_project(tmp_path, "```python\nprint('hello')\n```")
        manifest = load_manifest(tmp_path / "manifest.yaml")
        outcome = build_document(manifest)

        # Dependency warnings may still fire for mmdc/dot via the level3
        # partial check; what must not happen is one naming the python block.
        python_warnings = [
            record
            for record in outcome.warning_records
            if record.reason == "processor-dependency-unavailable"
            and "python" in record.construct
        ]
        assert not python_warnings
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Importer: diagram source-intent marker → fenced block (FR-534)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestImporterDiagrams:
    """Importer behaviour for diagram source markers (build → import)."""

    def test_roundtrip_source_only_path(self, tmp_path: Path, monkeypatch) -> None:
        """Source-only round-trip: diagram source is preserved in reimported MD."""
        import shutil

        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        # With no renderer on PATH the builder has to take the source-only path.
        monkeypatch.setattr(shutil, "which", lambda _cmd: None)
        diagram_source = "graph TD\nA --> B --> C"
        source_md = f"# Document\n\n```mermaid\n{diagram_source}\n```\n\nText."
        _make_project(tmp_path, source_md)
        manifest = load_manifest(tmp_path / "manifest.yaml")

        built = build_document(manifest)
        assert built.success

        imported = import_document(manifest, built.output_path)
        assert imported.success

        roundtripped = imported.output_files[0].read_text(encoding="utf-8")
        for expected in ("mermaid", "graph TD"):
            assert expected in roundtripped

    def test_no_source_discarded(self, tmp_path: Path, monkeypatch) -> None:
        """Diagram source is never silently dropped (FR-1205)."""
        import shutil

        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        monkeypatch.setattr(shutil, "which", lambda _cmd: None)
        _make_project(tmp_path, "```plantuml\n@startuml\nAlice -> Bob: Hi\n@enduml\n```")
        manifest = load_manifest(tmp_path / "manifest.yaml")

        built = build_document(manifest)
        assert built.success

        imported = import_document(manifest, built.output_path)
        assert imported.success

        roundtripped = imported.output_files[0].read_text(encoding="utf-8")
        # Either the diagram type or the opening directive must have survived.
        assert any(token in roundtripped for token in ("plantuml", "@startuml"))
|
||||||
342
tests/test_level3_figures.py
Normal file
342
tests/test_level3_figures.py
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
"""Tests for LEVEL3 numbered figure support (FR-532, FR-541)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import textwrap
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Minimal LEVEL3 project manifest written next to ``doc.md`` by ``_make_project``.
# The mapping keys must be nested: with every key at column 0, ``project:``
# parses as an empty value and the feature level is never seen.
# NOTE(review): ``family`` is assumed to belong to ``project`` — confirm
# against the manifest schema.
LEVEL3_MANIFEST = textwrap.dedent("""\
    project:
      name: fig-test
      feature_level: level3
      family: article
    sources:
      - path: doc.md
    output:
      dir: ./dist
""")
|
||||||
|
|
||||||
|
|
||||||
|
def _make_project(tmp_path: Path, markdown: str) -> Path:
    """Populate *tmp_path* with a one-document LEVEL3 project and return it.

    ``doc.md`` receives *markdown* and ``manifest.yaml`` receives
    ``LEVEL3_MANIFEST``; both are written as UTF-8.
    """
    doc_file = tmp_path / "doc.md"
    manifest_file = tmp_path / "manifest.yaml"
    doc_file.write_text(markdown, encoding="utf-8")
    manifest_file.write_text(LEVEL3_MANIFEST, encoding="utf-8")
    return tmp_path
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# figures module helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestFigureHelpers:
    """Unit checks for the helper functions exposed by ``markidocx.figures``.

    Figure declarations are written as a Markdown image followed by a
    ``{#fig:label}`` attribute. Inputs consisting of the bare attribute
    cannot produce the captions and paths asserted below, so each input
    carries the full image markup.

    NOTE(review): captions and paths not pinned by an assertion (for example
    in the ``is_figure_paragraph`` tests) are placeholders — confirm against
    the intended fixtures.
    """

    def test_is_figure_paragraph_true(self) -> None:
        """An image with a ``{#fig:}`` label is a figure paragraph."""
        from markidocx.figures import is_figure_paragraph

        assert is_figure_paragraph("![Photo](img/photo.png){#fig:photo}")

    def test_is_figure_paragraph_false(self) -> None:
        """Plain text and an unlabelled image are not figure paragraphs."""
        from markidocx.figures import is_figure_paragraph

        assert not is_figure_paragraph("Normal paragraph text.")
        assert not is_figure_paragraph("![Photo](img/photo.png)")  # no {#fig:} label

    def test_parse_figure(self) -> None:
        """Parsing yields the ``(caption, path, label)`` triple."""
        from markidocx.figures import parse_figure

        result = parse_figure("![Architecture Diagram](arch.png){#fig:arch}")
        assert result is not None
        caption, path, label = result
        assert caption == "Architecture Diagram"
        assert path == "arch.png"
        assert label == "fig:arch"

    def test_extract_figures_from_md(self) -> None:
        """Every figure in a document is extracted, in document order."""
        from markidocx.figures import extract_figures_from_md

        md = textwrap.dedent("""\
            # Title

            Some text.

            ![Figure One](fig1.png){#fig:f1}

            More text.

            ![Figure Two](fig2.png){#fig:f2}
        """)
        figs = extract_figures_from_md(md)
        assert len(figs) == 2
        assert figs[0] == ("Figure One", "fig1.png", "fig:f1")
        assert figs[1] == ("Figure Two", "fig2.png", "fig:f2")

    def test_extract_figure_labels(self) -> None:
        """Label extraction returns the set of ``fig:`` labels."""
        from markidocx.figures import extract_figure_labels

        md = "![Figure One](fig1.png){#fig:f1}\n\n![Figure Two](fig2.png){#fig:f2}"
        labels = extract_figure_labels(md)
        assert labels == {"fig:f1", "fig:f2"}

    def test_is_caption_paragraph(self) -> None:
        """``Figure N`` followed by a dash separator is a caption paragraph."""
        from markidocx.figures import is_caption_paragraph

        assert is_caption_paragraph("Figure 1 — My Caption")
        assert is_caption_paragraph("Figure 3 - Another Caption")
        assert not is_caption_paragraph("Some normal text")

    def test_reconstruct_figure_md(self) -> None:
        """Reconstruction re-assembles image markup plus label attribute."""
        from markidocx.figures import reconstruct_figure_md

        result = reconstruct_figure_md("My Caption", "img/photo.png", "fig:photo")
        assert result == "![My Caption](img/photo.png){#fig:photo}"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Builder: figure declaration → DOCX caption paragraph (FR-532)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuilderFigures:
    """Builder behaviour for numbered figure declarations (FR-532).

    Figure declarations carry the full image markup
    (``![caption](path){#fig:label}``); a bare ``{#fig:label}`` attribute has
    no caption from which a ``Figure N`` paragraph could be produced.

    NOTE(review): caption texts and image paths are placeholders (no image
    files are created in ``tmp_path``) — confirm against the intended
    fixtures.
    """

    def test_build_with_figure_succeeds(self, tmp_path: Path) -> None:
        """A document with one figure builds and writes an output file."""
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        md = textwrap.dedent("""\
            # Document {#doc}

            Introduction.

            ![Architecture Diagram](arch.png){#fig:arch}

            More text.
        """)
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success
        assert result.output_path.exists()

    def test_build_docx_contains_figure_caption(self, tmp_path: Path) -> None:
        """The built DOCX should contain a caption paragraph with 'Figure 1'."""
        from docx import Document as DocxReader

        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        md = "![Diagram](diag.png){#fig:diag}\n\nSome text."
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success

        doc = DocxReader(str(result.output_path))
        texts = [p.text for p in doc.paragraphs]
        caption_paras = [t for t in texts if t.startswith("Figure 1")]
        assert caption_paras, f"No 'Figure 1' caption found. Paragraphs: {texts}"

    def test_multiple_figures_numbered_sequentially(self, tmp_path: Path) -> None:
        """Multiple figures get Figure 1, Figure 2, Figure 3."""
        from docx import Document as DocxReader

        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        md = textwrap.dedent("""\
            # Doc

            ![First](a.png){#fig:a}

            Some text.

            ![Second](b.png){#fig:b}

            More text.

            ![Third](c.png){#fig:c}
        """)
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success

        doc = DocxReader(str(result.output_path))
        texts = [p.text for p in doc.paragraphs]
        for number in (1, 2, 3):
            assert any(f"Figure {number}" in t for t in texts)

    def test_figure_not_activated_for_level1(self, tmp_path: Path) -> None:
        """LEVEL1: figure syntax is not stripped (no caption paragraphs added)."""
        from docx import Document as DocxReader

        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        # Keys are nested under their sections; a flat layout leaves
        # ``project`` empty.
        # NOTE(review): ``family`` is assumed to belong to ``project``.
        manifest_yaml = textwrap.dedent("""\
            project:
              name: l1-fig
              feature_level: level1
              family: article
            sources:
              - path: doc.md
            output:
              dir: ./dist
        """)
        (tmp_path / "doc.md").write_text(
            "# Title\n\n![Diagram](diag.png){#fig:diag}", encoding="utf-8"
        )
        (tmp_path / "manifest.yaml").write_text(manifest_yaml, encoding="utf-8")
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success
        doc = DocxReader(str(result.output_path))
        texts = [p.text for p in doc.paragraphs]
        # No "Figure N" captions in LEVEL1 output
        assert not any(t.startswith("Figure ") for t in texts)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Importer: DOCX caption paragraphs → figure markdown (FR-532)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestImporterFigures:
    """Importer behaviour: DOCX caption paragraphs → figure Markdown (FR-532).

    The source documents declare figures with full image markup so that the
    caption text asserted below ("Architecture") actually exists in the
    input.

    NOTE(review): captions and paths not pinned by an assertion are
    placeholders — confirm against the intended fixtures.
    """

    def test_roundtrip_preserves_figure_caption(self, tmp_path: Path) -> None:
        """Caption text and label are both present after build → import."""
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        md = "# Title\n\n![Architecture Diagram](arch.png){#fig:arch}\n\nText."
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")

        build_result = build_document(m)
        assert build_result.success

        import_result = import_document(m, build_result.output_path)
        assert import_result.success

        reimported = import_result.output_files[0].read_text(encoding="utf-8")
        assert "Architecture" in reimported
        assert "fig:arch" in reimported

    def test_roundtrip_preserves_figure_label(self, tmp_path: Path) -> None:
        """The ``{#fig:…}`` attribute is re-emitted verbatim after the round trip."""
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        md = "![My Image](myimg.png){#fig:myimg}\n\nText."
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")

        build_result = build_document(m)
        assert build_result.success

        import_result = import_document(m, build_result.output_path)
        assert import_result.success

        reimported = import_result.output_files[0].read_text(encoding="utf-8")
        assert "{#fig:myimg}" in reimported
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Differ: figure identity coherence (FR-541)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestDifferFigures:
    """Differ behaviour for figure identity coherence (FR-541).

    Inputs use full image markup so that each figure has a caption to
    compare. In particular, the degraded-caption case needs the original and
    the reimported text to differ; two identical strings can never report
    drift.

    NOTE(review): caption texts and paths are placeholders, and the
    "missing caption" case is modelled as an empty alt text — confirm against
    the differ's actual matching rules.
    """

    def test_preserved_figure_label(self) -> None:
        """Comparing a document with itself reports the label as preserved."""
        from markidocx.differ import compare

        text = "# Title\n\n![My Caption](img.png){#fig:img}\n\nText."
        report = compare(text, text)
        assert any("figure-label:fig:img" in p for p in report.preserved)

    def test_missing_figure_label_broken(self) -> None:
        """A figure that disappears entirely is reported as broken drift."""
        from markidocx.differ import compare

        original = "![My Caption](img.png){#fig:img}\n\nText."
        reimported = "Text."
        report = compare(original, reimported)
        assert any("figure-label:missing 'fig:img'" in b for b in report.broken)
        assert report.has_drift

    def test_missing_caption_degraded(self) -> None:
        """Same label but a lost caption is reported as degraded."""
        from markidocx.differ import compare

        original = "![My Caption](img.png){#fig:img}"
        reimported = "![](img.png){#fig:img}"
        report = compare(original, reimported)
        assert any("figure-caption" in d for d in report.degraded)

    def test_preserved_caption(self) -> None:
        """An unchanged caption is reported as preserved."""
        from markidocx.differ import compare

        text = "![My Caption](img.png){#fig:img}"
        report = compare(text, text)
        assert any("figure-caption" in p for p in report.preserved)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Full figure round-trip
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestFigureRoundTrip:
    """Full build → import → compare round trips for figure documents.

    Figures are declared with full image markup
    (``![caption](path){#fig:label}``) so the builder has a caption and path
    to work with.

    NOTE(review): caption texts and image paths are placeholders — confirm
    against the intended fixtures.
    """

    def test_single_figure_roundtrip(self, tmp_path: Path) -> None:
        """A single figure round-trips without any broken figure entry."""
        from markidocx.builder import build_document
        from markidocx.differ import compare
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        md = textwrap.dedent("""\
            # Document

            Introduction.

            ![Architecture Diagram](arch.png){#fig:arch}

            Conclusion.
        """)
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")

        build_result = build_document(m)
        assert build_result.success

        import_result = import_document(m, build_result.output_path)
        assert import_result.success

        reimported = import_result.output_files[0].read_text(encoding="utf-8")
        report = compare(md, reimported)

        # No broken figures
        broken_figs = [b for b in report.broken if "figure" in b]
        assert not broken_figs, f"Broken figures found: {broken_figs}"

    def test_multiple_figures_identity_coherent(self, tmp_path: Path) -> None:
        """Multiple figures survive round-trip with correct labels."""
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        md = textwrap.dedent("""\
            # Doc

            ![Figure One](fig1.png){#fig:f1}

            Text between figures.

            ![Figure Two](fig2.png){#fig:f2}
        """)
        _make_project(tmp_path, md)
        m = load_manifest(tmp_path / "manifest.yaml")

        build_result = build_document(m)
        assert build_result.success

        import_result = import_document(m, build_result.output_path)
        assert import_result.success

        reimported = import_result.output_files[0].read_text(encoding="utf-8")
        assert "{#fig:f1}" in reimported
        assert "{#fig:f2}" in reimported
|
||||||
271
tests/test_level3_plumbing.py
Normal file
271
tests/test_level3_plumbing.py
Normal file
@@ -0,0 +1,271 @@
|
|||||||
|
"""Tests for LEVEL3 plumbing — feature-level gating & disclosure (FR-537–539)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import textwrap
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from markidocx.level3 import (
|
||||||
|
Level3Support,
|
||||||
|
ProcessorDependency,
|
||||||
|
capabilities_entry,
|
||||||
|
check_level3_support,
|
||||||
|
)
|
||||||
|
from markidocx.manifest import FeatureLevel, load_manifest
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Level3 support detection (FR-537, FR-538)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckLevel3Support:
    """Checks on the object returned by ``check_level3_support`` (FR-537, FR-538)."""

    def test_returns_level3_support(self) -> None:
        """The probe returns a ``Level3Support`` instance."""
        assert isinstance(check_level3_support(), Level3Support)

    def test_always_available(self) -> None:
        """``available`` is exactly ``True``."""
        assert check_level3_support().available is True

    def test_dependencies_are_processor_dependency_instances(self) -> None:
        """Each dependency is typed, has a known name and a description."""
        known_tools = ("mmdc", "dot", "plantuml")
        for dependency in check_level3_support().dependencies:
            assert isinstance(dependency, ProcessorDependency)
            assert dependency.name in known_tools
            assert isinstance(dependency.available, bool)
            assert dependency.description

    def test_partial_when_no_diagram_tools(self, monkeypatch) -> None:
        """When no diagram tool is found, partial=True and missing_coverage is populated."""
        import shutil

        monkeypatch.setattr(shutil, "which", lambda _cmd: None)
        report = check_level3_support()
        assert report.partial is True
        assert len(report.missing_coverage) > 0
        assert any("diagram" in gap for gap in report.missing_coverage)

    def test_not_partial_when_diagram_tool_present(self, monkeypatch) -> None:
        """When at least one diagram tool is found, partial=False."""
        import shutil

        # Only ``mmdc`` resolves; every other lookup reports "not found".
        monkeypatch.setattr(
            shutil,
            "which",
            lambda cmd: "/usr/bin/mmdc" if cmd == "mmdc" else None,
        )
        report = check_level3_support()
        assert report.partial is False
        assert report.missing_coverage == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# capabilities_entry (FR-537)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestCapabilitiesEntry:
    """Shape checks on the dict returned by ``capabilities_entry`` (FR-537)."""

    def test_returns_dict_with_level(self) -> None:
        """The entry names its level as ``level3``."""
        assert capabilities_entry()["level"] == "level3"

    def test_available_is_true(self) -> None:
        """``available`` is exactly ``True``."""
        assert capabilities_entry()["available"] is True

    def test_has_dependencies_list(self) -> None:
        """``dependencies`` is a list of dicts with the three expected keys."""
        dependencies = capabilities_entry()["dependencies"]
        assert isinstance(dependencies, list)
        for dependency in dependencies:
            assert "name" in dependency
            assert "available" in dependency
            assert "description" in dependency

    def test_has_partial_and_missing_coverage(self) -> None:
        """Both disclosure keys are present in the entry."""
        capability = capabilities_entry()
        assert "partial" in capability
        assert "missing_coverage" in capability
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Manifest accepts feature_level: level3 (FR-537)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestManifestLevel3:
    """Manifest loading and build routing for ``feature_level: level3`` (FR-537).

    The manifest text keeps its keys nested under their sections; with every
    key at column 0 the ``project`` mapping would be empty and
    ``m.project.feature_level`` could not be read.

    NOTE(review): ``family`` is assumed to belong to ``project`` — confirm
    against the manifest schema.
    """

    def test_level3_accepted(self, tmp_path: Path) -> None:
        """``feature_level: level3`` loads as ``FeatureLevel.LEVEL3``."""
        (tmp_path / "doc.md").write_text("# Hello", encoding="utf-8")
        (tmp_path / "manifest.yaml").write_text(
            textwrap.dedent("""\
                project:
                  name: test
                  feature_level: level3
                  family: article
                sources:
                  - path: doc.md
                output:
                  dir: ./dist
            """),
            encoding="utf-8",
        )
        m = load_manifest(tmp_path / "manifest.yaml")
        assert m.project.feature_level == FeatureLevel.LEVEL3

    def test_level3_routes_to_level3_processing(self, tmp_path: Path) -> None:
        """Building with feature_level: level3 succeeds (processing path reached)."""
        from markidocx.builder import build_document

        (tmp_path / "doc.md").write_text("# Hello\n\nContent.", encoding="utf-8")
        (tmp_path / "manifest.yaml").write_text(
            textwrap.dedent("""\
                project:
                  name: test-l3
                  feature_level: level3
                  family: article
                sources:
                  - path: doc.md
                output:
                  dir: ./dist
            """),
            encoding="utf-8",
        )
        m = load_manifest(tmp_path / "manifest.yaml")
        result = build_document(m)
        assert result.success
        assert result.feature_level == "level3"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# partial_level3 flag and processor-dependency disclosure (FR-538, FR-539)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestPartialLevel3Flag:
    """``partial_level3`` flag and processor-dependency disclosure (FR-538, FR-539).

    All three tests need the same one-source project, differing only in
    project name, feature level and document text, so the fixture is built by
    ``_load_project``. The manifest keeps its keys nested under their
    sections; a flat layout would leave ``project`` empty.

    NOTE(review): ``family`` is assumed to belong to ``project`` — confirm
    against the manifest schema.
    """

    @staticmethod
    def _load_project(tmp_path: Path, name: str, level: str, markdown: str):
        """Write ``doc.md`` and ``manifest.yaml`` into *tmp_path*; return the loaded manifest."""
        (tmp_path / "doc.md").write_text(markdown, encoding="utf-8")
        (tmp_path / "manifest.yaml").write_text(
            textwrap.dedent(f"""\
                project:
                  name: {name}
                  feature_level: {level}
                  family: article
                sources:
                  - path: doc.md
                output:
                  dir: ./dist
            """),
            encoding="utf-8",
        )
        return load_manifest(tmp_path / "manifest.yaml")

    def test_partial_level3_set_when_no_diagram_tools(
        self, tmp_path: Path, monkeypatch
    ) -> None:
        """With no diagram tool on PATH a LEVEL3 build is flagged as partial."""
        import shutil

        from markidocx.builder import build_document

        # Patch before building so every tool lookup reports "not found".
        monkeypatch.setattr(shutil, "which", lambda _cmd: None)
        m = self._load_project(tmp_path, "test-partial", "level3", "# Hello\n\nContent.")
        result = build_document(m)
        assert result.success
        assert result.partial_level3 is True
        assert len(result.missing_coverage) > 0

    def test_partial_level3_false_for_level1(self, tmp_path: Path) -> None:
        """A LEVEL1 build never reports partial LEVEL3 coverage."""
        from markidocx.builder import build_document

        m = self._load_project(tmp_path, "test-l1", "level1", "# Hello\n\nContent.")
        result = build_document(m)
        assert result.partial_level3 is False
        assert result.missing_coverage == []

    def test_dependency_warning_emitted_for_unavailable_tool(
        self, tmp_path: Path, monkeypatch
    ) -> None:
        """Missing tools yield WARNING-severity dependency records."""
        import shutil

        from markidocx.builder import build_document
        from markidocx.errors import Severity

        monkeypatch.setattr(shutil, "which", lambda _cmd: None)
        m = self._load_project(tmp_path, "t", "level3", "# Hello")
        result = build_document(m)
        dep_warnings = [
            w for w in result.warning_records
            if w.reason == "processor-dependency-unavailable"
        ]
        assert dep_warnings, "Expected processor-dependency-unavailable warning"
        assert all(w.severity == Severity.WARNING for w in dep_warnings)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# REST capabilities includes level3 (FR-537)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRestCapabilitiesLevel3:
    """The REST ``/capabilities`` endpoint lists a ``level3`` output entry."""

    def test_capabilities_includes_level3(self) -> None:
        """GET /capabilities returns 200 and a populated ``level3`` entry."""
        from fastapi.testclient import TestClient

        from markidocx.rest import create_app

        response = TestClient(create_app()).get("/capabilities")
        assert response.status_code == 200

        advertised = response.json()["outputs"]
        assert "level3" in advertised

        level3_entry = advertised["level3"]
        assert level3_entry["level"] == "level3"
        assert level3_entry["available"] is True
        assert "dependencies" in level3_entry
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# MCP validate_project includes level3 in context (FR-537)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestMcpLevel3:
    """The MCP ``validate_project`` tool reports LEVEL3 in its context."""

    def test_validate_project_includes_level3(self) -> None:
        """A level3 manifest validates and exposes ``context['level3']``."""
        from markidocx.mcp_server import validate_project

        # NOTE(review): YAML nesting is reconstructed from a flattened view of
        # the file — confirm against the manifest schema.
        outcome = validate_project(
            textwrap.dedent("""\
                project:
                  name: test
                  feature_level: level3
                  family: article
                sources:
                  - path: doc.md
                output:
                  dir: ./dist
            """)
        )

        assert outcome["status"] == "ok"
        assert outcome["feature_level"] == "level3"

        context = outcome["context"]
        assert "level3" in context
        assert context["level3"]["available"] is True
|
||||||
326
tests/test_level3_xref.py
Normal file
326
tests/test_level3_xref.py
Normal file
@@ -0,0 +1,326 @@
|
|||||||
|
"""Tests for LEVEL3 cross-reference support (FR-531, FR-540)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import textwrap
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Default manifest used by the tests in this module: a single-source LEVEL3
# "article" project that builds into ./dist.
# NOTE(review): YAML nesting is reconstructed from a flattened view of the
# file — confirm against the manifest schema.
LEVEL3_MANIFEST = (
    "project:\n"
    "  name: xref-test\n"
    "  feature_level: level3\n"
    "  family: article\n"
    "sources:\n"
    "  - path: doc.md\n"
    "output:\n"
    "  dir: ./dist\n"
)


def _make_project(tmp_path: Path, markdown: str, manifest_yaml: str = LEVEL3_MANIFEST) -> Path:
    """Populate *tmp_path* with ``doc.md`` and ``manifest.yaml``.

    Args:
        tmp_path: Directory that receives both files.
        markdown: Contents written to ``doc.md``.
        manifest_yaml: Contents written to ``manifest.yaml``; defaults to
            ``LEVEL3_MANIFEST``.

    Returns:
        *tmp_path*, unchanged, so callers can chain path operations.
    """
    files = (("doc.md", markdown), ("manifest.yaml", manifest_yaml))
    for filename, payload in files:
        (tmp_path / filename).write_text(payload, encoding="utf-8")
    return tmp_path
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# xref module helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestXrefHelpers:
    """Unit tests for the text-level helpers exported by ``markidocx.xref``."""

    def test_extract_anchor_from_heading_plain(self) -> None:
        """A trailing ``{#id}`` is split off the heading text."""
        from markidocx.xref import extract_anchor_from_heading

        heading, label = extract_anchor_from_heading("Introduction {#intro}")
        assert heading == "Introduction"
        assert label == "intro"

    def test_extract_anchor_from_heading_no_anchor(self) -> None:
        """A heading without ``{#id}`` comes back unchanged with no anchor."""
        from markidocx.xref import extract_anchor_from_heading

        heading, label = extract_anchor_from_heading("Introduction")
        assert heading == "Introduction"
        assert label is None

    def test_extract_anchors_from_text(self) -> None:
        """All heading anchors in a document are collected."""
        from markidocx.xref import extract_anchors

        document = "# Section {#sec1}\n\n## Subsection {#sec2}\n\nNormal."
        found = extract_anchors(document)
        assert found == {"sec1", "sec2"}

    def test_extract_xref_links(self) -> None:
        """Each ``[text][anchor]`` reference yields a ``(text, anchor)`` pair."""
        from markidocx.xref import extract_xref_links

        sentence = "See [Section One][sec1] and [Section Two][sec2]."
        pairs = extract_xref_links(sentence)
        for expected in (("Section One", "sec1"), ("Section Two", "sec2")):
            assert expected in pairs

    def test_has_xref_links_true(self) -> None:
        """Text containing a reference-style link is detected."""
        from markidocx.xref import has_xref_links

        assert has_xref_links("See [Intro][intro] for details.")

    def test_has_xref_links_false(self) -> None:
        """Plain text without references is not flagged."""
        from markidocx.xref import has_xref_links

        assert not has_xref_links("Normal paragraph without refs.")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Builder: headings with anchors → DOCX bookmarks (FR-531)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuilderXref:
    """Builder behaviour for ``{#anchor}`` headings and ``[text][anchor]`` links."""

    @staticmethod
    def _build(tmp_path: Path, markdown: str, *manifest_override: str):
        """Write a project into *tmp_path* and return its build result.

        ``manifest_override`` optionally carries one manifest YAML string; when
        omitted, ``_make_project`` falls back to its default manifest.
        """
        from markidocx.builder import build_document
        from markidocx.manifest import load_manifest

        project_dir = _make_project(tmp_path, markdown, *manifest_override)
        return build_document(load_manifest(project_dir / "manifest.yaml"))

    def test_build_with_anchor_succeeds(self, tmp_path: Path) -> None:
        """Anchored headings build successfully and produce an output file."""
        source = "# Introduction {#intro}\n\nSome text.\n\n## Section One {#sec1}\n\nContent."
        outcome = self._build(tmp_path, source)
        assert outcome.success
        assert outcome.output_path.exists()

    def test_build_docx_contains_bookmark(self, tmp_path: Path) -> None:
        """The built DOCX XML should contain a bookmarkStart for {#intro}."""
        from docx import Document as DocxReader

        outcome = self._build(tmp_path, "# Introduction {#intro}\n\nContent.")
        assert outcome.success

        docx_file = DocxReader(str(outcome.output_path))
        _W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
        name_attr = f"{{{_W}}}name"
        candidate_names = (
            node.get(name_attr)
            for node in docx_file.element.body.iter(f"{{{_W}}}bookmarkStart")
        )
        # Skip unnamed bookmarks and those whose name starts with "_".
        bookmarks = [
            label for label in candidate_names if label and not label.startswith("_")
        ]
        assert "intro" in bookmarks

    def test_build_with_cross_ref_link(self, tmp_path: Path) -> None:
        """Cross-ref links [text][anchor] render without errors."""
        source = textwrap.dedent("""\
            # Introduction {#intro}

            Some text.

            # Methodology {#method}

            See [Introduction][intro] for background.
        """)
        outcome = self._build(tmp_path, source)
        assert outcome.success
        assert outcome.output_path.exists()

    def test_build_xref_not_activated_for_level1(self, tmp_path: Path) -> None:
        """Level1 build: {#anchor} syntax is treated as literal heading text."""
        # NOTE(review): YAML nesting is reconstructed from a flattened view of
        # the file — confirm against the manifest schema.
        level1_manifest = textwrap.dedent("""\
            project:
              name: l1-test
              feature_level: level1
              family: article
            sources:
              - path: doc.md
            output:
              dir: ./dist
        """)
        # In LEVEL1, {#anchor} is not stripped and no bookmark is added
        outcome = self._build(
            tmp_path, "# Introduction {#intro}\n\nContent.", level1_manifest
        )
        assert outcome.success
        # No cross-ref warnings
        assert not any(
            "xref" in record.reason.lower() for record in outcome.warning_records
        )
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Importer: DOCX bookmarks → {#anchor} labels (FR-531)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestImporterXref:
    """Importer restores ``{#anchor}`` labels and reference links from DOCX."""

    @staticmethod
    def _roundtrip(tmp_path: Path, markdown: str) -> str:
        """Build *markdown* to DOCX, import it back, and return the first output file's text.

        Asserts that both the build and the import report success.
        """
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        _make_project(tmp_path, markdown)
        manifest = load_manifest(tmp_path / "manifest.yaml")

        built = build_document(manifest)
        assert built.success

        imported = import_document(manifest, built.output_path)
        assert imported.success

        return imported.output_files[0].read_text(encoding="utf-8")

    def test_roundtrip_preserves_anchor(self, tmp_path: Path) -> None:
        """Build LEVEL3 doc with {#anchor}, import back → heading has {#anchor}."""
        recovered = self._roundtrip(tmp_path, "# Introduction {#intro}\n\nSome text.")
        assert "{#intro}" in recovered

    def test_roundtrip_preserves_cross_ref_link(self, tmp_path: Path) -> None:
        """Cross-ref link [text][anchor] survives a round trip."""
        source = textwrap.dedent("""\
            # Introduction {#intro}

            Some intro text.

            # Methodology {#method}

            See [Introduction][intro] for background.
        """)
        recovered = self._roundtrip(tmp_path, source)
        for fragment in ("{#intro}", "[Introduction][intro]"):
            assert fragment in recovered
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Differ: cross-ref detection (FR-540)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestDifferXref:
    """``compare`` classifies cross-reference anchors and links as preserved or broken."""

    def test_preserved_anchor_reported(self) -> None:
        """An anchor present on both sides is listed as preserved, not broken."""
        from markidocx.differ import compare

        before = "# Introduction {#intro}\n\nText."
        after = "# Introduction {#intro}\n\nText."
        outcome = compare(before, after)
        assert any("xref-anchor:intro" in entry for entry in outcome.preserved)
        assert not any("xref-anchor" in entry for entry in outcome.broken)

    def test_missing_anchor_reported_as_broken(self) -> None:
        """An anchor dropped on re-import is listed as broken and counts as drift."""
        from markidocx.differ import compare

        before = "# Introduction {#intro}\n\nText."
        after = "# Introduction\n\nText."
        outcome = compare(before, after)
        assert any("xref-anchor:missing 'intro'" in entry for entry in outcome.broken)
        assert outcome.has_drift

    def test_preserved_xref_link(self) -> None:
        """An unchanged reference link is listed as preserved."""
        from markidocx.differ import compare

        document = "# Intro {#intro}\n\nSee [Intro][intro]."
        outcome = compare(document, document)
        assert any("xref-link" in entry for entry in outcome.preserved)

    def test_broken_xref_link_target_missing(self) -> None:
        """Losing both anchor and link yields at least one broken xref entry."""
        from markidocx.differ import compare

        before = "# Intro {#intro}\n\nSee [Intro][intro]."
        after = "# Intro\n\nSee something."
        outcome = compare(before, after)
        # anchor missing → broken xref link
        assert any("xref" in entry for entry in outcome.broken)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Full single-file xref round-trip
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestXrefRoundTrip:
    """End-to-end build → import checks for single-file cross-references."""

    @staticmethod
    def _rebuild_and_reimport(tmp_path: Path, markdown: str) -> str:
        """Build *markdown*, import the DOCX back, and return the re-imported text.

        Asserts that both the build and the import report success.
        """
        from markidocx.builder import build_document
        from markidocx.importer import import_document
        from markidocx.manifest import load_manifest

        _make_project(tmp_path, markdown)
        manifest = load_manifest(tmp_path / "manifest.yaml")

        built = build_document(manifest)
        assert built.success

        imported = import_document(manifest, built.output_path)
        assert imported.success

        return imported.output_files[0].read_text(encoding="utf-8")

    def test_single_file_xref_roundtrip(self, tmp_path: Path) -> None:
        """Comparing source against its round trip reports no broken xref entries."""
        from markidocx.differ import compare

        source = textwrap.dedent("""\
            # Introduction {#intro}

            Welcome.

            # Background {#bg}

            See [Introduction][intro] and [Background][bg].
        """)
        recovered = self._rebuild_and_reimport(tmp_path, source)
        drift = compare(source, recovered)

        # No broken cross-refs
        dangling = [entry for entry in drift.broken if "xref" in entry]
        assert not dangling, f"Broken xrefs found: {dangling}"

    def test_multi_ref_document(self, tmp_path: Path) -> None:
        """Every heading anchor in a multi-anchor, multi-ref document survives the round trip."""
        source = textwrap.dedent("""\
            # Chapter One {#ch1}

            Opening.

            # Chapter Two {#ch2}

            See [Chapter One][ch1].

            # Chapter Three {#ch3}

            Refers to [Chapter One][ch1] and [Chapter Two][ch2].
        """)
        recovered = self._rebuild_and_reimport(tmp_path, source)

        # All three anchors should be in reimported
        for marker in ("{#ch1}", "{#ch2}", "{#ch3}"):
            assert marker in recovered
|
||||||
@@ -3,7 +3,7 @@ id: MRKD-WP-0003
|
|||||||
type: workplan
|
type: workplan
|
||||||
domain: markitect
|
domain: markitect
|
||||||
repo: marki-docx
|
repo: marki-docx
|
||||||
status: active
|
status: done
|
||||||
state_hub_workstream_id: b04fe706-6e4e-48a8-b6c1-194d9e308215
|
state_hub_workstream_id: b04fe706-6e4e-48a8-b6c1-194d9e308215
|
||||||
created: 2026-03-17
|
created: 2026-03-17
|
||||||
updated: 2026-03-17
|
updated: 2026-03-17
|
||||||
@@ -28,7 +28,7 @@ through the existing interface layer once the core modules support them.
|
|||||||
|
|
||||||
```task
|
```task
|
||||||
id: MRKD-WP-0003-T01
|
id: MRKD-WP-0003-T01
|
||||||
status: todo
|
status: done
|
||||||
priority: high
|
priority: high
|
||||||
state_hub_task_id: 51e1b53e-a62f-496b-892d-615513c35d67
|
state_hub_task_id: 51e1b53e-a62f-496b-892d-615513c35d67
|
||||||
```
|
```
|
||||||
@@ -53,7 +53,7 @@ Deliverable: `pytest tests/test_level3_plumbing.py` passes; level3 appears in ca
|
|||||||
|
|
||||||
```task
|
```task
|
||||||
id: MRKD-WP-0003-T02
|
id: MRKD-WP-0003-T02
|
||||||
status: todo
|
status: done
|
||||||
priority: high
|
priority: high
|
||||||
state_hub_task_id: f4010618-9d35-4c04-bc1c-c599f254edff
|
state_hub_task_id: f4010618-9d35-4c04-bc1c-c599f254edff
|
||||||
```
|
```
|
||||||
@@ -81,7 +81,7 @@ Deliverable: `pytest tests/test_error_framework.py` passes; all modules emit str
|
|||||||
|
|
||||||
```task
|
```task
|
||||||
id: MRKD-WP-0003-T03
|
id: MRKD-WP-0003-T03
|
||||||
status: todo
|
status: done
|
||||||
priority: high
|
priority: high
|
||||||
state_hub_task_id: 0bb9c7ce-5eb8-4997-833f-c801e37f282c
|
state_hub_task_id: 0bb9c7ce-5eb8-4997-833f-c801e37f282c
|
||||||
```
|
```
|
||||||
@@ -104,7 +104,7 @@ Deliverable: `pytest tests/test_level3_xref.py` passes.
|
|||||||
|
|
||||||
```task
|
```task
|
||||||
id: MRKD-WP-0003-T04
|
id: MRKD-WP-0003-T04
|
||||||
status: todo
|
status: done
|
||||||
priority: high
|
priority: high
|
||||||
state_hub_task_id: af6b82b7-da44-4ef8-8976-6e40fee5f73c
|
state_hub_task_id: af6b82b7-da44-4ef8-8976-6e40fee5f73c
|
||||||
```
|
```
|
||||||
@@ -128,7 +128,7 @@ Deliverable: `pytest tests/test_level3_figures.py` passes.
|
|||||||
|
|
||||||
```task
|
```task
|
||||||
id: MRKD-WP-0003-T05
|
id: MRKD-WP-0003-T05
|
||||||
status: todo
|
status: done
|
||||||
priority: medium
|
priority: medium
|
||||||
state_hub_task_id: 3700e0e4-cc3b-4ef3-8b85-6cef24c35fc0
|
state_hub_task_id: 3700e0e4-cc3b-4ef3-8b85-6cef24c35fc0
|
||||||
```
|
```
|
||||||
@@ -151,7 +151,7 @@ Deliverable: `pytest tests/test_level3_diagrams.py` passes.
|
|||||||
|
|
||||||
```task
|
```task
|
||||||
id: MRKD-WP-0003-T06
|
id: MRKD-WP-0003-T06
|
||||||
status: todo
|
status: done
|
||||||
priority: medium
|
priority: medium
|
||||||
state_hub_task_id: 7c0acbd3-65f0-440b-9ad4-a5f09fabef3c
|
state_hub_task_id: 7c0acbd3-65f0-440b-9ad4-a5f09fabef3c
|
||||||
```
|
```
|
||||||
@@ -175,7 +175,7 @@ Deliverable: `pytest tests/test_level3_bibliography.py` passes.
|
|||||||
|
|
||||||
```task
|
```task
|
||||||
id: MRKD-WP-0003-T07
|
id: MRKD-WP-0003-T07
|
||||||
status: todo
|
status: done
|
||||||
priority: medium
|
priority: medium
|
||||||
state_hub_task_id: b26241b9-0fff-45a2-a95c-dd886a449038
|
state_hub_task_id: b26241b9-0fff-45a2-a95c-dd886a449038
|
||||||
```
|
```
|
||||||
|
|||||||
Reference in New Issue
Block a user