feat: WP-0001 + WP-0002 complete — LEVEL1 core + service interfaces

WP-0001 (Foundation & LEVEL1 Core):
- manifest model (FR-100), MD→DOCX builder (FR-200), DOCX→MD importer (FR-300/400), template family registry (FR-600), drift detector (FR-700), CLI wiring, pre-commit config, CI skeleton, regression harness

WP-0002 (Service Interfaces & Workflow Orchestration):
- REST service via FastAPI (FR-900): /health, /version, /capabilities, /templates, /styles, /validate, /build, /import, /compare, /templates/register, /workflows/{name}, /evidence/{run_id}
- Evidence & report store (FR-1400): JSON-backed, per-run, retrievable through all interfaces, classification (pass/warnings/failed)
- Composite workflow orchestration (FR-1300): single-file-roundtrip, multi-file-roundtrip, release-regression, family-switch-build
- MCP server via FastMCP (FR-1000): all tools + resources
- CLI additions: `markidocx serve`, `markidocx workflow`, `markidocx mcp`
- Interface parity tests: CLI / REST / MCP produce equivalent results

135 tests passing, ruff + mypy clean.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-16 07:46:31 +00:00
parent 42789cad1e
commit 1f3dddf7d6
30 changed files with 4158 additions and 26 deletions
--- a/src/markidocx/differ.py
+++ b/src/markidocx/differ.py
@@ -0,0 +1,130 @@
+"""Structural drift detection for markidocx (FR-700)."""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass, field
+
+# Patterns for the Markdown constructs the drift detector tracks.
+# Horizontal whitespace is written as [^\S\n] rather than \s: under
+# re.MULTILINE a bare \s can consume a newline, which would glue an empty
+# marker line ("#" or "-") onto the text of the line that follows it.
+HEADING_RE = re.compile(r"^(#{1,6})[^\S\n]+(.+)$", re.MULTILINE)
+LIST_ITEM_RE = re.compile(r"^([^\S\n]*[-*+]|[^\S\n]*\d+\.)[^\S\n]+(.+)$", re.MULTILINE)
+TABLE_ROW_RE = re.compile(r"^\|.+\|$", re.MULTILINE)
+FOOTNOTE_RE = re.compile(r"\[\^[^\]]+\]")
+LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
+
+
+@dataclass
+class DriftReport:
+    """Outcome of a structural comparison between two Markdown texts.
+
+    The four lists hold human-readable entries such as ``heading:# Title``
+    or ``tables:2``; each list is created fresh per instance.
+    """
+
+    # compare() sets this when anything was classified as degraded or broken.
+    has_drift: bool
+    # Elements found unchanged in both texts.
+    preserved: list[str] = field(default_factory=list)
+    # Elements still present but modified.
+    degraded: list[str] = field(default_factory=list)
+    # Elements present in the original and missing after re-import.
+    broken: list[str] = field(default_factory=list)
+    # Constructs the round-trip does not support.
+    unsupported: list[str] = field(default_factory=list)
+
+
+def compare(original: str, reimported: str) -> DriftReport:
+    """Compare *original* Markdown against *reimported* Markdown.
+
+    Each structural element found in *original* is classified as:
+    - preserved: present in both texts
+    - degraded: present but modified (fewer occurrences than before, more
+      tables than before, or a link whose exact ``[text](target)`` form is
+      no longer found)
+    - broken: present in original, missing in reimported
+    - unsupported: construct not supported by the round-trip (no check in
+      this function populates it, so the list is returned empty)
+
+    Elements that exist only in *reimported* are not reported, apart from
+    an increased table count.
+
+    Entries are emitted in a deterministic order: headings, list items,
+    tables, footnotes, links, each in order of first appearance in
+    *original*.
+
+    Returns a DriftReport whose ``has_drift`` is true when anything was
+    classified as degraded or broken.
+    """
+    preserved: list[str] = []
+    degraded: list[str] = []
+    broken: list[str] = []
+    unsupported: list[str] = []
+
+    # --- Headings (FR-501) ---
+    orig_headings = _extract_headings(original)
+    reim_headings = _extract_headings(reimported)
+    _compare_sets("heading", orig_headings, reim_headings, preserved, degraded, broken)
+
+    # --- Lists (FR-502) ---
+    orig_lists = _extract_list_items(original)
+    reim_lists = _extract_list_items(reimported)
+    _compare_sets("list_item", orig_lists, reim_lists, preserved, degraded, broken)
+
+    # --- Tables (FR-503) ---
+    # Only the number of tables is compared, not their contents.
+    orig_tables = _count_tables(original)
+    reim_tables = _count_tables(reimported)
+    if orig_tables == reim_tables:
+        if orig_tables > 0:
+            preserved.append(f"tables:{orig_tables}")
+    elif reim_tables < orig_tables:
+        broken.append(f"tables:missing {orig_tables - reim_tables} of {orig_tables}")
+    else:
+        degraded.append(f"tables:count changed {orig_tables}→{reim_tables}")
+
+    # --- Footnotes (FR-504) ---
+    # dict.fromkeys de-duplicates while keeping first-occurrence order; looping
+    # over a set instead would make the entry order depend on the hash seed.
+    orig_fn = dict.fromkeys(FOOTNOTE_RE.findall(original))
+    reim_fn = set(FOOTNOTE_RE.findall(reimported))
+    for fn in orig_fn:
+        if fn in reim_fn:
+            preserved.append(f"footnote:{fn}")
+        else:
+            broken.append(f"footnote:{fn}")
+
+    # --- Links (FR-506) ---
+    # Links are matched on their full "[text](target)" form; labels are cut to
+    # 40 characters to keep report entries short.
+    orig_links = dict.fromkeys(m.group(0) for m in LINK_RE.finditer(original))
+    reim_links = {m.group(0) for m in LINK_RE.finditer(reimported)}
+    for link in orig_links:
+        if link in reim_links:
+            preserved.append(f"link:{link[:40]}")
+        else:
+            degraded.append(f"link:lost {link[:40]}")
+
+    has_drift = bool(degraded or broken)
+    return DriftReport(
+        has_drift=has_drift,
+        preserved=preserved,
+        degraded=degraded,
+        broken=broken,
+        unsupported=unsupported,
+    )
+
+
+def _extract_headings(text: str) -> list[str]:
+    """Return every heading matched by HEADING_RE as ``<hashes> <title>``.
+
+    The title is stripped of surrounding whitespace; document order is kept.
+    """
+    headings: list[str] = []
+    for match in HEADING_RE.finditer(text):
+        marker, title = match.group(1), match.group(2)
+        headings.append(f"{'#' * len(marker)} {title.strip()}")
+    return headings
+
+
+def _extract_list_items(text: str) -> list[str]:
+    """Return the stripped text of every list item matched by LIST_ITEM_RE.
+
+    The bullet or number marker is dropped; document order is kept.
+    """
+    items: list[str] = []
+    for match in LIST_ITEM_RE.finditer(text):
+        body = match.group(2)
+        items.append(body.strip())
+    return items
+
+
+def _count_tables(text: str) -> int:
+    """Return the number of pipe tables in *text*.
+
+    A table is recognised by its separator row (for example ``|---|:-:|``),
+    so the result is the number of such rows among the lines matched by
+    TABLE_ROW_RE. Returns 0 when *text* contains no table rows.
+    """
+    rows = TABLE_ROW_RE.findall(text)
+    # A separator row consists only of pipes, dashes, colons and spaces and
+    # must contain at least one dash. Without the dash requirement a row of
+    # empty cells ("|  |  |") would be miscounted as an additional table.
+    sep_re = re.compile(r"^\|[-| :]+\|$")
+    return sum(1 for row in rows if "-" in row and sep_re.match(row))
+
+
+def _compare_sets(
+    kind: str,
+    orig: list[str],
+    reim: list[str],
+    preserved: list[str],
+    degraded: list[str],
+    broken: list[str],
+) -> None:
+    """Classify each distinct item of *orig* by how often it recurs in *reim*.
+
+    Appends one entry per distinct item, prefixed with *kind*, to exactly one
+    of the output lists (which are mutated in place):
+    - *preserved* when *reim* has at least as many occurrences as *orig*
+    - *degraded* when *reim* has some, but fewer, occurrences
+    - *broken* when *reim* has none
+
+    Item text is cut to 60 characters in the entry. Items that occur only in
+    *reim* are ignored.
+    """
+
+    def tally(items: list[str]) -> dict[str, int]:
+        # Occurrence count per item, keyed in first-seen order.
+        totals: dict[str, int] = {}
+        for entry in items:
+            totals[entry] = totals.get(entry, 0) + 1
+        return totals
+
+    expected = tally(orig)
+    found = tally(reim)
+
+    for item, wanted in expected.items():
+        seen = found.get(item, 0)
+        label = item[:60]
+        if seen == 0:
+            broken.append(f"{kind}:missing '{label}'")
+        elif seen < wanted:
+            degraded.append(f"{kind}:partial '{label}' ({seen}/{wanted})")
+        else:
+            preserved.append(f"{kind}:{label}")