generated from coulomb/repo-seed
feat: WP-0001 + WP-0002 complete — LEVEL1 core + service interfaces
WP-0001 (Foundation & LEVEL1 Core):
- manifest model (FR-100), MD→DOCX builder (FR-200), DOCX→MD importer
(FR-300/400), template family registry (FR-600), drift detector (FR-700),
CLI wiring, pre-commit config, CI skeleton, regression harness
WP-0002 (Service Interfaces & Workflow Orchestration):
- REST service via FastAPI (FR-900): /health, /version, /capabilities,
/templates, /styles, /validate, /build, /import, /compare,
/templates/register, /workflows/{name}, /evidence/{run_id}
- Evidence & report store (FR-1400): JSON-backed, per-run, retrievable
through all interfaces, classification (pass/warnings/failed)
- Composite workflow orchestration (FR-1300): single-file-roundtrip,
multi-file-roundtrip, release-regression, family-switch-build
- MCP server via FastMCP (FR-1000): all tools + resources
- CLI additions: `markidocx serve`, `markidocx workflow`, `markidocx mcp`
- Interface parity tests: CLI / REST / MCP produce equivalent results
135 tests passing, ruff + mypy clean.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
130
src/markidocx/differ.py
Normal file
130
src/markidocx/differ.py
Normal file
@@ -0,0 +1,130 @@
|
||||
"""Structural drift detection for markidocx (FR-700)."""
|
||||
|
||||
from __future__ import annotations

import re
from collections import Counter
from dataclasses import dataclass, field
|
||||
|
||||
# Patterns used to pull structural elements out of Markdown text.
#
# The gap between a heading/list marker and its text is restricted to
# horizontal whitespace ([ \t]).  ``\s`` also matches newlines, so with it a
# bare "#" or "-" line would swallow the next non-blank line and report that
# line as a heading or list item.
HEADING_RE = re.compile(r"^(#{1,6})[ \t]+(.+)$", re.MULTILINE)
LIST_ITEM_RE = re.compile(r"^([ \t]*[-*+]|[ \t]*\d+\.)[ \t]+(.+)$", re.MULTILINE)
# A table row is any line that both starts and ends with a pipe.
TABLE_ROW_RE = re.compile(r"^\|.+\|$", re.MULTILINE)
# Matches the ``[^label]`` token, whether it is a reference or a definition.
FOOTNOTE_RE = re.compile(r"\[\^[^\]]+\]")
# Inline links: group 1 is the link text, group 2 the target.
LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
|
||||
|
||||
|
||||
@dataclass
class DriftReport:
    """Result of comparing an original Markdown text with its re-import.

    The four lists hold short, human-readable entries (for example
    ``"heading:# Title"``), one per classified element.  Every list defaults
    to a fresh empty list, so only ``has_drift`` is required.
    """

    # Overall verdict for the comparison.
    has_drift: bool
    # Elements found unchanged after the round-trip.
    preserved: list[str] = field(default_factory=list)
    # Elements still present but altered or only partly carried over.
    degraded: list[str] = field(default_factory=list)
    # Elements present in the original and missing afterwards.
    broken: list[str] = field(default_factory=list)
    # Constructs the round-trip does not support.
    unsupported: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def compare(original: str, reimported: str) -> DriftReport:
    """Compare *original* Markdown against *reimported* Markdown.

    Each structural element found in *original* is classified as:

    - preserved: found unchanged in *reimported*
    - degraded: only partly carried over (fewer repeats of a heading or
      list item, more tables than before, or a link whose exact
      ``[text](target)`` form is gone)
    - broken: present in *original*, missing in *reimported*
    - unsupported: construct not supported by the round-trip; no check in
      this function populates it yet, so it is always empty

    Apart from an increased table count, elements that occur only in
    *reimported* are not reported.

    Footnote and link entries are emitted in order of first appearance in
    *original*.  Iterating a ``set`` of strings here would make the entry
    order depend on per-process hash randomization, so two runs over the
    same input could produce differently ordered reports.

    Returns:
        A DriftReport whose ``has_drift`` is true when anything was
        classified as degraded or broken.
    """
    preserved: list[str] = []
    degraded: list[str] = []
    broken: list[str] = []
    unsupported: list[str] = []

    # --- Headings (FR-501) ---
    _compare_sets(
        "heading",
        _extract_headings(original),
        _extract_headings(reimported),
        preserved,
        degraded,
        broken,
    )

    # --- Lists (FR-502) ---
    _compare_sets(
        "list_item",
        _extract_list_items(original),
        _extract_list_items(reimported),
        preserved,
        degraded,
        broken,
    )

    # --- Tables (FR-503) ---
    # Only the number of tables is compared, not their content.
    orig_tables = _count_tables(original)
    reim_tables = _count_tables(reimported)
    if reim_tables < orig_tables:
        broken.append(f"tables:missing {orig_tables - reim_tables} of {orig_tables}")
    elif reim_tables > orig_tables:
        degraded.append(f"tables:count changed {orig_tables}→{reim_tables}")
    elif orig_tables > 0:
        preserved.append(f"tables:{orig_tables}")

    # --- Footnotes (FR-504) ---
    # dict.fromkeys de-duplicates while keeping first-appearance order.
    reim_fn = set(FOOTNOTE_RE.findall(reimported))
    for fn in dict.fromkeys(FOOTNOTE_RE.findall(original)):
        if fn in reim_fn:
            preserved.append(f"footnote:{fn}")
        else:
            broken.append(f"footnote:{fn}")

    # --- Links (FR-506) ---
    # A link counts as preserved only if its full "[text](target)" form
    # reappears verbatim; otherwise it is reported as degraded, not broken.
    reim_links = {m.group(0) for m in LINK_RE.finditer(reimported)}
    for link in dict.fromkeys(m.group(0) for m in LINK_RE.finditer(original)):
        if link in reim_links:
            preserved.append(f"link:{link[:40]}")
        else:
            degraded.append(f"link:lost {link[:40]}")

    return DriftReport(
        has_drift=bool(degraded or broken),
        preserved=preserved,
        degraded=degraded,
        broken=broken,
        unsupported=unsupported,
    )
|
||||
|
||||
|
||||
def _extract_headings(text: str) -> list[str]:
    """Return every heading matched by ``HEADING_RE`` in *text*, in order.

    Each heading is normalized to ``"<hashes> <title>"``: the original run
    of ``#`` characters, a single space, and the title with surrounding
    whitespace stripped.
    """
    headings: list[str] = []
    for match in HEADING_RE.finditer(text):
        marker, title = match.group(1), match.group(2)
        level = len(marker)
        headings.append(f"{'#' * level} {title.strip()}")
    return headings
|
||||
|
||||
|
||||
def _extract_list_items(text: str) -> list[str]:
    """Return the text of every list item matched by ``LIST_ITEM_RE``, in order.

    Only the item text is kept, stripped of surrounding whitespace; the
    marker and its indentation are discarded.
    """
    items: list[str] = []
    for match in LIST_ITEM_RE.finditer(text):
        body = match.group(2)
        items.append(body.strip())
    return items
|
||||
|
||||
|
||||
def _count_tables(text: str) -> int:
|
||||
rows = TABLE_ROW_RE.findall(text)
|
||||
if not rows:
|
||||
return 0
|
||||
# Count separator rows as table boundaries
|
||||
sep_re = re.compile(r"^\|[-| :]+\|$")
|
||||
count = sum(1 for r in rows if sep_re.match(r))
|
||||
return count
|
||||
|
||||
|
||||
def _compare_sets(
|
||||
kind: str,
|
||||
orig: list[str],
|
||||
reim: list[str],
|
||||
preserved: list[str],
|
||||
degraded: list[str],
|
||||
broken: list[str],
|
||||
) -> None:
|
||||
orig_counts: dict[str, int] = {}
|
||||
for item in orig:
|
||||
orig_counts[item] = orig_counts.get(item, 0) + 1
|
||||
|
||||
reim_counts: dict[str, int] = {}
|
||||
for item in reim:
|
||||
reim_counts[item] = reim_counts.get(item, 0) + 1
|
||||
|
||||
for item, count in orig_counts.items():
|
||||
reim_count = reim_counts.get(item, 0)
|
||||
if reim_count >= count:
|
||||
preserved.append(f"{kind}:{item[:60]}")
|
||||
elif reim_count > 0:
|
||||
degraded.append(f"{kind}:partial '{item[:60]}' ({reim_count}/{count})")
|
||||
else:
|
||||
broken.append(f"{kind}:missing '{item[:60]}'")
|
||||
Reference in New Issue
Block a user