feat: WP-0001 + WP-0002 complete — LEVEL1 core + service interfaces

WP-0001 (Foundation & LEVEL1 Core):
- manifest model (FR-100), MD→DOCX builder (FR-200), DOCX→MD importer
  (FR-300/400), template family registry (FR-600), drift detector (FR-700),
  CLI wiring, pre-commit config, CI skeleton, regression harness

WP-0002 (Service Interfaces & Workflow Orchestration):
- REST service via FastAPI (FR-900): /health, /version, /capabilities,
  /templates, /styles, /validate, /build, /import, /compare,
  /templates/register, /workflows/{name}, /evidence/{run_id}
- Evidence & report store (FR-1400): JSON-backed, per-run, retrievable
  through all interfaces, classification (pass/warnings/failed)
- Composite workflow orchestration (FR-1300): single-file-roundtrip,
  multi-file-roundtrip, release-regression, family-switch-build
- MCP server via FastMCP (FR-1000): all tools + resources
- CLI additions: `markidocx serve`, `markidocx workflow`, `markidocx mcp`
- Interface parity tests: CLI / REST / MCP produce equivalent results

135 tests passing, ruff + mypy clean.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-16 07:46:31 +00:00
parent 42789cad1e
commit 1f3dddf7d6
30 changed files with 4158 additions and 26 deletions

232
src/markidocx/builder.py Normal file
View File

@@ -0,0 +1,232 @@
"""MD→DOCX builder for markidocx (FR-200, FR-501508)."""
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
import mistune
from docx.document import Document as DocxDocument
from docx.shared import Pt, RGBColor
from markidocx.manifest import FeatureLevel, Manifest
from markidocx.templates import FamilyRegistry
@dataclass
class BuildResult:
    """Result record returned by ``build_document``.

    ``warnings`` and ``errors`` default to a fresh empty list per instance.
    """

    # Overall outcome flag.
    success: bool
    # Location of the generated DOCX file.
    output_path: Path
    # Template family name and feature level the build ran with.
    family: str
    feature_level: str
    # Human-readable diagnostics collected during the build.
    warnings: list[str] = field(default_factory=list)
    errors: list[str] = field(default_factory=list)
def build_document(manifest: Manifest) -> BuildResult:
    """Build a DOCX file from the Markdown sources described by *manifest*.

    The sources are read in manifest order, joined with blank lines, rendered
    into a new document created for the project's template family, and saved
    as ``<output_dir>/<project name>.docx``.

    Returns:
        A BuildResult. Sources that cannot be read and failures while writing
        the output file are reported with ``success=False`` and a message in
        ``errors`` rather than raised, so callers can rely on the result
        object. Markdown constructs that are skipped are listed in
        ``warnings``.
    """
    warnings: list[str] = []
    errors: list[str] = []
    output_path = manifest.output_dir / f"{manifest.project.name}.docx"

    def _result(success: bool) -> BuildResult:
        return BuildResult(
            success=success,
            output_path=output_path,
            family=manifest.project.family,
            feature_level=manifest.project.feature_level.value,
            warnings=warnings,
            errors=errors,
        )

    # Compose all source files into one Markdown string.
    parts: list[str] = []
    for src in manifest.sources:
        try:
            parts.append(src.path.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError) as exc:
            errors.append(f"Could not read source {src.path}: {exc}")
    if errors:
        return _result(False)
    markdown_text = "\n\n".join(parts)

    registry = FamilyRegistry()
    doc = registry.create_document(manifest.project.family)

    # Propagate metadata (FR-207).
    core_props = doc.core_properties
    if manifest.metadata.get("title"):
        core_props.title = str(manifest.metadata["title"])
    if manifest.metadata.get("author"):
        core_props.author = str(manifest.metadata["author"])

    # Parse and render tokens into the document.
    unsupported: list[str] = []
    _render_markdown(doc, markdown_text, manifest.project.feature_level, warnings, unsupported)
    warnings.extend(f"Unsupported construct skipped: {item}" for item in unsupported)

    # Ensure the output directory exists, then write the file.
    try:
        manifest.output_dir.mkdir(parents=True, exist_ok=True)
        doc.save(str(output_path))
    except OSError as exc:
        errors.append(f"Could not write {output_path}: {exc}")
        return _result(False)

    return _result(True)
# ---------------------------------------------------------------------------
# Markdown → DOCX rendering
# ---------------------------------------------------------------------------
def _render_markdown(
    doc: DocxDocument,
    text: str,
    feature_level: FeatureLevel,
    warnings: list[str],
    unsupported: list[str],
) -> None:
    """Tokenise *text* and render each block token into *doc*, in order.

    *feature_level*, *warnings* and *unsupported* are handed through to
    ``_render_token`` unchanged.
    """
    for block_token in _tokenise(text):
        _render_token(doc, block_token, feature_level, warnings, unsupported)
def _tokenise(text: str) -> list[dict]:  # type: ignore[type-arg]
    """Parse *text* with mistune and return its block-level tokens.

    An empty list is returned when the parser result is not a list.
    """
    # renderer=None asks mistune for the token (AST) form instead of HTML.
    # NOTE(review): no plugins are enabled here - confirm whether "table"
    # tokens can be produced at all without the table plugin.
    parse = mistune.create_markdown(renderer=None)
    parsed = parse(text)
    return parsed if isinstance(parsed, list) else []
def _render_token(
doc: DocxDocument,
token: dict,
feature_level: FeatureLevel,
warnings: list[str],
unsupported: list[str],
) -> None:
token_type = token.get("type", "")
if token_type == "heading":
level = token.get("attrs", {}).get("level", 1)
text = _extract_text(token.get("children", []))
try:
doc.add_heading(text, level=level)
except Exception:
doc.add_paragraph(text, style="Normal")
elif token_type == "paragraph":
text = _extract_text(token.get("children", []))
para = doc.add_paragraph(style="Normal")
_add_inline_runs(para, token.get("children", []))
elif token_type == "list":
ordered = token.get("attrs", {}).get("ordered", False)
items = token.get("children", [])
for item in items:
item_children = item.get("children", [])
text = _extract_text(item_children)
style = "List Number" if ordered else "List Bullet"
try:
para = doc.add_paragraph(style=style)
except Exception:
para = doc.add_paragraph()
para.text = text
elif token_type == "table":
_render_table(doc, token)
elif token_type == "block_code":
code = token.get("raw", "")
para = doc.add_paragraph(style="Normal")
run = para.add_run(code)
run.font.name = "Courier New"
run.font.size = Pt(9)
elif token_type == "block_quote":
children = token.get("children", [])
for child in children:
text = _extract_text(child.get("children", []))
para = doc.add_paragraph(style="Normal")
para.add_run(text).italic = True
elif token_type == "thematic_break":
doc.add_paragraph("" * 20, style="Normal")
elif token_type in ("html_block", "raw_html"):
unsupported.append(f"html ({token_type})")
elif token_type == "blank_line":
pass # ignore blank lines
else:
# Unknown token — surface as unsupported (FR-508)
unsupported.append(token_type)
def _render_table(doc: DocxDocument, token: dict) -> None:
"""Render a Markdown table token into a DOCX table."""
head = token.get("children", [{}])[0] if token.get("children") else {}
body_rows = token.get("children", [])[1:] if len(token.get("children", [])) > 1 else []
head_cells = head.get("children", []) if head.get("type") == "table_head" else []
all_rows = [head_cells] + [row.get("children", []) for row in body_rows]
if not all_rows or not all_rows[0]:
return
num_cols = max(len(row) for row in all_rows)
tbl = doc.add_table(rows=len(all_rows), cols=num_cols)
tbl.style = "Table Grid"
for r_idx, row in enumerate(all_rows):
for c_idx, cell_token in enumerate(row):
text = _extract_text(cell_token.get("children", []))
cell = tbl.cell(r_idx, c_idx)
cell.text = text
if r_idx == 0:
for run in cell.paragraphs[0].runs:
run.bold = True
def _extract_text(children: list[dict]) -> str:
"""Recursively extract plain text from a token children list."""
parts: list[str] = []
for child in children:
child_type = child.get("type", "")
if child_type == "text":
parts.append(child.get("raw", ""))
elif child_type in ("strong", "emphasis", "codespan", "link"):
parts.append(_extract_text(child.get("children", [])))
elif child.get("raw"):
parts.append(child["raw"])
elif child.get("children"):
parts.append(_extract_text(child["children"]))
return "".join(parts)
def _add_inline_runs(para, children: list[dict]) -> None:
"""Add styled runs to *para* from inline token children."""
for child in children:
child_type = child.get("type", "")
if child_type == "text":
para.add_run(child.get("raw", ""))
elif child_type == "strong":
run = para.add_run(_extract_text(child.get("children", [])))
run.bold = True
elif child_type == "emphasis":
run = para.add_run(_extract_text(child.get("children", [])))
run.italic = True
elif child_type == "codespan":
run = para.add_run(child.get("raw", ""))
run.font.name = "Courier New"
elif child_type == "link":
text = _extract_text(child.get("children", []))
url = child.get("attrs", {}).get("url", "")
run = para.add_run(f"{text} ({url})" if url else text)
run.font.color.rgb = RGBColor(0x00, 0x56, 0xB3)
elif child_type == "softline":
para.add_run(" ")
elif child_type == "linebreak":
para.add_run("\n")
else:
raw = child.get("raw", "")
if raw:
para.add_run(raw)

View File

@@ -3,9 +3,8 @@
from __future__ import annotations
import json
import sys
from pathlib import Path
from typing import Annotated, Optional
from typing import Annotated
import typer
from rich.console import Console
@@ -43,7 +42,7 @@ def validate(
typer.echo(json.dumps({"status": "error", "message": str(exc)}))
else:
err_console.print(f"[red]✗ Manifest error:[/red] {exc}")
raise typer.Exit(1)
raise typer.Exit(1) from None
@app.command()
@@ -62,7 +61,7 @@ def build(
typer.echo(json.dumps({"status": "error", "message": str(exc)}))
else:
err_console.print(f"[red]✗ Manifest error:[/red] {exc}")
raise typer.Exit(1)
raise typer.Exit(1) from None
result = build_document(m)
if json_output:
@@ -107,7 +106,7 @@ def import_docx(
typer.echo(json.dumps({"status": "error", "message": str(exc)}))
else:
err_console.print(f"[red]✗ Manifest error:[/red] {exc}")
raise typer.Exit(1)
raise typer.Exit(1) from None
result = import_document(m, docx)
if json_output:
@@ -141,7 +140,6 @@ def compare(
json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
) -> None:
"""Compare original Markdown with re-imported DOCX (FR-700)."""
from markidocx.builder import build_document
from markidocx.differ import compare as do_compare
from markidocx.importer import import_document
from markidocx.manifest import ManifestError, load_manifest
@@ -153,7 +151,7 @@ def compare(
typer.echo(json.dumps({"status": "error", "message": str(exc)}))
else:
err_console.print(f"[red]✗ Manifest error:[/red] {exc}")
raise typer.Exit(2)
raise typer.Exit(2) from None
# Read original markdown
original_parts: list[str] = []
@@ -254,7 +252,74 @@ def template_register(
typer.echo(json.dumps({"status": "error", "message": str(exc)}))
else:
err_console.print(f"[red]✗[/red] {exc}")
raise typer.Exit(1)
raise typer.Exit(1) from None
@app.command()
def serve(
    host: Annotated[str, typer.Option("--host", help="Bind host")] = "127.0.0.1",
    port: Annotated[int, typer.Option("--port", help="Bind port")] = 8000,
    dev: Annotated[bool, typer.Option("--dev", help="Enable auto-reload")] = False,
) -> None:
    """Start the REST service (FR-901).

    With ``--dev`` the application is handed to uvicorn as an import string
    plus ``factory=True``: uvicorn can only auto-reload an application it is
    able to re-import, so passing an already-built app object together with
    ``reload=True`` does not enable reloading.
    """
    import uvicorn

    if dev:
        uvicorn.run(
            "markidocx.rest:create_app",
            factory=True,
            host=host,
            port=port,
            reload=True,
        )
        return

    from markidocx.rest import create_app

    uvicorn.run(create_app(), host=host, port=port)
@app.command()
def workflow(
    name: Annotated[str, typer.Argument(help="Workflow name")],
    manifest: Annotated[Path, typer.Argument(help="Path to manifest YAML file")],
    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
) -> None:
    """Invoke a named composite workflow (FR-1300).

    Prints either a JSON document (``--json``) or a human-readable summary
    with one line per step. The process exits with status 1 when
    ``run_workflow`` raises ``WorkflowError`` or the result is classified as
    ``"failed"``, and with status 0 otherwise.
    """
    from markidocx.workflows import WorkflowError, run_workflow

    try:
        result = run_workflow(name, manifest)
    except WorkflowError as exc:
        if json_output:
            typer.echo(json.dumps({"status": "error", "message": str(exc)}))
        else:
            err_console.print(f"[red]✗ Workflow error:[/red] {exc}")
        raise typer.Exit(1) from None

    # Evaluate the outcome once; it drives status text, icon and exit code.
    succeeded = result.classification != "failed"

    if json_output:
        payload = {
            "status": "ok" if succeeded else "error",
            "run_id": result.run_id,
            "workflow_name": result.workflow_name,
            "classification": result.classification,
            "steps": [
                {"name": s.name, "status": s.status, "error": s.error}
                for s in result.steps
            ],
        }
        typer.echo(json.dumps(payload))
    else:
        icon = "[green]✓[/green]" if succeeded else "[red]✗[/red]"
        console.print(f"{icon} Workflow [bold]{result.workflow_name}[/bold]: {result.classification}")
        # Distinct marker per step status; any other status gets a dash.
        step_icons = {"executed": "✓", "failed": "✗"}
        for step in result.steps:
            step_icon = step_icons.get(step.status, "-")
            console.print(f"  {step_icon} {step.name}: {step.status}")
        console.print(f"  run_id: {result.run_id}")

    raise typer.Exit(0 if succeeded else 1)
@app.command("mcp")
def mcp_serve() -> None:
"""Start the MCP server (FR-1001)."""
from markidocx.mcp_server import mcp
mcp.run()
if __name__ == "__main__":

130
src/markidocx/differ.py Normal file
View File

@@ -0,0 +1,130 @@
"""Structural drift detection for markidocx (FR-700)."""
from __future__ import annotations
import re
from dataclasses import dataclass, field
HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
LIST_ITEM_RE = re.compile(r"^(\s*[-*+]|\s*\d+\.)\s+(.+)$", re.MULTILINE)
TABLE_ROW_RE = re.compile(r"^\|.+\|$", re.MULTILINE)
FOOTNOTE_RE = re.compile(r"\[\^[^\]]+\]")
LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
@dataclass
class DriftReport:
    """Classified result of comparing original and re-imported Markdown.

    Each list holds short labels for structural elements; every list
    defaults to a fresh empty list.
    """

    # Set by ``compare`` to True when anything is degraded or broken.
    has_drift: bool
    # Elements found unchanged in the re-imported text.
    preserved: list[str] = field(default_factory=list)
    # Elements present but modified.
    degraded: list[str] = field(default_factory=list)
    # Elements present in the original and missing afterwards.
    broken: list[str] = field(default_factory=list)
    # Constructs not supported by the round-trip.
    unsupported: list[str] = field(default_factory=list)
def compare(original: str, reimported: str) -> DriftReport:
    """Compare *original* Markdown against *reimported* Markdown.

    Structural elements are classified as:

    - preserved:   identical in both
    - degraded:    present but modified
    - broken:      present in original, missing in reimported
    - unsupported: construct not supported by the round-trip
      (nothing is added to this list by the current checks)

    Headings, list items, tables, footnote markers and inline links are
    checked. Footnotes and links are reported in sorted order so that the
    report is identical from run to run.

    Returns:
        A DriftReport whose ``has_drift`` is True when anything was
        classified as degraded or broken.
    """
    preserved: list[str] = []
    degraded: list[str] = []
    broken: list[str] = []
    unsupported: list[str] = []

    # --- Headings (FR-501) ---
    _compare_sets(
        "heading",
        _extract_headings(original),
        _extract_headings(reimported),
        preserved,
        degraded,
        broken,
    )

    # --- Lists (FR-502) ---
    _compare_sets(
        "list_item",
        _extract_list_items(original),
        _extract_list_items(reimported),
        preserved,
        degraded,
        broken,
    )

    # --- Tables (FR-503) ---
    orig_tables = _count_tables(original)
    reim_tables = _count_tables(reimported)
    if orig_tables == reim_tables:
        if orig_tables > 0:
            preserved.append(f"tables:{orig_tables}")
    elif reim_tables < orig_tables:
        broken.append(f"tables:missing {orig_tables - reim_tables} of {orig_tables}")
    else:
        # Separate the two counts; "2" and "3" used to print as "23".
        degraded.append(f"tables:count changed {orig_tables} -> {reim_tables}")

    # --- Footnotes (FR-504) ---
    reim_fn = set(FOOTNOTE_RE.findall(reimported))
    for fn in sorted(set(FOOTNOTE_RE.findall(original))):
        if fn in reim_fn:
            preserved.append(f"footnote:{fn}")
        else:
            broken.append(f"footnote:{fn}")

    # --- Links (FR-506) ---
    reim_links = {m.group(0) for m in LINK_RE.finditer(reimported)}
    for link in sorted({m.group(0) for m in LINK_RE.finditer(original)}):
        if link in reim_links:
            preserved.append(f"link:{link[:40]}")
        else:
            degraded.append(f"link:lost {link[:40]}")

    return DriftReport(
        has_drift=bool(degraded or broken),
        preserved=preserved,
        degraded=degraded,
        broken=broken,
        unsupported=unsupported,
    )
def _extract_headings(text: str) -> list[str]:
    """Return every ATX heading in *text* as ``"<hashes> <stripped title>"``."""
    headings: list[str] = []
    for match in HEADING_RE.finditer(text):
        hashes, title = match.group(1), match.group(2)
        headings.append(f"{'#' * len(hashes)} {title.strip()}")
    return headings
def _extract_list_items(text: str) -> list[str]:
    """Return the stripped text of every list item line matched in *text*."""
    items: list[str] = []
    for match in LIST_ITEM_RE.finditer(text):
        items.append(match.group(2).strip())
    return items
def _count_tables(text: str) -> int:
    """Count the tables in *text* by counting their header separator rows.

    A separator row is a table row made only of ``-``, ``|``, ``:`` and
    spaces that contains at least one dash. The dash requirement keeps data
    rows whose cells are all empty (``|  |  |``) from being counted as an
    additional table.
    """
    sep_re = re.compile(r"^\|[-| :]+\|$")
    return sum(
        1
        for row in TABLE_ROW_RE.findall(text)
        if "-" in row and sep_re.match(row)
    )
def _compare_sets(
kind: str,
orig: list[str],
reim: list[str],
preserved: list[str],
degraded: list[str],
broken: list[str],
) -> None:
orig_counts: dict[str, int] = {}
for item in orig:
orig_counts[item] = orig_counts.get(item, 0) + 1
reim_counts: dict[str, int] = {}
for item in reim:
reim_counts[item] = reim_counts.get(item, 0) + 1
for item, count in orig_counts.items():
reim_count = reim_counts.get(item, 0)
if reim_count >= count:
preserved.append(f"{kind}:{item[:60]}")
elif reim_count > 0:
degraded.append(f"{kind}:partial '{item[:60]}' ({reim_count}/{count})")
else:
broken.append(f"{kind}:missing '{item[:60]}'")

169
src/markidocx/evidence.py Normal file
View File

@@ -0,0 +1,169 @@
"""Evidence and report storage for markidocx (FR-1400)."""
from __future__ import annotations
import json
import uuid
from dataclasses import asdict, dataclass, field
from datetime import UTC, datetime
from pathlib import Path
from typing import Any, Literal
ReportType = Literal["validation", "build", "import", "drift"]
EvidenceClassification = Literal["pass", "pass-with-warnings", "failed"]
@dataclass
class ReportContext:
    """Optional descriptive context stored alongside a report.

    Every field is optional and defaults to ``None``.
    """

    project: str | None = None
    family: str | None = None
    feature_level: str | None = None
    workflow: str | None = None
    run_context: str | None = None
@dataclass
class RunReport:
    """A single report belonging to one run, as stored by the evidence layer."""

    run_id: str
    report_type: str
    data: dict[str, Any]
    created_at: str
    context: ReportContext = field(default_factory=ReportContext)

    def to_dict(self) -> dict[str, Any]:
        """Return the report, including its nested context, as plain dicts."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> RunReport:
        """Rebuild a report from the mapping produced by ``to_dict``.

        The input mapping is copied, not modified. A ``context`` entry that
        is missing or not a mapping yields a default ``ReportContext``.
        """
        payload = dict(d)
        raw_context = payload.pop("context", {})
        if isinstance(raw_context, dict):
            context = ReportContext(**raw_context)
        else:
            context = ReportContext()
        return cls(context=context, **payload)
@dataclass
class EvidenceSet:
    """Assembled evidence from one or more runs (FR-1406 to FR-1414)."""

    run_ids: list[str]
    reports: list[RunReport]

    @property
    def classification(self) -> EvidenceClassification:
        """pass / pass-with-warnings / failed (FR-1414).

        A set is ``"failed"`` when any report has status ``"error"`` or a
        non-empty ``errors`` entry; otherwise ``"pass-with-warnings"`` when
        any report has a non-empty ``warnings`` entry; otherwise ``"pass"``.
        """
        if any(r.data.get("status") == "error" or r.data.get("errors") for r in self.reports):
            return "failed"
        if any(r.data.get("warnings") for r in self.reports):
            return "pass-with-warnings"
        return "pass"

    @property
    def composition(self) -> list[dict[str, str]]:
        """Which reports/artifacts are in this set (FR-1407)."""
        return [{"run_id": r.run_id, "type": r.report_type} for r in self.reports]

    @property
    def complete(self) -> bool:
        """False when some expected reports are missing (FR-1413).

        The set is complete only when it holds at least one report and every
        id in ``run_ids`` contributed at least one of them; a requested run
        without any stored report makes the set incomplete.
        """
        if not self.reports:
            return False
        covered = {r.run_id for r in self.reports}
        return all(run_id in covered for run_id in self.run_ids)

    def summary(self) -> dict[str, Any]:
        """Status summary across the set (FR-1408)."""
        # ``or []`` tolerates reports that stored an explicit null.
        warnings_count = sum(len(r.data.get("warnings") or []) for r in self.reports)
        errors_count = sum(len(r.data.get("errors") or []) for r in self.reports)
        return {
            "classification": self.classification,
            "run_count": len(self.run_ids),
            "report_count": len(self.reports),
            "complete": self.complete,
            "warnings_count": warnings_count,
            "errors_count": errors_count,
            "composition": self.composition,
        }
class EvidenceStore:
"""Persistent evidence layer for markidocx operations (FR-1400)."""
def __init__(self, base_dir: Path | None = None) -> None:
self.base_dir = base_dir or Path(".markidocx") / "evidence"
self.base_dir.mkdir(parents=True, exist_ok=True)
def new_run_id(self) -> str:
"""Generate a fresh run identifier."""
return str(uuid.uuid4())
def save_report(
self,
run_id: str,
report_type: str,
data: dict[str, Any],
context: ReportContext | None = None,
) -> Path:
"""Persist a report keyed by run_id and type (FR-14011404)."""
run_dir = self.base_dir / run_id
run_dir.mkdir(parents=True, exist_ok=True)
report = RunReport(
run_id=run_id,
report_type=report_type,
data=data,
created_at=datetime.now(UTC).isoformat(),
context=context or ReportContext(),
)
path = run_dir / f"{report_type}.json"
path.write_text(json.dumps(report.to_dict(), indent=2), encoding="utf-8")
return path
def get_report(self, run_id: str, report_type: str) -> RunReport | None:
"""Retrieve a specific report (FR-1409)."""
path = self.base_dir / run_id / f"{report_type}.json"
if not path.exists():
return None
return RunReport.from_dict(json.loads(path.read_text(encoding="utf-8")))
def list_runs(self) -> list[str]:
"""List all run IDs in the store."""
if not self.base_dir.exists():
return []
return sorted(d.name for d in self.base_dir.iterdir() if d.is_dir())
def list_reports(self, run_id: str) -> list[RunReport]:
"""List all reports for a run (FR-1409)."""
run_dir = self.base_dir / run_id
if not run_dir.exists():
return []
reports = []
for p in sorted(run_dir.glob("*.json")):
reports.append(RunReport.from_dict(json.loads(p.read_text(encoding="utf-8"))))
return reports
def assemble_set(self, run_ids: list[str]) -> EvidenceSet:
"""Assemble an evidence set from multiple runs (FR-1406)."""
reports: list[RunReport] = []
for run_id in run_ids:
reports.extend(self.list_reports(run_id))
return EvidenceSet(run_ids=run_ids, reports=reports)
def to_markdown(self, run_id: str) -> str:
"""Human-readable Markdown report for a run (FR-1411)."""
reports = self.list_reports(run_id)
lines = [f"# Evidence Run: {run_id}\n"]
for r in reports:
lines.append(f"## {r.report_type.title()} Report")
lines.append(f"- Status: {r.data.get('status', 'unknown')}")
for w in r.data.get("warnings", []):
lines.append(f"- Warning: {w}")
for e in r.data.get("errors", []):
lines.append(f"- Error: {e}")
lines.append("")
return "\n".join(lines)
def to_json(self, run_id: str) -> str:
"""Machine-readable JSON report for a run (FR-1412)."""
reports = self.list_reports(run_id)
return json.dumps(
{"run_id": run_id, "reports": [r.to_dict() for r in reports]},
indent=2,
)

218
src/markidocx/importer.py Normal file
View File

@@ -0,0 +1,218 @@
"""DOCX→Markdown importer for markidocx (FR-300, FR-400)."""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from pathlib import Path
from docx import Document
from docx.document import Document as DocxDocument
from docx.table import Table
from docx.text.paragraph import Paragraph
from markidocx.manifest import Manifest
HEADING_STYLE_RE = re.compile(r"^Heading (\d+)$", re.IGNORECASE)
LIST_BULLET_RE = re.compile(r"^List Bullet", re.IGNORECASE)
LIST_NUMBER_RE = re.compile(r"^List Number", re.IGNORECASE)
@dataclass
class ImportResult:
    """Result record returned by ``import_document``."""

    success: bool
    # Markdown files written by the import.
    output_files: list[Path]
    # One of "redistributed", "merged" or "failed".
    mapping_status: str
    warnings: list[str] = field(default_factory=list)
def import_document(manifest: Manifest, docx_path: Path) -> ImportResult:
    """Convert *docx_path* to Markdown and write it back to the project.

    With exactly one manifest source, the whole text overwrites that file.
    With several, the text is split at H1 boundaries and, when the number of
    sections equals the number of sources, each section overwrites the
    corresponding source in order. In every other case a single
    ``imported_merged.md`` is written to the output directory and a warning
    is recorded.

    A missing or unreadable DOCX yields a failed result instead of raising.
    """
    warnings: list[str] = []

    def _failure(message: str) -> ImportResult:
        return ImportResult(
            success=False,
            output_files=[],
            mapping_status="failed",
            warnings=[message],
        )

    if not docx_path.exists():
        return _failure(f"DOCX file not found: {docx_path}")

    try:
        doc = Document(str(docx_path))
    except Exception as exc:
        return _failure(f"Could not open DOCX: {exc}")

    md_text = _docx_to_markdown(doc, warnings)
    manifest.output_dir.mkdir(parents=True, exist_ok=True)

    # Redistribution to source files (FR-305, FR-405). A single source takes
    # the whole text; several sources are matched against H1 sections.
    targets = manifest.sources
    if len(targets) == 1:
        sections = [md_text]
    else:
        sections = _split_by_h1(md_text)

    if len(sections) == len(targets):
        written: list[Path] = []
        for src, section_text in zip(targets, sections, strict=True):
            src.path.write_text(section_text, encoding="utf-8")
            written.append(src.path)
        return ImportResult(
            success=True,
            output_files=written,
            mapping_status="redistributed",
            warnings=warnings,
        )

    # Fallback: merged single output (FR-406)
    warnings.append(
        f"Could not redistribute to {len(targets)} source files "
        f"(found {len(sections)} H1 sections); writing merged output"
    )
    merged_path = manifest.output_dir / "imported_merged.md"
    merged_path.write_text(md_text, encoding="utf-8")
    return ImportResult(
        success=True,
        output_files=[merged_path],
        mapping_status="merged",
        warnings=warnings,
    )
# ---------------------------------------------------------------------------
# DOCX → Markdown conversion
# ---------------------------------------------------------------------------
def _docx_to_markdown(doc: DocxDocument, warnings: list[str]) -> str:
    """Render the body of *doc* as Markdown, one blank line between blocks.

    Paragraphs and tables are converted in document order; paragraphs whose
    conversion returns ``None`` are left out.
    """
    rendered: list[str] = []
    for block in _iter_blocks(doc):
        if isinstance(block, Table):
            rendered.append(_table_to_md(block))
        elif isinstance(block, Paragraph):
            converted = _paragraph_to_md(block, warnings)
            if converted is not None:
                rendered.append(converted)
    return "\n\n".join(rendered)
def _iter_blocks(doc: DocxDocument):
    """Yield Paragraph and Table wrappers for the body children, in order.

    Only ``p`` and ``tbl`` elements are yielded; other children are skipped.
    """
    for element in doc.element.body:
        # Drop the "{namespace}" prefix, if any, to get the local tag name.
        local_name = element.tag.rsplit("}", 1)[-1]
        if local_name == "tbl":
            yield Table(element, doc)
        elif local_name == "p":
            yield Paragraph(element, doc)
def _paragraph_to_md(para: Paragraph, warnings: list[str]) -> str | None:
    """Convert a paragraph to a Markdown line.

    The paragraph style decides the output: "Heading N" becomes an ATX
    heading, "List Bullet..." a ``- `` item, "List Number..." a ``1. `` item.
    Any other non-empty paragraph is converted run by run so that inline
    formatting is kept. Returns ``None`` for an empty non-heading, non-list
    paragraph.

    Markdown has six heading levels, so a heading level outside 1-6 is
    clamped into that range and a message is appended to *warnings*.
    """
    style_name = para.style.name if para.style else "Normal"
    text = para.text.strip()

    # Headings
    m = HEADING_STYLE_RE.match(style_name)
    if m:
        level = int(m.group(1))
        clamped = min(max(level, 1), 6)
        if clamped != level:
            warnings.append(
                f"Heading level {level} is outside Markdown's 1-6 range; "
                f"emitted as level {clamped}"
            )
        return f"{'#' * clamped} {text}"

    # Lists
    if LIST_BULLET_RE.match(style_name):
        return f"- {text}"
    if LIST_NUMBER_RE.match(style_name):
        return f"1. {text}"

    # Normal text - preserve inline markup
    if not text:
        return None
    return _runs_to_md(para)
def _runs_to_md(para: Paragraph) -> str:
"""Convert paragraph runs to Markdown with inline formatting."""
parts: list[str] = []
for run in para.runs:
text = run.text
if not text:
continue
if run.bold and run.italic:
text = f"***{text}***"
elif run.bold:
text = f"**{text}**"
elif run.italic:
text = f"*{text}*"
elif run.font.name and "Courier" in run.font.name:
text = f"`{text}`"
parts.append(text)
return "".join(parts)
def _table_to_md(table: Table) -> str:
"""Convert a DOCX table to a GFM Markdown table."""
rows = table.rows
if not rows:
return ""
cells_per_row = [
[cell.text.strip().replace("|", "\\|") for cell in row.cells]
for row in rows
]
# Normalise column count
num_cols = max(len(r) for r in cells_per_row)
for row in cells_per_row:
while len(row) < num_cols:
row.append("")
lines: list[str] = []
header = "| " + " | ".join(cells_per_row[0]) + " |"
separator = "| " + " | ".join(["---"] * num_cols) + " |"
lines.append(header)
lines.append(separator)
for row in cells_per_row[1:]:
lines.append("| " + " | ".join(row) + " |")
return "\n".join(lines)
def _split_by_h1(md_text: str) -> list[str]:
"""Split Markdown text into sections at H1 boundaries."""
lines = md_text.split("\n\n")
sections: list[str] = []
current: list[str] = []
for chunk in lines:
if chunk.startswith("# ") and current:
sections.append("\n\n".join(current))
current = [chunk]
else:
current.append(chunk)
if current:
sections.append("\n\n".join(current))
return sections

113
src/markidocx/manifest.py Normal file
View File

@@ -0,0 +1,113 @@
"""Manifest model for markidocx projects (FR-100)."""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import StrEnum
from pathlib import Path
from typing import Any
import yaml
SUPPORTED_FAMILIES = {"article", "book", "website"}
class FeatureLevel(StrEnum):
LEVEL1 = "level1"
LEVEL3 = "level3"
class ManifestError(Exception):
    """Signals a manifest that is invalid or cannot be resolved."""
@dataclass
class SourceFile:
    """One source file listed in a manifest."""

    # Location of the source file.
    path: Path
@dataclass
class ProjectConfig:
    """The ``project`` section of a manifest."""

    # Project name.
    name: str
    # Declared feature level.
    feature_level: FeatureLevel
    # Template family name.
    family: str
@dataclass
class Manifest:
    """A loaded project manifest."""

    project: ProjectConfig
    # Source files in the order they were listed.
    sources: list[SourceFile]
    # Directory that receives generated output.
    output_dir: Path
    # Free-form metadata mapping; a fresh empty dict by default.
    metadata: dict[str, Any] = field(default_factory=dict)
def load_manifest(path: Path) -> Manifest:
    """Parse and validate a manifest YAML file.

    Relative source paths and the output directory are resolved against the
    directory containing *path*. The output directory defaults to
    ``./dist`` and is not created here.

    Raises:
        ManifestError: on any validation failure - the file is missing or
            unreadable, the YAML does not parse, a section has the wrong
            type, a required value is absent or invalid, or a listed source
            file does not exist.
    """
    if not path.exists():
        raise ManifestError(f"Manifest not found: {path}")

    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        raise ManifestError(f"Could not read manifest {path}: {exc}") from exc

    try:
        raw = yaml.safe_load(text) or {}
    except yaml.YAMLError as exc:
        raise ManifestError(f"YAML parse error: {exc}") from exc

    # A YAML document may be a list or scalar; everything below needs a mapping.
    if not isinstance(raw, dict):
        raise ManifestError("Manifest must be a mapping at the top level")

    # --- project section ---
    if "project" not in raw:
        raise ManifestError("Manifest missing required 'project' section")

    proj_raw = raw["project"]
    if not isinstance(proj_raw, dict):
        raise ManifestError("'project' must be a mapping")

    name = proj_raw.get("name")
    if not name:
        raise ManifestError("'project.name' is required")

    fl_raw = proj_raw.get("feature_level") or ""
    try:
        feature_level = FeatureLevel(fl_raw)
    except (ValueError, TypeError):
        raise ManifestError(
            f"Invalid feature_level '{fl_raw}'; must be one of {[e.value for e in FeatureLevel]}"
        ) from None

    family = proj_raw.get("family")
    # The isinstance check keeps unhashable values (lists, mappings) from
    # raising TypeError in the set membership test.
    if not isinstance(family, str) or family not in SUPPORTED_FAMILIES:
        raise ManifestError(
            f"Invalid family '{family}'; must be one of {sorted(SUPPORTED_FAMILIES)}"
        )

    project = ProjectConfig(name=name, feature_level=feature_level, family=family)

    # --- sources ---
    sources_raw = raw.get("sources", [])
    if not isinstance(sources_raw, list):
        raise ManifestError("'sources' must be a list")

    sources: list[SourceFile] = []
    for entry in sources_raw:
        src_path_str = entry.get("path") if isinstance(entry, dict) else entry
        if not src_path_str:
            raise ManifestError("Each source entry must have a 'path'")
        if not isinstance(src_path_str, str):
            raise ManifestError(f"Source 'path' must be a string, got {src_path_str!r}")
        src_path = (path.parent / src_path_str).resolve()
        if not src_path.exists():
            raise ManifestError(f"Source file not found: {src_path_str}")
        sources.append(SourceFile(path=src_path))

    # --- output ---
    output_raw = raw.get("output", {})
    output_dir_str = output_raw.get("dir", "./dist") if isinstance(output_raw, dict) else "./dist"
    if not isinstance(output_dir_str, str):
        raise ManifestError("'output.dir' must be a string")
    output_dir = (path.parent / output_dir_str).resolve()

    # --- metadata ---
    metadata = raw.get("metadata", {}) or {}
    if not isinstance(metadata, dict):
        raise ManifestError("'metadata' must be a mapping")

    return Manifest(
        project=project,
        sources=sources,
        output_dir=output_dir,
        metadata=metadata,
    )

352
src/markidocx/mcp_server.py Normal file
View File

@@ -0,0 +1,352 @@
"""MCP server for markidocx (FR-1000)."""
from __future__ import annotations
from pathlib import Path
from typing import Any
from mcp.server.fastmcp import FastMCP
from markidocx import __version__
from markidocx.manifest import SUPPORTED_FAMILIES, FeatureLevel
from markidocx.templates import FamilyRegistry
mcp = FastMCP("markidocx")
# ---------------------------------------------------------------------------
# T05 — MCP tools (FR-1002–FR-1015)
# ---------------------------------------------------------------------------
@mcp.tool()
def get_version() -> dict[str, str]:
    """Report the markidocx version under the ``version`` key (FR-1010)."""
    payload = {"version": __version__}
    return payload
@mcp.tool()
def list_templates() -> list[dict[str, str]]:
    """List the registered template families by name and description (FR-1002)."""
    families: list[dict[str, str]] = []
    for family in FamilyRegistry().list_families():
        families.append({"name": family.name, "description": family.description})
    return families
@mcp.tool()
def list_styles() -> list[dict[str, str]]:
    """List available styles (FR-1003).

    This implementation always returns an empty list.
    """
    styles: list[dict[str, str]] = []
    return styles
@mcp.tool()
def validate_project(manifest_yaml: str) -> dict[str, Any]:
    """Validate a manifest YAML string (FR-1004).

    The manifest is written to a temporary directory and loaded with
    ``load_manifest``. Because that loader requires every listed source file
    to exist, an empty placeholder is created for each source path first.
    Source paths come from tool input, so a path that resolves outside the
    temporary directory (absolute or via ``..``) is rejected with an error
    and never touched on disk.

    Returns a dict with status, project info, warnings, and errors.
    The context includes family and feature_level compatibility info (FR-1014).
    """
    import tempfile

    import yaml

    from markidocx.manifest import ManifestError, load_manifest

    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp).resolve()
        mp = tmp_path / "manifest.yaml"
        mp.write_text(manifest_yaml, encoding="utf-8")

        # Stub out any referenced sources. Parse problems are not reported
        # here; load_manifest below produces the authoritative error.
        try:
            raw = yaml.safe_load(manifest_yaml)
        except yaml.YAMLError:
            raw = None
        entries = raw.get("sources") if isinstance(raw, dict) else None
        for entry in entries if isinstance(entries, list) else []:
            sp = entry.get("path") if isinstance(entry, dict) else entry
            if not isinstance(sp, str) or not sp:
                continue
            stub = (tmp_path / sp).resolve()
            if not stub.is_relative_to(tmp_path):
                return {
                    "status": "error",
                    "errors": [f"Source path escapes the project directory: {sp}"],
                    "warnings": [],
                }
            try:
                # Nested paths need their parent directories first.
                stub.parent.mkdir(parents=True, exist_ok=True)
                stub.touch()
            except OSError:
                # Best effort: if the placeholder cannot be created,
                # load_manifest reports the source as not found.
                continue

        try:
            m = load_manifest(mp)
        except ManifestError as exc:
            return {
                "status": "error",
                "errors": [str(exc)],
                "warnings": [],
            }

        return {
            "status": "ok",
            "project": m.project.name,
            "family": m.project.family,
            "feature_level": m.project.feature_level.value,
            "warnings": [],
            "errors": [],
            "context": {
                "supported_families": sorted(SUPPORTED_FAMILIES),
                "supported_feature_levels": [e.value for e in FeatureLevel],
            },
        }
@mcp.tool()
def inspect_project(manifest_yaml: str) -> dict[str, Any]:
    """Inspect a project manifest and return its structure (FR-1005).

    Delegates to ``validate_project`` and returns its result unchanged.
    """
    return validate_project(manifest_yaml)  # type: ignore[no-any-return]
@mcp.tool()
def build(manifest_yaml: str, sources: list[dict[str, str]]) -> dict[str, Any]:
    """Build a DOCX from Markdown sources (FR-1006).

    sources: list of {"name": "...", "content": "..."} dicts. Each source is
    written into a temporary project directory under its ``name``, which may
    contain sub-directories. Names come from tool input, so a name that is
    missing or resolves outside that directory (absolute or via ``..``) is
    rejected with an error and nothing is written for it.

    Returns docx_base64 on success, otherwise a dict with status "error"
    and the collected errors and warnings.
    """
    import base64
    import tempfile

    from markidocx.builder import build_document
    from markidocx.manifest import ManifestError, load_manifest

    def _error(message: str) -> dict[str, Any]:
        return {"status": "error", "errors": [message], "warnings": []}

    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp).resolve()
        mp = tmp_path / "manifest.yaml"
        mp.write_text(manifest_yaml, encoding="utf-8")
        (tmp_path / "dist").mkdir()

        for src in sources:
            name = src.get("name")
            if not name:
                return _error("Each source must have a non-empty 'name'")
            target = (tmp_path / name).resolve()
            if target == tmp_path or not target.is_relative_to(tmp_path):
                return _error(f"Source name escapes the project directory: {name}")
            # Nested names need their parent directories first.
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(src.get("content", ""), encoding="utf-8")

        try:
            m = load_manifest(mp)
        except ManifestError as exc:
            return _error(str(exc))

        result = build_document(m)
        if not result.success:
            return {"status": "error", "errors": result.errors, "warnings": result.warnings}

        docx_b64 = base64.b64encode(Path(result.output_path).read_bytes()).decode()
        return {
            "status": "ok",
            "docx_base64": docx_b64,
            "family": result.family,
            "feature_level": result.feature_level,
            "warnings": result.warnings,
            "errors": [],
        }
@mcp.tool()
def import_docx(manifest_yaml: str, docx_base64: str) -> dict[str, Any]:
    """Import a DOCX back to Markdown (FR-1007).

    docx_base64: base64-encoded DOCX bytes.

    Returns a dict with ``status``, ``warnings`` and ``errors``; on success it
    also carries ``files`` (file name -> Markdown text) and
    ``mapping_status``. Malformed base64 yields an error result rather than
    an exception.
    """
    import base64
    import tempfile

    from markidocx.importer import import_document
    from markidocx.manifest import ManifestError, load_manifest

    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        # Resolved once so containment checks are not fooled by symlinked
        # temp locations.
        root = tmp_path.resolve()
        mp = tmp_path / "manifest.yaml"
        mp.write_text(manifest_yaml, encoding="utf-8")

        # Best-effort scaffolding of the output directory and source stubs;
        # load_manifest() below reports any real manifest problem, so the
        # broad except is deliberate.
        try:
            import yaml

            raw = yaml.safe_load(manifest_yaml) or {}
            out_raw = raw.get("output", {})
            out_dir = out_raw.get("dir", "./dist") if isinstance(out_raw, dict) else "./dist"
            # Joining handles a leading "./"; str.lstrip("./") would also eat
            # the dots of names such as ".build" or "../out".
            out_path = (root / out_dir).resolve()
            if out_path == root or root in out_path.parents:
                out_path.mkdir(parents=True, exist_ok=True)
            for entry in raw.get("sources", []):
                sp = entry.get("path") if isinstance(entry, dict) else entry
                if not sp:
                    continue
                stub = (root / sp).resolve()
                # Caller-controlled path: stay inside the temp project and do
                # not clobber an existing file (e.g. manifest.yaml).
                if root not in stub.parents or stub.exists():
                    continue
                stub.parent.mkdir(parents=True, exist_ok=True)
                stub.write_text("", encoding="utf-8")
        except Exception:
            (tmp_path / "dist").mkdir(exist_ok=True)

        docx_path = tmp_path / "input.docx"
        try:
            # b64decode raises binascii.Error (a ValueError) on bad padding.
            docx_path.write_bytes(base64.b64decode(docx_base64))
        except ValueError as exc:
            return {"status": "error", "errors": [f"Invalid docx_base64: {exc}"], "warnings": []}

        try:
            m = load_manifest(mp)
        except ManifestError as exc:
            return {"status": "error", "errors": [str(exc)], "warnings": []}

        result = import_document(m, docx_path)
        if result.success:
            import contextlib

            files_md: dict[str, str] = {}
            for f in result.output_files:
                # Best effort: an output file that cannot be read is left out
                # of the response instead of failing the whole import.
                with contextlib.suppress(Exception):
                    files_md[Path(f).name] = Path(f).read_text(encoding="utf-8")
            return {
                "status": "ok",
                "files": files_md,
                "mapping_status": result.mapping_status,
                "warnings": result.warnings,
                "errors": [],
            }
        return {"status": "error", "errors": ["Import failed"], "warnings": result.warnings}
@mcp.tool()
def compare(
    manifest_yaml: str,
    docx_base64: str,
    sources: list[dict[str, str]] | None = None,
) -> dict[str, Any]:
    """Compare original Markdown with re-imported DOCX (FR-1008).

    sources: original source files as [{"name": ..., "content": ...}].
    docx_base64: base64-encoded DOCX bytes to re-import.

    Returns a dict with ``status``, ``warnings`` and ``errors``; on success it
    also carries ``has_drift``, ``preserved``, ``degraded``, ``broken`` and
    ``unsupported`` from the drift report. Unsafe source names and malformed
    base64 yield an error result rather than an exception.
    """
    import base64
    import tempfile

    from markidocx.differ import compare as do_compare
    from markidocx.importer import import_document
    from markidocx.manifest import ManifestError, load_manifest

    sources = sources or []
    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        # Resolved once so containment checks are not fooled by symlinked
        # temp locations.
        root = tmp_path.resolve()
        mp = tmp_path / "manifest.yaml"
        mp.write_text(manifest_yaml, encoding="utf-8")

        source_map: dict[str, str] = {}
        for src in sources:
            name = src["name"]
            content = src.get("content", "")
            target = (root / name).resolve()
            # Names come from the caller: an absolute path or "../" segments
            # must not be allowed to write files outside the temp project.
            if root not in target.parents:
                return {
                    "status": "error",
                    "errors": [f"Source name escapes the project directory: {name!r}"],
                    "warnings": [],
                }
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(content, encoding="utf-8")
            source_map[name] = content

        # Best-effort scaffolding of the output directory and of stubs for
        # manifest sources the caller did not supply; load_manifest() below
        # reports any real manifest problem.
        try:
            import yaml

            raw = yaml.safe_load(manifest_yaml) or {}
            out_raw = raw.get("output", {})
            out_dir = out_raw.get("dir", "./dist") if isinstance(out_raw, dict) else "./dist"
            # Joining handles a leading "./"; str.lstrip("./") would also eat
            # the dots of names such as ".build" or "../out".
            out_path = (root / out_dir).resolve()
            if out_path == root or root in out_path.parents:
                out_path.mkdir(parents=True, exist_ok=True)
            for entry in raw.get("sources", []):
                sp = entry.get("path") if isinstance(entry, dict) else entry
                if not sp:
                    continue
                stub = (root / sp).resolve()
                if root in stub.parents and not stub.exists():
                    stub.parent.mkdir(parents=True, exist_ok=True)
                    stub.write_text("", encoding="utf-8")
                    source_map.setdefault(sp, "")
        except Exception:
            (tmp_path / "dist").mkdir(exist_ok=True)

        docx_path = tmp_path / "input.docx"
        try:
            # b64decode raises binascii.Error (a ValueError) on bad padding.
            docx_path.write_bytes(base64.b64decode(docx_base64))
        except ValueError as exc:
            return {"status": "error", "errors": [f"Invalid docx_base64: {exc}"], "warnings": []}

        try:
            m = load_manifest(mp)
        except ManifestError as exc:
            return {"status": "error", "errors": [str(exc)], "warnings": []}

        # Originals are looked up by the base name of each manifest source.
        original_md = "\n\n".join(source_map.get(s.path.name, "") for s in m.sources)
        result = import_document(m, docx_path)
        if not result.success:
            return {
                "status": "error",
                "errors": ["Import failed — cannot compare"],
                "warnings": result.warnings,
            }

        reimported_parts = []
        for f in result.output_files:
            # Best effort: an unreadable output file is compared as empty.
            try:
                reimported_parts.append(Path(f).read_text(encoding="utf-8"))
            except Exception:
                reimported_parts.append("")

        report = do_compare(original_md, "\n\n".join(reimported_parts))
        return {
            "status": "ok",
            "has_drift": report.has_drift,
            "preserved": report.preserved,
            "degraded": report.degraded,
            "broken": report.broken,
            "unsupported": report.unsupported,
            "warnings": [],
            "errors": [],
        }
@mcp.tool()
def run_tests(manifest_yaml: str, sources: list[dict[str, str]]) -> dict[str, Any]:
    """Run the end-to-end test harness (FR-1009).

    Implemented as the "single-file-roundtrip" workflow; the workflow result
    dict is returned unchanged.
    """
    outcome: dict[str, Any] = invoke_workflow(  # type: ignore[assignment]
        "single-file-roundtrip",
        manifest_yaml,
        sources,
    )
    return outcome
@mcp.tool()
def invoke_workflow(
    workflow_name: str,
    manifest_yaml: str,
    sources: list[dict[str, str]],
) -> dict[str, Any]:
    """Invoke a named composite workflow (FR-1012).

    Returns the run id, classification, per-step status and aggregate output
    of the workflow. ``status`` is "error" when the run is classified as
    "failed" or the invocation raises WorkflowError, and "ok" otherwise.
    """
    from markidocx.workflows import WorkflowError, run_workflow_from_content

    try:
        run = run_workflow_from_content(workflow_name, manifest_yaml, sources)
    except WorkflowError as exc:
        return {"status": "error", "errors": [str(exc)], "warnings": []}

    step_summaries = []
    for step in run.steps:
        step_summaries.append({"name": step.name, "status": step.status, "error": step.error})

    if run.classification != "failed":
        status = "ok"
    else:
        status = "error"

    return {
        "status": status,
        "run_id": run.run_id,
        "workflow_name": run.workflow_name,
        "classification": run.classification,
        "steps": step_summaries,
        "aggregate_output": run.aggregate_output,
        "warnings": [],
        "errors": [],
    }
@mcp.tool()
def get_evidence(run_id: str) -> dict[str, Any]:
    """Retrieve evidence artifacts for a completed run (FR-1013).

    Returns a dict with ``status`` ("ok" or "not_found"), ``run_id``,
    ``reports``, ``warnings`` and ``errors``. Every key is present in both
    outcomes so callers can read the result without checking for keys.
    """
    from markidocx.evidence import EvidenceStore

    store = EvidenceStore()
    reports = store.list_reports(run_id)
    if not reports:
        return {
            "status": "not_found",
            "run_id": run_id,
            "reports": [],
            "warnings": [f"No evidence found for run_id: {run_id}"],
            # Present for parity with every other tool result.
            "errors": [],
        }
    return {
        "status": "ok",
        "run_id": run_id,
        "reports": [r.to_dict() for r in reports],
        "warnings": [],
        "errors": [],
    }
# ---------------------------------------------------------------------------
# MCP resources (FR-1011)
# ---------------------------------------------------------------------------
@mcp.resource("markidocx://capabilities")
def resource_capabilities() -> str:
    """Capabilities: supported feature levels and families.

    Returns a JSON object with the package version, every FeatureLevel value
    and the sorted supported family names.
    """
    import json

    level_names = [level.value for level in FeatureLevel]
    payload = {
        "version": __version__,
        "feature_levels": level_names,
        "families": sorted(SUPPORTED_FAMILIES),
    }
    return json.dumps(payload)
@mcp.resource("markidocx://templates")
def resource_templates() -> str:
    """Template family metadata.

    Returns a JSON array with one {"name", "description"} object per family
    known to a freshly created FamilyRegistry.
    """
    import json

    catalogue = []
    for family in FamilyRegistry().list_families():
        catalogue.append({"name": family.name, "description": family.description})
    return json.dumps(catalogue)

395
src/markidocx/rest.py Normal file
View File

@@ -0,0 +1,395 @@
"""REST service for markidocx (FR-900)."""
from __future__ import annotations
import base64
import tempfile
from pathlib import Path
from typing import Any
import yaml
from fastapi import FastAPI
from pydantic import BaseModel
from markidocx import __version__
from markidocx.manifest import SUPPORTED_FAMILIES, FeatureLevel
from markidocx.templates import FamilyRegistry
# ---------------------------------------------------------------------------
# Response envelope (FR-912)
# ---------------------------------------------------------------------------
# Uniform response body used by the REST endpoints in this module (FR-912).
# Comments are used instead of a docstring on purpose: pydantic would publish
# a class docstring as the schema description.
class ResponseEnvelope(BaseModel):
    # "ok", "error" or "not_found" in the handlers of this module
    status: str
    # endpoint-specific payload; None on errors
    outputs: Any = None
    # non-fatal messages
    warnings: list[str] = []
    # fatal messages; empty on success
    errors: list[str] = []
    # request context echoed back, plus endpoint-specific additions
    context: dict[str, Any] = {}
def _ok(
    outputs: Any = None,
    warnings: list[str] | None = None,
    context: dict[str, Any] | None = None,
) -> ResponseEnvelope:
    """Build a success envelope (status "ok", no errors).

    ``warnings`` and ``context`` fall back to an empty list / dict when they
    are omitted or empty.
    """
    envelope_fields: dict[str, Any] = {
        "status": "ok",
        "outputs": outputs,
        "warnings": warnings if warnings else [],
        "errors": [],
        "context": context if context else {},
    }
    return ResponseEnvelope(**envelope_fields)
def _error(
    errors: list[str],
    warnings: list[str] | None = None,
    context: dict[str, Any] | None = None,
) -> ResponseEnvelope:
    """Build a failure envelope (status "error", no outputs).

    ``warnings`` and ``context`` fall back to an empty list / dict when they
    are omitted or empty.
    """
    envelope_fields: dict[str, Any] = {
        "status": "error",
        "outputs": None,
        "warnings": warnings if warnings else [],
        "errors": errors,
        "context": context if context else {},
    }
    return ResponseEnvelope(**envelope_fields)
# ---------------------------------------------------------------------------
# Request models
# ---------------------------------------------------------------------------
# Request body for POST /validate.
class ValidateRequest(BaseModel):
    # raw manifest YAML text
    manifest_yaml: str
    # caller-supplied context, echoed back in the response envelope
    context: dict[str, Any] = {}
# Request body for POST /build.
class BuildRequest(BaseModel):
    # raw manifest YAML text
    manifest_yaml: str
    # Markdown source files to write next to the manifest
    sources: list[dict[str, str]] = []  # [{"name": "...", "content": "..."}]
    # caller-supplied context, echoed back in the response envelope
    context: dict[str, Any] = {}
# Request body for POST /import.
class ImportRequest(BaseModel):
    # raw manifest YAML text
    manifest_yaml: str
    # base64-encoded bytes of the DOCX to import
    docx_base64: str
    # caller-supplied context, echoed back in the response envelope
    context: dict[str, Any] = {}
# Request body for POST /compare.
class CompareRequest(BaseModel):
    # raw manifest YAML text
    manifest_yaml: str
    # base64-encoded bytes of the DOCX to re-import
    docx_base64: str
    # same [{"name": ..., "content": ...}] shape as BuildRequest.sources
    sources: list[dict[str, str]] = []  # original source content for comparison
    # caller-supplied context, echoed back in the response envelope
    context: dict[str, Any] = {}
# Request body for POST /templates/register.
class RegisterTemplateRequest(BaseModel):
    # family name; the handler also uses it as the stem of the temp .docx file
    name: str
    # base64-encoded bytes of the template DOCX
    docx_base64: str
    # optional human-readable description of the family
    description: str = ""
    # caller-supplied context, echoed back in the response envelope
    context: dict[str, Any] = {}
# Request body for POST /workflows/{workflow_name}.
class WorkflowInvokeRequest(BaseModel):
    # raw manifest YAML text
    manifest_yaml: str
    # same [{"name": ..., "content": ...}] shape as BuildRequest.sources
    sources: list[dict[str, str]] = []
    # caller-supplied context, echoed back in the response envelope
    context: dict[str, Any] = {}
# ---------------------------------------------------------------------------
# App factory
# ---------------------------------------------------------------------------
def _write_tmp_project(
    tmp_path: Path,
    manifest_yaml: str,
    sources: list[dict[str, str]],
) -> tuple[Path, dict[str, str]]:
    """Write manifest + sources to tmp_path, return (manifest_path, {name: content}).

    Each entry of *sources* is a {"name": ..., "content": ...} dict; ``name``
    is a path relative to *tmp_path* (sub-directories are created as needed)
    and a missing ``content`` means an empty file. After the explicit sources
    are written, the manifest is parsed on a best-effort basis to create its
    output directory and empty stubs for listed sources that do not exist yet.

    Raises:
        ValueError: if a source name resolves outside *tmp_path* (absolute
            path or "../" segments). Names arrive in request bodies, so they
            must not be able to write elsewhere on disk.
    """
    # Resolved once so containment checks are not fooled by symlinked
    # temp locations.
    root = tmp_path.resolve()

    def _is_inside(candidate: Path, allow_root: bool = False) -> bool:
        """Return True if *candidate* (already resolved) lies within root."""
        return root in candidate.parents or (allow_root and candidate == root)

    mp = tmp_path / "manifest.yaml"
    mp.write_text(manifest_yaml, encoding="utf-8")

    source_map: dict[str, str] = {}
    for src in sources:
        name = src["name"]
        content = src.get("content", "")
        target = (root / name).resolve()
        if not _is_inside(target):
            raise ValueError(f"Source name escapes the project directory: {name!r}")
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content, encoding="utf-8")
        source_map[name] = content

    # Ensure stub sources listed in manifest exist. Deliberately best-effort:
    # any failure here (including malformed YAML) is left for load_manifest()
    # to report, so only the default output directory is guaranteed.
    try:
        raw = yaml.safe_load(manifest_yaml) or {}
        out_raw = raw.get("output", {})
        out_dir = out_raw.get("dir", "./dist") if isinstance(out_raw, dict) else "./dist"
        # Joining handles a leading "./"; str.lstrip("./") would also eat the
        # dots of names such as ".build" or "../out".
        out_path = (root / out_dir).resolve()
        if _is_inside(out_path, allow_root=True):
            out_path.mkdir(parents=True, exist_ok=True)
        for entry in raw.get("sources", []):
            sp = entry.get("path") if isinstance(entry, dict) else entry
            if not sp:
                continue
            stub = (root / sp).resolve()
            # Unsafe manifest paths are simply not stubbed.
            if _is_inside(stub) and not stub.exists():
                stub.parent.mkdir(parents=True, exist_ok=True)
                stub.write_text("", encoding="utf-8")
                source_map.setdefault(sp, "")
    except Exception:
        (tmp_path / "dist").mkdir(exist_ok=True)
    return mp, source_map
def create_app() -> FastAPI:
    """Create and return the FastAPI application.

    All routes are registered as closures on a fresh ``FastAPI`` instance.
    Apart from ``/health``, every route answers with a ``ResponseEnvelope``.
    Endpoints that take a manifest materialise it, together with any supplied
    sources, in a temporary directory that is removed before the response is
    returned. Malformed request payloads (bad base64, unusable file names)
    are reported as an "error" envelope instead of an unhandled exception.
    """
    app = FastAPI(
        title="markidocx",
        version=__version__,
        description="Markdown ↔ DOCX round-trip editing service",
    )

    # ------------------------------------------------------------------
    # T01 — Foundation endpoints (FR-909–912)
    # ------------------------------------------------------------------

    @app.get("/health")
    def health() -> dict[str, str]:
        """Health check (FR-910)."""
        return {"status": "ok", "version": __version__}

    @app.get("/version", response_model=ResponseEnvelope)
    def version() -> ResponseEnvelope:
        """Version information (FR-911)."""
        return _ok(outputs={"version": __version__})

    @app.get("/capabilities", response_model=ResponseEnvelope)
    def capabilities() -> ResponseEnvelope:
        """Capability inspection — feature levels and families (FR-909)."""
        return _ok(
            outputs={
                "feature_levels": [e.value for e in FeatureLevel],
                "families": sorted(SUPPORTED_FAMILIES),
            },
            context={"version": __version__},
        )

    @app.get("/templates", response_model=ResponseEnvelope)
    def templates() -> ResponseEnvelope:
        """List template families (FR-906)."""
        registry = FamilyRegistry()
        families = registry.list_families()
        return _ok(
            outputs=[{"name": f.name, "description": f.description} for f in families]
        )

    @app.get("/styles", response_model=ResponseEnvelope)
    def styles() -> ResponseEnvelope:
        """List available styles (FR-907 stub)."""
        return _ok(outputs=[])

    # ------------------------------------------------------------------
    # T02 — Functional endpoints (FR-902–908, FR-913–916)
    # ------------------------------------------------------------------

    @app.post("/validate", response_model=ResponseEnvelope)
    def validate(req: ValidateRequest) -> ResponseEnvelope:
        """Validate a manifest (FR-902)."""
        from markidocx.manifest import ManifestError, load_manifest

        with tempfile.TemporaryDirectory() as tmp:
            mp, _ = _write_tmp_project(Path(tmp), req.manifest_yaml, [])
            try:
                m = load_manifest(mp)
                ctx = {
                    **req.context,
                    "family": m.project.family,
                    "feature_level": m.project.feature_level.value,
                }
                return _ok(
                    outputs={
                        "project": m.project.name,
                        "family": m.project.family,
                        "feature_level": m.project.feature_level.value,
                    },
                    context=ctx,
                )
            except ManifestError as exc:
                return _error(errors=[str(exc)], context=req.context)

    @app.post("/build", response_model=ResponseEnvelope)
    def build(req: BuildRequest) -> ResponseEnvelope:
        """Build DOCX from Markdown sources (FR-903)."""
        from markidocx.builder import build_document
        from markidocx.manifest import ManifestError, load_manifest

        with tempfile.TemporaryDirectory() as tmp:
            try:
                mp, _ = _write_tmp_project(Path(tmp), req.manifest_yaml, req.sources)
            except ValueError as exc:
                # A file name the filesystem layer refuses (for example one
                # containing a NUL byte) surfaces as ValueError.
                return _error(errors=[f"Invalid source file: {exc}"], context=req.context)
            try:
                m = load_manifest(mp)
            except ManifestError as exc:
                return _error(errors=[str(exc)], context=req.context)

            result = build_document(m)
            ctx = {
                **req.context,
                "family": result.family,
                "feature_level": result.feature_level,
            }
            if result.success:
                # Read the artifact before the temp directory is removed.
                docx_b64 = base64.b64encode(Path(result.output_path).read_bytes()).decode()
                return ResponseEnvelope(
                    status="ok",
                    outputs={"docx_base64": docx_b64, "output_path": str(result.output_path)},
                    warnings=result.warnings,
                    errors=[],
                    context=ctx,
                )
            return _error(errors=result.errors, warnings=result.warnings, context=ctx)

    @app.post("/import", response_model=ResponseEnvelope)
    def import_docx(req: ImportRequest) -> ResponseEnvelope:
        """Import DOCX back to Markdown (FR-904)."""
        from markidocx.importer import import_document
        from markidocx.manifest import ManifestError, load_manifest

        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            mp, _ = _write_tmp_project(tmp_path, req.manifest_yaml, [])
            docx_path = tmp_path / "input.docx"
            try:
                # b64decode raises binascii.Error (a ValueError) on bad padding.
                docx_path.write_bytes(base64.b64decode(req.docx_base64))
            except ValueError as exc:
                return _error(errors=[f"Invalid docx_base64: {exc}"], context=req.context)
            try:
                m = load_manifest(mp)
            except ManifestError as exc:
                return _error(errors=[str(exc)], context=req.context)

            result = import_document(m, docx_path)
            ctx = {**req.context}
            if result.success:
                import contextlib

                files_md: dict[str, str] = {}
                for f in result.output_files:
                    # Best effort: an output file that cannot be read is left
                    # out of the response instead of failing the import.
                    with contextlib.suppress(Exception):
                        files_md[Path(f).name] = Path(f).read_text(encoding="utf-8")
                return ResponseEnvelope(
                    status="ok",
                    outputs={"files": files_md, "mapping_status": result.mapping_status},
                    warnings=result.warnings,
                    errors=[],
                    context=ctx,
                )
            return ResponseEnvelope(
                status="error",
                outputs=None,
                warnings=result.warnings,
                errors=["Import failed"],
                context=ctx,
            )

    @app.post("/compare", response_model=ResponseEnvelope)
    def compare(req: CompareRequest) -> ResponseEnvelope:
        """Compare original Markdown with re-imported DOCX (FR-905)."""
        from markidocx.differ import compare as do_compare
        from markidocx.importer import import_document
        from markidocx.manifest import ManifestError, load_manifest

        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            try:
                mp, source_map = _write_tmp_project(tmp_path, req.manifest_yaml, req.sources)
            except ValueError as exc:
                # A file name the filesystem layer refuses (for example one
                # containing a NUL byte) surfaces as ValueError.
                return _error(errors=[f"Invalid source file: {exc}"], context=req.context)
            docx_path = tmp_path / "input.docx"
            try:
                # b64decode raises binascii.Error (a ValueError) on bad padding.
                docx_path.write_bytes(base64.b64decode(req.docx_base64))
            except ValueError as exc:
                return _error(errors=[f"Invalid docx_base64: {exc}"], context=req.context)
            try:
                m = load_manifest(mp)
            except ManifestError as exc:
                return _error(errors=[str(exc)], context=req.context)

            # Originals are looked up by the base name of each manifest source.
            original_md = "\n\n".join(
                source_map.get(s.path.name, "") for s in m.sources
            )
            result = import_document(m, docx_path)
            if not result.success:
                return _error(
                    errors=["Import failed — cannot compare"],
                    warnings=result.warnings,
                    context=req.context,
                )

            reimported_parts = []
            for f in result.output_files:
                # Best effort: an unreadable output file is compared as empty.
                try:
                    reimported_parts.append(Path(f).read_text(encoding="utf-8"))
                except Exception:
                    reimported_parts.append("")
            reimported_md = "\n\n".join(reimported_parts)

            report = do_compare(original_md, reimported_md)
            return _ok(
                outputs={
                    "has_drift": report.has_drift,
                    "preserved": report.preserved,
                    "degraded": report.degraded,
                    "broken": report.broken,
                    "unsupported": report.unsupported,
                },
                context=req.context,
            )

    @app.post("/templates/register", response_model=ResponseEnvelope)
    def register_template(req: RegisterTemplateRequest) -> ResponseEnvelope:
        """Register a custom template family (FR-908)."""
        from markidocx.templates import RegistrationError

        # The name becomes a file name below, so it must be a single path
        # component; otherwise the request could write a .docx anywhere.
        if req.name in {"", ".", ".."} or Path(req.name).name != req.name:
            return _error(
                errors=[f"Invalid template name: {req.name!r}"],
                context=req.context,
            )

        with tempfile.TemporaryDirectory() as tmp:
            tmpl_path = Path(tmp) / f"{req.name}.docx"
            try:
                # b64decode raises binascii.Error (a ValueError) on bad padding.
                tmpl_path.write_bytes(base64.b64decode(req.docx_base64))
            except ValueError as exc:
                return _error(errors=[f"Invalid docx_base64: {exc}"], context=req.context)
            # NOTE(review): the registry is created per request and the
            # template file lives in a temp directory that is deleted when
            # this block exits, so the registration does not appear to
            # outlive the request — confirm whether persistence is intended.
            registry = FamilyRegistry()
            try:
                info = registry.register(tmpl_path, req.name, req.description)
                return _ok(
                    outputs={"name": info.name, "description": info.description},
                    context=req.context,
                )
            except RegistrationError as exc:
                return _error(errors=[str(exc)], context=req.context)

    @app.post("/workflows/{workflow_name}", response_model=ResponseEnvelope)
    def invoke_workflow(workflow_name: str, req: WorkflowInvokeRequest) -> ResponseEnvelope:
        """Invoke a composite workflow by name (FR-913)."""
        from markidocx.workflows import WorkflowError, run_workflow_from_content

        try:
            result = run_workflow_from_content(workflow_name, req.manifest_yaml, req.sources)
            ctx = {**req.context, "workflow": workflow_name, "run_id": result.run_id}
            return ResponseEnvelope(
                # Only a "failed" classification is reported as an error.
                status="ok" if result.classification != "failed" else "error",
                outputs={
                    "run_id": result.run_id,
                    "workflow_name": result.workflow_name,
                    "classification": result.classification,
                    "steps": [
                        {"name": s.name, "status": s.status, "error": s.error}
                        for s in result.steps
                    ],
                    "aggregate_output": result.aggregate_output,
                },
                warnings=[],
                errors=[],
                context=ctx,
            )
        except WorkflowError as exc:
            return _error(
                errors=[str(exc)],
                context={**req.context, "workflow": workflow_name},
            )

    @app.get("/evidence/{run_id}", response_model=ResponseEnvelope)
    def get_evidence(run_id: str) -> ResponseEnvelope:
        """Retrieve evidence artifacts for a completed run (FR-914)."""
        from markidocx.evidence import EvidenceStore

        store = EvidenceStore()
        reports = store.list_reports(run_id)
        if not reports:
            return ResponseEnvelope(
                status="not_found",
                outputs=None,
                warnings=[f"No evidence found for run_id: {run_id}"],
                errors=[],
                context={"run_id": run_id},
            )
        return _ok(
            outputs={"run_id": run_id, "reports": [r.to_dict() for r in reports]},
            context={"run_id": run_id},
        )

    return app

101
src/markidocx/templates.py Normal file
View File

@@ -0,0 +1,101 @@
"""Template family registry for markidocx (FR-600)."""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from docx import Document
from docx.document import Document as DocxDocument
from docx.shared import Pt
# Families available without registration: family name -> short description.
# FamilyRegistry seeds itself from this mapping.
BUILT_IN_FAMILIES: dict[str, str] = {
    "article": "Single-document article layout",
    "book": "Multi-chapter book layout",
    "website": "Web-optimised document layout",
}
@dataclass
class FamilyInfo:
    """Metadata describing one template family."""

    # family identifier, e.g. "article"
    name: str
    # human-readable summary of the family
    description: str
    # custom .docx template; None for families without a template file
    template_path: Path | None = None
class RegistrationError(Exception):
    """Raised when template registration fails.

    FamilyRegistry.register() raises it when the template file does not exist
    or does not have a ``.docx`` suffix.
    """
class FamilyRegistry:
    """In-memory catalogue of DOCX template families (FR-602–FR-608).

    A new registry starts with the built-in families; custom families added
    through ``register`` live only on that registry instance.
    """

    def __init__(self) -> None:
        self._families: dict[str, FamilyInfo] = {}
        for family_name, summary in BUILT_IN_FAMILIES.items():
            self._families[family_name] = FamilyInfo(name=family_name, description=summary)

    def list_families(self) -> list[FamilyInfo]:
        """Return every known family as a new list (FR-603)."""
        return [*self._families.values()]

    def get(self, name: str) -> FamilyInfo | None:
        """Look up a family by name; None when it is unknown (FR-604)."""
        try:
            return self._families[name]
        except KeyError:
            return None

    def register(self, path: Path, name: str, description: str = "") -> FamilyInfo:
        """Add (or replace) a custom template family (FR-605).

        Raises RegistrationError when *path* does not exist or does not carry
        a ``.docx`` suffix; the file contents are not inspected here.
        """
        if not path.exists():
            raise RegistrationError(f"Template file not found: {path}")
        has_docx_suffix = path.suffix.lower() == ".docx"
        if not has_docx_suffix:
            raise RegistrationError(f"Template must be a .docx file: {path}")
        entry = FamilyInfo(name=name, description=description, template_path=path)
        self._families[name] = entry
        return entry

    def create_document(self, family: str) -> DocxDocument:
        """Create a python-docx Document for the named family.

        A family with an existing custom template file is opened from that
        file. Any other family, including unknown names, gets a blank
        document with the family defaults applied.
        """
        entry = self._families.get(family)
        custom_template = entry.template_path if entry else None
        if custom_template and custom_template.exists():
            return Document(str(custom_template))
        blank = Document()
        _apply_family_defaults(blank, family)
        return blank
def _apply_family_defaults(doc: DocxDocument, family: str) -> None:
    """Apply minimal "Normal" style defaults for the built-in families.

    Unset font size / name on the Normal style become 11 pt / "Calibri".
    "book" then forces 12 pt, "website" forces "Arial" at 10 pt, and any
    other family keeps the baseline. A document without a Normal style is
    left untouched.
    """
    style_table = doc.styles

    # Baseline: only fill in values the document does not define itself.
    try:
        body_style = style_table["Normal"]
        if body_style.font.size is None:
            body_style.font.size = Pt(11)
        if body_style.font.name is None:
            body_style.font.name = "Calibri"
    except KeyError:
        pass

    # Per-family overrides, applied in the listed order. Sizes are kept as
    # plain point values and converted only when they are assigned.
    family_overrides = {
        "book": (("size", 12),),
        "website": (("name", "Arial"), ("size", 10)),
    }
    overrides = family_overrides.get(family)
    if overrides is None:
        return
    try:
        body_style = style_table["Normal"]
        for attribute, value in overrides:
            if attribute == "size":
                body_style.font.size = Pt(value)
            else:
                body_style.font.name = value
    except KeyError:
        pass

376
src/markidocx/workflows.py Normal file
View File

@@ -0,0 +1,376 @@
"""Composite workflow orchestration for markidocx (FR-1300)."""
from __future__ import annotations
import tempfile
import uuid
from dataclasses import dataclass, field
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from markidocx.evidence import EvidenceStore, ReportContext
# Workflow names accepted by run_workflow() and run_workflow_from_content();
# any other name is rejected with WorkflowError.
SUPPORTED_WORKFLOWS = {
    "single-file-roundtrip",
    "multi-file-roundtrip",
    "release-regression",
    "family-switch-build",
}
# Outcome label stored on WorkflowResult.classification. It is a plain ``str``
# alias, so the values listed on the right are a convention, not enforced.
WorkflowClassification = str  # "full" | "with-fallback" | "partial" | "failed"
class WorkflowError(Exception):
    """Raised for invalid workflow invocations.

    In this module it is raised when the requested workflow name is not in
    SUPPORTED_WORKFLOWS.
    """
@dataclass
class WorkflowStep:
    """Record of one step executed (or attempted) within a workflow run."""

    # step label, e.g. "validate", "build", "import", "compare", "build:<family>"
    name: str
    status: str  # "executed" | "skipped" | "failed"
    # step-specific details; the workflows in this module store a dict here
    output: Any = None
    # failure message; None when the step did not fail
    error: str | None = None
@dataclass
class WorkflowResult:
    """Outcome of one workflow run."""

    # unique id of the run (a UUID4 string when created by run_workflow)
    run_id: str
    # name of the workflow that produced this result
    workflow_name: str
    # start time as an ISO-8601 string (UTC when created by run_workflow)
    timestamp: str
    # overall outcome label, see WorkflowClassification
    classification: WorkflowClassification
    # steps in execution order
    steps: list[WorkflowStep] = field(default_factory=list)
    # workflow-specific summary of the run
    aggregate_output: dict[str, Any] = field(default_factory=dict)
def run_workflow(
    name: str,
    manifest_path: Path,
    evidence_store: EvidenceStore | None = None,
) -> WorkflowResult:
    """Run the workflow called *name* against a manifest file (FR-1308).

    A fresh run id and UTC timestamp are generated for every call. When no
    evidence store is passed, a default ``EvidenceStore()`` is used.

    Raises WorkflowError for unknown workflow names.
    """
    if name not in SUPPORTED_WORKFLOWS:
        raise WorkflowError(
            f"Unknown workflow '{name}'. Supported: {sorted(SUPPORTED_WORKFLOWS)}"
        )

    store = evidence_store or EvidenceStore()
    run_id = str(uuid.uuid4())
    started_at = datetime.now(UTC).isoformat()

    handlers = {
        "single-file-roundtrip": _single_file_roundtrip,
        "multi-file-roundtrip": _multi_file_roundtrip,
        "release-regression": _release_regression,
    }
    # Every remaining supported name is handled as family-switch-build.
    handler = handlers.get(name, _family_switch_build)
    return handler(run_id, started_at, manifest_path, store)
def run_workflow_from_content(
    name: str,
    manifest_yaml: str,
    sources: list[dict[str, str]],
    evidence_store: EvidenceStore | None = None,
) -> WorkflowResult:
    """Run a workflow given raw YAML and source content (used by REST/MCP).

    Writes a temporary project directory (manifest, ``dist/`` and the given
    sources) and delegates to run_workflow(). Each source is a
    {"name": ..., "content": ...} dict; ``name`` is a path relative to the
    project directory and a missing ``content`` means an empty file.

    Raises:
        WorkflowError: for an unknown workflow name, or for a source name
            that resolves outside the temporary project directory.
    """
    # Checked up front so no temp directory is created for a bad name.
    if name not in SUPPORTED_WORKFLOWS:
        raise WorkflowError(
            f"Unknown workflow '{name}'. Supported: {sorted(SUPPORTED_WORKFLOWS)}"
        )

    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        # Resolved once so containment checks are not fooled by symlinked
        # temp locations.
        root = tmp_path.resolve()
        mp = tmp_path / "manifest.yaml"
        mp.write_text(manifest_yaml, encoding="utf-8")
        (tmp_path / "dist").mkdir()
        for src in sources:
            file_name = src["name"]
            target = (root / file_name).resolve()
            # Names arrive from REST/MCP callers: an absolute path or "../"
            # segments must not be allowed to write outside the project.
            if root not in target.parents:
                raise WorkflowError(
                    f"Source name escapes the project directory: {file_name!r}"
                )
            target.parent.mkdir(parents=True, exist_ok=True)
            # Missing content means an empty file, as in the REST and MCP
            # handlers.
            target.write_text(src.get("content", ""), encoding="utf-8")
        return run_workflow(name, mp, evidence_store)
# ---------------------------------------------------------------------------
# Individual workflow implementations
# ---------------------------------------------------------------------------
def _single_file_roundtrip(
    run_id: str,
    ts: str,
    manifest_path: Path,
    store: EvidenceStore,
) -> WorkflowResult:
    """validate → build → import → compare (FR-1301).

    Each step appends a WorkflowStep and saves a report to *store* under
    *run_id*. The run stops at the first failing step:

    * validate or build fails -> classification "failed"
    * import fails            -> classification "partial"
    * all four steps ran      -> "full" when there is no drift, no warning
      and no merged-import fallback, otherwise "with-fallback"
    """
    from markidocx.builder import build_document
    from markidocx.differ import compare as do_compare
    from markidocx.importer import import_document
    from markidocx.manifest import ManifestError, load_manifest

    steps: list[WorkflowStep] = []
    # NOTE(review): the run id is passed as the ``workflow`` field here —
    # confirm against ReportContext whether the workflow name was intended.
    ctx = ReportContext(workflow=run_id)

    def _finish(
        classification: WorkflowClassification,
        aggregate: dict[str, Any],
    ) -> WorkflowResult:
        """Assemble the result from the steps recorded so far."""
        return WorkflowResult(
            run_id=run_id,
            workflow_name="single-file-roundtrip",
            timestamp=ts,
            classification=classification,
            steps=steps,
            aggregate_output=aggregate,
        )

    # Step 1: validate
    try:
        m = load_manifest(manifest_path)
        steps.append(WorkflowStep(name="validate", status="executed", output={"project": m.project.name}))
        store.save_report(
            run_id,
            "validation",
            {"status": "ok", "project": m.project.name, "errors": [], "warnings": []},
            ctx,
        )
    except ManifestError as exc:
        steps.append(WorkflowStep(name="validate", status="failed", error=str(exc)))
        store.save_report(run_id, "validation", {"status": "error", "errors": [str(exc)], "warnings": []}, ctx)
        return _finish("failed", {"error": str(exc)})

    # Step 2: build
    build_result = build_document(m)
    steps.append(
        WorkflowStep(
            name="build",
            status="executed" if build_result.success else "failed",
            output={"output_path": str(build_result.output_path), "warnings": build_result.warnings},
            error="; ".join(build_result.errors) if not build_result.success else None,
        )
    )
    store.save_report(
        run_id,
        "build",
        {
            "status": "ok" if build_result.success else "error",
            "output_path": str(build_result.output_path),
            "warnings": build_result.warnings,
            "errors": build_result.errors,
            "family": build_result.family,
            "feature_level": build_result.feature_level,
        },
        ctx,
    )
    if not build_result.success:
        return _finish("failed", {"errors": build_result.errors})

    # Step 3: import
    import_result = import_document(m, build_result.output_path)
    steps.append(
        WorkflowStep(
            name="import",
            status="executed" if import_result.success else "failed",
            output={"mapping_status": import_result.mapping_status, "warnings": import_result.warnings},
            # A failed step always carries a message, like the build step.
            error=None if import_result.success else "Import failed",
        )
    )
    store.save_report(
        run_id,
        "import",
        {
            "status": "ok" if import_result.success else "error",
            "mapping_status": import_result.mapping_status,
            "output_files": [str(f) for f in import_result.output_files],
            "warnings": import_result.warnings,
            "errors": [],
        },
        ctx,
    )
    if not import_result.success:
        return _finish("partial", {"warnings": import_result.warnings})

    # Step 4: compare
    original_md = "\n\n".join(s.path.read_text(encoding="utf-8") for s in m.sources)
    reimported_md = "\n\n".join(
        Path(f).read_text(encoding="utf-8") for f in import_result.output_files
    )
    drift = do_compare(original_md, reimported_md)
    steps.append(
        WorkflowStep(
            name="compare",
            status="executed",
            output={
                "has_drift": drift.has_drift,
                "preserved": drift.preserved,
                "degraded": drift.degraded,
                "broken": drift.broken,
            },
        )
    )
    store.save_report(
        run_id,
        "drift",
        {
            "status": "ok",
            "has_drift": drift.has_drift,
            "preserved": drift.preserved,
            "degraded": drift.degraded,
            "broken": drift.broken,
            "unsupported": drift.unsupported,
            "warnings": [],
            "errors": [],
        },
        ctx,
    )

    has_fallback = import_result.mapping_status == "merged"
    has_warnings = bool(build_result.warnings or import_result.warnings)
    # The original conditional returned "with-fallback" from both arms of the
    # drift/warning case, so the outcome reduces to this single test.
    # NOTE(review): if drift or warnings without a merged fallback were meant
    # to classify differently (e.g. "partial"), that was never implemented.
    classification: WorkflowClassification
    if drift.has_drift or has_warnings or has_fallback:
        classification = "with-fallback"
    else:
        classification = "full"

    return _finish(
        classification,
        {
            "build": {"output_path": str(build_result.output_path), "family": build_result.family},
            "import": {"mapping_status": import_result.mapping_status},
            "drift": {"has_drift": drift.has_drift},
        },
    )
def _multi_file_roundtrip(
    run_id: str,
    ts: str,
    manifest_path: Path,
    store: EvidenceStore,
) -> WorkflowResult:
    """inspect → validate → build → import → redistribute (or fallback) → compare (FR-1302).

    Runs the single-file round-trip and relabels its result; multi-file
    redistribution is handled inside import_document already.
    """
    outcome = _single_file_roundtrip(run_id, ts, manifest_path, store)
    outcome.workflow_name = "multi-file-roundtrip"
    return outcome
def _release_regression(
    run_id: str,
    ts: str,
    manifest_path: Path,
    store: EvidenceStore,
) -> WorkflowResult:
    """End-to-end regression on the stable documentation corpus (FR-1306).

    Runs the single-file round-trip on the given manifest and relabels the
    result as "release-regression".
    """
    outcome = _single_file_roundtrip(run_id, ts, manifest_path, store)
    outcome.workflow_name = "release-regression"
    return outcome
def _family_switch_build(
    run_id: str,
    ts: str,
    manifest_path: Path,
    store: EvidenceStore,
) -> WorkflowResult:
    """Build under all compatible families and report separately (FR-1307).

    The manifest is loaded once, then rebuilt once per entry of
    SUPPORTED_FAMILIES (in sorted order) with only the project family
    swapped. Every build gets its own WorkflowStep and its own
    ``build_<family>`` report in *store*.

    Classification:

    * "full"          - every build succeeded without warnings
    * "with-fallback" - every build succeeded, at least one with warnings
    * "partial"       - some builds succeeded, some failed
    * "failed"        - the manifest is invalid or no build succeeded
    """
    from markidocx.builder import build_document
    from markidocx.manifest import (
        SUPPORTED_FAMILIES,
        ManifestError,
        ProjectConfig,
        load_manifest,
    )

    steps: list[WorkflowStep] = []
    # NOTE(review): the run id is passed as the ``workflow`` field here —
    # confirm against ReportContext whether the workflow name was intended.
    ctx = ReportContext(workflow=run_id)

    try:
        m = load_manifest(manifest_path)
    except ManifestError as exc:
        return WorkflowResult(
            run_id=run_id,
            workflow_name="family-switch-build",
            timestamp=ts,
            classification="failed",
            steps=[WorkflowStep(name="validate", status="failed", error=str(exc))],
            aggregate_output={"error": str(exc)},
        )

    build_outputs: dict[str, Any] = {}
    all_success = True
    any_success = False
    any_warning = False

    for family in sorted(SUPPORTED_FAMILIES):
        # Same manifest, only the family differs.
        m_family = type(m)(
            project=ProjectConfig(
                name=m.project.name,
                feature_level=m.project.feature_level,
                family=family,
            ),
            sources=m.sources,
            output_dir=m.output_dir,
            metadata=m.metadata,
        )
        result = build_document(m_family)
        step_status = "executed" if result.success else "failed"
        steps.append(
            WorkflowStep(
                name=f"build:{family}",
                status=step_status,
                output={"output_path": str(result.output_path), "warnings": result.warnings},
                error="; ".join(result.errors) if not result.success else None,
            )
        )
        store.save_report(
            run_id,
            f"build_{family}",
            {
                "status": "ok" if result.success else "error",
                "family": family,
                "output_path": str(result.output_path),
                "warnings": result.warnings,
                "errors": result.errors,
            },
            ctx,
        )
        build_outputs[family] = {
            "success": result.success,
            "output_path": str(result.output_path),
            "warnings": result.warnings,
        }
        if result.success:
            any_success = True
        else:
            all_success = False
        if result.warnings:
            any_warning = True

    classification: WorkflowClassification
    if all_success and not any_warning:
        classification = "full"
    elif all_success:
        classification = "with-fallback"
    elif any_success:
        classification = "partial"
    else:
        # Every build failed. Testing ``build_outputs`` for truthiness here
        # would always pass, because failed builds are recorded in it too.
        classification = "failed"

    return WorkflowResult(
        run_id=run_id,
        workflow_name="family-switch-build",
        timestamp=ts,
        classification=classification,
        steps=steps,
        aggregate_output={"builds": build_outputs},
    )