generated from coulomb/repo-seed
feat: WP-0001 + WP-0002 complete — LEVEL1 core + service interfaces
WP-0001 (Foundation & LEVEL1 Core):
- manifest model (FR-100), MD→DOCX builder (FR-200), DOCX→MD importer
(FR-300/400), template family registry (FR-600), drift detector (FR-700),
CLI wiring, pre-commit config, CI skeleton, regression harness
WP-0002 (Service Interfaces & Workflow Orchestration):
- REST service via FastAPI (FR-900): /health, /version, /capabilities,
/templates, /styles, /validate, /build, /import, /compare,
/templates/register, /workflows/{name}, /evidence/{run_id}
- Evidence & report store (FR-1400): JSON-backed, per-run, retrievable
through all interfaces, classification (pass/warnings/failed)
- Composite workflow orchestration (FR-1300): single-file-roundtrip,
multi-file-roundtrip, release-regression, family-switch-build
- MCP server via FastMCP (FR-1000): all tools + resources
- CLI additions: `markidocx serve`, `markidocx workflow`, `markidocx mcp`
- Interface parity tests: CLI / REST / MCP produce equivalent results
135 tests passing, ruff + mypy clean.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
232
src/markidocx/builder.py
Normal file
232
src/markidocx/builder.py
Normal file
@@ -0,0 +1,232 @@
|
||||
"""MD→DOCX builder for markidocx (FR-200, FR-501–508)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
import mistune
|
||||
from docx.document import Document as DocxDocument
|
||||
from docx.shared import Pt, RGBColor
|
||||
|
||||
from markidocx.manifest import FeatureLevel, Manifest
|
||||
from markidocx.templates import FamilyRegistry
|
||||
|
||||
|
||||
@dataclass
class BuildResult:
    """Outcome of one MD→DOCX build.

    ``warnings`` and ``errors`` default to a fresh empty list per instance.
    """

    # Whether the build succeeded.
    success: bool
    # Path of the generated DOCX file.
    output_path: Path
    # Template family the document was built with.
    family: str
    # Feature level of the project, as a plain string.
    feature_level: str
    # Non-fatal diagnostics collected during the build.
    warnings: list[str] = field(default_factory=list)
    # Fatal diagnostics collected during the build.
    errors: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def build_document(manifest: Manifest) -> BuildResult:
    """Build a DOCX file from the Markdown sources described by *manifest*.

    The sources are read in manifest order and joined with blank lines, rendered
    into a document obtained from the template family registry, and saved as
    ``<output_dir>/<project name>.docx``.  ``title`` and ``author`` entries of
    the manifest metadata are copied to the document core properties (FR-207).

    Failures to read a source file or to write the output are reported through
    the returned result (``success=False`` plus an entry in ``errors``) rather
    than raised.  Errors raised by the registry or the renderer still propagate.
    """
    warnings: list[str] = []
    errors: list[str] = []
    project = manifest.project
    output_path = manifest.output_dir / f"{project.name}.docx"

    def _result(success: bool) -> BuildResult:
        return BuildResult(
            success=success,
            output_path=output_path,
            family=project.family,
            feature_level=project.feature_level.value,
            warnings=warnings,
            errors=errors,
        )

    # Compose all source files into one Markdown string.  Every unreadable
    # source is recorded so the caller sees all problems at once.
    parts: list[str] = []
    for src in manifest.sources:
        try:
            parts.append(src.path.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError) as exc:
            errors.append(f"Could not read source {src.path}: {exc}")
    if errors:
        return _result(False)
    markdown_text = "\n\n".join(parts)

    registry = FamilyRegistry()
    doc = registry.create_document(project.family)

    # Propagate metadata (FR-207)
    core_props = doc.core_properties
    if manifest.metadata.get("title"):
        core_props.title = str(manifest.metadata["title"])
    if manifest.metadata.get("author"):
        core_props.author = str(manifest.metadata["author"])

    # Parse and render tokens into the document
    unsupported: list[str] = []
    _render_markdown(doc, markdown_text, project.feature_level, warnings, unsupported)
    warnings.extend(f"Unsupported construct skipped: {item}" for item in unsupported)

    try:
        # Ensure output dir exists before saving.
        manifest.output_dir.mkdir(parents=True, exist_ok=True)
        doc.save(str(output_path))
    except OSError as exc:
        errors.append(f"Could not write {output_path}: {exc}")
        return _result(False)

    return _result(True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Markdown → DOCX rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _render_markdown(
    doc: DocxDocument,
    text: str,
    feature_level: FeatureLevel,
    warnings: list[str],
    unsupported: list[str],
) -> None:
    """Tokenise *text* and render each block-level token into *doc* in order.

    *feature_level*, *warnings* and *unsupported* are handed through unchanged
    to the per-token renderer.
    """
    for block_token in _tokenise(text):
        _render_token(doc, block_token, feature_level, warnings, unsupported)
|
||||
|
||||
|
||||
def _tokenise(text: str) -> list[dict]:  # type: ignore[type-arg]
    """Return the list of block-level AST tokens mistune produces for *text*.

    The ``table`` plugin is enabled explicitly: ``create_markdown`` loads no
    plugins by default, and without it pipe tables arrive as plain paragraphs
    and the ``"table"`` branch of the renderer is never reached.

    Returns an empty list if mistune hands back anything other than a list.
    """
    md = mistune.create_markdown(renderer=None, plugins=["table"])  # AST renderer
    tokens = md(text)
    return tokens if isinstance(tokens, list) else []
|
||||
|
||||
|
||||
def _render_token(
    doc: DocxDocument,
    token: dict,
    feature_level: FeatureLevel,
    warnings: list[str],
    unsupported: list[str],
) -> None:
    """Render one block-level token into *doc*.

    Handles headings, paragraphs, lists, tables, code blocks, block quotes and
    thematic breaks.  HTML and unknown token types are not rendered; their type
    is appended to *unsupported* (FR-508).  *feature_level* and *warnings* are
    accepted for interface symmetry but are not used here.
    """
    token_type = token.get("type", "")

    if token_type == "heading":
        level = token.get("attrs", {}).get("level", 1)
        text = _extract_text(token.get("children", []))
        try:
            doc.add_heading(text, level=level)
        except (KeyError, ValueError):
            # Heading style missing from the template or level out of range:
            # fall back to a plain paragraph.
            doc.add_paragraph(text, style="Normal")

    elif token_type == "paragraph":
        para = doc.add_paragraph(style="Normal")
        _add_inline_runs(para, token.get("children", []))

    elif token_type == "list":
        _render_list(doc, token, depth=1)

    elif token_type == "table":
        _render_table(doc, token)

    elif token_type == "block_code":
        code = token.get("raw", "")
        para = doc.add_paragraph(style="Normal")
        run = para.add_run(code)
        run.font.name = "Courier New"
        run.font.size = Pt(9)

    elif token_type == "block_quote":
        for child in token.get("children", []):
            text = _extract_text(child.get("children", []))
            para = doc.add_paragraph(style="Normal")
            para.add_run(text).italic = True

    elif token_type == "thematic_break":
        doc.add_paragraph("—" * 20, style="Normal")

    elif token_type in ("html_block", "raw_html"):
        unsupported.append(f"html ({token_type})")

    elif token_type == "blank_line":
        pass  # ignore blank lines

    else:
        # Unknown token — surface as unsupported (FR-508)
        unsupported.append(token_type)


def _render_list(doc: DocxDocument, token: dict, depth: int) -> None:
    """Render a list token as one paragraph per item, recursing into sub-lists.

    Nested lists become their own paragraphs instead of having their text glued
    onto the parent item.  Levels below the first try the "<style> 2" /
    "<style> 3" paragraph styles (assumed to exist as in python-docx's default
    template), then the base list style, then no explicit style.
    """
    ordered = token.get("attrs", {}).get("ordered", False)
    base_style = "List Number" if ordered else "List Bullet"
    level_style = base_style if depth <= 1 else f"{base_style} {min(depth, 3)}"

    for item in token.get("children", []):
        item_children = item.get("children", [])
        own_content = [c for c in item_children if c.get("type") != "list"]
        sub_lists = [c for c in item_children if c.get("type") == "list"]

        # Create the paragraph first and style it afterwards so a missing style
        # never leaves a second, stray paragraph in the document.
        para = doc.add_paragraph()
        for candidate in dict.fromkeys((level_style, base_style)):
            try:
                para.style = candidate
            except (KeyError, ValueError):
                continue
            break
        para.text = _extract_text(own_content)

        for sub_list in sub_lists:
            _render_list(doc, sub_list, depth + 1)
|
||||
|
||||
|
||||
def _render_table(doc: DocxDocument, token: dict) -> None:
    """Render a Markdown table token into a DOCX table.

    The table token's children are expected to be a ``table_head`` (holding the
    header cells) followed by a ``table_body`` wrapper that holds the
    ``table_row`` tokens.  Each body row becomes its own DOCX row; row tokens
    placed directly under the table are accepted as well.  Header cells are
    bold.  Nothing is rendered when there are no header cells.
    """
    head_cells: list[dict] = []
    body_rows: list[list[dict]] = []
    for section in token.get("children", []):
        section_type = section.get("type")
        if section_type == "table_head":
            head_cells = section.get("children", [])
        elif section_type == "table_body":
            # Unwrap the body: its children are the rows, not cells.
            body_rows.extend(row.get("children", []) for row in section.get("children", []))
        else:
            body_rows.append(section.get("children", []))

    if not head_cells:
        return

    all_rows = [head_cells, *body_rows]
    num_cols = max(len(row) for row in all_rows)
    tbl = doc.add_table(rows=len(all_rows), cols=num_cols)
    tbl.style = "Table Grid"

    for r_idx, row in enumerate(all_rows):
        for c_idx, cell_token in enumerate(row):
            cell = tbl.cell(r_idx, c_idx)
            cell.text = _extract_text(cell_token.get("children", []))
            if r_idx == 0:
                for run in cell.paragraphs[0].runs:
                    run.bold = True
|
||||
|
||||
|
||||
def _extract_text(children: list[dict]) -> str:
    """Recursively extract plain text from a list of token children.

    - ``text`` and ``codespan`` tokens contribute their ``raw`` value
      (``codespan`` falls back to its children if it has no ``raw``);
    - ``strong``, ``emphasis`` and ``link`` contribute the text of their children;
    - soft and hard line breaks contribute a single space so words on adjacent
      source lines are not glued together;
    - any other token contributes its ``raw`` value if present, otherwise the
      text of its children.
    """
    parts: list[str] = []
    for child in children:
        child_type = child.get("type", "")
        if child_type == "text":
            parts.append(child.get("raw", ""))
        elif child_type == "codespan":
            # Inline code carries its content in ``raw``.
            parts.append(child.get("raw", "") or _extract_text(child.get("children", [])))
        elif child_type in ("strong", "emphasis", "link"):
            parts.append(_extract_text(child.get("children", [])))
        elif child_type in ("softbreak", "softline", "linebreak"):
            parts.append(" ")
        elif child.get("raw"):
            parts.append(child["raw"])
        elif child.get("children"):
            parts.append(_extract_text(child["children"]))
    return "".join(parts)
|
||||
|
||||
|
||||
def _add_inline_runs(para, children: list[dict]) -> None:
    """Add one run to *para* per inline token in *children*.

    ``strong`` → bold, ``emphasis`` → italic, ``codespan`` → Courier New,
    ``link`` → coloured "text (url)".  Soft line breaks become a single space
    and hard line breaks a newline inside the run.  Any other token is added as
    plain text when it has a non-empty ``raw`` value, otherwise ignored.
    """
    for child in children:
        child_type = child.get("type", "")
        if child_type == "text":
            para.add_run(child.get("raw", ""))
        elif child_type == "strong":
            run = para.add_run(_extract_text(child.get("children", [])))
            run.bold = True
        elif child_type == "emphasis":
            run = para.add_run(_extract_text(child.get("children", [])))
            run.italic = True
        elif child_type == "codespan":
            run = para.add_run(child.get("raw", ""))
            run.font.name = "Courier New"
        elif child_type == "link":
            text = _extract_text(child.get("children", []))
            url = child.get("attrs", {}).get("url", "")
            run = para.add_run(f"{text} ({url})" if url else text)
            run.font.color.rgb = RGBColor(0x00, 0x56, 0xB3)
        elif child_type in ("softbreak", "softline"):
            # mistune's AST names a wrapped source line "softbreak"; the old
            # "softline" spelling is still accepted.  Without this space the
            # words on either side of the break would be joined.
            para.add_run(" ")
        elif child_type == "linebreak":
            para.add_run("\n")
        else:
            raw = child.get("raw", "")
            if raw:
                para.add_run(raw)
|
||||
@@ -3,9 +3,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Optional
|
||||
from typing import Annotated
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
@@ -43,7 +42,7 @@ def validate(
|
||||
typer.echo(json.dumps({"status": "error", "message": str(exc)}))
|
||||
else:
|
||||
err_console.print(f"[red]✗ Manifest error:[/red] {exc}")
|
||||
raise typer.Exit(1)
|
||||
raise typer.Exit(1) from None
|
||||
|
||||
|
||||
@app.command()
|
||||
@@ -62,7 +61,7 @@ def build(
|
||||
typer.echo(json.dumps({"status": "error", "message": str(exc)}))
|
||||
else:
|
||||
err_console.print(f"[red]✗ Manifest error:[/red] {exc}")
|
||||
raise typer.Exit(1)
|
||||
raise typer.Exit(1) from None
|
||||
|
||||
result = build_document(m)
|
||||
if json_output:
|
||||
@@ -107,7 +106,7 @@ def import_docx(
|
||||
typer.echo(json.dumps({"status": "error", "message": str(exc)}))
|
||||
else:
|
||||
err_console.print(f"[red]✗ Manifest error:[/red] {exc}")
|
||||
raise typer.Exit(1)
|
||||
raise typer.Exit(1) from None
|
||||
|
||||
result = import_document(m, docx)
|
||||
if json_output:
|
||||
@@ -141,7 +140,6 @@ def compare(
|
||||
json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
|
||||
) -> None:
|
||||
"""Compare original Markdown with re-imported DOCX (FR-700)."""
|
||||
from markidocx.builder import build_document
|
||||
from markidocx.differ import compare as do_compare
|
||||
from markidocx.importer import import_document
|
||||
from markidocx.manifest import ManifestError, load_manifest
|
||||
@@ -153,7 +151,7 @@ def compare(
|
||||
typer.echo(json.dumps({"status": "error", "message": str(exc)}))
|
||||
else:
|
||||
err_console.print(f"[red]✗ Manifest error:[/red] {exc}")
|
||||
raise typer.Exit(2)
|
||||
raise typer.Exit(2) from None
|
||||
|
||||
# Read original markdown
|
||||
original_parts: list[str] = []
|
||||
@@ -254,7 +252,74 @@ def template_register(
|
||||
typer.echo(json.dumps({"status": "error", "message": str(exc)}))
|
||||
else:
|
||||
err_console.print(f"[red]✗[/red] {exc}")
|
||||
raise typer.Exit(1)
|
||||
raise typer.Exit(1) from None
|
||||
|
||||
|
||||
@app.command()
def serve(
    host: Annotated[str, typer.Option("--host", help="Bind host")] = "127.0.0.1",
    port: Annotated[int, typer.Option("--port", help="Bind port")] = 8000,
    dev: Annotated[bool, typer.Option("--dev", help="Enable auto-reload")] = False,
) -> None:
    """Start the REST service (FR-901).

    With ``--dev`` the app is handed to uvicorn as an import string plus
    ``factory=True``: auto-reload only works when uvicorn can re-import the
    application itself, and it refuses to reload an already-built app object.
    """
    import uvicorn

    if dev:
        uvicorn.run(
            "markidocx.rest:create_app",
            factory=True,
            host=host,
            port=port,
            reload=True,
        )
        return

    from markidocx.rest import create_app

    uvicorn.run(create_app(), host=host, port=port)
|
||||
|
||||
|
||||
@app.command()
def workflow(
    name: Annotated[str, typer.Argument(help="Workflow name")],
    manifest: Annotated[Path, typer.Argument(help="Path to manifest YAML file")],
    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
) -> None:
    """Invoke a named composite workflow (FR-1300).

    Prints the outcome as JSON or as a per-step console listing and exits with
    status 1 when the workflow raised ``WorkflowError`` or was classified
    ``failed``, otherwise with status 0.
    """
    from markidocx.workflows import WorkflowError, run_workflow

    try:
        result = run_workflow(name, manifest)
    except WorkflowError as exc:
        if not json_output:
            err_console.print(f"[red]✗ Workflow error:[/red] {exc}")
        else:
            typer.echo(json.dumps({"status": "error", "message": str(exc)}))
        raise typer.Exit(1) from None

    failed = result.classification == "failed"

    if json_output:
        step_records = [
            {"name": s.name, "status": s.status, "error": s.error}
            for s in result.steps
        ]
        payload = {
            "status": "error" if failed else "ok",
            "run_id": result.run_id,
            "workflow_name": result.workflow_name,
            "classification": result.classification,
            "steps": step_records,
        }
        typer.echo(json.dumps(payload))
    else:
        icon = "[red]✗[/red]" if failed else "[green]✓[/green]"
        console.print(f"{icon} Workflow [bold]{result.workflow_name}[/bold]: {result.classification}")
        step_icons = {"executed": "✓", "failed": "✗"}
        for step in result.steps:
            step_icon = step_icons.get(step.status, "—")
            console.print(f"  {step_icon} {step.name}: {step.status}")
        console.print(f"  run_id: {result.run_id}")

    raise typer.Exit(1 if failed else 0)
|
||||
|
||||
|
||||
@app.command("mcp")
def mcp_serve() -> None:
    """Start the MCP server (FR-1001)."""
    # Imported lazily, like the other service commands in this module.
    from markidocx import mcp_server

    mcp_server.mcp.run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
130
src/markidocx/differ.py
Normal file
130
src/markidocx/differ.py
Normal file
@@ -0,0 +1,130 @@
|
||||
"""Structural drift detection for markidocx (FR-700)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
|
||||
LIST_ITEM_RE = re.compile(r"^(\s*[-*+]|\s*\d+\.)\s+(.+)$", re.MULTILINE)
|
||||
TABLE_ROW_RE = re.compile(r"^\|.+\|$", re.MULTILINE)
|
||||
FOOTNOTE_RE = re.compile(r"\[\^[^\]]+\]")
|
||||
LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
|
||||
|
||||
|
||||
@dataclass
class DriftReport:
    """Result of comparing original Markdown with its re-imported form.

    Each list holds short labels for structural elements; all lists default to
    a fresh empty list per instance.
    """

    # True when at least one element was degraded or broken.
    has_drift: bool
    # Elements found unchanged after the round-trip.
    preserved: list[str] = field(default_factory=list)
    # Elements still present but modified.
    degraded: list[str] = field(default_factory=list)
    # Elements present in the original and missing afterwards.
    broken: list[str] = field(default_factory=list)
    # Constructs the round-trip does not support.
    unsupported: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def compare(original: str, reimported: str) -> DriftReport:
    """Compare *original* Markdown against *reimported* Markdown.

    Classifies each structural element as:
    - preserved:   identical in both
    - degraded:    present but modified
    - broken:      present in original, missing in reimported
    - unsupported: construct not supported by the round-trip (currently never
      populated by this function)

    Entries are reported in order of first appearance in *original*, so the
    same inputs always produce the same report.

    Returns a DriftReport whose ``has_drift`` is true when anything was
    classified as degraded or broken.
    """
    preserved: list[str] = []
    degraded: list[str] = []
    broken: list[str] = []
    unsupported: list[str] = []

    # --- Headings (FR-501) ---
    _compare_sets(
        "heading",
        _extract_headings(original),
        _extract_headings(reimported),
        preserved,
        degraded,
        broken,
    )

    # --- Lists (FR-502) ---
    _compare_sets(
        "list_item",
        _extract_list_items(original),
        _extract_list_items(reimported),
        preserved,
        degraded,
        broken,
    )

    # --- Tables (FR-503) ---
    orig_tables = _count_tables(original)
    reim_tables = _count_tables(reimported)
    if orig_tables == reim_tables:
        if orig_tables > 0:
            preserved.append(f"tables:{orig_tables}")
    elif reim_tables < orig_tables:
        broken.append(f"tables:missing {orig_tables - reim_tables} of {orig_tables}")
    else:
        degraded.append(f"tables:count changed {orig_tables}→{reim_tables}")

    # --- Footnotes (FR-504) ---
    # dict.fromkeys de-duplicates while keeping document order; iterating a
    # plain set would make the report order vary between interpreter runs.
    reim_fn = set(FOOTNOTE_RE.findall(reimported))
    for fn in dict.fromkeys(FOOTNOTE_RE.findall(original)):
        if fn in reim_fn:
            preserved.append(f"footnote:{fn}")
        else:
            broken.append(f"footnote:{fn}")

    # --- Links (FR-506) ---
    reim_links = {m.group(0) for m in LINK_RE.finditer(reimported)}
    for link in dict.fromkeys(m.group(0) for m in LINK_RE.finditer(original)):
        if link in reim_links:
            preserved.append(f"link:{link[:40]}")
        else:
            degraded.append(f"link:lost {link[:40]}")

    return DriftReport(
        has_drift=bool(degraded or broken),
        preserved=preserved,
        degraded=degraded,
        broken=broken,
        unsupported=unsupported,
    )
|
||||
|
||||
|
||||
def _extract_headings(text: str) -> list[str]:
    """Return every ATX heading in *text* as ``"<hashes> <title>"``, title stripped."""
    headings: list[str] = []
    for match in HEADING_RE.finditer(text):
        hashes, title = match.group(1), match.group(2)
        headings.append(f"{'#' * len(hashes)} {title.strip()}")
    return headings
|
||||
|
||||
|
||||
def _extract_list_items(text: str) -> list[str]:
    """Return the stripped text of every list item matched by ``LIST_ITEM_RE``."""
    items: list[str] = []
    for match in LIST_ITEM_RE.finditer(text):
        items.append(match.group(2).strip())
    return items
|
||||
|
||||
|
||||
# A GFM delimiter row: every cell is dashes with optional alignment colons and
# optional surrounding blanks, e.g. "| --- | :-: |".  Requiring a dash in each
# cell keeps blank data rows such as "|  |  |" from being counted as a table.
_TABLE_SEPARATOR_RE = re.compile(r"\|(?:[ \t]*:?-+:?[ \t]*\|)+")


def _count_tables(text: str) -> int:
    """Count the pipe tables in *text*.

    Every table has exactly one header/body delimiter row, so the number of
    lines that are a delimiter row (starting and ending with ``|``) is the
    number of tables.
    """
    return sum(1 for line in text.split("\n") if _TABLE_SEPARATOR_RE.fullmatch(line))
|
||||
|
||||
|
||||
def _compare_sets(
    kind: str,
    orig: list[str],
    reim: list[str],
    preserved: list[str],
    degraded: list[str],
    broken: list[str],
) -> None:
    """Classify each distinct item of *orig* by how often it recurs in *reim*.

    An item seen at least as often in *reim* is preserved, seen fewer times
    (but at least once) is degraded, and not seen at all is broken.  Labels are
    prefixed with *kind*, truncate the item to 60 characters, and are appended
    to the matching output list in order of first appearance in *orig*.
    """

    def tally(items: list[str]) -> dict[str, int]:
        counts: dict[str, int] = {}
        for entry in items:
            counts[entry] = counts.get(entry, 0) + 1
        return counts

    expected = tally(orig)
    found = tally(reim)

    for item, wanted in expected.items():
        seen = found.get(item, 0)
        label = item[:60]
        if seen >= wanted:
            preserved.append(f"{kind}:{label}")
        elif seen == 0:
            broken.append(f"{kind}:missing '{label}'")
        else:
            degraded.append(f"{kind}:partial '{label}' ({seen}/{wanted})")
|
||||
169
src/markidocx/evidence.py
Normal file
169
src/markidocx/evidence.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""Evidence and report storage for markidocx (FR-1400)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
|
||||
ReportType = Literal["validation", "build", "import", "drift"]
|
||||
EvidenceClassification = Literal["pass", "pass-with-warnings", "failed"]
|
||||
|
||||
|
||||
@dataclass
class ReportContext:
    """Optional descriptive context stored alongside a report.

    Every field is optional and defaults to ``None``.
    """

    # Project name.
    project: str | None = None
    # Template family.
    family: str | None = None
    # Feature level, as a plain string.
    feature_level: str | None = None
    # Name of the workflow that produced the report, if any.
    workflow: str | None = None
    # Free-form description of the run context.
    run_context: str | None = None
|
||||
|
||||
|
||||
@dataclass
class RunReport:
    """A single stored report belonging to one run."""

    # Identifier of the run the report belongs to.
    run_id: str
    # Kind of report (stored as a plain string).
    report_type: str
    # Report payload.
    data: dict[str, Any]
    # Creation timestamp, as a string.
    created_at: str
    # Descriptive context; an empty ReportContext when none was given.
    context: ReportContext = field(default_factory=ReportContext)

    def to_dict(self) -> dict[str, Any]:
        """Return the report, including its nested context, as plain dicts."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> RunReport:
        """Rebuild a report from the mapping produced by :meth:`to_dict`.

        *d* is not modified.  A missing or non-dict ``context`` entry yields an
        empty ReportContext.
        """
        payload = dict(d)
        raw_context = payload.pop("context", {})
        if isinstance(raw_context, dict):
            context = ReportContext(**raw_context)
        else:
            context = ReportContext()
        return cls(context=context, **payload)
|
||||
|
||||
|
||||
@dataclass
class EvidenceSet:
    """Assembled evidence from one or more runs (FR-1406–FR-1414)."""

    # Run identifiers the set was assembled from.
    run_ids: list[str]
    # All reports collected for those runs.
    reports: list[RunReport]

    @property
    def classification(self) -> EvidenceClassification:
        """pass / pass-with-warnings / failed (FR-1414).

        ``failed`` when any report has status ``"error"`` or a non-empty
        ``errors`` entry; otherwise ``pass-with-warnings`` when any report has
        a non-empty ``warnings`` entry; otherwise ``pass``.
        """
        if any(r.data.get("status") == "error" or r.data.get("errors") for r in self.reports):
            return "failed"
        if any(r.data.get("warnings") for r in self.reports):
            return "pass-with-warnings"
        return "pass"

    @property
    def composition(self) -> list[dict[str, str]]:
        """Which reports/artifacts are in this set (FR-1407)."""
        return [{"run_id": r.run_id, "type": r.report_type} for r in self.reports]

    @property
    def complete(self) -> bool:
        """Completeness flag (FR-1413); currently true iff the set has any report."""
        return len(self.reports) > 0

    def summary(self) -> dict[str, Any]:
        """Status summary across the set (FR-1408).

        A ``warnings`` / ``errors`` entry that is missing or ``None`` counts as
        empty, matching how :attr:`classification` treats it.
        """
        warnings_count = sum(len(r.data.get("warnings") or []) for r in self.reports)
        errors_count = sum(len(r.data.get("errors") or []) for r in self.reports)
        return {
            "classification": self.classification,
            "run_count": len(self.run_ids),
            "report_count": len(self.reports),
            "complete": self.complete,
            "warnings_count": warnings_count,
            "errors_count": errors_count,
            "composition": self.composition,
        }
|
||||
|
||||
|
||||
class EvidenceStore:
|
||||
"""Persistent evidence layer for markidocx operations (FR-1400)."""
|
||||
|
||||
def __init__(self, base_dir: Path | None = None) -> None:
|
||||
self.base_dir = base_dir or Path(".markidocx") / "evidence"
|
||||
self.base_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def new_run_id(self) -> str:
|
||||
"""Generate a fresh run identifier."""
|
||||
return str(uuid.uuid4())
|
||||
|
||||
def save_report(
|
||||
self,
|
||||
run_id: str,
|
||||
report_type: str,
|
||||
data: dict[str, Any],
|
||||
context: ReportContext | None = None,
|
||||
) -> Path:
|
||||
"""Persist a report keyed by run_id and type (FR-1401–1404)."""
|
||||
run_dir = self.base_dir / run_id
|
||||
run_dir.mkdir(parents=True, exist_ok=True)
|
||||
report = RunReport(
|
||||
run_id=run_id,
|
||||
report_type=report_type,
|
||||
data=data,
|
||||
created_at=datetime.now(UTC).isoformat(),
|
||||
context=context or ReportContext(),
|
||||
)
|
||||
path = run_dir / f"{report_type}.json"
|
||||
path.write_text(json.dumps(report.to_dict(), indent=2), encoding="utf-8")
|
||||
return path
|
||||
|
||||
def get_report(self, run_id: str, report_type: str) -> RunReport | None:
|
||||
"""Retrieve a specific report (FR-1409)."""
|
||||
path = self.base_dir / run_id / f"{report_type}.json"
|
||||
if not path.exists():
|
||||
return None
|
||||
return RunReport.from_dict(json.loads(path.read_text(encoding="utf-8")))
|
||||
|
||||
def list_runs(self) -> list[str]:
|
||||
"""List all run IDs in the store."""
|
||||
if not self.base_dir.exists():
|
||||
return []
|
||||
return sorted(d.name for d in self.base_dir.iterdir() if d.is_dir())
|
||||
|
||||
def list_reports(self, run_id: str) -> list[RunReport]:
|
||||
"""List all reports for a run (FR-1409)."""
|
||||
run_dir = self.base_dir / run_id
|
||||
if not run_dir.exists():
|
||||
return []
|
||||
reports = []
|
||||
for p in sorted(run_dir.glob("*.json")):
|
||||
reports.append(RunReport.from_dict(json.loads(p.read_text(encoding="utf-8"))))
|
||||
return reports
|
||||
|
||||
def assemble_set(self, run_ids: list[str]) -> EvidenceSet:
|
||||
"""Assemble an evidence set from multiple runs (FR-1406)."""
|
||||
reports: list[RunReport] = []
|
||||
for run_id in run_ids:
|
||||
reports.extend(self.list_reports(run_id))
|
||||
return EvidenceSet(run_ids=run_ids, reports=reports)
|
||||
|
||||
def to_markdown(self, run_id: str) -> str:
|
||||
"""Human-readable Markdown report for a run (FR-1411)."""
|
||||
reports = self.list_reports(run_id)
|
||||
lines = [f"# Evidence Run: {run_id}\n"]
|
||||
for r in reports:
|
||||
lines.append(f"## {r.report_type.title()} Report")
|
||||
lines.append(f"- Status: {r.data.get('status', 'unknown')}")
|
||||
for w in r.data.get("warnings", []):
|
||||
lines.append(f"- Warning: {w}")
|
||||
for e in r.data.get("errors", []):
|
||||
lines.append(f"- Error: {e}")
|
||||
lines.append("")
|
||||
return "\n".join(lines)
|
||||
|
||||
def to_json(self, run_id: str) -> str:
|
||||
"""Machine-readable JSON report for a run (FR-1412)."""
|
||||
reports = self.list_reports(run_id)
|
||||
return json.dumps(
|
||||
{"run_id": run_id, "reports": [r.to_dict() for r in reports]},
|
||||
indent=2,
|
||||
)
|
||||
218
src/markidocx/importer.py
Normal file
218
src/markidocx/importer.py
Normal file
@@ -0,0 +1,218 @@
|
||||
"""DOCX→Markdown importer for markidocx (FR-300, FR-400)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from docx import Document
|
||||
from docx.document import Document as DocxDocument
|
||||
from docx.table import Table
|
||||
from docx.text.paragraph import Paragraph
|
||||
|
||||
from markidocx.manifest import Manifest
|
||||
|
||||
HEADING_STYLE_RE = re.compile(r"^Heading (\d+)$", re.IGNORECASE)
|
||||
LIST_BULLET_RE = re.compile(r"^List Bullet", re.IGNORECASE)
|
||||
LIST_NUMBER_RE = re.compile(r"^List Number", re.IGNORECASE)
|
||||
|
||||
|
||||
@dataclass
class ImportResult:
    """Outcome of one DOCX→Markdown import."""

    # Whether the import succeeded.
    success: bool
    # Markdown files written by the import (empty on failure).
    output_files: list[Path]
    # One of "redistributed", "merged" or "failed".
    mapping_status: str
    # Diagnostics collected during the import; a fresh list per instance.
    warnings: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def import_document(manifest: Manifest, docx_path: Path) -> ImportResult:
    """Convert *docx_path* to Markdown and write it back into the project.

    - One source file: it is overwritten with the whole converted text.
    - Several source files: the text is split at H1 boundaries; when the number
      of sections equals the number of sources, each section overwrites the
      corresponding source in manifest order.
    - Otherwise: a single ``imported_merged.md`` is written to the output
      directory and a warning explains why (FR-406).

    A missing or unreadable DOCX yields a failed result instead of an exception.
    """

    def _failed(reason: str) -> ImportResult:
        return ImportResult(
            success=False,
            output_files=[],
            mapping_status="failed",
            warnings=[reason],
        )

    if not docx_path.exists():
        return _failed(f"DOCX file not found: {docx_path}")

    try:
        doc = Document(str(docx_path))
    except Exception as exc:
        return _failed(f"Could not open DOCX: {exc}")

    warnings: list[str] = []
    md_text = _docx_to_markdown(doc, warnings)

    manifest.output_dir.mkdir(parents=True, exist_ok=True)
    sources = manifest.sources

    # Single source: everything goes back into that one file (FR-305, FR-405).
    if len(sources) == 1:
        target = sources[0].path
        target.write_text(md_text, encoding="utf-8")
        return ImportResult(
            success=True,
            output_files=[target],
            mapping_status="redistributed",
            warnings=warnings,
        )

    # Multi-file: redistribution is only possible with one H1 section per source.
    sections = _split_by_h1(md_text)
    if len(sections) != len(sources):
        # Fallback: merged single output (FR-406)
        warnings.append(
            f"Could not redistribute to {len(manifest.sources)} source files "
            f"(found {len(sections)} H1 sections); writing merged output"
        )
        merged_path = manifest.output_dir / "imported_merged.md"
        merged_path.write_text(md_text, encoding="utf-8")
        return ImportResult(
            success=True,
            output_files=[merged_path],
            mapping_status="merged",
            warnings=warnings,
        )

    written: list[Path] = []
    for src, section_text in zip(sources, sections, strict=True):
        src.path.write_text(section_text, encoding="utf-8")
        written.append(src.path)
    return ImportResult(
        success=True,
        output_files=written,
        mapping_status="redistributed",
        warnings=warnings,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DOCX → Markdown conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _docx_to_markdown(doc: DocxDocument, warnings: list[str]) -> str:
    """Convert a python-docx Document to a Markdown string.

    Body paragraphs and tables are converted in document order and joined with
    blank lines; paragraphs that convert to ``None`` are left out.
    """
    rendered: list[str] = []
    for block in _iter_blocks(doc):
        if isinstance(block, Table):
            rendered.append(_table_to_md(block))
            continue
        if isinstance(block, Paragraph):
            paragraph_md = _paragraph_to_md(block, warnings)
            if paragraph_md is not None:
                rendered.append(paragraph_md)
    return "\n\n".join(rendered)
|
||||
|
||||
|
||||
def _iter_blocks(doc: DocxDocument):
    """Yield Paragraph and Table objects from the document body in order.

    Body children whose local tag name is neither ``p`` nor ``tbl`` are skipped.
    """
    wrappers = {"p": Paragraph, "tbl": Table}
    for element in doc.element.body:
        tag = element.tag
        if "}" in tag:
            # Drop the "{namespace}" prefix, keeping only the local name.
            tag = tag.split("}")[-1]
        wrapper = wrappers.get(tag)
        if wrapper is not None:
            yield wrapper(element, doc)
|
||||
|
||||
|
||||
def _paragraph_to_md(para: Paragraph, warnings: list[str]) -> str | None:
    """Convert a paragraph to a Markdown line, chosen by its style name.

    "Heading N" styles become an ATX heading of level N, "List Bullet…" styles
    a ``- `` item and "List Number…" styles a ``1. `` item, each using the
    stripped paragraph text.  Any other paragraph is rendered run by run to
    keep inline formatting, or returns ``None`` when it has no text.
    *warnings* is currently unused.
    """
    style_name = para.style.name if para.style else "Normal"
    text = para.text.strip()

    heading = HEADING_STYLE_RE.match(style_name)
    if heading:
        hashes = "#" * int(heading.group(1))
        return f"{hashes} {text}"

    for pattern, marker in ((LIST_BULLET_RE, "-"), (LIST_NUMBER_RE, "1.")):
        if pattern.match(style_name):
            return f"{marker} {text}"

    # Normal text — preserve inline markup
    return _runs_to_md(para) if text else None
|
||||
|
||||
|
||||
def _runs_to_md(para: Paragraph) -> str:
    """Convert paragraph runs to Markdown with inline formatting.

    Bold+italic runs are wrapped in ``***``, bold in ``**``, italic in ``*``,
    and otherwise-unstyled runs in a font whose name contains "Courier" in
    backticks.  Empty runs are skipped.

    For emphasis, leading and trailing whitespace of the run is kept outside
    the markers (``"bold "`` → ``"**bold** "``), because Markdown does not
    treat ``**bold **`` as emphasis; a whitespace-only run is emitted as is.
    """
    parts: list[str] = []
    for run in para.runs:
        text = run.text
        if not text:
            continue

        if run.bold and run.italic:
            marker = "***"
        elif run.bold:
            marker = "**"
        elif run.italic:
            marker = "*"
        elif run.font.name and "Courier" in run.font.name:
            # Whitespace inside a code span is content, so keep it untouched.
            parts.append(f"`{text}`")
            continue
        else:
            parts.append(text)
            continue

        core = text.strip()
        if not core:
            parts.append(text)
            continue
        lead = text[: len(text) - len(text.lstrip())]
        trail = text[len(text.rstrip()):]
        parts.append(f"{lead}{marker}{core}{marker}{trail}")
    return "".join(parts)
|
||||
|
||||
|
||||
def _table_to_md(table: Table) -> str:
    """Convert a DOCX table to a GFM Markdown table.

    The first row becomes the header.  Cell text is stripped, line breaks
    inside a cell are folded into single spaces (a GFM row must stay on one
    line), and ``|`` is escaped.  Short rows are padded with empty cells.
    Returns an empty string for a table without rows.
    """
    rows = table.rows
    if not rows:
        return ""

    def _cell_md(cell_text: str) -> str:
        one_line = " ".join(part.strip() for part in cell_text.splitlines() if part.strip())
        return one_line.replace("|", "\\|")

    cells_per_row = [[_cell_md(cell.text) for cell in row.cells] for row in rows]

    # Normalise column count
    num_cols = max(len(r) for r in cells_per_row)
    for row in cells_per_row:
        row.extend([""] * (num_cols - len(row)))

    def _md_row(cells: list[str]) -> str:
        return "| " + " | ".join(cells) + " |"

    lines = [_md_row(cells_per_row[0]), _md_row(["---"] * num_cols)]
    lines.extend(_md_row(row) for row in cells_per_row[1:])
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _split_by_h1(md_text: str) -> list[str]:
|
||||
"""Split Markdown text into sections at H1 boundaries."""
|
||||
lines = md_text.split("\n\n")
|
||||
sections: list[str] = []
|
||||
current: list[str] = []
|
||||
|
||||
for chunk in lines:
|
||||
if chunk.startswith("# ") and current:
|
||||
sections.append("\n\n".join(current))
|
||||
current = [chunk]
|
||||
else:
|
||||
current.append(chunk)
|
||||
|
||||
if current:
|
||||
sections.append("\n\n".join(current))
|
||||
|
||||
return sections
|
||||
113
src/markidocx/manifest.py
Normal file
113
src/markidocx/manifest.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""Manifest model for markidocx projects (FR-100)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import StrEnum
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
# Template family names accepted in a manifest's ``project.family`` field.
SUPPORTED_FAMILIES = {
    "article",
    "book",
    "website",
}
|
||||
|
||||
|
||||
class FeatureLevel(StrEnum):
|
||||
LEVEL1 = "level1"
|
||||
LEVEL3 = "level3"
|
||||
|
||||
|
||||
class ManifestError(Exception):
    """Signal that a manifest is invalid or cannot be resolved.

    The exception message is the human-readable validation failure.
    """
|
||||
|
||||
|
||||
@dataclass
class SourceFile:
    """One Markdown source listed in a manifest."""

    # Location of the source file (load_manifest stores a resolved path here).
    path: Path
|
||||
|
||||
|
||||
@dataclass
class ProjectConfig:
    """The ``project`` section of a manifest."""

    # Project name (``project.name``).
    name: str
    # Declared feature level (``project.feature_level``).
    feature_level: FeatureLevel
    # Template family name (``project.family``).
    family: str
|
||||
|
||||
|
||||
@dataclass
class Manifest:
    """A parsed project manifest."""

    # Validated ``project`` section.
    project: ProjectConfig
    # Source files in manifest order.
    sources: list[SourceFile]
    # Directory that build output is written to.
    output_dir: Path
    # Free-form ``metadata`` mapping; a fresh empty dict per instance by default.
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def load_manifest(path: Path) -> Manifest:
    """Parse and validate a manifest YAML file.

    Relative ``sources`` paths and ``output.dir`` are resolved against the
    directory containing *path*; ``output.dir`` defaults to ``./dist``.

    Args:
        path: Location of the manifest file.

    Returns:
        The validated manifest with resolved source and output paths.

    Raises:
        ManifestError: If the file is missing or unreadable, is not valid
            YAML, or any section fails validation.
    """
    if not path.exists():
        raise ManifestError(f"Manifest not found: {path}")

    try:
        raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    except yaml.YAMLError as exc:
        raise ManifestError(f"YAML parse error: {exc}") from exc
    except (OSError, UnicodeDecodeError) as exc:
        raise ManifestError(f"Cannot read manifest {path}: {exc}") from exc

    # A bare scalar or list at the top level is valid YAML but not a manifest.
    if not isinstance(raw, dict):
        raise ManifestError("Manifest top level must be a mapping")

    # --- project section ---
    if "project" not in raw:
        raise ManifestError("Manifest missing required 'project' section")

    proj_raw = raw["project"]
    if not isinstance(proj_raw, dict):
        raise ManifestError("'project' must be a mapping")

    name = proj_raw.get("name")
    if not name:
        raise ManifestError("'project.name' is required")

    fl_raw: str = proj_raw.get("feature_level") or ""
    try:
        feature_level = FeatureLevel(fl_raw)
    except (ValueError, TypeError):
        raise ManifestError(
            f"Invalid feature_level '{fl_raw}'; must be one of {[e.value for e in FeatureLevel]}"
        ) from None

    family = proj_raw.get("family")
    # The isinstance guard keeps unhashable YAML values (lists, mappings) from
    # raising TypeError in the set membership test.
    if not isinstance(family, str) or family not in SUPPORTED_FAMILIES:
        raise ManifestError(
            f"Invalid family '{family}'; must be one of {sorted(SUPPORTED_FAMILIES)}"
        )

    project = ProjectConfig(name=name, feature_level=feature_level, family=family)

    # --- sources ---
    sources_raw = raw.get("sources", [])
    if not isinstance(sources_raw, list):
        raise ManifestError("'sources' must be a list")

    sources: list[SourceFile] = []
    for entry in sources_raw:
        # Entries may be plain path strings or mappings with a 'path' key.
        src_path_str = entry.get("path") if isinstance(entry, dict) else entry
        if not src_path_str:
            raise ManifestError("Each source entry must have a 'path'")
        if not isinstance(src_path_str, str):
            raise ManifestError(f"Source path must be a string, got {src_path_str!r}")
        src_path = (path.parent / src_path_str).resolve()
        if not src_path.exists():
            raise ManifestError(f"Source file not found: {src_path_str}")
        sources.append(SourceFile(path=src_path))

    # --- output ---
    output_raw = raw.get("output", {})
    output_dir_str = output_raw.get("dir", "./dist") if isinstance(output_raw, dict) else "./dist"
    if not isinstance(output_dir_str, str):
        raise ManifestError(f"'output.dir' must be a string, got {output_dir_str!r}")
    output_dir = (path.parent / output_dir_str).resolve()

    # --- metadata ---
    metadata = raw.get("metadata", {}) or {}
    if not isinstance(metadata, dict):
        raise ManifestError("'metadata' must be a mapping")

    return Manifest(
        project=project,
        sources=sources,
        output_dir=output_dir,
        metadata=metadata,
    )
|
||||
352
src/markidocx/mcp_server.py
Normal file
352
src/markidocx/mcp_server.py
Normal file
@@ -0,0 +1,352 @@
|
||||
"""MCP server for markidocx (FR-1000)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
from markidocx import __version__
|
||||
from markidocx.manifest import SUPPORTED_FAMILIES, FeatureLevel
|
||||
from markidocx.templates import FamilyRegistry
|
||||
|
||||
# Module-level MCP server instance; the tools and resources below register on it.
mcp = FastMCP("markidocx")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# T05 — MCP tools (FR-1002–FR-1015)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@mcp.tool()
def get_version() -> dict[str, str]:
    """Return the markidocx version (FR-1010).

    Returns:
        A single-entry mapping with the package version under "version".
    """
    payload: dict[str, str] = {"version": __version__}
    return payload
|
||||
|
||||
|
||||
@mcp.tool()
def list_templates() -> list[dict[str, str]]:
    """List available template families (FR-1002).

    Returns:
        One ``{"name", "description"}`` mapping per family known to a
        freshly constructed registry.
    """
    listing: list[dict[str, str]] = []
    for family in FamilyRegistry().list_families():
        listing.append({"name": family.name, "description": family.description})
    return listing
|
||||
|
||||
|
||||
@mcp.tool()
def list_styles() -> list[dict[str, str]]:
    """List available styles (FR-1003).

    Currently always returns an empty list.
    """
    no_styles: list[dict[str, str]] = []
    return no_styles
|
||||
|
||||
|
||||
def _resolve_inside(root: Path, rel: object) -> Path | None:
    """Resolve *rel* beneath *root*; return None if it is unusable or escapes *root*.

    *root* must already be resolved. Absolute paths and ``..`` traversal are
    rejected so client-supplied names can never touch files outside the
    temporary project directory.
    """
    if not isinstance(rel, str) or not rel:
        return None
    try:
        candidate = (root / rel).resolve()
    except (OSError, ValueError):  # e.g. embedded NUL byte
        return None
    if candidate == root or not candidate.is_relative_to(root):
        return None
    return candidate


def _stage_project(
    tmp_path: Path,
    manifest_yaml: str,
    sources: list[dict[str, str]],
    *,
    stub_missing: bool = True,
) -> tuple[Path, dict[str, str]]:
    """Write a manifest and its sources into *tmp_path* so it can be loaded.

    Args:
        tmp_path: Temporary project directory.
        manifest_yaml: Manifest text, written to ``manifest.yaml``.
        sources: ``{"name": ..., "content": ...}`` uploads. Entries whose name
            would land outside *tmp_path* are skipped.
        stub_missing: When true, manifest sources that were not uploaded are
            created as empty files so that manifest loading succeeds.

    Returns:
        ``(manifest_path, {name: content})`` for every file staged.
    """
    import contextlib

    import yaml

    root = tmp_path.resolve()
    manifest_path = root / "manifest.yaml"
    manifest_path.write_text(manifest_yaml, encoding="utf-8")
    (root / "dist").mkdir(exist_ok=True)

    source_map: dict[str, str] = {}
    for src in sources:
        name = src.get("name")
        target = _resolve_inside(root, name)
        if target is None or not isinstance(name, str):
            continue
        content = src.get("content", "")
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content, encoding="utf-8")
        source_map[name] = content

    try:
        raw = yaml.safe_load(manifest_yaml)
    except yaml.YAMLError:
        raw = None  # load_manifest reports the parse error to the caller
    if not isinstance(raw, dict):
        return manifest_path, source_map

    out_raw = raw.get("output")
    out_dir = _resolve_inside(
        root, out_raw.get("dir", "./dist") if isinstance(out_raw, dict) else "./dist"
    )
    if out_dir is not None:
        # Best effort: a clash with an existing file surfaces later, at build time.
        with contextlib.suppress(OSError):
            out_dir.mkdir(parents=True, exist_ok=True)

    entries = raw.get("sources")
    if stub_missing and isinstance(entries, list):
        for entry in entries:
            sp = entry.get("path") if isinstance(entry, dict) else entry
            target = _resolve_inside(root, sp)
            if target is None or target.exists():
                continue
            with contextlib.suppress(OSError):
                target.parent.mkdir(parents=True, exist_ok=True)
                target.write_text("", encoding="utf-8")
                source_map.setdefault(sp, "")
    return manifest_path, source_map


def _load_confined(manifest_path: Path) -> Any:
    """Load a staged manifest and reject paths that leave its directory.

    Raises:
        ManifestError: On any validation failure, or when a source or the
            output directory resolves outside the manifest's directory (which
            would let a request read or write arbitrary server files).
    """
    from markidocx.manifest import ManifestError, load_manifest

    root = manifest_path.parent.resolve()
    manifest = load_manifest(manifest_path)
    for src in manifest.sources:
        if not src.path.is_relative_to(root):
            raise ManifestError(f"Source path escapes the project directory: {src.path.name}")
    if not manifest.output_dir.is_relative_to(root):
        raise ManifestError("Output directory escapes the project directory")
    return manifest


@mcp.tool()
def validate_project(manifest_yaml: str) -> dict[str, Any]:
    """Validate a manifest YAML string (FR-1004).

    The manifest is staged in a temporary directory with empty stand-ins for
    its sources, then loaded. Source paths that point outside the project
    directory are not stubbed.

    Returns a dict with status, project info, warnings, and errors.
    The context includes family and feature_level compatibility info (FR-1014).
    """
    import tempfile

    from markidocx.manifest import ManifestError, load_manifest

    with tempfile.TemporaryDirectory() as tmp:
        mp, _ = _stage_project(Path(tmp), manifest_yaml, [])
        try:
            m = load_manifest(mp)
        except ManifestError as exc:
            return {
                "status": "error",
                "errors": [str(exc)],
                "warnings": [],
            }
        return {
            "status": "ok",
            "project": m.project.name,
            "family": m.project.family,
            "feature_level": m.project.feature_level.value,
            "warnings": [],
            "errors": [],
            "context": {
                "supported_families": sorted(SUPPORTED_FAMILIES),
                "supported_feature_levels": [e.value for e in FeatureLevel],
            },
        }


@mcp.tool()
def inspect_project(manifest_yaml: str) -> dict[str, Any]:
    """Inspect a project manifest and return its structure (FR-1005).

    Currently delegates to ``validate_project`` and returns its result.
    """
    result: dict[str, Any] = validate_project(manifest_yaml)  # type: ignore[assignment]
    return result


@mcp.tool()
def build(manifest_yaml: str, sources: list[dict[str, str]]) -> dict[str, Any]:
    """Build a DOCX from Markdown sources (FR-1006).

    sources: list of {"name": "...", "content": "..."} dicts.
    Returns docx_base64 on success.
    """
    import base64
    import tempfile

    from markidocx.builder import build_document
    from markidocx.manifest import ManifestError

    with tempfile.TemporaryDirectory() as tmp:
        # Sources missing from the upload are NOT stubbed here, so the build
        # reports them as errors instead of producing an empty document.
        mp, _ = _stage_project(Path(tmp), manifest_yaml, sources, stub_missing=False)
        try:
            m = _load_confined(mp)
        except ManifestError as exc:
            return {"status": "error", "errors": [str(exc)], "warnings": []}
        result = build_document(m)
        if not result.success:
            return {"status": "error", "errors": result.errors, "warnings": result.warnings}
        docx_b64 = base64.b64encode(Path(result.output_path).read_bytes()).decode()
        return {
            "status": "ok",
            "docx_base64": docx_b64,
            "family": result.family,
            "feature_level": result.feature_level,
            "warnings": result.warnings,
            "errors": [],
        }


@mcp.tool()
def import_docx(manifest_yaml: str, docx_base64: str) -> dict[str, Any]:
    """Import a DOCX back to Markdown (FR-1007).

    docx_base64: base64-encoded DOCX bytes.
    Returns imported Markdown files.
    """
    import base64
    import tempfile

    from markidocx.importer import import_document
    from markidocx.manifest import ManifestError

    try:
        docx_bytes = base64.b64decode(docx_base64)
    except ValueError as exc:  # binascii.Error is a ValueError subclass
        return {"status": "error", "errors": [f"Invalid docx_base64: {exc}"], "warnings": []}

    with tempfile.TemporaryDirectory() as tmp:
        mp, _ = _stage_project(Path(tmp), manifest_yaml, [])
        docx_path = mp.parent / "input.docx"
        docx_path.write_bytes(docx_bytes)
        try:
            m = _load_confined(mp)
        except ManifestError as exc:
            return {"status": "error", "errors": [str(exc)], "warnings": []}
        result = import_document(m, docx_path)
        if not result.success:
            return {"status": "error", "errors": ["Import failed"], "warnings": result.warnings}

        files_md: dict[str, str] = {}
        for f in result.output_files:
            # Unreadable outputs are left out of the response.
            try:
                files_md[Path(f).name] = Path(f).read_text(encoding="utf-8")
            except (OSError, UnicodeError):
                continue
        return {
            "status": "ok",
            "files": files_md,
            "mapping_status": result.mapping_status,
            "warnings": result.warnings,
            "errors": [],
        }


@mcp.tool()
def compare(
    manifest_yaml: str,
    docx_base64: str,
    sources: list[dict[str, str]] | None = None,
) -> dict[str, Any]:
    """Compare original Markdown with re-imported DOCX (FR-1008).

    sources: original source files as [{"name": ..., "content": ...}].
    """
    import base64
    import tempfile

    from markidocx.differ import compare as do_compare
    from markidocx.importer import import_document
    from markidocx.manifest import ManifestError

    sources = sources or []

    try:
        docx_bytes = base64.b64decode(docx_base64)
    except ValueError as exc:  # binascii.Error is a ValueError subclass
        return {"status": "error", "errors": [f"Invalid docx_base64: {exc}"], "warnings": []}

    with tempfile.TemporaryDirectory() as tmp:
        mp, source_map = _stage_project(Path(tmp), manifest_yaml, sources)
        docx_path = mp.parent / "input.docx"
        docx_path.write_bytes(docx_bytes)
        try:
            m = _load_confined(mp)
        except ManifestError as exc:
            return {"status": "error", "errors": [str(exc)], "warnings": []}

        # Originals are matched to manifest sources by file name.
        original_md = "\n\n".join(source_map.get(s.path.name, "") for s in m.sources)
        result = import_document(m, docx_path)
        if not result.success:
            return {
                "status": "error",
                "errors": ["Import failed — cannot compare"],
                "warnings": result.warnings,
            }

        reimported_parts: list[str] = []
        for f in result.output_files:
            try:
                reimported_parts.append(Path(f).read_text(encoding="utf-8"))
            except (OSError, UnicodeError):
                reimported_parts.append("")
        report = do_compare(original_md, "\n\n".join(reimported_parts))
        return {
            "status": "ok",
            "has_drift": report.has_drift,
            "preserved": report.preserved,
            "degraded": report.degraded,
            "broken": report.broken,
            "unsupported": report.unsupported,
            "warnings": [],
            "errors": [],
        }
|
||||
|
||||
|
||||
@mcp.tool()
def run_tests(manifest_yaml: str, sources: list[dict[str, str]]) -> dict[str, Any]:
    """Run the end-to-end test harness (FR-1009).

    Runs the "single-file-roundtrip" workflow on the given manifest and
    sources and returns that workflow's result dict unchanged.
    """
    workflow = "single-file-roundtrip"
    result: dict[str, Any] = invoke_workflow(workflow, manifest_yaml, sources)  # type: ignore[assignment]
    return result
|
||||
|
||||
|
||||
@mcp.tool()
def invoke_workflow(
    workflow_name: str,
    manifest_yaml: str,
    sources: list[dict[str, str]],
) -> dict[str, Any]:
    """Invoke a named composite workflow (FR-1012).

    Returns a dict describing the run (run id, classification, per-step
    status, aggregate output). ``status`` is "error" when the run is
    classified "failed" or the workflow raises ``WorkflowError``.
    """
    from markidocx.workflows import WorkflowError, run_workflow_from_content

    try:
        outcome = run_workflow_from_content(workflow_name, manifest_yaml, sources)
    except WorkflowError as exc:
        return {"status": "error", "errors": [str(exc)], "warnings": []}

    step_summaries = []
    for step in outcome.steps:
        step_summaries.append({"name": step.name, "status": step.status, "error": step.error})

    status = "error" if outcome.classification == "failed" else "ok"
    return {
        "status": status,
        "run_id": outcome.run_id,
        "workflow_name": outcome.workflow_name,
        "classification": outcome.classification,
        "steps": step_summaries,
        "aggregate_output": outcome.aggregate_output,
        "warnings": [],
        "errors": [],
    }
|
||||
|
||||
|
||||
@mcp.tool()
def get_evidence(run_id: str) -> dict[str, Any]:
    """Retrieve evidence artifacts for a completed run (FR-1013).

    Returns the run's reports as dicts with status "ok", or a "not_found"
    result carrying a warning when the store has no reports for *run_id*.
    """
    from markidocx.evidence import EvidenceStore

    found = EvidenceStore().list_reports(run_id)
    if found:
        return {
            "status": "ok",
            "run_id": run_id,
            "reports": [report.to_dict() for report in found],
            "warnings": [],
            "errors": [],
        }
    return {
        "status": "not_found",
        "run_id": run_id,
        "reports": [],
        "warnings": [f"No evidence found for run_id: {run_id}"],
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MCP resources (FR-1011)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@mcp.resource("markidocx://capabilities")
def resource_capabilities() -> str:
    """Capabilities: supported feature levels and families.

    Returns:
        A JSON object string with "version", "feature_levels" and "families".
    """
    import json

    capabilities = {
        "version": __version__,
        "feature_levels": [level.value for level in FeatureLevel],
        "families": sorted(SUPPORTED_FAMILIES),
    }
    return json.dumps(capabilities)
|
||||
|
||||
|
||||
@mcp.resource("markidocx://templates")
def resource_templates() -> str:
    """Template family metadata.

    Returns:
        A JSON array string of ``{"name", "description"}`` objects, one per
        family in a freshly constructed registry.
    """
    import json

    entries = []
    for family in FamilyRegistry().list_families():
        entries.append({"name": family.name, "description": family.description})
    return json.dumps(entries)
|
||||
395
src/markidocx/rest.py
Normal file
395
src/markidocx/rest.py
Normal file
@@ -0,0 +1,395 @@
|
||||
"""REST service for markidocx (FR-900)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
from fastapi import FastAPI
|
||||
from pydantic import BaseModel
|
||||
|
||||
from markidocx import __version__
|
||||
from markidocx.manifest import SUPPORTED_FAMILIES, FeatureLevel
|
||||
from markidocx.templates import FamilyRegistry
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Response envelope (FR-912)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ResponseEnvelope(BaseModel):
    """Common response body returned by the REST endpoints (FR-912)."""

    # Outcome marker; the endpoints in this module use "ok", "error" and "not_found".
    status: str
    # Endpoint-specific payload; None when there is nothing to return.
    outputs: Any = None
    # Non-fatal messages.
    warnings: list[str] = []
    # Failure messages.
    errors: list[str] = []
    # Request context echoed back, plus endpoint-added keys.
    context: dict[str, Any] = {}
|
||||
|
||||
|
||||
def _ok(
    outputs: Any = None,
    warnings: list[str] | None = None,
    context: dict[str, Any] | None = None,
) -> ResponseEnvelope:
    """Build a success envelope (status "ok", no errors).

    Missing or empty *warnings* / *context* become a fresh empty list / dict.
    """
    envelope_warnings = warnings if warnings else []
    envelope_context = context if context else {}
    return ResponseEnvelope(
        status="ok",
        outputs=outputs,
        warnings=envelope_warnings,
        errors=[],
        context=envelope_context,
    )
|
||||
|
||||
|
||||
def _error(
    errors: list[str],
    warnings: list[str] | None = None,
    context: dict[str, Any] | None = None,
) -> ResponseEnvelope:
    """Build a failure envelope (status "error", no outputs).

    Missing or empty *warnings* / *context* become a fresh empty list / dict.
    """
    envelope_warnings = warnings if warnings else []
    envelope_context = context if context else {}
    return ResponseEnvelope(
        status="error",
        outputs=None,
        warnings=envelope_warnings,
        errors=errors,
        context=envelope_context,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Request models
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ValidateRequest(BaseModel):
    """Request body for ``POST /validate``."""

    # Manifest YAML text to validate.
    manifest_yaml: str
    # Caller-supplied context, echoed back in the response envelope.
    context: dict[str, Any] = {}
|
||||
|
||||
|
||||
class BuildRequest(BaseModel):
    """Request body for ``POST /build``."""

    # Manifest YAML text describing the project.
    manifest_yaml: str
    # Markdown sources: [{"name": "...", "content": "..."}]
    sources: list[dict[str, str]] = []
    # Caller-supplied context, echoed back in the response envelope.
    context: dict[str, Any] = {}
|
||||
|
||||
|
||||
class ImportRequest(BaseModel):
    """Request body for ``POST /import``."""

    # Manifest YAML text describing the project.
    manifest_yaml: str
    # Base64-encoded DOCX bytes to import.
    docx_base64: str
    # Caller-supplied context, echoed back in the response envelope.
    context: dict[str, Any] = {}
|
||||
|
||||
|
||||
class CompareRequest(BaseModel):
    """Request body for ``POST /compare``."""

    # Manifest YAML text describing the project.
    manifest_yaml: str
    # Base64-encoded DOCX bytes to re-import.
    docx_base64: str
    # Original source content for comparison: [{"name": "...", "content": "..."}]
    sources: list[dict[str, str]] = []
    # Caller-supplied context, echoed back in the response envelope.
    context: dict[str, Any] = {}
|
||||
|
||||
|
||||
class RegisterTemplateRequest(BaseModel):
    """Request body for ``POST /templates/register``."""

    # Name to register the template family under.
    name: str
    # Base64-encoded DOCX template bytes.
    docx_base64: str
    # Optional human-readable description of the family.
    description: str = ""
    # Caller-supplied context, echoed back in the response envelope.
    context: dict[str, Any] = {}
|
||||
|
||||
|
||||
class WorkflowInvokeRequest(BaseModel):
    """Request body for ``POST /workflows/{workflow_name}``."""

    # Manifest YAML text describing the project.
    manifest_yaml: str
    # Markdown sources: [{"name": "...", "content": "..."}]
    sources: list[dict[str, str]] = []
    # Caller-supplied context, echoed back in the response envelope.
    context: dict[str, Any] = {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# App factory
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _write_tmp_project(
|
||||
tmp_path: Path,
|
||||
manifest_yaml: str,
|
||||
sources: list[dict[str, str]],
|
||||
) -> tuple[Path, dict[str, str]]:
|
||||
"""Write manifest + sources to tmp_path, return (manifest_path, {name: content})."""
|
||||
mp = tmp_path / "manifest.yaml"
|
||||
mp.write_text(manifest_yaml, encoding="utf-8")
|
||||
source_map: dict[str, str] = {}
|
||||
for src in sources:
|
||||
name = src["name"]
|
||||
content = src.get("content", "")
|
||||
(tmp_path / name).write_text(content, encoding="utf-8")
|
||||
source_map[name] = content
|
||||
# Ensure stub sources listed in manifest exist
|
||||
try:
|
||||
raw = yaml.safe_load(manifest_yaml) or {}
|
||||
out_raw = raw.get("output", {})
|
||||
out_dir = out_raw.get("dir", "./dist") if isinstance(out_raw, dict) else "./dist"
|
||||
(tmp_path / out_dir.lstrip("./")).mkdir(parents=True, exist_ok=True)
|
||||
for entry in raw.get("sources", []):
|
||||
sp = entry.get("path") if isinstance(entry, dict) else entry
|
||||
if sp and not (tmp_path / sp).exists():
|
||||
(tmp_path / sp).write_text("", encoding="utf-8")
|
||||
source_map.setdefault(sp, "")
|
||||
except Exception:
|
||||
(tmp_path / "dist").mkdir(exist_ok=True)
|
||||
return mp, source_map
|
||||
|
||||
|
||||
def create_app() -> FastAPI:
    """Create and return the FastAPI application.

    Endpoints that take a manifest stage it in a per-request temporary
    directory. ``/build``, ``/import`` and ``/compare`` reject manifests whose
    sources or output directory resolve outside that directory, and malformed
    base64 payloads are reported in the response envelope instead of raising.
    """
    from markidocx.manifest import ManifestError, load_manifest

    app = FastAPI(
        title="markidocx",
        version=__version__,
        description="Markdown ↔ DOCX round-trip editing service",
    )

    def _decode_b64(payload: str) -> bytes | None:
        """Decode a base64 payload; return None when it is malformed."""
        try:
            return base64.b64decode(payload)
        except ValueError:  # binascii.Error is a ValueError subclass
            return None

    def _load_confined(mp: Path) -> Any:
        """Load a staged manifest, rejecting paths that leave its directory.

        Raises ManifestError on validation failure or when a source / the
        output directory resolves outside the staged project; such paths come
        from the request body and would expose or overwrite server files.
        """
        root = mp.parent.resolve()
        manifest = load_manifest(mp)
        for src in manifest.sources:
            if not src.path.is_relative_to(root):
                raise ManifestError(
                    f"Source path escapes the project directory: {src.path.name}"
                )
        if not manifest.output_dir.is_relative_to(root):
            raise ManifestError("Output directory escapes the project directory")
        return manifest

    # ------------------------------------------------------------------
    # T01 — Foundation endpoints (FR-909–912)
    # ------------------------------------------------------------------

    @app.get("/health")
    def health() -> dict[str, str]:
        """Health check (FR-910)."""
        return {"status": "ok", "version": __version__}

    @app.get("/version", response_model=ResponseEnvelope)
    def version() -> ResponseEnvelope:
        """Version information (FR-911)."""
        return _ok(outputs={"version": __version__})

    @app.get("/capabilities", response_model=ResponseEnvelope)
    def capabilities() -> ResponseEnvelope:
        """Capability inspection — feature levels and families (FR-909)."""
        return _ok(
            outputs={
                "feature_levels": [e.value for e in FeatureLevel],
                "families": sorted(SUPPORTED_FAMILIES),
            },
            context={"version": __version__},
        )

    @app.get("/templates", response_model=ResponseEnvelope)
    def templates() -> ResponseEnvelope:
        """List template families (FR-906)."""
        registry = FamilyRegistry()
        families = registry.list_families()
        return _ok(
            outputs=[{"name": f.name, "description": f.description} for f in families]
        )

    @app.get("/styles", response_model=ResponseEnvelope)
    def styles() -> ResponseEnvelope:
        """List available styles (FR-907 stub)."""
        return _ok(outputs=[])

    # ------------------------------------------------------------------
    # T02 — Functional endpoints (FR-902–908, FR-913–916)
    # ------------------------------------------------------------------

    @app.post("/validate", response_model=ResponseEnvelope)
    def validate(req: ValidateRequest) -> ResponseEnvelope:
        """Validate a manifest (FR-902)."""
        with tempfile.TemporaryDirectory() as tmp:
            mp, _ = _write_tmp_project(Path(tmp), req.manifest_yaml, [])
            try:
                m = load_manifest(mp)
            except ManifestError as exc:
                return _error(errors=[str(exc)], context=req.context)
            ctx = {
                **req.context,
                "family": m.project.family,
                "feature_level": m.project.feature_level.value,
            }
            return _ok(
                outputs={
                    "project": m.project.name,
                    "family": m.project.family,
                    "feature_level": m.project.feature_level.value,
                },
                context=ctx,
            )

    @app.post("/build", response_model=ResponseEnvelope)
    def build(req: BuildRequest) -> ResponseEnvelope:
        """Build DOCX from Markdown sources (FR-903)."""
        from markidocx.builder import build_document

        with tempfile.TemporaryDirectory() as tmp:
            mp, _ = _write_tmp_project(Path(tmp), req.manifest_yaml, req.sources)
            try:
                m = _load_confined(mp)
            except ManifestError as exc:
                return _error(errors=[str(exc)], context=req.context)
            result = build_document(m)
            ctx = {
                **req.context,
                "family": result.family,
                "feature_level": result.feature_level,
            }
            if not result.success:
                return _error(errors=result.errors, warnings=result.warnings, context=ctx)
            docx_b64 = base64.b64encode(Path(result.output_path).read_bytes()).decode()
            return ResponseEnvelope(
                status="ok",
                outputs={"docx_base64": docx_b64, "output_path": str(result.output_path)},
                warnings=result.warnings,
                errors=[],
                context=ctx,
            )

    @app.post("/import", response_model=ResponseEnvelope)
    def import_docx(req: ImportRequest) -> ResponseEnvelope:
        """Import DOCX back to Markdown (FR-904)."""
        from markidocx.importer import import_document

        docx_bytes = _decode_b64(req.docx_base64)
        if docx_bytes is None:
            return _error(errors=["Invalid docx_base64 payload"], context=req.context)

        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            mp, _ = _write_tmp_project(tmp_path, req.manifest_yaml, [])
            docx_path = tmp_path / "input.docx"
            docx_path.write_bytes(docx_bytes)
            try:
                m = _load_confined(mp)
            except ManifestError as exc:
                return _error(errors=[str(exc)], context=req.context)
            result = import_document(m, docx_path)
            ctx = {**req.context}
            if not result.success:
                return ResponseEnvelope(
                    status="error",
                    outputs=None,
                    warnings=result.warnings,
                    errors=["Import failed"],
                    context=ctx,
                )

            files_md: dict[str, str] = {}
            for f in result.output_files:
                # Unreadable outputs are left out of the response.
                try:
                    files_md[Path(f).name] = Path(f).read_text(encoding="utf-8")
                except (OSError, UnicodeError):
                    continue
            return ResponseEnvelope(
                status="ok",
                outputs={"files": files_md, "mapping_status": result.mapping_status},
                warnings=result.warnings,
                errors=[],
                context=ctx,
            )

    @app.post("/compare", response_model=ResponseEnvelope)
    def compare(req: CompareRequest) -> ResponseEnvelope:
        """Compare original Markdown with re-imported DOCX (FR-905)."""
        from markidocx.differ import compare as do_compare
        from markidocx.importer import import_document

        docx_bytes = _decode_b64(req.docx_base64)
        if docx_bytes is None:
            return _error(errors=["Invalid docx_base64 payload"], context=req.context)

        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            mp, source_map = _write_tmp_project(tmp_path, req.manifest_yaml, req.sources)
            docx_path = tmp_path / "input.docx"
            docx_path.write_bytes(docx_bytes)
            try:
                m = _load_confined(mp)
            except ManifestError as exc:
                return _error(errors=[str(exc)], context=req.context)

            # Originals are matched to manifest sources by file name.
            original_md = "\n\n".join(
                source_map.get(s.path.name, "") for s in m.sources
            )
            result = import_document(m, docx_path)
            if not result.success:
                return _error(
                    errors=["Import failed — cannot compare"],
                    warnings=result.warnings,
                    context=req.context,
                )
            reimported_parts: list[str] = []
            for f in result.output_files:
                try:
                    reimported_parts.append(Path(f).read_text(encoding="utf-8"))
                except (OSError, UnicodeError):
                    reimported_parts.append("")
            reimported_md = "\n\n".join(reimported_parts)
            report = do_compare(original_md, reimported_md)
            return _ok(
                outputs={
                    "has_drift": report.has_drift,
                    "preserved": report.preserved,
                    "degraded": report.degraded,
                    "broken": report.broken,
                    "unsupported": report.unsupported,
                },
                context=req.context,
            )

    @app.post("/templates/register", response_model=ResponseEnvelope)
    def register_template(req: RegisterTemplateRequest) -> ResponseEnvelope:
        """Register a custom template family (FR-908)."""
        from markidocx.templates import RegistrationError

        # The name becomes a file name below, so it must be a single path
        # component; anything else could write outside the temp directory.
        name = req.name
        if not name or name in {".", ".."} or any(ch in name for ch in "/\\\0"):
            return _error(errors=[f"Invalid template name: {name!r}"], context=req.context)

        docx_bytes = _decode_b64(req.docx_base64)
        if docx_bytes is None:
            return _error(errors=["Invalid docx_base64 payload"], context=req.context)

        with tempfile.TemporaryDirectory() as tmp:
            tmpl_path = Path(tmp) / f"{name}.docx"
            tmpl_path.write_bytes(docx_bytes)
            # NOTE(review): this registry instance and the temporary template
            # file are both discarded when the handler returns, so the
            # registration is not visible to later requests — confirm intent.
            registry = FamilyRegistry()
            try:
                info = registry.register(tmpl_path, name, req.description)
            except RegistrationError as exc:
                return _error(errors=[str(exc)], context=req.context)
            return _ok(
                outputs={"name": info.name, "description": info.description},
                context=req.context,
            )

    @app.post("/workflows/{workflow_name}", response_model=ResponseEnvelope)
    def invoke_workflow(workflow_name: str, req: WorkflowInvokeRequest) -> ResponseEnvelope:
        """Invoke a composite workflow by name (FR-913)."""
        from markidocx.workflows import WorkflowError, run_workflow_from_content

        try:
            result = run_workflow_from_content(workflow_name, req.manifest_yaml, req.sources)
        except WorkflowError as exc:
            return _error(
                errors=[str(exc)],
                context={**req.context, "workflow": workflow_name},
            )
        ctx = {**req.context, "workflow": workflow_name, "run_id": result.run_id}
        return ResponseEnvelope(
            status="ok" if result.classification != "failed" else "error",
            outputs={
                "run_id": result.run_id,
                "workflow_name": result.workflow_name,
                "classification": result.classification,
                "steps": [
                    {"name": s.name, "status": s.status, "error": s.error}
                    for s in result.steps
                ],
                "aggregate_output": result.aggregate_output,
            },
            warnings=[],
            errors=[],
            context=ctx,
        )

    @app.get("/evidence/{run_id}", response_model=ResponseEnvelope)
    def get_evidence(run_id: str) -> ResponseEnvelope:
        """Retrieve evidence artifacts for a completed run (FR-914)."""
        from markidocx.evidence import EvidenceStore

        store = EvidenceStore()
        reports = store.list_reports(run_id)
        if not reports:
            return ResponseEnvelope(
                status="not_found",
                outputs=None,
                warnings=[f"No evidence found for run_id: {run_id}"],
                errors=[],
                context={"run_id": run_id},
            )
        return _ok(
            outputs={"run_id": run_id, "reports": [r.to_dict() for r in reports]},
            context={"run_id": run_id},
        )

    return app
|
||||
101
src/markidocx/templates.py
Normal file
101
src/markidocx/templates.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""Template family registry for markidocx (FR-600)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from docx import Document
|
||||
from docx.document import Document as DocxDocument
|
||||
from docx.shared import Pt
|
||||
|
||||
# Built-in template families: family name -> human-readable description.
BUILT_IN_FAMILIES: dict[str, str] = dict(
    article="Single-document article layout",
    book="Multi-chapter book layout",
    website="Web-optimised document layout",
)
|
||||
|
||||
|
||||
@dataclass
class FamilyInfo:
    """Metadata for one template family."""

    # Family name used as the registry key.
    name: str
    # Human-readable description of the family.
    description: str
    # Custom DOCX template file, or None when the family has no template file.
    template_path: Path | None = None
|
||||
|
||||
|
||||
class RegistrationError(Exception):
    """Signal that registering a template family failed.

    The exception message names the offending template path.
    """
|
||||
|
||||
|
||||
class FamilyRegistry:
    """Manages DOCX template families (FR-602–FR-608).

    Each instance starts with the built-in families and keeps its own
    registrations in memory.
    """

    def __init__(self) -> None:
        self._families: dict[str, FamilyInfo] = {}
        for family_name, summary in BUILT_IN_FAMILIES.items():
            self._families[family_name] = FamilyInfo(name=family_name, description=summary)

    def list_families(self) -> list[FamilyInfo]:
        """Return all registered families (FR-603)."""
        return [*self._families.values()]

    def get(self, name: str) -> FamilyInfo | None:
        """Return a family by name, or None if not found (FR-604)."""
        return self._families.get(name)

    def register(self, path: Path, name: str, description: str = "") -> FamilyInfo:
        """Register a custom template family (FR-605).

        An existing family with the same name is replaced.

        Raises RegistrationError if the path does not exist or does not have
        a ``.docx`` suffix.
        """
        if not path.exists():
            raise RegistrationError(f"Template file not found: {path}")
        is_docx = path.suffix.lower() == ".docx"
        if not is_docx:
            raise RegistrationError(f"Template must be a .docx file: {path}")
        registered = FamilyInfo(name=name, description=description, template_path=path)
        self._families[name] = registered
        return registered

    def create_document(self, family: str) -> DocxDocument:
        """Create a new python-docx Document using the named family's template.

        Falls back to a default document with the family's style defaults
        when the family is unknown, has no template path, or its template
        file no longer exists.
        """
        info = self._families.get(family)
        template = info.template_path if info else None
        if template and template.exists():
            return Document(str(template))
        blank = Document()
        _apply_family_defaults(blank, family)
        return blank
|
||||
|
||||
|
||||
def _apply_family_defaults(doc: DocxDocument, family: str) -> None:
    """Give the document's ``Normal`` style the body font for ``family``.

    An unset size or font name is first filled with 11 pt / "Calibri".
    "book" then forces 12 pt and "website" forces 10 pt "Arial"; any other
    family keeps the filled-in values. If looking up ``Normal`` raises
    ``KeyError`` the document is left untouched.
    """
    # (font name, point size) forced per family; None leaves the name alone.
    forced = {"book": (None, 12), "website": ("Arial", 10)}

    try:
        body_font = doc.styles["Normal"].font
        if body_font.size is None:
            body_font.size = Pt(11)
        if body_font.name is None:
            body_font.name = "Calibri"
        if family in forced:
            face, points = forced[family]
            if face is not None:
                body_font.name = face
            body_font.size = Pt(points)
    except KeyError:
        # No "Normal" style to adjust.
        pass
|
||||
376
src/markidocx/workflows.py
Normal file
376
src/markidocx/workflows.py
Normal file
@@ -0,0 +1,376 @@
|
||||
"""Composite workflow orchestration for markidocx (FR-1300)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from markidocx.evidence import EvidenceStore, ReportContext
|
||||
|
||||
# Workflow names accepted by run_workflow() and run_workflow_from_content().
SUPPORTED_WORKFLOWS = {
    "family-switch-build",
    "multi-file-roundtrip",
    "release-regression",
    "single-file-roundtrip",
}

# Plain-string alias. Values used in this module:
# "full" | "with-fallback" | "partial" | "failed"
WorkflowClassification = str
|
||||
|
||||
|
||||
class WorkflowError(Exception):
    """Signals that a workflow was invoked incorrectly."""
|
||||
|
||||
|
||||
@dataclass
class WorkflowStep:
    """Outcome of one stage of a workflow run."""

    # Stage label, e.g. "validate", "build" or "build:<family>".
    name: str
    # "executed" | "skipped" | "failed"
    status: str
    # Stage-specific payload; None when the stage recorded nothing.
    output: Any = None
    # Failure description; None when the stage did not fail.
    error: str | None = None
|
||||
|
||||
|
||||
@dataclass
class WorkflowResult:
    """Aggregate outcome of one workflow run."""

    # Identifier the run's evidence reports are saved under.
    run_id: str
    # Name of the workflow that produced this result.
    workflow_name: str
    # Timestamp string supplied by the dispatcher.
    timestamp: str
    # Overall verdict: "full" | "with-fallback" | "partial" | "failed".
    classification: WorkflowClassification
    # Per-stage outcomes in execution order.
    steps: list[WorkflowStep] = field(default_factory=list)
    # Workflow-specific summary of the run.
    aggregate_output: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def run_workflow(
    name: str,
    manifest_path: Path,
    evidence_store: EvidenceStore | None = None,
) -> WorkflowResult:
    """Run the workflow called ``name`` against a manifest file (FR-1308).

    A fresh UUID4 run id and a UTC ISO-8601 timestamp are generated for the
    run and handed to the workflow implementation together with
    ``evidence_store`` (a new ``EvidenceStore`` is created when the argument
    is missing or falsy).

    Raises:
        WorkflowError: ``name`` is not in ``SUPPORTED_WORKFLOWS``.
    """
    if name not in SUPPORTED_WORKFLOWS:
        raise WorkflowError(
            f"Unknown workflow '{name}'. Supported: {sorted(SUPPORTED_WORKFLOWS)}"
        )

    handlers = {
        "single-file-roundtrip": _single_file_roundtrip,
        "multi-file-roundtrip": _multi_file_roundtrip,
        "release-regression": _release_regression,
    }
    # Any other name that passed the membership check is family-switch-build.
    handler = handlers.get(name, _family_switch_build)

    store = evidence_store or EvidenceStore()
    run_id = str(uuid.uuid4())
    ts = datetime.now(UTC).isoformat()
    return handler(run_id, ts, manifest_path, store)
|
||||
|
||||
|
||||
def run_workflow_from_content(
    name: str,
    manifest_yaml: str,
    sources: list[dict[str, str]],
    evidence_store: EvidenceStore | None = None,
) -> WorkflowResult:
    """Run a workflow given raw YAML and source content (used by REST/MCP).

    Materialises a throw-away project directory -- ``manifest.yaml``, an empty
    ``dist/`` directory and one file per entry of ``sources`` -- and delegates
    to run_workflow().

    Args:
        name: Workflow name; must be in ``SUPPORTED_WORKFLOWS``.
        manifest_yaml: Manifest text, written verbatim to ``manifest.yaml``.
        sources: Mappings with a ``"name"`` key (path relative to the project
            directory; sub-directories are created as needed) and a
            ``"content"`` key (file text).
        evidence_store: Passed through to run_workflow().

    Raises:
        WorkflowError: ``name`` is not a supported workflow, or a source name
            is absolute or otherwise resolves outside the project directory.
    """
    if name not in SUPPORTED_WORKFLOWS:
        raise WorkflowError(
            f"Unknown workflow '{name}'. Supported: {sorted(SUPPORTED_WORKFLOWS)}"
        )
    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        # Resolved once so the containment check below compares like with
        # like even when the temp location itself is reached via a symlink.
        project_root = tmp_path.resolve()
        mp = tmp_path / "manifest.yaml"
        mp.write_text(manifest_yaml, encoding="utf-8")
        (tmp_path / "dist").mkdir()
        for src in sources:
            target = tmp_path / src["name"]
            resolved = target.resolve()
            # Source names come from remote callers: refuse absolute paths and
            # "../" tricks instead of writing outside the project directory.
            if resolved == project_root or not resolved.is_relative_to(project_root):
                raise WorkflowError(
                    f"Source name must stay inside the project directory: {src['name']!r}"
                )
            # Allow nested names such as "chapters/01.md".
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(src["content"], encoding="utf-8")
        # NOTE: the directory is removed as soon as this block exits, so any
        # filesystem paths recorded in the result do not outlive the call.
        return run_workflow(name, mp, evidence_store)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Individual workflow implementations
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _single_file_roundtrip(
    run_id: str,
    ts: str,
    manifest_path: Path,
    store: EvidenceStore,
) -> WorkflowResult:
    """validate → build → import → compare (FR-1301).

    Each stage appends a ``WorkflowStep`` and saves a report to ``store``
    under ``run_id`` ("validation", "build", "import", "drift"). The run
    stops at the first failing stage.

    Classification of the returned result:
        * ``"failed"`` -- the manifest could not be loaded or the build failed.
        * ``"partial"`` -- the build succeeded but the import did not.
        * ``"with-fallback"`` -- all four stages ran, but drift was detected,
          the build or import produced warnings, or the import reported
          ``mapping_status == "merged"``.
        * ``"full"`` -- all four stages ran and none of the above applies.

    Args:
        run_id: Identifier the reports are saved under.
        ts: Timestamp string copied into the result.
        manifest_path: Manifest file handed to ``load_manifest``.
        store: Evidence store receiving the per-stage reports.

    Returns:
        A ``WorkflowResult`` whose ``workflow_name`` is always
        ``"single-file-roundtrip"``.
    """
    from markidocx.builder import build_document
    from markidocx.differ import compare as do_compare
    from markidocx.importer import import_document
    from markidocx.manifest import ManifestError, load_manifest

    steps: list[WorkflowStep] = []
    # NOTE(review): the run id is passed as ``workflow`` here -- confirm that
    # ReportContext does not expect the workflow name instead.
    ctx = ReportContext(workflow=run_id)

    def _finish(classification: WorkflowClassification, aggregate: dict[str, Any]) -> WorkflowResult:
        # Single place that assembles the result for every exit path.
        return WorkflowResult(
            run_id=run_id,
            workflow_name="single-file-roundtrip",
            timestamp=ts,
            classification=classification,
            steps=steps,
            aggregate_output=aggregate,
        )

    # Step 1: validate
    try:
        m = load_manifest(manifest_path)
    except ManifestError as exc:
        steps.append(WorkflowStep(name="validate", status="failed", error=str(exc)))
        store.save_report(run_id, "validation", {"status": "error", "errors": [str(exc)], "warnings": []}, ctx)
        return _finish("failed", {"error": str(exc)})
    else:
        steps.append(WorkflowStep(name="validate", status="executed", output={"project": m.project.name}))
        store.save_report(
            run_id,
            "validation",
            {"status": "ok", "project": m.project.name, "errors": [], "warnings": []},
            ctx,
        )

    # Step 2: build
    build_result = build_document(m)
    steps.append(
        WorkflowStep(
            name="build",
            status="executed" if build_result.success else "failed",
            output={"output_path": str(build_result.output_path), "warnings": build_result.warnings},
            error="; ".join(build_result.errors) if not build_result.success else None,
        )
    )
    store.save_report(
        run_id,
        "build",
        {
            "status": "ok" if build_result.success else "error",
            "output_path": str(build_result.output_path),
            "warnings": build_result.warnings,
            "errors": build_result.errors,
            "family": build_result.family,
            "feature_level": build_result.feature_level,
        },
        ctx,
    )
    if not build_result.success:
        return _finish("failed", {"errors": build_result.errors})

    # Step 3: import the freshly built document back to Markdown
    import_result = import_document(m, build_result.output_path)
    steps.append(
        WorkflowStep(
            name="import",
            status="executed" if import_result.success else "failed",
            output={"mapping_status": import_result.mapping_status, "warnings": import_result.warnings},
        )
    )
    store.save_report(
        run_id,
        "import",
        {
            "status": "ok" if import_result.success else "error",
            "mapping_status": import_result.mapping_status,
            "output_files": [str(f) for f in import_result.output_files],
            "warnings": import_result.warnings,
            "errors": [],
        },
        ctx,
    )
    if not import_result.success:
        # The DOCX exists, so the run is only partially unsuccessful.
        return _finish("partial", {"warnings": import_result.warnings})

    # Step 4: compare the concatenated originals with the concatenated re-import
    original_md = "\n\n".join(s.path.read_text(encoding="utf-8") for s in m.sources)
    reimported_md = "\n\n".join(
        Path(f).read_text(encoding="utf-8") for f in import_result.output_files
    )
    drift = do_compare(original_md, reimported_md)
    steps.append(
        WorkflowStep(
            name="compare",
            status="executed",
            output={
                "has_drift": drift.has_drift,
                "preserved": drift.preserved,
                "degraded": drift.degraded,
                "broken": drift.broken,
            },
        )
    )
    store.save_report(
        run_id,
        "drift",
        {
            "status": "ok",
            "has_drift": drift.has_drift,
            "preserved": drift.preserved,
            "degraded": drift.degraded,
            "broken": drift.broken,
            "unsupported": drift.unsupported,
            "warnings": [],
            "errors": [],
        },
        ctx,
    )

    has_fallback = import_result.mapping_status == "merged"
    has_warnings = bool(build_result.warnings or import_result.warnings)
    # NOTE(review): this used to be a conditional whose two arms were both
    # "with-fallback", so drift/warnings and a merged mapping all end up in
    # the same bucket. The outcome is unchanged; confirm whether drift without
    # a merged mapping was meant to map to a different classification.
    needs_attention = drift.has_drift or has_warnings or has_fallback
    classification: WorkflowClassification = "with-fallback" if needs_attention else "full"

    return _finish(
        classification,
        {
            "build": {"output_path": str(build_result.output_path), "family": build_result.family},
            "import": {"mapping_status": import_result.mapping_status},
            "drift": {"has_drift": drift.has_drift},
        },
    )
|
||||
|
||||
|
||||
def _multi_file_roundtrip(
    run_id: str,
    ts: str,
    manifest_path: Path,
    store: EvidenceStore,
) -> WorkflowResult:
    """inspect → validate → build → import → redistribute (or fallback) → compare (FR-1302).

    Runs the single-file round trip and relabels its result. Redistribution
    across several source files is expected to happen inside
    ``import_document``, which that round trip calls.
    """
    outcome = _single_file_roundtrip(run_id, ts, manifest_path, store)
    # The delegate always labels its result "single-file-roundtrip".
    outcome.workflow_name = "multi-file-roundtrip"
    return outcome
|
||||
|
||||
|
||||
def _release_regression(
    run_id: str,
    ts: str,
    manifest_path: Path,
    store: EvidenceStore,
) -> WorkflowResult:
    """End-to-end regression on the stable documentation corpus (FR-1306).

    Currently the single-file round trip run on ``manifest_path``, with the
    result relabelled as "release-regression".
    """
    outcome = _single_file_roundtrip(run_id, ts, manifest_path, store)
    # The delegate always labels its result "single-file-roundtrip".
    outcome.workflow_name = "release-regression"
    return outcome
|
||||
|
||||
|
||||
def _family_switch_build(
    run_id: str,
    ts: str,
    manifest_path: Path,
    store: EvidenceStore,
) -> WorkflowResult:
    """Build under all compatible families and report separately (FR-1307).

    The manifest is loaded once; for every name in ``SUPPORTED_FAMILIES``
    (sorted) a copy with that family is built, a ``build:<family>`` step is
    recorded and a ``build_<family>`` report is saved to ``store``.

    Classification of the returned result:
        * ``"failed"`` -- the manifest could not be loaded, or no family built
          successfully (including the case of no families at all).
        * ``"partial"`` -- some, but not all, families built successfully.
        * ``"with-fallback"`` -- every family built, at least one with warnings.
        * ``"full"`` -- every family built without warnings.

    Args:
        run_id: Identifier the reports are saved under.
        ts: Timestamp string copied into the result.
        manifest_path: Manifest file handed to ``load_manifest``.
        store: Evidence store receiving one report per family.

    Returns:
        A ``WorkflowResult`` whose ``aggregate_output["builds"]`` maps each
        family to its ``success``, ``output_path`` and ``warnings``.
    """
    from markidocx.builder import build_document
    from markidocx.manifest import SUPPORTED_FAMILIES, ManifestError, ProjectConfig, load_manifest

    steps: list[WorkflowStep] = []
    # NOTE(review): the run id is passed as ``workflow`` here -- confirm that
    # ReportContext does not expect the workflow name instead.
    ctx = ReportContext(workflow=run_id)

    try:
        m = load_manifest(manifest_path)
    except ManifestError as exc:
        return WorkflowResult(
            run_id=run_id,
            workflow_name="family-switch-build",
            timestamp=ts,
            classification="failed",
            steps=[WorkflowStep(name="validate", status="failed", error=str(exc))],
            aggregate_output={"error": str(exc)},
        )

    build_outputs: dict[str, Any] = {}
    any_warning = False

    for family in sorted(SUPPORTED_FAMILIES):
        # Same manifest, different family.
        # NOTE(review): every variant reuses m.output_dir and the project
        # name; if the output path is derived from those alone, later builds
        # may overwrite earlier ones -- confirm against build_document.
        m_family = type(m)(
            project=ProjectConfig(
                name=m.project.name,
                feature_level=m.project.feature_level,
                family=family,
            ),
            sources=m.sources,
            output_dir=m.output_dir,
            metadata=m.metadata,
        )
        result = build_document(m_family)
        steps.append(
            WorkflowStep(
                name=f"build:{family}",
                status="executed" if result.success else "failed",
                output={"output_path": str(result.output_path), "warnings": result.warnings},
                error="; ".join(result.errors) if not result.success else None,
            )
        )
        store.save_report(
            run_id,
            f"build_{family}",
            {
                "status": "ok" if result.success else "error",
                "family": family,
                "output_path": str(result.output_path),
                "warnings": result.warnings,
                "errors": result.errors,
            },
            ctx,
        )
        build_outputs[family] = {
            "success": result.success,
            "output_path": str(result.output_path),
            "warnings": result.warnings,
        }
        if result.warnings:
            any_warning = True

    # build_outputs has an entry for every family, successful or not, so its
    # mere non-emptiness says nothing; count the builds that actually worked.
    succeeded = sum(1 for entry in build_outputs.values() if entry["success"])
    classification: WorkflowClassification
    if succeeded == 0:
        classification = "failed"
    elif succeeded < len(build_outputs):
        classification = "partial"
    elif any_warning:
        classification = "with-fallback"
    else:
        classification = "full"

    return WorkflowResult(
        run_id=run_id,
        workflow_name="family-switch-build",
        timestamp=ts,
        classification=classification,
        steps=steps,
        aggregate_output={"builds": build_outputs},
    )
|
||||
Reference in New Issue
Block a user