generated from coulomb/repo-seed
feat: WP-0007 — Interface Completeness & Evidence
T01: markidocx inspect (FR-806) and markidocx test (FR-810) CLI commands
T02: markidocx evidence get/list CLI commands (FR-1409, FR-814)
T03: list_styles() / GET /styles / MCP list_styles with real style data (FR-907)
T04: Evidence assembly — EvidenceSet summary via REST and MCP (FR-1406–1408)
T05: LEVEL3 edge-case tests — diagram mutation, renderer version check,
bibliography duplicate keys / missing refs / special chars (FR-534, FR-538, FR-542)
T06: markidocx template extract + Word-first round-trip regression test (FR-606)
New: differ._compare_diagram_blocks tracks fenced diagram source drift (FR-534)
New: diagrams.check_renderer_version emits warning for outdated renderers (FR-538)
New: bibliography.validate_citations detects duplicate keys and missing entries (FR-542)
New: templates.extract_template / TemplateExtractionResult / list_styles / StyleEntry
New: REST POST /template/extract; MCP extract_template tool
278 tests pass, ruff+mypy clean.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -206,3 +206,50 @@ def compare_citations(
|
||||
preserved.append(f"reference-entry:{key}")
|
||||
else:
|
||||
degraded.append(f"reference-entry:lost '{key}'")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Validation helpers (FR-542)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def validate_citations(md_text: str) -> list:
    """Validate citation consistency in *md_text*.

    Returns a list of WarningRecord (severity WARNING) containing:

    - ``citation-duplicate-key`` for a key that appears more than once in
      the references section
    - ``citation-key-missing`` for an inline citation key with no
      corresponding reference entry

    Each offending key is reported exactly once, in first-seen order, no
    matter how often it is repeated; duplicate-key warnings come before
    missing-key warnings.
    """
    from markidocx.errors import Severity, WarningRecord

    def _warning(reason: str, key: str) -> WarningRecord:
        return WarningRecord(
            severity=Severity.WARNING,
            reason=reason,
            construct=f"@{key}",
        )

    warnings: list[WarningRecord] = []

    inline_keys = extract_citation_keys(md_text)
    entries, _ = extract_references_section(md_text)

    # Duplicate keys in the references section: warn on the second
    # occurrence only, so a key listed three times is not reported twice.
    seen_keys: set[str] = set()
    reported_duplicates: set[str] = set()
    for key, _entry in entries:
        if key in seen_keys and key not in reported_duplicates:
            reported_duplicates.add(key)
            warnings.append(_warning("citation-duplicate-key", key))
        seen_keys.add(key)

    # Inline citations with no reference entry: a key cited many times in
    # the text still produces a single warning.
    reported_missing: set[str] = set()
    for key in inline_keys:
        if key not in seen_keys and key not in reported_missing:
            reported_missing.add(key)
            warnings.append(_warning("citation-key-missing", key))

    return warnings
|
||||
|
||||
@@ -19,6 +19,8 @@ app = typer.Typer(
|
||||
)
|
||||
# Sub-command groups, mounted on the root app as `template ...` and
# `evidence ...` (registration order is kept: template first).
template_app = typer.Typer(help="Template family management.")
evidence_app = typer.Typer(help="Evidence store access.")
app.add_typer(template_app, name="template")
app.add_typer(evidence_app, name="evidence")
|
||||
|
||||
|
||||
def _version_callback(value: bool) -> None:
|
||||
@@ -221,6 +223,168 @@ def compare(
|
||||
raise typer.Exit(1 if report.has_drift else 0)
|
||||
|
||||
|
||||
@app.command()
def inspect(
    manifest: Annotated[Path, typer.Argument(help="Path to manifest YAML file")],
    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
) -> None:
    """Load a project manifest and report its structure (FR-806).

    Prints the project name, family, feature level, source paths and the
    Level-3 capability entry, either as one JSON object (``--json``) or as
    console text. Exits 1 when the manifest cannot be loaded, 0 otherwise.
    """
    from markidocx.level3 import capabilities_entry as level3_capabilities
    from markidocx.manifest import ManifestError, load_manifest

    try:
        m = load_manifest(manifest)
    except ManifestError as exc:
        if not json_output:
            err_console.print(f"[red]✗ Manifest error:[/red] {exc}")
        else:
            typer.echo(json.dumps({"status": "error", "message": str(exc)}))
        raise typer.Exit(1) from None

    project = m.project
    sources = [str(entry.path) for entry in m.sources]
    result = {
        "status": "ok",
        "project": project.name,
        "family": project.family,
        "feature_level": project.feature_level.value,
        "sources": sources,
        "level3": level3_capabilities(),
    }

    if json_output:
        typer.echo(json.dumps(result))
        raise typer.Exit(0)

    # Only a dict-shaped capability entry is queried; anything else is
    # treated as "no capabilities reported".
    capabilities = result.get("level3")
    l3: dict[str, object] = capabilities if isinstance(capabilities, dict) else {}

    console.print(f"[bold]Project:[/bold] {project.name}")
    console.print(f" family: {project.family}")
    console.print(f" feature_level: {project.feature_level.value}")
    console.print(f" sources: {', '.join(sources)}")
    console.print(f" level3 xref: {l3.get('xref_available', False)}")
    console.print(f" level3 fig: {l3.get('figures_available', False)}")
    console.print(f" level3 diag: {l3.get('diagrams_available', False)}")
    console.print(f" level3 bib: {l3.get('bibliography_available', False)}")

    raise typer.Exit(0)
|
||||
|
||||
|
||||
@app.command("test")
def run_tests(
    manifest: Annotated[Path, typer.Argument(help="Path to manifest YAML file")],
    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
) -> None:
    """Run the end-to-end regression test suite for a project (FR-810).

    Executes the ``single-file-roundtrip`` workflow against *manifest* and
    reports per-step results. A step counts as passed when its status is
    ``executed``, failed when ``failed``, and skipped otherwise. Exits 1 on
    a workflow error or when the run is classified ``failed``, else 0.
    """
    from markidocx.workflows import WorkflowError, run_workflow

    try:
        result = run_workflow("single-file-roundtrip", manifest)
    except WorkflowError as exc:
        if not json_output:
            err_console.print(f"[red]✗ Workflow error:[/red] {exc}")
        else:
            typer.echo(json.dumps({"status": "error", "message": str(exc)}))
        raise typer.Exit(1) from None

    # Tally step outcomes in a single pass.
    passed = failed = skipped = 0
    for outcome in result.steps:
        if outcome.status == "executed":
            passed += 1
        elif outcome.status == "failed":
            failed += 1
        else:
            skipped += 1

    overall_ok = result.classification != "failed"

    if json_output:
        report = {
            "status": "ok" if overall_ok else "error",
            "run_id": result.run_id,
            "classification": result.classification,
            "passed": passed,
            "failed": failed,
            "skipped": skipped,
            "steps": [
                {"name": s.name, "status": s.status, "error": s.error}
                for s in result.steps
            ],
        }
        typer.echo(json.dumps(report))
    else:
        icon = "[green]✓[/green]" if overall_ok else "[red]✗[/red]"
        console.print(f"{icon} Tests: {passed} passed, {failed} failed, {skipped} skipped")
        for step in result.steps:
            if step.status == "executed":
                step_icon = "✓"
            elif step.status == "failed":
                step_icon = "✗"
            else:
                step_icon = "—"
            console.print(f" {step_icon} {step.name}: {step.status}")
        if result.run_id:
            console.print(f" run_id: {result.run_id}")

    raise typer.Exit(0 if overall_ok else 1)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Evidence commands (T02 — FR-1409, FR-814)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@evidence_app.command("list")
def evidence_list(
    limit: Annotated[int, typer.Option("--limit", help="Maximum runs to show")] = 10,
    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
) -> None:
    """List run IDs in the evidence store, newest first.

    The store's run list is reversed and truncated to at most *limit*
    entries (assumes ``EvidenceStore.list_runs()`` returns oldest first —
    TODO confirm). A zero or negative *limit* shows no runs.
    """
    from markidocx.evidence import EvidenceStore

    store = EvidenceStore()
    # Clamp at zero: a negative slice bound would drop entries from the
    # *end* of the list instead of limiting the output.
    max_runs = max(limit, 0)
    runs = list(reversed(store.list_runs()))[:max_runs]

    if json_output:
        typer.echo(json.dumps({"runs": runs}))
        return

    if not runs:
        console.print("No evidence runs found.")
        return

    for run_id in runs:
        console.print(run_id)
|
||||
|
||||
|
||||
@evidence_app.command("get")
def evidence_get(
    run_id: Annotated[str, typer.Argument(help="Run ID to retrieve")],
    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
    output: Annotated[Path | None, typer.Option("--output", help="Write evidence JSON to file")] = None,
) -> None:
    """Show the assembled evidence summary for one run (FR-1409).

    Exits 1 when the store holds no reports for *run_id*. Otherwise the
    run's evidence set is assembled and its summary is printed (JSON or
    console text). With ``--output`` the full report list is also written
    to that file as indented JSON. Exits 0 on success.
    """
    from markidocx.evidence import EvidenceStore

    store = EvidenceStore()
    reports = store.list_reports(run_id)

    if not reports:
        if not json_output:
            err_console.print(f"[red]✗[/red] No evidence found for run_id: {run_id}")
        else:
            typer.echo(json.dumps({"status": "not_found", "run_id": run_id}))
        raise typer.Exit(1)

    summary = store.assemble_set([run_id]).summary()

    if output:
        # The file receives the raw reports, not the summary that is printed.
        payload = {"run_id": run_id, "reports": [r.to_dict() for r in reports]}
        output.write_text(json.dumps(payload, indent=2), encoding="utf-8")

    if json_output:
        typer.echo(json.dumps({"run_id": run_id, **summary}))
        raise typer.Exit(0)

    classification = summary["classification"]
    if classification == "pass":
        icon = "[green]✓[/green]"
    elif "warning" in classification:
        icon = "[yellow]⚠[/yellow]"
    else:
        icon = "[red]✗[/red]"

    console.print(f"{icon} Run: [bold]{run_id}[/bold] [{classification}]")
    console.print(f" Reports: {summary['report_count']}")
    console.print(f" Warnings: {summary['warnings_count']}")
    console.print(f" Errors: {summary['errors_count']}")
    for comp in summary["composition"]:
        console.print(f" • {comp['type']} ({comp['run_id'][:8]}…)")

    raise typer.Exit(0)
|
||||
|
||||
|
||||
@template_app.command("list")
|
||||
def template_list(
|
||||
json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
|
||||
@@ -273,6 +437,76 @@ def template_register(
|
||||
raise typer.Exit(1) from None
|
||||
|
||||
|
||||
@template_app.command("styles")
def template_styles(
    family: Annotated[str | None, typer.Option("--family", help="Filter by family name")] = None,
    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
) -> None:
    """Show the styles available for a template family (FR-907).

    With ``--json`` a JSON array of style objects is written; otherwise a
    table with name, ID, type and family columns is rendered.
    """
    from markidocx.templates import list_styles

    entries = list_styles(family=family)

    if not json_output:
        title_suffix = " — " + family if family else ""
        table = Table(title=f"Styles{title_suffix}")
        table.add_column("Name", style="bold")
        for heading in ("ID", "Type", "Family"):
            table.add_column(heading)
        for e in entries:
            table.add_row(e.name, e.style_id, e.type, e.family)
        console.print(table)
        return

    records = []
    for e in entries:
        records.append(
            {
                "name": e.name,
                "style_id": e.style_id,
                "type": e.type,
                "family": e.family,
                "built_in": e.built_in,
            }
        )
    typer.echo(json.dumps(records))
|
||||
|
||||
|
||||
@template_app.command("extract")
def template_extract(
    source: Annotated[Path, typer.Argument(help="Source DOCX to extract template from")],
    template_out: Annotated[Path | None, typer.Option("--template-out", help="Output template path")] = None,
    content_out: Annotated[Path | None, typer.Option("--content-out", help="Output Markdown content path")] = None,
    family: Annotated[str | None, typer.Option("--family", help="Register extracted template under this name")] = None,
    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
) -> None:
    """Extract a content-free template shell from an existing DOCX (FR-606).

    When ``--template-out`` is omitted the template is written next to
    *source* as ``<stem>-template.docx``. Exits 1 if *source* is not an
    existing file.

    ``--content-out`` is accepted for interface compatibility but this
    command does not use it; a notice is printed on stderr so the option
    is not dropped silently.
    """
    from markidocx.templates import extract_template

    # Fail with the same error shape as the other commands instead of
    # letting the DOCX loader raise an unhandled exception.
    if not source.is_file():
        message = f"Source DOCX not found: {source}"
        if json_output:
            typer.echo(json.dumps({"status": "error", "message": message}))
        else:
            err_console.print(f"[red]✗[/red] {message}")
        raise typer.Exit(1)

    if content_out is not None:
        # stderr keeps the --json payload on stdout machine-readable.
        err_console.print(
            "[yellow]⚠[/yellow] --content-out is currently ignored; no Markdown content is written."
        )

    if template_out is None:
        template_out = source.parent / (source.stem + "-template.docx")

    result = extract_template(source, template_out, family=family)

    if json_output:
        typer.echo(
            json.dumps(
                {
                    "status": "ok",
                    "template_path": str(result.template_path),
                    "styles_preserved": result.styles_preserved,
                    "warnings": [w.to_dict() for w in result.warnings],
                }
            )
        )
    else:
        console.print(f"[green]✓[/green] Template extracted: [bold]{result.template_path}[/bold]")
        console.print(f" Styles preserved: {result.styles_preserved}")
        for w in result.warnings:
            console.print(f"[yellow]⚠[/yellow] {w}")
|
||||
|
||||
|
||||
@app.command()
|
||||
def serve(
|
||||
host: Annotated[str, typer.Option("--host", help="Bind host")] = "127.0.0.1",
|
||||
|
||||
@@ -205,6 +205,56 @@ def detect_renderers() -> dict[str, DiagramRenderer]:
|
||||
return available
|
||||
|
||||
|
||||
# Oldest (major, minor) release accepted for each diagram renderer CLI,
# keyed by command name (FR-538)
_MIN_RENDERER_VERSIONS: dict[str, tuple[int, ...]] = {
    "mmdc": (9, 0),  # Mermaid CLI >= 9.x
    "dot": (2, 40),  # Graphviz >= 2.40
    "plantuml": (1, 50),  # PlantUML >= 1.50
}


def check_renderer_version(
    cmd: str, warning_records: list
) -> None:
    """Append a warning to *warning_records* if renderer *cmd* is too old (FR-538).

    Commands without an entry in ``_MIN_RENDERER_VERSIONS`` are ignored.
    Otherwise ``cmd --version`` (``cmd -version`` for plantuml) is run with
    a 5 second timeout and the first ``<digits>.<digits>`` token of its
    stdout (or stderr when stdout is empty) is taken as the version.

    The check is best-effort: if the command cannot be run or prints no
    version-like token, nothing is recorded.
    """
    import re as _re

    required = _MIN_RENDERER_VERSIONS.get(cmd)
    if required is None:
        return

    flag = "-version" if cmd == "plantuml" else "--version"
    try:
        completed = subprocess.run(
            [cmd, flag],
            capture_output=True,
            timeout=5,
            text=True,
        )
        banner = completed.stdout or completed.stderr
    except Exception:
        return  # Can't probe — don't warn

    # First numeric token like "10.4.0" or "2.42.2"; only major.minor is used.
    found = _re.search(r"(\d+)\.(\d+)", banner)
    if found is None:
        return

    major, minor = (int(part) for part in found.groups())
    if (major, minor) >= required:
        return

    warning_records.append(
        WarningRecord(
            severity=Severity.WARNING,
            reason="renderer-version-unsupported",
            construct=f"{cmd} {major}.{minor} (min {required[0]}.{required[1]})",
        )
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -85,6 +85,9 @@ def compare(original: str, reimported: str) -> DriftReport:
|
||||
# --- Figures (FR-532, FR-541) ---
|
||||
_compare_figures(original, reimported, preserved, degraded, broken)
|
||||
|
||||
# --- Diagram source blocks (FR-534) ---
|
||||
_compare_diagram_blocks(original, reimported, preserved, degraded, broken)
|
||||
|
||||
# --- Citations & Bibliography (FR-535, FR-542) ---
|
||||
from markidocx.bibliography import compare_citations
|
||||
|
||||
@@ -181,6 +184,38 @@ def _compare_xrefs(
|
||||
degraded.append(f"xref-link:degraded [{link_text}][{anchor}]")
|
||||
|
||||
|
||||
_FENCED_BLOCK_RE = re.compile(r"```(\w+)\n(.*?)```", re.DOTALL)
|
||||
|
||||
|
||||
def _extract_fenced_blocks(text: str) -> list[tuple[str, str]]:
|
||||
"""Extract all fenced code blocks as (language, source) pairs."""
|
||||
return [(m.group(1).strip().lower(), m.group(2).rstrip()) for m in _FENCED_BLOCK_RE.finditer(text)]
|
||||
|
||||
|
||||
def _compare_diagram_blocks(
    original: str,
    reimported: str,
    preserved: list[str],
    degraded: list[str],
    broken: list[str],
) -> None:
    """Record source drift between diagram fenced blocks (FR-534).

    Diagram blocks (fenced blocks whose language is in ``DIAGRAM_TYPES``)
    are paired by position. For each block of *original*:

    - same language and source at that position -> ``preserved``
    - a block exists there but differs -> ``degraded`` (``source-mutated``)
    - no block at that position in *reimported* -> ``broken`` (``missing``)

    Extra diagram blocks present only in *reimported* are not reported.
    """
    from markidocx.diagrams import DIAGRAM_TYPES

    def _diagram_blocks(text: str) -> list[tuple[str, str]]:
        return [
            (lang, src)
            for lang, src in _extract_fenced_blocks(text)
            if lang in DIAGRAM_TYPES
        ]

    before = _diagram_blocks(original)
    after = _diagram_blocks(reimported)
    after_count = len(after)

    for i, (lang, src) in enumerate(before):
        if i >= after_count:
            broken.append(f"diagram:{lang}[{i}]:missing")
            continue
        if (lang, src) == after[i]:
            preserved.append(f"diagram:{lang}[{i}]")
        else:
            degraded.append(f"diagram:{lang}[{i}]:source-mutated")
|
||||
|
||||
|
||||
def _compare_sets(
|
||||
kind: str,
|
||||
orig: list[str],
|
||||
|
||||
@@ -33,9 +33,21 @@ def list_templates() -> list[dict[str, str]]:
|
||||
|
||||
|
||||
@mcp.tool()
def list_styles(family: str | None = None) -> list[dict[str, Any]]:
    """List available styles for a template family (FR-1003).

    Delegates to ``markidocx.templates.list_styles`` and returns one plain
    dict per style with the keys ``name``, ``style_id``, ``type``,
    ``family`` and ``built_in``.
    """
    # Aliased on import: this tool has the same name as the helper it wraps.
    from markidocx.templates import list_styles as _list_styles

    return [
        {
            "name": e.name,
            "style_id": e.style_id,
            "type": e.type,
            "family": e.family,
            "built_in": e.built_in,
        }
        for e in _list_styles(family=family)
    ]
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
@@ -318,7 +330,7 @@ def invoke_workflow(
|
||||
|
||||
@mcp.tool()
|
||||
def get_evidence(run_id: str) -> dict[str, Any]:
|
||||
"""Retrieve evidence artifacts for a completed run (FR-1013)."""
|
||||
"""Retrieve assembled evidence set for a completed run (FR-1013, FR-1406–1408)."""
|
||||
from markidocx.evidence import EvidenceStore
|
||||
|
||||
store = EvidenceStore()
|
||||
@@ -327,18 +339,46 @@ def get_evidence(run_id: str) -> dict[str, Any]:
|
||||
return {
|
||||
"status": "not_found",
|
||||
"run_id": run_id,
|
||||
"reports": [],
|
||||
"warnings": [f"No evidence found for run_id: {run_id}"],
|
||||
}
|
||||
ev_set = store.assemble_set([run_id])
|
||||
return {
|
||||
"status": "ok",
|
||||
"run_id": run_id,
|
||||
"reports": [r.to_dict() for r in reports],
|
||||
**ev_set.summary(),
|
||||
"warnings": [],
|
||||
"errors": [],
|
||||
}
|
||||
|
||||
|
||||
@mcp.tool()
def extract_template(
    source_path: str,
    template_out: str,
    family: str | None = None,
) -> dict[str, Any]:
    """Extract a content-free template shell from an existing DOCX (FR-606).

    Thin MCP wrapper around ``markidocx.templates.extract_template``: the
    two string paths are converted to ``Path`` objects, *family* is passed
    through for optional registration, and the result is returned as a
    plain dict with ``status``, ``template_path``, ``styles_preserved``,
    ``warnings`` and an empty ``errors`` list.
    """
    from markidocx.templates import extract_template as _extract_template

    src = Path(source_path)
    dest = Path(template_out)
    result = _extract_template(source_path=src, template_out=dest, family=family)

    response: dict[str, Any] = {
        "status": "ok",
        "template_path": str(result.template_path),
        "styles_preserved": result.styles_preserved,
        "warnings": [w.to_dict() for w in result.warnings],
        "errors": [],
    }
    return response
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MCP resources (FR-1011)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -98,6 +98,12 @@ class WorkflowInvokeRequest(BaseModel):
|
||||
context: dict[str, Any] = {}
|
||||
|
||||
|
||||
class TemplateExtractRequest(BaseModel):
    """Request body for ``POST /template/extract``."""

    # Source DOCX, base64-encoded.
    docx_base64: str
    # Optional family name to register the extracted template under.
    family: str | None = None
    # Caller-supplied context, passed back in the response envelope.
    context: dict[str, Any] = {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# App factory
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -180,9 +186,23 @@ def create_app() -> FastAPI:
|
||||
)
|
||||
|
||||
@app.get("/styles", response_model=ResponseEnvelope)
def styles(family: str | None = None) -> ResponseEnvelope:
    """List available styles for a template family (FR-907).

    *family* is an optional query parameter. The envelope's ``outputs`` is
    a list of dicts with the keys ``name``, ``style_id``, ``type``,
    ``family`` and ``built_in``.
    """
    from markidocx.templates import list_styles

    return _ok(
        outputs=[
            {
                "name": e.name,
                "style_id": e.style_id,
                "type": e.type,
                "family": e.family,
                "built_in": e.built_in,
            }
            for e in list_styles(family=family)
        ]
    )
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# T02 — Functional endpoints (FR-902–908, FR-913–916)
|
||||
@@ -389,7 +409,7 @@ def create_app() -> FastAPI:
|
||||
|
||||
@app.get("/evidence/{run_id}", response_model=ResponseEnvelope)
|
||||
def get_evidence(run_id: str) -> ResponseEnvelope:
|
||||
"""Retrieve evidence artifacts for a completed run (FR-914)."""
|
||||
"""Retrieve assembled evidence set for a completed run (FR-914, FR-1406–1408)."""
|
||||
from markidocx.evidence import EvidenceStore
|
||||
|
||||
store = EvidenceStore()
|
||||
@@ -402,9 +422,36 @@ def create_app() -> FastAPI:
|
||||
errors=[],
|
||||
context={"run_id": run_id},
|
||||
)
|
||||
ev_set = store.assemble_set([run_id])
|
||||
return _ok(
|
||||
outputs={"run_id": run_id, "reports": [r.to_dict() for r in reports]},
|
||||
outputs={"run_id": run_id, **ev_set.summary()},
|
||||
context={"run_id": run_id},
|
||||
)
|
||||
|
||||
@app.post("/template/extract", response_model=ResponseEnvelope)
def template_extract_endpoint(req: TemplateExtractRequest) -> ResponseEnvelope:
    """Extract a content-free template shell from a base64-encoded DOCX (FR-606).

    The uploaded document is decoded into a temporary directory, the
    template is extracted there, and the result is returned base64-encoded
    together with the preserved style count and any warnings.
    """
    import base64
    import tempfile

    from markidocx.templates import extract_template

    with tempfile.TemporaryDirectory() as workdir:
        docx_in = Path(workdir) / "source.docx"
        docx_out = Path(workdir) / "template.docx"
        docx_in.write_bytes(base64.b64decode(req.docx_base64))

        # NOTE(review): with req.family set, the template is registered from
        # a path inside this temporary directory, which is deleted on exit —
        # confirm that registration copies the file rather than keeping the path.
        result = extract_template(docx_in, docx_out, family=req.family)
        template_b64 = base64.b64encode(docx_out.read_bytes()).decode()

    outputs = {
        "template_base64": template_b64,
        "styles_preserved": result.styles_preserved,
        "warnings": [w.to_dict() for w in result.warnings],
    }
    return _ok(outputs=outputs, context=req.context)
|
||||
|
||||
return app
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from docx import Document
|
||||
@@ -23,6 +23,26 @@ class FamilyInfo:
|
||||
template_path: Path | None = None
|
||||
|
||||
|
||||
@dataclass
class StyleEntry:
    """Describes one style found in a DOCX template (FR-907)."""

    # Display name of the style.
    name: str
    # Identifier of the style within the document.
    style_id: str
    # One of "paragraph" | "character" | "table" | "numbering".
    type: str
    # Template family the style was listed for.
    family: str
    # False when the style is marked as a custom style in the document.
    built_in: bool
|
||||
|
||||
|
||||
@dataclass
class TemplateExtractionResult:
    """Outcome of extracting a content-free template (FR-606)."""

    # Where the extracted template was saved.
    template_path: Path
    # Number of styles counted in the source document.
    styles_preserved: int
    # Warning records collected during extraction; a fresh list per instance.
    warnings: list = field(default_factory=list)
|
||||
|
||||
|
||||
class RegistrationError(Exception):
    """Signals that a template could not be registered."""
|
||||
|
||||
@@ -70,6 +90,114 @@ class FamilyRegistry:
|
||||
return doc
|
||||
|
||||
|
||||
def list_styles(family: str | None = None) -> list[StyleEntry]:
    """Return the styles of the document created for *family* (FR-907).

    *family* defaults to ``"article"``. A document is obtained from
    ``FamilyRegistry.create_document`` and each of its styles becomes a
    ``StyleEntry``. The result is sorted by type, then by name.
    """
    custom_style_attr = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}customStyle"
    type_names = {
        1: "paragraph",
        2: "character",
        3: "table",
        4: "numbering",
    }

    target_family = family or "article"
    doc = FamilyRegistry().create_document(target_family)

    def _to_entry(style) -> StyleEntry:
        raw_type = style.type
        type_code = raw_type.value if hasattr(raw_type, "value") else int(raw_type)
        elem = getattr(style, "element", None)
        # A style counts as custom only when its element carries customStyle="1".
        is_custom = elem is not None and elem.get(custom_style_attr) == "1"
        return StyleEntry(
            name=style.name,
            style_id=style.style_id,
            # Unknown type codes fall back to "paragraph".
            type=type_names.get(type_code, "paragraph"),
            family=target_family,
            built_in=not is_custom,
        )

    return sorted((_to_entry(style) for style in doc.styles), key=lambda e: (e.type, e.name))
|
||||
|
||||
|
||||
def extract_template(
    source_path: Path,
    template_out: Path,
    family: str | None = None,
) -> TemplateExtractionResult:
    """Extract a content-free template shell from an existing DOCX (FR-606).

    Loads *source_path*, removes every child of the document body except
    the section properties (``w:sectPr``), inserts one empty paragraph, and
    saves the result to *template_out*. Everything outside the body is
    left exactly as loaded.

    Returns a ``TemplateExtractionResult`` with the number of styles found
    in the source and any warnings:

    - ``template-no-styles`` when the source exposes no styles
    - ``template-registration-failed`` when *family* is given and
      registering *template_out* under it raises ``RegistrationError``
      (the template file is still written)
    """
    from docx.oxml import OxmlElement
    from docx.oxml.ns import qn

    from markidocx.errors import Severity, WarningRecord

    warnings: list[WarningRecord] = []

    # Load the source once: clearing the body does not touch the styles,
    # so the same document serves for counting and as the template.
    template_doc = Document(str(source_path))
    styles_count = len(list(template_doc.styles))

    # Drop all body content (paragraphs, tables, ...) but keep sectPr.
    body = template_doc.element.body
    sect_pr = body.find(qn("w:sectPr"))
    for child in list(body):  # iterate over a copy while removing
        if child is not sect_pr:
            body.remove(child)

    # Add a single empty paragraph so the doc is valid; it must sit before
    # sectPr when section properties are present.
    empty_paragraph = OxmlElement("w:p")
    if sect_pr is not None:
        sect_pr.addprevious(empty_paragraph)
    else:
        body.append(empty_paragraph)

    template_doc.save(str(template_out))

    if styles_count == 0:
        warnings.append(
            WarningRecord(
                severity=Severity.WARNING,
                reason="template-no-styles",
                construct=str(source_path),
            )
        )

    # Optionally register the extracted template; a failure is reported as
    # a warning rather than raised.
    if family:
        registry = FamilyRegistry()
        try:
            registry.register(template_out, family)
        except RegistrationError as exc:
            warnings.append(
                WarningRecord(
                    severity=Severity.WARNING,
                    reason="template-registration-failed",
                    construct=str(exc),
                )
            )

    return TemplateExtractionResult(
        template_path=template_out,
        styles_preserved=styles_count,
        warnings=warnings,
    )
|
||||
|
||||
|
||||
def _apply_family_defaults(doc: DocxDocument, family: str) -> None:
|
||||
"""Apply minimal style defaults for built-in families."""
|
||||
styles = doc.styles
|
||||
|
||||
Reference in New Issue
Block a user