feat: WP-0007 — Interface Completeness & Evidence

T01: markidocx inspect (FR-806) and markidocx test (FR-810) CLI commands
T02: markidocx evidence get/list CLI commands (FR-1409, FR-814)
T03: list_styles() / GET /styles / MCP list_styles with real style data (FR-907)
T04: Evidence assembly — EvidenceSet summary via REST and MCP (FR-1406–1408)
T05: LEVEL3 edge-case tests — diagram mutation, renderer version check, bibliography duplicate keys / missing refs / special chars (FR-534, FR-538, FR-542)
T06: markidocx template extract + Word-first round-trip regression test (FR-606)
New: differ._compare_diagram_blocks tracks fenced diagram source drift (FR-534)
New: diagrams.check_renderer_version emits warning for outdated renderers (FR-538)
New: bibliography.validate_citations detects duplicate keys and missing entries (FR-542)
New: templates.extract_template / TemplateExtractionResult / list_styles / StyleEntry
New: REST POST /template/extract; MCP extract_template tool

278 tests pass, ruff+mypy clean.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-17 19:30:09 +00:00
parent 893b9fa57b
commit 9fe64bcd7f
16 changed files with 1537 additions and 19 deletions
--- a/src/markidocx/bibliography.py
+++ b/src/markidocx/bibliography.py
@@ -206,3 +206,50 @@ def compare_citations(
            preserved.append(f"reference-entry:{key}")
        else:
            degraded.append(f"reference-entry:lost '{key}'")
+
+
+# ---------------------------------------------------------------------------
+# Validation helpers (FR-542)
+# ---------------------------------------------------------------------------
+
+
+def validate_citations(md_text: str) -> list:
+    """Check that citations and reference entries in *md_text* agree.
+
+    Two kinds of inconsistency are reported, each as a ``WarningRecord`` with
+    ``Severity.WARNING`` and ``construct`` set to ``@<key>``:
+
+    - ``citation-duplicate-key``: a references-section key that repeats an
+      earlier entry (one record per repeated occurrence);
+    - ``citation-key-missing``: an inline citation key with no entry in the
+      references section.
+
+    Duplicate-key records come first, followed by missing-key records.
+    """
+    from markidocx.errors import Severity, WarningRecord
+
+    def _record(reason: str, key: str) -> WarningRecord:
+        # Both checks emit the same record shape; only the reason differs.
+        return WarningRecord(
+            severity=Severity.WARNING,
+            reason=reason,
+            construct=f"@{key}",
+        )
+
+    cited_keys = extract_citation_keys(md_text)
+    reference_entries, _ = extract_references_section(md_text)
+
+    found: list[WarningRecord] = []
+
+    # Walk the reference entries once, flagging every repeat of a key.
+    defined_keys: set[str] = set()
+    for ref_key, _ in reference_entries:
+        if ref_key in defined_keys:
+            found.append(_record("citation-duplicate-key", ref_key))
+        else:
+            defined_keys.add(ref_key)
+
+    # defined_keys now holds every key present in the references section.
+    found.extend(
+        _record("citation-key-missing", cited)
+        for cited in cited_keys
+        if cited not in defined_keys
+    )
+    return found
--- a/src/markidocx/cli.py
+++ b/src/markidocx/cli.py
@@ -19,6 +19,8 @@ app = typer.Typer(
 )
 template_app = typer.Typer(help="Template family management.")
 app.add_typer(template_app, name="template")
+evidence_app = typer.Typer(help="Evidence store access.")
+app.add_typer(evidence_app, name="evidence")


 def _version_callback(value: bool) -> None:
@@ -221,6 +223,168 @@ def compare(
    raise typer.Exit(1 if report.has_drift else 0)


+@app.command()
+def inspect(
+    manifest: Annotated[Path, typer.Argument(help="Path to manifest YAML file")],
+    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
+) -> None:
+    """Inspect a project manifest and display its structure (FR-806)."""
+    from markidocx.level3 import capabilities_entry as level3_capabilities
+    from markidocx.manifest import ManifestError, load_manifest
+
+    # A manifest that cannot be loaded is reported in the requested format
+    # and ends the command with exit code 1.
+    try:
+        loaded = load_manifest(manifest)
+    except ManifestError as exc:
+        if not json_output:
+            err_console.print(f"[red]✗ Manifest error:[/red] {exc}")
+        else:
+            typer.echo(json.dumps({"status": "error", "message": str(exc)}))
+        raise typer.Exit(1) from None
+
+    source_paths = [str(src.path) for src in loaded.sources]
+    payload = {
+        "status": "ok",
+        "project": loaded.project.name,
+        "family": loaded.project.family,
+        "feature_level": loaded.project.feature_level.value,
+        "sources": source_paths,
+        "level3": level3_capabilities(),
+    }
+
+    if json_output:
+        typer.echo(json.dumps(payload))
+        raise typer.Exit(0)
+
+    console.print(f"[bold]Project:[/bold] {loaded.project.name}")
+    console.print(f"  family:        {loaded.project.family}")
+    console.print(f"  feature_level: {loaded.project.feature_level.value}")
+    console.print(f"  sources:       {', '.join(source_paths)}")
+
+    # Anything other than a dict is treated as "no capabilities reported",
+    # so every flag below falls back to False.
+    caps_raw = payload.get("level3")
+    caps: dict[str, object] = caps_raw if isinstance(caps_raw, dict) else {}
+    for prefix, cap_key in (
+        ("  level3 xref:   ", "xref_available"),
+        ("  level3 fig:    ", "figures_available"),
+        ("  level3 diag:   ", "diagrams_available"),
+        ("  level3 bib:    ", "bibliography_available"),
+    ):
+        console.print(f"{prefix}{caps.get(cap_key, False)}")
+
+    raise typer.Exit(0)
+
+
+@app.command("test")
+def run_tests(
+    manifest: Annotated[Path, typer.Argument(help="Path to manifest YAML file")],
+    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
+) -> None:
+    """Run the end-to-end regression test suite for a project (FR-810)."""
+    from markidocx.workflows import WorkflowError, run_workflow
+
+    try:
+        outcome = run_workflow("single-file-roundtrip", manifest)
+    except WorkflowError as exc:
+        if not json_output:
+            err_console.print(f"[red]✗ Workflow error:[/red] {exc}")
+        else:
+            typer.echo(json.dumps({"status": "error", "message": str(exc)}))
+        raise typer.Exit(1) from None
+
+    # Tally step statuses: "executed" counts as passed, "failed" as failed,
+    # and every other status as skipped.
+    statuses = [step.status for step in outcome.steps]
+    passed = statuses.count("executed")
+    failed = statuses.count("failed")
+    skipped = len(statuses) - passed - failed
+
+    # The run as a whole succeeds unless the workflow classified it "failed".
+    succeeded = outcome.classification != "failed"
+
+    if json_output:
+        step_rows = [
+            {"name": step.name, "status": step.status, "error": step.error}
+            for step in outcome.steps
+        ]
+        report = {
+            "status": "ok" if succeeded else "error",
+            "run_id": outcome.run_id,
+            "classification": outcome.classification,
+            "passed": passed,
+            "failed": failed,
+            "skipped": skipped,
+            "steps": step_rows,
+        }
+        typer.echo(json.dumps(report))
+    else:
+        if succeeded:
+            headline_icon = "[green]✓[/green]"
+        else:
+            headline_icon = "[red]✗[/red]"
+        console.print(f"{headline_icon} Tests: {passed} passed, {failed} failed, {skipped} skipped")
+        for step in outcome.steps:
+            if step.status == "executed":
+                mark = "✓"
+            elif step.status == "failed":
+                mark = "✗"
+            else:
+                mark = "—"
+            console.print(f"  {mark} {step.name}: {step.status}")
+        if outcome.run_id:
+            console.print(f"  run_id: {outcome.run_id}")
+
+    raise typer.Exit(0 if succeeded else 1)
+
+
+# ---------------------------------------------------------------------------
+# Evidence commands (T02 — FR-1409, FR-814)
+# ---------------------------------------------------------------------------
+
+
+@evidence_app.command("list")
+def evidence_list(
+    limit: Annotated[int, typer.Option("--limit", min=0, help="Maximum runs to show")] = 10,
+    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
+) -> None:
+    """List run IDs in the evidence store, newest first."""
+    from markidocx.evidence import EvidenceStore
+
+    # --limit is constrained to >= 0 in the signature: a negative value would
+    # otherwise reach the slice below and silently drop runs from the tail
+    # of the list instead of capping its length.
+    # The order returned by list_runs() is reversed for display, then capped.
+    newest_first = list(reversed(EvidenceStore().list_runs()))
+    runs = newest_first[:limit]
+
+    if json_output:
+        typer.echo(json.dumps({"runs": runs}))
+        return
+
+    if not runs:
+        console.print("No evidence runs found.")
+        return
+
+    for run_id in runs:
+        console.print(run_id)
+
+
+@evidence_app.command("get")
+def evidence_get(
+    run_id: Annotated[str, typer.Argument(help="Run ID to retrieve")],
+    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
+    output: Annotated[Path | None, typer.Option("--output", help="Write evidence JSON to file")] = None,
+) -> None:
+    """Retrieve and display evidence for a completed run (FR-1409)."""
+    from markidocx.evidence import EvidenceStore
+
+    store = EvidenceStore()
+    reports = store.list_reports(run_id)
+
+    # No reports for this run: report "not found" and exit 1.
+    if not reports:
+        if not json_output:
+            err_console.print(f"[red]✗[/red] No evidence found for run_id: {run_id}")
+        else:
+            typer.echo(json.dumps({"status": "not_found", "run_id": run_id}))
+        raise typer.Exit(1)
+
+    summary = store.assemble_set([run_id]).summary()
+
+    # --output receives the full per-report dump; stdout only gets the summary.
+    if output:
+        full_dump = {"run_id": run_id, "reports": [r.to_dict() for r in reports]}
+        output.write_text(json.dumps(full_dump, indent=2), encoding="utf-8")
+
+    if json_output:
+        typer.echo(json.dumps({"run_id": run_id, **summary}))
+        raise typer.Exit(0)
+
+    classification = summary["classification"]
+    if classification == "pass":
+        icon = "[green]✓[/green]"
+    elif "warning" in classification:
+        icon = "[yellow]⚠[/yellow]"
+    else:
+        icon = "[red]✗[/red]"
+
+    console.print(f"{icon} Run: [bold]{run_id}[/bold]  [{classification}]")
+    console.print(f"  Reports:  {summary['report_count']}")
+    console.print(f"  Warnings: {summary['warnings_count']}")
+    console.print(f"  Errors:   {summary['errors_count']}")
+    for part in summary["composition"]:
+        console.print(f"  • {part['type']} ({part['run_id'][:8]}…)")
+
+    raise typer.Exit(0)
+
+
@template_app.command("list")
 def template_list(
    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
@@ -273,6 +437,76 @@ def template_register(
        raise typer.Exit(1) from None


+@template_app.command("styles")
+def template_styles(
+    family: Annotated[str | None, typer.Option("--family", help="Filter by family name")] = None,
+    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
+) -> None:
+    """List available styles for a template family (FR-907)."""
+    from markidocx.templates import list_styles
+
+    entries = list_styles(family=family)
+
+    if json_output:
+        rows = [
+            {
+                "name": entry.name,
+                "style_id": entry.style_id,
+                "type": entry.type,
+                "family": entry.family,
+                "built_in": entry.built_in,
+            }
+            for entry in entries
+        ]
+        typer.echo(json.dumps(rows))
+        return
+
+    # The table shows the same entries minus the built_in flag.
+    title = f"Styles — {family}" if family else "Styles"
+    table = Table(title=title)
+    table.add_column("Name", style="bold")
+    for heading in ("ID", "Type", "Family"):
+        table.add_column(heading)
+    for entry in entries:
+        table.add_row(entry.name, entry.style_id, entry.type, entry.family)
+    console.print(table)
+
+
+@template_app.command("extract")
+def template_extract(
+    source: Annotated[Path, typer.Argument(help="Source DOCX to extract template from")],
+    template_out: Annotated[Path | None, typer.Option("--template-out", help="Output template path")] = None,
+    content_out: Annotated[Path | None, typer.Option("--content-out", help="Output Markdown content path")] = None,
+    family: Annotated[str | None, typer.Option("--family", help="Register extracted template under this name")] = None,
+    json_output: Annotated[bool, typer.Option("--json", help="Machine-readable JSON output")] = False,
+) -> None:
+    """Extract a content-free template shell from an existing DOCX (FR-606)."""
+    from markidocx.templates import extract_template
+
+    # Report a missing input the way the other commands report failures
+    # (message in the requested format, exit code 1) instead of letting it
+    # surface as a traceback from the DOCX loader.
+    if not source.is_file():
+        message = f"Source DOCX not found: {source}"
+        if json_output:
+            typer.echo(json.dumps({"status": "error", "message": message}))
+        else:
+            err_console.print(f"[red]✗[/red] {message}")
+        raise typer.Exit(1)
+
+    # --content-out is accepted but nothing in this command writes Markdown
+    # content; say so via err_console rather than silently dropping it.
+    if content_out is not None:
+        err_console.print("[yellow]⚠[/yellow] --content-out is not supported yet; option ignored")
+
+    # Default output: "<stem>-template.docx" next to the source file.
+    if template_out is None:
+        template_out = source.parent / (source.stem + "-template.docx")
+
+    result = extract_template(source, template_out, family=family)
+
+    if json_output:
+        typer.echo(
+            json.dumps(
+                {
+                    "status": "ok",
+                    "template_path": str(result.template_path),
+                    "styles_preserved": result.styles_preserved,
+                    "warnings": [w.to_dict() for w in result.warnings],
+                }
+            )
+        )
+    else:
+        console.print(f"[green]✓[/green] Template extracted: [bold]{result.template_path}[/bold]")
+        console.print(f"  Styles preserved: {result.styles_preserved}")
+        for w in result.warnings:
+            console.print(f"[yellow]⚠[/yellow] {w}")
+
+
@app.command()
 def serve(
    host: Annotated[str, typer.Option("--host", help="Bind host")] = "127.0.0.1",
--- a/src/markidocx/diagrams.py
+++ b/src/markidocx/diagrams.py
@@ -205,6 +205,56 @@ def detect_renderers() -> dict[str, DiagramRenderer]:
    return available


+# Minimum supported (major, minor) version for each diagram renderer CLI,
+# keyed by the executable name passed to check_renderer_version (FR-538).
+_MIN_RENDERER_VERSIONS: dict[str, tuple[int, ...]] = {
+    "mmdc": (9, 0),      # Mermaid CLI >= 9.x
+    "dot": (2, 40),      # Graphviz >= 2.40
+    "plantuml": (1, 50), # PlantUML >= 1.50
+}
+
+
+def check_renderer_version(
+    cmd: str, warning_records: list
+) -> None:
+    """Check the renderer CLI version and emit a warning if outdated (FR-538).
+
+    Runs ``cmd --version`` (``cmd -version`` for plantuml), takes the first
+    ``<digits>.<digits>`` token from stdout (or stderr when stdout is empty),
+    and appends a ``renderer-version-unsupported`` WarningRecord to
+    *warning_records* when that version is below the configured minimum.
+
+    The check is best-effort: an unknown *cmd*, a probe that cannot be run,
+    or output with no version-like token all return without a warning.
+
+    Args:
+        cmd: Renderer executable name; only names present in
+            ``_MIN_RENDERER_VERSIONS`` are checked.
+        warning_records: List mutated in place with any resulting warning.
+    """
+    import re as _re
+
+    min_ver = _MIN_RENDERER_VERSIONS.get(cmd)
+    if min_ver is None:
+        return
+
+    version_flags = ["-version"] if cmd == "plantuml" else ["--version"]
+    try:
+        proc = subprocess.run(
+            [cmd, *version_flags],
+            capture_output=True,
+            timeout=5,
+            text=True,
+            check=False,
+        )
+    except (OSError, subprocess.SubprocessError, ValueError):
+        # Missing/unrunnable binary, timeout, or undecodable output: the
+        # version can't be probed, so don't warn. Other exceptions are
+        # programming errors and are allowed to propagate.
+        return
+
+    output = proc.stdout or proc.stderr
+
+    # Extract first numeric token like "10.4.0" or "2.42.2"
+    found = _re.search(r"(\d+)\.(\d+)", output)
+    if not found:
+        return
+
+    major, minor = int(found.group(1)), int(found.group(2))
+    # Only two components are parsed, so compare against at most two
+    # components of the minimum; a longer minimum tuple would otherwise
+    # order after an equal (major, minor) pair and trigger a false warning.
+    if (major, minor) < min_ver[:2]:
+        warning_records.append(
+            WarningRecord(
+                severity=Severity.WARNING,
+                reason="renderer-version-unsupported",
+                construct=f"{cmd} {major}.{minor} (min {min_ver[0]}.{min_ver[1]})",
+            )
+        )
+
+
 # ---------------------------------------------------------------------------
 # Public helpers
 # ---------------------------------------------------------------------------
--- a/src/markidocx/differ.py
+++ b/src/markidocx/differ.py
@@ -85,6 +85,9 @@ def compare(original: str, reimported: str) -> DriftReport:
    # --- Figures (FR-532, FR-541) ---
    _compare_figures(original, reimported, preserved, degraded, broken)

+    # --- Diagram source blocks (FR-534) ---
+    _compare_diagram_blocks(original, reimported, preserved, degraded, broken)
+
    # --- Citations & Bibliography (FR-535, FR-542) ---
    from markidocx.bibliography import compare_citations

@@ -181,6 +184,38 @@ def _compare_xrefs(
            degraded.append(f"xref-link:degraded [{link_text}][{anchor}]")


+# Opening fence with a language tag, then the shortest body up to the next
+# run of three backticks. Trailing spaces/tabs after the tag and a CRLF line
+# ending are tolerated so such fences are not silently skipped.
+_FENCED_BLOCK_RE = re.compile(r"```(\w+)[ \t]*\r?\n(.*?)```", re.DOTALL)
+
+
+def _extract_fenced_blocks(text: str) -> list[tuple[str, str]]:
+    """Extract all language-tagged fenced code blocks from *text*.
+
+    Returns one ``(language, source)`` pair per block, in document order.
+    The language is lower-cased; the source has CRLF line endings normalised
+    to LF and trailing whitespace removed, so a block that differs only in
+    line endings compares equal. Fences without a language tag are ignored.
+    """
+    return [
+        (m.group(1).lower(), m.group(2).replace("\r\n", "\n").rstrip())
+        for m in _FENCED_BLOCK_RE.finditer(text)
+    ]
+
+
+def _compare_diagram_blocks(
+    original: str,
+    reimported: str,
+    preserved: list[str],
+    degraded: list[str],
+    broken: list[str],
+) -> None:
+    """Compare diagram fenced blocks for source-content drift (FR-534).
+
+    Diagram blocks (fenced blocks whose language is in ``DIAGRAM_TYPES``) are
+    paired by position. For each block of *original*, one entry is appended:
+    to *preserved* when language and source both match, to *degraded* when
+    either differs, and to *broken* when *reimported* has no block at that
+    position. Blocks that exist only in *reimported* are not reported.
+    """
+    from markidocx.diagrams import DIAGRAM_TYPES
+
+    def _diagrams_in(text: str) -> list[tuple[str, str]]:
+        return [block for block in _extract_fenced_blocks(text) if block[0] in DIAGRAM_TYPES]
+
+    before = _diagrams_in(original)
+    after = _diagrams_in(reimported)
+
+    for position, block in enumerate(before):
+        language = block[0]
+        if position >= len(after):
+            broken.append(f"diagram:{language}[{position}]:missing")
+        elif block == after[position]:
+            preserved.append(f"diagram:{language}[{position}]")
+        else:
+            degraded.append(f"diagram:{language}[{position}]:source-mutated")
+
+
 def _compare_sets(
    kind: str,
    orig: list[str],
--- a/src/markidocx/mcp_server.py
+++ b/src/markidocx/mcp_server.py
@@ -33,9 +33,21 @@ def list_templates() -> list[dict[str, str]]:


@mcp.tool()
-def list_styles() -> list[dict[str, str]]:
-    """List available styles (FR-1003)."""
-    return []
+def list_styles(family: str | None = None) -> list[dict[str, Any]]:
+    """List available styles for a template family (FR-1003)."""
+    from markidocx.templates import list_styles as _list_styles
+
+    # Flatten each style entry into a plain dict for the tool response.
+    rows: list[dict[str, Any]] = []
+    for entry in _list_styles(family=family):
+        rows.append(
+            {
+                "name": entry.name,
+                "style_id": entry.style_id,
+                "type": entry.type,
+                "family": entry.family,
+                "built_in": entry.built_in,
+            }
+        )
+    return rows


@mcp.tool()
@@ -318,7 +330,7 @@ def invoke_workflow(

@mcp.tool()
 def get_evidence(run_id: str) -> dict[str, Any]:
-    """Retrieve evidence artifacts for a completed run (FR-1013)."""
+    """Retrieve assembled evidence set for a completed run (FR-1013, FR-1406–1408)."""
    from markidocx.evidence import EvidenceStore

    store = EvidenceStore()
@@ -327,18 +339,46 @@ def get_evidence(run_id: str) -> dict[str, Any]:
        return {
            "status": "not_found",
            "run_id": run_id,
-            "reports": [],
            "warnings": [f"No evidence found for run_id: {run_id}"],
        }
+    ev_set = store.assemble_set([run_id])
    return {
        "status": "ok",
        "run_id": run_id,
-        "reports": [r.to_dict() for r in reports],
+        **ev_set.summary(),
        "warnings": [],
        "errors": [],
    }


+@mcp.tool()
+def extract_template(
+    source_path: str,
+    template_out: str,
+    family: str | None = None,
+) -> dict[str, Any]:
+    """Extract a content-free template shell from an existing DOCX (FR-606).
+
+    Copies all styles, page setup, and headers/footers from source_path to
+    template_out, clearing all body content. Optionally registers the result
+    under a family name.
+    """
+    from markidocx.templates import extract_template as _extract_template
+
+    # The tool takes string paths; the library function takes Path objects.
+    src = Path(source_path)
+    dest = Path(template_out)
+    outcome = _extract_template(source_path=src, template_out=dest, family=family)
+
+    payload: dict[str, Any] = {"status": "ok"}
+    payload["template_path"] = str(outcome.template_path)
+    payload["styles_preserved"] = outcome.styles_preserved
+    payload["warnings"] = [warning.to_dict() for warning in outcome.warnings]
+    payload["errors"] = []
+    return payload
+
+
 # ---------------------------------------------------------------------------
 # MCP resources (FR-1011)
 # ---------------------------------------------------------------------------
--- a/src/markidocx/rest.py
+++ b/src/markidocx/rest.py
@@ -98,6 +98,12 @@ class WorkflowInvokeRequest(BaseModel):
    context: dict[str, Any] = {}


+# Request body for POST /template/extract (FR-606).
+class TemplateExtractRequest(BaseModel):
+    # Source DOCX file, base64-encoded; the endpoint decodes it to a temp file.
+    docx_base64: str
+    # Passed to extract_template as the family to register the result under.
+    family: str | None = None
+    # Passed back unchanged as the response envelope's context.
+    context: dict[str, Any] = {}
+
+
 # ---------------------------------------------------------------------------
 # App factory
 # ---------------------------------------------------------------------------
@@ -180,9 +186,23 @@ def create_app() -> FastAPI:
        )

    @app.get("/styles", response_model=ResponseEnvelope)
-    def styles() -> ResponseEnvelope:
-        """List available styles (FR-907 stub)."""
-        return _ok(outputs=[])
+    def styles(family: str | None = None) -> ResponseEnvelope:
+        """List available styles for a template family (FR-907)."""
+        from markidocx.templates import list_styles
+
+        # Flatten each style entry into a plain dict for the envelope.
+        rows = []
+        for entry in list_styles(family=family):
+            rows.append(
+                {
+                    "name": entry.name,
+                    "style_id": entry.style_id,
+                    "type": entry.type,
+                    "family": entry.family,
+                    "built_in": entry.built_in,
+                }
+            )
+        return _ok(outputs=rows)

    # ------------------------------------------------------------------
    # T02 — Functional endpoints (FR-902–908, FR-913–916)
@@ -389,7 +409,7 @@ def create_app() -> FastAPI:

    @app.get("/evidence/{run_id}", response_model=ResponseEnvelope)
    def get_evidence(run_id: str) -> ResponseEnvelope:
-        """Retrieve evidence artifacts for a completed run (FR-914)."""
+        """Retrieve assembled evidence set for a completed run (FR-914, FR-1406–1408)."""
        from markidocx.evidence import EvidenceStore

        store = EvidenceStore()
@@ -402,9 +422,36 @@ def create_app() -> FastAPI:
                errors=[],
                context={"run_id": run_id},
            )
+        ev_set = store.assemble_set([run_id])
        return _ok(
-            outputs={"run_id": run_id, "reports": [r.to_dict() for r in reports]},
+            outputs={"run_id": run_id, **ev_set.summary()},
            context={"run_id": run_id},
        )

+    @app.post("/template/extract", response_model=ResponseEnvelope)
+    def template_extract_endpoint(req: TemplateExtractRequest) -> ResponseEnvelope:
+        """Extract a content-free template shell from a base64-encoded DOCX (FR-606)."""
+        import base64
+        import tempfile
+
+        from fastapi import HTTPException
+
+        from markidocx.templates import extract_template
+
+        # Decode before touching the filesystem so a malformed payload is
+        # answered with a 400 rather than an unhandled binascii.Error (500).
+        try:
+            docx_bytes = base64.b64decode(req.docx_base64)
+        except ValueError as exc:  # binascii.Error is a ValueError subclass
+            raise HTTPException(
+                status_code=400,
+                detail=f"docx_base64 is not valid base64: {exc}",
+            ) from exc
+
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            source_path = tmp_path / "source.docx"
+            template_out = tmp_path / "template.docx"
+            source_path.write_bytes(docx_bytes)
+
+            # NOTE(review): when req.family is set, extract_template registers
+            # template_out, which lives in this temporary directory and is
+            # removed on exit — confirm the registry copies the file.
+            result = extract_template(source_path, template_out, family=req.family)
+
+            # Read the result back while the temporary directory still exists.
+            template_b64 = base64.b64encode(template_out.read_bytes()).decode()
+
+            return _ok(
+                outputs={
+                    "template_base64": template_b64,
+                    "styles_preserved": result.styles_preserved,
+                    "warnings": [w.to_dict() for w in result.warnings],
+                },
+                context=req.context,
+            )
+
    return app
--- a/src/markidocx/templates.py
+++ b/src/markidocx/templates.py
@@ -2,7 +2,7 @@

 from __future__ import annotations

-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path

 from docx import Document
@@ -23,6 +23,26 @@ class FamilyInfo:
    template_path: Path | None = None


+@dataclass
+class StyleEntry:
+    """Metadata for a single DOCX style (FR-907)."""
+
+    # Style name as reported by the document's style object.
+    name: str
+    # Style identifier as reported by the document's style object.
+    style_id: str
+    type: str  # "paragraph" | "character" | "table" | "numbering"
+    # Template family the style was enumerated for.
+    family: str
+    # False only when the style element carries customStyle="1".
+    built_in: bool
+
+
+@dataclass
+class TemplateExtractionResult:
+    """Result from extracting a content-free template (FR-606)."""
+
+    # Path the extracted template was saved to.
+    template_path: Path
+    # Number of styles counted in the source document.
+    styles_preserved: int
+    # WarningRecord objects collected during extraction; empty by default.
+    warnings: list = field(default_factory=list)
+
+
 class RegistrationError(Exception):
    """Raised when template registration fails."""

@@ -70,6 +90,114 @@ class FamilyRegistry:
        return doc


+def list_styles(family: str | None = None) -> list[StyleEntry]:
+    """Enumerate styles from the template for the given family (FR-907).
+
+    Obtains a document for the family from ``FamilyRegistry.create_document``
+    and returns one ``StyleEntry`` per style in it.
+
+    Args:
+        family: Template family name; ``"article"`` is used when this is
+            ``None`` or empty.
+
+    Returns:
+        Style entries sorted by type, then by name. A style without a name
+        is listed under its style id (or ``""`` if it has neither).
+    """
+    target_family = family or "article"
+    doc = FamilyRegistry().create_document(target_family)
+
+    # Numeric style-type codes mapped to the labels exposed to callers.
+    style_type_names = {
+        1: "paragraph",
+        2: "character",
+        3: "table",
+        4: "numbering",
+    }
+    custom_style_attr = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}customStyle"
+
+    entries: list[StyleEntry] = []
+    for style in doc.styles:
+        raw_type = style.type
+        type_code = raw_type.value if hasattr(raw_type, "value") else int(raw_type)
+
+        # A style counts as built-in unless its element is marked customStyle="1".
+        elem = getattr(style, "element", None)
+        built_in = elem is None or elem.get(custom_style_attr) != "1"
+
+        # A style can lack a name or id (reported as None); coerce both to
+        # str so the sort below never compares None with a string.
+        style_id = style.style_id or ""
+        entries.append(
+            StyleEntry(
+                name=style.name or style_id,
+                style_id=style_id,
+                type=style_type_names.get(type_code, "paragraph"),
+                family=target_family,
+                built_in=built_in,
+            )
+        )
+
+    entries.sort(key=lambda e: (e.type, e.name))
+    return entries
+
+
+def extract_template(
+    source_path: Path,
+    template_out: Path,
+    family: str | None = None,
+) -> TemplateExtractionResult:
+    """Extract a content-free template shell from an existing DOCX (FR-606).
+
+    Loads *source_path*, removes every body child except the body-level
+    ``w:sectPr``, inserts one empty paragraph, and saves the result to
+    *template_out*. Nothing outside the document body is modified.
+
+    Args:
+        source_path: DOCX file to derive the template from.
+        template_out: Destination path for the emptied document.
+        family: When given, the saved template is registered under this
+            name; a ``RegistrationError`` is downgraded to a
+            ``template-registration-failed`` warning.
+
+    Returns:
+        The output path, the number of styles found in the source, and any
+        warnings (``template-no-styles``, ``template-registration-failed``).
+    """
+    from docx.oxml import OxmlElement
+    from docx.oxml.ns import qn
+
+    from markidocx.errors import Severity, WarningRecord
+
+    warnings: list[WarningRecord] = []
+
+    # Parse the source once: clearing the body does not touch the styles
+    # collection, so the same document serves for counting and for output.
+    template_doc = Document(str(source_path))
+    styles_count = len(list(template_doc.styles))
+
+    # Remove all body children except sectPr (section properties).
+    body = template_doc.element.body
+    sect_pr = body.find(qn("w:sectPr"))
+    for child in list(body):
+        if child is not sect_pr:
+            body.remove(child)
+
+    # The body now holds at most the sectPr, so inserting at index 0 puts
+    # the single empty paragraph before it (or makes it the only child).
+    body.insert(0, OxmlElement("w:p"))
+
+    template_doc.save(str(template_out))
+
+    if styles_count == 0:
+        warnings.append(
+            WarningRecord(
+                severity=Severity.WARNING,
+                reason="template-no-styles",
+                construct=str(source_path),
+            )
+        )
+
+    # Optionally register the extracted template
+    if family:
+        registry = FamilyRegistry()
+        try:
+            registry.register(template_out, family)
+        except RegistrationError as exc:
+            warnings.append(
+                WarningRecord(
+                    severity=Severity.WARNING,
+                    reason="template-registration-failed",
+                    construct=str(exc),
+                )
+            )
+
+    return TemplateExtractionResult(
+        template_path=template_out,
+        styles_preserved=styles_count,
+        warnings=warnings,
+    )
+
+
 def _apply_family_defaults(doc: DocxDocument, family: str) -> None:
    """Apply minimal style defaults for built-in families."""
    styles = doc.styles