extension for ref resolve, explode, implode, weave, tangle

2026-05-04 02:25:49 +02:00
parent 8203f50fd5
commit 65bfc1aebf
39 changed files with 3959 additions and 25 deletions
--- a/src/markitect_tool/__init__.py
+++ b/src/markitect_tool/__init__.py
@@ -32,7 +32,26 @@ from markitect_tool.cache import (
    save_cache,
    scan_markdown_files,
 )
+from markitect_tool.content_class import (
+    ClassCompositionResult,
+    ContentClass,
+    ContentClassRegistry,
+    ContentClassResolutionError,
+    load_content_class_file,
+    load_content_classes,
+)
 from markitect_tool.diagnostics import Diagnostic, SourceLocation
+from markitect_tool.explode import (
+    EXPLODE_MANIFEST_NAME,
+    ExplodeEntry,
+    ExplodeError,
+    ExplodeManifest,
+    ExplodeResult,
+    ImplodeResult,
+    explode_markdown_file,
+    implode_markdown_directory,
+    load_explode_manifest,
+)
 from markitect_tool.generation import (
    GeneratedDocument,
    GenerationHookRequest,
@@ -44,21 +63,55 @@ from markitect_tool.generation import (
    load_generation_plan_file,
    run_generation_plan,
 )
+from markitect_tool.literate import (
+    CodeChunk,
+    LiterateFile,
+    TangleResult,
+    WeaveResult,
+    discover_code_chunks,
+    tangle_markdown,
+    weave_markdown,
+    write_tangle_files,
+)
 from markitect_tool.ops import (
    ComposeResult,
    IncludeError,
    IncludeResult,
+    OperationProvenance,
    TransformResult,
    compose_files,
    resolve_includes,
    transform_markdown,
 )
+from markitect_tool.processor import (
+    FencedProcessorBlock,
+    ProcessorContext,
+    ProcessorOutputFile,
+    ProcessorRegistry,
+    ProcessorRequest,
+    ProcessorResult,
+    ProcessorRun,
+    default_processor_registry,
+    discover_fenced_processors,
+    run_fenced_processors,
+)
 from markitect_tool.query import (
    InvalidQueryError,
    QueryMatch,
    extract_document,
    query_document,
 )
+from markitect_tool.reference import (
+    ContentUnit,
+    ReferenceAddress,
+    ReferenceContext,
+    ReferenceResolution,
+    ReferenceResolutionError,
+    SourceSpan as ReferenceSourceSpan,
+    load_namespaces,
+    parse_reference,
+    resolve_reference,
+)
 from markitect_tool.schema import (
    MarkdownSchema,
    SchemaValidationResult,
@@ -109,8 +162,23 @@ __all__ = [
    "load_cache",
    "save_cache",
    "scan_markdown_files",
+    "ClassCompositionResult",
+    "ContentClass",
+    "ContentClassRegistry",
+    "ContentClassResolutionError",
+    "load_content_class_file",
+    "load_content_classes",
    "Diagnostic",
    "SourceLocation",
+    "EXPLODE_MANIFEST_NAME",
+    "ExplodeEntry",
+    "ExplodeError",
+    "ExplodeManifest",
+    "ExplodeResult",
+    "ImplodeResult",
+    "explode_markdown_file",
+    "implode_markdown_directory",
+    "load_explode_manifest",
    "GeneratedDocument",
    "GenerationHookRequest",
    "GenerationHookResult",
@@ -120,17 +188,45 @@ __all__ = [
    "generate_with_hook",
    "load_generation_plan_file",
    "run_generation_plan",
+    "CodeChunk",
+    "LiterateFile",
+    "TangleResult",
+    "WeaveResult",
+    "discover_code_chunks",
+    "tangle_markdown",
+    "weave_markdown",
+    "write_tangle_files",
    "ComposeResult",
    "IncludeError",
    "IncludeResult",
+    "OperationProvenance",
    "TransformResult",
    "compose_files",
    "resolve_includes",
    "transform_markdown",
+    "FencedProcessorBlock",
+    "ProcessorContext",
+    "ProcessorOutputFile",
+    "ProcessorRegistry",
+    "ProcessorRequest",
+    "ProcessorResult",
+    "ProcessorRun",
+    "default_processor_registry",
+    "discover_fenced_processors",
+    "run_fenced_processors",
    "InvalidQueryError",
    "QueryMatch",
    "extract_document",
    "query_document",
+    "ContentUnit",
+    "ReferenceAddress",
+    "ReferenceContext",
+    "ReferenceResolution",
+    "ReferenceResolutionError",
+    "ReferenceSourceSpan",
+    "load_namespaces",
+    "parse_reference",
+    "resolve_reference",
    "MissingTemplateVariable",
    "TemplateAnalysis",
    "TemplateError",
--- a/src/markitect_tool/cli/main.py
+++ b/src/markitect_tool/cli/main.py
@@ -16,6 +16,10 @@ from markitect_tool.cache import (
    load_cache,
    save_cache,
 )
+from markitect_tool.content_class import (
+    ContentClassResolutionError,
+    load_content_class_file,
+)
 from markitect_tool.core import parse_markdown_file
 from markitect_tool.contract import (
    ContractLoaderError,
@@ -24,6 +28,11 @@ from markitect_tool.contract import (
    load_contract_file,
    validate_contract,
 )
+from markitect_tool.explode import (
+    ExplodeError,
+    explode_markdown_file,
+    implode_markdown_directory,
+)
 from markitect_tool.generation import (
    GenerationPlanError,
    generate_stub_from_contract,
@@ -31,8 +40,16 @@ from markitect_tool.generation import (
    load_generation_plan_file,
    run_generation_plan,
 )
+from markitect_tool.literate import tangle_markdown, weave_markdown, write_tangle_files
 from markitect_tool.ops import IncludeError, compose_files, resolve_includes, transform_markdown
+from markitect_tool.processor import ProcessorContext, run_fenced_processors
 from markitect_tool.query import InvalidQueryError, extract_document, query_document
+from markitect_tool.reference import (
+    ReferenceContext,
+    ReferenceResolutionError,
+    load_namespaces,
+    resolve_reference,
+)
 from markitect_tool.schema import load_schema_file, validate_markdown_file, validate_schema
 from markitect_tool.template import (
    MissingTemplateVariable,
@@ -296,6 +313,224 @@ def include(
    _emit_markdown_result(result.to_dict(), output_format, output)


+# `mkt explode`: split one Markdown file into section files plus a manifest.
+@main.command()
+@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
+@click.option(
+    "--output-dir",
+    required=True,
+    type=click.Path(file_okay=False, path_type=Path),
+    help="Directory to write exploded Markdown files and manifest into.",
+)
+@click.option(
+    "--variant",
+    type=click.Choice(["flat", "hierarchical"], case_sensitive=False),
+    default="flat",
+    show_default=True,
+)
+@click.option("--force", is_flag=True, help="Allow writing into a non-empty output directory.")
+@click.option(
+    "--format",
+    "output_format",
+    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
+    default="text",
+    show_default=True,
+)
+def explode(
+    file: Path,
+    output_dir: Path,
+    variant: str,
+    force: bool,
+    output_format: str,
+) -> None:
+    """Explode a Markdown file into reversible section files."""
+
+    try:
+        # --force is forwarded to the engine as its `overwrite` flag.
+        result = explode_markdown_file(file, output_dir, variant=variant, overwrite=force)
+    except ExplodeError as exc:
+        # Re-raise as ClickException so the failure is reported as a CLI error
+        # message; the original exception stays attached as the cause.
+        raise click.ClickException(str(exc)) from exc
+    _emit_explode_result(result.to_dict(), output_format)
+
+
+# `mkt implode`: rebuild Markdown from a directory produced by `mkt explode`.
+@main.command()
+@click.argument("directory", type=click.Path(exists=True, file_okay=False, path_type=Path))
+@click.option(
+    "--manifest",
+    "manifest_path",
+    type=click.Path(exists=True, dir_okay=False, path_type=Path),
+    help="Manifest path. Defaults to markitect-explode.yaml in the input directory.",
+)
+@click.option(
+    "--output",
+    type=click.Path(dir_okay=False, path_type=Path),
+    help="Write imploded Markdown to a file.",
+)
+@click.option(
+    "--format",
+    "output_format",
+    type=click.Choice(["markdown", "json", "yaml"], case_sensitive=False),
+    default="markdown",
+    show_default=True,
+)
+def implode(
+    directory: Path,
+    manifest_path: Path | None,
+    output: Path | None,
+    output_format: str,
+) -> None:
+    """Implode a Markdown directory created by `mkt explode`."""
+
+    try:
+        # manifest_path is None when --manifest is omitted; the default manifest
+        # location described in the option help is then left to the engine.
+        result = implode_markdown_directory(directory, manifest_path=manifest_path)
+    except ExplodeError as exc:
+        # Re-raise as ClickException so the failure is reported as a CLI error message.
+        raise click.ClickException(str(exc)) from exc
+    # The emitter receives both the chosen format and the optional --output path.
+    _emit_markdown_result(result.to_dict(), output_format, output)
+
+
+# Command group `ref`; it has no behavior of its own and only hosts the
+# subcommands registered with @ref_group.command(...).
+@main.group("ref")
+def ref_group() -> None:
+    """Resolve namespaced Markdown content references."""
+
+
+# `mkt ref resolve CONTEXT_FILE REFERENCE`: resolve one reference relative to a document.
+@ref_group.command("resolve")
+@click.argument("context_file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
+@click.argument("reference")
+@click.option(
+    "--root",
+    type=click.Path(exists=True, file_okay=False, path_type=Path),
+    default=Path("."),
+    show_default=True,
+    help="Root that relative paths and namespaces must stay within.",
+)
+@click.option(
+    "--format",
+    "output_format",
+    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
+    default="text",
+    show_default=True,
+)
+def ref_resolve(context_file: Path, reference: str, root: Path, output_format: str) -> None:
+    """Resolve a content reference using a Markdown document as context."""
+
+    # NOTE(review): parsing and context construction happen outside the try
+    # block, so any exception they raise propagates unconverted -- confirm
+    # that is intended.
+    context_document = parse_markdown_file(context_file)
+    context = ReferenceContext.from_document(
+        context_document,
+        root=root,
+        current_path=context_file,
+    )
+    try:
+        resolution = resolve_reference(reference, context=context)
+    except ReferenceResolutionError as exc:
+        # Only resolution failures are turned into a CLI error message.
+        raise click.ClickException(str(exc)) from exc
+    _emit_reference_result(resolution.to_dict(), output_format)
+
+
+# `mkt process FILE`: run the fenced-block processors found in a Markdown file.
+@main.command("process")
+@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
+@click.option(
+    "--root",
+    type=click.Path(exists=True, file_okay=False, path_type=Path),
+    default=Path("."),
+    show_default=True,
+    help="Root used for relative processor references.",
+)
+@click.option(
+    "--format",
+    "output_format",
+    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
+    default="text",
+    show_default=True,
+)
+def process(file: Path, root: Path, output_format: str) -> None:
+    """Run deterministic fenced-block processors in a Markdown file."""
+
+    # The file is consumed twice: parsed here so the frontmatter can supply
+    # namespaces, then read as raw text for the processor run below.
+    document = parse_markdown_file(file)
+    context = ProcessorContext(
+        root=root,
+        current_path=file,
+        namespaces=load_namespaces(document.frontmatter),
+    )
+    result = run_fenced_processors(
+        file.read_text(encoding="utf-8"),
+        context=context,
+        source_path=file,
+    )
+    # Output is always emitted first; the exit status then mirrors validity
+    # (0 when result.valid is true, 1 otherwise).
+    _emit_processor_run(result.to_dict(), output_format)
+    raise click.exceptions.Exit(0 if result.valid else 1)
+
+
+# Command group `class`; it has no behavior of its own and only hosts the
+# subcommands registered with @class_group.command(...).
+@main.group("class")
+def class_group() -> None:
+    """Resolve deterministic content classes."""
+
+
+# `mkt class resolve CLASS_FILE CLASS_NAME`: compose one class from a definitions file.
+@class_group.command("resolve")
+@click.argument("class_file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
+@click.argument("class_name")
+@click.option(
+    "--format",
+    "output_format",
+    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
+    default="text",
+    show_default=True,
+)
+def class_resolve(class_file: Path, class_name: str, output_format: str) -> None:
+    """Resolve content class inheritance and merged slots."""
+
+    try:
+        registry = load_content_class_file(class_file)
+        result = registry.compose(class_name)
+    except ContentClassResolutionError as exc:
+        # Only ContentClassResolutionError is converted into a CLI error
+        # message; any other exception type propagates unchanged.
+        raise click.ClickException(str(exc)) from exc
+    # Output is always emitted first; the exit status then mirrors validity
+    # (0 when result.valid is true, 1 otherwise).
+    _emit_content_class_result(result.to_dict(), output_format)
+    raise click.exceptions.Exit(0 if result.valid else 1)
+
+
+# `mkt tangle FILE`: assemble named code chunks into target files.
+@main.command()
+@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
+@click.option(
+    "--output-dir",
+    type=click.Path(file_okay=False, path_type=Path),
+    help="Write tangled files under this directory. Omit for dry JSON/YAML/text output.",
+)
+@click.option(
+    "--format",
+    "output_format",
+    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
+    default="text",
+    show_default=True,
+)
+def tangle(file: Path, output_dir: Path | None, output_format: str) -> None:
+    """Tangle named Markdown code chunks into target files."""
+
+    result = tangle_markdown(file.read_text(encoding="utf-8"), source_path=file)
+    data = result.to_dict()
+    # Files are written only when --output-dir was given AND the result is
+    # valid; otherwise this is a dry run and no "written_files" key is added.
+    if output_dir and result.valid:
+        data["written_files"] = write_tangle_files(result, output_dir)
+    _emit_tangle_result(data, output_format)
+    # Exit status mirrors validity: 0 when result.valid is true, 1 otherwise.
+    raise click.exceptions.Exit(0 if result.valid else 1)
+
+
+# `mkt weave FILE`: produce woven Markdown documentation from a literate file.
+@main.command()
+@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
+@click.option(
+    "--output",
+    type=click.Path(dir_okay=False, path_type=Path),
+    help="Write woven Markdown to a file.",
+)
+@click.option(
+    "--format",
+    "output_format",
+    type=click.Choice(["markdown", "json", "yaml"], case_sensitive=False),
+    default="markdown",
+    show_default=True,
+)
+def weave(file: Path, output: Path | None, output_format: str) -> None:
+    """Weave Markdown documentation with a deterministic chunk index."""
+
+    # The raw text is woven; `file` is passed along as the source path.
+    result = weave_markdown(file.read_text(encoding="utf-8"), source_path=file)
+    # The emitter receives both the chosen format and the optional --output path.
+    _emit_markdown_result(result.to_dict(), output_format, output)
+
+
 @main.group()
 def cache() -> None:
    """Fingerprint Markdown files and detect changed inputs."""
@@ -788,6 +1023,83 @@ def _emit_cache_data(data: dict, output_format: str) -> None:
                click.echo(f"written: {data['written']}")


+def _emit_reference_result(data: dict, output_format: str) -> None:
+    """Print a reference resolution as JSON, YAML, or a plain-text unit listing."""
+
+    if output_format == "json":
+        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
+        return
+    if output_format == "yaml":
+        click.echo(yaml.safe_dump(data, sort_keys=False))
+        return
+
+    # Any other format falls through to text: a summary, then one line per unit.
+    click.echo(f"{data['count']} unit(s)")
+    click.echo(f"target: {data['target_path']}")
+    for unit in data["units"]:
+        start = unit.get("span", {}).get("line_start")
+        suffix = f":{start}" if start else ""
+        click.echo(f"- {unit['kind']} {unit['unit_id']} {unit['source_path']}{suffix}")
+        label = unit.get("name")
+        if label:
+            click.echo(f"  {label}")
+
+
+def _emit_explode_result(data: dict, output_format: str) -> None:
+    """Print an explode result as JSON, YAML, or a plain-text manifest summary."""
+
+    if output_format == "json":
+        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
+        return
+    if output_format == "yaml":
+        click.echo(yaml.safe_dump(data, sort_keys=False))
+        return
+
+    # Any other format falls through to text: manifest header, then its entries.
+    summary = data["manifest"]
+    click.echo(f"manifest: {data['manifest_path']}")
+    click.echo(f"variant: {summary['variant']}")
+    click.echo(f"entries: {len(summary['entries'])}")
+    for item in summary["entries"]:
+        click.echo(f"- {item['kind']} {item['file']}")
+
+
+def _emit_processor_run(data: dict, output_format: str) -> None:
+    """Print a processor run as JSON, YAML, or a plain-text per-block report."""
+
+    if output_format == "json":
+        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
+        return
+    if output_format == "yaml":
+        click.echo(yaml.safe_dump(data, sort_keys=False))
+        return
+
+    # Any other format falls through to text output.
+    if data["valid"]:
+        click.echo("valid")
+    else:
+        click.echo("invalid")
+    click.echo(f"processors: {data['count']}")
+    # Blocks and results are paired positionally; a length mismatch is tolerated.
+    for block, outcome in zip(data["blocks"], data["results"], strict=False):
+        start = block.get("line_start")
+        suffix = f":{start}" if start else ""
+        click.echo(f"- {block['processor']} {block['unit_id']}{suffix}")
+        content = outcome.get("content")
+        if content:
+            # Only the first line of the content is shown as a preview.
+            click.echo(f"  content: {content.splitlines()[0]}")
+        for diagnostic in outcome.get("diagnostics", []):
+            click.echo(f"  [{diagnostic['severity']}] {diagnostic['code']}: {diagnostic['message']}")
+
+
+def _emit_content_class_result(data: dict, output_format: str) -> None:
+    """Print a class composition as JSON, YAML, or a plain-text slot listing."""
+
+    if output_format == "json":
+        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
+        return
+    if output_format == "yaml":
+        click.echo(yaml.safe_dump(data, sort_keys=False))
+        return
+
+    # Any other format falls through to text output.
+    if data["valid"]:
+        click.echo("valid")
+    else:
+        click.echo("invalid")
+    order = " -> ".join(data["linearization"])
+    click.echo("linearization: " + order)
+    for slot_name, slot_value in data.get("slots", {}).items():
+        click.echo(f"- {slot_name}: {slot_value}")
+    for diagnostic in data.get("diagnostics", []):
+        click.echo(f"! [{diagnostic['severity']}] {diagnostic['code']}: {diagnostic['message']}")
+
+
+def _emit_tangle_result(data: dict, output_format: str) -> None:
+    """Print a tangle result as JSON, YAML, or a plain-text file listing."""
+
+    if output_format == "json":
+        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
+        return
+    if output_format == "yaml":
+        click.echo(yaml.safe_dump(data, sort_keys=False))
+        return
+
+    # Any other format falls through to text output.
+    if data["valid"]:
+        click.echo("valid")
+    else:
+        click.echo("invalid")
+    click.echo(f"files: {len(data['files'])}")
+    for target in data["files"]:
+        chunk_list = ", ".join(target["chunk_ids"])
+        click.echo(f"- {target['path']}: {chunk_list}")
+    for diagnostic in data.get("diagnostics", []):
+        click.echo(f"! [{diagnostic['severity']}] {diagnostic['code']}: {diagnostic['message']}")
+    # "written_files" may be absent; nothing is listed in that case.
+    for path in data.get("written_files", []):
+        click.echo(f"written: {path}")
+
+
 def _emit_jsonish(data: dict, output_format: str) -> None:
    if output_format == "yaml":
        click.echo(yaml.safe_dump(data, sort_keys=False))
--- a/src/markitect_tool/content_class/__init__.py
+++ b/src/markitect_tool/content_class/__init__.py
@@ -0,0 +1,19 @@
+"""Deterministic content class composition."""
+
+from markitect_tool.content_class.engine import (
+    ClassCompositionResult,
+    ContentClass,
+    ContentClassRegistry,
+    ContentClassResolutionError,
+    load_content_class_file,
+    load_content_classes,
+)
+
+__all__ = [
+    "ClassCompositionResult",
+    "ContentClass",
+    "ContentClassRegistry",
+    "ContentClassResolutionError",
+    "load_content_class_file",
+    "load_content_classes",
+]
--- a/src/markitect_tool/content_class/engine.py
+++ b/src/markitect_tool/content_class/engine.py
@@ -0,0 +1,225 @@
+"""Small deterministic content class resolver."""
+
+from __future__ import annotations
+
+from copy import deepcopy
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from markitect_tool.diagnostics import Diagnostic
+
+
+class ContentClassResolutionError(ValueError):
+    """Raised when content class definitions cannot be loaded, linearized, or merged."""
+
+
+@dataclass(frozen=True)
+class ContentClass:
+    """Declarative content class: a name, its parents, slot values, and merge policies."""
+
+    # Name under which the class is registered.
+    name: str
+    # Parent class names, in declaration order.
+    extends: list[str] = field(default_factory=list)
+    # Slot name -> value contributed by this class.
+    slots: dict[str, Any] = field(default_factory=dict)
+    # Slot name -> merge policy used for this class's value of that slot.
+    merge_policies: dict[str, str] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the fields as a dict, leaving out empty mappings, empty lists, and None."""
+
+        omitted = ({}, [], None)
+        payload: dict[str, Any] = {}
+        for key, value in asdict(self).items():
+            if value in omitted:
+                continue
+            payload[key] = value
+        return payload
+
+
+@dataclass(frozen=True)
+class ClassCompositionResult:
+    """Outcome of composing a content class: precedence order, merged slots, diagnostics."""
+
+    # Name of the class that was composed.
+    class_name: str
+    # Precedence order used for merging; empty when resolution failed.
+    linearization: list[str]
+    # Merged slot values.
+    slots: dict[str, Any]
+    # Problems collected during composition.
+    diagnostics: list[Diagnostic] = field(default_factory=list)
+
+    @property
+    def valid(self) -> bool:
+        """True unless at least one diagnostic has severity "error"."""
+
+        for diagnostic in self.diagnostics:
+            if diagnostic.severity == "error":
+                return False
+        return True
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return a plain-dict view with diagnostics converted via their own to_dict()."""
+
+        payload: dict[str, Any] = {"valid": self.valid}
+        payload["class_name"] = self.class_name
+        payload["linearization"] = self.linearization
+        payload["slots"] = self.slots
+        payload["diagnostics"] = [diagnostic.to_dict() for diagnostic in self.diagnostics]
+        return payload
+
+
+class ContentClassRegistry:
+    """Registry and resolver for content classes.
+
+    Classes are kept by name in ``self.classes``. ``linearize`` computes the
+    C3 precedence order for a class and ``compose`` merges slot values along
+    that order.
+    """
+
+    def __init__(self, classes: dict[str, ContentClass] | None = None) -> None:
+        """Create a registry, adopting ``classes`` when given (no copy is made)."""
+
+        # Compare against None rather than truthiness: an empty dict passed by
+        # the caller must be adopted like a populated one, not silently swapped
+        # for a fresh dict that the caller never sees.
+        self.classes = {} if classes is None else classes
+
+    def add(self, content_class: ContentClass) -> None:
+        """Register ``content_class`` under its name, replacing any previous entry."""
+
+        self.classes[content_class.name] = content_class
+
+    def linearize(self, class_name: str) -> list[str]:
+        """Return the precedence order for ``class_name``, starting with the class itself.
+
+        Raises:
+            ContentClassResolutionError: If the class or one of its parents is
+                unknown, the inheritance graph is cyclic, or no consistent
+                precedence order exists.
+        """
+
+        if class_name not in self.classes:
+            raise ContentClassResolutionError(f"Unknown content class `{class_name}`")
+        return self._linearize(class_name, [])
+
+    def compose(self, class_name: str) -> ClassCompositionResult:
+        """Merge the slots of ``class_name`` and its ancestors.
+
+        Resolution and merge failures are reported as error diagnostics on the
+        returned result rather than raised.
+        """
+
+        diagnostics: list[Diagnostic] = []
+        try:
+            linearization = self.linearize(class_name)
+        except ContentClassResolutionError as exc:
+            # Without a precedence order nothing can be merged; return an empty
+            # result that carries the failure as its only diagnostic.
+            return ClassCompositionResult(
+                class_name=class_name,
+                linearization=[],
+                slots={},
+                diagnostics=[
+                    Diagnostic(
+                        severity="error",
+                        code="content_class.resolution_error",
+                        message=str(exc),
+                    )
+                ],
+            )
+
+        slots: dict[str, Any] = {}
+        # Walk the order back to front so the class itself is merged last and
+        # its values are applied on top of everything inherited.
+        for name in reversed(linearization):
+            content_class = self.classes[name]
+            for slot, value in content_class.slots.items():
+                # The policy comes from the class contributing the value.
+                policy = content_class.merge_policies.get(slot, "replace")
+                try:
+                    slots[slot] = _merge_slot(slots.get(slot), value, policy)
+                except ContentClassResolutionError as exc:
+                    # Record the conflict and keep going; the slot keeps
+                    # whatever value it had before this class.
+                    diagnostics.append(
+                        Diagnostic(
+                            severity="error",
+                            code="content_class.merge_conflict",
+                            message=str(exc),
+                            details={"class": name, "slot": slot, "policy": policy},
+                        )
+                    )
+        return ClassCompositionResult(
+            class_name=class_name,
+            linearization=linearization,
+            slots=slots,
+            diagnostics=diagnostics,
+        )
+
+    def _linearize(self, class_name: str, stack: list[str]) -> list[str]:
+        """Recursively linearize ``class_name``; ``stack`` is the path walked so far."""
+
+        if class_name in stack:
+            raise ContentClassResolutionError(
+                "Cyclic content class inheritance: " + " -> ".join(stack + [class_name])
+            )
+        content_class = self.classes[class_name]
+        # Unknown parents are skipped here and reported together just below.
+        parent_mros = [
+            self._linearize(parent, stack + [class_name])
+            for parent in content_class.extends
+            if _known_parent(parent, self.classes)
+        ]
+        missing = [parent for parent in content_class.extends if parent not in self.classes]
+        if missing:
+            raise ContentClassResolutionError(
+                f"Content class `{class_name}` extends unknown class(es): {', '.join(missing)}"
+            )
+        # C3: the class itself, then the merge of its parents' orders and the
+        # declared parent list.
+        return [class_name] + _c3_merge(parent_mros + [list(content_class.extends)])
+
+
+def load_content_class_file(path: str | Path) -> ContentClassRegistry:
+    """Load content class definitions from a YAML file.
+
+    Args:
+        path: Location of the UTF-8 encoded YAML file.
+
+    Returns:
+        A registry built from the file's mapping via ``load_content_classes``.
+
+    Raises:
+        ContentClassResolutionError: If the file is not valid YAML or its top
+            level is not a mapping.
+    """
+
+    text = Path(path).read_text(encoding="utf-8")
+    try:
+        data = yaml.safe_load(text)
+    except yaml.YAMLError as exc:
+        # The `class resolve` command only handles ContentClassResolutionError,
+        # so report syntax errors through it instead of leaking the parser's
+        # exception type as a traceback.
+        raise ContentClassResolutionError(
+            f"Content class file is not valid YAML: {exc}"
+        ) from exc
+    if not isinstance(data, dict):
+        raise ContentClassResolutionError("Content class file must be a mapping")
+    return load_content_classes(data)
+
+
+def load_content_classes(data: dict[str, Any]) -> ContentClassRegistry:
+    """Build a registry from a mapping of class definitions.
+
+    Definitions are read from ``data["classes"]`` when that key exists,
+    otherwise ``data`` itself is treated as the name -> definition mapping.
+    ``extends`` may be a single string or a list.
+
+    Raises:
+        ContentClassResolutionError: If any part of the structure has the wrong type.
+    """
+
+    definitions = data.get("classes", data)
+    if not isinstance(definitions, dict):
+        raise ContentClassResolutionError("Content classes must be a mapping")
+
+    registered: dict[str, ContentClass] = {}
+    for name, definition in definitions.items():
+        if not isinstance(definition, dict):
+            raise ContentClassResolutionError(f"Content class `{name}` must be a mapping")
+
+        # A lone parent name is accepted as shorthand for a one-element list.
+        parents = definition.get("extends", [])
+        if isinstance(parents, str):
+            parents = [parents]
+        elif not isinstance(parents, list):
+            raise ContentClassResolutionError(f"Content class `{name}` extends must be a list")
+
+        slot_values = definition.get("slots", {})
+        policy_map = definition.get("merge_policies", {})
+        if not (isinstance(slot_values, dict) and isinstance(policy_map, dict)):
+            raise ContentClassResolutionError(
+                f"Content class `{name}` slots and merge_policies must be mappings"
+            )
+
+        # Names, parents, and policies are normalized to strings; slot values
+        # are passed through untouched.
+        key = str(name)
+        registered[key] = ContentClass(
+            name=key,
+            extends=[str(parent) for parent in parents],
+            slots=slot_values,
+            merge_policies={str(slot): str(policy) for slot, policy in policy_map.items()},
+        )
+    return ContentClassRegistry(registered)
+
+
+def _c3_merge(sequences: list[list[str]]) -> list[str]:
+    """Merge precedence lists with the C3 rule.
+
+    Repeatedly picks the first head that does not occur in the tail of any
+    remaining list. Raises ContentClassResolutionError when no head qualifies.
+    The input lists are copied, not modified.
+    """
+
+    pending = [list(sequence) for sequence in sequences if sequence]
+    merged: list[str] = []
+    while pending:
+        tails = [sequence[1:] for sequence in pending]
+        chosen = next(
+            (
+                sequence[0]
+                for sequence in pending
+                if not any(sequence[0] in tail for tail in tails)
+            ),
+            None,
+        )
+        if chosen is None:
+            raise ContentClassResolutionError("Inconsistent content class precedence order")
+        merged.append(chosen)
+        # Drop the chosen name everywhere, then discard lists that ran empty.
+        stripped = ([item for item in sequence if item != chosen] for sequence in pending)
+        pending = [sequence for sequence in stripped if sequence]
+    return merged
+
+
+def _merge_slot(existing: Any, value: Any, policy: str) -> Any:
+    """Combine an inherited slot value with an incoming one.
+
+    Args:
+        existing: Value merged so far, or None when the slot has no value yet.
+        value: Value contributed by the current class; it is deep-copied first.
+        policy: One of "replace", "append", "prepend", "deep_merge",
+            "error_on_conflict".
+
+    Returns:
+        The merged value. With no existing value the incoming copy is returned
+        whatever the (valid) policy is.
+
+    Raises:
+        ContentClassResolutionError: For an unknown policy, a "deep_merge" on
+            non-mapping values, or differing values under "error_on_conflict".
+    """
+
+    # Validate the policy before anything else so a misspelled policy is
+    # reported even when there is no inherited value to merge with yet.
+    if policy not in ("replace", "append", "prepend", "deep_merge", "error_on_conflict"):
+        raise ContentClassResolutionError(f"Unknown merge policy `{policy}`")
+
+    incoming = deepcopy(value)
+    if existing is None or policy == "replace":
+        return incoming
+    if policy == "append":
+        return _as_list(existing) + _as_list(incoming)
+    if policy == "prepend":
+        return _as_list(incoming) + _as_list(existing)
+    if policy == "deep_merge":
+        if not isinstance(existing, dict) or not isinstance(incoming, dict):
+            raise ContentClassResolutionError("deep_merge requires mapping values")
+        return _deep_merge(existing, incoming)
+    # Only "error_on_conflict" remains: identical values pass, others fail.
+    if existing != incoming:
+        raise ContentClassResolutionError("slot conflict")
+    return existing
+
+
+def _deep_merge(left: dict[str, Any], right: dict[str, Any]) -> dict[str, Any]:
+    """Return a new dict with ``right`` merged into a deep copy of ``left``.
+
+    Keys whose values are mappings on both sides are merged recursively; for
+    every other key a deep copy of the value from ``right`` wins.
+    """
+
+    result = deepcopy(left)
+    for key, incoming in right.items():
+        current = result.get(key)
+        if isinstance(current, dict) and isinstance(incoming, dict):
+            result[key] = _deep_merge(current, incoming)
+            continue
+        result[key] = deepcopy(incoming)
+    return result
+
+
+def _as_list(value: Any) -> list[Any]:
+    """Return ``value`` itself when it is a list, otherwise wrap it in a new list."""
+
+    if isinstance(value, list):
+        return value
+    return [value]
+
+
+def _known_parent(parent: str, classes: dict[str, ContentClass]) -> bool:
+    """Return True when ``parent`` is a key of ``classes``."""
+
+    return parent in classes
--- a/src/markitect_tool/explode/__init__.py
+++ b/src/markitect_tool/explode/__init__.py
@@ -0,0 +1,25 @@
+"""Reversible explode/implode operations for Markdown documents."""
+
+from markitect_tool.explode.engine import (
+    EXPLODE_MANIFEST_NAME,
+    ExplodeEntry,
+    ExplodeError,
+    ExplodeManifest,
+    ExplodeResult,
+    ImplodeResult,
+    explode_markdown_file,
+    implode_markdown_directory,
+    load_explode_manifest,
+)
+
+__all__ = [
+    "EXPLODE_MANIFEST_NAME",
+    "ExplodeEntry",
+    "ExplodeError",
+    "ExplodeManifest",
+    "ExplodeResult",
+    "ImplodeResult",
+    "explode_markdown_file",
+    "implode_markdown_directory",
+    "load_explode_manifest",
+]
--- a/src/markitect_tool/explode/engine.py
+++ b/src/markitect_tool/explode/engine.py
@@ -0,0 +1,324 @@
+"""Manifest-first reversible explode/implode for Markdown files."""
+
+from __future__ import annotations
+
+import hashlib
+import re
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from markitect_tool.core import Heading, parse_markdown
+
+
+EXPLODE_MANIFEST_NAME = "markitect-explode.yaml"
+
+
+class ExplodeError(ValueError):
+    """Signals that an explode or implode step cannot keep the roundtrip safe."""
+
+
+@dataclass(frozen=True)
+class ExplodeEntry:
+    """Manifest record describing a single file of an exploded document."""
+
+    kind: str
+    file: str
+    order: int
+    unit_id: str
+    line_start: int
+    line_end: int
+    heading_level: int | None = None
+    heading_text: str | None = None
+    content_hash: str = ""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the entry, leaving out every field whose value is ``None``."""
+        serialized: dict[str, Any] = {}
+        for name, value in asdict(self).items():
+            if value is None:
+                continue
+            serialized[name] = value
+        return serialized
+
+
+@dataclass(frozen=True)
+class ExplodeManifest:
+    """Roundtrip manifest from which an exploded directory is imploded again."""
+
+    version: int
+    source_path: str
+    source_hash: str
+    variant: str
+    frontmatter_raw: str = ""
+    entries: list[ExplodeEntry] = field(default_factory=list)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the manifest, with each entry rendered via its own ``to_dict``."""
+        serialized_entries = [item.to_dict() for item in self.entries]
+        # Keyword order fixes the key order of the resulting mapping.
+        return dict(
+            version=self.version,
+            source_path=self.source_path,
+            source_hash=self.source_hash,
+            variant=self.variant,
+            frontmatter_raw=self.frontmatter_raw,
+            entries=serialized_entries,
+        )
+
+
+@dataclass(frozen=True)
+class ExplodeResult:
+    """Outcome of an explode run: where things were written and the manifest used."""
+
+    manifest_path: str
+    output_dir: str
+    manifest: ExplodeManifest
+    written_files: list[str]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the result, embedding the manifest in its dictionary form."""
+        manifest_payload = self.manifest.to_dict()
+        return dict(
+            manifest_path=self.manifest_path,
+            output_dir=self.output_dir,
+            manifest=manifest_payload,
+            written_files=self.written_files,
+        )
+
+
+@dataclass(frozen=True)
+class ImplodeResult:
+    """Outcome of an implode run: the rebuilt Markdown plus hashes for comparison."""
+
+    markdown: str
+    manifest_path: str
+    source_hash: str
+    current_hash: str
+    entries: list[str]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize every field of the result into a plain dictionary."""
+        payload: dict[str, Any] = asdict(self)
+        return payload
+
+
+def explode_markdown_file(
+    path: str | Path,
+    output_dir: str | Path,
+    *,
+    variant: str = "flat",
+    overwrite: bool = False,
+) -> ExplodeResult:
+    """Split a Markdown file into per-section files and write a roundtrip manifest.
+
+    Args:
+        path: Markdown file to explode.
+        output_dir: Directory receiving the entry files and the manifest.
+        variant: Layout of the entry files, ``flat`` or ``hierarchical``.
+        overwrite: Allow writing into a directory that already has content.
+
+    Returns:
+        The manifest together with the paths of everything written.
+
+    Raises:
+        ExplodeError: For an unknown variant, or a non-empty output directory
+            when ``overwrite`` is false.
+    """
+
+    if variant not in {"flat", "hierarchical"}:
+        raise ExplodeError("Explode variant must be `flat` or `hierarchical`")
+
+    source_path = Path(path)
+    target_dir = Path(output_dir)
+    markdown = source_path.read_text(encoding="utf-8")
+    occupied = target_dir.exists() and any(target_dir.iterdir())
+    if occupied and not overwrite:
+        raise ExplodeError(f"Output directory is not empty: {target_dir}")
+    target_dir.mkdir(parents=True, exist_ok=True)
+
+    frontmatter_raw, body_start_line = _split_frontmatter_raw(markdown)
+    written_files: list[str] = []
+    entries: list[ExplodeEntry] = []
+    for entry, text in _explode_entries(markdown, body_start_line, variant):
+        destination = _safe_entry_path(target_dir, entry.file)
+        destination.parent.mkdir(parents=True, exist_ok=True)
+        destination.write_text(text, encoding="utf-8")
+        written_files.append(str(destination))
+        entries.append(entry)
+
+    manifest = ExplodeManifest(
+        version=1,
+        source_path=str(source_path),
+        source_hash=_hash_text(markdown),
+        variant=variant,
+        frontmatter_raw=frontmatter_raw,
+        entries=entries,
+    )
+    manifest_path = target_dir / EXPLODE_MANIFEST_NAME
+    manifest_yaml = yaml.safe_dump(manifest.to_dict(), sort_keys=False)
+    manifest_path.write_text(manifest_yaml, encoding="utf-8")
+    # The manifest itself is reported last in the list of written files.
+    return ExplodeResult(
+        manifest_path=str(manifest_path),
+        output_dir=str(target_dir),
+        manifest=manifest,
+        written_files=[*written_files, str(manifest_path)],
+    )
+
+
+def implode_markdown_directory(
+    directory: str | Path,
+    *,
+    manifest_path: str | Path | None = None,
+) -> ImplodeResult:
+    """Rebuild Markdown from a directory produced by :func:`explode_markdown_file`.
+
+    The raw front matter stored in the manifest comes first, followed by the
+    text of each entry file in the order the manifest lists them.
+
+    Args:
+        directory: Root of the exploded directory.
+        manifest_path: Manifest to use; defaults to the standard manifest name
+            inside ``directory``.
+
+    Raises:
+        ExplodeError: When an entry file listed in the manifest is missing.
+    """
+
+    root = Path(directory)
+    if manifest_path:
+        manifest_file = Path(manifest_path)
+    else:
+        manifest_file = root / EXPLODE_MANIFEST_NAME
+    manifest = load_explode_manifest(manifest_file)
+
+    pieces = [manifest.frontmatter_raw]
+    entry_files: list[str] = []
+    for entry in manifest.entries:
+        candidate = _safe_entry_path(root, entry.file)
+        if not (candidate.exists() and candidate.is_file()):
+            raise ExplodeError(f"Exploded entry file not found: {entry.file}")
+        pieces.append(candidate.read_text(encoding="utf-8"))
+        entry_files.append(str(candidate))
+
+    markdown = "".join(pieces)
+    return ImplodeResult(
+        markdown=markdown,
+        manifest_path=str(manifest_file),
+        source_hash=manifest.source_hash,
+        current_hash=_hash_text(markdown),
+        entries=entry_files,
+    )
+
+
+def load_explode_manifest(path: str | Path) -> ExplodeManifest:
+    """Read a YAML explode manifest from ``path`` and build an :class:`ExplodeManifest`.
+
+    Absent top-level keys fall back to defaults (version ``1``, variant
+    ``flat``, empty strings, no entries).
+
+    Raises:
+        ExplodeError: When the document is not a mapping or ``entries`` is not a list.
+    """
+
+    raw_yaml = Path(path).read_text(encoding="utf-8")
+    data = yaml.safe_load(raw_yaml)
+    if not isinstance(data, dict):
+        raise ExplodeError("Explode manifest must be a mapping")
+    raw_entries = data.get("entries", [])
+    if not isinstance(raw_entries, list):
+        raise ExplodeError("Explode manifest entries must be a list")
+    return ExplodeManifest(
+        version=int(data.get("version", 1)),
+        source_path=str(data.get("source_path", "")),
+        source_hash=str(data.get("source_hash", "")),
+        variant=str(data.get("variant", "flat")),
+        frontmatter_raw=str(data.get("frontmatter_raw", "")),
+        entries=list(map(_entry_from_mapping, raw_entries)),
+    )
+
+
+def _explode_entries(
+    markdown: str,
+    body_start_line: int,
+    variant: str,
+) -> list[tuple[ExplodeEntry, str]]:
+    """Cut ``markdown`` into a preamble plus one section per heading.
+
+    Args:
+        markdown: Full document text, front matter included.
+        body_start_line: Line number (treated as 1-based) where the body
+            starts, i.e. the first line after any raw front matter.
+        variant: Layout name forwarded to ``_entry_file_for_heading``.
+
+    Returns:
+        ``(entry, text)`` pairs in document order; ``text`` is the exact slice
+        of source lines that the entry covers.
+    """
+    lines = markdown.splitlines(keepends=True)
+    headings = parse_markdown(markdown).headings
+    entries: list[tuple[ExplodeEntry, str]] = []
+    used_ids: dict[str, int] = {}
+    order = 0
+
+    # Everything between the front matter and the first heading is the preamble;
+    # with no headings at all, the preamble runs to the end of the document.
+    first_heading_line = headings[0].line if headings else len(lines) + 1
+    preamble_text = "".join(lines[body_start_line - 1:first_heading_line - 1])
+    # A heading-less document always gets a preamble entry, even an empty one.
+    if preamble_text or not headings:
+        entry = ExplodeEntry(
+            kind="preamble",
+            file="00-preamble.md",
+            order=order,
+            unit_id="preamble",
+            line_start=body_start_line,
+            line_end=max(first_heading_line - 1, body_start_line),
+            content_hash=_hash_text(preamble_text),
+        )
+        entries.append((entry, preamble_text))
+        order += 1
+
+    # Shared across iterations so the hierarchical layout can track parent headings.
+    hierarchy: dict[int, str] = {}
+    for index, heading in enumerate(headings):
+        start = heading.line
+        # A section ends on the line before the next heading (of any level),
+        # or on the last line of the document.
+        end = headings[index + 1].line - 1 if index + 1 < len(headings) else len(lines)
+        text = "".join(lines[start - 1:end])
+        unit_id = _dedupe_id(_slug(_heading_title(heading)), used_ids)
+        file_path = _entry_file_for_heading(heading, index + 1, unit_id, variant, hierarchy)
+        entry = ExplodeEntry(
+            kind="section",
+            file=file_path,
+            order=order,
+            unit_id=unit_id,
+            line_start=start,
+            line_end=end,
+            heading_level=heading.level,
+            heading_text=heading.text,
+            content_hash=_hash_text(text),
+        )
+        entries.append((entry, text))
+        order += 1
+
+    return entries
+
+
+def _entry_file_for_heading(
+    heading: Heading,
+    index: int,
+    unit_id: str,
+    variant: str,
+    hierarchy: dict[int, str],
+) -> str:
+    """Return the manifest-relative file path for one heading's section.
+
+    Args:
+        heading: Heading whose ``level`` decides the nesting.
+        index: Position used for the zero-padded file-name prefix.
+        unit_id: Slug that follows the prefix in the file name.
+        variant: ``flat`` puts every file under ``sections/``; any other value
+            nests the file under directories named after its ancestor headings.
+        hierarchy: Mutable map from heading level to directory name of the
+            most recent heading at that level; updated in place.
+
+    Returns:
+        A relative path that always uses ``/`` as separator, so a manifest
+        written on one platform can be imploded on another.
+    """
+    stem = f"{index:02d}-{unit_id}"
+    filename = f"{stem}.md"
+    if variant == "flat":
+        return f"sections/{filename}"
+
+    # A new heading closes every open heading at the same or a deeper level.
+    for level in [open_level for open_level in hierarchy if open_level >= heading.level]:
+        del hierarchy[level]
+    parents = [hierarchy[level] for level in sorted(hierarchy)]
+    hierarchy[heading.level] = stem
+    # Join with "/" explicitly: str(Path(...)) would emit backslashes on Windows.
+    return "/".join([*parents, filename])
+
+
+def _entry_from_mapping(data: Any) -> ExplodeEntry:
+    """Build an :class:`ExplodeEntry` from one manifest ``entries`` item.
+
+    ``kind``, ``file``, ``order``, ``unit_id``, ``line_start`` and ``line_end``
+    are required; the heading fields and ``content_hash`` are optional.
+
+    Raises:
+        ExplodeError: When ``data`` is not a mapping, a required field is
+            missing, or a numeric field cannot be converted to ``int``.
+    """
+    if not isinstance(data, dict):
+        raise ExplodeError("Explode manifest entry must be a mapping")
+    heading_level = data.get("heading_level")
+    heading_text = data.get("heading_text")
+    # Report malformed entries as ExplodeError so callers handling manifest
+    # problems do not also have to anticipate bare KeyError/TypeError.
+    try:
+        return ExplodeEntry(
+            kind=str(data["kind"]),
+            file=str(data["file"]),
+            order=int(data["order"]),
+            unit_id=str(data["unit_id"]),
+            line_start=int(data["line_start"]),
+            line_end=int(data["line_end"]),
+            heading_level=int(heading_level) if heading_level is not None else None,
+            heading_text=str(heading_text) if heading_text is not None else None,
+            content_hash=str(data.get("content_hash", "")),
+        )
+    except KeyError as exc:
+        raise ExplodeError(
+            f"Explode manifest entry is missing required field `{exc.args[0]}`"
+        ) from exc
+    except (TypeError, ValueError) as exc:
+        raise ExplodeError(f"Explode manifest entry has an invalid field value: {exc}") from exc
+
+
+def _safe_entry_path(root: Path, relative_path: str) -> Path:
+    """Resolve ``relative_path`` under ``root`` and refuse anything outside it.
+
+    Raises:
+        ExplodeError: When the path is absolute or resolves outside ``root``.
+    """
+    candidate = Path(relative_path)
+    if candidate.is_absolute():
+        raise ExplodeError(f"Exploded entry path must be relative: {relative_path}")
+    resolved = (root / candidate).resolve()
+    try:
+        # relative_to() raises ValueError when ``resolved`` is not below the root.
+        resolved.relative_to(root.resolve())
+    except ValueError as exc:
+        raise ExplodeError(f"Exploded entry path escapes directory: {relative_path}") from exc
+    else:
+        return resolved
+
+
+def _split_frontmatter_raw(markdown: str) -> tuple[str, int]:
+    """Split off the raw ``---``-delimited front matter at the top of ``markdown``.
+
+    Returns:
+        ``(frontmatter_raw, body_start_line)``. ``frontmatter_raw`` is the exact
+        leading text up to and including the closing ``---`` line, or ``""``
+        when the document has no complete front matter. ``body_start_line`` is
+        the 1-based number of the first line after it, ``1`` when there is none.
+    """
+    if not markdown.startswith("---\n"):
+        return "", 1
+
+    # Start at the newline that ends the opening fence so that an empty
+    # front matter block ("---\n---\n") is recognised as well.
+    search_from = 3
+    while True:
+        fence_at = markdown.find("\n---", search_from)
+        if fence_at == -1:
+            return "", 1
+        rest_start = fence_at + 4
+        line_end = markdown.find("\n", rest_start)
+        if line_end == -1:
+            line_end = len(markdown)
+        # Only a line consisting of "---" (plus optional trailing whitespace)
+        # closes the block; "---more" or "----" is ordinary content.
+        if not markdown[rest_start:line_end].strip():
+            break
+        search_from = fence_at + 1
+
+    frontmatter_raw = markdown[: min(line_end + 1, len(markdown))]
+    # Count lines the same way the entry slicing does (``str.splitlines``), so
+    # the result stays right when the closing fence has no trailing newline.
+    return frontmatter_raw, len(frontmatter_raw.splitlines()) + 1
+
+
+def _heading_title(heading: Heading) -> str:
+    """Return the heading text without a trailing ``{#id}`` attribute.
+
+    Falls back to ``"section"`` when nothing is left.
+    """
+    stripped = heading.text.strip()
+    without_anchor = re.sub(r"\s+\{#[A-Za-z0-9_.:-]+\}\s*$", "", stripped)
+    if without_anchor:
+        return without_anchor
+    return "section"
+
+
+def _dedupe_id(unit_id: str, used_ids: dict[str, int]) -> str:
+    """Return ``unit_id``, or a numbered variant of it, that was not handed out before.
+
+    The first occurrence keeps the plain id; repeats receive ``-2``, ``-3``, ...
+    ``used_ids`` is updated in place and also records generated ids, so that a
+    literal id such as ``intro-2`` cannot collide with the second ``intro``.
+    """
+    count = used_ids.get(unit_id, 0) + 1
+    candidate = unit_id if count == 1 else f"{unit_id}-{count}"
+    # Skip suffixes already taken, either generated earlier or seen literally.
+    while candidate != unit_id and candidate in used_ids:
+        count += 1
+        candidate = f"{unit_id}-{count}"
+    used_ids[unit_id] = count
+    if candidate != unit_id:
+        # Reserve the generated id so a later literal occurrence gets a suffix.
+        used_ids.setdefault(candidate, 1)
+    return candidate
+
+
+def _slug(value: str) -> str:
+    """Lower-case ``value`` and reduce it to ``[a-z0-9_.:-]`` joined by single dashes.
+
+    Falls back to ``"section"`` when no usable character remains.
+    """
+    lowered = value.strip().lower()
+    dashed = re.sub(r"[^a-z0-9_.:-]+", "-", lowered)
+    trimmed = re.sub(r"-+", "-", dashed).strip("-")
+    if not trimmed:
+        return "section"
+    return trimmed
+
+
+def _hash_text(text: str) -> str:
+    """Return the SHA-256 hex digest of ``text`` (UTF-8 encoded) with a ``sha256:`` prefix."""
+    digest = hashlib.sha256(text.encode("utf-8"))
+    return f"sha256:{digest.hexdigest()}"
--- a/src/markitect_tool/literate/init.py
+++ b/src/markitect_tool/literate/init.py
@@ -0,0 +1,23 @@
+"""Markdown-native literate weave/tangle workflows."""
+
+from markitect_tool.literate.engine import (
+    CodeChunk,
+    LiterateFile,
+    TangleResult,
+    WeaveResult,
+    discover_code_chunks,
+    tangle_markdown,
+    weave_markdown,
+    write_tangle_files,
+)
+
+__all__ = [
+    "CodeChunk",
+    "LiterateFile",
+    "TangleResult",
+    "WeaveResult",
+    "discover_code_chunks",
+    "tangle_markdown",
+    "weave_markdown",
+    "write_tangle_files",
+]
--- a/src/markitect_tool/literate/engine.py
+++ b/src/markitect_tool/literate/engine.py
@@ -0,0 +1,317 @@
+"""Literate programming helpers for Markdown fenced code chunks."""
+
+from __future__ import annotations
+
+import hashlib
+import re
+import shlex
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Any
+
+from markdown_it import MarkdownIt
+
+from markitect_tool.diagnostics import Diagnostic, SourceLocation
+from markitect_tool.ops import OperationProvenance
+
+
+@dataclass(frozen=True)
+class CodeChunk:
+    """A fenced code block that carries a chunk id."""
+
+    chunk_id: str
+    content: str
+    language: str | None = None
+    target_path: str | None = None
+    references: list[str] = field(default_factory=list)
+    source_path: str | None = None
+    line_start: int | None = None
+    line_end: int | None = None
+    content_hash: str = ""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the chunk, dropping fields that are ``None``, ``[]`` or ``""``."""
+        omitted = (None, [], "")
+        serialized: dict[str, Any] = {}
+        for name, value in asdict(self).items():
+            if value in omitted:
+                continue
+            serialized[name] = value
+        return serialized
+
+
+@dataclass(frozen=True)
+class LiterateFile:
+    """A file produced by tangling, with the ids of the chunks that feed it."""
+
+    path: str
+    content: str
+    chunk_ids: list[str]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize every field into a plain dictionary."""
+        payload: dict[str, Any] = asdict(self)
+        return payload
+
+
+@dataclass(frozen=True)
+class TangleResult:
+    """Everything a tangle run produced: files, chunks, diagnostics and provenance."""
+
+    files: list[LiterateFile]
+    chunks: list[CodeChunk]
+    diagnostics: list[Diagnostic] = field(default_factory=list)
+    provenance: list[OperationProvenance] = field(default_factory=list)
+
+    @property
+    def valid(self) -> bool:
+        """``True`` unless at least one diagnostic has severity ``error``."""
+        return all(item.severity != "error" for item in self.diagnostics)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the result, including the derived ``valid`` flag."""
+        return dict(
+            valid=self.valid,
+            files=[item.to_dict() for item in self.files],
+            chunks=[item.to_dict() for item in self.chunks],
+            diagnostics=[item.to_dict() for item in self.diagnostics],
+            provenance=[item.to_dict() for item in self.provenance],
+        )
+
+
+@dataclass(frozen=True)
+class WeaveResult:
+    """Woven Markdown together with the chunks that were indexed."""
+
+    markdown: str
+    chunks: list[CodeChunk]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the woven Markdown and each chunk's dictionary form."""
+        serialized_chunks = [item.to_dict() for item in self.chunks]
+        return dict(markdown=self.markdown, chunks=serialized_chunks)
+
+
+_CHUNK_REF_RE = re.compile(r"<<(?P<id>[A-Za-z0-9_.:-]+)>>")
+_CHUNK_LINE_REF_RE = re.compile(r"^(?P<indent>[ \t]*)<<(?P<id>[A-Za-z0-9_.:-]+)>>[ \t]*$", re.MULTILINE)
+
+
+def discover_code_chunks(
+    markdown: str,
+    *,
+    source_path: str | Path | None = None,
+) -> list[CodeChunk]:
+    """Collect the fenced code blocks that declare an id, in document order.
+
+    Fences whose info string yields no ``id`` attribute are skipped. Ids are
+    slugged and de-duplicated before being stored on the chunk.
+
+    Args:
+        markdown: Document text to scan.
+        source_path: Optional origin recorded on every chunk.
+    """
+
+    md = MarkdownIt("commonmark", {"tables": True}).enable("table")
+    origin = str(source_path) if source_path else None
+    seen_ids: dict[str, int] = {}
+    discovered: list[CodeChunk] = []
+    for token in md.parse(markdown):
+        if token.type != "fence":
+            continue
+        attrs = _parse_fence_info(token.info)
+        raw_id = attrs.get("id")
+        if not raw_id:
+            continue
+        # token.map holds a 0-based [start, end) line pair when available.
+        if token.map:
+            first_line, last_line = token.map[0] + 1, token.map[1]
+        else:
+            first_line, last_line = None, None
+        body = token.content
+        discovered.append(
+            CodeChunk(
+                chunk_id=_dedupe_id(_slug(raw_id), seen_ids),
+                content=body,
+                language=attrs.get("language"),
+                target_path=attrs.get("tangle") or attrs.get("target"),
+                references=_chunk_references(body),
+                source_path=origin,
+                line_start=first_line,
+                line_end=last_line,
+                content_hash=_hash_text(body),
+            )
+        )
+    return discovered
+
+
+def tangle_markdown(
+    markdown: str,
+    *,
+    source_path: str | Path | None = None,
+) -> TangleResult:
+    """Tangle named chunks into target files.
+
+    Chunks that declare a target path are grouped by that path in document
+    order, expanded (resolving ``<<id>>`` references), and joined into one
+    file per target. Problems found during expansion are collected as
+    diagnostics on the result rather than raised.
+
+    Args:
+        markdown: Document text containing the fenced chunks.
+        source_path: Optional origin recorded on chunks and provenance events.
+
+    Returns:
+        The generated files, every discovered chunk, the diagnostics, and one
+        provenance event per chunk written to a target.
+    """
+
+    chunks = discover_code_chunks(markdown, source_path=source_path)
+    chunks_by_id = {chunk.chunk_id: chunk for chunk in chunks}
+    diagnostics: list[Diagnostic] = []
+    provenance: list[OperationProvenance] = []
+    # Group chunks by target; dict insertion order keeps targets in first-seen order.
+    target_chunks: dict[str, list[CodeChunk]] = {}
+    for chunk in chunks:
+        if chunk.target_path:
+            target_chunks.setdefault(chunk.target_path, []).append(chunk)
+
+    files: list[LiterateFile] = []
+    for target_path, grouped_chunks in target_chunks.items():
+        rendered_parts: list[str] = []
+        for chunk in grouped_chunks:
+            # Each top-level expansion starts with an empty reference stack.
+            rendered_parts.append(_expand_chunk(chunk, chunks_by_id, diagnostics, []))
+            provenance.append(
+                OperationProvenance(
+                    operation="literate.tangle",
+                    source_path=chunk.source_path,
+                    line_start=chunk.line_start,
+                    line_end=chunk.line_end,
+                    target_path=target_path,
+                    dependencies=[chunk.source_path] if chunk.source_path else [],
+                    metadata={"chunk_id": chunk.chunk_id, "references": chunk.references},
+                )
+            )
+        files.append(
+            LiterateFile(
+                path=target_path,
+                content=_join_tangled_parts(rendered_parts),
+                chunk_ids=[chunk.chunk_id for chunk in grouped_chunks],
+            )
+        )
+
+    return TangleResult(
+        files=files,
+        chunks=chunks,
+        diagnostics=diagnostics,
+        provenance=provenance,
+    )
+
+
+def weave_markdown(
+    markdown: str,
+    *,
+    source_path: str | Path | None = None,
+) -> WeaveResult:
+    """Return ``markdown`` with a ``## Code Chunk Index`` section appended.
+
+    Each discovered chunk gets one bullet listing its id and, when present,
+    its target path, the chunks it references and its starting line. A
+    document without named chunks is returned unchanged.
+    """
+
+    chunks = discover_code_chunks(markdown, source_path=source_path)
+    if not chunks:
+        return WeaveResult(markdown=markdown, chunks=[])
+
+    bullets: list[str] = []
+    for chunk in chunks:
+        bullet = f"- `{chunk.chunk_id}`"
+        if chunk.target_path:
+            bullet += f" -> `{chunk.target_path}`"
+        if chunk.references:
+            bullet += "; refs: " + ", ".join(f"`{ref}`" for ref in chunk.references)
+        if chunk.line_start:
+            bullet += f" line {chunk.line_start}"
+        bullets.append(bullet)
+
+    woven = "\n".join([markdown.rstrip(), "", "## Code Chunk Index", "", *bullets])
+    return WeaveResult(markdown=woven.rstrip() + "\n", chunks=chunks)
+
+
+def write_tangle_files(result: TangleResult, output_dir: str | Path) -> list[str]:
+    """Write each tangled file below ``output_dir`` and return the written paths.
+
+    The output directory and any intermediate directories are created as
+    needed. Target paths are checked by ``_safe_output_path`` before writing.
+    """
+
+    root = Path(output_dir)
+    root.mkdir(parents=True, exist_ok=True)
+    written: list[str] = []
+    for tangled in result.files:
+        destination = _safe_output_path(root, tangled.path)
+        destination.parent.mkdir(parents=True, exist_ok=True)
+        destination.write_text(tangled.content, encoding="utf-8")
+        written.append(str(destination))
+    return written
+
+
+# Matches a reference that fills a whole line (capturing its indent) or,
+# failing that, a reference anywhere inside a line.
+_CHUNK_ANY_REF_RE = re.compile(
+    r"^(?P<indent>[ \t]*)<<(?P<line_id>[A-Za-z0-9_.:-]+)>>[ \t]*$"
+    r"|<<(?P<inline_id>[A-Za-z0-9_.:-]+)>>",
+    re.MULTILINE,
+)
+
+
+def _expand_chunk(
+    chunk: CodeChunk,
+    chunks_by_id: dict[str, CodeChunk],
+    diagnostics: list[Diagnostic],
+    stack: list[str],
+) -> str:
+    """Return the content of ``chunk`` with its ``<<id>>`` references expanded.
+
+    A reference standing alone on a line is replaced by the referenced chunk
+    with every non-empty line indented like the reference; a reference inside
+    a line is replaced verbatim. All references are handled in one pass, so
+    placeholders left behind for missing or cyclic references are not scanned
+    a second time and each problem is reported once per reference.
+
+    Args:
+        chunk: Chunk to expand.
+        chunks_by_id: Lookup of all known chunks.
+        diagnostics: Collector that receives cycle/missing-chunk errors.
+        stack: Ids of the chunks currently being expanded, outermost first.
+
+    Returns:
+        The expanded text, or the literal ``<<id>>`` placeholder when ``chunk``
+        is already on ``stack`` (a cycle).
+    """
+    if chunk.chunk_id in stack:
+        diagnostics.append(
+            Diagnostic(
+                severity="error",
+                code="literate.chunk_cycle",
+                message="Cyclic chunk reference: " + " -> ".join(stack + [chunk.chunk_id]),
+                source=SourceLocation(path=chunk.source_path, line=chunk.line_start),
+            )
+        )
+        return f"<<{chunk.chunk_id}>>"
+
+    nested_stack = stack + [chunk.chunk_id]
+
+    def substitute(match: re.Match[str]) -> str:
+        line_id = match.group("line_id")
+        if line_id is None:
+            # Inline reference: insert the expansion as-is.
+            return _expand_reference(
+                match.group("inline_id"), chunks_by_id, diagnostics, nested_stack, chunk
+            )
+        indent = match.group("indent")
+        expanded = _expand_reference(line_id, chunks_by_id, diagnostics, nested_stack, chunk)
+        # Re-indent the expansion; empty lines stay empty.
+        return "\n".join(f"{indent}{line}" if line else line for line in expanded.splitlines())
+
+    return _CHUNK_ANY_REF_RE.sub(substitute, chunk.content)
+
+
+def _expand_reference(
+    chunk_id: str,
+    chunks_by_id: dict[str, CodeChunk],
+    diagnostics: list[Diagnostic],
+    stack: list[str],
+    source_chunk: CodeChunk,
+) -> str:
+    """Expand the chunk named ``chunk_id`` on behalf of ``source_chunk``.
+
+    An unknown id is recorded as a ``literate.missing_chunk`` error located at
+    ``source_chunk`` and the ``<<id>>`` placeholder is returned unchanged.
+    """
+    target = chunks_by_id.get(chunk_id)
+    if target:
+        return _expand_chunk(target, chunks_by_id, diagnostics, stack)
+
+    location = SourceLocation(path=source_chunk.source_path, line=source_chunk.line_start)
+    diagnostics.append(
+        Diagnostic(
+            severity="error",
+            code="literate.missing_chunk",
+            message=f"Missing chunk reference `{chunk_id}`",
+            source=location,
+        )
+    )
+    return f"<<{chunk_id}>>"
+
+
+def _join_tangled_parts(parts: list[str]) -> str:
+    """Join rendered chunks with single newlines and end the file with exactly one.
+
+    Trailing newlines of each part are dropped before joining; an empty result
+    stays the empty string.
+    """
+    trimmed = [part.rstrip("\n") for part in parts if part is not None]
+    rendered = "\n".join(trimmed)
+    if not rendered:
+        return ""
+    return rendered.rstrip() + "\n"
+
+
+def _safe_output_path(root: Path, relative_path: str) -> Path:
+    """Resolve a tangle target under ``root`` and refuse anything outside it.
+
+    Raises:
+        ValueError: When the target is absolute or resolves outside ``root``.
+    """
+    candidate = Path(relative_path)
+    if candidate.is_absolute():
+        raise ValueError(f"Tangle target must be relative: {relative_path}")
+    resolved = (root / candidate).resolve()
+    try:
+        # relative_to() raises ValueError when ``resolved`` is not below the root.
+        resolved.relative_to(root.resolve())
+    except ValueError as exc:
+        raise ValueError(f"Tangle target escapes output directory: {relative_path}") from exc
+    else:
+        return resolved
+
+
+def _parse_fence_info(info: str) -> dict[str, str]:
+    """Parse a fence info string such as ``python {#id tangle=src/app.py}``.
+
+    The language and the ``{...}`` attribute block are both optional, and the
+    block may follow the language with or without whitespace, so ``{#id}`` and
+    ``python{#id}`` are understood as well.
+
+    Returns:
+        The parsed attributes plus a ``language`` key when a language is
+        present. An info string that does not fit the pattern is returned
+        whole as the language; an empty one gives ``{}``.
+    """
+    stripped = info.strip()
+    match = re.match(r"^(?P<language>[^\s{]+)?(?:\s*\{(?P<attrs>.*)\})?\s*$", stripped)
+    if not match:
+        return {"language": stripped} if stripped else {}
+    attrs = _parse_attrs(match.group("attrs") or "")
+    language = match.group("language")
+    if language:
+        attrs["language"] = language
+    return attrs
+
+
+def _parse_attrs(raw: str) -> dict[str, str]:
+    """Parse the inside of a ``{...}`` attribute block into a mapping.
+
+    ``#name`` sets ``id``, ``key=value`` sets ``key``, and a bare word becomes
+    a flag with the value ``"true"``. Quoting follows shell rules; when the
+    quotes are unbalanced the text is split on whitespace instead, so a stray
+    quote or apostrophe does not abort processing of the whole document.
+    """
+    try:
+        parts = shlex.split(raw)
+    except ValueError:
+        # shlex raises on an unterminated quote; degrade to a plain split.
+        parts = raw.split()
+
+    attrs: dict[str, str] = {}
+    for part in parts:
+        if part.startswith("#") and len(part) > 1:
+            attrs["id"] = part[1:]
+            continue
+        if "=" not in part:
+            attrs[part] = "true"
+            continue
+        key, value = part.split("=", 1)
+        attrs[key.strip()] = value.strip()
+    return attrs
+
+
+def _chunk_references(content: str) -> list[str]:
+    """List the ids of all ``<<id>>`` references in ``content``, in order, repeats included."""
+    referenced: list[str] = []
+    for match in _CHUNK_REF_RE.finditer(content):
+        referenced.append(match.group("id"))
+    return referenced
+
+
+def _dedupe_id(unit_id: str, used_ids: dict[str, int]) -> str:
+    """Return ``unit_id``, or a numbered variant of it, that was not handed out before.
+
+    The first occurrence keeps the plain id; repeats receive ``-2``, ``-3``, ...
+    ``used_ids`` is updated in place and also records generated ids, so that a
+    literal id such as ``setup-2`` cannot collide with the second ``setup``
+    and silently replace it in an id-keyed lookup.
+    """
+    count = used_ids.get(unit_id, 0) + 1
+    candidate = unit_id if count == 1 else f"{unit_id}-{count}"
+    # Skip suffixes already taken, either generated earlier or seen literally.
+    while candidate != unit_id and candidate in used_ids:
+        count += 1
+        candidate = f"{unit_id}-{count}"
+    used_ids[unit_id] = count
+    if candidate != unit_id:
+        # Reserve the generated id so a later literal occurrence gets a suffix.
+        used_ids.setdefault(candidate, 1)
+    return candidate
+
+
+def _slug(value: str) -> str:
+    """Lower-case ``value`` and reduce it to ``[a-z0-9_.:-]`` joined by single dashes.
+
+    Falls back to ``"chunk"`` when no usable character remains.
+    """
+    lowered = value.strip().lower()
+    dashed = re.sub(r"[^a-z0-9_.:-]+", "-", lowered)
+    trimmed = re.sub(r"-+", "-", dashed).strip("-")
+    if not trimmed:
+        return "chunk"
+    return trimmed
+
+
+def _hash_text(text: str) -> str:
+    """Return the SHA-256 hex digest of ``text`` (UTF-8 encoded) with a ``sha256:`` prefix."""
+    digest = hashlib.sha256(text.encode("utf-8"))
+    return f"sha256:{digest.hexdigest()}"
--- a/src/markitect_tool/ops/init.py
+++ b/src/markitect_tool/ops/init.py
@@ -4,6 +4,7 @@ from markitect_tool.ops.engine import (
    ComposeResult,
    IncludeError,
    IncludeResult,
+    OperationProvenance,
    TransformResult,
    compose_files,
    resolve_includes,
@@ -14,6 +15,7 @@ __all__ = [
    "ComposeResult",
    "IncludeError",
    "IncludeResult",
+    "OperationProvenance",
    "TransformResult",
    "compose_files",
    "resolve_includes",
--- a/src/markitect_tool/ops/engine.py
+++ b/src/markitect_tool/ops/engine.py
@@ -9,6 +9,7 @@ from pathlib import Path
 from typing import Any

 import yaml
+from markdown_it import MarkdownIt

 from markitect_tool.core import parse_markdown
 from markitect_tool.query import extract_document
@@ -18,15 +19,46 @@ class IncludeError(ValueError):
    """Raised when include resolution cannot continue."""


+@dataclass(frozen=True)
+class OperationProvenance:
+    """Record of one deterministic Markdown operation and what it touched."""
+
+    operation: str
+    source_path: str | None = None
+    line_start: int | None = None
+    line_end: int | None = None
+    target_path: str | None = None
+    dependencies: list[str] = field(default_factory=list)
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the record, omitting ``None`` fields and empty collections."""
+        candidates: list[tuple[str, Any]] = [
+            ("operation", self.operation),
+            ("source_path", self.source_path),
+            ("line_start", self.line_start),
+            ("line_end", self.line_end),
+            ("target_path", self.target_path),
+            # Empty dependencies/metadata collapse to None and are dropped below.
+            ("dependencies", self.dependencies or None),
+            ("metadata", self.metadata or None),
+        ]
+        return {name: value for name, value in candidates if value is not None}
+
+
@dataclass(frozen=True)
 class TransformResult:
    """Result of a deterministic Markdown transform."""

    markdown: str
    operations: list[str] = field(default_factory=list)
+    provenance: list[OperationProvenance] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
-        return asdict(self)
+        data: dict[str, Any] = {
+            "markdown": self.markdown,
+            "operations": self.operations,
+            "provenance": [event.to_dict() for event in self.provenance],
+        }
+        return {key: value for key, value in data.items() if value}


@dataclass(frozen=True)
@@ -46,9 +78,15 @@ class IncludeResult:

    markdown: str
    included_paths: list[str] = field(default_factory=list)
+    provenance: list[OperationProvenance] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
-        return asdict(self)
+        data: dict[str, Any] = {
+            "markdown": self.markdown,
+            "included_paths": self.included_paths,
+            "provenance": [event.to_dict() for event in self.provenance],
+        }
+        return {key: value for key, value in data.items() if value}


 _COMMENT_INCLUDE_RE = re.compile(r"<!--\s*mkt:include\s+(?P<attrs>.*?)\s*-->", re.DOTALL)
@@ -68,15 +106,30 @@ def transform_markdown(
    """Apply deterministic operations to one Markdown document."""

    operations: list[str] = []
+    provenance: list[OperationProvenance] = []
    frontmatter, body = _split_frontmatter(markdown)

    if set_frontmatter:
        frontmatter = _deep_merge(frontmatter, set_frontmatter)
        operations.append("set_frontmatter")
+        provenance.append(
+            OperationProvenance(
+                operation="set_frontmatter",
+                source_path=source_path,
+                metadata={"keys": sorted(set_frontmatter.keys())},
+            )
+        )

    if heading_delta:
-        body = shift_heading_levels(body, heading_delta)
+        body, affected_lines = _shift_heading_levels(body, heading_delta)
        operations.append(f"shift_headings:{heading_delta}")
+        provenance.append(
+            OperationProvenance(
+                operation="shift_headings",
+                source_path=source_path,
+                metadata={"delta": heading_delta, "affected_lines": affected_lines},
+            )
+        )

    if extract_selector:
        document_text = _join_frontmatter(frontmatter, body) if frontmatter else body
@@ -84,24 +137,71 @@ def transform_markdown(
        body = "\n\n".join(extract_document(document, extract_selector))
        frontmatter = {}
        operations.append(f"extract:{extract_selector}")
+        provenance.append(
+            OperationProvenance(
+                operation="extract",
+                source_path=source_path,
+                metadata={"selector": extract_selector},
+            )
+        )

    if strip_frontmatter:
        frontmatter = {}
        operations.append("strip_frontmatter")
+        provenance.append(
+            OperationProvenance(
+                operation="strip_frontmatter",
+                source_path=source_path,
+            )
+        )

-    return TransformResult(markdown=_join_frontmatter(frontmatter, body), operations=operations)
+    return TransformResult(
+        markdown=_join_frontmatter(frontmatter, body),
+        operations=operations,
+        provenance=provenance,
+    )


 def shift_heading_levels(markdown: str, delta: int) -> str:
    """Shift ATX heading levels by delta while clamping to levels 1 through 6."""

-    def replace(match: re.Match[str]) -> str:
+    shifted, _affected_lines = _shift_heading_levels(markdown, delta)
+    return shifted
+
+
+def _shift_heading_levels(markdown: str, delta: int) -> tuple[str, list[int]]:
+    ignored_lines = _code_line_numbers(markdown)
+    affected_lines: list[int] = []
+    rendered_lines: list[str] = []
+
+    for line_number, line in enumerate(markdown.splitlines(keepends=True), start=1):
+        if line_number in ignored_lines:
+            rendered_lines.append(line)
+            continue
+        line_body = line.rstrip("\r\n")
+        line_ending = line[len(line_body) :]
+        match = _HEADING_RE.match(line_body)
+        if not match:
+            rendered_lines.append(line)
+            continue
        marks = match.group(1)
        suffix = match.group(2)
        level = min(max(len(marks) + delta, 1), 6)
-        return f"{'#' * level}{suffix}"
+        rendered_lines.append(f"{'#' * level}{suffix}{line_ending}")
+        affected_lines.append(line_number)

-    return _HEADING_RE.sub(replace, markdown)
+    return "".join(rendered_lines), affected_lines
+
+
+def _code_line_numbers(markdown: str) -> set[int]:
+    """Return the 1-based numbers of all lines inside fenced or indented code blocks."""
+    md = MarkdownIt("commonmark", {"tables": True}).enable("table")
+    code_lines: set[int] = set()
+    for token in md.parse(markdown):
+        if token.type in {"fence", "code_block"} and token.map:
+            # token.map is a 0-based [first, last) pair; shift it to 1-based lines.
+            first, last = token.map
+            code_lines.update(range(first + 1, last + 1))
+    return code_lines


 def compose_files(
@@ -154,18 +254,22 @@ def resolve_includes(
    root = Path(base_dir).resolve()
    stack = [Path(current_path).resolve()] if current_path else []
    included: list[Path] = []
+    provenance: list[OperationProvenance] = []
    resolved = _resolve_include_text(
        markdown,
        root=root,
        current_dir=Path(current_path).resolve().parent if current_path else root,
+        source_path=Path(current_path).resolve() if current_path else None,
        stack=stack,
        included=included,
+        provenance=provenance,
        depth=0,
        max_depth=max_depth,
    )
    return IncludeResult(
        markdown=resolved,
        included_paths=[str(path) for path in included],
+        provenance=provenance,
    )


@@ -174,34 +278,73 @@ def _resolve_include_text(
    *,
    root: Path,
    current_dir: Path,
+    source_path: Path | None,
    stack: list[Path],
    included: list[Path],
+    provenance: list[OperationProvenance],
    depth: int,
    max_depth: int,
 ) -> str:
    if depth > max_depth:
        raise IncludeError(f"Include depth exceeded max_depth={max_depth}")

-    def replace_comment(match: re.Match[str]) -> str:
-        attrs = _parse_include_attrs(match.group("attrs"))
-        return _render_include(attrs, root, current_dir, stack, included, depth, max_depth)
+    ignored_lines = _code_line_numbers(markdown)
+    rendered_lines: list[str] = []

-    def replace_brace(match: re.Match[str]) -> str:
-        attrs = {"path": match.group("path").strip()}
-        return _render_include(attrs, root, current_dir, stack, included, depth, max_depth)
+    for line_number, line in enumerate(markdown.splitlines(keepends=True), start=1):
+        if line_number in ignored_lines:
+            rendered_lines.append(line)
+            continue

-    markdown = _COMMENT_INCLUDE_RE.sub(replace_comment, markdown)
-    return _BRACE_INCLUDE_RE.sub(replace_brace, markdown)
+        def replace_comment(match: re.Match[str]) -> str:
+            attrs = _parse_include_attrs(match.group("attrs"))
+            return _render_include(
+                attrs,
+                root,
+                current_dir,
+                source_path,
+                stack,
+                included,
+                provenance,
+                depth,
+                max_depth,
+                marker_line=line_number,
+            )
+
+        def replace_brace(match: re.Match[str]) -> str:
+            attrs = {"path": match.group("path").strip()}
+            return _render_include(
+                attrs,
+                root,
+                current_dir,
+                source_path,
+                stack,
+                included,
+                provenance,
+                depth,
+                max_depth,
+                marker_line=line_number,
+            )
+
+        line = _COMMENT_INCLUDE_RE.sub(replace_comment, line)
+        line = _BRACE_INCLUDE_RE.sub(replace_brace, line)
+        rendered_lines.append(line)
+
+    return "".join(rendered_lines)


 def _render_include(
    attrs: dict[str, str],
    root: Path,
    current_dir: Path,
+    source_path: Path | None,
    stack: list[Path],
    included: list[Path],
+    provenance: list[OperationProvenance],
    depth: int,
    max_depth: int,
+    *,
+    marker_line: int,
 ) -> str:
    raw_path = attrs.get("path")
    if not raw_path:
@@ -228,12 +371,33 @@ def _render_include(
        body = shift_heading_levels(body, heading_delta)

    included.append(include_path)
+    provenance.append(
+        OperationProvenance(
+            operation="include",
+            source_path=str(source_path) if source_path else None,
+            line_start=marker_line,
+            line_end=marker_line,
+            target_path=str(include_path),
+            dependencies=[str(include_path)],
+            metadata={
+                key: value
+                for key, value in {
+                    "selector": selector,
+                    "heading_delta": heading_delta if heading_delta else None,
+                    "include_frontmatter": attrs.get("include_frontmatter"),
+                }.items()
+                if value is not None
+            },
+        )
+    )
    return _resolve_include_text(
        body.strip(),
        root=root,
        current_dir=include_path.parent,
+        source_path=include_path,
        stack=stack + [include_path],
        included=included,
+        provenance=provenance,
        depth=depth + 1,
        max_depth=max_depth,
    )
--- a/src/markitect_tool/processor/init.py
+++ b/src/markitect_tool/processor/init.py
@@ -0,0 +1,27 @@
+"""Deterministic fenced-block processor registry."""
+
+from markitect_tool.processor.engine import (
+    FencedProcessorBlock,
+    ProcessorContext,
+    ProcessorOutputFile,
+    ProcessorRegistry,
+    ProcessorRequest,
+    ProcessorResult,
+    ProcessorRun,
+    default_processor_registry,
+    discover_fenced_processors,
+    run_fenced_processors,
+)
+
+# Public names of the processor package; each one is re-exported from
+# ``markitect_tool.processor.engine`` by the import above.
+__all__ = [
+    "FencedProcessorBlock",
+    "ProcessorContext",
+    "ProcessorOutputFile",
+    "ProcessorRegistry",
+    "ProcessorRequest",
+    "ProcessorResult",
+    "ProcessorRun",
+    "default_processor_registry",
+    "discover_fenced_processors",
+    "run_fenced_processors",
+]
--- a/src/markitect_tool/processor/engine.py
+++ b/src/markitect_tool/processor/engine.py
@@ -0,0 +1,374 @@
+"""Processor API for deterministic fenced-block workflows."""
+
+from __future__ import annotations
+
+import hashlib
+import re
+import shlex
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Any, Callable
+
+from markdown_it import MarkdownIt
+
+from markitect_tool.diagnostics import Diagnostic, SourceLocation
+from markitect_tool.ops import OperationProvenance
+from markitect_tool.reference import (
+    ReferenceContext,
+    ReferenceResolutionError,
+    resolve_reference,
+)
+
+
+# Signature every registered processor must satisfy: it receives one
+# ``ProcessorRequest`` and returns one ``ProcessorResult``.
+ProcessorCallable = Callable[["ProcessorRequest"], "ProcessorResult"]
+
+
+@dataclass(frozen=True)
+class FencedProcessorBlock:
+    """One fenced code block whose info string selected a processor.
+
+    Instances are built by ``discover_fenced_processors`` and carry the raw
+    fence body together with where it was found.
+    """
+
+    # Name of the processor the fence asked for.
+    processor: str
+    # Raw text of the fence body.
+    content: str
+    # Identifier of this block (explicit ``id`` attribute or a generated one).
+    unit_id: str
+    # Info-string attributes other than ``id``, ``language`` and ``processor``.
+    attrs: dict[str, str]
+    language: str | None = None
+    source_path: str | None = None
+    # Source line range of the fence, when the parser reported one.
+    line_start: int | None = None
+    line_end: int | None = None
+    # Digest of ``content``.
+    content_hash: str = ""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the fields as a dict, leaving out ``None``, ``{}`` and ``""``."""
+
+        omitted = (None, {}, "")
+        payload: dict[str, Any] = {}
+        for name, value in asdict(self).items():
+            if value in omitted:
+                continue
+            payload[name] = value
+        return payload
+
+
+@dataclass(frozen=True)
+class ProcessorContext:
+    """Immutable settings handed to every processor invocation."""
+
+    root: Path = Path(".")
+    current_path: Path | None = None
+    namespaces: dict[str, str] = field(default_factory=dict)
+    variables: dict[str, Any] = field(default_factory=dict)
+    policy: dict[str, Any] = field(default_factory=dict)
+
+    def reference_context(self) -> ReferenceContext:
+        """Build a ``ReferenceContext`` from this context's root, path and namespaces."""
+
+        return ReferenceContext(
+            namespaces=self.namespaces,
+            current_path=self.current_path,
+            root=self.root,
+        )
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize to a dict, skipping entries equal to ``None``, ``{}`` or ``""``."""
+
+        current = None if not self.current_path else str(self.current_path)
+        candidates = (
+            ("root", str(self.root)),
+            ("current_path", current),
+            ("namespaces", self.namespaces),
+            ("variables", self.variables),
+            ("policy", self.policy),
+        )
+        return {name: value for name, value in candidates if value not in (None, {}, "")}
+
+
+@dataclass(frozen=True)
+class ProcessorRequest:
+    """One processor invocation: the block to process plus its context."""
+
+    # The fenced block being handed to the processor.
+    block: FencedProcessorBlock
+    # Execution context shared by the processors of a run.
+    context: ProcessorContext
+
+
+@dataclass(frozen=True)
+class ProcessorOutputFile:
+    """File content that a processor asks to have generated at ``path``."""
+
+    path: str
+    content: str
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the ``path`` and ``content`` fields as a plain dict."""
+
+        return {"path": self.path, "content": self.content}
+
+
+@dataclass(frozen=True)
+class ProcessorResult:
+    """Envelope for everything a processor hands back."""
+
+    content: str | None = None
+    files: list[ProcessorOutputFile] = field(default_factory=list)
+    diagnostics: list[Diagnostic] = field(default_factory=list)
+    dependencies: list[str] = field(default_factory=list)
+    provenance: list[OperationProvenance] = field(default_factory=list)
+
+    @property
+    def valid(self) -> bool:
+        """``True`` unless at least one diagnostic has severity ``"error"``."""
+
+        for diagnostic in self.diagnostics:
+            if diagnostic.severity == "error":
+                return False
+        return True
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the result, dropping entries equal to ``None``, ``[]`` or ``{}``."""
+
+        candidates = (
+            ("valid", self.valid),
+            ("content", self.content),
+            ("files", [item.to_dict() for item in self.files]),
+            ("diagnostics", [item.to_dict() for item in self.diagnostics]),
+            ("dependencies", self.dependencies),
+            ("provenance", [item.to_dict() for item in self.provenance]),
+        )
+        return {name: value for name, value in candidates if value not in (None, [], {})}
+
+
+@dataclass(frozen=True)
+class ProcessorRun:
+    """Outcome of running every processor block found in one document."""
+
+    source_path: str | None
+    blocks: list[FencedProcessorBlock]
+    results: list[ProcessorResult]
+
+    @property
+    def valid(self) -> bool:
+        """``True`` when no individual result is invalid (also for an empty run)."""
+
+        return not any(not result.valid for result in self.results)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the run, including the result count and every block/result."""
+
+        payload: dict[str, Any] = {"valid": self.valid}
+        payload["source_path"] = self.source_path
+        payload["count"] = len(self.results)
+        payload["blocks"] = [block.to_dict() for block in self.blocks]
+        payload["results"] = [result.to_dict() for result in self.results]
+        return payload
+
+
+class ProcessorRegistry:
+    """Name-to-callable table for deterministic fenced-block processors.
+
+    Names are normalized with ``_slug`` both when registering and when
+    looking a processor up.
+    """
+
+    def __init__(self) -> None:
+        self._processors: dict[str, ProcessorCallable] = {}
+
+    def register(self, name: str, processor: ProcessorCallable) -> None:
+        """Store ``processor`` under the normalized ``name``.
+
+        An existing entry with the same normalized name is replaced.
+
+        Raises:
+            ValueError: if ``name`` normalizes to an empty string.
+        """
+
+        normalized = _slug(name)
+        if not normalized:
+            raise ValueError("Processor name cannot be empty")
+        self._processors[normalized] = processor
+
+    def names(self) -> list[str]:
+        """Return the registered (normalized) names in sorted order."""
+
+        return sorted(self._processors.keys())
+
+    def run(self, request: ProcessorRequest) -> ProcessorResult:
+        """Invoke the processor named by ``request.block``.
+
+        An unregistered name does not raise; it yields a result carrying a
+        single ``processor.unknown`` error diagnostic.
+        """
+
+        block = request.block
+        handler = self._processors.get(_slug(block.processor))
+        if handler is not None:
+            return handler(request)
+        unknown = Diagnostic(
+            severity="error",
+            code="processor.unknown",
+            message=f"Unknown processor `{block.processor}`",
+            source=SourceLocation(
+                path=block.source_path,
+                line=block.line_start,
+            ),
+        )
+        return ProcessorResult(diagnostics=[unknown])
+
+
+def default_processor_registry() -> ProcessorRegistry:
+    """Return a fresh registry preloaded with the built-in processors."""
+
+    built_in = (
+        ("identity", _identity_processor),
+        ("uppercase", _uppercase_processor),
+        ("include", _include_processor),
+    )
+    registry = ProcessorRegistry()
+    for name, handler in built_in:
+        registry.register(name, handler)
+    return registry
+
+
+def discover_fenced_processors(
+    markdown: str,
+    *,
+    source_path: str | Path | None = None,
+) -> list[FencedProcessorBlock]:
+    """Collect the fenced blocks in ``markdown`` that name a processor.
+
+    Fences whose info string yields no processor name are skipped. Blocks
+    are returned in document order with de-duplicated ``unit_id`` values.
+    """
+
+    reserved = {"id", "language", "processor"}
+    origin = str(source_path) if source_path else None
+    tokens = MarkdownIt("commonmark", {"tables": True}).enable("table").parse(markdown)
+    seen_ids: dict[str, int] = {}
+    discovered: list[FencedProcessorBlock] = []
+    for position, token in enumerate(tokens):
+        if token.type != "fence":
+            continue
+        info = _parse_fence_info(token.info)
+        name = _processor_name(info)
+        if not name:
+            continue
+        # Without an explicit id, fall back to "<processor>-<token index>".
+        requested_id = info.get("id") or f"{name}-{position}"
+        if token.map:
+            first_line, last_line = token.map[0] + 1, token.map[1]
+        else:
+            first_line, last_line = None, None
+        extra = {key: value for key, value in info.items() if key not in reserved}
+        discovered.append(
+            FencedProcessorBlock(
+                processor=name,
+                content=token.content,
+                unit_id=_dedupe_id(_slug(requested_id), seen_ids),
+                attrs=extra,
+                language=info.get("language"),
+                source_path=origin,
+                line_start=first_line,
+                line_end=last_line,
+                content_hash=_hash_text(token.content),
+            )
+        )
+    return discovered
+
+
+def run_fenced_processors(
+    markdown: str,
+    *,
+    context: ProcessorContext,
+    registry: ProcessorRegistry | None = None,
+    source_path: str | Path | None = None,
+) -> ProcessorRun:
+    """Discover the processor blocks in ``markdown`` and run them in order.
+
+    ``source_path`` falls back to ``context.current_path`` and ``registry``
+    falls back to ``default_processor_registry()``.
+    """
+
+    effective_path = source_path or context.current_path
+    chosen_registry = registry or default_processor_registry()
+    blocks = discover_fenced_processors(markdown, source_path=effective_path)
+    results: list[ProcessorResult] = []
+    for block in blocks:
+        results.append(chosen_registry.run(ProcessorRequest(block=block, context=context)))
+    return ProcessorRun(
+        source_path=str(effective_path) if effective_path else None,
+        blocks=blocks,
+        results=results,
+    )
+
+
+def _identity_processor(request: ProcessorRequest) -> ProcessorResult:
+    """Return the block content unchanged, with one provenance record."""
+
+    block = request.block
+    event = OperationProvenance(
+        operation="processor.identity",
+        source_path=block.source_path,
+        line_start=block.line_start,
+        line_end=block.line_end,
+        metadata={"unit_id": block.unit_id},
+    )
+    return ProcessorResult(content=block.content, provenance=[event])
+
+
+def _uppercase_processor(request: ProcessorRequest) -> ProcessorResult:
+    """Return the block content upper-cased, with one provenance record."""
+
+    block = request.block
+    event = OperationProvenance(
+        operation="processor.uppercase",
+        source_path=block.source_path,
+        line_start=block.line_start,
+        line_end=block.line_end,
+        metadata={"unit_id": block.unit_id},
+    )
+    return ProcessorResult(content=block.content.upper(), provenance=[event])
+
+
+def _include_processor(request: ProcessorRequest) -> ProcessorResult:
+    """Resolve the block's ``ref`` attribute and return the referenced text.
+
+    The texts of all resolved units are joined with blank lines. A missing
+    ``ref`` or a ``ReferenceResolutionError`` is reported as an error
+    diagnostic on the result rather than raised.
+    """
+
+    block = request.block
+
+    def failure(code: str, message: str) -> ProcessorResult:
+        # Every failure points at the first line of the offending fence.
+        return ProcessorResult(
+            diagnostics=[
+                Diagnostic(
+                    severity="error",
+                    code=code,
+                    message=message,
+                    source=SourceLocation(
+                        path=block.source_path,
+                        line=block.line_start,
+                    ),
+                )
+            ]
+        )
+
+    reference = block.attrs.get("ref")
+    if not reference:
+        return failure(
+            "processor.include.missing_ref",
+            "Include processor requires a `ref` attribute",
+        )
+    try:
+        resolution = resolve_reference(reference, context=request.context.reference_context())
+    except ReferenceResolutionError as exc:
+        return failure("processor.include.reference_error", str(exc))
+
+    texts = [unit.text for unit in resolution.units]
+    unit_ids = [unit.unit_id for unit in resolution.units]
+    event = OperationProvenance(
+        operation="processor.include",
+        source_path=block.source_path,
+        line_start=block.line_start,
+        line_end=block.line_end,
+        target_path=resolution.target_path,
+        dependencies=[resolution.target_path],
+        metadata={"ref": reference, "unit_ids": unit_ids},
+    )
+    return ProcessorResult(
+        content="\n\n".join(texts),
+        dependencies=[resolution.target_path],
+        provenance=[event],
+    )
+
+
+def _processor_name(attrs: dict[str, str]) -> str | None:
+    """Pick the processor a fence asks for, or ``None`` when it asks for none.
+
+    Precedence: an explicit ``processor`` attribute, then an ``mkt-<name>``
+    language, then the ``type`` attribute of a plain ``mkt`` language.
+    """
+
+    try:
+        return attrs["processor"]
+    except KeyError:
+        pass
+    prefix = "mkt-"
+    language = attrs.get("language", "")
+    if language.startswith(prefix):
+        return language[len(prefix):]
+    return attrs["type"] if language == "mkt" and "type" in attrs else None
+
+
+def _parse_fence_info(info: str) -> dict[str, str]:
+    """Split a fence info string into attributes plus a ``language`` entry.
+
+    The expected shape is an optional language followed by an optional
+    ``{...}`` attribute group. Any other non-empty string is returned
+    whole as the ``language``.
+    """
+
+    text = info.strip()
+    parsed = re.match(r"^(?P<language>[^\s{]+)?(?:\s+\{(?P<attrs>.*)\})?\s*$", text)
+    if parsed is None:
+        return {"language": text} if text else {}
+    result = _parse_attrs(parsed.group("attrs") or "")
+    language = parsed.group("language")
+    if language:
+        # The language token wins over any ``language=`` attribute in the group.
+        result["language"] = language
+    return result
+
+
+def _parse_attrs(raw: str) -> dict[str, str]:
+    """Parse the text inside a fence's ``{...}`` group into an attribute dict.
+
+    Tokens are split shell-style, so quoted values may contain spaces.
+    ``#name`` is stored as ``id``, ``key=value`` is stored as given, and a
+    bare word is stored with the value ``"true"``. Later tokens overwrite
+    earlier ones with the same key.
+
+    Text with unbalanced quotes is split on whitespace instead of raising.
+    """
+
+    try:
+        parts = shlex.split(raw)
+    except ValueError:
+        # shlex rejects unbalanced quotes; a stray quote in one fence's info
+        # string must not abort processing of the whole document.
+        parts = raw.split()
+
+    attrs: dict[str, str] = {}
+    for part in parts:
+        if part.startswith("#") and len(part) > 1:
+            attrs["id"] = part[1:]
+            continue
+        if "=" not in part:
+            attrs[part] = "true"
+            continue
+        key, value = part.split("=", 1)
+        attrs[key.strip()] = value.strip()
+    return attrs
+
+
+def _dedupe_id(unit_id: str, used_ids: dict[str, int]) -> str:
+    """Return ``unit_id``, or a numbered variant if that id was already issued.
+
+    ``used_ids`` is updated in place. The first request for an id returns it
+    unchanged; later requests return ``"<id>-<n>"`` starting at ``n == 2``.
+    A numbered variant that is itself already taken is skipped, and every
+    issued variant is recorded so a later explicit request for it is
+    de-duplicated too.
+    """
+
+    count = used_ids.get(unit_id, 0) + 1
+    used_ids[unit_id] = count
+    if count == 1:
+        return unit_id
+    candidate = f"{unit_id}-{count}"
+    while candidate in used_ids:
+        # e.g. ids "a", "a-2", "a": the third must not reuse "a-2".
+        count += 1
+        candidate = f"{unit_id}-{count}"
+    used_ids[unit_id] = count
+    used_ids[candidate] = 1
+    return candidate
+
+
+def _slug(value: str) -> str:
+    """Normalize ``value`` into a lowercase identifier.
+
+    Runs of characters outside ``a-z0-9_.:-`` become a single ``-``, and
+    leading/trailing ``-`` are removed. The result may be empty.
+    """
+
+    lowered = value.strip().lower()
+    dashed = re.sub(r"[^a-z0-9_.:-]+", "-", lowered)
+    collapsed = re.sub(r"-+", "-", dashed)
+    return collapsed.strip("-")
+
+
+def _hash_text(text: str) -> str:
+    """Return the SHA-256 hex digest of ``text`` (UTF-8), prefixed ``sha256:``."""
+
+    digest = hashlib.sha256()
+    digest.update(text.encode("utf-8"))
+    return f"sha256:{digest.hexdigest()}"
--- a/src/markitect_tool/reference/init.py
+++ b/src/markitect_tool/reference/init.py
@@ -0,0 +1,25 @@
+"""Namespaced content reference resolution for Markdown artifacts."""
+
+from markitect_tool.reference.engine import (
+    ContentUnit,
+    ReferenceAddress,
+    ReferenceContext,
+    ReferenceResolution,
+    ReferenceResolutionError,
+    SourceSpan,
+    load_namespaces,
+    parse_reference,
+    resolve_reference,
+)
+
+# Public names of the reference package; each one is re-exported from
+# ``markitect_tool.reference.engine`` by the import above.
+__all__ = [
+    "ContentUnit",
+    "ReferenceAddress",
+    "ReferenceContext",
+    "ReferenceResolution",
+    "ReferenceResolutionError",
+    "SourceSpan",
+    "load_namespaces",
+    "parse_reference",
+    "resolve_reference",
+]
--- a/src/markitect_tool/reference/engine.py
+++ b/src/markitect_tool/reference/engine.py
@@ -0,0 +1,626 @@
+"""Reference parsing and resolution for Markdown content units."""
+
+from __future__ import annotations
+
+import hashlib
+import re
+import shlex
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Any
+
+from markdown_it import MarkdownIt
+
+from markitect_tool.core import ContentBlock, Document, Heading, Section, parse_markdown
+from markitect_tool.query import InvalidQueryError, QueryMatch, query_document
+
+
+class ReferenceResolutionError(ValueError):
+    """Raised when a content reference cannot be parsed or resolved.
+
+    Subclasses ``ValueError``, so handlers for ``ValueError`` also catch it.
+    """
+
+
+@dataclass(frozen=True)
+class ReferenceAddress:
+    """The parts of a parsed content reference.
+
+    The compact, Markdown-friendly syntax covers:
+
+    - ``path/to/file.md`` (plain path)
+    - ``std:clauses/payment.md`` (namespaced path)
+    - ``std:clauses/payment.md#section:terms`` (path plus fragment)
+    - ``std:clauses/payment.md::sections[heading=Terms]`` (path plus selector)
+    - ``#intro`` (fragment in the current document)
+    """
+
+    # The reference text this address was built from.
+    raw: str
+    namespace: str | None = None
+    address: str = ""
+    # Text after ``#``, if any.
+    fragment: str | None = None
+    # Text after ``::``, if any.
+    selector: str | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the fields as a dict, omitting ``None`` and empty strings."""
+
+        payload: dict[str, Any] = {}
+        for name, value in asdict(self).items():
+            if value is None or value == "":
+                continue
+            payload[name] = value
+        return payload
+
+
+@dataclass(frozen=True)
+class ReferenceContext:
+    """Root, current document and namespace table used to resolve references."""
+
+    root: Path = Path(".")
+    current_path: Path | None = None
+    namespaces: dict[str, str] = field(default_factory=dict)
+
+    @classmethod
+    def from_document(
+        cls,
+        document: Document,
+        *,
+        root: str | Path = ".",
+        current_path: str | Path | None = None,
+    ) -> "ReferenceContext":
+        """Create a context whose namespaces come from ``document.frontmatter``.
+
+        ``current_path`` falls back to ``document.source_path`` when not given.
+        """
+
+        origin = current_path or document.source_path
+        resolved_origin = Path(origin) if origin else None
+        return cls(
+            root=Path(root),
+            current_path=resolved_origin,
+            namespaces=load_namespaces(document.frontmatter),
+        )
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize to a dict; ``current_path`` is omitted when unset."""
+
+        payload: dict[str, Any] = {"root": str(self.root)}
+        if self.current_path:
+            payload["current_path"] = str(self.current_path)
+        if self.namespaces is not None:
+            payload["namespaces"] = self.namespaces
+        return payload
+
+
+@dataclass(frozen=True)
+class SourceSpan:
+    """First and last source line of a resolved unit; either may be unknown."""
+
+    line_start: int | None = None
+    line_end: int | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return only the line fields that are not ``None``."""
+
+        payload: dict[str, Any] = {}
+        if self.line_start is not None:
+            payload["line_start"] = self.line_start
+        if self.line_end is not None:
+            payload["line_end"] = self.line_end
+        return payload
+
+
+@dataclass(frozen=True)
+class ContentUnit:
+    """A single addressable piece of Markdown content."""
+
+    kind: str
+    unit_id: str
+    text: str
+    source_path: str
+    span: SourceSpan | None = None
+    name: str | None = None
+    content_hash: str = ""
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the unit; ``None`` entries and empty metadata are omitted."""
+
+        span = None if not self.span else self.span.to_dict()
+        candidates = (
+            ("kind", self.kind),
+            ("unit_id", self.unit_id),
+            ("name", self.name),
+            ("source_path", self.source_path),
+            ("span", span),
+            ("content_hash", self.content_hash),
+            ("metadata", self.metadata or None),
+            ("text", self.text),
+        )
+        return {name: value for name, value in candidates if value is not None}
+
+
+@dataclass(frozen=True)
+class ReferenceResolution:
+    """Result of resolving a reference: the units plus the source/target paths."""
+
+    reference: ReferenceAddress
+    source_path: str
+    target_path: str
+    units: list[ContentUnit]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the resolution, including a ``count`` of resolved units."""
+
+        serialized_units = [unit.to_dict() for unit in self.units]
+        payload: dict[str, Any] = {"reference": self.reference.to_dict()}
+        payload["source_path"] = self.source_path
+        payload["target_path"] = self.target_path
+        payload["count"] = len(self.units)
+        payload["units"] = serialized_units
+        return payload
+
+
+# Leading ``namespace:`` prefix of a reference, e.g. ``std:clauses/payment.md``.
+_NAMESPACE_RE = re.compile(r"^(?P<namespace>[A-Za-z][A-Za-z0-9_.-]*):(?P<address>.*)$")
+# Heading text with an optional trailing ``{#explicit-id}`` marker.
+_HEADING_ID_RE = re.compile(r"^(?P<title>.*?)(?:\s+\{#(?P<id>[A-Za-z0-9_.:-]+)\})?$")
+# Opening ``<!-- mkt:region ... -->`` comment; ``attrs`` captures its attribute text.
+_REGION_OPEN_RE = re.compile(r"<!--\s*mkt:region\s+(?P<attrs>.*?)\s*-->")
+# Closing ``<!-- /mkt:region -->`` comment.
+_REGION_CLOSE_RE = re.compile(r"<!--\s*/mkt:region\s*-->")
+# Fence info string: optional language followed by an optional ``{attrs}`` group.
+_FENCE_ATTRS_RE = re.compile(r"^(?P<language>[^\s{]+)?(?:\s+\{(?P<attrs>.*)\})?\s*$")
+
+
+def parse_reference(reference: str) -> ReferenceAddress:
+    """Split a compact content reference into its parts.
+
+    The text after the first ``::`` is the selector, the text after the
+    first ``#`` of what remains is the fragment, and a leading
+    ``namespace:`` prefix is separated from the address.
+
+    Raises:
+        ReferenceResolutionError: if the reference is empty, or a ``::`` or
+            ``#`` marker is followed by nothing.
+    """
+
+    raw = reference.strip()
+    if not raw:
+        raise ReferenceResolutionError("Reference cannot be empty")
+
+    remainder, marker, tail = raw.partition("::")
+    selector: str | None = None
+    if marker:
+        selector = tail.strip()
+        if not selector:
+            raise ReferenceResolutionError(f"Reference selector is empty in `{reference}`")
+
+    remainder, marker, tail = remainder.partition("#")
+    fragment: str | None = None
+    if marker:
+        fragment = tail.strip()
+        if not fragment:
+            raise ReferenceResolutionError(f"Reference fragment is empty in `{reference}`")
+
+    address = remainder.strip()
+    namespace: str | None = None
+    found = _NAMESPACE_RE.match(address)
+    if found:
+        prefix = found.group("namespace")
+        if "/" not in prefix and "\\" not in prefix:
+            namespace = prefix
+            address = found.group("address").strip()
+
+    return ReferenceAddress(
+        raw=raw,
+        namespace=namespace,
+        address=address,
+        fragment=fragment,
+        selector=selector,
+    )
+
+
+def load_namespaces(frontmatter: dict[str, Any]) -> dict[str, str]:
+    """Read and validate the ``namespaces`` mapping of a frontmatter dict.
+
+    A missing or ``None`` entry yields an empty mapping. Keys are stripped
+    and lose any trailing ``:``; values are stripped.
+
+    Raises:
+        ReferenceResolutionError: if ``namespaces`` is not a mapping, a key
+            is empty or malformed, or a value is not a non-empty string.
+    """
+
+    declared = frontmatter.get("namespaces", {})
+    if declared is None:
+        return {}
+    if not isinstance(declared, dict):
+        raise ReferenceResolutionError("Frontmatter `namespaces` must be a mapping")
+
+    result: dict[str, str] = {}
+    for raw_key, raw_value in declared.items():
+        name = str(raw_key).strip().rstrip(":")
+        if not name:
+            raise ReferenceResolutionError("Namespace keys cannot be empty")
+        if _NAMESPACE_RE.match(f"{name}:") is None:
+            raise ReferenceResolutionError(f"Invalid namespace key `{raw_key}`")
+        if not isinstance(raw_value, str):
+            raise ReferenceResolutionError(f"Namespace `{name}` must map to a string path")
+        target = raw_value.strip()
+        if not target:
+            raise ReferenceResolutionError(f"Namespace `{name}` cannot map to an empty path")
+        result[name] = target
+    return result
+
+
+def resolve_reference(
+    reference: str | ReferenceAddress,
+    *,
+    context: ReferenceContext,
+) -> ReferenceResolution:
+    """Resolve ``reference`` to the content units it addresses.
+
+    A selector or a fragment narrows the result; with neither, the whole
+    target document is returned as a single unit.
+
+    Raises:
+        ReferenceResolutionError: if the target file is missing, both a
+            fragment and a selector are given, or nothing matches.
+    """
+
+    address = reference if not isinstance(reference, str) else parse_reference(reference)
+    root = context.root.resolve()
+    source_path = root if not context.current_path else context.current_path.resolve()
+    target_path = _resolve_target_path(address, context, root, source_path)
+    # ``is_file()`` is already false for a path that does not exist.
+    if not target_path.is_file():
+        raise ReferenceResolutionError(f"Referenced file not found: {target_path}")
+
+    markdown = target_path.read_text(encoding="utf-8")
+    document = parse_markdown(markdown, source_path=str(target_path))
+
+    selector, fragment = address.selector, address.fragment
+    if selector and fragment:
+        raise ReferenceResolutionError("Reference cannot use both fragment and selector")
+    if selector:
+        units = _units_from_selector(document, selector, target_path)
+    elif fragment:
+        units = _units_from_fragment(document, fragment, target_path, markdown)
+    else:
+        units = [_document_unit(document, target_path, markdown)]
+
+    if not units:
+        raise ReferenceResolutionError(f"Reference `{address.raw}` did not match any content units")
+
+    return ReferenceResolution(
+        reference=address,
+        source_path=str(source_path),
+        target_path=str(target_path),
+        units=units,
+    )
+
+
+def _resolve_target_path(
+    address: ReferenceAddress,
+    context: ReferenceContext,
+    root: Path,
+    source_path: Path,
+) -> Path:
+    """Turn a parsed address into a resolved path that lies under ``root``.
+
+    Namespaced addresses go through ``context.namespaces``, plain addresses
+    are taken relative to the current document's directory (or ``root``),
+    and an address with no path refers to the current document.
+
+    Raises:
+        ReferenceResolutionError: for an unknown namespace, a pathless
+            reference without a current document, or a path outside ``root``.
+    """
+
+    if address.namespace:
+        known = context.namespaces
+        if address.namespace not in known:
+            raise ReferenceResolutionError(f"Unknown namespace `{address.namespace}`")
+        mapped = _path_from_namespace(known[address.namespace], root)
+        # A namespace mapped to a directory is joined with the address; any
+        # other mapping is used as the target itself.
+        candidate = mapped / address.address if mapped.is_dir() else mapped
+    elif address.address:
+        anchor = source_path.parent if source_path.is_file() else root
+        relative = Path(address.address)
+        candidate = relative if relative.is_absolute() else anchor / relative
+    elif context.current_path:
+        candidate = context.current_path
+    else:
+        raise ReferenceResolutionError("Pathless references require a current document")
+
+    resolved = candidate.resolve()
+    try:
+        resolved.relative_to(root)
+    except ValueError as exc:
+        raise ReferenceResolutionError(f"Reference escapes root: {address.raw}") from exc
+    return resolved
+
+
+def _path_from_namespace(raw_path: str, root: Path) -> Path:
+    """Resolve a namespace's mapped path, taking relative paths from ``root``."""
+
+    # Joining discards ``root`` when ``raw_path`` is already absolute.
+    return (root / Path(raw_path)).resolve()
+
+
+def _units_from_selector(
+    document: Document,
+    selector: str,
+    target_path: Path,
+) -> list[ContentUnit]:
+    """Run ``selector`` as a document query and wrap each match as a unit.
+
+    Raises:
+        ReferenceResolutionError: if the query raises ``InvalidQueryError``.
+    """
+
+    try:
+        matches = query_document(document, selector)
+    except InvalidQueryError as exc:
+        raise ReferenceResolutionError(str(exc)) from exc
+    units: list[ContentUnit] = []
+    for match in matches:
+        units.append(_unit_from_query_match(match, target_path))
+    return units
+
+
+def _units_from_fragment(
+    document: Document,
+    fragment: str,
+    target_path: Path,
+    markdown: str,
+) -> list[ContentUnit]:
+    """Resolve a ``#fragment`` into the content units it names.
+
+    A fragment is ``kind:value``; a fragment without a value is treated as
+    ``id:<fragment>``. Supported kinds are ``document``, ``id``, ``section``,
+    ``heading``, ``block``, ``region``, ``fence``, ``tag`` and ``line``.
+    ``id`` searches sections, regions, fenced blocks and headings in that
+    order and returns the matches of the first group that has any.
+
+    Raises:
+        ReferenceResolutionError: for an unsupported fragment kind.
+    """
+
+    kind, _, value = fragment.partition(":")
+    if not value:
+        kind, value = "id", kind
+    lookup = _slug(value)
+
+    def by_id_or_name(units: list[ContentUnit]) -> list[ContentUnit]:
+        # Match on the unit id or on the slug of the unit's display name.
+        return [
+            unit for unit in units if unit.unit_id == lookup or _slug(unit.name or "") == lookup
+        ]
+
+    def by_id(units: list[ContentUnit]) -> list[ContentUnit]:
+        return [unit for unit in units if unit.unit_id == lookup]
+
+    if kind == "document":
+        return [_document_unit(document, target_path, markdown)]
+    if kind == "id":
+        # All groups are built up front so malformed region markers are
+        # reported even when an earlier group already matches.
+        for units in [
+            _section_units(document, target_path),
+            _region_units(markdown, target_path),
+            _fenced_block_units(markdown, target_path),
+            _heading_units(document, target_path),
+        ]:
+            matches = by_id_or_name(units)
+            if matches:
+                return matches
+        return []
+    if kind == "section":
+        return by_id_or_name(_section_units(document, target_path))
+    if kind == "heading":
+        return by_id_or_name(_heading_units(document, target_path))
+    if kind == "block":
+        return _block_fragment_units(document, target_path, value)
+    if kind == "region":
+        return by_id(_region_units(markdown, target_path))
+    if kind == "fence":
+        return by_id(_fenced_block_units(markdown, target_path))
+    if kind == "tag":
+        return [
+            unit
+            for unit in _region_units(markdown, target_path) + _fenced_block_units(markdown, target_path)
+            if lookup in {_slug(tag) for tag in unit.metadata.get("tags", [])}
+        ]
+    if kind == "line":
+        return _line_range_units(markdown, target_path, value)
+    raise ReferenceResolutionError(f"Unsupported reference fragment kind `{kind}`")
+
+
+def _document_unit(document: Document, target_path: Path, markdown: str) -> ContentUnit:
+    """Wrap the whole document as a single ``document`` unit.
+
+    The id and name come from the frontmatter ``id`` and ``title`` entries,
+    each falling back to the file stem.
+    """
+
+    frontmatter = document.frontmatter
+    stem = target_path.stem
+    line_count = len(markdown.splitlines())
+    return _content_unit(
+        kind="document",
+        unit_id=_slug(str(frontmatter.get("id") or stem)),
+        text=markdown,
+        source_path=target_path,
+        span=SourceSpan(1, line_count),
+        name=str(frontmatter.get("title") or stem),
+        metadata={"frontmatter": frontmatter},
+    )
+
+
+def _unit_from_query_match(match: QueryMatch, target_path: Path) -> ContentUnit:
+    """Convert one query match into a content unit.
+
+    The unit id is derived from the match's query path. The name is the
+    first line of the matched text without leading ``#``/space characters,
+    or the match kind when there is no text.
+    """
+
+    flattened = match.path
+    for old, new in (("$.", ""), ("[", "-"), ("]", "")):
+        flattened = flattened.replace(old, new)
+
+    if match.text:
+        name = match.text.splitlines()[0].lstrip("# ").strip()
+    else:
+        name = match.kind
+
+    if match.text is not None:
+        text = match.text
+    else:
+        text = str(match.value)
+
+    return _content_unit(
+        kind=match.kind,
+        unit_id=_slug(flattened),
+        text=text,
+        source_path=target_path,
+        span=SourceSpan(match.line, None),
+        name=name,
+        metadata={"query_path": match.path, "value": match.value},
+    )
+
+
+def _section_units(document: Document, target_path: Path) -> list[ContentUnit]:
+    """Build one unit per document section, de-duplicating ids in order."""
+
+    seen_ids: dict[str, int] = {}
+    units: list[ContentUnit] = []
+    for section in document.sections:
+        units.append(_section_unit(section, target_path, seen_ids))
+    return units
+
+
+def _section_unit(
+    section: Section,
+    target_path: Path,
+    used_ids: dict[str, int],
+) -> ContentUnit:
+    """Build the unit for one section: its heading line plus its block texts.
+
+    Blocks with empty text are left out. Each remaining block is separated
+    from what precedes it by one blank line.
+    """
+
+    heading = section.heading
+    title, explicit_id = _heading_title_and_id(heading)
+    unit_id = _dedupe_id(_slug(explicit_id or title), used_ids)
+    last_line = section.blocks[-1].line_end if section.blocks else heading.line
+    parts = [f"{'#' * heading.level} {heading.text}"]
+    parts += [piece for block in section.blocks if block.text for piece in ("", block.text)]
+    return _content_unit(
+        kind="section",
+        unit_id=unit_id,
+        text="\n".join(parts).strip(),
+        source_path=target_path,
+        span=SourceSpan(heading.line, last_line),
+        name=title,
+        metadata={"heading_level": heading.level},
+    )
+
+
+def _heading_units(document: Document, target_path: Path) -> list[ContentUnit]:
+    """Build one single-line unit per heading, de-duplicating ids in order."""
+
+    seen_ids: dict[str, int] = {}
+
+    def to_unit(heading: Heading) -> ContentUnit:
+        title, explicit_id = _heading_title_and_id(heading)
+        return _content_unit(
+            kind="heading",
+            unit_id=_dedupe_id(_slug(explicit_id or title), seen_ids),
+            text=f"{'#' * heading.level} {heading.text}",
+            source_path=target_path,
+            span=SourceSpan(heading.line, heading.line),
+            name=title,
+            metadata={"heading_level": heading.level},
+        )
+
+    return [to_unit(heading) for heading in document.headings]
+
+
+def _block_fragment_units(
+    document: Document,
+    target_path: Path,
+    value: str,
+) -> list[ContentUnit]:
+    """Select block units for a ``block:<value>`` fragment.
+
+    A purely numeric ``value`` is a zero-based index into the document's
+    blocks, and an out-of-range index yields no units. Any other value is
+    slugged and compared with the block unit ids.
+    """
+
+    blocks = _block_units(document.blocks, target_path)
+    # isdecimal() only accepts characters that int() can parse. isdigit()
+    # also accepts e.g. "²", which made int() raise a plain ValueError.
+    if value.isdecimal():
+        index = int(value)
+        return [blocks[index]] if 0 <= index < len(blocks) else []
+    lookup = _slug(value)
+    return [unit for unit in blocks if unit.unit_id == lookup]
+
+
+def _block_units(blocks: list[ContentBlock], target_path: Path) -> list[ContentUnit]:
+    """Build one unit per content block.
+
+    Ids have the form ``<type>-<line_start>``; the block's list index
+    replaces ``line_start`` when that is falsy.
+    """
+
+    seen_ids: dict[str, int] = {}
+
+    def to_unit(position: int, block: ContentBlock) -> ContentUnit:
+        raw_id = f"{block.type}-{block.line_start or position}"
+        return _content_unit(
+            kind=block.type,
+            unit_id=_dedupe_id(_slug(raw_id), seen_ids),
+            text=block.text,
+            source_path=target_path,
+            span=SourceSpan(block.line_start, block.line_end),
+            name=block.type,
+            metadata={"block_index": position},
+        )
+
+    return [to_unit(position, block) for position, block in enumerate(blocks)]
+
+
+def _region_units(markdown: str, target_path: Path) -> list[ContentUnit]:
+    """Extract ``mkt:region`` comment-delimited regions as content units.
+
+    The text is scanned line by line for open and close markers. Each
+    region's text is the lines strictly between its two marker lines, while
+    its span runs from the open-marker line to the close-marker line.
+
+    Raises:
+        ReferenceResolutionError: for a nested open marker, a close marker
+            with no open region, an open marker without an ``id`` attribute,
+            or a region still open at the end of the text.
+    """
+
+    lines = markdown.splitlines()
+    units: list[ContentUnit] = []
+    # (1-based line of the open marker, region id, tags) of the open region.
+    open_region: tuple[int, str, list[str]] | None = None
+
+    for index, line in enumerate(lines, start=1):
+        open_match = _REGION_OPEN_RE.search(line)
+        close_match = _REGION_CLOSE_RE.search(line)
+        if open_match and open_region is not None:
+            raise ReferenceResolutionError("Nested mkt:region blocks are not supported")
+        # NOTE(review): the close marker is handled before the open marker, so
+        # a line holding both with no region open raises the error below.
+        if close_match:
+            if open_region is None:
+                raise ReferenceResolutionError("Region close marker has no matching open marker")
+            start_line, region_id, tags = open_region
+            # ``start_line`` is 1-based, so this slice starts on the line after
+            # the open marker and stops just before the close marker.
+            content_lines = lines[start_line:index - 1]
+            units.append(
+                _content_unit(
+                    kind="region",
+                    unit_id=_slug(region_id),
+                    text="\n".join(content_lines).strip(),
+                    source_path=target_path,
+                    span=SourceSpan(start_line, index),
+                    name=region_id,
+                    metadata={"tags": tags},
+                )
+            )
+            open_region = None
+            continue
+        if open_match:
+            attrs = _parse_attrs(open_match.group("attrs"))
+            region_id = attrs.get("id")
+            if not region_id:
+                raise ReferenceResolutionError("Region marker requires an id attribute")
+            open_region = (index, region_id, _tags_from_attrs(attrs))
+
+    if open_region is not None:
+        raise ReferenceResolutionError("Region open marker has no matching close marker")
+    return units
+
+
+def _fenced_block_units(markdown: str, target_path: Path) -> list[ContentUnit]:
+    """Collect fenced code blocks whose info string carries an id.
+
+    Fences without an id are ignored. The unit text is the fence content;
+    metadata records the language, the tags, the remaining attributes and
+    the token's position in the parsed token list (``block_index``).
+    """
+    reserved_keys = {"id", "language", "tag", "tags"}
+    md = MarkdownIt("commonmark", {"tables": True}).enable("table")
+    seen: dict[str, int] = {}
+    collected: list[ContentUnit] = []
+    for position, token in enumerate(md.parse(markdown)):
+        if token.type != "fence":
+            continue
+        info_attrs = _parse_fence_info(token.info)
+        raw_id = info_attrs.get("id")
+        if not raw_id:
+            continue
+        if token.map:
+            first_line, last_line = token.map[0] + 1, token.map[1]
+        else:
+            first_line, last_line = None, None
+        extra_attrs = {}
+        for attr_name, attr_value in info_attrs.items():
+            if attr_name not in reserved_keys:
+                extra_attrs[attr_name] = attr_value
+        unit = _content_unit(
+            kind="fenced_block",
+            unit_id=_dedupe_id(_slug(raw_id), seen),
+            text=token.content,
+            source_path=target_path,
+            span=SourceSpan(first_line, last_line),
+            name=raw_id,
+            metadata={
+                "language": info_attrs.get("language"),
+                "tags": _tags_from_attrs(info_attrs),
+                "attrs": extra_attrs,
+                "block_index": position,
+            },
+        )
+        collected.append(unit)
+    return collected
+
+
+def _line_range_units(markdown: str, target_path: Path, value: str) -> list[ContentUnit]:
+    """Resolve a ``start`` or ``start-end`` line fragment to a single unit.
+
+    Line numbers are 1-based and inclusive. Returns an empty list when the
+    range starts below 1, ends before it starts, or runs past the last line.
+
+    Raises:
+        ReferenceResolutionError: when ``value`` is not ``start`` or
+            ``start-end`` made of digits.
+    """
+    parsed = re.match(r"^(?P<start>\d+)(?:-(?P<end>\d+))?$", value)
+    if parsed is None:
+        raise ReferenceResolutionError("Line fragments must use `line:start` or `line:start-end`")
+    first = int(parsed.group("start"))
+    end_text = parsed.group("end")
+    # A missing end means a single-line range.
+    last = first if end_text is None else int(end_text)
+    all_lines = markdown.splitlines()
+    if not (1 <= first <= last <= len(all_lines)):
+        return []
+    selected = all_lines[first - 1:last]
+    unit = _content_unit(
+        kind="line_range",
+        unit_id=f"line-{first}-{last}",
+        text="\n".join(selected),
+        source_path=target_path,
+        span=SourceSpan(first, last),
+        name=f"lines {first}-{last}",
+        metadata={},
+    )
+    return [unit]
+
+
+def _parse_fence_info(info: str) -> dict[str, str]:
+    """Parse a fence info string into an attribute dict.
+
+    When the info string does not match the attribute pattern, the whole
+    stripped string is returned as ``language`` (or an empty dict if it is
+    blank). Otherwise the attributes are parsed, the matched language (if
+    any) is stored under ``language``, and, when no ``id`` was found, the
+    first key starting with ``#`` is converted into the ``id``.
+    """
+    stripped = info.strip()
+    parsed = _FENCE_ATTRS_RE.match(stripped)
+    if parsed is None:
+        if stripped:
+            return {"language": stripped}
+        return {}
+
+    attrs = _parse_attrs(parsed.group("attrs") or "")
+    language = parsed.group("language")
+    if language:
+        attrs["language"] = language
+
+    if "id" not in attrs:
+        # Promote the first "#"-prefixed key, if any, to the id.
+        hash_key = next((key for key in attrs if key.startswith("#")), None)
+        if hash_key is not None:
+            attrs["id"] = hash_key[1:]
+            del attrs[hash_key]
+    return attrs
+
+
+def _parse_attrs(raw: str) -> dict[str, str]:
+    """Parse a whitespace-separated attribute string into a dict.
+
+    Recognised token forms:
+
+    * ``#name`` sets ``id`` to ``name``;
+    * ``key=value`` is split on the first ``=`` and both sides are stripped;
+    * any other token is stored as a flag with the value ``"true"``.
+
+    Tokens are split with shell-like quoting rules, so ``tags="a b"`` is a
+    single token. If the quoting is malformed (for example an unbalanced
+    quote), the string is split on plain whitespace instead, so one bad
+    marker or fence info string does not abort processing with ValueError.
+    """
+    try:
+        parts = shlex.split(raw)
+    except ValueError:
+        # shlex raises on unbalanced quotes; degrade to a best-effort split.
+        parts = raw.split()
+
+    attrs: dict[str, str] = {}
+    for part in parts:
+        if part.startswith("#") and len(part) > 1:
+            attrs["id"] = part[1:]
+            continue
+        if "=" not in part:
+            attrs[part] = "true"
+            continue
+        key, value = part.split("=", 1)
+        attrs[key.strip()] = value.strip()
+    return attrs
+
+
+def _tags_from_attrs(attrs: dict[str, str]) -> list[str]:
+    """Return the tag list from ``tags`` (preferred) or ``tag``.
+
+    The first non-empty value is split on runs of commas and spaces; each
+    piece is stripped and empty pieces are dropped.
+    """
+    for key in ("tags", "tag"):
+        raw = attrs.get(key)
+        if raw:
+            break
+    else:
+        return []
+    tags: list[str] = []
+    for piece in re.split(r"[, ]+", raw):
+        cleaned = piece.strip()
+        if cleaned:
+            tags.append(cleaned)
+    return tags
+
+
+def _content_unit(
+    *,
+    kind: str,
+    unit_id: str,
+    text: str,
+    source_path: Path,
+    span: SourceSpan | None,
+    name: str | None,
+    metadata: dict[str, Any] | None = None,
+) -> ContentUnit:
+    """Build a ``ContentUnit``, filling in the derived fields.
+
+    The source path is stored as a string, the content hash is the SHA-256
+    hex digest of the UTF-8 encoded text prefixed with ``sha256:``, and a
+    missing or empty ``metadata`` becomes a fresh empty dict.
+    """
+    digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
+    unit_metadata = metadata or {}
+    return ContentUnit(
+        kind=kind,
+        unit_id=unit_id,
+        text=text,
+        source_path=str(source_path),
+        span=span,
+        name=name,
+        content_hash="sha256:" + digest,
+        metadata=unit_metadata,
+    )
+
+
+def _heading_title_and_id(heading: Heading) -> tuple[str, str | None]:
+    """Split a heading's text into its title and optional explicit id.
+
+    Returns ``(title, id)`` when the stripped heading text matches the
+    heading-id pattern, otherwise ``(stripped text, None)``.
+    """
+    stripped = heading.text.strip()
+    found = _HEADING_ID_RE.match(stripped)
+    if found is not None:
+        return found.group("title").strip(), found.group("id")
+    return stripped, None
+
+
+def _dedupe_id(unit_id: str, used_ids: dict[str, int]) -> str:
+    """Return an id based on ``unit_id`` that is unique within ``used_ids``.
+
+    The first occurrence of an id is returned unchanged; later occurrences
+    get a ``-<n>`` suffix starting at ``-2``. ``used_ids`` is updated in
+    place: it tracks how often each base id was requested and also reserves
+    every generated suffixed id.
+
+    Reserving generated ids matters because a suffixed id can coincide with
+    a literal one: for the inputs ``"a"``, ``"a"``, ``"a-2"`` the third
+    result must not repeat the ``"a-2"`` already handed out for the second.
+    """
+    count = used_ids.get(unit_id, 0) + 1
+    candidate = unit_id
+    if count > 1:
+        candidate = f"{unit_id}-{count}"
+        # Skip suffixes already taken by a literal or previously generated id.
+        while candidate in used_ids:
+            count += 1
+            candidate = f"{unit_id}-{count}"
+        used_ids[candidate] = 1
+    used_ids[unit_id] = count
+    return candidate
+
+
+def _slug(value: str) -> str:
+    """Normalise ``value`` into a lowercase identifier.
+
+    Runs of characters outside ``a-z``, ``0-9``, ``_``, ``.``, ``:`` and
+    ``-`` become a single hyphen, repeated hyphens are collapsed, and leading
+    and trailing hyphens are removed. Returns ``"unit"`` when nothing is left.
+    """
+    lowered = value.strip().lower()
+    hyphenated = re.sub(r"[^a-z0-9_.:-]+", "-", lowered)
+    collapsed = re.sub(r"-+", "-", hyphenated)
+    trimmed = collapsed.strip("-")
+    if not trimmed:
+        return "unit"
+    return trimmed