extension for ref resolve, explode, implode, weave, tangle

2026-05-04 02:25:49 +02:00
parent 8203f50fd5
commit 65bfc1aebf
39 changed files with 3959 additions and 25 deletions
--- a/tests/test_content_class_resolution.py
+++ b/tests/test_content_class_resolution.py
@@ -0,0 +1,106 @@
+from pathlib import Path
+
+from click.testing import CliRunner
+
+from markitect_tool.cli import main
+from markitect_tool.content_class import load_content_classes
+
+
+def test_c3_linearization_for_diamond_inheritance():
+    """Linearizing a diamond hierarchy lists every class once, with the shared base last."""
+    # `leaf` extends `left` and `right`, which both extend `base`.
+    registry = load_content_classes(
+        {
+            "classes": {
+                "base": {"slots": {"sections": ["Overview"]}},
+                "left": {"extends": ["base"], "slots": {"sections": ["Left"]}},
+                "right": {"extends": ["base"], "slots": {"sections": ["Right"]}},
+                "leaf": {"extends": ["left", "right"], "slots": {"title": "Leaf"}},
+            }
+        }
+    )
+
+    assert registry.linearize("leaf") == ["leaf", "left", "right", "base"]
+
+
+def test_compose_merges_slots_with_explicit_policies():
+    """Composing a three-level chain appends `sections` and deep-merges `assertions`."""
+    registry = load_content_classes(
+        {
+            "classes": {
+                "base": {
+                    "slots": {
+                        "sections": ["Overview"],
+                        "assertions": {"tone": "plain", "depth": "short"},
+                    }
+                },
+                "market": {
+                    "extends": ["base"],
+                    "slots": {
+                        "sections": ["Pricing"],
+                        "assertions": {"depth": "detailed"},
+                    },
+                    "merge_policies": {
+                        "sections": "append",
+                        "assertions": "deep_merge",
+                    },
+                },
+                "instance": {
+                    "extends": ["market"],
+                    "slots": {"sections": ["Risks"]},
+                    "merge_policies": {"sections": "append"},
+                },
+            }
+        }
+    )
+
+    result = registry.compose("instance")
+
+    assert result.valid
+    # Sections accumulate from base to instance.
+    assert result.slots["sections"] == ["Overview", "Pricing", "Risks"]
+    # `tone` survives from `base`; `depth` is overridden by `market`.
+    assert result.slots["assertions"] == {"tone": "plain", "depth": "detailed"}
+
+
+def test_compose_reports_error_on_conflict():
+    """An `error_on_conflict` policy makes compose() invalid with a merge-conflict diagnostic."""
+    # `base` and `instance` give the `owner` slot different values.
+    registry = load_content_classes(
+        {
+            "classes": {
+                "base": {"slots": {"owner": "A"}},
+                "instance": {
+                    "extends": ["base"],
+                    "slots": {"owner": "B"},
+                    "merge_policies": {"owner": "error_on_conflict"},
+                },
+            }
+        }
+    )
+
+    result = registry.compose("instance")
+
+    assert not result.valid
+    assert result.diagnostics[0].code == "content_class.merge_conflict"
+
+
+def test_mkt_class_resolve_outputs_text(tmp_path: Path):
+    """The `class resolve` CLI command exits 0 and prints the linearization and merged sections."""
+    class_file = tmp_path / "classes.yaml"
+    class_file.write_text(
+        """classes:
+  base:
+    slots:
+      sections:
+        - Overview
+  instance:
+    extends:
+      - base
+    slots:
+      sections:
+        - Risks
+    merge_policies:
+      sections: append
+""",
+        encoding="utf-8",
+    )
+
+    result = CliRunner().invoke(main, ["class", "resolve", str(class_file), "instance"])
+
+    assert result.exit_code == 0
+    assert "linearization: instance -> base" in result.output
+    assert "Overview" in result.output
+    assert "Risks" in result.output
--- a/tests/test_explode_implode.py
+++ b/tests/test_explode_implode.py
@@ -0,0 +1,93 @@
+from pathlib import Path
+
+import pytest
+from click.testing import CliRunner
+
+from markitect_tool.cli import main
+from markitect_tool.explode import (
+    EXPLODE_MANIFEST_NAME,
+    ExplodeError,
+    explode_markdown_file,
+    implode_markdown_directory,
+)
+
+
+# Fixture document with frontmatter, text before the first heading, and
+# nested headings; the tests below expect it to survive explode + implode
+# unchanged.
+ROUNDTRIP_DOC = """---
+title: Explode Example
+---
+
+Opening text before the first heading.
+
+# Intro
+
+Intro body.
+
+## Detail
+
+Detail body.
+
+# Later
+
+Later body.
+"""
+
+
+def test_flat_explode_implode_roundtrips_exact_markdown(tmp_path: Path):
+    """Flat explode followed by implode reproduces the source text and its recorded hash."""
+    source = tmp_path / "source.md"
+    output_dir = tmp_path / "exploded"
+    source.write_text(ROUNDTRIP_DOC, encoding="utf-8")
+
+    result = explode_markdown_file(source, output_dir, variant="flat")
+    imploded = implode_markdown_directory(output_dir)
+
+    assert Path(result.manifest_path).name == EXPLODE_MANIFEST_NAME
+    # The flat layout writes the preamble at the top level and sections under `sections/`.
+    assert (output_dir / "00-preamble.md").exists()
+    assert (output_dir / "sections" / "01-intro.md").exists()
+    assert imploded.markdown == ROUNDTRIP_DOC
+    assert imploded.current_hash == result.manifest.source_hash
+
+
+def test_hierarchical_explode_places_child_sections_under_parent(tmp_path: Path):
+    """Hierarchical explode writes `## Detail` inside its parent's directory and still round-trips."""
+    source = tmp_path / "source.md"
+    output_dir = tmp_path / "exploded"
+    source.write_text(ROUNDTRIP_DOC, encoding="utf-8")
+
+    result = explode_markdown_file(source, output_dir, variant="hierarchical")
+
+    # Compare POSIX-style paths relative to the output directory.
+    files = {Path(path).relative_to(output_dir).as_posix() for path in result.written_files}
+    assert "01-intro.md" in files
+    assert "01-intro/02-detail.md" in files
+    assert implode_markdown_directory(output_dir).markdown == ROUNDTRIP_DOC
+
+
+def test_explode_rejects_non_empty_output_without_force(tmp_path: Path):
+    """Exploding into a directory that already holds a file raises ExplodeError ("not empty")."""
+    source = tmp_path / "source.md"
+    output_dir = tmp_path / "exploded"
+    output_dir.mkdir()
+    (output_dir / "existing.md").write_text("Existing", encoding="utf-8")
+    source.write_text(ROUNDTRIP_DOC, encoding="utf-8")
+
+    with pytest.raises(ExplodeError, match="not empty"):
+        explode_markdown_file(source, output_dir)
+
+
+def test_mkt_explode_and_implode(tmp_path: Path):
+    """The `explode` and `implode` CLI commands round-trip a document through a directory."""
+    source = tmp_path / "source.md"
+    output_dir = tmp_path / "exploded"
+    rebuilt = tmp_path / "rebuilt.md"
+    source.write_text(ROUNDTRIP_DOC, encoding="utf-8")
+    runner = CliRunner()
+
+    explode_result = runner.invoke(
+        main,
+        ["explode", str(source), "--output-dir", str(output_dir), "--variant", "flat"],
+    )
+    implode_result = runner.invoke(
+        main,
+        ["implode", str(output_dir), "--output", str(rebuilt)],
+    )
+
+    assert explode_result.exit_code == 0
+    assert "entries: 4" in explode_result.output
+    assert implode_result.exit_code == 0
+    assert rebuilt.read_text(encoding="utf-8") == ROUNDTRIP_DOC
--- a/tests/test_literate_weave_tangle.py
+++ b/tests/test_literate_weave_tangle.py
@@ -0,0 +1,91 @@
+from pathlib import Path
+
+from click.testing import CliRunner
+
+from markitect_tool.cli import main
+from markitect_tool.literate import (
+    discover_code_chunks,
+    tangle_markdown,
+    weave_markdown,
+    write_tangle_files,
+)
+
+
+# Fixture with two named chunks: `helpers`, and `main`, which references
+# `<<helpers>>` and declares the tangle target `src/app.py`.
+LITERATE_DOC = """# Literate Example
+
+```python {#helpers}
+def helper():
+    return "ready"
+```
+
+```python {#main tangle="src/app.py"}
+<<helpers>>
+
+def main():
+    return helper()
+```
+"""
+
+
+def test_discover_code_chunks_with_references_and_targets():
+    """discover_code_chunks returns chunks in document order with tangle target and references."""
+    chunks = discover_code_chunks(LITERATE_DOC, source_path="example.md")
+
+    assert [chunk.chunk_id for chunk in chunks] == ["helpers", "main"]
+    assert chunks[1].target_path == "src/app.py"
+    assert chunks[1].references == ["helpers"]
+
+
+def test_tangle_expands_named_chunk_references():
+    """Tangling inlines the `helpers` chunk into the single `src/app.py` output file."""
+    result = tangle_markdown(LITERATE_DOC, source_path="example.md")
+
+    assert result.valid
+    assert len(result.files) == 1
+    assert result.files[0].path == "src/app.py"
+    assert "def helper" in result.files[0].content
+    # The reference marker itself must not survive in the output.
+    assert "<<helpers>>" not in result.files[0].content
+    assert result.provenance[0].operation == "literate.tangle"
+
+
+def test_tangle_reports_missing_chunk_reference():
+    """Referencing an undefined chunk makes the result invalid with `literate.missing_chunk`."""
+    markdown = """```python {#main tangle="src/app.py"}
+<<missing>>
+```
+"""
+
+    result = tangle_markdown(markdown, source_path="example.md")
+
+    assert not result.valid
+    assert result.diagnostics[0].code == "literate.missing_chunk"
+
+
+def test_weave_appends_chunk_index():
+    """Woven output has a `## Code Chunk Index` listing `main` with its target and references."""
+    result = weave_markdown(LITERATE_DOC, source_path="example.md")
+
+    assert "## Code Chunk Index" in result.markdown
+    assert "`main` -> `src/app.py`; refs: `helpers`" in result.markdown
+
+
+def test_write_tangle_files(tmp_path: Path):
+    """write_tangle_files writes each tangled file under the given directory and returns the paths."""
+    result = tangle_markdown(LITERATE_DOC, source_path="example.md")
+
+    written = write_tangle_files(result, tmp_path)
+
+    assert written == [str(tmp_path / "src" / "app.py")]
+    assert "def main" in (tmp_path / "src" / "app.py").read_text(encoding="utf-8")
+
+
+def test_mkt_tangle_and_weave(tmp_path: Path):
+    """The `tangle` and `weave` CLI commands exit 0 and write their outputs to disk."""
+    source = tmp_path / "literate.md"
+    output_dir = tmp_path / "out"
+    woven = tmp_path / "woven.md"
+    source.write_text(LITERATE_DOC, encoding="utf-8")
+    runner = CliRunner()
+
+    tangle_result = runner.invoke(main, ["tangle", str(source), "--output-dir", str(output_dir)])
+    weave_result = runner.invoke(main, ["weave", str(source), "--output", str(woven)])
+
+    assert tangle_result.exit_code == 0
+    assert "files: 1" in tangle_result.output
+    assert (output_dir / "src" / "app.py").exists()
+    assert weave_result.exit_code == 0
+    assert "## Code Chunk Index" in woven.read_text(encoding="utf-8")
--- a/tests/test_ops_transform_compose_include.py
+++ b/tests/test_ops_transform_compose_include.py
@@ -34,6 +34,27 @@ title: Original
    assert "## Intro" in result.markdown
    assert "### Detail" in result.markdown
    assert result.operations == ["set_frontmatter", "shift_headings:1"]
+    assert [event.operation for event in result.provenance] == [
+        "set_frontmatter",
+        "shift_headings",
+    ]
+
+
+def test_transform_shifts_headings_without_touching_fenced_code():
+    """Heading shift leaves `#` lines inside fenced code untouched and reports only real headings."""
+    markdown = """# Intro
+
+```markdown
+# Literal Heading
+```
+
+## Real Heading
+"""
+
+    result = transform_markdown(markdown, heading_delta=1)
+
+    assert "```markdown\n# Literal Heading\n```" in result.markdown
+    assert "### Real Heading" in result.markdown
+    # Lines 1 and 7 are the real headings; line 4 sits inside the fence.
+    assert result.provenance[0].metadata["affected_lines"] == [1, 7]


 def test_transform_extracts_selector_text():
@@ -104,6 +125,25 @@ def test_resolve_includes_supports_brace_shorthand(tmp_path: Path):
    assert "Before" in result.markdown
    assert "Included body." in result.markdown
    assert "After" in result.markdown
+    assert result.provenance[0].operation == "include"
+    assert result.provenance[0].target_path == str(partial.resolve())
+
+
+def test_resolve_includes_ignores_markers_inside_fenced_code(tmp_path: Path):
+    """An include marker inside fenced code stays literal; only the one outside is expanded."""
+    partial = tmp_path / "partial.md"
+    partial.write_text("Included body.", encoding="utf-8")
+    markdown = """```markdown
+{{include:partial.md}}
+```
+
+{{include:partial.md}}
+"""
+
+    result = resolve_includes(markdown, base_dir=tmp_path)
+
+    # Exactly one expansion, and the fenced marker is still present verbatim.
+    assert result.markdown.count("Included body.") == 1
+    assert "{{include:partial.md}}" in result.markdown
+    assert result.included_paths == [str(partial.resolve())]


 def test_resolve_includes_rejects_cycles(tmp_path: Path):
--- a/tests/test_processor_registry.py
+++ b/tests/test_processor_registry.py
@@ -0,0 +1,105 @@
+from pathlib import Path
+
+from click.testing import CliRunner
+
+from markitect_tool.cli import main
+from markitect_tool.core import parse_markdown
+from markitect_tool.processor import (
+    ProcessorContext,
+    default_processor_registry,
+    discover_fenced_processors,
+    run_fenced_processors,
+)
+from markitect_tool.reference import load_namespaces
+
+
+def test_discover_fenced_processors_from_language_prefix():
+    """A `mkt-<name>` fence language yields a processor block with its id and start line."""
+    markdown = """# Doc
+
+```mkt-uppercase {#shout}
+hello
+```
+"""
+
+    blocks = discover_fenced_processors(markdown, source_path="doc.md")
+
+    assert len(blocks) == 1
+    assert blocks[0].processor == "uppercase"
+    assert blocks[0].unit_id == "shout"
+    # The opening fence is the third line of the document.
+    assert blocks[0].line_start == 3
+
+
+def test_default_registry_runs_uppercase_processor():
+    """Running without an explicit registry upper-cases a `mkt-uppercase` fence body."""
+    markdown = """```mkt-uppercase {#shout}
+hello
+```
+"""
+    context = ProcessorContext()
+
+    run = run_fenced_processors(markdown, context=context)
+
+    assert run.valid
+    assert run.results[0].content == "HELLO\n"
+    assert run.results[0].provenance[0].operation == "processor.uppercase"
+
+
+def test_include_processor_uses_reference_resolver(tmp_path: Path):
+    """`mkt-include` resolves its `ref` via frontmatter namespaces and records the dependency."""
+    source = tmp_path / "doc.md"
+    partial = tmp_path / "partial.md"
+    source.write_text(
+        """---
+namespaces:
+  local: .
+---
+
+```mkt-include {#intro ref="local:partial.md#summary"}
+```
+""",
+        encoding="utf-8",
+    )
+    partial.write_text("# Partial\n\n## Summary\n\nIncluded summary.\n", encoding="utf-8")
+    # Namespaces come from the parsed document's frontmatter.
+    document = parse_markdown(source.read_text(encoding="utf-8"), source_path=str(source))
+    context = ProcessorContext(
+        root=tmp_path,
+        current_path=source,
+        namespaces=load_namespaces(document.frontmatter),
+    )
+
+    run = run_fenced_processors(source.read_text(encoding="utf-8"), context=context)
+
+    assert run.valid
+    assert run.results[0].dependencies == [str(partial.resolve())]
+    assert "Included summary" in run.results[0].content
+
+
+def test_unknown_processor_returns_diagnostic():
+    """A fence naming an unregistered processor yields an invalid run with `processor.unknown`."""
+    markdown = """```mkt-nope {#x}
+content
+```
+"""
+    registry = default_processor_registry()
+
+    run = run_fenced_processors(markdown, context=ProcessorContext(), registry=registry)
+
+    assert not run.valid
+    assert run.results[0].diagnostics[0].code == "processor.unknown"
+
+
+def test_mkt_process_outputs_text(tmp_path: Path):
+    """The `process` CLI command exits 0 and prints validity, processor name/id and output."""
+    source = tmp_path / "doc.md"
+    source.write_text(
+        """# Doc
+
+```mkt-uppercase {#shout}
+hello
+```
+""",
+        encoding="utf-8",
+    )
+
+    result = CliRunner().invoke(main, ["process", str(source), "--root", str(tmp_path)])
+
+    assert result.exit_code == 0
+    assert "valid" in result.output
+    assert "uppercase shout" in result.output
+    assert "HELLO" in result.output
--- a/tests/test_reference_resolution.py
+++ b/tests/test_reference_resolution.py
@@ -0,0 +1,195 @@
+from pathlib import Path
+
+import pytest
+from click.testing import CliRunner
+
+from markitect_tool.cli import main
+from markitect_tool.core import parse_markdown
+from markitect_tool.reference import (
+    ReferenceContext,
+    ReferenceResolutionError,
+    load_namespaces,
+    parse_reference,
+    resolve_reference,
+)
+
+
+def test_parse_reference_splits_namespace_fragment_and_selector():
+    """parse_reference splits `namespace:address#fragment::selector` into its four parts."""
+    address = parse_reference("std:clauses/payment.md#section:fees::blocks[type=code]")
+
+    assert address.namespace == "std"
+    assert address.address == "clauses/payment.md"
+    assert address.fragment == "section:fees"
+    assert address.selector == "blocks[type=code]"
+
+
+def test_load_namespaces_accepts_optional_colon_suffix():
+    """A trailing colon on a namespace key is stripped; keys without one pass through."""
+    namespaces = load_namespaces({"namespaces": {"std:": "./standard", "src": "../src"}})
+
+    assert namespaces == {"std": "./standard", "src": "../src"}
+
+
+def test_resolve_path_reference_returns_document_unit(tmp_path: Path):
+    """A bare path resolves to one `document` unit whose id is the frontmatter `id`."""
+    context_file = tmp_path / "context.md"
+    target_file = tmp_path / "target.md"
+    context_file.write_text("# Context\n", encoding="utf-8")
+    target_file.write_text("---\nid: target-doc\ntitle: Target\n---\n\n# Target\n\nBody.", encoding="utf-8")
+    context = ReferenceContext(root=tmp_path, current_path=context_file)
+
+    resolution = resolve_reference("target.md", context=context)
+
+    assert resolution.target_path == str(target_file.resolve())
+    assert len(resolution.units) == 1
+    assert resolution.units[0].kind == "document"
+    assert resolution.units[0].unit_id == "target-doc"
+    assert "# Target" in resolution.units[0].text
+
+
+def test_resolve_namespace_reference_and_explicit_section_id(tmp_path: Path):
+    """A namespaced `#section:<id>` reference finds the section with the explicit `{#...}` id."""
+    standard = tmp_path / "standard"
+    standard.mkdir()
+    context_file = tmp_path / "context.md"
+    clause_file = standard / "clauses.md"
+    # The context document declares the `std` namespace in its frontmatter.
+    context_file.write_text(
+        "---\nnamespaces:\n  std: ./standard\n---\n\n# Context\n",
+        encoding="utf-8",
+    )
+    clause_file.write_text(
+        "# Clauses\n\n## Payment Terms {#payment-terms}\n\nPay within 30 days.\n",
+        encoding="utf-8",
+    )
+    document = parse_markdown(context_file.read_text(encoding="utf-8"), source_path=str(context_file))
+    context = ReferenceContext.from_document(document, root=tmp_path)
+
+    resolution = resolve_reference("std:clauses.md#section:payment-terms", context=context)
+
+    assert resolution.units[0].kind == "section"
+    assert resolution.units[0].unit_id == "payment-terms"
+    # The `{#payment-terms}` attribute is not part of the section name.
+    assert resolution.units[0].name == "Payment Terms"
+    assert "Pay within 30 days" in resolution.units[0].text
+
+
+def test_resolve_selector_reference_uses_existing_query_engine(tmp_path: Path):
+    """A `::sections[heading=...]` selector returns only the matching section."""
+    standard_dir = tmp_path / "standard"
+    standard_dir.mkdir()
+    context_file = tmp_path / "context.md"
+    context_file.write_text(
+        "---\nnamespaces:\n  std: ./standard\n---\n\n# Context\n",
+        encoding="utf-8",
+    )
+    (standard_dir / "clauses.md").write_text(
+        "# Clauses\n\n## Warranty\n\nWarranty text.\n\n## Liability\n\nLiability text.\n",
+        encoding="utf-8",
+    )
+    # Build the context in two steps instead of one nested expression.
+    document = parse_markdown(context_file.read_text(encoding="utf-8"), str(context_file))
+    context = ReferenceContext.from_document(document, root=tmp_path)
+
+    resolution = resolve_reference("std:clauses.md::sections[heading=Warranty]", context=context)
+
+    assert [unit.kind for unit in resolution.units] == ["section"]
+    matched = resolution.units[0]
+    assert matched.name == "Warranty"
+    # The neighbouring section must not leak into the selected text.
+    assert "Liability" not in matched.text
+
+
+def test_resolve_pathless_fragment_uses_current_document(tmp_path: Path):
+    """A reference that is only a fragment resolves against the context's current document."""
+    context_file = tmp_path / "context.md"
+    context_file.write_text("# Context\n\n## Overview\n\nUseful local context.\n", encoding="utf-8")
+    context = ReferenceContext(root=tmp_path, current_path=context_file)
+
+    resolution = resolve_reference("#overview", context=context)
+
+    assert resolution.target_path == str(context_file.resolve())
+    assert resolution.units[0].kind == "section"
+    assert resolution.units[0].unit_id == "overview"
+    assert "Useful local context" in resolution.units[0].text
+
+
+def test_resolve_named_region_by_id_and_tag(tmp_path: Path):
+    """A `mkt:region` comment block can be addressed by `#region:<id>` or by `#tag:<tag>`."""
+    context_file = tmp_path / "context.md"
+    context_file.write_text(
+        """# Context
+
+<!-- mkt:region id="overview" tags="reuse summary" -->
+Reusable region text.
+<!-- /mkt:region -->
+""",
+        encoding="utf-8",
+    )
+    context = ReferenceContext(root=tmp_path, current_path=context_file)
+
+    by_id = resolve_reference("#region:overview", context=context)
+    by_tag = resolve_reference("#tag:summary", context=context)
+
+    assert by_id.units[0].kind == "region"
+    # The region text excludes the marker comments and surrounding newlines.
+    assert by_id.units[0].text == "Reusable region text."
+    assert by_tag.units[0].unit_id == "overview"
+
+
+def test_resolve_fenced_block_by_id(tmp_path: Path):
+    """`#fence:<id>` resolves a fenced block and exposes its language and attributes as metadata."""
+    context_file = tmp_path / "context.md"
+    context_file.write_text(
+        """# Context
+
+```python {#load-config tags="code setup" tangle="src/config.py"}
+def load_config():
+    return {}
+```
+""",
+        encoding="utf-8",
+    )
+    context = ReferenceContext(root=tmp_path, current_path=context_file)
+
+    resolution = resolve_reference("#fence:load-config", context=context)
+
+    assert resolution.units[0].kind == "fenced_block"
+    assert resolution.units[0].unit_id == "load-config"
+    assert resolution.units[0].metadata["language"] == "python"
+    assert resolution.units[0].metadata["attrs"]["tangle"] == "src/config.py"
+    assert "def load_config" in resolution.units[0].text
+
+
+def test_resolve_line_range_fragment(tmp_path: Path):
+    """`#line:3-4` returns source lines 3 through 4 as a single `line_range` unit."""
+    context_file = tmp_path / "context.md"
+    context_file.write_text("# Context\n\nLine A\nLine B\nLine C\n", encoding="utf-8")
+    context = ReferenceContext(root=tmp_path, current_path=context_file)
+
+    resolution = resolve_reference("#line:3-4", context=context)
+
+    assert resolution.units[0].kind == "line_range"
+    assert resolution.units[0].span.line_start == 3
+    # Both ends of the range are included; there is no trailing newline.
+    assert resolution.units[0].text == "Line A\nLine B"
+
+
+def test_resolve_rejects_unknown_namespace(tmp_path: Path):
+    """A namespace prefix the context does not declare raises ReferenceResolutionError."""
+    context_file = tmp_path / "context.md"
+    context_file.write_text("# Context\n", encoding="utf-8")
+    context = ReferenceContext(root=tmp_path, current_path=context_file)
+
+    with pytest.raises(ReferenceResolutionError, match="Unknown namespace"):
+        resolve_reference("missing:doc.md", context=context)
+
+
+def test_resolve_rejects_paths_outside_root(tmp_path: Path):
+    """A `../` path that leaves the context root raises ReferenceResolutionError ("escapes root")."""
+    context_file = tmp_path / "context.md"
+    context_file.write_text("# Context\n", encoding="utf-8")
+    context = ReferenceContext(root=tmp_path, current_path=context_file)
+
+    with pytest.raises(ReferenceResolutionError, match="escapes root"):
+        resolve_reference("../outside.md", context=context)
+
+
+def test_mkt_ref_resolve_outputs_text(tmp_path: Path):
+    """The `ref resolve` CLI command exits 0 and prints the unit count, kind/id and heading."""
+    context_file = tmp_path / "context.md"
+    target_file = tmp_path / "target.md"
+    context_file.write_text("# Context\n", encoding="utf-8")
+    target_file.write_text("# Target\n\n## Decision\n\nChosen.", encoding="utf-8")
+
+    result = CliRunner().invoke(
+        main,
+        ["ref", "resolve", str(context_file), "target.md#decision", "--root", str(tmp_path)],
+    )
+
+    assert result.exit_code == 0
+    assert "1 unit(s)" in result.output
+    assert "section decision" in result.output
+    assert "Decision" in result.output
--- a/tests/test_wp0010_migration_examples.py
+++ b/tests/test_wp0010_migration_examples.py
@@ -0,0 +1,60 @@
+from pathlib import Path
+
+from markitect_tool.core import parse_markdown_file
+from markitect_tool.explode import explode_markdown_file, implode_markdown_directory
+from markitect_tool.ops import resolve_includes
+from markitect_tool.processor import ProcessorContext, run_fenced_processors
+from markitect_tool.reference import load_namespaces
+from markitect_tool.literate import tangle_markdown
+
+
+# Anchor the examples directory to this file rather than the working
+# directory, so the tests also pass when pytest is not started from the
+# repository root (this module lives in `tests/`, next to `examples/`).
+EXAMPLES = Path(__file__).resolve().parent.parent / "examples" / "migration"
+
+
+def test_migration_explode_example_roundtrips(tmp_path: Path):
+    """The checked-in legacy explode example survives hierarchical explode + implode unchanged."""
+    source = EXAMPLES / "legacy-explode-source.md"
+    original = source.read_text(encoding="utf-8")
+
+    explode_markdown_file(source, tmp_path / "exploded", variant="hierarchical")
+    result = implode_markdown_directory(tmp_path / "exploded")
+
+    assert result.markdown == original
+
+
+def test_migration_reference_backed_transclusion_example():
+    """Fenced processors in the legacy transclusion example run cleanly and yield the payment text."""
+    source = EXAMPLES / "legacy-transclusion-context.md"
+    document = parse_markdown_file(source)
+    # Namespaces are read from the example document's own frontmatter.
+    context = ProcessorContext(
+        root=EXAMPLES,
+        current_path=source,
+        namespaces=load_namespaces(document.frontmatter),
+    )
+
+    result = run_fenced_processors(source.read_text(encoding="utf-8"), context=context)
+
+    assert result.valid
+    assert "Payment is due within 30 days" in result.results[0].content
+
+
+def test_migration_path_include_example():
+    """Resolving includes in the legacy path-include example yields the warranty section."""
+    source = EXAMPLES / "legacy-path-include.md"
+
+    result = resolve_includes(
+        source.read_text(encoding="utf-8"),
+        base_dir=EXAMPLES,
+        current_path=source,
+    )
+
+    assert "## Warranty" in result.markdown
+    assert "Warranty begins on the effective date" in result.markdown
+
+
+def test_migration_literate_example_tangles():
+    """Tangling the legacy literate example produces `src/app.py` with `<<config>>` expanded."""
+    source = EXAMPLES / "legacy-literate.md"
+
+    # Pass the path as a string, matching the other tangle_markdown call sites.
+    result = tangle_markdown(source.read_text(encoding="utf-8"), source_path=str(source))
+
+    assert result.valid
+    assert result.files[0].path == "src/app.py"
+    assert "CONFIG" in result.files[0].content
+    assert "<<config>>" not in result.files[0].content