extension for ref resolve, explode, implode, weave, tangle

This commit is contained in:
2026-05-04 02:25:49 +02:00
parent 8203f50fd5
commit 65bfc1aebf
39 changed files with 3959 additions and 25 deletions

View File

@@ -4,6 +4,7 @@ from markitect_tool.ops.engine import (
ComposeResult,
IncludeError,
IncludeResult,
OperationProvenance,
TransformResult,
compose_files,
resolve_includes,
@@ -14,6 +15,7 @@ __all__ = [
"ComposeResult",
"IncludeError",
"IncludeResult",
"OperationProvenance",
"TransformResult",
"compose_files",
"resolve_includes",

View File

@@ -9,6 +9,7 @@ from pathlib import Path
from typing import Any
import yaml
from markdown_it import MarkdownIt
from markitect_tool.core import parse_markdown
from markitect_tool.query import extract_document
@@ -18,15 +19,46 @@ class IncludeError(ValueError):
"""Raised when include resolution cannot continue."""
@dataclass(frozen=True)
class OperationProvenance:
    """Record describing one deterministic Markdown operation.

    Only ``operation`` is required. Every other field is optional and is
    left out of the serialized form when it is ``None`` (or, for the two
    container fields, empty).
    """

    # Name of the operation that was applied, e.g. "include".
    operation: str
    # Path of the document the operation was applied to, when known.
    source_path: str | None = None
    # Line span within the source document, when known.
    line_start: int | None = None
    line_end: int | None = None
    # Path of the document the operation pulled in or wrote to, when known.
    target_path: str | None = None
    # Paths this operation depends on.
    dependencies: list[str] = field(default_factory=list)
    # Free-form, operation-specific details.
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a compact mapping of the fields that carry a value.

        Keys appear in field declaration order. A key is omitted when its
        value is ``None``; ``dependencies`` and ``metadata`` are also
        omitted when they are empty.
        """
        candidates = (
            ("operation", self.operation),
            ("source_path", self.source_path),
            ("line_start", self.line_start),
            ("line_end", self.line_end),
            ("target_path", self.target_path),
            # Empty containers are collapsed to None so they get dropped below.
            ("dependencies", self.dependencies if self.dependencies else None),
            ("metadata", self.metadata if self.metadata else None),
        )
        payload: dict[str, Any] = {}
        for name, value in candidates:
            if value is None:
                continue
            payload[name] = value
        return payload
@dataclass(frozen=True)
class TransformResult:
    """Result of a deterministic Markdown transform."""

    # The transformed Markdown text (may legitimately be empty).
    markdown: str
    # Short labels of the operations that were applied, in order.
    operations: list[str] = field(default_factory=list)
    # Structured provenance events, one per applied operation.
    provenance: list[OperationProvenance] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the result to a plain dictionary.

        ``markdown`` is always present, even when it is the empty string,
        so consumers can rely on the key. ``operations`` and ``provenance``
        are included only when they are non-empty.

        Returns:
            A dictionary with the ``markdown`` key and, when non-empty,
            ``operations`` and ``provenance`` (each event serialized via
            its own ``to_dict``).
        """
        # Filtering every key on truthiness would silently drop an empty
        # ``markdown`` string; only the list fields are optional.
        data: dict[str, Any] = {"markdown": self.markdown}
        if self.operations:
            data["operations"] = list(self.operations)
        if self.provenance:
            data["provenance"] = [event.to_dict() for event in self.provenance]
        return data
@dataclass(frozen=True)
@@ -46,9 +78,15 @@ class IncludeResult:
markdown: str
included_paths: list[str] = field(default_factory=list)
provenance: list[OperationProvenance] = field(default_factory=list)
def to_dict(self) -> dict[str, Any]:
return asdict(self)
data: dict[str, Any] = {
"markdown": self.markdown,
"included_paths": self.included_paths,
"provenance": [event.to_dict() for event in self.provenance],
}
return {key: value for key, value in data.items() if value}
_COMMENT_INCLUDE_RE = re.compile(r"<!--\s*mkt:include\s+(?P<attrs>.*?)\s*-->", re.DOTALL)
@@ -68,15 +106,30 @@ def transform_markdown(
"""Apply deterministic operations to one Markdown document."""
operations: list[str] = []
provenance: list[OperationProvenance] = []
frontmatter, body = _split_frontmatter(markdown)
if set_frontmatter:
frontmatter = _deep_merge(frontmatter, set_frontmatter)
operations.append("set_frontmatter")
provenance.append(
OperationProvenance(
operation="set_frontmatter",
source_path=source_path,
metadata={"keys": sorted(set_frontmatter.keys())},
)
)
if heading_delta:
body = shift_heading_levels(body, heading_delta)
body, affected_lines = _shift_heading_levels(body, heading_delta)
operations.append(f"shift_headings:{heading_delta}")
provenance.append(
OperationProvenance(
operation="shift_headings",
source_path=source_path,
metadata={"delta": heading_delta, "affected_lines": affected_lines},
)
)
if extract_selector:
document_text = _join_frontmatter(frontmatter, body) if frontmatter else body
@@ -84,24 +137,71 @@ def transform_markdown(
body = "\n\n".join(extract_document(document, extract_selector))
frontmatter = {}
operations.append(f"extract:{extract_selector}")
provenance.append(
OperationProvenance(
operation="extract",
source_path=source_path,
metadata={"selector": extract_selector},
)
)
if strip_frontmatter:
frontmatter = {}
operations.append("strip_frontmatter")
provenance.append(
OperationProvenance(
operation="strip_frontmatter",
source_path=source_path,
)
)
return TransformResult(markdown=_join_frontmatter(frontmatter, body), operations=operations)
return TransformResult(
markdown=_join_frontmatter(frontmatter, body),
operations=operations,
provenance=provenance,
)
def shift_heading_levels(markdown: str, delta: int) -> str:
    """Return *markdown* with its ATX heading levels moved by *delta*.

    Public wrapper around ``_shift_heading_levels`` that keeps only the
    rewritten text and discards the list of affected line numbers.
    Resulting levels are clamped to the range 1 through 6.
    """
    return _shift_heading_levels(markdown, delta)[0]
# Matches one physical line including its terminator. Only CR, LF and CRLF
# count as line breaks, which is how markdown-it numbers lines. By contrast
# ``str.splitlines`` also breaks on form feed, vertical tab, U+2028, etc.
_MARKDOWN_LINE_RE = re.compile(r"[^\r\n]*(?:\r\n|\r|\n)|[^\r\n]+")


def _shift_heading_levels(markdown: str, delta: int) -> tuple[str, list[int]]:
    """Shift ATX heading levels by *delta*, skipping lines inside code.

    Args:
        markdown: Markdown text to rewrite.
        delta: Amount added to each heading level; the result is clamped
            to the range 1 through 6.

    Returns:
        A tuple of the rewritten text and the 1-based line numbers of the
        heading lines that were rewritten.
    """
    ignored_lines = _code_line_numbers(markdown)
    affected_lines: list[int] = []
    rendered_lines: list[str] = []

    # Split on the same line breaks the parser uses so that the line numbers
    # here stay aligned with those reported by ``_code_line_numbers``.
    for line_number, line in enumerate(_MARKDOWN_LINE_RE.findall(markdown), start=1):
        if line_number in ignored_lines:
            rendered_lines.append(line)
            continue

        # Keep the original terminator so the output round-trips byte for
        # byte apart from the heading markers themselves.
        line_body = line.rstrip("\r\n")
        line_ending = line[len(line_body):]

        match = _HEADING_RE.match(line_body)
        if not match:
            rendered_lines.append(line)
            continue

        marks = match.group(1)
        suffix = match.group(2)
        level = min(max(len(marks) + delta, 1), 6)
        rendered_lines.append(f"{'#' * level}{suffix}{line_ending}")
        affected_lines.append(line_number)

    return "".join(rendered_lines), affected_lines
def _code_line_numbers(markdown: str) -> set[int]:
    """Collect the 1-based line numbers covered by code blocks.

    The text is parsed with markdown-it; every ``fence`` or ``code_block``
    token that carries a source map contributes the lines of its span.
    """
    md = MarkdownIt("commonmark", {"tables": True}).enable("table")
    code_token_types = {"fence", "code_block"}
    return {
        number
        for token in md.parse(markdown)
        if token.type in code_token_types and token.map
        # token.map is a 0-based [start, end) pair; convert to 1-based lines.
        for number in range(token.map[0] + 1, token.map[1] + 1)
    }
def compose_files(
@@ -154,18 +254,22 @@ def resolve_includes(
root = Path(base_dir).resolve()
stack = [Path(current_path).resolve()] if current_path else []
included: list[Path] = []
provenance: list[OperationProvenance] = []
resolved = _resolve_include_text(
markdown,
root=root,
current_dir=Path(current_path).resolve().parent if current_path else root,
source_path=Path(current_path).resolve() if current_path else None,
stack=stack,
included=included,
provenance=provenance,
depth=0,
max_depth=max_depth,
)
return IncludeResult(
markdown=resolved,
included_paths=[str(path) for path in included],
provenance=provenance,
)
@@ -174,34 +278,73 @@ def _resolve_include_text(
*,
root: Path,
current_dir: Path,
source_path: Path | None,
stack: list[Path],
included: list[Path],
provenance: list[OperationProvenance],
depth: int,
max_depth: int,
) -> str:
if depth > max_depth:
raise IncludeError(f"Include depth exceeded max_depth={max_depth}")
def replace_comment(match: re.Match[str]) -> str:
attrs = _parse_include_attrs(match.group("attrs"))
return _render_include(attrs, root, current_dir, stack, included, depth, max_depth)
ignored_lines = _code_line_numbers(markdown)
rendered_lines: list[str] = []
def replace_brace(match: re.Match[str]) -> str:
attrs = {"path": match.group("path").strip()}
return _render_include(attrs, root, current_dir, stack, included, depth, max_depth)
for line_number, line in enumerate(markdown.splitlines(keepends=True), start=1):
if line_number in ignored_lines:
rendered_lines.append(line)
continue
markdown = _COMMENT_INCLUDE_RE.sub(replace_comment, markdown)
return _BRACE_INCLUDE_RE.sub(replace_brace, markdown)
def replace_comment(match: re.Match[str]) -> str:
attrs = _parse_include_attrs(match.group("attrs"))
return _render_include(
attrs,
root,
current_dir,
source_path,
stack,
included,
provenance,
depth,
max_depth,
marker_line=line_number,
)
def replace_brace(match: re.Match[str]) -> str:
attrs = {"path": match.group("path").strip()}
return _render_include(
attrs,
root,
current_dir,
source_path,
stack,
included,
provenance,
depth,
max_depth,
marker_line=line_number,
)
line = _COMMENT_INCLUDE_RE.sub(replace_comment, line)
line = _BRACE_INCLUDE_RE.sub(replace_brace, line)
rendered_lines.append(line)
return "".join(rendered_lines)
def _render_include(
attrs: dict[str, str],
root: Path,
current_dir: Path,
source_path: Path | None,
stack: list[Path],
included: list[Path],
provenance: list[OperationProvenance],
depth: int,
max_depth: int,
*,
marker_line: int,
) -> str:
raw_path = attrs.get("path")
if not raw_path:
@@ -228,12 +371,33 @@ def _render_include(
body = shift_heading_levels(body, heading_delta)
included.append(include_path)
provenance.append(
OperationProvenance(
operation="include",
source_path=str(source_path) if source_path else None,
line_start=marker_line,
line_end=marker_line,
target_path=str(include_path),
dependencies=[str(include_path)],
metadata={
key: value
for key, value in {
"selector": selector,
"heading_delta": heading_delta if heading_delta else None,
"include_frontmatter": attrs.get("include_frontmatter"),
}.items()
if value is not None
},
)
)
return _resolve_include_text(
body.strip(),
root=root,
current_dir=include_path.parent,
source_path=include_path,
stack=stack + [include_path],
included=included,
provenance=provenance,
depth=depth + 1,
max_depth=max_depth,
)