generated from coulomb/repo-seed
extension for ref resolve, explode, implode, weave, tangle
This commit is contained in:
@@ -4,6 +4,7 @@ from markitect_tool.ops.engine import (
|
||||
ComposeResult,
|
||||
IncludeError,
|
||||
IncludeResult,
|
||||
OperationProvenance,
|
||||
TransformResult,
|
||||
compose_files,
|
||||
resolve_includes,
|
||||
@@ -14,6 +15,7 @@ __all__ = [
|
||||
"ComposeResult",
|
||||
"IncludeError",
|
||||
"IncludeResult",
|
||||
"OperationProvenance",
|
||||
"TransformResult",
|
||||
"compose_files",
|
||||
"resolve_includes",
|
||||
|
||||
@@ -9,6 +9,7 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
from markdown_it import MarkdownIt
|
||||
|
||||
from markitect_tool.core import parse_markdown
|
||||
from markitect_tool.query import extract_document
|
||||
@@ -18,15 +19,46 @@ class IncludeError(ValueError):
|
||||
"""Raised when include resolution cannot continue."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class OperationProvenance:
    """Provenance record for a single deterministic Markdown operation.

    Only ``operation`` is mandatory. Every other field defaults to ``None``
    or an empty container and is left out of :meth:`to_dict` when unset.
    """

    # Name of the operation this record describes.
    operation: str
    # Path of the document the operation was applied to, when known.
    source_path: str | None = None
    # Line range associated with the operation, when known.
    line_start: int | None = None
    line_end: int | None = None
    # Path the operation pointed at, when there is one.
    target_path: str | None = None
    # Paths this operation depends on.
    dependencies: list[str] = field(default_factory=list)
    # Free-form, operation-specific details.
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a plain dict without unset fields.

        Fields that are ``None`` are dropped, and so are ``dependencies`` and
        ``metadata`` when they are empty. Key order follows field order.
        """
        candidates = (
            ("operation", self.operation),
            ("source_path", self.source_path),
            ("line_start", self.line_start),
            ("line_end", self.line_end),
            ("target_path", self.target_path),
            # Empty containers are treated the same as "not provided".
            ("dependencies", self.dependencies if self.dependencies else None),
            ("metadata", self.metadata if self.metadata else None),
        )
        serialized: dict[str, Any] = {}
        for name, value in candidates:
            if value is None:
                continue
            serialized[name] = value
        return serialized
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TransformResult:
    """Result of a deterministic Markdown transform."""

    # Document text after the transform.
    markdown: str
    # Labels of the operations that were applied.
    operations: list[str] = field(default_factory=list)
    # One structured provenance record per applied operation.
    provenance: list[OperationProvenance] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of the result.

        Each provenance record is serialized through its own ``to_dict()``
        rather than ``dataclasses.asdict``, so its unset fields are pruned.
        Top-level keys whose value is falsy (empty string or empty list) are
        omitted from the returned dict.
        """
        # The stale early ``return asdict(self)`` was removed: it made the
        # serialization below unreachable.
        data: dict[str, Any] = {
            "markdown": self.markdown,
            "operations": self.operations,
            "provenance": [event.to_dict() for event in self.provenance],
        }
        return {key: value for key, value in data.items() if value}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -46,9 +78,15 @@ class IncludeResult:
|
||||
|
||||
markdown: str
|
||||
included_paths: list[str] = field(default_factory=list)
|
||||
provenance: list[OperationProvenance] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
    """Return a plain-dict form of the include result.

    Each provenance record is serialized through its own ``to_dict()``.
    Keys whose value is falsy (empty string or empty list) are omitted
    from the returned dict.
    """
    # The stale early ``return asdict(self)`` was removed: it made the
    # serialization below unreachable.
    data: dict[str, Any] = {
        "markdown": self.markdown,
        "included_paths": self.included_paths,
        "provenance": [event.to_dict() for event in self.provenance],
    }
    return {key: value for key, value in data.items() if value}
|
||||
|
||||
|
||||
_COMMENT_INCLUDE_RE = re.compile(r"<!--\s*mkt:include\s+(?P<attrs>.*?)\s*-->", re.DOTALL)
|
||||
@@ -68,15 +106,30 @@ def transform_markdown(
|
||||
"""Apply deterministic operations to one Markdown document."""
|
||||
|
||||
operations: list[str] = []
|
||||
provenance: list[OperationProvenance] = []
|
||||
frontmatter, body = _split_frontmatter(markdown)
|
||||
|
||||
if set_frontmatter:
|
||||
frontmatter = _deep_merge(frontmatter, set_frontmatter)
|
||||
operations.append("set_frontmatter")
|
||||
provenance.append(
|
||||
OperationProvenance(
|
||||
operation="set_frontmatter",
|
||||
source_path=source_path,
|
||||
metadata={"keys": sorted(set_frontmatter.keys())},
|
||||
)
|
||||
)
|
||||
|
||||
if heading_delta:
|
||||
body = shift_heading_levels(body, heading_delta)
|
||||
body, affected_lines = _shift_heading_levels(body, heading_delta)
|
||||
operations.append(f"shift_headings:{heading_delta}")
|
||||
provenance.append(
|
||||
OperationProvenance(
|
||||
operation="shift_headings",
|
||||
source_path=source_path,
|
||||
metadata={"delta": heading_delta, "affected_lines": affected_lines},
|
||||
)
|
||||
)
|
||||
|
||||
if extract_selector:
|
||||
document_text = _join_frontmatter(frontmatter, body) if frontmatter else body
|
||||
@@ -84,24 +137,71 @@ def transform_markdown(
|
||||
body = "\n\n".join(extract_document(document, extract_selector))
|
||||
frontmatter = {}
|
||||
operations.append(f"extract:{extract_selector}")
|
||||
provenance.append(
|
||||
OperationProvenance(
|
||||
operation="extract",
|
||||
source_path=source_path,
|
||||
metadata={"selector": extract_selector},
|
||||
)
|
||||
)
|
||||
|
||||
if strip_frontmatter:
|
||||
frontmatter = {}
|
||||
operations.append("strip_frontmatter")
|
||||
provenance.append(
|
||||
OperationProvenance(
|
||||
operation="strip_frontmatter",
|
||||
source_path=source_path,
|
||||
)
|
||||
)
|
||||
|
||||
return TransformResult(markdown=_join_frontmatter(frontmatter, body), operations=operations)
|
||||
return TransformResult(
|
||||
markdown=_join_frontmatter(frontmatter, body),
|
||||
operations=operations,
|
||||
provenance=provenance,
|
||||
)
|
||||
|
||||
|
||||
def shift_heading_levels(markdown: str, delta: int) -> str:
    """Shift ATX heading levels by delta while clamping to levels 1 through 6.

    Thin public wrapper around ``_shift_heading_levels``: it returns only the
    rewritten text and discards the list of affected line numbers.
    """
    # The leftover nested ``def replace(...)`` header from the old regex
    # version was removed: it wrapped the two lines below, so they never ran.
    shifted, _affected_lines = _shift_heading_levels(markdown, delta)
    return shifted
|
||||
|
||||
|
||||
def _shift_heading_levels(markdown: str, delta: int) -> tuple[str, list[int]]:
    """Shift heading levels line by line, skipping lines inside code blocks.

    Returns a tuple ``(text, affected_lines)``: the rewritten Markdown and the
    1-based numbers of the lines that matched ``_HEADING_RE`` and were
    re-rendered. A matching line is recorded even when clamping leaves its
    level unchanged.
    """
    ignored_lines = _code_line_numbers(markdown)
    affected_lines: list[int] = []
    rendered_lines: list[str] = []

    # NOTE(review): str.splitlines() also splits on separators such as
    # "\x0c" and "\u2028"; confirm the numbering still agrees with
    # _code_line_numbers() for documents containing them.
    for line_number, line in enumerate(markdown.splitlines(keepends=True), start=1):
        if line_number in ignored_lines:
            rendered_lines.append(line)
            continue

        # Split off the line terminator so it can be re-attached unchanged.
        line_body = line.rstrip("\r\n")
        line_ending = line[len(line_body):]

        match = _HEADING_RE.match(line_body)
        if not match:
            rendered_lines.append(line)
            continue

        marks = match.group(1)
        suffix = match.group(2)
        level = min(max(len(marks) + delta, 1), 6)
        # Two stale returns from the old regex version were removed: one here
        # that exited on the first heading with a bare string, and one after
        # the loop that called an undefined ``replace``.
        rendered_lines.append(f"{'#' * level}{suffix}{line_ending}")
        affected_lines.append(line_number)

    return "".join(rendered_lines), affected_lines
|
||||
|
||||
|
||||
def _code_line_numbers(markdown: str) -> set[int]:
    """Collect the 1-based line numbers covered by code tokens.

    Parses ``markdown`` with markdown-it and, for every ``fence`` or
    ``code_block`` token that carries a line map, adds each line of its
    ``(start, end)`` map to the result, shifted by one to 1-based numbers.
    """
    md = MarkdownIt("commonmark", {"tables": True}).enable("table")
    code_token_types = {"fence", "code_block"}
    code_lines: set[int] = set()

    for token in md.parse(markdown):
        if token.type in code_token_types and token.map:
            first, last = token.map
            code_lines |= set(range(first + 1, last + 1))

    return code_lines
|
||||
|
||||
|
||||
def compose_files(
|
||||
@@ -154,18 +254,22 @@ def resolve_includes(
|
||||
root = Path(base_dir).resolve()
|
||||
stack = [Path(current_path).resolve()] if current_path else []
|
||||
included: list[Path] = []
|
||||
provenance: list[OperationProvenance] = []
|
||||
resolved = _resolve_include_text(
|
||||
markdown,
|
||||
root=root,
|
||||
current_dir=Path(current_path).resolve().parent if current_path else root,
|
||||
source_path=Path(current_path).resolve() if current_path else None,
|
||||
stack=stack,
|
||||
included=included,
|
||||
provenance=provenance,
|
||||
depth=0,
|
||||
max_depth=max_depth,
|
||||
)
|
||||
return IncludeResult(
|
||||
markdown=resolved,
|
||||
included_paths=[str(path) for path in included],
|
||||
provenance=provenance,
|
||||
)
|
||||
|
||||
|
||||
@@ -174,34 +278,73 @@ def _resolve_include_text(
|
||||
*,
|
||||
root: Path,
|
||||
current_dir: Path,
|
||||
source_path: Path | None,
|
||||
stack: list[Path],
|
||||
included: list[Path],
|
||||
provenance: list[OperationProvenance],
|
||||
depth: int,
|
||||
max_depth: int,
|
||||
) -> str:
|
||||
if depth > max_depth:
|
||||
raise IncludeError(f"Include depth exceeded max_depth={max_depth}")
|
||||
|
||||
def replace_comment(match: re.Match[str]) -> str:
|
||||
attrs = _parse_include_attrs(match.group("attrs"))
|
||||
return _render_include(attrs, root, current_dir, stack, included, depth, max_depth)
|
||||
ignored_lines = _code_line_numbers(markdown)
|
||||
rendered_lines: list[str] = []
|
||||
|
||||
def replace_brace(match: re.Match[str]) -> str:
|
||||
attrs = {"path": match.group("path").strip()}
|
||||
return _render_include(attrs, root, current_dir, stack, included, depth, max_depth)
|
||||
for line_number, line in enumerate(markdown.splitlines(keepends=True), start=1):
|
||||
if line_number in ignored_lines:
|
||||
rendered_lines.append(line)
|
||||
continue
|
||||
|
||||
markdown = _COMMENT_INCLUDE_RE.sub(replace_comment, markdown)
|
||||
return _BRACE_INCLUDE_RE.sub(replace_brace, markdown)
|
||||
def replace_comment(match: re.Match[str]) -> str:
|
||||
attrs = _parse_include_attrs(match.group("attrs"))
|
||||
return _render_include(
|
||||
attrs,
|
||||
root,
|
||||
current_dir,
|
||||
source_path,
|
||||
stack,
|
||||
included,
|
||||
provenance,
|
||||
depth,
|
||||
max_depth,
|
||||
marker_line=line_number,
|
||||
)
|
||||
|
||||
def replace_brace(match: re.Match[str]) -> str:
|
||||
attrs = {"path": match.group("path").strip()}
|
||||
return _render_include(
|
||||
attrs,
|
||||
root,
|
||||
current_dir,
|
||||
source_path,
|
||||
stack,
|
||||
included,
|
||||
provenance,
|
||||
depth,
|
||||
max_depth,
|
||||
marker_line=line_number,
|
||||
)
|
||||
|
||||
line = _COMMENT_INCLUDE_RE.sub(replace_comment, line)
|
||||
line = _BRACE_INCLUDE_RE.sub(replace_brace, line)
|
||||
rendered_lines.append(line)
|
||||
|
||||
return "".join(rendered_lines)
|
||||
|
||||
|
||||
def _render_include(
|
||||
attrs: dict[str, str],
|
||||
root: Path,
|
||||
current_dir: Path,
|
||||
source_path: Path | None,
|
||||
stack: list[Path],
|
||||
included: list[Path],
|
||||
provenance: list[OperationProvenance],
|
||||
depth: int,
|
||||
max_depth: int,
|
||||
*,
|
||||
marker_line: int,
|
||||
) -> str:
|
||||
raw_path = attrs.get("path")
|
||||
if not raw_path:
|
||||
@@ -228,12 +371,33 @@ def _render_include(
|
||||
body = shift_heading_levels(body, heading_delta)
|
||||
|
||||
included.append(include_path)
|
||||
provenance.append(
|
||||
OperationProvenance(
|
||||
operation="include",
|
||||
source_path=str(source_path) if source_path else None,
|
||||
line_start=marker_line,
|
||||
line_end=marker_line,
|
||||
target_path=str(include_path),
|
||||
dependencies=[str(include_path)],
|
||||
metadata={
|
||||
key: value
|
||||
for key, value in {
|
||||
"selector": selector,
|
||||
"heading_delta": heading_delta if heading_delta else None,
|
||||
"include_frontmatter": attrs.get("include_frontmatter"),
|
||||
}.items()
|
||||
if value is not None
|
||||
},
|
||||
)
|
||||
)
|
||||
return _resolve_include_text(
|
||||
body.strip(),
|
||||
root=root,
|
||||
current_dir=include_path.parent,
|
||||
source_path=include_path,
|
||||
stack=stack + [include_path],
|
||||
included=included,
|
||||
provenance=provenance,
|
||||
depth=depth + 1,
|
||||
max_depth=max_depth,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user