- ContentMacro: add __post_init__ to auto-derive raw_text when built
programmatically, preventing str.replace("", X) corruption
- MacroParser: add @{target} shorthand syntax support mapped to REQUIRED kind,
updating parse, has_macros, count_macros, and find_macro_positions
- Artifact: store content in model and SQLite DB, replace resolver placeholder
with actual artifact content, add migration for existing databases
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
238 lines
6.7 KiB
Python
238 lines
6.7 KiB
Python
"""
Macro parser for extracting ContentMacros from template content.

Implements FR-2.2: Macro detection and extraction.
"""
|
|
|
|
import re
|
|
from typing import List, Tuple
|
|
|
|
from markitect.prompts.templates.models import ContentMacro, MacroKind
|
|
|
|
|
|
class MacroParsingError(Exception):
    """Raised when macro syntax is invalid.

    The message describes the offending macro; when raised while parsing a
    line of a template, the parser prefixes it with ``Line <n>:``.
    """
|
|
|
|
|
|
class MacroParser:
    """
    Parser for extracting content macros from template text.

    Two syntaxes are recognised.

    Full form:
        {{<kind>:<target>[|<param1>=<value1>|<param2>=<value2>...]}}

        Where kind is: require, optional, or generate (the aliases
        "required" and "gen" are accepted too; kind is lower-cased before
        lookup, so it is case-insensitive).

    Shorthand form:
        @{<target>}

        Always produces a MacroKind.REQUIRED macro with no parameters.

    Examples:
        {{require:glossary}}
        {{optional:technical-constraints}}
        {{generate:code-examples|language=python|framework=fastapi}}
        @{glossary}
    """

    # Macro pattern: {{kind:target|param=value|...}}
    # More permissive pattern to catch all macro-like syntax for validation.
    # Allows an unknown kind and an empty target so that _parse_match can
    # raise a descriptive MacroParsingError instead of silently ignoring them.
    MACRO_PATTERN = re.compile(
        r'\{\{([a-zA-Z]+):([^}|]*)([^}]*)\}\}',
        re.IGNORECASE
    )

    # Shorthand pattern: @{target} -- maps to MacroKind.REQUIRED
    SHORTHAND_PATTERN = re.compile(r'@\{([^}]+)\}')

    # Parameter pattern: |key=value
    # The key may not contain '|', so a segment without '=' (e.g. "|flag")
    # is skipped rather than being glued onto the following key.
    PARAM_PATTERN = re.compile(r'\|([^=|]+)=([^|]+)')

    # Supported macro kinds mapping (keys are compared after lower-casing)
    KIND_MAPPING = {
        'require': MacroKind.REQUIRED,
        'required': MacroKind.REQUIRED,
        'optional': MacroKind.OPTIONAL,
        'generate': MacroKind.GENERATE,
        'gen': MacroKind.GENERATE,
    }

    def parse(self, content: str) -> List[ContentMacro]:
        """
        Extract all content macros from template content.

        The content is split on '\\n' and each line is parsed on its own, so
        a macro cannot span lines. Line numbers are 1-based.

        Args:
            content: Template content string

        Returns:
            List of extracted ContentMacros, in document order

        Raises:
            MacroParsingError: If macro syntax is invalid
        """
        macros: List[ContentMacro] = []
        for line_num, line in enumerate(content.split('\n'), start=1):
            macros.extend(self._parse_line(line, line_num))
        return macros

    def _shorthand_matches(
        self, text: str, taken: List[Tuple[int, int]]
    ) -> List[re.Match]:
        """
        Find @{target} shorthand matches that do not overlap a full macro.

        Args:
            text: Text to scan
            taken: (start, end) spans already claimed by {{...}} macros

        Returns:
            Shorthand match objects whose span overlaps none of ``taken``
        """
        return [
            match
            for match in self.SHORTHAND_PATTERN.finditer(text)
            if not any(
                match.start() < end and start < match.end()
                for start, end in taken
            )
        ]

    def _parse_line(self, line: str, line_number: int) -> List[ContentMacro]:
        """
        Extract macros from a single line.

        Both the {{kind:target}} form and the @{target} shorthand are
        collected, then returned in left-to-right order. A shorthand match
        that overlaps a {{...}} macro is ignored so the same text is never
        reported twice.

        Args:
            line: Line of text
            line_number: Line number for error reporting

        Returns:
            List of macros found in line, ordered by position

        Raises:
            MacroParsingError: If a {{...}} macro on the line is malformed;
                the message is prefixed with the line number
        """
        found = []        # (start offset, ContentMacro) pairs
        full_spans = []   # spans claimed by {{...}} macros

        for match in self.MACRO_PATTERN.finditer(line):
            try:
                macro = self._parse_match(match, line_number)
            except MacroParsingError as e:
                # Add line context to error
                raise MacroParsingError(
                    f"Line {line_number}: {e}"
                ) from e
            found.append((match.start(), macro))
            full_spans.append(match.span())

        # Scan for @{target} shorthand syntax
        for match in self._shorthand_matches(line, full_spans):
            target = match.group(1).strip()
            if not target:
                # Whitespace-only target: not treated as a macro
                continue
            found.append((
                match.start(),
                ContentMacro(
                    kind=MacroKind.REQUIRED,
                    target=target,
                    parameters={},
                    raw_text=match.group(0),
                    line_number=line_number,
                ),
            ))

        # Sort on the offset only; ContentMacro objects are never compared
        found.sort(key=lambda item: item[0])
        return [macro for _, macro in found]

    def _parse_match(self, match: re.Match, line_number: int) -> ContentMacro:
        """
        Parse a regex match into a ContentMacro.

        Args:
            match: Regex match object produced by MACRO_PATTERN
            line_number: Line number

        Returns:
            Parsed ContentMacro

        Raises:
            MacroParsingError: If macro is malformed (unknown kind or
                empty target)
        """
        kind_str = match.group(1).lower()
        target = match.group(2).strip()
        params_str = match.group(3)
        raw_text = match.group(0)

        # Validate and map kind
        if kind_str not in self.KIND_MAPPING:
            raise MacroParsingError(
                f"Invalid macro kind '{kind_str}', expected: require, optional, or generate"
            )

        kind = self.KIND_MAPPING[kind_str]

        # Validate target
        if not target:
            raise MacroParsingError(
                f"Macro target cannot be empty in: {raw_text}"
            )

        # Parse parameters
        parameters = self._parse_parameters(params_str)

        return ContentMacro(
            kind=kind,
            target=target,
            parameters=parameters,
            raw_text=raw_text,
            line_number=line_number,
        )

    def _parse_parameters(self, params_str: str) -> dict:
        """
        Parse parameter string into dictionary.

        Keys and values are stripped of surrounding whitespace. Segments
        that do not have the form ``|key=value`` are skipped; a repeated key
        keeps its last value.

        Args:
            params_str: Parameter string like "|key1=value1|key2=value2"

        Returns:
            Dictionary of parameters
        """
        if not params_str:
            return {}

        parameters = {}
        for match in self.PARAM_PATTERN.finditer(params_str):
            key = match.group(1).strip()
            value = match.group(2).strip()
            parameters[key] = value

        return parameters

    def find_macro_positions(self, content: str) -> List[Tuple[int, int, str]]:
        """
        Find positions of all macros in content.

        Useful for macro substitution during resolution. Matching is purely
        syntactic: no kind or target validation is performed here. Shorthand
        matches that overlap a {{...}} macro are omitted so the returned
        spans from the two syntaxes never overlap each other.

        Args:
            content: Template content

        Returns:
            List of (start_pos, end_pos, macro_text) tuples sorted by position
        """
        positions = [
            (match.start(), match.end(), match.group(0))
            for match in self.MACRO_PATTERN.finditer(content)
        ]
        full_spans = [(start, end) for start, end, _ in positions]

        positions.extend(
            (match.start(), match.end(), match.group(0))
            for match in self._shorthand_matches(content, full_spans)
        )

        positions.sort(key=lambda p: p[0])
        return positions

    def count_macros(self, content: str) -> dict:
        """
        Count macros by kind.

        Args:
            content: Template content

        Returns:
            Dictionary with counts: {'required': N, 'optional': M, 'generate': K}

        Raises:
            MacroParsingError: If macro syntax is invalid (propagated from
                parse)
        """
        macros = self.parse(content)
        counts = {
            'required': sum(1 for m in macros if m.kind == MacroKind.REQUIRED),
            'optional': sum(1 for m in macros if m.kind == MacroKind.OPTIONAL),
            'generate': sum(1 for m in macros if m.kind == MacroKind.GENERATE),
        }
        return counts

    def has_macros(self, content: str) -> bool:
        """
        Check if content contains any macros.

        This is a pattern check only: it returns True for any text matching
        MACRO_PATTERN or SHORTHAND_PATTERN, including text that parse would
        reject with MacroParsingError.

        Args:
            content: Template content

        Returns:
            True if any macros found
        """
        return bool(
            self.MACRO_PATTERN.search(content)
            or self.SHORTHAND_PATTERN.search(content)
        )
|