""" Macro parser for extracting ContentMacros from template content. Implements FR-2.2: Macro detection and extraction """ import re from typing import List, Tuple from markitect.prompts.templates.models import ContentMacro, MacroKind class MacroParsingError(Exception): """Raised when macro syntax is invalid.""" pass class MacroParser: """ Parser for extracting content macros from template text. Supports macro syntax: {{:[|=|=...]}} Where kind is: require, optional, or generate Examples: {{require:glossary}} {{optional:technical-constraints}} {{generate:code-examples|language=python|framework=fastapi}} """ # Macro pattern: {{kind:target|param=value|...}} # More permissive pattern to catch all macro-like syntax for validation # Allows empty target to enable validation error messages MACRO_PATTERN = re.compile( r'\{\{([a-zA-Z]+):([^}|]*)([^}]*)\}\}', re.IGNORECASE ) # Parameter pattern: |key=value PARAM_PATTERN = re.compile(r'\|([^=]+)=([^|]+)') # Supported macro kinds mapping KIND_MAPPING = { 'require': MacroKind.REQUIRED, 'required': MacroKind.REQUIRED, 'optional': MacroKind.OPTIONAL, 'generate': MacroKind.GENERATE, 'gen': MacroKind.GENERATE, } def parse(self, content: str) -> List[ContentMacro]: """ Extract all content macros from template content. Args: content: Template content string Returns: List of extracted ContentMacros Raises: MacroParsingError: If macro syntax is invalid """ macros = [] lines = content.split('\n') for line_num, line in enumerate(lines, start=1): line_macros = self._parse_line(line, line_num) macros.extend(line_macros) return macros def _parse_line(self, line: str, line_number: int) -> List[ContentMacro]: """ Extract macros from a single line. Args: line: Line of text line_number: Line number for error reporting Returns: List of macros found in line """ macros = [] for match in self.MACRO_PATTERN.finditer(line): try: macro = self._parse_match(match, line_number) macros.append(macro) except MacroParsingError as e: # Add line context to error raise MacroParsingError( f"Line {line_number}: {e}" ) from e return macros def _parse_match(self, match: re.Match, line_number: int) -> ContentMacro: """ Parse a regex match into a ContentMacro. Args: match: Regex match object line_number: Line number Returns: Parsed ContentMacro Raises: MacroParsingError: If macro is malformed """ kind_str = match.group(1).lower() target = match.group(2).strip() params_str = match.group(3) raw_text = match.group(0) # Validate and map kind if kind_str not in self.KIND_MAPPING: raise MacroParsingError( f"Invalid macro kind '{kind_str}', expected: require, optional, or generate" ) kind = self.KIND_MAPPING[kind_str] # Validate target if not target: raise MacroParsingError( f"Macro target cannot be empty in: {raw_text}" ) # Parse parameters parameters = self._parse_parameters(params_str) return ContentMacro( kind=kind, target=target, parameters=parameters, raw_text=raw_text, line_number=line_number, ) def _parse_parameters(self, params_str: str) -> dict: """ Parse parameter string into dictionary. Args: params_str: Parameter string like "|key1=value1|key2=value2" Returns: Dictionary of parameters """ if not params_str: return {} parameters = {} for match in self.PARAM_PATTERN.finditer(params_str): key = match.group(1).strip() value = match.group(2).strip() parameters[key] = value return parameters def find_macro_positions(self, content: str) -> List[Tuple[int, int, str]]: """ Find positions of all macros in content. Useful for macro substitution during resolution. Args: content: Template content Returns: List of (start_pos, end_pos, macro_text) tuples """ positions = [] for match in self.MACRO_PATTERN.finditer(content): positions.append(( match.start(), match.end(), match.group(0) )) return positions def count_macros(self, content: str) -> dict: """ Count macros by kind. Args: content: Template content Returns: Dictionary with counts: {'required': N, 'optional': M, 'generate': K} """ macros = self.parse(content) counts = { 'required': sum(1 for m in macros if m.kind == MacroKind.REQUIRED), 'optional': sum(1 for m in macros if m.kind == MacroKind.OPTIONAL), 'generate': sum(1 for m in macros if m.kind == MacroKind.GENERATE), } return counts def has_macros(self, content: str) -> bool: """ Check if content contains any macros. Args: content: Template content Returns: True if any macros found """ return bool(self.MACRO_PATTERN.search(content))