""" Transclusion directive parsing. Provides parsers and handlers for various transclusion directives including file inclusion, variable substitution, and conditional content. """ import re from typing import Dict, Any, Optional, Tuple, List from dataclasses import dataclass @dataclass class Directive: """Represents a parsed transclusion directive.""" type: str args: Dict[str, Any] content: Optional[str] = None start_pos: int = 0 end_pos: int = 0 class DirectiveParser: """ Parser for transclusion directives in markdown content. Supports various directive types including file inclusion, variable substitution, and conditional content processing. """ # Directive patterns INCLUDE_PATTERN = re.compile(r'\{\{\s*include\s+"([^"]+)"\s*\}\}', re.IGNORECASE) INCLUDE_WITH_ARGS_PATTERN = re.compile( r'\{\{\s*include\s+"([^"]+)"\s+(.+?)\s*\}\}', re.IGNORECASE ) VARIABLE_PATTERN = re.compile(r'\{\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}\}') CONDITIONAL_BLOCK_PATTERN = re.compile( r'\{\{\s*if\s+([^}]+)\s*\}\}(.*?)\{\{\s*endif\s*\}\}', re.DOTALL | re.IGNORECASE ) @classmethod def parse_directives(cls, content: str) -> List[Directive]: """ Parse all directives from content. Args: content: Content to parse Returns: List of parsed directives """ directives = [] # Parse include directives with arguments for match in cls.INCLUDE_WITH_ARGS_PATTERN.finditer(content): file_path = match.group(1) args_str = match.group(2) args = cls._parse_directive_args(args_str) args['file'] = file_path directives.append(Directive( type='include', args=args, start_pos=match.start(), end_pos=match.end() )) # Parse simple include directives for match in cls.INCLUDE_PATTERN.finditer(content): # Skip if already parsed as include with args if any(d.start_pos <= match.start() < d.end_pos for d in directives): continue file_path = match.group(1) directives.append(Directive( type='include', args={'file': file_path}, start_pos=match.start(), end_pos=match.end() )) # Parse variable references for match in cls.VARIABLE_PATTERN.finditer(content): # Skip if inside other directives if any(d.start_pos <= match.start() < d.end_pos for d in directives): continue var_name = match.group(1) directives.append(Directive( type='variable', args={'name': var_name}, start_pos=match.start(), end_pos=match.end() )) # Parse conditional blocks for match in cls.CONDITIONAL_BLOCK_PATTERN.finditer(content): condition = match.group(1) block_content = match.group(2) directives.append(Directive( type='conditional', args={'condition': condition}, content=block_content, start_pos=match.start(), end_pos=match.end() )) # Sort by position to process in order directives.sort(key=lambda d: d.start_pos) return directives @classmethod def _parse_directive_args(cls, args_str: str) -> Dict[str, Any]: """ Parse directive arguments string. Args: args_str: Arguments string to parse Returns: Dictionary of parsed arguments """ args = {} # Simple key=value parsing for part in args_str.split(): if '=' in part: key, value = part.split('=', 1) # Remove quotes if present if value.startswith('"') and value.endswith('"'): value = value[1:-1] elif value.startswith("'") and value.endswith("'"): value = value[1:-1] # Try to convert to appropriate type if value.lower() in ('true', 'false'): value = value.lower() == 'true' elif value.isdigit(): value = int(value) else: try: value = float(value) except ValueError: pass # Keep as string args[key] = value return args @classmethod def extract_file_includes(cls, content: str) -> List[str]: """ Extract all file paths from include directives. Args: content: Content to analyze Returns: List of file paths referenced in include directives """ files = [] # Extract from simple includes for match in cls.INCLUDE_PATTERN.finditer(content): files.append(match.group(1)) # Extract from includes with args for match in cls.INCLUDE_WITH_ARGS_PATTERN.finditer(content): files.append(match.group(1)) return files