## Issue #65 - Template Engine Foundation (COMPLETED) - Implement complete TDD8 methodology with 30 comprehensive tests (100% passing) - Add template variable parser with Unicode and dot notation support - Add template rendering engine with strict/lenient modes - Add business document generation (invoices, reports) - Add CLI integration with `markitect template-render` command - Add performance optimization (1000+ variables in <0.1s) ## Critical CLI Regression Fix - Fix broken `markitect --help` due to import path issues in markitect/issues/base.py - Add proper path resolution for domain module accessibility - Add 12 comprehensive CLI integration tests to prevent future regressions - Restore full CLI functionality with 35+ working commands ## Template Engine Architecture - markitect/template/parser.py - Variable parsing with comprehensive validation - markitect/template/engine.py - Template rendering with business logic - markitect/template/__init__.py - Structured package exports - Comprehensive exception hierarchy for robust error handling ## Test Coverage Excellence - 30 Issue #65 tests: parser (9), substitution (14), integration (7) - 12 CLI integration tests for regression prevention - Business scenario validation with real invoice/report generation - Performance benchmarking and error handling validation ## CLI Professional Enhancement - Add template-render command with comprehensive options - Fix import path issues preventing CLI access - Add validation, data checking, output options - Support JSON/YAML data formats with auto-detection ## Business Impact - Transform MarkiTect from document analysis to business automation platform - Enable professional invoice and report generation - Provide robust CLI interface for document workflows - Establish foundation for Epic #64 advanced template features 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
203 lines
6.7 KiB
Python
203 lines
6.7 KiB
Python
"""
|
|
Template parser for extracting and analyzing template variables.
|
|
|
|
This module provides the core parsing functionality for the MarkiTect template engine,
|
|
focusing on variable extraction and template syntax analysis.
|
|
"""
|
|
|
|
import re
|
|
from typing import List, Set, Optional, Dict, Any
|
|
from dataclasses import dataclass
|
|
|
|
|
|
class TemplateParsingError(Exception):
    """Root of the exception hierarchy raised while parsing templates.

    Attributes:
        position: Optional integer location attached to the error, stored
            exactly as passed in (``None`` when not supplied).
        context: Optional string giving surrounding context for the error,
            stored exactly as passed in (``None`` when not supplied).
    """

    def __init__(self, message: str, position: Optional[int] = None, context: Optional[str] = None):
        """Create the error.

        Args:
            message: Human-readable description; becomes ``str(error)``.
            position: Optional location to record on the instance.
            context: Optional context text to record on the instance.
        """
        # Only the message is forwarded to Exception, so ``args`` stays
        # ``(message,)``; the extra details live on dedicated attributes.
        super().__init__(message)
        self.position = position
        self.context = context
|
|
|
|
|
|
class InvalidVariableSyntaxError(TemplateParsingError):
    """Signals that a template variable is written with invalid syntax.

    Adds no behaviour of its own; it exists so callers can catch this
    specific failure separately from other template parsing errors.
    """
|
|
|
|
|
|
@dataclass
class TemplateAnalysis:
    """Container for the results of analyzing one template.

    Field order is part of the positional constructor signature and must
    not be rearranged.
    """

    # --- counts ---
    total_variables: int        # number of variables counted by the analysis
    unique_variables: int       # number of distinct variable names

    # --- names ---
    variables: List[str]        # variable names found in the template
    root_variables: List[str]   # leading segment (before the first ".") of each name
    nested_variables: List[str]  # names that contain at least one "."

    # --- structure and diagnostics ---
    max_nesting_depth: int      # largest number of dot-separated segments in any name
    syntax_errors: List[str]    # syntax error messages collected for the template
|
|
|
|
|
|
class TemplateParser:
    """Parser for template variables and syntax analysis.

    A template variable is written ``{{name}}`` or ``{{object.property}}``;
    whitespace directly inside the braces is allowed. Each dot-separated
    segment must start with an ASCII letter, ``_`` or a character in
    U+00A0..U+FFFF, and may continue with those characters or ASCII digits.
    """

    # One identifier segment, and a dotted chain of such segments. Keeping a
    # single definition guarantees that extraction, validation and name
    # checking all agree on what a legal variable name is.
    _IDENTIFIER = r'[a-zA-Z_\u00a0-\uffff][a-zA-Z0-9_\u00a0-\uffff]*'
    _DOTTED_NAME = _IDENTIFIER + r'(?:\.' + _IDENTIFIER + r')*'

    # Regular expression to match template variables {{variable}} or
    # {{object.property}}; group 1 captures the name without braces/padding.
    VARIABLE_PATTERN = re.compile(r'\{\{\s*(' + _DOTTED_NAME + r')\s*\}\}', re.UNICODE)

    # Bare variable name (no braces), used with fullmatch().
    _NAME_PATTERN = re.compile(_DOTTED_NAME, re.UNICODE)
    # Any run that opens with "{" — used to spot unbalanced braces.
    _BRACE_RUN_PATTERN = re.compile(r'\{+[^}]*\}*')
    # Anything opening with "{{" and closed by one or two "}".
    _DOUBLE_BRACE_PATTERN = re.compile(r'\{\{[^}]*\}\}?')

    def __init__(self):
        """Initialize the template parser."""
        # Filled in on first access of ``_cached_validation_pattern``.
        self._validation_pattern = None

    def _find_all_variables(self, template_text: str) -> List[str]:
        """Return every variable occurrence in order, duplicates included."""
        if not template_text:
            return []
        return self.VARIABLE_PATTERN.findall(template_text)

    def extract_variables(self, template_text: str) -> List[str]:
        """
        Extract all template variables from the given text.

        Args:
            template_text: The template content to parse

        Returns:
            List of variable names found in the template (without duplicates),
            in order of first appearance. Empty for empty/None input.
        """
        # dict.fromkeys() removes duplicates while keeping first-seen order.
        return list(dict.fromkeys(self._find_all_variables(template_text)))

    def get_variable_set(self, template_text: str) -> Set[str]:
        """
        Get a set of unique variables from the template.

        Args:
            template_text: The template content to parse

        Returns:
            Set of unique variable names
        """
        return set(self._find_all_variables(template_text))

    @property
    def _cached_validation_pattern(self) -> re.Pattern:
        """Lazily compiled pattern matching one well-formed ``{{variable}}``."""
        if self._validation_pattern is None:
            self._validation_pattern = re.compile(
                r'\{\{\s*' + self._DOTTED_NAME + r'\s*\}\}',
                re.UNICODE,
            )
        return self._validation_pattern

    def validate_variable_syntax(self, template_text: str) -> List[str]:
        """
        Validate template variable syntax and return any errors.

        Args:
            template_text: The template content to validate

        Returns:
            List of error messages for invalid syntax: brace-matching
            problems first, then malformed variable names. Empty for
            empty/None input, mirroring ``extract_variables``.
        """
        if not template_text:
            return []
        return [
            *self._check_brace_matching(template_text),
            *self._check_variable_format(template_text),
        ]

    def _check_brace_matching(self, template_text: str) -> List[str]:
        """Report brace runs whose "{" and "}" counts differ."""
        return [
            f"Unmatched braces in: {candidate}"
            for candidate in self._BRACE_RUN_PATTERN.findall(template_text)
            if candidate.count('{') != candidate.count('}')
        ]

    def _check_variable_format(self, template_text: str) -> List[str]:
        """Report fully closed ``{{...}}`` spans whose content is not a valid name."""
        well_formed = self._cached_validation_pattern
        errors = []
        for candidate in self._DOUBLE_BRACE_PATTERN.findall(template_text):
            # Spans closed by a single "}" are a brace-matching problem and
            # are reported by _check_brace_matching, not here.
            if '}}' not in candidate:
                continue
            if not well_formed.match(candidate):
                errors.append(f"Invalid variable syntax: {candidate}")
        return errors

    def is_valid_variable_name(self, variable_name: str) -> bool:
        """
        Check if a variable name follows valid naming conventions.

        Args:
            variable_name: The variable name to validate (no braces)

        Returns:
            True if every dot-separated segment is a valid identifier,
            False otherwise (including for empty/None input)
        """
        if not variable_name:
            return False
        # fullmatch() rather than match() with "$": "$" also matches just
        # before a trailing newline, which would let "name\n" through.
        return self._NAME_PATTERN.fullmatch(variable_name) is not None

    def get_nested_depth(self, variable_name: str) -> int:
        """
        Get the nesting depth of a variable (number of dots + 1).

        Args:
            variable_name: The variable name to analyze

        Returns:
            Depth of nesting (1 for simple variables, >1 for nested)
        """
        return variable_name.count('.') + 1

    def get_root_variables(self, template_text: str) -> Set[str]:
        """
        Get only the root-level variables (without nested properties).

        Args:
            template_text: The template content to parse

        Returns:
            Set of root variable names (the segment before the first dot)
        """
        return {name.split('.')[0] for name in self.extract_variables(template_text)}

    def analyze_template(self, template_text: str) -> "TemplateAnalysis":
        """
        Perform comprehensive analysis of a template.

        Args:
            template_text: The template content to analyze

        Returns:
            TemplateAnalysis containing structured analysis results.
            ``total_variables`` counts every occurrence, ``unique_variables``
            counts distinct names, and the name lists follow the order of
            first appearance in the template.
        """
        occurrences = self._find_all_variables(template_text)
        variables = list(dict.fromkeys(occurrences))
        # Ordered de-duplication keeps the result stable between runs,
        # unlike converting a set to a list.
        roots = list(dict.fromkeys(name.split('.')[0] for name in variables))

        return TemplateAnalysis(
            total_variables=len(occurrences),
            unique_variables=len(variables),
            variables=variables,
            root_variables=roots,
            nested_variables=[name for name in variables if '.' in name],
            max_nesting_depth=max(
                (self.get_nested_depth(name) for name in variables), default=0
            ),
            syntax_errors=self.validate_variable_syntax(template_text),
        )