Files
markitect-main/markitect/template/parser.py
tegwick bcbe78d04f feat: Complete Issue #65 Template Engine Foundation + Fix CLI Regression
## Issue #65 - Template Engine Foundation (COMPLETED)
- Implement complete TDD8 methodology with 30 comprehensive tests (100% passing)
- Add template variable parser with Unicode and dot notation support
- Add template rendering engine with strict/lenient modes
- Add business document generation (invoices, reports)
- Add CLI integration with `markitect template-render` command
- Add performance optimization (1000+ variables in <0.1s)

## Critical CLI Regression Fix
- Fix broken `markitect --help` due to import path issues in markitect/issues/base.py
- Add proper path resolution for domain module accessibility
- Add 12 comprehensive CLI integration tests to prevent future regressions
- Restore full CLI functionality with 35+ working commands

## Template Engine Architecture
- markitect/template/parser.py - Variable parsing with comprehensive validation
- markitect/template/engine.py - Template rendering with business logic
- markitect/template/__init__.py - Structured package exports
- Comprehensive exception hierarchy for robust error handling

## Test Coverage Excellence
- 30 Issue #65 tests: parser (9), substitution (14), integration (7)
- 12 CLI integration tests for regression prevention
- Business scenario validation with real invoice/report generation
- Performance benchmarking and error handling validation

## CLI Professional Enhancement
- Add template-render command with comprehensive options
- Fix import path issues preventing CLI access
- Add validation, data checking, output options
- Support JSON/YAML data formats with auto-detection

## Business Impact
- Transform MarkiTect from document analysis to business automation platform
- Enable professional invoice and report generation
- Provide robust CLI interface for document workflows
- Establish foundation for Epic #64 advanced template features

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 15:33:32 +02:00

203 lines
6.7 KiB
Python

"""
Template parser for extracting and analyzing template variables.
This module provides the core parsing functionality for the MarkiTect template engine,
focusing on variable extraction and template syntax analysis.
"""
import re
from typing import List, Set, Optional, Dict, Any
from dataclasses import dataclass
class TemplateParsingError(Exception):
"""Base exception for template parsing errors."""
def __init__(self, message: str, position: Optional[int] = None, context: Optional[str] = None):
self.position = position
self.context = context
super().__init__(message)
class InvalidVariableSyntaxError(TemplateParsingError):
"""Raised when variable syntax is invalid."""
pass
@dataclass
class TemplateAnalysis:
"""Structured template analysis results."""
total_variables: int
unique_variables: int
variables: List[str]
root_variables: List[str]
nested_variables: List[str]
max_nesting_depth: int
syntax_errors: List[str]
class TemplateParser:
"""Parser for template variables and syntax analysis."""
# Regular expression to match template variables {{variable}} or {{object.property}}
# Supports unicode characters in variable names
VARIABLE_PATTERN = re.compile(r'\{\{\s*([a-zA-Z_\u00a0-\uffff][a-zA-Z0-9_\u00a0-\uffff]*(?:\.[a-zA-Z_\u00a0-\uffff][a-zA-Z0-9_\u00a0-\uffff]*)*)\s*\}\}', re.UNICODE)
def __init__(self):
"""Initialize the template parser."""
self._validation_pattern = None
def extract_variables(self, template_text: str) -> List[str]:
"""
Extract all template variables from the given text.
Args:
template_text: The template content to parse
Returns:
List of variable names found in the template (without duplicates)
"""
if not template_text:
return []
# Find all matches using the regex pattern
matches = self.VARIABLE_PATTERN.findall(template_text)
# Use dict.fromkeys() for O(1) deduplication while preserving order
return list(dict.fromkeys(matches))
def get_variable_set(self, template_text: str) -> Set[str]:
"""
Get a set of unique variables from the template.
Args:
template_text: The template content to parse
Returns:
Set of unique variable names
"""
return set(self.extract_variables(template_text))
@property
def _cached_validation_pattern(self) -> re.Pattern:
"""Lazy-loaded validation pattern to avoid recompilation."""
if self._validation_pattern is None:
self._validation_pattern = re.compile(
r'\{\{\s*[a-zA-Z_\u00a0-\uffff][a-zA-Z0-9_\u00a0-\uffff]*(?:\.[a-zA-Z_\u00a0-\uffff][a-zA-Z0-9_\u00a0-\uffff]*)*\s*\}\}',
re.UNICODE
)
return self._validation_pattern
def validate_variable_syntax(self, template_text: str) -> List[str]:
"""
Validate template variable syntax and return any errors.
Args:
template_text: The template content to validate
Returns:
List of error messages for invalid syntax
"""
errors = []
errors.extend(self._check_brace_matching(template_text))
errors.extend(self._check_variable_format(template_text))
return errors
def _check_brace_matching(self, template_text: str) -> List[str]:
"""Check for unmatched braces."""
errors = []
# Look for potential template variable patterns (single or double braces)
potential_vars = re.findall(r'\{+[^}]*\}*', template_text)
for potential in potential_vars:
if potential.count('{') != potential.count('}'):
errors.append(f"Unmatched braces in: {potential}")
return errors
def _check_variable_format(self, template_text: str) -> List[str]:
"""Check variable name format compliance."""
errors = []
# Only check patterns that look like they should be template variables
# Look for double-brace patterns specifically
potential_vars = re.findall(r'\{\{[^}]*\}\}?', template_text)
for potential in potential_vars:
if not self._cached_validation_pattern.match(potential):
if '{{' in potential and '}}' in potential:
errors.append(f"Invalid variable syntax: {potential}")
return errors
def is_valid_variable_name(self, variable_name: str) -> bool:
"""
Check if a variable name follows valid naming conventions.
Args:
variable_name: The variable name to validate
Returns:
True if the variable name is valid, False otherwise
"""
if not variable_name:
return False
# Split on dots for nested property access
parts = variable_name.split('.')
for part in parts:
# Each part must be a valid identifier (supporting unicode)
if not re.match(r'^[a-zA-Z_\u00a0-\uffff][a-zA-Z0-9_\u00a0-\uffff]*$', part, re.UNICODE):
return False
return True
def get_nested_depth(self, variable_name: str) -> int:
"""
Get the nesting depth of a variable (number of dots + 1).
Args:
variable_name: The variable name to analyze
Returns:
Depth of nesting (1 for simple variables, >1 for nested)
"""
return len(variable_name.split('.'))
def get_root_variables(self, template_text: str) -> Set[str]:
"""
Get only the root-level variables (without nested properties).
Args:
template_text: The template content to parse
Returns:
Set of root variable names
"""
variables = self.get_variable_set(template_text)
root_vars = set()
for var in variables:
root = var.split('.')[0]
root_vars.add(root)
return root_vars
def analyze_template(self, template_text: str) -> TemplateAnalysis:
"""
Perform comprehensive analysis of a template.
Args:
template_text: The template content to analyze
Returns:
TemplateAnalysis containing structured analysis results
"""
variables = self.extract_variables(template_text)
return TemplateAnalysis(
total_variables=len(variables),
unique_variables=len(set(variables)),
variables=variables,
root_variables=list(self.get_root_variables(template_text)),
nested_variables=[var for var in variables if '.' in var],
max_nesting_depth=max([self.get_nested_depth(var) for var in variables]) if variables else 0,
syntax_errors=self.validate_variable_syntax(template_text)
)