""" Built-in processor plugins for MarkiTect. These processors handle various content processing tasks. """ import re from typing import Any from ..base import ProcessorPlugin, PluginMetadata, PluginType from ..decorators import register_plugin @register_plugin("markdown_processor") class MarkdownProcessor(ProcessorPlugin): """Basic markdown content processor.""" @property def metadata(self) -> PluginMetadata: return PluginMetadata( name="markdown_processor", version="1.0.0", description="Process markdown content", author="MarkiTect Team", plugin_type=PluginType.PROCESSOR ) def process(self, content: str, **kwargs) -> str: """Process markdown content.""" # Basic markdown processing - normalize line endings content = content.replace('\r\n', '\n').replace('\r', '\n') # Add processing options if kwargs.get('normalize_headers', False): content = self._normalize_headers(content) if kwargs.get('fix_line_endings', True): content = self._fix_line_endings(content) return content def can_process(self, content: str, **kwargs) -> bool: """Check if content appears to be markdown.""" # Simple heuristic - check for markdown patterns markdown_patterns = [ r'^#{1,6}\s', # Headers r'^\*\s', # Unordered lists r'^\d+\.\s', # Ordered lists r'\*\*.*\*\*', # Bold r'\*.*\*', # Italic r'`.*`', # Inline code r'```', # Code blocks ] for pattern in markdown_patterns: if re.search(pattern, content, re.MULTILINE): return True return False def _normalize_headers(self, content: str) -> str: """Normalize header formatting.""" lines = content.split('\n') normalized_lines = [] for line in lines: # Ensure space after # in headers if re.match(r'^#{1,6}[^#\s]', line): hash_count = len(line) - len(line.lstrip('#')) rest = line[hash_count:].lstrip() normalized_lines.append('#' * hash_count + ' ' + rest) else: normalized_lines.append(line) return '\n'.join(normalized_lines) def _fix_line_endings(self, content: str) -> str: """Fix common line ending issues.""" # Remove trailing whitespace lines = [line.rstrip() for line in content.split('\n')] # Ensure single newline at end while lines and not lines[-1]: lines.pop() return '\n'.join(lines) + '\n' if lines else '' @register_plugin("text_processor") class TextProcessor(ProcessorPlugin): """Generic text processor.""" @property def metadata(self) -> PluginMetadata: return PluginMetadata( name="text_processor", version="1.0.0", description="Process generic text content", author="MarkiTect Team", plugin_type=PluginType.PROCESSOR ) def process(self, content: str, **kwargs) -> str: """Process text content.""" if kwargs.get('normalize_whitespace', False): content = self._normalize_whitespace(content) if kwargs.get('remove_empty_lines', False): content = self._remove_empty_lines(content) if kwargs.get('trim_lines', False): content = self._trim_lines(content) return content def can_process(self, content: str, **kwargs) -> bool: """Can process any text content.""" return isinstance(content, str) def _normalize_whitespace(self, content: str) -> str: """Normalize whitespace in content.""" # Replace multiple spaces with single space content = re.sub(r' +', ' ', content) # Replace multiple newlines with double newline content = re.sub(r'\n\s*\n\s*\n+', '\n\n', content) return content def _remove_empty_lines(self, content: str) -> str: """Remove completely empty lines.""" lines = content.split('\n') return '\n'.join(line for line in lines if line.strip()) def _trim_lines(self, content: str) -> str: """Trim whitespace from each line.""" lines = content.split('\n') return '\n'.join(line.strip() for line in lines)