Files
markitect-main/markitect/plugins/builtin/processors.py
tegwick b0de32d083 feat: implement comprehensive plugin architecture and extensions system (issue #19)
Complete plugin system implementation providing an extensible architecture for MarkiTect:

🏗️ **Core Plugin Architecture**:
- BasePlugin abstract class with lifecycle management (initialize/cleanup)
- Specialized plugin types: ProcessorPlugin, FormatterPlugin, ValidatorPlugin, ExporterPlugin, CommandPlugin
- PluginMetadata system with version, dependencies, and type information
- Plugin initialization and configuration validation

🔍 **Plugin Discovery & Management**:
- PluginManager with automatic discovery from built-in modules and directories
- PluginRegistry for centralized plugin registration and lifecycle management
- Support for plugin loading, unloading, and reloading with configuration
- Plugin discovery from multiple sources (built-in, directories, packages)

🛠️ **CLI Integration**:
- markitect plugin-list: List all available plugins with metadata
- markitect plugin-load: Load plugins with optional configuration
- markitect plugin-unload: Unload plugins and cleanup resources
- markitect plugin-info: Show detailed plugin information
- markitect plugin-discover: Discover and refresh plugin catalog

📦 **Built-in Plugins**:
- JSON/YAML/Table formatters for output formatting
- Markdown/Text processors for content processing
- Auto-registered via @register_plugin decorator
- Comprehensive configuration options

🔧 **Developer Experience**:
- @register_plugin decorator for easy plugin registration
- Plugin configuration validation and error handling
- Comprehensive API documentation with examples
- Plugin development guide and best practices

📋 **Example Plugins**:
- Advanced text processor with case conversion and pattern replacement
- XML/CSV formatters demonstrating custom output formats
- Complete examples showing plugin development patterns

🧪 **Test Coverage**:
- 59 comprehensive tests covering all plugin functionality
- Tests for plugin lifecycle, registration, discovery, and CLI integration
- Error handling and edge case coverage
- Built-in plugin validation

Technical Implementation:
- Plugin types: processor, formatter, validator, exporter, generator, importer, transformer, extension, backend, command
- Configuration-driven plugin management with YAML/JSON support
- Graceful error handling and plugin isolation
- Plugin dependency validation and compatibility checking

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-03 11:23:32 +02:00

136 lines
4.4 KiB
Python

"""
Built-in processor plugins for MarkiTect.
These processors handle various content processing tasks.
"""
import re
from typing import Any
from ..base import ProcessorPlugin, PluginMetadata, PluginType
from ..decorators import register_plugin
@register_plugin("markdown_processor")
class MarkdownProcessor(ProcessorPlugin):
    """Basic markdown content processor.

    Normalizes line endings and, depending on the keyword options passed to
    :meth:`process`, repairs header spacing and trailing whitespace.
    """

    @property
    def metadata(self) -> PluginMetadata:
        """Return the static metadata describing this plugin."""
        return PluginMetadata(
            name="markdown_processor",
            version="1.0.0",
            description="Process markdown content",
            author="MarkiTect Team",
            plugin_type=PluginType.PROCESSOR
        )

    def process(self, content: str, **kwargs) -> str:
        """Process markdown content.

        Args:
            content: Markdown text to process.
            **kwargs: Processing options:
                normalize_headers (default ``False``): insert the missing
                space after the leading ``#`` run of ATX headers.
                fix_line_endings (default ``True``): strip trailing
                whitespace from every line and end the text with exactly
                one newline.

        Returns:
            The processed text, always using ``\\n`` line endings.
        """
        # Convert Windows (CRLF) and old Mac (CR) line endings to LF first so
        # the line-based helpers below only ever have to split on '\n'.
        content = content.replace('\r\n', '\n').replace('\r', '\n')

        if kwargs.get('normalize_headers', False):
            content = self._normalize_headers(content)
        if kwargs.get('fix_line_endings', True):
            content = self._fix_line_endings(content)
        return content

    def can_process(self, content: str, **kwargs) -> bool:
        """Check if content appears to be markdown.

        This is a cheap heuristic: it returns ``True`` as soon as any one of
        a handful of common markdown constructs is found anywhere in the
        text, and ``False`` otherwise.
        """
        markdown_patterns = (
            r'^#{1,6}\s',    # Headers
            r'^\*\s',        # Unordered lists
            r'^\d+\.\s',     # Ordered lists
            r'\*\*.*\*\*',   # Bold
            r'\*.*\*',       # Italic
            r'`.*`',         # Inline code
            r'```',          # Code blocks
        )
        # MULTILINE lets the '^'-anchored patterns match at the start of any
        # line, not just at the start of the whole text.
        return any(
            re.search(pattern, content, re.MULTILINE)
            for pattern in markdown_patterns
        )

    def _normalize_headers(self, content: str) -> str:
        """Ensure a space follows the ``#`` run of ATX headers.

        ``#Title`` becomes ``# Title``. Lines inside fenced code blocks
        (delimited by three or more backticks or tildes) are left untouched,
        so code such as ``#!/bin/bash`` or ``#include <stdio.h>`` in a sample
        is not rewritten as if it were a header.

        Args:
            content: Text using ``\\n`` line endings.

        Returns:
            The text with header spacing normalized.
        """
        # One to six '#' at line start followed directly by a character that
        # is neither '#' nor whitespace, i.e. a header missing its space.
        header_re = re.compile(r'^(#{1,6})([^#\s])')
        # A fence marker: up to three spaces of indent, then a run of at
        # least three backticks or tildes; group 2 is the rest of the line.
        fence_re = re.compile(r'^ {0,3}(`{3,}|~{3,})(.*)$')

        open_fence = None  # marker run that opened the current code block
        normalized_lines = []
        for line in content.split('\n'):
            fence = fence_re.match(line)

            if open_fence is not None:
                # Inside a code block: only look for the closing fence, which
                # must use the same character, be at least as long as the
                # opening run, and carry nothing but whitespace after it.
                if (fence
                        and fence.group(1)[0] == open_fence[0]
                        and len(fence.group(1)) >= len(open_fence)
                        and not fence.group(2).strip()):
                    open_fence = None
                normalized_lines.append(line)
                continue

            # A backtick run whose remainder contains another backtick is
            # inline code (e.g. ```x```), not the start of a fenced block.
            if fence and not (fence.group(1)[0] == '`'
                              and '`' in fence.group(2)):
                open_fence = fence.group(1)
                normalized_lines.append(line)
                continue

            # The pattern is anchored at the line start, so at most one
            # substitution can happen; non-header lines pass through as-is.
            normalized_lines.append(header_re.sub(r'\1 \2', line, count=1))
        return '\n'.join(normalized_lines)

    def _fix_line_endings(self, content: str) -> str:
        """Strip trailing whitespace and end the text with one newline.

        Trailing whitespace is removed from every line, trailing blank lines
        are dropped, and a single ``\\n`` is appended. Text that is empty or
        consists only of blank lines yields ``''``.

        Note:
            This also removes the two trailing spaces Markdown uses to mark
            a hard line break.
        """
        lines = [line.rstrip() for line in content.split('\n')]
        # Drop blank lines at the end so exactly one newline is appended.
        while lines and not lines[-1]:
            lines.pop()
        return '\n'.join(lines) + '\n' if lines else ''
@register_plugin("text_processor")
class TextProcessor(ProcessorPlugin):
    """Generic text processor offering opt-in whitespace clean-up steps."""

    @property
    def metadata(self) -> PluginMetadata:
        """Return the static metadata describing this plugin."""
        return PluginMetadata(
            name="text_processor",
            version="1.0.0",
            description="Process generic text content",
            author="MarkiTect Team",
            plugin_type=PluginType.PROCESSOR
        )

    def process(self, content: str, **kwargs) -> str:
        """Apply the clean-up steps enabled through keyword options.

        Every option defaults to ``False``. Enabled steps run in this fixed
        order: ``normalize_whitespace``, ``remove_empty_lines``,
        ``trim_lines``. With no option enabled the text is returned as-is.
        """
        pipeline = (
            ('normalize_whitespace', self._normalize_whitespace),
            ('remove_empty_lines', self._remove_empty_lines),
            ('trim_lines', self._trim_lines),
        )
        for option, step in pipeline:
            if kwargs.get(option, False):
                content = step(content)
        return content

    def can_process(self, content: str, **kwargs) -> bool:
        """Accept any value that is a ``str``."""
        return isinstance(content, str)

    def _normalize_whitespace(self, content: str) -> str:
        """Collapse runs of spaces and runs of blank lines.

        Each run of spaces becomes a single space; afterwards, three or more
        newlines (optionally separated by whitespace) become a double
        newline.
        """
        single_spaced = re.sub(r' +', ' ', content)
        return re.sub(r'\n\s*\n\s*\n+', '\n\n', single_spaced)

    def _remove_empty_lines(self, content: str) -> str:
        """Drop every line that is empty or contains only whitespace."""
        kept = [row for row in content.split('\n') if row.strip()]
        return '\n'.join(kept)

    def _trim_lines(self, content: str) -> str:
        """Strip leading and trailing whitespace from each line."""
        return '\n'.join(map(str.strip, content.split('\n')))