# markitect-main/markitect/plugins/builtin/processors.py

"""
Built-in processor plugins for MarkiTect.

These processors handle various content processing tasks.
"""

import re
from typing import Any

from ..base import ProcessorPlugin, PluginMetadata, PluginType
from ..decorators import register_plugin


@register_plugin("markdown_processor")
class MarkdownProcessor(ProcessorPlugin):
    """Basic markdown content processor.

    ``process`` always converts CRLF / CR line endings to LF and then applies
    the optional clean-up steps selected through keyword arguments.
    """

    # Heuristic patterns used by ``can_process``; a single hit is enough.
    # Compiled once at class creation instead of on every call.
    _MARKDOWN_PATTERNS = tuple(
        re.compile(pattern, re.MULTILINE)
        for pattern in (
            r'^#{1,6}\s',   # Headers
            r'^\*\s',       # Unordered lists
            r'^\d+\.\s',    # Ordered lists
            r'\*\*.*\*\*',  # Bold
            r'\*.*\*',      # Italic
            r'`.*`',        # Inline code
            r'```',         # Code blocks
        )
    )

    # One to six '#' immediately followed by a character that is neither
    # '#' nor whitespace, e.g. "##Title".
    _UNSPACED_HEADER = re.compile(r'^#{1,6}[^#\s]')

    # Opening/closing marker of a fenced code block: up to three spaces of
    # indentation, then at least three backticks or three tildes.
    _FENCE = re.compile(r'^ {0,3}(`{3,}|~{3,})')

    @property
    def metadata(self) -> PluginMetadata:
        """Return the static metadata describing this plugin."""
        return PluginMetadata(
            name="markdown_processor",
            version="1.0.0",
            description="Process markdown content",
            author="MarkiTect Team",
            plugin_type=PluginType.PROCESSOR
        )

    def process(self, content: str, **kwargs: Any) -> str:
        """Process markdown content.

        Args:
            content: Markdown text to clean up.
            **kwargs: Processing options:
                normalize_headers (default ``False``): insert the missing
                    space between the leading ``#`` run and the header text.
                fix_line_endings (default ``True``): strip trailing
                    whitespace from every line and end the text with exactly
                    one newline.

        Returns:
            The processed text, always using ``\\n`` line endings.
        """
        # Basic markdown processing - normalize line endings
        content = content.replace('\r\n', '\n').replace('\r', '\n')

        if kwargs.get('normalize_headers', False):
            content = self._normalize_headers(content)

        if kwargs.get('fix_line_endings', True):
            content = self._fix_line_endings(content)

        return content

    def can_process(self, content: str, **kwargs: Any) -> bool:
        """Check if content appears to be markdown.

        This is a simple heuristic: it returns ``True`` as soon as any of
        the known markdown patterns occurs anywhere in ``content``.
        Non-string input is reported as not processable instead of raising.
        """
        if not isinstance(content, str):
            return False

        return any(
            pattern.search(content) for pattern in self._MARKDOWN_PATTERNS
        )

    def _normalize_headers(self, content: str) -> str:
        """Ensure a space follows the leading ``#`` run of each header.

        Lines inside fenced code blocks (backtick or tilde fences) are left
        untouched, so code such as ``#include <x>`` or ``#!/bin/sh`` in a
        code sample is not rewritten as if it were a header.
        """
        normalized_lines = []
        open_fence = None  # marker that opened the current fence, if any

        for line in content.split('\n'):
            fence = self._FENCE.match(line)
            marker = fence.group(1) if fence else ''

            if open_fence is None:
                if marker:
                    open_fence = marker
                elif self._UNSPACED_HEADER.match(line):
                    hash_count = len(line) - len(line.lstrip('#'))
                    rest = line[hash_count:].lstrip()
                    line = '#' * hash_count + ' ' + rest
            elif (
                marker
                and marker[0] == open_fence[0]
                and len(marker) >= len(open_fence)
                and not line[fence.end():].strip()
            ):
                # A closing fence uses the same character, is at least as
                # long as the opening one and carries no trailing text.
                open_fence = None

            normalized_lines.append(line)

        return '\n'.join(normalized_lines)

    def _fix_line_endings(self, content: str) -> str:
        """Strip trailing whitespace and end the text with one newline.

        Trailing blank lines are dropped; content that is empty or
        whitespace-only yields ``''``.

        NOTE: trailing spaces are removed from every line, which also
        removes the two-space hard line break markdown allows at the end of
        a line. Pass ``fix_line_endings=False`` to ``process`` to keep them.
        """
        # Remove trailing whitespace
        lines = [line.rstrip() for line in content.split('\n')]

        # Drop trailing blank lines so exactly one newline ends the text
        while lines and not lines[-1]:
            lines.pop()

        if not lines:
            return ''
        return '\n'.join(lines) + '\n'


@register_plugin("text_processor")
class TextProcessor(ProcessorPlugin):
    """Generic text processor.

    Every transformation is opt-in: with no options set, ``process``
    returns the content unchanged.
    """

    @property
    def metadata(self) -> PluginMetadata:
        """Return the static metadata describing this plugin."""
        return PluginMetadata(
            name="text_processor",
            version="1.0.0",
            description="Process generic text content",
            author="MarkiTect Team",
            plugin_type=PluginType.PROCESSOR
        )

    def process(self, content: str, **kwargs) -> str:
        """Apply the requested clean-up steps to ``content``.

        Recognised boolean options (all default to ``False``), applied in
        this order: ``normalize_whitespace``, ``remove_empty_lines``,
        ``trim_lines``.
        """
        pipeline = (
            ('normalize_whitespace', self._normalize_whitespace),
            ('remove_empty_lines', self._remove_empty_lines),
            ('trim_lines', self._trim_lines),
        )

        result = content
        for option, transform in pipeline:
            if kwargs.get(option, False):
                result = transform(result)

        return result

    def can_process(self, content: str, **kwargs) -> bool:
        """Accept any value that is a ``str``."""
        return isinstance(content, str)

    def _normalize_whitespace(self, content: str) -> str:
        """Collapse runs of spaces and oversized blank gaps.

        Each run of spaces becomes one space; a gap containing three or
        more newlines (with optional whitespace in between) becomes exactly
        two newlines.
        """
        single_spaced = re.sub(r' +', ' ', content)
        return re.sub(r'\n\s*\n\s*\n+', '\n\n', single_spaced)

    def _remove_empty_lines(self, content: str) -> str:
        """Drop lines that are empty or contain only whitespace.

        The surviving lines are re-joined with ``\\n``; a trailing newline
        in the input is therefore not preserved.
        """
        kept = [row for row in content.split('\n') if row.strip()]
        return '\n'.join(kept)

    def _trim_lines(self, content: str) -> str:
        """Strip leading and trailing whitespace from every line."""
        return '\n'.join(map(str.strip, content.split('\n')))