""" Example formatter plugin for MarkiTect. This demonstrates how to create a custom formatter plugin. """ import xml.etree.ElementTree as ET from datetime import datetime from typing import Any, Dict, List, Union from markitect.plugins.base import FormatterPlugin, PluginMetadata, PluginType from markitect.plugins.decorators import register_plugin @register_plugin("xml_formatter") class XmlFormatter(FormatterPlugin): """ XML formatter plugin that converts data structures to XML format. Supports formatting of dictionaries, lists, and primitive types into well-formed XML with customizable root element and formatting options. """ @property def metadata(self) -> PluginMetadata: return PluginMetadata( name="xml_formatter", version="1.0.0", description="Format output as XML", author="MarkiTect Team", plugin_type=PluginType.FORMATTER ) def format(self, data: Any, **kwargs) -> str: """ Format data as XML. Args: data: Data to format **kwargs: Formatting options: - root_element: Name of root XML element (default: 'root') - indent: Indentation string (default: ' ') - include_timestamp: Add timestamp attribute (default: False) - encoding: XML encoding declaration (default: 'utf-8') Returns: XML formatted string """ root_name = kwargs.get('root_element', 'root') indent_str = kwargs.get('indent', ' ') include_timestamp = kwargs.get('include_timestamp', False) encoding = kwargs.get('encoding', 'utf-8') # Create root element root = ET.Element(root_name) # Add timestamp if requested if include_timestamp: root.set('timestamp', datetime.now().isoformat()) # Convert data to XML elements self._data_to_xml(data, root) # Create tree and format tree = ET.ElementTree(root) # Format with indentation self._indent_xml(root, indent_str) # Convert to string xml_str = ET.tostring(root, encoding='unicode') # Add XML declaration if encoding specified if encoding: xml_str = f'\\n{xml_str}' return xml_str def get_file_extension(self) -> str: """Get XML file extension.""" return '.xml' def _data_to_xml(self, data: Any, parent: ET.Element) -> None: """Convert data to XML elements recursively.""" if isinstance(data, dict): self._dict_to_xml(data, parent) elif isinstance(data, (list, tuple)): self._list_to_xml(data, parent) else: parent.text = str(data) def _dict_to_xml(self, data: Dict[str, Any], parent: ET.Element) -> None: """Convert dictionary to XML elements.""" for key, value in data.items(): # Sanitize key name for XML element_name = self._sanitize_xml_name(str(key)) element = ET.SubElement(parent, element_name) if isinstance(value, dict): self._dict_to_xml(value, element) elif isinstance(value, (list, tuple)): self._list_to_xml(value, element) else: element.text = str(value) if value is not None else '' def _list_to_xml(self, data: List[Any], parent: ET.Element) -> None: """Convert list to XML elements.""" for i, item in enumerate(data): # Use 'item' as default element name, or extract from dict if isinstance(item, dict) and len(item) == 1: # If dict has single key, use that as element name key = list(item.keys())[0] element_name = self._sanitize_xml_name(str(key)) element = ET.SubElement(parent, element_name) self._data_to_xml(item[key], element) else: element = ET.SubElement(parent, 'item') element.set('index', str(i)) self._data_to_xml(item, element) def _sanitize_xml_name(self, name: str) -> str: """Sanitize string to be valid XML element name.""" # Remove invalid characters and ensure it starts with letter/underscore import re name = re.sub(r'[^a-zA-Z0-9_-]', '_', name) if name and not name[0].isalpha() and name[0] != '_': name = '_' + name return name or 'element' def _indent_xml(self, elem: ET.Element, indent: str, level: int = 0) -> None: """Add indentation to XML for pretty printing.""" i = "\\n" + level * indent if len(elem): if not elem.text or not elem.text.strip(): elem.text = i + indent if not elem.tail or not elem.tail.strip(): elem.tail = i for child in elem: self._indent_xml(child, indent, level + 1) if not child.tail or not child.tail.strip(): child.tail = i else: if level and (not elem.tail or not elem.tail.strip()): elem.tail = i @register_plugin("csv_formatter") class CsvFormatter(FormatterPlugin): """ CSV formatter plugin that converts data structures to CSV format. Best suited for tabular data (list of dictionaries or list of lists). """ @property def metadata(self) -> PluginMetadata: return PluginMetadata( name="csv_formatter", version="1.0.0", description="Format output as CSV", author="MarkiTect Team", plugin_type=PluginType.FORMATTER ) def format(self, data: Any, **kwargs) -> str: """ Format data as CSV. Args: data: Data to format (preferably list of dicts or list of lists) **kwargs: Formatting options: - delimiter: CSV delimiter (default: ',') - quote_char: Quote character (default: '"') - include_headers: Include headers for dict data (default: True) - escape_quotes: Escape quotes in data (default: True) Returns: CSV formatted string """ delimiter = kwargs.get('delimiter', ',') quote_char = kwargs.get('quote_char', '"') include_headers = kwargs.get('include_headers', True) escape_quotes = kwargs.get('escape_quotes', True) if not isinstance(data, (list, tuple)): # Convert single item to list data = [data] if not data: return "" lines = [] # Handle list of dictionaries if isinstance(data[0], dict): # Get all unique keys for headers all_keys = set() for item in data: if isinstance(item, dict): all_keys.update(item.keys()) headers = sorted(all_keys) if include_headers: lines.append(self._format_csv_row(headers, delimiter, quote_char, escape_quotes)) for item in data: if isinstance(item, dict): row = [str(item.get(key, '')) for key in headers] lines.append(self._format_csv_row(row, delimiter, quote_char, escape_quotes)) # Handle list of lists/tuples elif isinstance(data[0], (list, tuple)): for item in data: if isinstance(item, (list, tuple)): row = [str(cell) for cell in item] lines.append(self._format_csv_row(row, delimiter, quote_char, escape_quotes)) # Handle list of primitives else: if include_headers: lines.append(self._format_csv_row(['value'], delimiter, quote_char, escape_quotes)) for item in data: lines.append(self._format_csv_row([str(item)], delimiter, quote_char, escape_quotes)) return '\\n'.join(lines) def get_file_extension(self) -> str: """Get CSV file extension.""" return '.csv' def _format_csv_row(self, row: List[str], delimiter: str, quote_char: str, escape_quotes: bool) -> str: """Format a single CSV row.""" formatted_cells = [] for cell in row: cell_str = str(cell) # Escape quotes if needed if escape_quotes and quote_char in cell_str: cell_str = cell_str.replace(quote_char, quote_char + quote_char) # Quote cell if it contains delimiter, quote char, or newlines if (delimiter in cell_str or quote_char in cell_str or '\\n' in cell_str or '\\r' in cell_str): cell_str = f"{quote_char}{cell_str}{quote_char}" formatted_cells.append(cell_str) return delimiter.join(formatted_cells) # Example usage: if __name__ == '__main__': # Test XML formatter xml_formatter = XmlFormatter() test_data = { 'users': [ {'name': 'John', 'age': 30, 'email': 'john@example.com'}, {'name': 'Jane', 'age': 25, 'email': 'jane@example.com'} ], 'metadata': { 'total_count': 2, 'last_updated': '2023-10-01' } } xml_result = xml_formatter.format(test_data, include_timestamp=True) print("XML Format:") print(xml_result) print() # Test CSV formatter csv_formatter = CsvFormatter() csv_data = [ {'name': 'John', 'age': 30, 'email': 'john@example.com'}, {'name': 'Jane', 'age': 25, 'email': 'jane@example.com'}, {'name': 'Bob Smith', 'age': 35, 'email': 'bob@example.com'} ] csv_result = csv_formatter.format(csv_data) print("CSV Format:") print(csv_result)