""" MDZ (Markdown Zip) format implementation. Provides self-contained markdown packages with embedded assets, stored as compressed ZIP archives with standardized structure. """ import json import zipfile from datetime import datetime from pathlib import Path from typing import Dict, List, Any, Optional from .base import PackagingVariant, PackageFormat from .metadata import PackageMetadata, AssetMetadata from .asset_utils import AssetUtils from .path_utils import PathUtils from .errors import PackageFormatError, AssetError class MdzVariant(PackagingVariant): """ MDZ (Markdown Zip) variant implementation. Creates self-contained packages with embedded assets stored as compressed ZIP archives. """ def __init__(self, variant_type=None): """Initialize the MDZ variant.""" # Import ExplodeVariant here to avoid circular import if variant_type is None: from ..explode_variants.enums import ExplodeVariant variant_type = ExplodeVariant.MDZ super().__init__(variant_type) self.format = PackageFormat.MDZ @property def name(self) -> str: return "MDZ Package" @property def description(self) -> str: return "Self-contained markdown package with embedded assets" def create_package(self, source_path: Path, options: Dict[str, Any]) -> Dict[str, Any]: """ Create an MDZ package from source content. Args: source_path: Path to source markdown or directory options: Package creation options Returns: Dictionary with creation results """ output_path = options.get('output_path') if not output_path: if source_path.is_file(): output_path = source_path.with_suffix('.mdz') else: output_path = source_path.parent / f"{source_path.name}.mdz" else: output_path = Path(output_path) # Discover assets assets = AssetUtils.discover_assets(source_path) # Create ZIP package try: with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf: asset_metadata = [] asset_map = {} # Read main markdown content if source_path.is_file(): content = source_path.read_text(encoding='utf-8') else: # For directories, combine markdown files content = self._combine_markdown_files(source_path) # Add assets for asset_path in assets: relative_path = asset_path.relative_to(source_path) if source_path.is_dir() else asset_path.name package_path = f"assets/{relative_path}" # Add asset to ZIP zf.write(asset_path, package_path) # Create metadata metadata = AssetUtils.create_asset_metadata( asset_path, package_path, str(relative_path) ) asset_metadata.append(metadata) # Map for path rewriting asset_map[str(relative_path)] = package_path # Rewrite asset paths in content and add to ZIP updated_content = PathUtils.rewrite_asset_paths(content, asset_map) zf.writestr("content.md", updated_content) # Create and add package metadata package_metadata = PackageMetadata( format=PackageFormat.MDZ, version="1.0", created=datetime.now().isoformat(), markitect_version="0.1.0", assets=asset_metadata ) metadata_json = json.dumps({ 'format': package_metadata.format, 'version': package_metadata.version, 'created': package_metadata.created, 'markitect_version': package_metadata.markitect_version, 'assets': [ { 'path': asset.path, 'original_path': asset.original_path, 'size': asset.size, 'checksum': asset.checksum, 'mime_type': asset.mime_type } for asset in package_metadata.assets ] }, indent=2) zf.writestr("package.json", metadata_json) except Exception as e: raise PackageFormatError(f"Failed to create MDZ package: {e}") return { 'success': True, 'package_path': output_path, 'assets_embedded': len(assets), 'package_size': output_path.stat().st_size } def extract_package(self, package_path: Path, options: Dict[str, Any]) -> Dict[str, Any]: """ Extract an MDZ package to destination. Args: package_path: Path to MDZ package file options: Extraction options Returns: Dictionary with extraction results """ output_dir = options.get('output_dir') if not output_dir: output_dir = package_path.with_suffix('') else: output_dir = Path(output_dir) try: with zipfile.ZipFile(package_path, 'r') as zf: # Extract all files zf.extractall(output_dir) # Get list of extracted files extracted_files = [output_dir / name for name in zf.namelist()] except Exception as e: raise PackageFormatError(f"Failed to extract MDZ package: {e}") return { 'success': True, 'output_directory': output_dir, 'files_extracted': len(extracted_files), 'extracted_files': extracted_files } def get_package_metadata(self, package_path: Path) -> PackageMetadata: """ Get metadata from an MDZ package. Args: package_path: Path to MDZ package file Returns: PackageMetadata object """ try: with zipfile.ZipFile(package_path, 'r') as zf: # Read package metadata metadata_json = zf.read("package.json").decode('utf-8') metadata_dict = json.loads(metadata_json) # Convert asset dictionaries back to AssetMetadata objects assets = [ AssetMetadata(**asset_dict) for asset_dict in metadata_dict.get('assets', []) ] return PackageMetadata( format=metadata_dict['format'], version=metadata_dict['version'], created=metadata_dict['created'], markitect_version=metadata_dict['markitect_version'], assets=assets, dependencies=metadata_dict.get('dependencies') ) except Exception as e: raise PackageFormatError(f"Failed to read MDZ package metadata: {e}") def embed_assets(self, assets: List[Path], package_path: Path) -> List[AssetMetadata]: """ Embed assets into an existing MDZ package. Args: assets: List of asset paths to embed package_path: Path to MDZ package file Returns: List of AssetMetadata for embedded assets """ # This would be implemented for updating existing packages raise NotImplementedError("Asset embedding for existing packages not yet implemented") def rewrite_asset_paths(self, content: str, asset_map: Dict[str, str]) -> str: """ Rewrite asset paths in content. Args: content: Content to process asset_map: Mapping from original to new paths Returns: Content with rewritten paths """ return PathUtils.rewrite_asset_paths(content, asset_map) def _combine_markdown_files(self, directory: Path) -> str: """ Combine markdown files from a directory. Args: directory: Directory containing markdown files Returns: Combined markdown content """ content_parts = [] # Find all markdown files md_files = sorted(directory.rglob("*.md")) for md_file in md_files: try: content = md_file.read_text(encoding='utf-8') content_parts.append(content) except Exception: continue # Skip files that can't be read return "\n\n".join(content_parts) def _normalize_path(self, path: str) -> str: """ Normalize a path for cross-platform compatibility. Args: path: Path to normalize Returns: Normalized path string """ return PathUtils.normalize_path(path) # Required BaseVariant abstract methods def explode(self, input_file: Path, options) -> Any: """ Explode operation for MDZ format. For MDZ packages, this extracts the package to a directory structure. Args: input_file: Path to MDZ package file options: Explosion options Returns: Explosion result """ from ..explode_variants.base_variant import ExplodeResult if not input_file.suffix.lower() == '.mdz': raise PackageFormatError(f"Expected .mdz file, got {input_file}") # Extract package to temporary directory first output_dir = input_file.parent / input_file.stem result = self.extract_package(input_file, {'output_path': output_dir}) return ExplodeResult( output_directory=output_dir, manifest_file=output_dir / "package.json", created_files=[output_dir / "content.md"] + list((output_dir / "assets").rglob("*")), metadata={'extraction_result': result} ) def implode(self, input_directory: Path, options) -> Any: """ Implode operation for MDZ format. For MDZ packages, this creates a package from a directory structure. Args: input_directory: Directory to package options: Implode options Returns: Implode result """ from ..explode_variants.base_variant import ImplodeResult # Create MDZ package from directory output_file = input_directory.with_suffix('.mdz') result = self.create_package(input_directory, {'output_path': output_file}) return ImplodeResult( output_file=output_file, processed_files=list(input_directory.rglob("*")), metadata={'creation_result': result} ) def can_handle_directory(self, directory: Path) -> bool: """ Check if directory can be handled by MDZ variant. Args: directory: Directory to check Returns: True if directory contains MDZ-compatible content """ # Check for package.json (extracted MDZ) or markdown files if (directory / "package.json").exists(): return True # Check for markdown files that could be packaged md_files = list(directory.rglob("*.md")) return len(md_files) > 0 def get_detection_patterns(self) -> Dict[str, Any]: """ Get detection patterns for MDZ format. Returns: Detection pattern configuration """ return { "file_extensions": [".mdz"], "content_signatures": ["package.json"], "directory_patterns": ["assets/"], "confidence_weight": 0.9, "priority": 100 # High priority for explicit .mdz files }