""" Space to Directory Exporter. Exports Information Space content to a canonical directory structure using the existing VariantFactory for different organization styles. """ import hashlib import json import logging from dataclasses import dataclass, field from datetime import datetime from enum import Enum from pathlib import Path from typing import Dict, Any, Optional, List, Set from ..models import InformationSpace, SpaceDocument from ..events import EventBus, SpaceEventType, SpaceEvent logger = logging.getLogger(__name__) class ExportVariant(Enum): """Directory organization variants for export.""" FLAT = "flat" # All files at root level HIERARCHICAL = "hierarchical" # Folder per document hierarchy BY_PATH = "by_path" # Mirror space_path structure @dataclass class ExportConfig: """ Configuration for directory export. Attributes: variant: Directory organization style include_metadata: Whether to export metadata files include_manifest: Whether to create manifest.json overwrite: Whether to overwrite existing files preserve_timestamps: Whether to preserve file timestamps exclude_patterns: Glob patterns for files to exclude """ variant: ExportVariant = ExportVariant.BY_PATH include_metadata: bool = True include_manifest: bool = True overwrite: bool = False preserve_timestamps: bool = True exclude_patterns: List[str] = field(default_factory=list) @dataclass class ExportedFile: """ Record of an exported file. Attributes: document_id: Source document ID space_path: Original path in space file_path: Exported file path content_hash: Hash of exported content size: File size in bytes """ document_id: str space_path: str file_path: Path content_hash: str size: int @dataclass class ExportResult: """ Result of an export operation. Attributes: space_id: Exported space ID target_directory: Export target directory exported_files: List of exported files skipped_files: Files that were skipped errors: Any errors encountered manifest_path: Path to manifest file if created duration_ms: Export duration in milliseconds """ space_id: str target_directory: Path exported_files: List[ExportedFile] = field(default_factory=list) skipped_files: List[str] = field(default_factory=list) errors: Dict[str, str] = field(default_factory=dict) manifest_path: Optional[Path] = None duration_ms: int = 0 @property def success(self) -> bool: """Check if export was successful.""" return len(self.errors) == 0 @property def file_count(self) -> int: """Total number of exported files.""" return len(self.exported_files) class SpaceDirectoryExporter: """ Exports Information Space content to directory structure. Features: - Multiple directory organization variants - Manifest generation for reimport - Metadata file export - Incremental export (skip unchanged) - Event emission for progress tracking """ def __init__( self, config: Optional[ExportConfig] = None, event_bus: Optional[EventBus] = None, ): """ Initialize the exporter. Args: config: Export configuration event_bus: Event bus for notifications """ self.config = config or ExportConfig() self.event_bus = event_bus def export_space( self, space: InformationSpace, documents: List[SpaceDocument], content_provider: callable, target_directory: Path, ) -> ExportResult: """ Export a space to a directory. Args: space: The space to export documents: Documents in the space content_provider: Function(document_id) -> content string target_directory: Target directory path Returns: ExportResult with details of the export """ start_time = datetime.now() result = ExportResult( space_id=space.id, target_directory=target_directory, ) self._emit_event( SpaceEventType.SYNC_STARTED, space.id, {"direction": "export", "target": str(target_directory)}, ) try: # Create target directory target_directory.mkdir(parents=True, exist_ok=True) # Export each document for doc in documents: try: exported = self._export_document( doc, content_provider, target_directory ) if exported: result.exported_files.append(exported) else: result.skipped_files.append(doc.space_path) except Exception as e: logger.error(f"Failed to export {doc.space_path}: {e}") result.errors[doc.space_path] = str(e) # Create manifest if configured if self.config.include_manifest: result.manifest_path = self._write_manifest( space, result.exported_files, target_directory ) # Create metadata file if configured if self.config.include_metadata: self._write_metadata(space, target_directory) # Calculate duration end_time = datetime.now() result.duration_ms = int((end_time - start_time).total_seconds() * 1000) self._emit_event( SpaceEventType.SYNC_COMPLETED, space.id, { "direction": "export", "file_count": result.file_count, "errors": len(result.errors), }, ) except Exception as e: logger.error(f"Export failed: {e}") result.errors["_export"] = str(e) self._emit_event( SpaceEventType.SYNC_CONFLICT, space.id, {"direction": "export", "error": str(e)}, ) return result def _export_document( self, doc: SpaceDocument, content_provider: callable, target_directory: Path, ) -> Optional[ExportedFile]: """Export a single document.""" # Get content try: content = content_provider(doc.document_id) except Exception as e: raise ValueError(f"Failed to get content for {doc.document_id}: {e}") if content is None: return None # Determine target path based on variant target_path = self._get_target_path(doc, target_directory) # Check if file exists and whether to overwrite if target_path.exists() and not self.config.overwrite: # Check if content is same existing_hash = self._compute_file_hash(target_path) content_hash = self._compute_hash(content) if existing_hash == content_hash: return None # Skip unchanged file # Create parent directories target_path.parent.mkdir(parents=True, exist_ok=True) # Write content target_path.write_text(content, encoding="utf-8") content_hash = self._compute_hash(content) return ExportedFile( document_id=doc.document_id, space_path=doc.space_path, file_path=target_path, content_hash=content_hash, size=len(content.encode("utf-8")), ) def _get_target_path(self, doc: SpaceDocument, target_directory: Path) -> Path: """Determine the target file path based on variant.""" if self.config.variant == ExportVariant.FLAT: # All files at root, use document ID as name filename = self._sanitize_filename(doc.space_path) return target_directory / filename elif self.config.variant == ExportVariant.HIERARCHICAL: # Create folder structure based on path depth parts = doc.space_path.strip("/").split("/") if len(parts) > 1: # Create subdirectory for each path component except last subdir = target_directory.joinpath(*parts[:-1]) return subdir / parts[-1] else: return target_directory / parts[0] else: # BY_PATH (default) # Mirror the space_path structure directly relative_path = doc.space_path.lstrip("/") return target_directory / relative_path def _sanitize_filename(self, path: str) -> str: """Sanitize a path to be a valid filename.""" # Replace path separators with underscores name = path.strip("/").replace("/", "_") # Ensure .md extension if not name.endswith(".md"): name = name + ".md" return name def _compute_hash(self, content: str) -> str: """Compute hash of content.""" return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16] def _compute_file_hash(self, path: Path) -> str: """Compute hash of file content.""" content = path.read_text(encoding="utf-8") return self._compute_hash(content) def _write_manifest( self, space: InformationSpace, exported_files: List[ExportedFile], target_directory: Path, ) -> Path: """Write export manifest file.""" manifest = { "space_id": space.id, "space_name": space.name, "exported_at": datetime.now().isoformat(), "variant": self.config.variant.value, "files": [ { "document_id": f.document_id, "space_path": f.space_path, "file_path": str(f.file_path.relative_to(target_directory)), "content_hash": f.content_hash, "size": f.size, } for f in exported_files ], } manifest_path = target_directory / ".markitect-manifest.json" manifest_path.write_text( json.dumps(manifest, indent=2), encoding="utf-8" ) return manifest_path def _write_metadata( self, space: InformationSpace, target_directory: Path ) -> Path: """Write space metadata file.""" # Serialize metadata properly space_metadata = space.metadata if hasattr(space_metadata, "to_dict"): space_metadata = space_metadata.to_dict() elif not isinstance(space_metadata, dict): space_metadata = {} metadata = { "id": space.id, "name": space.name, "description": space.description, "status": space.status.value if hasattr(space.status, "value") else str(space.status), "config": space.config.to_dict() if hasattr(space.config, "to_dict") else {}, "metadata": space_metadata, } metadata_path = target_directory / ".markitect-space.json" metadata_path.write_text( json.dumps(metadata, indent=2), encoding="utf-8" ) return metadata_path def _emit_event( self, event_type: SpaceEventType, space_id: str, payload: Dict[str, Any] ) -> None: """Emit an event if event bus is available.""" if not self.event_bus: return event = SpaceEvent( event_type=event_type, space_id=space_id, payload=payload, ) self.event_bus.emit(event) class IncrementalExporter(SpaceDirectoryExporter): """ Exporter with incremental change detection. Only exports files that have changed since last export. """ def __init__( self, config: Optional[ExportConfig] = None, event_bus: Optional[EventBus] = None, ): """Initialize incremental exporter.""" super().__init__(config, event_bus) self._last_export_hashes: Dict[str, str] = {} def load_previous_state(self, target_directory: Path) -> None: """Load previous export state from manifest.""" manifest_path = target_directory / ".markitect-manifest.json" if manifest_path.exists(): try: manifest = json.loads(manifest_path.read_text(encoding="utf-8")) for file_info in manifest.get("files", []): self._last_export_hashes[file_info["document_id"]] = file_info[ "content_hash" ] except Exception as e: logger.warning(f"Failed to load previous manifest: {e}") def has_changed(self, document_id: str, content: str) -> bool: """Check if document content has changed.""" current_hash = self._compute_hash(content) previous_hash = self._last_export_hashes.get(document_id) return previous_hash is None or previous_hash != current_hash