"""
Bidirectional Sync Coordinator.

Coordinates two-way synchronization between Information Spaces and directory
structures with conflict detection and resolution.
"""

import hashlib
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path, PurePosixPath
from typing import Any, Callable, Dict, List, Optional, Set, Tuple

from .exporter import SpaceDirectoryExporter, ExportConfig, ExportVariant
from .importer import DirectorySpaceImporter, ImportConfig
from ..models import InformationSpace, SpaceDocument
from ..events import EventBus, SpaceEventType, SpaceEvent

logger = logging.getLogger(__name__)


class SyncDirection(Enum):
    """Direction of sync operation."""

    SPACE_TO_DIRECTORY = "space_to_directory"
    DIRECTORY_TO_SPACE = "directory_to_space"
    BIDIRECTIONAL = "bidirectional"


class ConflictResolution(Enum):
    """How to resolve conflicts."""

    SPACE_WINS = "space_wins"  # Space content takes priority
    DIRECTORY_WINS = "directory_wins"  # Directory content takes priority
    NEWER_WINS = "newer_wins"  # Most recently modified wins
    MANUAL = "manual"  # Require manual resolution
    SKIP = "skip"  # Skip conflicting items


@dataclass
class SyncConfig:
    """
    Configuration for bidirectional sync.

    Attributes:
        direction: Sync direction
        conflict_resolution: How to resolve conflicts
        dry_run: If True, report changes without applying
        delete_orphans: Whether to delete files/docs not in source
        sync_metadata: Whether to sync metadata files
    """

    direction: SyncDirection = SyncDirection.BIDIRECTIONAL
    conflict_resolution: ConflictResolution = ConflictResolution.NEWER_WINS
    dry_run: bool = False
    delete_orphans: bool = False
    sync_metadata: bool = True


@dataclass
class FileState:
    """
    State of a file for sync comparison.

    Attributes:
        path: File path or space path
        content_hash: Content hash
        modified_at: Last modification time
        size: Content size
        source: Where this state came from ('space' or 'directory')
    """

    path: str
    content_hash: str
    modified_at: Optional[datetime] = None
    size: int = 0
    source: str = "unknown"


@dataclass
class SyncAction:
    """
    A sync action to perform.

    Attributes:
        action: Action type ('create', 'update', 'delete', 'conflict')
        path: Target path
        source: Source of the action
        target: Target of the action
        space_state: State in space (if exists)
        directory_state: State in directory (if exists)
    """

    action: str
    path: str
    source: str
    target: str
    space_state: Optional[FileState] = None
    directory_state: Optional[FileState] = None


@dataclass
class SyncConflict:
    """
    A sync conflict requiring resolution.

    Attributes:
        path: Conflicting path
        space_state: State in space
        directory_state: State in directory
        resolution: How conflict was resolved
        winner: Which side won ('space', 'directory', 'none')
    """

    path: str
    space_state: FileState
    directory_state: FileState
    resolution: ConflictResolution
    winner: str = "none"


@dataclass
class SyncResult:
    """
    Result of a sync operation.

    Attributes:
        space_id: Space ID
        directory: Sync directory
        direction: Sync direction used
        actions_performed: Actions that were performed (in dry-run mode, the
            actions that would have been performed)
        conflicts: Conflicts encountered
        errors: Any errors, keyed by path ('_sync' for a whole-run failure)
        created_count: Files/docs created
        updated_count: Files/docs updated
        deleted_count: Files/docs deleted
        skipped_count: Items skipped (unresolved conflicts and actions for
            which no handler callback was supplied)
        duration_ms: Sync duration
    """

    space_id: str
    directory: Path
    direction: SyncDirection
    actions_performed: List[SyncAction] = field(default_factory=list)
    conflicts: List[SyncConflict] = field(default_factory=list)
    errors: Dict[str, str] = field(default_factory=dict)
    created_count: int = 0
    updated_count: int = 0
    deleted_count: int = 0
    skipped_count: int = 0
    duration_ms: int = 0

    @property
    def success(self) -> bool:
        """Check if sync was successful."""
        return len(self.errors) == 0

    @property
    def has_conflicts(self) -> bool:
        """Check if there were unresolved conflicts."""
        return any(c.winner == "none" for c in self.conflicts)


class BidirectionalSyncCoordinator:
    """
    Coordinates bidirectional sync between space and directory.

    Features:
    - Two-way change detection
    - Conflict detection and resolution
    - Dry-run mode for preview
    - Orphan cleanup
    - Event emission for progress
    """

    def __init__(
        self,
        config: Optional[SyncConfig] = None,
        event_bus: Optional[EventBus] = None,
    ):
        """
        Initialize the sync coordinator.

        Args:
            config: Sync configuration
            event_bus: Event bus for notifications
        """
        self.config = config or SyncConfig()
        self.event_bus = event_bus
        self._exporter = SpaceDirectoryExporter(event_bus=event_bus)
        self._importer = DirectorySpaceImporter(event_bus=event_bus)

    def sync(
        self,
        space: InformationSpace,
        documents: List[SpaceDocument],
        content_provider: Callable[..., Any],
        directory: Path,
        document_updater: Optional[Callable[..., Any]] = None,
        document_creator: Optional[Callable[..., Any]] = None,
        document_deleter: Optional[Callable[..., Any]] = None,
    ) -> SyncResult:
        """
        Perform synchronization.

        Args:
            space: The space to sync
            documents: Documents in the space
            content_provider: Function(document_id) -> content
            directory: Directory to sync with
            document_updater: Function(document_id, content) -> None
            document_creator: Function(space_path, content) -> document_id
            document_deleter: Function(document_id) -> None

        Returns:
            SyncResult with details of the sync. A failure of the run as a
            whole is reported under ``errors["_sync"]`` rather than raised.
        """
        start_time = datetime.now()
        result = SyncResult(
            space_id=space.id,
            directory=directory,
            direction=self.config.direction,
        )

        self._emit_event(
            SpaceEventType.SYNC_STARTED,
            space.id,
            {
                "direction": self.config.direction.value,
                "directory": str(directory),
            },
        )

        try:
            # Build state from both sides
            space_state = self._build_space_state(documents, content_provider)
            directory_state = self._build_directory_state(directory)

            # Compute diff and required actions
            actions = self._compute_actions(space_state, directory_state)

            # Handle conflicts; anything left without a winner is a skip.
            actions, conflicts = self._resolve_conflicts(actions)
            result.conflicts = conflicts
            result.skipped_count += sum(1 for c in conflicts if c.winner == "none")

            # Execute actions (unless dry run)
            if self.config.dry_run:
                result.actions_performed = actions
            else:
                self._execute_actions(
                    actions,
                    space,
                    directory,
                    content_provider,
                    document_updater,
                    document_creator,
                    document_deleter,
                    result,
                    documents,
                )

            result.duration_ms = self._elapsed_ms(start_time)

            self._emit_event(
                SpaceEventType.SYNC_COMPLETED,
                space.id,
                {
                    "direction": self.config.direction.value,
                    "created": result.created_count,
                    "updated": result.updated_count,
                    "deleted": result.deleted_count,
                    "conflicts": len(result.conflicts),
                },
            )
        except Exception as e:
            # Top-level boundary: report the failure in the result instead of
            # propagating, and still record how long the attempt took.
            logger.exception("Sync failed: %s", e)
            result.errors["_sync"] = str(e)
            result.duration_ms = self._elapsed_ms(start_time)

        return result

    @staticmethod
    def _elapsed_ms(start_time: datetime) -> int:
        """Return whole milliseconds elapsed since ``start_time``."""
        return int((datetime.now() - start_time).total_seconds() * 1000)

    def _build_space_state(
        self,
        documents: List[SpaceDocument],
        content_provider: Callable[..., Any],
    ) -> Dict[str, FileState]:
        """
        Build state map from space documents.

        Documents whose content is empty/None, or whose content cannot be
        fetched, are left out of the map (a warning is logged for failures).
        """
        state: Dict[str, FileState] = {}

        for doc in documents:
            try:
                content = content_provider(doc.document_id)
                if content:
                    state[doc.space_path] = FileState(
                        path=doc.space_path,
                        content_hash=self._compute_hash(content),
                        modified_at=getattr(doc, "updated_at", None),
                        size=len(content.encode("utf-8")),
                        source="space",
                    )
            except Exception as e:
                # content_provider is an arbitrary callback; stay best-effort.
                logger.warning(
                    "Failed to get content for %s: %s", doc.space_path, e
                )

        return state

    def _build_directory_state(self, directory: Path) -> Dict[str, FileState]:
        """
        Build state map from directory files.

        Scans ``directory`` recursively for ``*.md`` files, skipping files
        whose name starts with a dot. Keys are '/'-prefixed paths relative to
        ``directory``. Unreadable files are logged and skipped.
        """
        state: Dict[str, FileState] = {}

        if not directory.exists():
            return state

        for file_path in directory.rglob("*.md"):
            if file_path.name.startswith("."):
                continue

            try:
                content = file_path.read_text(encoding="utf-8")
                # as_posix() normalises Windows separators without mangling
                # a literal backslash in a POSIX file name.
                space_path = "/" + file_path.relative_to(directory).as_posix()
                stat = file_path.stat()
                state[space_path] = FileState(
                    path=space_path,
                    content_hash=self._compute_hash(content),
                    modified_at=datetime.fromtimestamp(stat.st_mtime),
                    size=stat.st_size,
                    source="directory",
                )
            except (OSError, UnicodeError) as e:
                logger.warning("Failed to read %s: %s", file_path, e)

        return state

    def _compute_actions(
        self,
        space_state: Dict[str, FileState],
        directory_state: Dict[str, FileState],
    ) -> List[SyncAction]:
        """
        Compute required sync actions.

        Paths present on both sides with different hashes become 'conflict'
        actions. Paths present on one side become a 'create' on the other side
        when the configured direction allows it, otherwise a 'delete' on the
        side that has them if ``delete_orphans`` is set. Actions are returned
        in sorted path order so runs are reproducible.
        """
        actions: List[SyncAction] = []
        direction = self.config.direction
        to_directory = direction in (
            SyncDirection.SPACE_TO_DIRECTORY,
            SyncDirection.BIDIRECTIONAL,
        )
        to_space = direction in (
            SyncDirection.DIRECTORY_TO_SPACE,
            SyncDirection.BIDIRECTIONAL,
        )

        for path in sorted(set(space_state) | set(directory_state)):
            space_file = space_state.get(path)
            dir_file = directory_state.get(path)

            if space_file and dir_file:
                # Exists in both - check for changes
                if space_file.content_hash != dir_file.content_hash:
                    actions.append(
                        SyncAction(
                            action="conflict",
                            path=path,
                            source="both",
                            target="both",
                            space_state=space_file,
                            directory_state=dir_file,
                        )
                    )
            elif space_file:
                # Only in space
                if to_directory:
                    actions.append(
                        SyncAction(
                            action="create",
                            path=path,
                            source="space",
                            target="directory",
                            space_state=space_file,
                        )
                    )
                elif self.config.delete_orphans:
                    actions.append(
                        SyncAction(
                            action="delete",
                            path=path,
                            source="space",
                            target="space",
                            space_state=space_file,
                        )
                    )
            elif dir_file:
                # Only in directory
                if to_space:
                    actions.append(
                        SyncAction(
                            action="create",
                            path=path,
                            source="directory",
                            target="space",
                            directory_state=dir_file,
                        )
                    )
                elif self.config.delete_orphans:
                    actions.append(
                        SyncAction(
                            action="delete",
                            path=path,
                            source="directory",
                            target="directory",
                            directory_state=dir_file,
                        )
                    )

        return actions

    def _resolve_conflicts(
        self, actions: List[SyncAction]
    ) -> Tuple[List[SyncAction], List[SyncConflict]]:
        """
        Resolve conflicts in actions.

        Non-conflict actions pass through unchanged. Each 'conflict' action is
        recorded as a SyncConflict and, when the configured strategy picks a
        winner, replaced by an 'update' from the winning side to the other.
        SKIP and MANUAL leave the conflict unresolved (winner 'none').

        Returns:
            (actions to execute, conflicts encountered)
        """
        # NOTE(review): resolution does not consult config.direction, so a
        # one-way sync can still produce an update that writes to the source
        # side (e.g. NEWER_WINS with a newer directory file under
        # SPACE_TO_DIRECTORY). Confirm whether that is intended.
        resolved_actions: List[SyncAction] = []
        conflicts: List[SyncConflict] = []

        for action in actions:
            if action.action != "conflict":
                resolved_actions.append(action)
                continue

            conflict = SyncConflict(
                path=action.path,
                space_state=action.space_state,
                directory_state=action.directory_state,
                resolution=self.config.conflict_resolution,
            )
            conflict.winner = self._pick_winner(action)
            if conflict.winner != "none":
                resolved_actions.append(
                    self._update_from(conflict.winner, action)
                )
            conflicts.append(conflict)

        return resolved_actions, conflicts

    def _pick_winner(self, action: SyncAction) -> str:
        """Return 'space', 'directory' or 'none' for a conflict action."""
        resolution = self.config.conflict_resolution

        if resolution == ConflictResolution.SPACE_WINS:
            return "space"
        if resolution == ConflictResolution.DIRECTORY_WINS:
            return "directory"
        if resolution == ConflictResolution.NEWER_WINS:
            space_time = self._comparable_time(action.space_state.modified_at)
            dir_time = self._comparable_time(
                action.directory_state.modified_at
            )
            # Ties go to the space.
            return "space" if space_time >= dir_time else "directory"
        # SKIP and MANUAL leave the conflict unresolved.
        return "none"

    @staticmethod
    def _comparable_time(value: Optional[datetime]) -> datetime:
        """
        Normalise a timestamp so naive and aware values can be compared.

        Directory mtimes are naive local times while space timestamps may be
        timezone-aware; comparing the two directly raises TypeError. Missing
        values sort as the earliest possible time; naive values are
        interpreted as local time.
        """
        if value is None:
            return datetime.min.replace(tzinfo=timezone.utc)
        if value.tzinfo is None:
            try:
                return value.astimezone(timezone.utc)
            except (OverflowError, OSError, ValueError):
                # Out of the platform's convertible range; label it as UTC.
                return value.replace(tzinfo=timezone.utc)
        return value

    @staticmethod
    def _update_from(winner: str, action: SyncAction) -> SyncAction:
        """Build the 'update' action that propagates the winning side."""
        if winner == "space":
            return SyncAction(
                action="update",
                path=action.path,
                source="space",
                target="directory",
                space_state=action.space_state,
            )
        return SyncAction(
            action="update",
            path=action.path,
            source="directory",
            target="space",
            directory_state=action.directory_state,
        )

    def _execute_actions(
        self,
        actions: List[SyncAction],
        space: InformationSpace,
        directory: Path,
        content_provider: Callable[..., Any],
        document_updater: Optional[Callable[..., Any]],
        document_creator: Optional[Callable[..., Any]],
        document_deleter: Optional[Callable[..., Any]],
        result: SyncResult,
        documents: Optional[List[SpaceDocument]] = None,
    ) -> None:
        """
        Execute sync actions.

        Each action is applied independently: a failure is logged and stored
        in ``result.errors`` under the action's path, and processing continues
        with the next action. Actions that cannot be applied because the
        needed callback was not supplied are counted in
        ``result.skipped_count`` and not listed as performed.

        Args:
            actions: Actions to apply
            space: The space being synced (currently unused here)
            directory: Sync directory
            content_provider: Function(document_id) -> content
            document_updater: Function(document_id, content) -> None
            document_creator: Function(space_path, content) -> document_id
            document_deleter: Function(document_id) -> None
            result: Result object whose counters/lists are updated in place
            documents: Space documents, used to map space paths to document
                ids; without it, actions needing a document id fail
        """
        doc_ids = {doc.space_path: doc.document_id for doc in (documents or [])}

        for action in actions:
            try:
                performed = self._apply_action(
                    action,
                    directory,
                    content_provider,
                    document_updater,
                    document_creator,
                    document_deleter,
                    doc_ids,
                    result,
                )
            except Exception as e:
                logger.error(
                    "Failed to execute action %s for %s: %s",
                    action.action,
                    action.path,
                    e,
                )
                result.errors[action.path] = str(e)
                continue

            if performed:
                result.actions_performed.append(action)
            else:
                result.skipped_count += 1

    def _apply_action(
        self,
        action: SyncAction,
        directory: Path,
        content_provider: Callable[..., Any],
        document_updater: Optional[Callable[..., Any]],
        document_creator: Optional[Callable[..., Any]],
        document_deleter: Optional[Callable[..., Any]],
        doc_ids: Dict[str, Any],
        result: SyncResult,
    ) -> bool:
        """Apply one action and bump its counter; return False if skipped."""
        path = action.path
        to_directory = action.target == "directory"
        to_space = action.target == "space"

        if action.action == "create":
            if to_directory:
                self._create_file(
                    path,
                    content_provider,
                    directory,
                    self._require_document_id(doc_ids, path),
                )
                result.created_count += 1
                return True
            if to_space and document_creator is not None:
                document_creator(path, self._read_file(path, directory))
                result.created_count += 1
                return True
        elif action.action == "update":
            if to_directory:
                self._update_file(
                    path,
                    content_provider,
                    directory,
                    self._require_document_id(doc_ids, path),
                )
                result.updated_count += 1
                return True
            if to_space and document_updater is not None:
                document_id = self._require_document_id(doc_ids, path)
                document_updater(document_id, self._read_file(path, directory))
                result.updated_count += 1
                return True
        elif action.action == "delete":
            if to_directory:
                self._delete_file(path, directory)
                result.deleted_count += 1
                return True
            if to_space and document_deleter is not None:
                document_deleter(self._require_document_id(doc_ids, path))
                result.deleted_count += 1
                return True

        return False

    @staticmethod
    def _require_document_id(doc_ids: Dict[str, Any], space_path: str) -> Any:
        """Return the document id for ``space_path`` or raise LookupError."""
        try:
            return doc_ids[space_path]
        except KeyError:
            raise LookupError(
                f"No document found in space for path {space_path!r}"
            ) from None

    @staticmethod
    def _resolve_file_path(space_path: str, directory: Path) -> Path:
        """
        Map a space path onto a file path under ``directory``.

        Raises:
            ValueError: If the path is empty or contains a '..' component,
                which would address something outside the sync directory.
        """
        relative = space_path.lstrip("/")
        parts = PurePosixPath(relative.replace("\\", "/")).parts
        if not parts or ".." in parts:
            raise ValueError(f"Unsafe sync path: {space_path!r}")
        return directory / relative

    def _write_file(
        self,
        space_path: str,
        content_provider: Callable[..., Any],
        directory: Path,
        document_id: Any,
    ) -> None:
        """Fetch a document's content and write it under ``directory``."""
        if document_id is None:
            raise LookupError(
                f"No document found in space for path {space_path!r}"
            )
        content = content_provider(document_id)
        if content is None:
            raise ValueError(f"No content available for {space_path!r}")

        file_path = self._resolve_file_path(space_path, directory)
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_text(content, encoding="utf-8")

    def _create_file(
        self,
        space_path: str,
        content_provider: Callable[..., Any],
        directory: Path,
        document_id: Any = None,
    ) -> None:
        """
        Create a file in directory from space content.

        Args:
            space_path: Space path of the document; determines the file path
            content_provider: Function(document_id) -> content
            directory: Sync directory
            document_id: Id passed to ``content_provider``

        Raises:
            LookupError: If ``document_id`` is None
            ValueError: If no content is returned or the path is unsafe
        """
        self._write_file(space_path, content_provider, directory, document_id)

    def _update_file(
        self,
        space_path: str,
        content_provider: Callable[..., Any],
        directory: Path,
        document_id: Any = None,
    ) -> None:
        """
        Update a file in directory from space content.

        Takes the same arguments and raises the same errors as
        ``_create_file``; the existing file is overwritten.
        """
        self._write_file(space_path, content_provider, directory, document_id)

    def _delete_file(self, space_path: str, directory: Path) -> None:
        """Delete a file from directory; a missing file is not an error."""
        file_path = self._resolve_file_path(space_path, directory)
        try:
            file_path.unlink()
        except FileNotFoundError:
            pass

    def _read_file(self, space_path: str, directory: Path) -> str:
        """Read file content (UTF-8) from directory."""
        file_path = self._resolve_file_path(space_path, directory)
        return file_path.read_text(encoding="utf-8")

    def _compute_hash(self, content: str) -> str:
        """Return the first 16 hex digits of the content's SHA-256."""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]

    def _emit_event(
        self, event_type: SpaceEventType, space_id: str, payload: Dict[str, Any]
    ) -> None:
        """Emit an event if event bus is available."""
        if not self.event_bus:
            return

        event = SpaceEvent(
            event_type=event_type,
            space_id=space_id,
            payload=payload,
        )
        self.event_bus.emit(event)


def create_sync_coordinator(
    direction: SyncDirection = SyncDirection.BIDIRECTIONAL,
    conflict_resolution: ConflictResolution = ConflictResolution.NEWER_WINS,
    event_bus: Optional[EventBus] = None,
) -> BidirectionalSyncCoordinator:
    """
    Factory function to create a configured sync coordinator.

    Args:
        direction: Sync direction
        conflict_resolution: Conflict resolution strategy
        event_bus: Event bus for notifications

    Returns:
        Configured BidirectionalSyncCoordinator
    """
    config = SyncConfig(
        direction=direction,
        conflict_resolution=conflict_resolution,
    )
    return BidirectionalSyncCoordinator(config, event_bus)