Implements directory synchronization for Information Spaces:

- SpaceDirectoryExporter: Export space to directory structure
  - Multiple variants: flat, hierarchical, by_path
  - Manifest generation for reimport
  - Incremental export (skip unchanged files)
  - Metadata file export
  - IncrementalExporter for change detection
- DirectorySpaceImporter: Import directory content into space
  - Recursive directory scanning
  - Multiple file pattern support
  - Conflict detection with strategies (skip/overwrite/rename)
  - Manifest-based import for intelligent reimport
  - Structure preservation in space paths
- BidirectionalSyncCoordinator: Two-way sync with conflict detection
  - Sync directions: space-to-directory, directory-to-space, bidirectional
  - Conflict resolution strategies: space_wins, directory_wins, newer_wins, manual, skip
  - Dry-run mode for preview
  - Orphan cleanup option
  - Event emission for progress tracking

45 unit tests covering all sync components.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
405 lines
13 KiB
Python
405 lines
13 KiB
Python
"""
|
|
Space to Directory Exporter.
|
|
|
|
Exports Information Space content to a canonical directory structure
|
|
using the existing VariantFactory for different organization styles.
|
|
"""
|
|
|
|
import hashlib
|
|
import json
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional, List, Set
|
|
|
|
from ..models import InformationSpace, SpaceDocument
|
|
from ..events import EventBus, SpaceEventType, SpaceEvent
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ExportVariant(Enum):
    """Layout choices for the directory tree produced by an export."""

    # Every file is written directly into the export root.
    FLAT = "flat"

    # One folder level per component of the document hierarchy.
    HIERARCHICAL = "hierarchical"

    # The directory tree mirrors each document's space_path.
    BY_PATH = "by_path"
|
|
|
|
|
|
@dataclass
class ExportConfig:
    """
    Options controlling how a space is written to disk.

    Attributes:
        variant: Which directory layout to produce (defaults to BY_PATH).
        include_metadata: Write a space metadata file alongside the content.
        include_manifest: Write a manifest.json describing the export.
        overwrite: Whether existing files may be overwritten.
        preserve_timestamps: Whether file timestamps should be preserved.
        exclude_patterns: Glob patterns naming files to leave out.

    NOTE(review): no code visible in this module reads ``preserve_timestamps``
    or ``exclude_patterns`` - confirm where (or whether) they are honored.
    """

    # Layout of the exported tree.
    variant: ExportVariant = ExportVariant.BY_PATH

    # Companion files written next to the exported documents.
    include_metadata: bool = True
    include_manifest: bool = True

    # Behavior when the destination already holds files.
    overwrite: bool = False
    preserve_timestamps: bool = True

    # A fresh list per instance; never share a mutable default.
    exclude_patterns: List[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class ExportedFile:
    """
    Describes one document as it was written to the export directory.

    Attributes:
        document_id: ID of the document the file was produced from.
        space_path: The document's path inside the space.
        file_path: Location of the file on disk.
        content_hash: Hash of the exported content (the exporter in this
            module stores a truncated SHA-256 hex digest).
        size: Size of the content in bytes.
    """

    # Where the content came from.
    document_id: str
    space_path: str

    # Where it went and what was written.
    file_path: Path
    content_hash: str
    size: int
|
|
|
|
|
|
@dataclass
class ExportResult:
    """
    Outcome of exporting one space to a directory.

    Attributes:
        space_id: ID of the space that was exported.
        target_directory: Directory the export was written to.
        exported_files: Records for the files that were exported.
        skipped_files: Entries for files that were skipped.
        errors: Error messages keyed by the item that failed.
        manifest_path: Location of the manifest, or None if none was written.
        duration_ms: How long the export took, in milliseconds.
    """

    space_id: str
    target_directory: Path

    # Mutable collections get a fresh container per instance.
    exported_files: List[ExportedFile] = field(default_factory=list)
    skipped_files: List[str] = field(default_factory=list)
    errors: Dict[str, str] = field(default_factory=dict)

    manifest_path: Optional[Path] = None
    duration_ms: int = 0

    @property
    def success(self) -> bool:
        """True when no errors were recorded."""
        return not self.errors

    @property
    def file_count(self) -> int:
        """Number of entries in ``exported_files``."""
        return len(self.exported_files)
|
|
|
|
|
|
class SpaceDirectoryExporter:
    """
    Exports Information Space content to a directory structure.

    Features:
    - Multiple directory organization variants
    - Manifest generation for reimport
    - Metadata file export
    - Incremental export (skip unchanged)
    - Event emission for progress tracking

    Of the configuration, this class reads ``variant``, ``include_manifest``,
    ``include_metadata`` and ``overwrite``. With ``overwrite=False`` a file
    whose on-disk content already matches the document is left untouched; a
    file whose content differs is still rewritten.
    """

    def __init__(
        self,
        config: Optional[ExportConfig] = None,
        event_bus: Optional[EventBus] = None,
    ):
        """
        Initialize the exporter.

        Args:
            config: Export configuration; a default ExportConfig is used
                when omitted.
            event_bus: Event bus for notifications; when None no events
                are emitted.
        """
        self.config = config or ExportConfig()
        self.event_bus = event_bus

    def export_space(
        self,
        space: InformationSpace,
        documents: List[SpaceDocument],
        content_provider: callable,
        target_directory: Path,
    ) -> ExportResult:
        """
        Export a space to a directory.

        Failures for an individual document are logged and stored in
        ``result.errors`` under the document's space path; the remaining
        documents are still processed. Any other failure is stored under
        the ``"_export"`` key. Neither kind is re-raised.

        Args:
            space: The space to export
            documents: Documents in the space
            content_provider: Function(document_id) -> content string, or
                None when the document has no content to export
            target_directory: Target directory path (created if missing)

        Returns:
            ExportResult with details of the export. ``exported_files``
            holds the files written in this run, ``skipped_files`` the space
            paths that were not written (no content, or already up to date).
        """
        start_time = datetime.now()
        result = ExportResult(
            space_id=space.id,
            target_directory=target_directory,
        )

        self._emit_event(
            SpaceEventType.SYNC_STARTED,
            space.id,
            {"direction": "export", "target": str(target_directory)},
        )

        try:
            # Create target directory
            target_directory.mkdir(parents=True, exist_ok=True)

            # The manifest must describe every document present in the
            # directory after this run, including files skipped because
            # they were already up to date; otherwise a no-op re-export
            # would wipe the manifest.
            manifest_entries: List[ExportedFile] = []

            # Export each document
            for doc in documents:
                try:
                    record, written = self._process_document(
                        doc, content_provider, target_directory
                    )
                    if written:
                        result.exported_files.append(record)
                    else:
                        result.skipped_files.append(doc.space_path)
                    if record is not None:
                        manifest_entries.append(record)
                except Exception as e:
                    logger.error("Failed to export %s: %s", doc.space_path, e)
                    result.errors[doc.space_path] = str(e)

            # Create manifest if configured
            if self.config.include_manifest:
                result.manifest_path = self._write_manifest(
                    space, manifest_entries, target_directory
                )

            # Create metadata file if configured
            if self.config.include_metadata:
                self._write_metadata(space, target_directory)

            # Calculate duration
            end_time = datetime.now()
            result.duration_ms = int((end_time - start_time).total_seconds() * 1000)

            self._emit_event(
                SpaceEventType.SYNC_COMPLETED,
                space.id,
                {
                    "direction": "export",
                    "file_count": result.file_count,
                    "errors": len(result.errors),
                },
            )

        except Exception as e:
            logger.error("Export failed: %s", e)
            result.errors["_export"] = str(e)
            # NOTE(review): failures are reported with SYNC_CONFLICT; confirm
            # whether a dedicated failure event type exists.
            self._emit_event(
                SpaceEventType.SYNC_CONFLICT,
                space.id,
                {"direction": "export", "error": str(e)},
            )

        return result

    def _export_document(
        self,
        doc: SpaceDocument,
        content_provider: callable,
        target_directory: Path,
    ) -> Optional[ExportedFile]:
        """
        Export a single document.

        Returns:
            The ExportedFile record when the file was written, or None when
            the document was skipped (no content, or unchanged on disk).

        Raises:
            ValueError: If the content provider fails or the document's
                path does not resolve inside ``target_directory``.
        """
        record, written = self._process_document(
            doc, content_provider, target_directory
        )
        return record if written else None

    def _process_document(
        self,
        doc: SpaceDocument,
        content_provider: callable,
        target_directory: Path,
    ) -> tuple:
        """
        Export one document and report whether a write took place.

        Returns:
            A ``(record, written)`` pair. ``record`` is None when the
            provider returned no content; otherwise it describes the file.
            ``written`` is False when nothing was written to disk.

        Raises:
            ValueError: If the content provider fails or the document's
                path does not resolve inside ``target_directory``.
        """
        # Get content
        try:
            content = content_provider(doc.document_id)
        except Exception as e:
            # Chain the cause so the provider's traceback is not lost.
            raise ValueError(
                f"Failed to get content for {doc.document_id}: {e}"
            ) from e

        if content is None:
            return None, False

        # Determine target path based on variant
        target_path = self._get_target_path(doc, target_directory)

        # Hash once; used for both the unchanged check and the record.
        content_hash = self._compute_hash(content)
        record = ExportedFile(
            document_id=doc.document_id,
            space_path=doc.space_path,
            file_path=target_path,
            content_hash=content_hash,
            size=len(content.encode("utf-8")),
        )

        # Without overwrite, leave a file alone when it already holds the
        # same content.
        if (
            target_path.exists()
            and not self.config.overwrite
            and self._compute_file_hash(target_path) == content_hash
        ):
            return record, False

        # Create parent directories
        target_path.parent.mkdir(parents=True, exist_ok=True)

        # Write content
        target_path.write_text(content, encoding="utf-8")
        return record, True

    def _get_target_path(self, doc: SpaceDocument, target_directory: Path) -> Path:
        """
        Determine the target file path based on variant.

        Raises:
            ValueError: If the resulting path is empty or would leave
                ``target_directory`` (for example via ``..`` components).
        """
        if self.config.variant == ExportVariant.FLAT:
            # All files at root; the name is derived from the space path
            filename = self._sanitize_filename(doc.space_path)
            candidate = target_directory / filename

        elif self.config.variant == ExportVariant.HIERARCHICAL:
            # Create folder structure based on path depth
            parts = doc.space_path.strip("/").split("/")
            if len(parts) > 1:
                # Create subdirectory for each path component except last
                subdir = target_directory.joinpath(*parts[:-1])
                candidate = subdir / parts[-1]
            else:
                candidate = target_directory / parts[0]

        else:  # BY_PATH (default)
            # Mirror the space_path structure directly
            relative_path = doc.space_path.lstrip("/")
            candidate = target_directory / relative_path

        return self._ensure_within_target(candidate, target_directory)

    def _ensure_within_target(self, candidate: Path, target_directory: Path) -> Path:
        """
        Return ``candidate`` if it names a file inside ``target_directory``.

        The check is purely lexical (no filesystem access, symlinks are not
        followed).

        Raises:
            ValueError: If ``candidate`` is not below ``target_directory``,
                equals it, or contains ``..`` components.
        """
        try:
            relative = candidate.relative_to(target_directory)
        except ValueError:
            raise ValueError(
                f"Export path {candidate} is outside {target_directory}"
            ) from None

        if not relative.parts:
            # An empty space path would resolve to the directory itself.
            raise ValueError("Document has an empty space path")
        if ".." in relative.parts:
            raise ValueError(
                f"Export path {candidate} escapes {target_directory}"
            )
        return candidate

    def _sanitize_filename(self, path: str) -> str:
        """Flatten a space path into a single ``.md`` filename."""
        # Replace path separators with underscores
        name = path.strip("/").replace("/", "_")
        # Ensure .md extension
        if not name.endswith(".md"):
            name = name + ".md"
        return name

    def _compute_hash(self, content: str) -> str:
        """Return the first 16 hex characters of the content's SHA-256."""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]

    def _compute_file_hash(self, path: Path) -> str:
        """Compute the content hash of a UTF-8 text file."""
        content = path.read_text(encoding="utf-8")
        return self._compute_hash(content)

    def _write_manifest(
        self,
        space: InformationSpace,
        exported_files: List[ExportedFile],
        target_directory: Path,
    ) -> Path:
        """
        Write the export manifest to ``.markitect-manifest.json``.

        File paths are stored relative to ``target_directory``.

        Returns:
            Path of the manifest file.
        """
        manifest = {
            "space_id": space.id,
            "space_name": space.name,
            "exported_at": datetime.now().isoformat(),
            "variant": self.config.variant.value,
            "files": [
                {
                    "document_id": f.document_id,
                    "space_path": f.space_path,
                    "file_path": str(f.file_path.relative_to(target_directory)),
                    "content_hash": f.content_hash,
                    "size": f.size,
                }
                for f in exported_files
            ],
        }

        manifest_path = target_directory / ".markitect-manifest.json"
        manifest_path.write_text(
            json.dumps(manifest, indent=2), encoding="utf-8"
        )
        return manifest_path

    def _write_metadata(
        self, space: InformationSpace, target_directory: Path
    ) -> Path:
        """
        Write space metadata to ``.markitect-space.json``.

        Returns:
            Path of the metadata file.
        """
        # Serialize metadata properly: prefer to_dict(), accept a plain
        # dict, and fall back to an empty mapping for anything else.
        space_metadata = space.metadata
        if hasattr(space_metadata, "to_dict"):
            space_metadata = space_metadata.to_dict()
        elif not isinstance(space_metadata, dict):
            space_metadata = {}

        metadata = {
            "id": space.id,
            "name": space.name,
            "description": space.description,
            "status": space.status.value if hasattr(space.status, "value") else str(space.status),
            "config": space.config.to_dict() if hasattr(space.config, "to_dict") else {},
            "metadata": space_metadata,
        }

        metadata_path = target_directory / ".markitect-space.json"
        metadata_path.write_text(
            json.dumps(metadata, indent=2), encoding="utf-8"
        )
        return metadata_path

    def _emit_event(
        self, event_type: SpaceEventType, space_id: str, payload: Dict[str, Any]
    ) -> None:
        """Emit an event if an event bus was provided."""
        # Compare against None so a bus that happens to be falsy is still used.
        if self.event_bus is None:
            return

        event = SpaceEvent(
            event_type=event_type,
            space_id=space_id,
            payload=payload,
        )
        self.event_bus.emit(event)
|
|
|
|
|
|
class IncrementalExporter(SpaceDirectoryExporter):
    """
    Exporter that remembers the content hashes of a previous export.

    The hashes are read from an existing manifest and can be queried with
    ``has_changed``. Nothing in this class calls ``load_previous_state`` or
    ``has_changed`` automatically; exporting itself is inherited unchanged.
    """

    def __init__(
        self,
        config: Optional[ExportConfig] = None,
        event_bus: Optional[EventBus] = None,
    ):
        """Set up the base exporter and start with no remembered hashes."""
        super().__init__(config, event_bus)
        # document_id -> content hash recorded by the last loaded manifest
        self._last_export_hashes: Dict[str, str] = {}

    def load_previous_state(self, target_directory: Path) -> None:
        """
        Read ``.markitect-manifest.json`` from a directory, if present.

        Hashes found in the manifest are added to the remembered state. A
        manifest that cannot be read or parsed only produces a warning.
        """
        manifest_file = target_directory / ".markitect-manifest.json"
        if not manifest_file.exists():
            return

        try:
            previous = json.loads(manifest_file.read_text(encoding="utf-8"))
            for entry in previous.get("files", []):
                self._last_export_hashes[entry["document_id"]] = entry["content_hash"]
        except Exception as exc:
            # Best effort: a broken manifest must not abort the caller.
            logger.warning(f"Failed to load previous manifest: {exc}")

    def has_changed(self, document_id: str, content: str) -> bool:
        """
        Tell whether content differs from the remembered hash.

        A document with no remembered hash counts as changed.
        """
        return self._last_export_hashes.get(document_id) != self._compute_hash(content)
|