feat(spaces): implement Phase 5 Directory Sync Mode
Implements directory synchronization for Information Spaces:

- SpaceDirectoryExporter: Export space to directory structure
  - Multiple variants: flat, hierarchical, by_path
  - Manifest generation for reimport
  - Incremental export (skip unchanged files)
  - Metadata file export
  - IncrementalExporter for change detection
- DirectorySpaceImporter: Import directory content into space
  - Recursive directory scanning
  - Multiple file pattern support
  - Conflict detection with strategies (skip/overwrite/rename)
  - Manifest-based import for intelligent reimport
  - Structure preservation in space paths
- BidirectionalSyncCoordinator: Two-way sync with conflict detection
  - Sync directions: space-to-directory, directory-to-space, bidirectional
  - Conflict resolution strategies: space_wins, directory_wins, newer_wins, manual, skip
  - Dry-run mode for preview
  - Orphan cleanup option
  - Event emission for progress tracking

45 unit tests covering all sync components.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
404
markitect/spaces/sync/exporter.py
Normal file
404
markitect/spaces/sync/exporter.py
Normal file
@@ -0,0 +1,404 @@
|
||||
"""
|
||||
Space to Directory Exporter.
|
||||
|
||||
Exports Information Space content to a canonical directory structure
|
||||
using the existing VariantFactory for different organization styles.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional, List, Set
|
||||
|
||||
from ..models import InformationSpace, SpaceDocument
|
||||
from ..events import EventBus, SpaceEventType, SpaceEvent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ExportVariant(Enum):
    """Layout styles available when writing a space out to disk."""

    # Every file is placed directly in the export root.
    FLAT = "flat"
    # Files are nested in folders following the document hierarchy.
    HIERARCHICAL = "hierarchical"
    # The on-disk tree mirrors each document's space_path.
    BY_PATH = "by_path"
|
||||
|
||||
|
||||
@dataclass
class ExportConfig:
    """Options controlling how a space is exported to a directory."""

    # Directory organization style.
    variant: ExportVariant = ExportVariant.BY_PATH
    # Whether to export metadata files.
    include_metadata: bool = True
    # Whether to create manifest.json.
    include_manifest: bool = True
    # Whether to overwrite existing files.
    overwrite: bool = False
    # Whether to preserve file timestamps.
    preserve_timestamps: bool = True
    # Glob patterns for files to exclude.
    exclude_patterns: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class ExportedFile:
    """Describes one file produced by an export."""

    # ID of the source document.
    document_id: str
    # Path the document had inside the space.
    space_path: str
    # Location of the exported file.
    file_path: Path
    # Hash of the exported content.
    content_hash: str
    # File size in bytes.
    size: int
|
||||
|
||||
|
||||
@dataclass
class ExportResult:
    """Outcome of a single export run."""

    # ID of the space that was exported.
    space_id: str
    # Directory the export was written to.
    target_directory: Path
    # Records for the files that were exported.
    exported_files: List[ExportedFile] = field(default_factory=list)
    # Files that were skipped.
    skipped_files: List[str] = field(default_factory=list)
    # Errors encountered during the export.
    errors: Dict[str, str] = field(default_factory=dict)
    # Path to the manifest file, if one was created.
    manifest_path: Optional[Path] = None
    # Export duration in milliseconds.
    duration_ms: int = 0

    @property
    def success(self) -> bool:
        """Return True when the export recorded no errors."""
        error_count = len(self.errors)
        return error_count == 0

    @property
    def file_count(self) -> int:
        """Return how many files were exported."""
        exported = self.exported_files
        return len(exported)
|
||||
|
||||
|
||||
class SpaceDirectoryExporter:
    """
    Exports Information Space content to a directory structure.

    Features:
    - Multiple directory organization variants (see ``ExportVariant``)
    - Manifest generation for reimport
    - Metadata file export
    - Incremental export (files whose content is unchanged are not rewritten)
    - Event emission for progress tracking

    Note:
        ``ExportConfig.preserve_timestamps`` and
        ``ExportConfig.exclude_patterns`` are not consulted by this class.
    """

    def __init__(
        self,
        config: Optional[ExportConfig] = None,
        event_bus: Optional[EventBus] = None,
    ):
        """
        Initialize the exporter.

        Args:
            config: Export configuration; a default ``ExportConfig`` is used
                when omitted.
            event_bus: Event bus for notifications; when omitted no events
                are emitted.
        """
        self.config = config or ExportConfig()
        self.event_bus = event_bus

    def export_space(
        self,
        space: InformationSpace,
        documents: List[SpaceDocument],
        content_provider: callable,
        target_directory: Path,
    ) -> ExportResult:
        """
        Export a space to a directory.

        Failures for individual documents are recorded in
        ``ExportResult.errors`` under the document's ``space_path`` and do
        not stop the export. A failure outside the per-document loop is
        recorded under the ``"_export"`` key.

        Args:
            space: The space to export
            documents: Documents in the space
            content_provider: Function(document_id) -> content string, or
                ``None`` when the document has no content to export
            target_directory: Target directory path (created if missing)

        Returns:
            ExportResult with details of the export
        """
        start_time = datetime.now()
        result = ExportResult(
            space_id=space.id,
            target_directory=target_directory,
        )

        self._emit_event(
            SpaceEventType.SYNC_STARTED,
            space.id,
            {"direction": "export", "target": str(target_directory)},
        )

        try:
            target_directory.mkdir(parents=True, exist_ok=True)

            # The manifest must describe every document present in the
            # export directory, including files left untouched because
            # their content did not change. Tracking only the files written
            # in this run would drop unchanged files from the manifest on
            # every incremental re-export.
            manifest_entries: List[ExportedFile] = []

            for doc in documents:
                try:
                    record, written = self._process_document(
                        doc, content_provider, target_directory
                    )
                except Exception as e:
                    logger.error("Failed to export %s: %s", doc.space_path, e)
                    result.errors[doc.space_path] = str(e)
                    continue

                if record is not None:
                    manifest_entries.append(record)
                if written:
                    result.exported_files.append(record)
                else:
                    result.skipped_files.append(doc.space_path)

            if self.config.include_manifest:
                result.manifest_path = self._write_manifest(
                    space, manifest_entries, target_directory
                )

            if self.config.include_metadata:
                self._write_metadata(space, target_directory)

            self._emit_event(
                SpaceEventType.SYNC_COMPLETED,
                space.id,
                {
                    "direction": "export",
                    "file_count": result.file_count,
                    "errors": len(result.errors),
                },
            )

        except Exception as e:
            logger.error("Export failed: %s", e)
            result.errors["_export"] = str(e)
            self._emit_event(
                SpaceEventType.SYNC_CONFLICT,
                space.id,
                {"direction": "export", "error": str(e)},
            )

        finally:
            # Record the duration on the failure path as well.
            elapsed = datetime.now() - start_time
            result.duration_ms = int(elapsed.total_seconds() * 1000)

        return result

    def _export_document(
        self,
        doc: SpaceDocument,
        content_provider: callable,
        target_directory: Path,
    ) -> Optional[ExportedFile]:
        """
        Export a single document.

        Returns:
            The ``ExportedFile`` record when the file was written, or
            ``None`` when the document was skipped (no content, or the
            existing file already holds identical content).

        Raises:
            ValueError: If the content provider fails, or the document's
                path does not resolve to a file inside ``target_directory``.
        """
        record, written = self._process_document(
            doc, content_provider, target_directory
        )
        return record if written else None

    def _process_document(
        self,
        doc: SpaceDocument,
        content_provider: callable,
        target_directory: Path,
    ) -> tuple:
        """
        Export one document and report whether a write happened.

        Returns:
            A ``(record, written)`` pair. ``record`` is ``None`` when the
            provider returned no content; otherwise it describes the file
            on disk. ``written`` is ``False`` when the file was left
            untouched because it already holds identical content.

        Raises:
            ValueError: If the content provider fails, or the document's
                path does not resolve to a file inside ``target_directory``.
        """
        try:
            content = content_provider(doc.document_id)
        except Exception as e:
            # Chain the cause so the provider's traceback is preserved.
            raise ValueError(
                f"Failed to get content for {doc.document_id}: {e}"
            ) from e

        if content is None:
            return None, False

        target_path = self._get_target_path(doc, target_directory)
        self._check_within_target(target_path, target_directory, doc.space_path)

        content_hash = self._compute_hash(content)
        record = ExportedFile(
            document_id=doc.document_id,
            space_path=doc.space_path,
            file_path=target_path,
            content_hash=content_hash,
            size=len(content.encode("utf-8")),
        )

        # With overwrite disabled, an existing file is rewritten only when
        # its content differs; with overwrite enabled it is always rewritten.
        if (
            target_path.exists()
            and not self.config.overwrite
            and self._compute_file_hash(target_path) == content_hash
        ):
            return record, False

        target_path.parent.mkdir(parents=True, exist_ok=True)
        target_path.write_text(content, encoding="utf-8")
        return record, True

    def _check_within_target(
        self, target_path: Path, target_directory: Path, space_path: str
    ) -> None:
        """
        Reject paths that would be written outside the export directory.

        Raises:
            ValueError: If ``target_path`` does not resolve to a location
                strictly inside ``target_directory`` (for example because
                ``space_path`` contains ``..`` components or is empty).
        """
        root = target_directory.resolve()
        if root not in target_path.resolve().parents:
            raise ValueError(
                f"Space path {space_path!r} does not resolve to a file "
                f"inside export directory {target_directory}"
            )

    def _get_target_path(self, doc: SpaceDocument, target_directory: Path) -> Path:
        """Determine the target file path based on the configured variant."""
        variant = self.config.variant

        if variant == ExportVariant.FLAT:
            # All files at root; the name is derived from the space path.
            return target_directory / self._sanitize_filename(doc.space_path)

        if variant == ExportVariant.HIERARCHICAL:
            # One directory level per path component; the last component
            # is the file name.
            parts = doc.space_path.strip("/").split("/")
            return target_directory.joinpath(*parts)

        # BY_PATH (default): mirror the space_path structure directly.
        return target_directory / doc.space_path.lstrip("/")

    def _sanitize_filename(self, path: str) -> str:
        """Flatten a space path into a single file name ending in ``.md``."""
        # Replace path separators with underscores
        name = path.strip("/").replace("/", "_")
        # Ensure .md extension
        if not name.endswith(".md"):
            name = name + ".md"
        return name

    def _compute_hash(self, content: str) -> str:
        """Return the first 16 hex digits of the SHA-256 of the UTF-8 content."""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]

    def _compute_file_hash(self, path: Path) -> str:
        """Return the content hash of the file at ``path``, read as UTF-8 text."""
        content = path.read_text(encoding="utf-8")
        return self._compute_hash(content)

    def _write_manifest(
        self,
        space: InformationSpace,
        exported_files: List[ExportedFile],
        target_directory: Path,
    ) -> Path:
        """
        Write the export manifest file.

        Args:
            space: The exported space
            exported_files: Records to list in the manifest; each
                ``file_path`` must be located under ``target_directory``
            target_directory: Directory that receives the manifest

        Returns:
            Path of the written ``.markitect-manifest.json`` file
        """
        manifest = {
            "space_id": space.id,
            "space_name": space.name,
            "exported_at": datetime.now().isoformat(),
            "variant": self.config.variant.value,
            "files": [
                {
                    "document_id": f.document_id,
                    "space_path": f.space_path,
                    "file_path": str(f.file_path.relative_to(target_directory)),
                    "content_hash": f.content_hash,
                    "size": f.size,
                }
                for f in exported_files
            ],
        }

        manifest_path = target_directory / ".markitect-manifest.json"
        manifest_path.write_text(
            json.dumps(manifest, indent=2), encoding="utf-8"
        )
        return manifest_path

    def _write_metadata(
        self, space: InformationSpace, target_directory: Path
    ) -> Path:
        """
        Write the space metadata file.

        Returns:
            Path of the written ``.markitect-space.json`` file
        """
        # Use to_dict() when available; anything that is not already a dict
        # is replaced with an empty dict.
        space_metadata = space.metadata
        if hasattr(space_metadata, "to_dict"):
            space_metadata = space_metadata.to_dict()
        elif not isinstance(space_metadata, dict):
            space_metadata = {}

        metadata = {
            "id": space.id,
            "name": space.name,
            "description": space.description,
            "status": space.status.value if hasattr(space.status, "value") else str(space.status),
            "config": space.config.to_dict() if hasattr(space.config, "to_dict") else {},
            "metadata": space_metadata,
        }

        metadata_path = target_directory / ".markitect-space.json"
        metadata_path.write_text(
            json.dumps(metadata, indent=2), encoding="utf-8"
        )
        return metadata_path

    def _emit_event(
        self, event_type: SpaceEventType, space_id: str, payload: Dict[str, Any]
    ) -> None:
        """Emit an event if an event bus is available."""
        if not self.event_bus:
            return

        event = SpaceEvent(
            event_type=event_type,
            space_id=space_id,
            payload=payload,
        )
        self.event_bus.emit(event)
|
||||
|
||||
|
||||
class IncrementalExporter(SpaceDirectoryExporter):
    """
    Exporter with incremental change detection.

    Keeps the content hashes recorded by a previous export's manifest so
    callers can ask whether a document's content differs from what was
    exported last time.
    """

    def __init__(
        self,
        config: Optional[ExportConfig] = None,
        event_bus: Optional[EventBus] = None,
    ):
        """Initialize incremental exporter with no previous state loaded."""
        super().__init__(config, event_bus)
        # Maps document_id -> content hash from the previously loaded manifest.
        self._last_export_hashes: Dict[str, str] = {}

    def load_previous_state(self, target_directory: Path) -> None:
        """
        Load previous export state from the manifest in ``target_directory``.

        Loading is best-effort: a missing manifest is ignored, and an
        unreadable or malformed manifest only logs a warning. The manifest
        is parsed completely before any hash is stored, so a malformed
        entry cannot leave the state half-updated.
        """
        manifest_path = target_directory / ".markitect-manifest.json"
        if not manifest_path.exists():
            return

        try:
            manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
            loaded = {
                file_info["document_id"]: file_info["content_hash"]
                for file_info in manifest.get("files", [])
            }
        except (OSError, ValueError, KeyError, TypeError, AttributeError) as e:
            # OSError: unreadable file; ValueError: bad encoding or JSON;
            # the others: manifest or entries with an unexpected shape.
            logger.warning("Failed to load previous manifest: %s", e)
            return

        self._last_export_hashes.update(loaded)

    def has_changed(self, document_id: str, content: str) -> bool:
        """
        Check if document content has changed.

        Returns:
            True when no hash is recorded for ``document_id`` or the
            recorded hash differs from the hash of ``content``.
        """
        current_hash = self._compute_hash(content)
        previous_hash = self._last_export_hashes.get(document_id)
        return previous_hash is None or previous_hash != current_hash
|
||||
Reference in New Issue
Block a user