Files
markitect-main/markitect/spaces/sync/exporter.py
tegwick 535b83996b feat(spaces): implement Phase 5 Directory Sync Mode
Implements directory synchronization for Information Spaces:

- SpaceDirectoryExporter: Export space to directory structure
  - Multiple variants: flat, hierarchical, by_path
  - Manifest generation for reimport
  - Incremental export (skip unchanged files)
  - Metadata file export
  - IncrementalExporter for change detection

- DirectorySpaceImporter: Import directory content into space
  - Recursive directory scanning
  - Multiple file pattern support
  - Conflict detection with strategies (skip/overwrite/rename)
  - Manifest-based import for intelligent reimport
  - Structure preservation in space paths

- BidirectionalSyncCoordinator: Two-way sync with conflict detection
  - Sync directions: space-to-directory, directory-to-space, bidirectional
  - Conflict resolution strategies: space_wins, directory_wins, newer_wins, manual, skip
  - Dry-run mode for preview
  - Orphan cleanup option
  - Event emission for progress tracking

45 unit tests covering all sync components.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-08 12:11:37 +01:00

405 lines
13 KiB
Python

"""
Space to Directory Exporter.
Exports Information Space content to a canonical directory structure
using the existing VariantFactory for different organization styles.
"""
import hashlib
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Set, Tuple

from ..models import InformationSpace, SpaceDocument
from ..events import EventBus, SpaceEventType, SpaceEvent
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class ExportVariant(Enum):
    """Supported ways of laying out exported documents on disk."""

    # All files at root level.
    FLAT = "flat"
    # Folder per document hierarchy.
    HIERARCHICAL = "hierarchical"
    # Mirror the space_path structure.
    BY_PATH = "by_path"
@dataclass
class ExportConfig:
    """
    Options that control a directory export.

    Attributes:
        variant: Directory organization style (defaults to BY_PATH)
        include_metadata: Whether metadata files are exported
        include_manifest: Whether manifest.json is created
        overwrite: Whether existing files are overwritten
        preserve_timestamps: Whether file timestamps are preserved
        exclude_patterns: Glob patterns for files to exclude
    """

    variant: ExportVariant = ExportVariant.BY_PATH
    include_metadata: bool = True
    include_manifest: bool = True
    overwrite: bool = False
    preserve_timestamps: bool = True
    # default_factory gives every instance its own list.
    exclude_patterns: List[str] = field(default_factory=list)
@dataclass
class ExportedFile:
    """
    Describes one file produced by an export.

    Attributes:
        document_id: ID of the source document
        space_path: Path the document had inside the space
        file_path: Path of the exported file
        content_hash: Hash of the exported content
        size: Size of the file in bytes
    """

    document_id: str
    space_path: str
    file_path: Path
    content_hash: str
    size: int
@dataclass
class ExportResult:
    """
    Outcome of a single export run.

    Attributes:
        space_id: ID of the exported space
        target_directory: Directory the export was written to
        exported_files: Records of the files that were exported
        skipped_files: Files that were skipped
        errors: Errors encountered during the export
        manifest_path: Location of the manifest file, if one was created
        duration_ms: How long the export took, in milliseconds
    """

    space_id: str
    target_directory: Path
    exported_files: List[ExportedFile] = field(default_factory=list)
    skipped_files: List[str] = field(default_factory=list)
    errors: Dict[str, str] = field(default_factory=dict)
    manifest_path: Optional[Path] = None
    duration_ms: int = 0

    @property
    def success(self) -> bool:
        """True when no errors were recorded."""
        return not self.errors

    @property
    def file_count(self) -> int:
        """Number of entries in ``exported_files``."""
        return len(self.exported_files)
class SpaceDirectoryExporter:
    """
    Exports Information Space content to directory structure.

    Features:
    - Multiple directory organization variants
    - Manifest generation for reimport
    - Metadata file export
    - Incremental export (skip unchanged)
    - Event emission for progress tracking

    NOTE(review): ``ExportConfig.exclude_patterns`` and
    ``ExportConfig.preserve_timestamps`` are not consulted anywhere in this
    class; confirm whether callers apply them or whether support is missing.
    """

    def __init__(
        self,
        config: Optional[ExportConfig] = None,
        event_bus: Optional[EventBus] = None,
    ):
        """
        Initialize the exporter.

        Args:
            config: Export configuration; a default ``ExportConfig`` is used
                when omitted
            event_bus: Event bus for notifications; no events are emitted
                when omitted
        """
        self.config = config or ExportConfig()
        self.event_bus = event_bus

    def export_space(
        self,
        space: InformationSpace,
        documents: List[SpaceDocument],
        content_provider: Callable[[str], Optional[str]],
        target_directory: Path,
    ) -> ExportResult:
        """
        Export a space to a directory.

        Per-document failures are recorded in ``result.errors`` under the
        document's ``space_path`` and do not stop the export. A failure
        outside the per-document loop is recorded under the ``"_export"`` key.

        Args:
            space: The space to export
            documents: Documents in the space
            content_provider: Function(document_id) -> content string, or
                None when the document has no content to export
            target_directory: Target directory path (created if missing)

        Returns:
            ExportResult with details of the export
        """
        start_time = datetime.now()
        result = ExportResult(
            space_id=space.id,
            target_directory=target_directory,
        )
        self._emit_event(
            SpaceEventType.SYNC_STARTED,
            space.id,
            {"direction": "export", "target": str(target_directory)},
        )

        try:
            target_directory.mkdir(parents=True, exist_ok=True)

            # Records for every document present on disk after this run:
            # freshly written files AND files skipped because they were
            # already up to date. The manifest must list both, otherwise a
            # re-export with no changes would rewrite it with no files.
            manifest_entries: List[ExportedFile] = []

            for doc in documents:
                try:
                    record, written = self._process_document(
                        doc, content_provider, target_directory
                    )
                except Exception as e:
                    logger.error("Failed to export %s: %s", doc.space_path, e)
                    result.errors[doc.space_path] = str(e)
                    continue

                if record is not None:
                    manifest_entries.append(record)
                if written:
                    result.exported_files.append(record)
                else:
                    result.skipped_files.append(doc.space_path)

            if self.config.include_manifest:
                result.manifest_path = self._write_manifest(
                    space, manifest_entries, target_directory
                )

            if self.config.include_metadata:
                self._write_metadata(space, target_directory)

            self._emit_event(
                SpaceEventType.SYNC_COMPLETED,
                space.id,
                {
                    "direction": "export",
                    "file_count": result.file_count,
                    "errors": len(result.errors),
                },
            )
        except Exception as e:
            logger.error("Export failed: %s", e)
            result.errors["_export"] = str(e)
            # NOTE(review): failures are reported with SYNC_CONFLICT; confirm
            # whether a dedicated failure event type exists.
            self._emit_event(
                SpaceEventType.SYNC_CONFLICT,
                space.id,
                {"direction": "export", "error": str(e)},
            )
        finally:
            # Record the duration for failed exports as well.
            elapsed = datetime.now() - start_time
            result.duration_ms = int(elapsed.total_seconds() * 1000)

        return result

    def _export_document(
        self,
        doc: SpaceDocument,
        content_provider: Callable[[str], Optional[str]],
        target_directory: Path,
    ) -> Optional[ExportedFile]:
        """
        Export a single document.

        Returns:
            The record of the written file, or None when nothing was written
            (the provider returned None, or the file on disk already holds
            identical content and ``overwrite`` is off).

        Raises:
            ValueError: If the content provider fails or the target path
                falls outside ``target_directory``.
        """
        record, written = self._process_document(
            doc, content_provider, target_directory
        )
        return record if written else None

    def _process_document(
        self,
        doc: SpaceDocument,
        content_provider: Callable[[str], Optional[str]],
        target_directory: Path,
    ) -> Tuple[Optional[ExportedFile], bool]:
        """
        Write one document if needed and describe the resulting file.

        With ``overwrite`` off, an existing file is left alone only when its
        content hash equals the new content's hash; a differing file is still
        rewritten. With ``overwrite`` on, the comparison is skipped and the
        file is always written.

        Returns:
            ``(record, written)``: ``record`` is None when the provider
            returned None; ``written`` is False when no file was written.

        Raises:
            ValueError: If the content provider fails or the target path
                falls outside ``target_directory``.
        """
        try:
            content = content_provider(doc.document_id)
        except Exception as e:
            raise ValueError(
                f"Failed to get content for {doc.document_id}: {e}"
            ) from e

        if content is None:
            return None, False

        target_path = self._get_target_path(doc, target_directory)
        self._ensure_within_target(target_path, target_directory)

        content_hash = self._compute_hash(content)
        record = ExportedFile(
            document_id=doc.document_id,
            space_path=doc.space_path,
            file_path=target_path,
            content_hash=content_hash,
            size=len(content.encode("utf-8")),
        )

        if (
            target_path.exists()
            and not self.config.overwrite
            and self._compute_file_hash(target_path) == content_hash
        ):
            return record, False  # Unchanged on disk: nothing to write

        target_path.parent.mkdir(parents=True, exist_ok=True)
        target_path.write_text(content, encoding="utf-8")
        return record, True

    def _ensure_within_target(
        self, target_path: Path, target_directory: Path
    ) -> None:
        """
        Reject target paths that resolve outside the export directory.

        Guards against ``space_path`` values (e.g. containing ``..``) that
        would otherwise cause writes outside ``target_directory``.

        Raises:
            ValueError: If ``target_path`` is not inside ``target_directory``.
        """
        root = target_directory.resolve()
        try:
            target_path.resolve().relative_to(root)
        except ValueError:
            raise ValueError(
                f"Refusing to write outside export directory: {target_path}"
            ) from None

    def _get_target_path(self, doc: SpaceDocument, target_directory: Path) -> Path:
        """Determine the target file path based on the configured variant."""
        variant = self.config.variant

        if variant == ExportVariant.FLAT:
            # All files at root; the name is derived from the space path.
            return target_directory / self._sanitize_filename(doc.space_path)

        if variant == ExportVariant.HIERARCHICAL:
            # One subdirectory per path component, last component is the file.
            parts = doc.space_path.strip("/").split("/")
            return target_directory.joinpath(*parts)

        # BY_PATH (default): mirror the space_path structure directly.
        return target_directory / doc.space_path.lstrip("/")

    def _sanitize_filename(self, path: str) -> str:
        """
        Turn a space path into a single flat filename.

        Path separators become underscores and a ``.md`` extension is
        appended when the name does not already end with it.
        """
        name = path.strip("/").replace("/", "_")
        if not name.endswith(".md"):
            name = name + ".md"
        return name

    def _compute_hash(self, content: str) -> str:
        """Return the first 16 hex digits of the SHA-256 of the UTF-8 content."""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]

    def _compute_file_hash(self, path: Path) -> str:
        """Read ``path`` as UTF-8 text and return the hash of its content."""
        content = path.read_text(encoding="utf-8")
        return self._compute_hash(content)

    def _write_manifest(
        self,
        space: InformationSpace,
        exported_files: List[ExportedFile],
        target_directory: Path,
    ) -> Path:
        """
        Write the export manifest (``.markitect-manifest.json``).

        File paths are stored relative to ``target_directory`` with forward
        slashes so the manifest reads the same on every platform.

        Returns:
            Path of the written manifest file.
        """
        manifest = {
            "space_id": space.id,
            "space_name": space.name,
            "exported_at": datetime.now().isoformat(),
            "variant": self.config.variant.value,
            "files": [
                {
                    "document_id": f.document_id,
                    "space_path": f.space_path,
                    "file_path": f.file_path.relative_to(
                        target_directory
                    ).as_posix(),
                    "content_hash": f.content_hash,
                    "size": f.size,
                }
                for f in exported_files
            ],
        }
        manifest_path = target_directory / ".markitect-manifest.json"
        manifest_path.write_text(
            json.dumps(manifest, indent=2), encoding="utf-8"
        )
        return manifest_path

    def _write_metadata(
        self, space: InformationSpace, target_directory: Path
    ) -> Path:
        """
        Write the space metadata file (``.markitect-space.json``).

        Returns:
            Path of the written metadata file.
        """
        # Prefer the object's own serializer; anything that is neither
        # serializable that way nor already a dict is written as {}.
        space_metadata = space.metadata
        if hasattr(space_metadata, "to_dict"):
            space_metadata = space_metadata.to_dict()
        elif not isinstance(space_metadata, dict):
            space_metadata = {}

        metadata = {
            "id": space.id,
            "name": space.name,
            "description": space.description,
            "status": space.status.value if hasattr(space.status, "value") else str(space.status),
            "config": space.config.to_dict() if hasattr(space.config, "to_dict") else {},
            "metadata": space_metadata,
        }
        metadata_path = target_directory / ".markitect-space.json"
        metadata_path.write_text(
            json.dumps(metadata, indent=2), encoding="utf-8"
        )
        return metadata_path

    def _emit_event(
        self, event_type: SpaceEventType, space_id: str, payload: Dict[str, Any]
    ) -> None:
        """Emit an event if an event bus is available; otherwise do nothing."""
        if not self.event_bus:
            return
        event = SpaceEvent(
            event_type=event_type,
            space_id=space_id,
            payload=payload,
        )
        self.event_bus.emit(event)
class IncrementalExporter(SpaceDirectoryExporter):
    """
    Exporter with incremental change detection.

    Keeps the content hash recorded for each document in a previous export
    manifest, so ``has_changed`` can tell whether a document's content
    differs from what was last exported.
    """

    def __init__(
        self,
        config: Optional[ExportConfig] = None,
        event_bus: Optional[EventBus] = None,
    ):
        """Initialize incremental exporter with an empty hash table."""
        super().__init__(config, event_bus)
        # document_id -> content hash from the last loaded manifest(s)
        self._last_export_hashes: Dict[str, str] = {}

    def load_previous_state(self, target_directory: Path) -> None:
        """
        Load previous export state from the manifest in ``target_directory``.

        Loading is best-effort and all-or-nothing: if the manifest is missing
        nothing happens, and if it cannot be read or any entry is malformed a
        warning is logged and the known hashes are left untouched. Hashes
        from a successfully parsed manifest are merged into the existing
        table.
        """
        manifest_path = target_directory / ".markitect-manifest.json"
        if not manifest_path.exists():
            return

        try:
            manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
            # Build the full mapping first so a bad entry halfway through
            # cannot leave the hash table partially updated.
            loaded = {
                file_info["document_id"]: file_info["content_hash"]
                for file_info in manifest.get("files", [])
            }
        except Exception as e:
            logger.warning("Failed to load previous manifest: %s", e)
            return

        self._last_export_hashes.update(loaded)

    def has_changed(self, document_id: str, content: str) -> bool:
        """
        Check if document content has changed.

        Returns:
            True when no hash is known for ``document_id`` or the known hash
            differs from the hash of ``content``.
        """
        previous_hash = self._last_export_hashes.get(document_id)
        return previous_hash is None or previous_hash != self._compute_hash(content)