fix: Add missing infrastructure files from data access improvements
Add infrastructure components that were created during issue #24 but not properly committed:
- Data access repositories and interfaces
- Connection management infrastructure
- Exception handling framework
- Configuration management
- Documentation from data access pattern improvements

These files are essential infrastructure components that enable the repository pattern and improved data access strategies.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
495
infrastructure/repositories/filesystem_repository.py
Normal file
495
infrastructure/repositories/filesystem_repository.py
Normal file
@@ -0,0 +1,495 @@
|
||||
"""
|
||||
Filesystem repository implementation with atomic operations.
|
||||
|
||||
Provides reliable file operations with proper error handling,
|
||||
atomic writes, and workspace management.
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import uuid
|
||||
from infrastructure.logging import get_logger
|
||||
from typing import List, Optional
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from infrastructure.repositories.interfaces import WorkspaceRepository
|
||||
from infrastructure.exceptions import (
|
||||
ErrorContext, OperationType, ResourceNotFoundError,
|
||||
DuplicateResourceError, ValidationError
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class FilesystemWorkspaceRepository(WorkspaceRepository):
|
||||
"""
|
||||
Filesystem implementation of WorkspaceRepository.
|
||||
|
||||
Provides reliable workspace and file operations with atomic writes,
|
||||
proper validation, and comprehensive error handling.
|
||||
"""
|
||||
|
||||
def __init__(self, base_workspace_dir: str = ".markitect_workspace"):
    """
    Bind the repository to its root directory.

    Args:
        base_workspace_dir: Directory that holds all workspaces. It is
            resolved to an absolute path and created (with any missing
            parents) when it does not exist yet.
    """
    root = Path(base_workspace_dir)
    self.base_path = root.resolve()
    self.base_path.mkdir(exist_ok=True, parents=True)
    logger.info(f"Initialized workspace repository at {self.base_path}")
|
||||
|
||||
async def create_workspace(
    self,
    workspace_id: str,
    base_path: Path,
    context: Optional[ErrorContext] = None
) -> Path:
    """
    Create a new workspace directory with its standard layout.

    The workspace lives at ``self.base_path / workspace_id`` and receives
    ``files``, ``temp`` and ``logs`` subdirectories plus a
    ``.workspace_meta.json`` metadata file.

    Args:
        workspace_id: Identifier of the workspace; must satisfy
            ``_is_valid_workspace_id``.
        base_path: Accepted for interface compatibility but not used by
            this implementation; the location is always derived from
            ``self.base_path``.
        context: Error context attached to raised exceptions. A default
            WRITE context is built when omitted.

    Returns:
        Path of the newly created workspace directory.

    Raises:
        ValidationError: If ``workspace_id`` is not a valid identifier.
        DuplicateResourceError: If the workspace already exists.
        Exception: Whatever ``_map_os_error_to_exception`` produces for any
            other ``OSError`` raised while building the workspace.
    """
    if context is None:
        context = ErrorContext(
            operation_id=f"create_workspace_{workspace_id}",
            operation_type=OperationType.WRITE,
            resource_type="Workspace",
            resource_id=workspace_id
        )

    # Validate workspace ID
    if not self._is_valid_workspace_id(workspace_id):
        raise ValidationError(
            "workspace_id",
            workspace_id,
            "Workspace ID must be alphanumeric with optional dashes and underscores",
            context
        )

    workspace_path = self.base_path / workspace_id

    # Fast path: report an existing workspace without touching it.
    if workspace_path.exists():
        raise DuplicateResourceError("Workspace", workspace_id, context)

    # Create the directory on its own so that losing a race against another
    # creator is reported as a duplicate and never reaches the cleanup below,
    # which would otherwise delete the other creator's workspace.
    try:
        workspace_path.mkdir(parents=True, exist_ok=False, mode=0o755)
    except FileExistsError as e:
        raise DuplicateResourceError("Workspace", workspace_id, context) from e
    except OSError as e:
        logger.error(f"Failed to create workspace {workspace_id}: {e}")
        raise self._map_os_error_to_exception(e, f"create workspace {workspace_id}", context) from e

    try:
        # Create standard subdirectories
        for subdirectory in ("files", "temp", "logs"):
            (workspace_path / subdirectory).mkdir(exist_ok=True)

        # Workspace metadata; "created_at" is a naive UTC ISO-8601 string.
        metadata = {
            "id": workspace_id,
            "created_at": datetime.utcnow().isoformat(),
            "version": "1.0",
            "type": "markitect_workspace"
        }

        await self._write_json_file(
            workspace_path / ".workspace_meta.json",
            metadata,
            context
        )

    except OSError as e:
        logger.error(f"Failed to create workspace {workspace_id}: {e}")
        # The directory was created by this call, so removing the partial
        # workspace cannot destroy anybody else's data.
        shutil.rmtree(workspace_path, ignore_errors=True)

        raise self._map_os_error_to_exception(e, f"create workspace {workspace_id}", context) from e

    logger.info(f"Created workspace: {workspace_id}")
    return workspace_path
|
||||
|
||||
async def get_workspace_path(
    self,
    workspace_id: str,
    context: Optional[ErrorContext] = None
) -> Path:
    """
    Return the directory of an existing workspace.

    Args:
        workspace_id: Identifier of the workspace to look up.
        context: Error context attached to raised exceptions. A default
            READ context is built when omitted.

    Returns:
        ``self.base_path / workspace_id``.

    Raises:
        ResourceNotFoundError: If ``workspace_id`` is not a valid workspace
            identifier or no directory with that name exists under the
            repository root.
    """
    if context is None:
        context = ErrorContext(
            operation_id=f"get_workspace_path_{workspace_id}",
            operation_type=OperationType.READ,
            resource_type="Workspace",
            resource_id=workspace_id
        )

    # An ID such as "..", "a/b" or an absolute path would make the join below
    # resolve outside the repository root, and callers (for example
    # delete_workspace) act on the returned path. Such an ID can never have
    # been accepted by create_workspace, so report it as not found without
    # touching the filesystem.
    if not self._is_valid_workspace_id(workspace_id):
        raise ResourceNotFoundError("Workspace", workspace_id, context)

    workspace_path = self.base_path / workspace_id

    # is_dir() is also False for a missing path, so one check covers both.
    if not workspace_path.is_dir():
        raise ResourceNotFoundError("Workspace", workspace_id, context)

    return workspace_path
|
||||
|
||||
async def list_workspaces(
    self,
    context: Optional[ErrorContext] = None
) -> List[str]:
    """
    Return the sorted IDs of all workspaces under the repository root.

    A directory is reported only when its name is a valid workspace ID and
    it contains a ``.workspace_meta.json`` file. An empty list is returned
    when the repository root does not exist.

    Raises:
        Exception: Whatever ``_map_os_error_to_exception`` produces for an
            ``OSError`` raised while scanning the root.
    """
    if context is None:
        context = ErrorContext(
            operation_id="list_workspaces",
            operation_type=OperationType.READ,
            resource_type="Workspace"
        )

    try:
        if not self.base_path.exists():
            return []

        found = [
            entry.name
            for entry in self.base_path.iterdir()
            if entry.is_dir()
            and self._is_valid_workspace_id(entry.name)
            and (entry / ".workspace_meta.json").exists()
        ]
        found.sort()
        return found

    except OSError as e:
        logger.error(f"Failed to list workspaces: {e}")
        raise self._map_os_error_to_exception(e, "list workspaces", context)
|
||||
|
||||
async def write_file(
    self,
    workspace_id: str,
    file_path: str,
    content: str,
    context: Optional[ErrorContext] = None
) -> Path:
    """
    Store ``content`` as a file inside the workspace's ``files`` directory.

    Checks run in this order: the workspace must exist, ``file_path`` must
    pass ``_is_safe_file_path``, a present extension must be on the allow
    list, and the UTF-8 encoded content must not exceed 100MB. Missing
    parent directories are created and the write goes through
    ``_atomic_write_file``.

    Args:
        workspace_id: Workspace that receives the file.
        file_path: Path of the file relative to the ``files`` directory.
        content: Text to write.
        context: Error context attached to raised exceptions. A default
            WRITE context is built when omitted.

    Returns:
        Path of the written file.

    Raises:
        ResourceNotFoundError: If the workspace does not exist.
        ValidationError: If the path, extension or content size is rejected.
        Exception: Whatever ``_map_os_error_to_exception`` produces for an
            ``OSError`` raised while writing.
    """
    if context is None:
        context = ErrorContext(
            operation_id=f"write_file_{workspace_id}_{file_path}",
            operation_type=OperationType.WRITE,
            resource_type="WorkspaceFile",
            resource_id=f"{workspace_id}/{file_path}",
            request_data={"content_length": len(content)}
        )

    workspace_root = await self.get_workspace_path(workspace_id, context)

    path_is_safe = self._is_safe_file_path(file_path)
    if not path_is_safe:
        raise ValidationError(
            "file_path",
            file_path,
            "File path contains invalid characters or attempts directory traversal",
            context
        )

    # Files without any extension are accepted; only a present extension
    # has to be on the allow list.
    permitted_suffixes = {".md", ".txt", ".py", ".js", ".json", ".yaml", ".yml", ".rst", ".csv"}
    suffix = Path(file_path).suffix.lower()
    if suffix and suffix not in permitted_suffixes:
        raise ValidationError(
            "file_path",
            file_path,
            f"File extension {suffix} is not allowed",
            context
        )

    # The limit applies to the encoded size, not the character count.
    byte_limit = 100 * 1024 * 1024  # 100MB
    encoded_size = len(content.encode('utf-8'))
    if encoded_size > byte_limit:
        raise ValidationError(
            "content",
            f"{len(content)} characters",
            f"File content exceeds maximum size of {byte_limit} bytes",
            context
        )

    destination = workspace_root / "files" / file_path

    try:
        destination.parent.mkdir(parents=True, exist_ok=True)
        await self._atomic_write_file(destination, content, context)
    except OSError as e:
        logger.error(f"Failed to write file {file_path} in workspace {workspace_id}: {e}")
        raise self._map_os_error_to_exception(e, f"write file {file_path}", context)
    else:
        logger.info(f"Wrote file {file_path} in workspace {workspace_id}")
        return destination
|
||||
|
||||
async def read_file(
    self,
    workspace_id: str,
    file_path: str,
    context: Optional[ErrorContext] = None
) -> str:
    """
    Return the text of a file stored in the workspace's ``files`` directory.

    The file is decoded strictly as UTF-8; no other encoding is attempted.

    Args:
        workspace_id: Workspace that owns the file.
        file_path: Path of the file relative to the ``files`` directory.
        context: Error context attached to raised exceptions. A default
            READ context is built when omitted.

    Returns:
        The decoded file content.

    Raises:
        ResourceNotFoundError: If the workspace or the file does not exist.
        ValidationError: If the path is rejected, is not a regular file, or
            the file is not valid UTF-8.
        Exception: Whatever ``_map_os_error_to_exception`` produces for an
            ``OSError`` raised while reading.
    """
    if context is None:
        context = ErrorContext(
            operation_id=f"read_file_{workspace_id}_{file_path}",
            operation_type=OperationType.READ,
            resource_type="WorkspaceFile",
            resource_id=f"{workspace_id}/{file_path}"
        )

    workspace_root = await self.get_workspace_path(workspace_id, context)

    path_is_safe = self._is_safe_file_path(file_path)
    if not path_is_safe:
        raise ValidationError(
            "file_path",
            file_path,
            "File path contains invalid characters or attempts directory traversal",
            context
        )

    source = workspace_root / "files" / file_path

    if not source.exists():
        raise ResourceNotFoundError("File", f"{workspace_id}/{file_path}", context)

    if not source.is_file():
        raise ValidationError(
            "file_path",
            file_path,
            "Path exists but is not a regular file",
            context
        )

    try:
        text = source.read_text(encoding='utf-8')
    except UnicodeDecodeError as e:
        logger.error(f"Failed to decode file {file_path} as UTF-8: {e}")
        raise ValidationError(
            "file_content",
            "binary data",
            "File does not contain valid UTF-8 text",
            context
        )
    except OSError as e:
        logger.error(f"Failed to read file {file_path} from workspace {workspace_id}: {e}")
        raise self._map_os_error_to_exception(e, f"read file {file_path}", context)
    else:
        logger.debug(f"Read file {file_path} from workspace {workspace_id}")
        return text
|
||||
|
||||
async def delete_workspace(
    self,
    workspace_id: str,
    context: Optional[ErrorContext] = None
) -> bool:
    """
    Remove a workspace directory together with everything inside it.

    Args:
        workspace_id: Workspace to delete.
        context: Error context attached to raised exceptions. A default
            DELETE context is built when omitted.

    Returns:
        ``True`` once the directory tree has been removed.

    Raises:
        ResourceNotFoundError: If the workspace does not exist.
        Exception: Whatever ``_map_os_error_to_exception`` produces for an
            ``OSError`` raised during removal.
    """
    if context is None:
        context = ErrorContext(
            operation_id=f"delete_workspace_{workspace_id}",
            operation_type=OperationType.DELETE,
            resource_type="Workspace",
            resource_id=workspace_id
        )

    doomed = await self.get_workspace_path(workspace_id, context)

    try:
        shutil.rmtree(doomed)
    except OSError as e:
        logger.error(f"Failed to delete workspace {workspace_id}: {e}")
        raise self._map_os_error_to_exception(e, f"delete workspace {workspace_id}", context)
    else:
        logger.info(f"Deleted workspace: {workspace_id}")
        return True
|
||||
|
||||
async def list_files(
    self,
    workspace_id: str,
    pattern: Optional[str] = None,
    context: Optional[ErrorContext] = None
) -> List[str]:
    """
    Return the sorted relative paths of the files stored in a workspace.

    The ``files`` directory is searched recursively; only regular files are
    reported, as paths relative to that directory.

    Args:
        workspace_id: Workspace to inspect.
        pattern: Optional glob handed to ``_matches_pattern``; when given,
            only matching paths are returned.
        context: Error context attached to raised exceptions. A default
            READ context is built when omitted.

    Returns:
        Sorted relative paths, or an empty list when the workspace has no
        ``files`` directory.

    Raises:
        ResourceNotFoundError: If the workspace does not exist.
        Exception: Whatever ``_map_os_error_to_exception`` produces for an
            ``OSError`` raised while walking the directory.
    """
    if context is None:
        context = ErrorContext(
            operation_id=f"list_files_{workspace_id}",
            operation_type=OperationType.READ,
            resource_type="WorkspaceFile",
            metadata={"workspace_id": workspace_id, "pattern": pattern}
        )

    workspace_root = await self.get_workspace_path(workspace_id, context)
    files_root = workspace_root / "files"

    if not files_root.exists():
        return []

    try:
        relative_names = (
            str(entry.relative_to(files_root))
            for entry in files_root.rglob("*")
            if entry.is_file()
        )
        selected = [
            name
            for name in relative_names
            if pattern is None or self._matches_pattern(name, pattern)
        ]
        selected.sort()
        return selected

    except OSError as e:
        logger.error(f"Failed to list files in workspace {workspace_id}: {e}")
        raise self._map_os_error_to_exception(e, f"list files in workspace {workspace_id}", context)
|
||||
|
||||
async def cleanup_old_workspaces(self, days_threshold: int = 30) -> int:
    """
    Delete workspaces whose recorded creation time is older than a threshold.

    Every directory under the repository root that carries a
    ``.workspace_meta.json`` file with a ``created_at`` entry is examined.
    Failures on a single workspace are logged as warnings and skipped so
    that one broken workspace cannot stop the cleanup.

    Args:
        days_threshold: Age in days beyond which a workspace is deleted.

    Returns:
        Number of workspaces deleted. If the scan itself fails part-way,
        the number deleted up to that point is returned.
    """
    # Local import: the module imports only datetime and timedelta.
    from datetime import timezone

    logger.info(f"Starting cleanup of workspaces older than {days_threshold} days")

    # Defined before the try block so the outer handler can still report
    # deletions that already happened.
    deleted_count = 0

    try:
        cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_threshold)

        if not self.base_path.exists():
            return 0

        # Snapshot the listing: entries are removed while we loop.
        for workspace_dir in list(self.base_path.iterdir()):
            if not workspace_dir.is_dir():
                continue

            try:
                # Check workspace metadata for creation date
                metadata_file = workspace_dir / ".workspace_meta.json"
                if not metadata_file.exists():
                    continue

                metadata = await self._read_json_file(metadata_file)
                created_at_str = metadata.get("created_at")

                if not created_at_str:
                    continue

                # fromisoformat() on older Python versions rejects a "Z" suffix.
                created_at = datetime.fromisoformat(created_at_str.replace("Z", "+00:00"))

                # Timestamps without an offset are treated as UTC. Making
                # both sides timezone-aware avoids the TypeError Python
                # raises when comparing naive with aware datetimes, which
                # previously made such workspaces impossible to clean up.
                if created_at.tzinfo is None:
                    created_at = created_at.replace(tzinfo=timezone.utc)

                if created_at < cutoff_date:
                    await self.delete_workspace(workspace_dir.name)
                    deleted_count += 1
                    logger.info(f"Cleaned up old workspace: {workspace_dir.name}")

            except Exception as e:
                # Best effort per workspace: log and move on.
                logger.warning(f"Failed to process workspace {workspace_dir.name} during cleanup: {e}")
                continue

        logger.info(f"Cleanup completed: deleted {deleted_count} old workspaces")
        return deleted_count

    except Exception as e:
        logger.error(f"Error during workspace cleanup: {e}")
        return deleted_count
|
||||
|
||||
# Helper methods
|
||||
|
||||
def _is_valid_workspace_id(self, workspace_id: str) -> bool:
|
||||
"""Validate workspace ID format."""
|
||||
if not workspace_id or len(workspace_id) > 100:
|
||||
return False
|
||||
|
||||
# Allow alphanumeric, dash, underscore
|
||||
import re
|
||||
return re.match(r'^[a-zA-Z0-9_-]+$', workspace_id) is not None
|
||||
|
||||
def _is_safe_file_path(self, file_path: str) -> bool:
|
||||
"""Check if file path is safe (no directory traversal)."""
|
||||
if not file_path:
|
||||
return False
|
||||
|
||||
# Normalize path
|
||||
normalized = os.path.normpath(file_path)
|
||||
|
||||
# Check for directory traversal attempts
|
||||
if normalized.startswith("..") or "/.." in normalized or "\\.." in normalized:
|
||||
return False
|
||||
|
||||
# Check for absolute paths
|
||||
if os.path.isabs(normalized):
|
||||
return False
|
||||
|
||||
# Check for unsafe characters
|
||||
unsafe_chars = {"<", ">", ":", "\"", "|", "?", "*", "\0"}
|
||||
if any(char in file_path for char in unsafe_chars):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _matches_pattern(self, file_path: str, pattern: str) -> bool:
|
||||
"""Check if file path matches the given pattern."""
|
||||
import fnmatch
|
||||
return fnmatch.fnmatch(file_path.lower(), pattern.lower())
|
||||
|
||||
async def _atomic_write_file(self, target_path: Path, content: str, context: ErrorContext):
    """
    Write ``content`` to ``target_path`` via a temporary file and a rename.

    The text is written as UTF-8 to a temporary file inside a ``.tmp``
    directory next to the target, flushed and fsynced, and then moved over
    the target with ``Path.replace``. The temporary file is removed if any
    step fails, and the ``.tmp`` directory is removed afterwards when it is
    empty.

    Args:
        target_path: Final location of the file; its parent must exist.
        content: Text to write.
        context: Not used by this method.

    Raises:
        Exception: Any error raised while writing or renaming is re-raised
            after the temporary file has been cleaned up.
    """
    staging_dir = target_path.parent / ".tmp"
    staging_dir.mkdir(exist_ok=True)

    # Staging below the target's own parent directory keeps the temporary
    # file close to its final location for the rename.
    fd, staged_name = tempfile.mkstemp(
        suffix=".tmp",
        prefix=f".tmp_{target_path.name}_",
        dir=staging_dir
    )

    try:
        try:
            with os.fdopen(fd, 'w', encoding='utf-8') as handle:
                handle.write(content)
                handle.flush()
                # Push the data to disk before the rename makes it visible.
                os.fsync(handle.fileno())

            Path(staged_name).replace(target_path)
        except Exception:
            # Best-effort removal of the staged file, then propagate.
            try:
                os.unlink(staged_name)
            except OSError:
                pass
            raise
    finally:
        # rmdir() fails when another write still has a file staged there;
        # that is fine, the directory is simply left in place.
        try:
            staging_dir.rmdir()
        except OSError:
            pass
|
||||
|
||||
async def _write_json_file(self, file_path: Path, data: dict, context: Optional[ErrorContext] = None):
    """
    Serialize ``data`` as JSON and store it through ``_atomic_write_file``.

    The document is rendered with an indent of two spaces.
    """
    from json import dumps

    serialized = dumps(data, indent=2)
    await self._atomic_write_file(file_path, serialized, context)
|
||||
|
||||
async def _read_json_file(self, file_path: Path) -> dict:
|
||||
"""Read JSON data from file."""
|
||||
import json
|
||||
content = file_path.read_text(encoding='utf-8')
|
||||
return json.loads(content)
|
||||
|
||||
def _map_os_error_to_exception(self, os_error: OSError, operation: str, context: ErrorContext):
    """
    Translate an ``OSError`` into the matching domain exception.

    The exception is returned, not raised, so callers can write
    ``raise self._map_os_error_to_exception(...)``.

    Mapping by ``os_error.errno``:
        ENOENT -> ResourceNotFoundError
        EACCES -> ValidationError ("Permission denied")
        ENOSPC -> DatabaseError ("Insufficient disk space ...")
        EEXIST -> DuplicateResourceError
        anything else -> DatabaseError ("Filesystem error ...")

    Args:
        os_error: The operating-system error to translate.
        operation: Short description of the failed operation, used in the
            resulting exception.
        context: Error context passed on to the resulting exception.

    Returns:
        The domain exception instance to raise.
    """
    # Named constants instead of bare numbers keep the mapping readable.
    import errno

    from infrastructure.exceptions import (
        ResourceNotFoundError, ValidationError, DatabaseError
    )

    code = os_error.errno

    if code == errno.ENOENT:  # No such file or directory
        return ResourceNotFoundError("File", operation, context)
    if code == errno.EACCES:  # Permission denied
        return ValidationError("permissions", operation, "Permission denied", context)
    if code == errno.ENOSPC:  # No space left on device
        return DatabaseError(f"Insufficient disk space for {operation}", os_error, context)
    if code == errno.EEXIST:  # File exists
        return DuplicateResourceError("File", operation, context)

    return DatabaseError(f"Filesystem error during {operation}", os_error, context)
|
||||
Reference in New Issue
Block a user