Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
- Replace deprecated datetime.utcnow() with datetime.now(timezone.utc) across all domain models, services, infrastructure, and test files
- Add missing timezone imports to all affected files
- Fix pytest.ini configuration format from [tool:pytest] to [pytest]
- Remove warning suppressions to expose actual issues
- Ensure proper pytest marker registration for smoke tests

Results:
- 305 passed, 2 skipped, 0 warnings (down from 111 warnings)
- All functionality preserved with modern datetime API usage
- Improved code quality by addressing root causes vs suppression

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
495 lines
18 KiB
Python
495 lines
18 KiB
Python
"""
|
|
Filesystem repository implementation with atomic operations.
|
|
|
|
Provides reliable file operations with proper error handling,
|
|
atomic writes, and workspace management.
|
|
"""
|
|
|
|
import os
|
|
import shutil
|
|
import tempfile
|
|
import uuid
|
|
from infrastructure.logging import get_logger
|
|
from typing import List, Optional
|
|
from pathlib import Path
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
from infrastructure.repositories.interfaces import WorkspaceRepository
|
|
from infrastructure.exceptions import (
|
|
ErrorContext, OperationType, ResourceNotFoundError,
|
|
DuplicateResourceError, ValidationError
|
|
)
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
|
|
class FilesystemWorkspaceRepository(WorkspaceRepository):
|
|
"""
|
|
Filesystem implementation of WorkspaceRepository.
|
|
|
|
Provides reliable workspace and file operations with atomic writes,
|
|
proper validation, and comprehensive error handling.
|
|
"""
|
|
|
|
def __init__(self, base_workspace_dir: str = ".markitect_workspace"):
    """Set up the repository rooted at ``base_workspace_dir``.

    Args:
        base_workspace_dir: Directory that holds all workspaces. It is
            resolved to an absolute path, stored as ``self.base_path`` and
            created (parents included) when it does not exist yet.
    """
    resolved_root = Path(base_workspace_dir).resolve()
    self.base_path = resolved_root
    resolved_root.mkdir(parents=True, exist_ok=True)
    logger.info(f"Initialized workspace repository at {self.base_path}")
|
|
|
|
async def create_workspace(
    self,
    workspace_id: str,
    base_path: Path,
    context: Optional[ErrorContext] = None
) -> Path:
    """Create a new workspace directory with its standard layout.

    The workspace lives at ``self.base_path / workspace_id`` and receives
    ``files``, ``temp`` and ``logs`` subdirectories plus a
    ``.workspace_meta.json`` metadata file.

    Args:
        workspace_id: Identifier of the workspace; must pass
            ``_is_valid_workspace_id``.
        base_path: Accepted for interface compatibility. This implementation
            does not read it; the workspace is always created under
            ``self.base_path``.
        context: Error context attached to raised exceptions. A default one
            is built when omitted.

    Returns:
        Path of the newly created workspace directory.

    Raises:
        ValidationError: If ``workspace_id`` is not a valid identifier.
        DuplicateResourceError: If the workspace already exists, including
            when another caller creates it between the existence check and
            the ``mkdir`` call.
        Exception: The domain exception returned by
            ``_map_os_error_to_exception`` for any other filesystem error.
    """
    if context is None:
        context = ErrorContext(
            operation_id=f"create_workspace_{workspace_id}",
            operation_type=OperationType.WRITE,
            resource_type="Workspace",
            resource_id=workspace_id
        )

    # Validate workspace ID
    if not self._is_valid_workspace_id(workspace_id):
        raise ValidationError(
            "workspace_id",
            workspace_id,
            "Workspace ID must be alphanumeric with optional dashes and underscores",
            context
        )

    workspace_path = self.base_path / workspace_id

    # Check if workspace already exists
    if workspace_path.exists():
        raise DuplicateResourceError("Workspace", workspace_id, context)

    try:
        # Create workspace directory with proper permissions
        workspace_path.mkdir(parents=True, exist_ok=False, mode=0o755)
    except FileExistsError as e:
        # The directory appeared after the exists() check above. It belongs
        # to whoever created it, so report a duplicate and do NOT run the
        # cleanup below, which would delete that other workspace.
        raise DuplicateResourceError("Workspace", workspace_id, context) from e
    except OSError as e:
        logger.error(f"Failed to create workspace {workspace_id}: {e}")
        raise self._map_os_error_to_exception(
            e, f"create workspace {workspace_id}", context
        ) from e

    # From here on the directory was created by this call, so removing it on
    # failure cannot destroy anyone else's data.
    try:
        # Create standard subdirectories
        for subdirectory in ("files", "temp", "logs"):
            (workspace_path / subdirectory).mkdir(exist_ok=True)

        # Create workspace metadata file
        metadata = {
            "id": workspace_id,
            "created_at": datetime.now(timezone.utc).isoformat(),
            "version": "1.0",
            "type": "markitect_workspace"
        }

        await self._write_json_file(
            workspace_path / ".workspace_meta.json",
            metadata,
            context
        )

        logger.info(f"Created workspace: {workspace_id}")
        return workspace_path

    except OSError as e:
        logger.error(f"Failed to create workspace {workspace_id}: {e}")
        # Cleanup partial creation
        shutil.rmtree(workspace_path, ignore_errors=True)

        raise self._map_os_error_to_exception(
            e, f"create workspace {workspace_id}", context
        ) from e
|
|
|
|
async def get_workspace_path(
    self,
    workspace_id: str,
    context: Optional[ErrorContext] = None
) -> Path:
    """Return the directory of an existing workspace.

    Args:
        workspace_id: Identifier of the workspace to look up.
        context: Error context attached to raised exceptions. A default one
            is built when omitted.

    Returns:
        ``self.base_path / workspace_id``.

    Raises:
        ResourceNotFoundError: If ``workspace_id`` is not a valid workspace
            identifier, or no directory with that name exists under
            ``self.base_path``.
    """
    if context is None:
        context = ErrorContext(
            operation_id=f"get_workspace_path_{workspace_id}",
            operation_type=OperationType.READ,
            resource_type="Workspace",
            resource_id=workspace_id
        )

    # create_workspace only ever creates IDs that pass this check, so an ID
    # that fails it cannot name a workspace. Rejecting it here keeps values
    # such as "", ".." or "a/../.." from resolving to the base directory or
    # to a location outside it; delete_workspace passes the returned path
    # straight to shutil.rmtree.
    if not self._is_valid_workspace_id(workspace_id):
        raise ResourceNotFoundError("Workspace", workspace_id, context)

    workspace_path = self.base_path / workspace_id

    # is_dir() is already False for a missing path.
    if not workspace_path.is_dir():
        raise ResourceNotFoundError("Workspace", workspace_id, context)

    return workspace_path
|
|
|
|
async def list_workspaces(
    self,
    context: Optional[ErrorContext] = None
) -> List[str]:
    """Return the sorted names of all workspaces under ``self.base_path``.

    A directory counts as a workspace when its name is a valid workspace ID
    and it contains a ``.workspace_meta.json`` file.

    Args:
        context: Error context attached to raised exceptions. A default one
            is built when omitted.

    Returns:
        Sorted workspace names; an empty list when the base directory does
        not exist.

    Raises:
        Exception: The domain exception returned by
            ``_map_os_error_to_exception`` when the directory scan fails
            with an ``OSError``.
    """
    if context is None:
        context = ErrorContext(
            operation_id="list_workspaces",
            operation_type=OperationType.READ,
            resource_type="Workspace"
        )

    try:
        if not self.base_path.exists():
            return []

        names = [
            entry.name
            for entry in self.base_path.iterdir()
            if entry.is_dir()
            and self._is_valid_workspace_id(entry.name)
            # The metadata file is what marks a directory as a workspace.
            and (entry / ".workspace_meta.json").exists()
        ]
        names.sort()
        return names

    except OSError as err:
        logger.error(f"Failed to list workspaces: {err}")
        raise self._map_os_error_to_exception(err, "list workspaces", context)
|
|
|
|
async def write_file(
    self,
    workspace_id: str,
    file_path: str,
    content: str,
    context: Optional[ErrorContext] = None
) -> Path:
    """Write ``content`` to ``files/<file_path>`` inside a workspace.

    The write goes through ``_atomic_write_file``. Missing parent
    directories of the target are created first.

    Args:
        workspace_id: Workspace that receives the file.
        file_path: Path of the file relative to the workspace's ``files``
            directory.
        content: Text to write.
        context: Error context attached to raised exceptions. A default one
            is built when omitted.

    Returns:
        Path of the written file.

    Raises:
        ResourceNotFoundError: If the workspace does not exist.
        ValidationError: If ``file_path`` is unsafe, has an extension
            outside the allowed set, or the UTF-8 encoded content is larger
            than 100MB.
        Exception: The domain exception returned by
            ``_map_os_error_to_exception`` when the write fails with an
            ``OSError``.
    """
    if context is None:
        context = ErrorContext(
            operation_id=f"write_file_{workspace_id}_{file_path}",
            operation_type=OperationType.WRITE,
            resource_type="WorkspaceFile",
            resource_id=f"{workspace_id}/{file_path}",
            request_data={"content_length": len(content)}
        )

    # The workspace lookup comes first so a missing workspace is reported
    # before any complaint about the file itself.
    workspace_root = await self.get_workspace_path(workspace_id, context)

    if not self._is_safe_file_path(file_path):
        raise ValidationError(
            "file_path",
            file_path,
            "File path contains invalid characters or attempts directory traversal",
            context
        )

    # Paths without any extension are accepted; only a present extension is
    # checked against the allow-list.
    permitted_suffixes = (
        ".md", ".txt", ".py", ".js", ".json", ".yaml", ".yml", ".rst", ".csv"
    )
    suffix = Path(file_path).suffix.lower()
    if suffix and suffix not in permitted_suffixes:
        raise ValidationError(
            "file_path",
            file_path,
            f"File extension {suffix} is not allowed",
            context
        )

    # The limit applies to the encoded size (100MB), not the character count.
    byte_limit = 100 * 1024 * 1024
    encoded_length = len(content.encode('utf-8'))
    if encoded_length > byte_limit:
        raise ValidationError(
            "content",
            f"{len(content)} characters",
            f"File content exceeds maximum size of {byte_limit} bytes",
            context
        )

    destination = workspace_root / "files" / file_path

    try:
        destination.parent.mkdir(parents=True, exist_ok=True)
        await self._atomic_write_file(destination, content, context)

        logger.info(f"Wrote file {file_path} in workspace {workspace_id}")
        return destination

    except OSError as err:
        logger.error(f"Failed to write file {file_path} in workspace {workspace_id}: {err}")
        raise self._map_os_error_to_exception(err, f"write file {file_path}", context)
|
|
|
|
async def read_file(
    self,
    workspace_id: str,
    file_path: str,
    context: Optional[ErrorContext] = None
) -> str:
    """Read ``files/<file_path>`` of a workspace as UTF-8 text.

    Args:
        workspace_id: Workspace that holds the file.
        file_path: Path of the file relative to the workspace's ``files``
            directory.
        context: Error context attached to raised exceptions. A default one
            is built when omitted.

    Returns:
        The decoded file content.

    Raises:
        ResourceNotFoundError: If the workspace or the file does not exist.
        ValidationError: If ``file_path`` is unsafe, the path is not a
            regular file, or the file is not valid UTF-8.
        Exception: The domain exception returned by
            ``_map_os_error_to_exception`` when reading fails with an
            ``OSError``.
    """
    if context is None:
        context = ErrorContext(
            operation_id=f"read_file_{workspace_id}_{file_path}",
            operation_type=OperationType.READ,
            resource_type="WorkspaceFile",
            resource_id=f"{workspace_id}/{file_path}"
        )

    workspace_root = await self.get_workspace_path(workspace_id, context)

    if not self._is_safe_file_path(file_path):
        raise ValidationError(
            "file_path",
            file_path,
            "File path contains invalid characters or attempts directory traversal",
            context
        )

    source = workspace_root / "files" / file_path

    if not source.exists():
        raise ResourceNotFoundError("File", f"{workspace_id}/{file_path}", context)

    if not source.is_file():
        raise ValidationError(
            "file_path",
            file_path,
            "Path exists but is not a regular file",
            context
        )

    try:
        # Decoded strictly as UTF-8; there is no fallback to other encodings.
        text = source.read_text(encoding='utf-8')

        logger.debug(f"Read file {file_path} from workspace {workspace_id}")
        return text

    except OSError as err:
        logger.error(f"Failed to read file {file_path} from workspace {workspace_id}: {err}")
        raise self._map_os_error_to_exception(err, f"read file {file_path}", context)

    except UnicodeDecodeError as err:
        logger.error(f"Failed to decode file {file_path} as UTF-8: {err}")
        raise ValidationError(
            "file_content",
            "binary data",
            "File does not contain valid UTF-8 text",
            context
        )
|
|
|
|
async def delete_workspace(
    self,
    workspace_id: str,
    context: Optional[ErrorContext] = None
) -> bool:
    """Remove a workspace directory together with everything inside it.

    Args:
        workspace_id: Identifier of the workspace to delete.
        context: Error context attached to raised exceptions. A default one
            is built when omitted.

    Returns:
        ``True`` once the directory tree has been removed.

    Raises:
        ResourceNotFoundError: If the workspace does not exist.
        Exception: The domain exception returned by
            ``_map_os_error_to_exception`` when removal fails with an
            ``OSError``.
    """
    if context is None:
        context = ErrorContext(
            operation_id=f"delete_workspace_{workspace_id}",
            operation_type=OperationType.DELETE,
            resource_type="Workspace",
            resource_id=workspace_id
        )

    doomed_dir = await self.get_workspace_path(workspace_id, context)

    try:
        # Recursive removal of the whole tree.
        shutil.rmtree(doomed_dir)
    except OSError as err:
        logger.error(f"Failed to delete workspace {workspace_id}: {err}")
        raise self._map_os_error_to_exception(err, f"delete workspace {workspace_id}", context)

    logger.info(f"Deleted workspace: {workspace_id}")
    return True
|
|
|
|
async def list_files(
    self,
    workspace_id: str,
    pattern: Optional[str] = None,
    context: Optional[ErrorContext] = None
) -> List[str]:
    """Return the sorted relative paths of the files stored in a workspace.

    Args:
        workspace_id: Workspace whose ``files`` directory is scanned
            recursively.
        pattern: Optional glob pattern; when given, only paths accepted by
            ``_matches_pattern`` are returned.
        context: Error context attached to raised exceptions. A default one
            is built when omitted.

    Returns:
        Sorted paths relative to the ``files`` directory; an empty list when
        that directory does not exist.

    Raises:
        ResourceNotFoundError: If the workspace does not exist.
        Exception: The domain exception returned by
            ``_map_os_error_to_exception`` when the scan fails with an
            ``OSError``.
    """
    if context is None:
        context = ErrorContext(
            operation_id=f"list_files_{workspace_id}",
            operation_type=OperationType.READ,
            resource_type="WorkspaceFile",
            metadata={"workspace_id": workspace_id, "pattern": pattern}
        )

    workspace_root = await self.get_workspace_path(workspace_id, context)
    files_dir = workspace_root / "files"

    if not files_dir.exists():
        return []

    try:
        matched = []

        for entry in files_dir.rglob("*"):
            if not entry.is_file():
                continue

            # Paths are reported relative to the files directory.
            relative_name = str(entry.relative_to(files_dir))

            if pattern is not None and not self._matches_pattern(relative_name, pattern):
                continue

            matched.append(relative_name)

        matched.sort()
        return matched

    except OSError as err:
        logger.error(f"Failed to list files in workspace {workspace_id}: {err}")
        raise self._map_os_error_to_exception(err, f"list files in workspace {workspace_id}", context)
|
|
|
|
async def cleanup_old_workspaces(self, days_threshold: int = 30) -> int:
    """Delete workspaces whose recorded creation time is too old.

    Each directory under ``self.base_path`` that has a
    ``.workspace_meta.json`` with a ``created_at`` value is compared with
    "now minus ``days_threshold`` days" (UTC). Directories without metadata
    or without ``created_at`` are left alone. A failure while handling one
    workspace is logged and does not stop the run.

    Args:
        days_threshold: Minimum age in days for a workspace to be deleted.

    Returns:
        Number of workspaces deleted. If the run is aborted by an unexpected
        error, the number deleted up to that point.
    """
    logger.info(f"Starting cleanup of workspaces older than {days_threshold} days")

    # Defined before the try block so the outer handler can report what was
    # already deleted instead of a misleading 0.
    deleted_count = 0

    try:
        cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_threshold)

        if not self.base_path.exists():
            return 0

        for workspace_dir in self.base_path.iterdir():
            if not workspace_dir.is_dir():
                continue

            try:
                # Check workspace metadata for creation date
                metadata_file = workspace_dir / ".workspace_meta.json"
                if not metadata_file.exists():
                    continue

                metadata = await self._read_json_file(metadata_file)
                created_at_str = metadata.get("created_at")

                if not created_at_str:
                    continue

                created_at = datetime.fromisoformat(created_at_str.replace("Z", "+00:00"))

                # A timestamp stored without an offset parses as a naive
                # datetime. Comparing naive with aware raises TypeError,
                # which the handler below would swallow, so that workspace
                # would never be cleaned up. Treat naive values as UTC.
                if created_at.tzinfo is None:
                    created_at = created_at.replace(tzinfo=timezone.utc)

                if created_at < cutoff_date:
                    await self.delete_workspace(workspace_dir.name)
                    deleted_count += 1
                    logger.info(f"Cleaned up old workspace: {workspace_dir.name}")

            except Exception as e:
                # Deliberate best-effort: one broken workspace must not
                # prevent the others from being processed.
                logger.warning(f"Failed to process workspace {workspace_dir.name} during cleanup: {e}")
                continue

        logger.info(f"Cleanup completed: deleted {deleted_count} old workspaces")
        return deleted_count

    except Exception as e:
        logger.error(f"Error during workspace cleanup: {e}")
        return deleted_count
|
|
|
|
# Helper methods
|
|
|
|
def _is_valid_workspace_id(self, workspace_id: str) -> bool:
|
|
"""Validate workspace ID format."""
|
|
if not workspace_id or len(workspace_id) > 100:
|
|
return False
|
|
|
|
# Allow alphanumeric, dash, underscore
|
|
import re
|
|
return re.match(r'^[a-zA-Z0-9_-]+$', workspace_id) is not None
|
|
|
|
def _is_safe_file_path(self, file_path: str) -> bool:
|
|
"""Check if file path is safe (no directory traversal)."""
|
|
if not file_path:
|
|
return False
|
|
|
|
# Normalize path
|
|
normalized = os.path.normpath(file_path)
|
|
|
|
# Check for directory traversal attempts
|
|
if normalized.startswith("..") or "/.." in normalized or "\\.." in normalized:
|
|
return False
|
|
|
|
# Check for absolute paths
|
|
if os.path.isabs(normalized):
|
|
return False
|
|
|
|
# Check for unsafe characters
|
|
unsafe_chars = {"<", ">", ":", "\"", "|", "?", "*", "\0"}
|
|
if any(char in file_path for char in unsafe_chars):
|
|
return False
|
|
|
|
return True
|
|
|
|
def _matches_pattern(self, file_path: str, pattern: str) -> bool:
|
|
"""Check if file path matches the given pattern."""
|
|
import fnmatch
|
|
return fnmatch.fnmatch(file_path.lower(), pattern.lower())
|
|
|
|
async def _atomic_write_file(self, target_path: Path, content: str, context: ErrorContext):
    """Write ``content`` to ``target_path`` via a temporary file and a rename.

    The text is written to a temporary file inside a ``.tmp`` directory next
    to the target, flushed and fsynced, and then moved over the target.

    Args:
        target_path: Final location of the file; its parent directory must
            already exist.
        content: Text to write, encoded as UTF-8.
        context: Not read by this method.

    Raises:
        Exception: Whatever the write or the rename raised; the temporary
            file is removed (best effort) before re-raising.
    """
    staging_dir = target_path.parent / ".tmp"
    staging_dir.mkdir(exist_ok=True)

    # Staging below the target's own parent keeps the temporary file close
    # to its destination for the final replace.
    handle_fd, staged_name = tempfile.mkstemp(
        suffix=".tmp",
        prefix=f".tmp_{target_path.name}_",
        dir=staging_dir
    )

    try:
        with os.fdopen(handle_fd, 'w', encoding='utf-8') as stream:
            stream.write(content)
            stream.flush()
            # Push the data to disk before the file becomes visible under
            # its final name.
            os.fsync(stream.fileno())

        # Move the finished file over the target.
        Path(staged_name).replace(target_path)

    except Exception:
        # Do not leave the staged file behind on failure.
        try:
            os.unlink(staged_name)
        except OSError:
            pass
        raise

    finally:
        # Drop the staging directory when nothing else is using it.
        try:
            staging_dir.rmdir()
        except OSError:
            pass  # Directory not empty or doesn't exist
|
|
|
|
async def _write_json_file(self, file_path: Path, data: dict, context: Optional[ErrorContext] = None):
    """Serialize ``data`` as indented JSON and write it to ``file_path``.

    Args:
        file_path: Destination file.
        data: Mapping to serialize.
        context: Passed through to ``_atomic_write_file``.
    """
    from json import dumps

    serialized = dumps(data, indent=2)
    await self._atomic_write_file(file_path, serialized, context)
|
|
|
|
async def _read_json_file(self, file_path: Path) -> dict:
|
|
"""Read JSON data from file."""
|
|
import json
|
|
content = file_path.read_text(encoding='utf-8')
|
|
return json.loads(content)
|
|
|
|
def _map_os_error_to_exception(self, os_error: OSError, operation: str, context: ErrorContext):
    """Translate an ``OSError`` into the matching domain exception.

    The exception is returned, not raised; callers raise the result.

    Args:
        os_error: The filesystem error to translate.
        operation: Short description of what was being attempted; used as
            the resource identifier or inside the error message.
        context: Error context attached to the returned exception.

    Returns:
        ``ResourceNotFoundError`` for ``ENOENT``, ``ValidationError`` for
        ``EACCES``, ``DuplicateResourceError`` for ``EEXIST``, and
        ``DatabaseError`` for ``ENOSPC`` and every other error code.
    """
    import errno

    # DuplicateResourceError is imported here with the others so this
    # function does not depend on which names the module header pulls in.
    from infrastructure.exceptions import (
        ResourceNotFoundError, ValidationError, DatabaseError,
        DuplicateResourceError
    )

    code = os_error.errno

    # Symbolic errno constants instead of the bare integers 2, 13, 28, 17.
    if code == errno.ENOENT:  # No such file or directory
        return ResourceNotFoundError("File", operation, context)
    if code == errno.EACCES:  # Permission denied
        return ValidationError("permissions", operation, "Permission denied", context)
    if code == errno.ENOSPC:  # No space left on device
        return DatabaseError(f"Insufficient disk space for {operation}", os_error, context)
    if code == errno.EEXIST:  # File exists
        return DuplicateResourceError("File", operation, context)

    return DatabaseError(f"Filesystem error during {operation}", os_error, context)