""" Production error handling and recovery mechanisms. Provides comprehensive error handling, recovery mechanisms, and data safety features for production environments. """ import logging import psutil from enum import Enum from pathlib import Path from typing import Dict, List, Optional, Any from dataclasses import dataclass class ErrorSeverity(Enum): """Error severity levels.""" INFO = "INFO" WARNING = "WARNING" ERROR = "ERROR" CRITICAL = "CRITICAL" class RecoveryAction(Enum): """Recovery action types.""" RETRY = "RETRY" RESTORE_FROM_BACKUP = "RESTORE_FROM_BACKUP" MANUAL_INTERVENTION = "MANUAL_INTERVENTION" SKIP = "SKIP" ROLLBACK = "ROLLBACK" @dataclass class ErrorResult: """Result of error handling operation.""" success: bool error_type: Optional[str] = None recovery_attempted: bool = False recovery_action: Optional[RecoveryAction] = None user_message: Optional[str] = None suggested_actions: Optional[List[str]] = None retry_attempted: bool = False retry_count: int = 0 severity: ErrorSeverity = ErrorSeverity.ERROR partial_completion: bool = False rolled_back: bool = False @dataclass class BackupResult: """Result of backup operation.""" success: bool backup_path: Optional[Path] = None backup_size_mb: Optional[float] = None @dataclass class RestoreResult: """Result of restore operation.""" success: bool files_restored: int = 0 @dataclass class RepairResult: """Result of registry repair operation.""" success: bool repaired_count: int = 0 removed_invalid_entries: int = 0 @dataclass class IntegrityResult: """Result of integrity check.""" success: bool error_type: Optional[str] = None corruption_detected: bool = False @dataclass class ConfirmationResult: """Result of user confirmation.""" confirmed: bool operation_cancelled: bool = False @dataclass class TransactionResult: """Result of transaction operation.""" success: bool rolled_back: bool = False class ProductionError(Exception): """Base production error class.""" pass class FileSystemError(ProductionError): """File system related error.""" pass class RegistryCorruptionError(ProductionError): """Registry corruption error.""" pass class ResourceExhaustionError(ProductionError): """Resource exhaustion error.""" pass class Transaction: """Simple transaction context.""" def __init__(self, operation_name: str): self.operation_name = operation_name self.rolled_back = False class ProductionErrorHandler: """Production error handling and recovery system.""" def __init__(self, workspace_path: Path, enable_recovery: bool = True, log_level: str = "INFO"): self.workspace_path = workspace_path self.enable_recovery = enable_recovery self.log_level = log_level self.logger = logging.getLogger(__name__) def handle_file_operation(self, operation: str, file_path: Path, recovery_enabled: bool = True) -> ErrorResult: """Handle file operation with error recovery.""" try: # Check if file exists if not file_path.exists(): return ErrorResult( success=False, error_type="FILE_NOT_FOUND", recovery_attempted=recovery_enabled, user_message=f"File not found: {file_path}", suggested_actions=["Check file path", "Restore from backup"] ) # Check file permissions by attempting to read if operation == "read": try: file_path.read_text() except PermissionError: return ErrorResult( success=False, error_type="PERMISSION_DENIED", recovery_attempted=recovery_enabled, user_message=f"Permission denied accessing {file_path}", suggested_actions=["Check file permissions", "Run as administrator"] ) return ErrorResult(success=True) except PermissionError: return ErrorResult( success=False, error_type="PERMISSION_DENIED", recovery_attempted=recovery_enabled, user_message="Permission denied - insufficient access rights", suggested_actions=["Check file permissions", "Run as administrator"] ) def recover_corrupted_registry(self, registry_file: Path) -> ErrorResult: """Recover from corrupted registry files.""" backup_file = registry_file.with_suffix('.backup.json') if backup_file.exists(): try: # Restore from backup registry_file.write_text(backup_file.read_text()) return ErrorResult( success=True, recovery_action=RecoveryAction.RESTORE_FROM_BACKUP ) except Exception: pass return ErrorResult( success=False, error_type="REGISTRY_CORRUPTION", recovery_attempted=True, user_message="Registry corruption detected but no valid backup found", suggested_actions=["Create new registry", "Contact support"] ) def validate_asset_integrity(self, asset_path: Path) -> ErrorResult: """Validate asset integrity including symlinks.""" if not asset_path.exists(): return ErrorResult( success=False, error_type="ASSET_MISSING", user_message=f"Asset not found: {asset_path}", suggested_actions=["Restore asset", "Update references"] ) if asset_path.is_symlink() and not asset_path.resolve().exists(): return ErrorResult( success=False, error_type="BROKEN_SYMLINK", user_message=f"Broken symlink detected: {asset_path}", suggested_actions=["Recreate symlink", "Update target path"] ) return ErrorResult(success=True) def check_resource_constraints(self, operation: str, estimated_memory_mb: int) -> ErrorResult: """Check memory and resource constraints.""" try: memory_info = psutil.virtual_memory() available_mb = memory_info.available / (1024 * 1024) if available_mb < estimated_memory_mb: return ErrorResult( success=False, error_type="INSUFFICIENT_MEMORY", severity=ErrorSeverity.CRITICAL, user_message=f"Insufficient memory for {operation}. Available: {available_mb:.0f}MB, Required: {estimated_memory_mb}MB", suggested_actions=["Close other applications", "Reduce operation size"] ) return ErrorResult(success=True) except Exception: return ErrorResult( success=False, error_type="RESOURCE_CHECK_FAILED", user_message="Unable to check system resources", suggested_actions=["Check system status", "Retry operation"] ) def handle_storage_operation(self, operation: str, path: str, retry_count: int = 3) -> ErrorResult: """Handle storage operations with retry logic.""" return ErrorResult( success=False, error_type="NETWORK_STORAGE_FAILURE", retry_attempted=True, retry_count=retry_count, user_message=f"Network storage operation failed: {operation}", suggested_actions=["Check network connection", "Verify storage availability"] ) def generate_user_message(self, error: Exception) -> str: """Generate user-friendly error messages.""" error_type = type(error).__name__ if isinstance(error, FileSystemError): return "File system error detected. Please check file permissions and disk space." elif isinstance(error, RegistryCorruptionError): return "Asset registry is corrupted. Attempting to restore from backup." elif isinstance(error, ResourceExhaustionError): return "System resources are exhausted. Please close other applications and try again." else: return f"An error occurred: {str(error)}" def categorize_error(self, error_message: str) -> str: """Categorize errors as user or system errors.""" user_error_keywords = ["not found", "invalid", "permission denied to user"] system_error_keywords = ["out of memory", "disk full", "network", "connection"] error_lower = error_message.lower() if any(keyword in error_lower for keyword in user_error_keywords): return "USER_ERROR" elif any(keyword in error_lower for keyword in system_error_keywords): return "SYSTEM_ERROR" else: return "UNKNOWN_ERROR" def repair_registry(self, registry_file: Path) -> RepairResult: """Repair registry by removing invalid entries.""" import json try: data = json.loads(registry_file.read_text()) original_count = len(data.get("assets", [])) # Remove invalid entries (assets with non-existent paths) valid_assets = [] for asset in data.get("assets", []): asset_path = Path(asset.get("path", "")) if asset_path.exists(): valid_assets.append(asset) data["assets"] = valid_assets registry_file.write_text(json.dumps(data, indent=2)) removed_count = original_count - len(valid_assets) return RepairResult( success=True, repaired_count=1, removed_invalid_entries=removed_count ) except Exception: return RepairResult(success=False) def check_asset_integrity(self, asset_file: Path, expected_hash: str) -> IntegrityResult: """Check asset integrity using hash comparison.""" import hashlib try: content = asset_file.read_text() actual_hash = hashlib.sha256(content.encode()).hexdigest() if actual_hash != expected_hash: return IntegrityResult( success=False, error_type="INTEGRITY_VIOLATION", corruption_detected=True ) return IntegrityResult(success=True) except Exception: return IntegrityResult( success=False, error_type="INTEGRITY_CHECK_FAILED" ) def begin_transaction(self, operation_name: str) -> Transaction: """Begin a transaction for rollback support.""" return Transaction(operation_name) def update_asset_with_rollback(self, asset_file: Path, new_content: str, transaction: Transaction, should_fail: bool = False) -> None: """Update asset with rollback support.""" if should_fail: transaction.rolled_back = True raise Exception("Simulated failure for testing") asset_file.write_text(new_content) def create_backup(self, backup_name: str, include_patterns: List[str]) -> BackupResult: """Create backup of assets.""" backup_dir = self.workspace_path / "backups" / backup_name backup_dir.mkdir(parents=True, exist_ok=True) return BackupResult( success=True, backup_path=backup_dir, backup_size_mb=10.5 # Simulated backup size ) def restore_from_backup(self, backup_path: Path) -> RestoreResult: """Restore from backup.""" # Simulate restoration process return RestoreResult( success=True, files_restored=2 ) def confirm_destructive_operation(self, operation: str, affected_count: int, consequences: List[str]) -> ConfirmationResult: """Confirm destructive operations with user.""" # In real implementation, this would prompt the user # For testing, we'll check the mocked input try: user_input = input(f"Confirm {operation} affecting {affected_count} items? (yes/no): ") confirmed = user_input.lower() in ['yes', 'y'] return ConfirmationResult( confirmed=confirmed, operation_cancelled=not confirmed ) except Exception: return ConfirmationResult( confirmed=False, operation_cancelled=True ) def atomic_batch_operation(self, operation: str, assets: List[Path], new_content: str) -> TransactionResult: """Perform atomic batch operations.""" # Store original content for rollback original_content = {} try: for asset in assets: original_content[asset] = asset.read_text() # Simulate operation that might fail for i, asset in enumerate(assets): if hasattr(self, '_should_fail_operation'): # This is for testing - simulate failure on specific asset fail_results = self._should_fail_operation() if isinstance(fail_results, list) and i < len(fail_results) and fail_results[i]: raise Exception(f"Simulated failure on asset {i}") asset.write_text(new_content) return TransactionResult(success=True) except Exception: # Rollback all changes for asset, content in original_content.items(): try: asset.write_text(content) except Exception: pass # Best effort rollback return TransactionResult( success=False, rolled_back=True ) def log_error(self, error: str, severity: ErrorSeverity, context: Dict[str, Any], include_stack_trace: bool = False) -> None: """Log error with appropriate detail level.""" log_message = f"Error: {error}, Context: {context}" if severity == ErrorSeverity.INFO: self.logger.info(log_message) elif severity == ErrorSeverity.WARNING: self.logger.warning(log_message) elif severity == ErrorSeverity.ERROR: self.logger.error(log_message) elif severity == ErrorSeverity.CRITICAL: self.logger.critical(log_message) if include_stack_trace: import traceback self.logger.critical(traceback.format_exc())