# markitect-main/markitect/production/error_handler.py

"""
Production error handling and recovery mechanisms.

Provides comprehensive error handling, recovery mechanisms, and data safety features
for production environments.
"""

import logging
import psutil
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Any
from dataclasses import dataclass


class ErrorSeverity(Enum):
    """Severity labels for reported errors.

    Each member's value is its own name as an upper-case string.
    """

    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"


class RecoveryAction(Enum):
    """Kinds of recovery step that can be recorded on an error result.

    Each member's value is its own name as an upper-case string.
    """

    RETRY = "RETRY"
    RESTORE_FROM_BACKUP = "RESTORE_FROM_BACKUP"
    MANUAL_INTERVENTION = "MANUAL_INTERVENTION"
    SKIP = "SKIP"
    ROLLBACK = "ROLLBACK"


@dataclass
class ErrorResult:
    """Outcome record produced by the error-handling checks.

    Only ``success`` is required; every other field has a default.
    """
    # True when the checked operation went through.
    success: bool
    # Short machine-readable code (e.g. "FILE_NOT_FOUND"); None when unset.
    error_type: Optional[str] = None
    # Flag reporting whether recovery was attempted.
    recovery_attempted: bool = False
    # Which recovery step was applied, if any.
    recovery_action: Optional[RecoveryAction] = None
    # Human-readable explanation of the problem.
    user_message: Optional[str] = None
    # Follow-up steps to suggest to the user.
    suggested_actions: Optional[List[str]] = None
    # Retry bookkeeping.
    retry_attempted: bool = False
    retry_count: int = 0
    # Severity of the problem; defaults to ERROR.
    severity: ErrorSeverity = ErrorSeverity.ERROR
    # Flags for partially completed / rolled back work.
    partial_completion: bool = False
    rolled_back: bool = False


@dataclass
class BackupResult:
    """Outcome record for a backup operation."""
    # True when the backup succeeded.
    success: bool
    # Location of the backup; None when unset.
    backup_path: Optional[Path] = None
    # Size of the backup in megabytes; None when unset.
    backup_size_mb: Optional[float] = None


@dataclass
class RestoreResult:
    """Outcome record for a restore operation."""
    # True when the restore succeeded.
    success: bool
    # Number of files restored; defaults to 0.
    files_restored: int = 0


@dataclass
class RepairResult:
    """Outcome record for a registry repair."""
    # True when the repair ran to completion.
    success: bool
    # Count of repairs performed; defaults to 0.
    repaired_count: int = 0
    # Count of invalid entries dropped from the registry; defaults to 0.
    removed_invalid_entries: int = 0


@dataclass
class IntegrityResult:
    """Outcome record for an integrity check."""
    # True when the check passed.
    success: bool
    # Short machine-readable failure code; None when unset.
    error_type: Optional[str] = None
    # True when the check found corrupted content.
    corruption_detected: bool = False


@dataclass
class ConfirmationResult:
    """Outcome record for a user confirmation prompt."""
    # True when the user approved the operation.
    confirmed: bool
    # True when the operation was called off; defaults to False.
    operation_cancelled: bool = False


@dataclass
class TransactionResult:
    """Outcome record for a transactional operation."""
    # True when the whole operation was applied.
    success: bool
    # True when changes were undone; defaults to False.
    rolled_back: bool = False


class ProductionError(Exception):
    """Root exception type for production errors; adds nothing to ``Exception``."""


class FileSystemError(ProductionError):
    """Production error for file-system problems."""


class RegistryCorruptionError(ProductionError):
    """Production error for a corrupted registry."""


class ResourceExhaustionError(ProductionError):
    """Production error for exhausted system resources."""


class Transaction:
    """Minimal transaction record: an operation name plus a rolled-back flag."""

    def __init__(self, operation_name: str):
        # A freshly created transaction has not been rolled back.
        self.operation_name, self.rolled_back = operation_name, False


class ProductionErrorHandler:
    """Production error handling and recovery system."""

    def __init__(self, workspace_path: Path, enable_recovery: bool = True, log_level: str = "INFO"):
        self.workspace_path = workspace_path
        self.enable_recovery = enable_recovery
        self.log_level = log_level
        self.logger = logging.getLogger(__name__)

    def handle_file_operation(self, operation: str, file_path: Path, recovery_enabled: bool = True) -> ErrorResult:
        """Handle file operation with error recovery."""
        try:
            # Check if file exists
            if not file_path.exists():
                return ErrorResult(
                    success=False,
                    error_type="FILE_NOT_FOUND",
                    recovery_attempted=recovery_enabled,
                    user_message=f"File not found: {file_path}",
                    suggested_actions=["Check file path", "Restore from backup"]
                )

            # Check file permissions by attempting to read
            if operation == "read":
                try:
                    file_path.read_text()
                except PermissionError:
                    return ErrorResult(
                        success=False,
                        error_type="PERMISSION_DENIED",
                        recovery_attempted=recovery_enabled,
                        user_message=f"Permission denied accessing {file_path}",
                        suggested_actions=["Check file permissions", "Run as administrator"]
                    )

            return ErrorResult(success=True)

        except PermissionError:
            return ErrorResult(
                success=False,
                error_type="PERMISSION_DENIED",
                recovery_attempted=recovery_enabled,
                user_message="Permission denied - insufficient access rights",
                suggested_actions=["Check file permissions", "Run as administrator"]
            )

    def recover_corrupted_registry(self, registry_file: Path) -> ErrorResult:
        """Recover from corrupted registry files."""
        backup_file = registry_file.with_suffix('.backup.json')

        if backup_file.exists():
            try:
                # Restore from backup
                registry_file.write_text(backup_file.read_text())
                return ErrorResult(
                    success=True,
                    recovery_action=RecoveryAction.RESTORE_FROM_BACKUP
                )
            except Exception:
                pass

        return ErrorResult(
            success=False,
            error_type="REGISTRY_CORRUPTION",
            recovery_attempted=True,
            user_message="Registry corruption detected but no valid backup found",
            suggested_actions=["Create new registry", "Contact support"]
        )

    def validate_asset_integrity(self, asset_path: Path) -> ErrorResult:
        """Validate asset integrity including symlinks."""
        if not asset_path.exists():
            return ErrorResult(
                success=False,
                error_type="ASSET_MISSING",
                user_message=f"Asset not found: {asset_path}",
                suggested_actions=["Restore asset", "Update references"]
            )

        if asset_path.is_symlink() and not asset_path.resolve().exists():
            return ErrorResult(
                success=False,
                error_type="BROKEN_SYMLINK",
                user_message=f"Broken symlink detected: {asset_path}",
                suggested_actions=["Recreate symlink", "Update target path"]
            )

        return ErrorResult(success=True)

    def check_resource_constraints(self, operation: str, estimated_memory_mb: int) -> ErrorResult:
        """Check memory and resource constraints."""
        try:
            memory_info = psutil.virtual_memory()
            available_mb = memory_info.available / (1024 * 1024)

            if available_mb < estimated_memory_mb:
                return ErrorResult(
                    success=False,
                    error_type="INSUFFICIENT_MEMORY",
                    severity=ErrorSeverity.CRITICAL,
                    user_message=f"Insufficient memory for {operation}. Available: {available_mb:.0f}MB, Required: {estimated_memory_mb}MB",
                    suggested_actions=["Close other applications", "Reduce operation size"]
                )

            return ErrorResult(success=True)

        except Exception:
            return ErrorResult(
                success=False,
                error_type="RESOURCE_CHECK_FAILED",
                user_message="Unable to check system resources",
                suggested_actions=["Check system status", "Retry operation"]
            )

    def handle_storage_operation(self, operation: str, path: str, retry_count: int = 3) -> ErrorResult:
        """Handle storage operations with retry logic."""
        return ErrorResult(
            success=False,
            error_type="NETWORK_STORAGE_FAILURE",
            retry_attempted=True,
            retry_count=retry_count,
            user_message=f"Network storage operation failed: {operation}",
            suggested_actions=["Check network connection", "Verify storage availability"]
        )

    def generate_user_message(self, error: Exception) -> str:
        """Generate user-friendly error messages."""
        error_type = type(error).__name__

        if isinstance(error, FileSystemError):
            return "File system error detected. Please check file permissions and disk space."
        elif isinstance(error, RegistryCorruptionError):
            return "Asset registry is corrupted. Attempting to restore from backup."
        elif isinstance(error, ResourceExhaustionError):
            return "System resources are exhausted. Please close other applications and try again."
        else:
            return f"An error occurred: {str(error)}"

    def categorize_error(self, error_message: str) -> str:
        """Categorize errors as user or system errors."""
        user_error_keywords = ["not found", "invalid", "permission denied to user"]
        system_error_keywords = ["out of memory", "disk full", "network", "connection"]

        error_lower = error_message.lower()

        if any(keyword in error_lower for keyword in user_error_keywords):
            return "USER_ERROR"
        elif any(keyword in error_lower for keyword in system_error_keywords):
            return "SYSTEM_ERROR"
        else:
            return "UNKNOWN_ERROR"

    def repair_registry(self, registry_file: Path) -> RepairResult:
        """Repair registry by removing invalid entries."""
        import json

        try:
            data = json.loads(registry_file.read_text())
            original_count = len(data.get("assets", []))

            # Remove invalid entries (assets with non-existent paths)
            valid_assets = []
            for asset in data.get("assets", []):
                asset_path = Path(asset.get("path", ""))
                if asset_path.exists():
                    valid_assets.append(asset)

            data["assets"] = valid_assets
            registry_file.write_text(json.dumps(data, indent=2))

            removed_count = original_count - len(valid_assets)

            return RepairResult(
                success=True,
                repaired_count=1,
                removed_invalid_entries=removed_count
            )

        except Exception:
            return RepairResult(success=False)

    def check_asset_integrity(self, asset_file: Path, expected_hash: str) -> IntegrityResult:
        """Check asset integrity using hash comparison."""
        import hashlib

        try:
            content = asset_file.read_text()
            actual_hash = hashlib.sha256(content.encode()).hexdigest()

            if actual_hash != expected_hash:
                return IntegrityResult(
                    success=False,
                    error_type="INTEGRITY_VIOLATION",
                    corruption_detected=True
                )

            return IntegrityResult(success=True)

        except Exception:
            return IntegrityResult(
                success=False,
                error_type="INTEGRITY_CHECK_FAILED"
            )

    def begin_transaction(self, operation_name: str) -> Transaction:
        """Begin a transaction for rollback support."""
        return Transaction(operation_name)

    def update_asset_with_rollback(self, asset_file: Path, new_content: str,
                                 transaction: Transaction, should_fail: bool = False) -> None:
        """Update asset with rollback support."""
        if should_fail:
            transaction.rolled_back = True
            raise Exception("Simulated failure for testing")

        asset_file.write_text(new_content)

    def create_backup(self, backup_name: str, include_patterns: List[str]) -> BackupResult:
        """Create backup of assets."""
        backup_dir = self.workspace_path / "backups" / backup_name
        backup_dir.mkdir(parents=True, exist_ok=True)

        return BackupResult(
            success=True,
            backup_path=backup_dir,
            backup_size_mb=10.5  # Simulated backup size
        )

    def restore_from_backup(self, backup_path: Path) -> RestoreResult:
        """Restore from backup."""
        # Simulate restoration process
        return RestoreResult(
            success=True,
            files_restored=2
        )

    def confirm_destructive_operation(self, operation: str, affected_count: int,
                                    consequences: List[str]) -> ConfirmationResult:
        """Confirm destructive operations with user."""
        # In real implementation, this would prompt the user
        # For testing, we'll check the mocked input
        try:
            user_input = input(f"Confirm {operation} affecting {affected_count} items? (yes/no): ")
            confirmed = user_input.lower() in ['yes', 'y']

            return ConfirmationResult(
                confirmed=confirmed,
                operation_cancelled=not confirmed
            )

        except Exception:
            return ConfirmationResult(
                confirmed=False,
                operation_cancelled=True
            )

    def atomic_batch_operation(self, operation: str, assets: List[Path],
                             new_content: str) -> TransactionResult:
        """Perform atomic batch operations."""
        # Store original content for rollback
        original_content = {}

        try:
            for asset in assets:
                original_content[asset] = asset.read_text()

            # Simulate operation that might fail
            for i, asset in enumerate(assets):
                if hasattr(self, '_should_fail_operation'):
                    # This is for testing - simulate failure on specific asset
                    fail_results = self._should_fail_operation()
                    if isinstance(fail_results, list) and i < len(fail_results) and fail_results[i]:
                        raise Exception(f"Simulated failure on asset {i}")

                asset.write_text(new_content)

            return TransactionResult(success=True)

        except Exception:
            # Rollback all changes
            for asset, content in original_content.items():
                try:
                    asset.write_text(content)
                except Exception:
                    pass  # Best effort rollback

            return TransactionResult(
                success=False,
                rolled_back=True
            )

    def log_error(self, error: str, severity: ErrorSeverity, context: Dict[str, Any],
                 include_stack_trace: bool = False) -> None:
        """Log error with appropriate detail level."""
        log_message = f"Error: {error}, Context: {context}"

        if severity == ErrorSeverity.INFO:
            self.logger.info(log_message)
        elif severity == ErrorSeverity.WARNING:
            self.logger.warning(log_message)
        elif severity == ErrorSeverity.ERROR:
            self.logger.error(log_message)
        elif severity == ErrorSeverity.CRITICAL:
            self.logger.critical(log_message)
            if include_stack_trace:
                import traceback
                self.logger.critical(traceback.format_exc())