""" Clean Asset Manager implementation with object-oriented design. This is the new implementation that replaces the dict-based approach with proper domain models and clean architecture patterns. """ import hashlib import mimetypes from pathlib import Path from typing import List, Optional, Dict, Any from datetime import datetime import logging import shutil from .models import Asset, AssetCollection from .repository import AssetRepository, JsonFileRepository class AssetManagerError(Exception): """Asset manager specific errors.""" pass class AssetManager: """Clean asset manager with object-oriented interface.""" def __init__(self, storage_path: Path, repository: Optional[AssetRepository] = None): """Initialize asset manager. Args: storage_path: Directory for content-addressable asset storage repository: Asset repository (defaults to JSON file) """ self.storage_path = Path(storage_path) self.storage_path.mkdir(parents=True, exist_ok=True) # Use provided repository or default to JSON file if repository is None: registry_path = self.storage_path / "registry.json" self.repository = JsonFileRepository(registry_path) else: self.repository = repository self.logger = logging.getLogger(f'{__name__}.{self.__class__.__name__}') def add_asset(self, source_path: Path, description: Optional[str] = None) -> Asset: """Add an asset from a source file. Args: source_path: Path to the source file description: Optional description Returns: Asset object for the added asset Raises: AssetManagerError: If file doesn't exist or can't be processed """ source_path = Path(source_path) if not source_path.exists(): raise AssetManagerError(f"Source file does not exist: {source_path}") if not source_path.is_file(): raise AssetManagerError(f"Source path is not a file: {source_path}") try: # Calculate content hash content_hash = self._calculate_hash(source_path) # Check if asset already exists existing_asset = self.repository.get_by_hash(content_hash) if existing_asset: self.logger.info(f"Asset already exists (deduplicated): {content_hash[:12]}...") return existing_asset # Determine storage path (content-addressable) storage_path = self._get_storage_path(content_hash, source_path.suffix) # Copy file to storage storage_path.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(source_path, storage_path) # Create asset object asset = Asset( content_hash=content_hash, filename=source_path.name, size_bytes=source_path.stat().st_size, mime_type=mimetypes.guess_type(source_path)[0] or "application/octet-stream", path=str(storage_path), original_path=str(source_path), created_at=datetime.now(), description=description ) # Add to repository self.repository.add(asset) self.logger.info(f"Added new asset: {asset.filename} ({content_hash[:12]}...)") return asset except Exception as e: raise AssetManagerError(f"Failed to add asset {source_path}: {e}") from e def get_asset(self, content_hash: str) -> Optional[Asset]: """Get asset by content hash.""" return self.repository.get_by_hash(content_hash) def list_assets(self) -> List[Asset]: """List all managed assets.""" return self.repository.list_all() def get_assets_collection(self) -> AssetCollection: """Get assets as a collection with additional methods.""" assets = self.list_assets() return AssetCollection(assets=assets, created_at=datetime.now()) def remove_asset(self, content_hash: str, remove_file: bool = True) -> bool: """Remove an asset. Args: content_hash: Hash of asset to remove remove_file: Whether to remove the physical file Returns: True if asset was removed, False if not found """ asset = self.repository.get_by_hash(content_hash) if not asset: return False # Remove from repository if self.repository.remove(content_hash): if remove_file and asset.path: try: Path(asset.path).unlink(missing_ok=True) self.logger.info(f"Removed asset file: {asset.path}") except Exception as e: self.logger.warning(f"Failed to remove asset file {asset.path}: {e}") self.logger.info(f"Removed asset: {asset.filename} ({content_hash[:12]}...)") return True return False def find_assets_by_name(self, filename: str) -> List[Asset]: """Find assets by filename.""" assets = self.list_assets() return [asset for asset in assets if asset.filename == filename] def find_assets_by_type(self, mime_type_prefix: str) -> List[Asset]: """Find assets by MIME type prefix (e.g., 'image/').""" assets = self.list_assets() return [asset for asset in assets if asset.mime_type.startswith(mime_type_prefix)] def get_images(self) -> List[Asset]: """Get all image assets.""" return self.find_assets_by_type("image/") def get_documents(self) -> List[Asset]: """Get all document assets.""" assets = self.list_assets() return [asset for asset in assets if asset.is_document()] def get_stats(self) -> Dict[str, Any]: """Get asset manager statistics.""" repo_stats = self.repository.get_stats() assets = self.list_assets() # Additional computed stats images = [a for a in assets if a.is_image()] documents = [a for a in assets if a.is_document()] return { **repo_stats, "storage_path": str(self.storage_path), "images_count": len(images), "documents_count": len(documents), "average_size": repo_stats["total_size_bytes"] / max(1, repo_stats["total_assets"]) } def verify_integrity(self) -> Dict[str, Any]: """Verify integrity of all assets.""" assets = self.list_assets() results = { "total_assets": len(assets), "valid_assets": 0, "missing_files": [], "hash_mismatches": [], "errors": [] } for asset in assets: try: storage_path = Path(asset.path) # Check if file exists if not storage_path.exists(): results["missing_files"].append(asset.content_hash) continue # Verify hash actual_hash = self._calculate_hash(storage_path) if actual_hash != asset.content_hash: results["hash_mismatches"].append({ "asset_hash": asset.content_hash, "actual_hash": actual_hash, "filename": asset.filename }) continue results["valid_assets"] += 1 except Exception as e: results["errors"].append({ "asset_hash": asset.content_hash, "error": str(e) }) return results def _calculate_hash(self, file_path: Path) -> str: """Calculate SHA-256 hash of file.""" hash_algo = hashlib.sha256() with open(file_path, 'rb') as f: for chunk in iter(lambda: f.read(8192), b""): hash_algo.update(chunk) return hash_algo.hexdigest() def _get_storage_path(self, content_hash: str, extension: str) -> Path: """Get content-addressable storage path.""" # Use first 2 chars for directory structure subdir = content_hash[:2] filename = content_hash + (extension or "") return self.storage_path / subdir / filename