""" Asset model classes for a clean object-oriented interface. This module provides dataclasses for representing assets with proper type hints and methods, following the interface expectations from tests. """ from dataclasses import dataclass, field from pathlib import Path from typing import Optional, Dict, Any, List from datetime import datetime from enum import Enum class ReferenceType(Enum): """Types of asset references in markdown.""" IMAGE = "image" LINK = "link" EMBED = "embed" REFERENCE_STYLE = "reference_style" @dataclass class Asset: """Represents a managed asset with content-addressable storage.""" # Core identification content_hash: str filename: str # File properties size_bytes: int mime_type: str # Storage paths path: str # Content-addressable storage path original_path: Optional[str] = None # Metadata created_at: Optional[datetime] = None description: Optional[str] = None tags: list[str] = field(default_factory=list) # Alternative names for compatibility with existing tests @property def size(self) -> int: """Alternative name for size_bytes.""" return self.size_bytes @property def checksum(self) -> str: """Alternative name for content_hash.""" return self.content_hash @property def hash(self) -> str: """Alternative name for content_hash.""" return self.content_hash @property def storage_path(self) -> Path: """Get storage path as Path object.""" return Path(self.path) def get_extension(self) -> str: """Get file extension.""" return Path(self.filename).suffix.lower() def is_image(self) -> bool: """Check if asset is an image.""" return self.mime_type.startswith('image/') def is_document(self) -> bool: """Check if asset is a document.""" return self.mime_type in ['application/pdf', 'text/markdown', 'text/plain'] @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'Asset': """Create Asset from dictionary (for migration from dict-based storage).""" # Handle various field name variations return cls( content_hash=data.get('content_hash', data.get('hash', '')), filename=cls._extract_filename_from_path(data.get('path', '')), size_bytes=data.get('size_bytes', data.get('size', 0)), mime_type=data.get('mime_type', 'application/octet-stream'), path=data.get('path', ''), original_path=data.get('original_path'), created_at=cls._parse_datetime(data.get('created_at')), description=data.get('description'), tags=data.get('tags', []) ) def to_dict(self) -> Dict[str, Any]: """Convert Asset to dictionary (for storage).""" return { 'content_hash': self.content_hash, 'filename': self.filename, 'size_bytes': self.size_bytes, 'mime_type': self.mime_type, 'path': self.path, 'original_path': self.original_path, 'created_at': self.created_at.isoformat() if self.created_at else None, 'description': self.description, 'tags': self.tags } @staticmethod def _extract_filename_from_path(path: str) -> str: """Extract original filename from storage path when possible.""" if not path: return "" storage_path = Path(path) # For content-addressable storage, we'll use the hash + extension return storage_path.name @staticmethod def _parse_datetime(dt_str: Optional[str]) -> Optional[datetime]: """Parse datetime string.""" if not dt_str: return None try: return datetime.fromisoformat(dt_str.replace('Z', '+00:00')) except (ValueError, AttributeError): return None @dataclass class AssetReference: """Represents a reference to an asset from a markdown file.""" source_file: Path asset_path: str reference_type: str # 'image', 'link', etc. line_number: int alt_text: str = "" title: str = "" is_broken: bool = False resolved_asset: Optional[Asset] = None @dataclass class AssetCollection: """Represents a collection of assets with metadata.""" assets: list[Asset] = field(default_factory=list) total_size: int = 0 created_at: Optional[datetime] = None def __post_init__(self): """Calculate total size.""" self.total_size = sum(asset.size_bytes for asset in self.assets) def filter_by_type(self, mime_type_prefix: str) -> 'AssetCollection': """Filter assets by MIME type prefix.""" filtered = [asset for asset in self.assets if asset.mime_type.startswith(mime_type_prefix)] return AssetCollection(assets=filtered) def get_images(self) -> 'AssetCollection': """Get only image assets.""" return self.filter_by_type('image/') def get_documents(self) -> 'AssetCollection': """Get only document assets.""" docs = [asset for asset in self.assets if asset.is_document()] return AssetCollection(assets=docs)