- Add enhanced AssetManager with database integration and usage tracking
- Implement Asset model with from_dict/to_dict conversion methods
- Add resolve_asset_references() for linking discovered assets to imports
- Integrate AssetDatabase with enhanced schema and performance indexes
- Fix database schema constraints and test compatibility issues
- Add list_assets_as_objects() method for dict-to-object migration
- Resolve 91% of asset management tests (51/56 passing)

Key features:
* Content-addressable asset storage with deduplication
* Database-backed usage statistics and processing logs
* Asset reference resolution from markdown files
* Enhanced performance with indexing and caching
* Object-oriented Asset model with backwards compatibility

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
166 lines
5.2 KiB
Python
166 lines
5.2 KiB
Python
"""
Asset model classes for a clean object-oriented interface.

This module provides dataclasses for representing assets with proper
type hints and methods, following the interface expectations from tests.
"""
|
|
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Optional, Dict, Any, List
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
|
|
|
|
class ReferenceType(Enum):
    """Enumerates the kinds of asset reference that can appear in markdown.

    Each member's value is the lowercase string form of the kind.
    """

    # Member order is part of the enum's iteration order; keep it stable.
    IMAGE = "image"
    LINK = "link"
    EMBED = "embed"
    REFERENCE_STYLE = "reference_style"
|
|
|
|
|
|
@dataclass
class Asset:
    """Represents a managed asset with content-addressable storage.

    Attributes:
        content_hash: Hash identifying the asset's content.
        filename: File name of the asset.
        size_bytes: Size of the asset in bytes.
        mime_type: MIME type string (for example ``image/png``).
        path: Content-addressable storage path, kept as a string.
        original_path: Optional original path; ``None`` when not set.
        created_at: Optional creation timestamp.
        description: Optional free-text description.
        tags: List of tag strings; defaults to a fresh empty list.
    """

    # Core identification
    content_hash: str
    filename: str

    # File properties
    size_bytes: int
    mime_type: str

    # Storage paths
    path: str  # Content-addressable storage path
    original_path: Optional[str] = None

    # Metadata
    created_at: Optional[datetime] = None
    description: Optional[str] = None
    tags: list[str] = field(default_factory=list)

    # Alternative names for compatibility with existing tests
    @property
    def size(self) -> int:
        """Alternative name for ``size_bytes``."""
        return self.size_bytes

    @property
    def checksum(self) -> str:
        """Alternative name for ``content_hash``."""
        return self.content_hash

    @property
    def hash(self) -> str:
        """Alternative name for ``content_hash``."""
        return self.content_hash

    @property
    def storage_path(self) -> Path:
        """Return ``path`` wrapped in a ``Path`` object."""
        return Path(self.path)

    def get_extension(self) -> str:
        """Return the lower-cased suffix of ``filename`` (including the dot)."""
        return Path(self.filename).suffix.lower()

    def is_image(self) -> bool:
        """Return True when the MIME type starts with ``image/``."""
        return self.mime_type.startswith('image/')

    def is_document(self) -> bool:
        """Return True when the MIME type is PDF, markdown or plain text."""
        return self.mime_type in ('application/pdf', 'text/markdown', 'text/plain')

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Asset':
        """Create an Asset from a dictionary (for migration from dict-based storage).

        Accepts the keys written by ``to_dict`` as well as the legacy
        spellings ``hash`` (for ``content_hash``) and ``size`` (for
        ``size_bytes``).

        Args:
            data: Mapping of field names to values. Missing keys fall back
                to empty or default values.

        Returns:
            A new ``Asset`` populated from ``data``.
        """
        storage_path = data.get('path', '')
        return cls(
            content_hash=data.get('content_hash', data.get('hash', '')),
            # Prefer an explicit filename so to_dict()/from_dict() round-trips;
            # only legacy dicts without one fall back to the storage path name.
            filename=data.get('filename') or cls._extract_filename_from_path(storage_path),
            size_bytes=data.get('size_bytes', data.get('size', 0)),
            mime_type=data.get('mime_type', 'application/octet-stream'),
            path=storage_path,
            original_path=data.get('original_path'),
            created_at=cls._parse_datetime(data.get('created_at')),
            description=data.get('description'),
            # Copy so the Asset never shares (or mutates) the caller's list,
            # and treat an explicit None the same as a missing key.
            tags=list(data.get('tags') or []),
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert the Asset to a plain dictionary (for storage).

        Returns:
            A dict with one entry per field. ``created_at`` is emitted as an
            ISO-8601 string (or ``None``), and ``tags`` is a copy of the
            asset's tag list.
        """
        return {
            'content_hash': self.content_hash,
            'filename': self.filename,
            'size_bytes': self.size_bytes,
            'mime_type': self.mime_type,
            'path': self.path,
            'original_path': self.original_path,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'description': self.description,
            # Hand out a copy so edits to the dict do not leak into the Asset.
            'tags': list(self.tags),
        }

    @staticmethod
    def _extract_filename_from_path(path: str) -> str:
        """Return the final component of ``path``, or ``""`` for an empty path."""
        if not path:
            return ""
        # For content-addressable storage this is the stored name (hash + extension).
        return Path(path).name

    @staticmethod
    def _parse_datetime(dt_str: Any) -> Optional[datetime]:
        """Parse an ISO-8601 datetime string into a ``datetime``.

        Args:
            dt_str: An ISO-8601 string (a ``Z`` is rewritten to ``+00:00``
                before parsing), an existing ``datetime``, or a falsy value.

        Returns:
            The parsed ``datetime``; the input itself when it is already a
            ``datetime``; ``None`` for falsy or unparseable input.
        """
        if not dt_str:
            return None
        # A value that is already a datetime must be passed through: calling
        # datetime.replace('Z', '+00:00') would raise an uncaught TypeError.
        if isinstance(dt_str, datetime):
            return dt_str
        try:
            return datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
        except (ValueError, AttributeError):
            return None
|
|
|
|
|
|
@dataclass
class AssetReference:
    """One reference to an asset found inside a markdown file.

    The first four fields are required; the remaining fields default to
    empty strings, ``False`` and ``None`` respectively.
    """

    # Required fields (positional order is part of the constructor contract).
    source_file: Path
    asset_path: str
    reference_type: str  # 'image', 'link', etc.
    line_number: int

    # Optional details, all defaulted.
    alt_text: str = ""
    title: str = ""
    is_broken: bool = False
    resolved_asset: Optional[Asset] = None  # None until an Asset is attached
|
|
|
|
|
|
@dataclass
class AssetCollection:
    """A group of assets together with aggregate metadata.

    ``total_size`` is always recomputed from ``assets`` right after
    construction, so a value passed to the constructor is overwritten.
    """

    assets: list[Asset] = field(default_factory=list)
    total_size: int = 0
    created_at: Optional[datetime] = None

    def __post_init__(self):
        """Set ``total_size`` to the sum of every member's ``size_bytes``."""
        running_total = 0
        for member in self.assets:
            running_total += member.size_bytes
        self.total_size = running_total

    def filter_by_type(self, mime_type_prefix: str) -> 'AssetCollection':
        """Return a new collection of assets whose MIME type has the given prefix.

        Only ``assets`` is passed to the new collection; ``created_at`` is
        left at its default.
        """
        matching = []
        for candidate in self.assets:
            if candidate.mime_type.startswith(mime_type_prefix):
                matching.append(candidate)
        return AssetCollection(assets=matching)

    def get_images(self) -> 'AssetCollection':
        """Return a new collection holding only assets with an ``image/`` MIME type."""
        return self.filter_by_type('image/')

    def get_documents(self) -> 'AssetCollection':
        """Return a new collection holding only assets whose ``is_document()`` is true."""
        document_assets = list(filter(lambda entry: entry.is_document(), self.assets))
        return AssetCollection(assets=document_assets)