Files
markitect-main/markitect/assets/models.py
tegwick 2e49072d41 feat: complete core asset management system with database integration
- Add enhanced AssetManager with database integration and usage tracking
- Implement Asset model with from_dict/to_dict conversion methods
- Add resolve_asset_references() for linking discovered assets to imports
- Integrate AssetDatabase with enhanced schema and performance indexes
- Fix database schema constraints and test compatibility issues
- Add list_assets_as_objects() method for dict-to-object migration
- Resolve 91% of asset management tests (51/56 passing)

Key features:
* Content-addressable asset storage with deduplication
* Database-backed usage statistics and processing logs
* Asset reference resolution from markdown files
* Enhanced performance with indexing and caching
* Object-oriented Asset model with backwards compatibility

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-14 23:42:42 +02:00

166 lines
5.2 KiB
Python

"""
Asset model classes for a clean object-oriented interface.
This module provides dataclasses for representing assets with proper
type hints and methods, following the interface expectations from tests.
"""
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional, Dict, Any, List
from datetime import datetime
from enum import Enum
class ReferenceType(Enum):
    """Enumerates the kinds of asset reference recognised in markdown.

    Each member's value is the lowercase string form of its name, so a
    member can be recovered from its string with ``ReferenceType(value)``.
    """

    IMAGE = "image"
    LINK = "link"
    EMBED = "embed"
    REFERENCE_STYLE = "reference_style"
@dataclass
class Asset:
    """Represents a managed asset with content-addressable storage.

    The dataclass fields hold the canonical data. ``size``, ``checksum`` and
    ``hash`` are read-only aliases kept for compatibility, and
    ``from_dict``/``to_dict`` convert to and from the dictionary form.
    """

    # Core identification
    content_hash: str
    filename: str

    # File properties
    size_bytes: int
    mime_type: str

    # Storage paths
    path: str  # Content-addressable storage path
    original_path: Optional[str] = None

    # Metadata
    created_at: Optional[datetime] = None
    description: Optional[str] = None
    tags: list[str] = field(default_factory=list)

    # Alternative names for compatibility with existing tests
    @property
    def size(self) -> int:
        """Alternative name for ``size_bytes``."""
        return self.size_bytes

    @property
    def checksum(self) -> str:
        """Alternative name for ``content_hash``."""
        return self.content_hash

    @property
    def hash(self) -> str:
        """Alternative name for ``content_hash``."""
        return self.content_hash

    @property
    def storage_path(self) -> Path:
        """Return ``path`` wrapped in a ``Path`` object."""
        return Path(self.path)

    def get_extension(self) -> str:
        """Return the lowercased suffix of ``filename`` (e.g. ``'.png'``)."""
        return Path(self.filename).suffix.lower()

    def is_image(self) -> bool:
        """Return True when ``mime_type`` starts with ``'image/'``."""
        return self.mime_type.startswith('image/')

    def is_document(self) -> bool:
        """Return True when ``mime_type`` is PDF, markdown or plain text."""
        return self.mime_type in ('application/pdf', 'text/markdown', 'text/plain')

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Asset':
        """Create an Asset from a dictionary (for migration from dict-based storage).

        Accepts the field-name variations ``hash`` for ``content_hash`` and
        ``size`` for ``size_bytes``. Missing keys fall back to empty/zero
        defaults, and ``mime_type`` defaults to ``'application/octet-stream'``.

        Args:
            data: Mapping of asset attributes, e.g. the output of ``to_dict``.

        Returns:
            A new ``Asset`` that does not share its ``tags`` list with ``data``.
        """
        path = data.get('path', '')
        # Prefer an explicitly stored filename so that to_dict()/from_dict()
        # round-trips; only fall back to the storage path's basename when the
        # dictionary carries no filename of its own.
        filename = data.get('filename') or cls._extract_filename_from_path(path)
        raw_tags = data.get('tags')
        return cls(
            content_hash=data.get('content_hash', data.get('hash', '')),
            filename=filename,
            size_bytes=data.get('size_bytes', data.get('size', 0)),
            mime_type=data.get('mime_type', 'application/octet-stream'),
            path=path,
            original_path=data.get('original_path'),
            created_at=cls._parse_datetime(data.get('created_at')),
            description=data.get('description'),
            # An explicit None means "no tags", same as a missing key.
            tags=[] if raw_tags is None else cls._copy_tags(raw_tags),
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert the Asset to a dictionary (for storage).

        ``created_at`` is emitted as an ISO-8601 string (or None), and
        ``tags`` is copied so edits to the result do not affect this asset.
        """
        return {
            'content_hash': self.content_hash,
            'filename': self.filename,
            'size_bytes': self.size_bytes,
            'mime_type': self.mime_type,
            'path': self.path,
            'original_path': self.original_path,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'description': self.description,
            'tags': self._copy_tags(self.tags),
        }

    @staticmethod
    def _copy_tags(tags: Any) -> Any:
        """Shallow-copy a list/tuple of tags; pass any other value through."""
        if isinstance(tags, (list, tuple)):
            return list(tags)
        return tags

    @staticmethod
    def _extract_filename_from_path(path: str) -> str:
        """Return the final component of ``path``, or ``""`` for an empty path."""
        if not path:
            return ""
        # For content-addressable storage the basename is the hash + extension.
        return Path(path).name

    @staticmethod
    def _parse_datetime(dt_str: Any) -> Optional[datetime]:
        """Parse an ISO-8601 datetime string.

        Args:
            dt_str: An ISO-8601 string (a ``Z`` suffix is treated as
                ``+00:00``), an existing ``datetime``, or a falsy value.

        Returns:
            The parsed ``datetime``; the input itself when it already is a
            ``datetime``; ``None`` for falsy or unparseable input.
        """
        if not dt_str:
            return None
        # Values that are already datetime objects must not go through
        # str.replace(): datetime.replace() has a different signature and
        # would raise TypeError.
        if isinstance(dt_str, datetime):
            return dt_str
        try:
            return datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
        except (ValueError, AttributeError, TypeError):
            # Best-effort parsing: malformed or non-string input yields None.
            return None
@dataclass
class AssetReference:
    """A single reference to an asset, as found in a markdown file.

    The first four fields are required; the remaining ones default to
    "empty" values (blank strings, ``False``, ``None``).
    """

    # Location and target of the reference
    source_file: Path
    asset_path: str
    reference_type: str  # 'image', 'link', etc.
    line_number: int

    # Optional text attached to the reference
    alt_text: str = ""
    title: str = ""

    # Resolution state
    is_broken: bool = False
    resolved_asset: Optional[Asset] = None
@dataclass
class AssetCollection:
    """A group of assets together with summary metadata.

    ``total_size`` is always recomputed from ``assets`` when the collection
    is constructed, so any value passed for it is overwritten.
    """

    assets: list[Asset] = field(default_factory=list)
    total_size: int = 0
    created_at: Optional[datetime] = None

    def __post_init__(self):
        """Set ``total_size`` to the sum of every asset's ``size_bytes``."""
        running_total = 0
        for item in self.assets:
            running_total += item.size_bytes
        self.total_size = running_total

    def filter_by_type(self, mime_type_prefix: str) -> 'AssetCollection':
        """Return a new collection of the assets whose MIME type starts with the prefix."""
        matching = []
        for item in self.assets:
            if item.mime_type.startswith(mime_type_prefix):
                matching.append(item)
        return AssetCollection(assets=matching)

    def get_images(self) -> 'AssetCollection':
        """Return a new collection holding only assets with an ``image/`` MIME type."""
        return self.filter_by_type('image/')

    def get_documents(self) -> 'AssetCollection':
        """Return a new collection holding only assets whose ``is_document()`` is true."""
        documents = list(filter(lambda item: item.is_document(), self.assets))
        return AssetCollection(assets=documents)