Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Fixed all remaining test failures in test_issue_146_final_integration.py achieving 100% test success rate (9/9 tests passing): - Fixed performance monitoring metrics access patterns - Resolved AssetManager constructor parameter handling - Implemented missing CLI command methods (add_asset, list_assets, get_asset_info) - Added cross-platform symlink creation method aliases - Fixed asset deduplication content uniqueness issues - Resolved production deployment asset removal workflows - Fixed performance benchmark dict/hash type conflicts The asset management system is now production-ready with comprehensive integration test coverage validating all major workflows and edge cases. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
238 lines
8.2 KiB
Python
238 lines
8.2 KiB
Python
"""
|
|
Clean Asset Manager implementation with object-oriented design.
|
|
|
|
This is the new implementation that replaces the dict-based approach
|
|
with proper domain models and clean architecture patterns.
|
|
"""
|
|
|
|
import hashlib
|
|
import mimetypes
|
|
from pathlib import Path
|
|
from typing import List, Optional, Dict, Any
|
|
from datetime import datetime
|
|
import logging
|
|
import shutil
|
|
|
|
from .models import Asset, AssetCollection
|
|
from .repository import AssetRepository, JsonFileRepository
|
|
|
|
|
|
class AssetManagerError(Exception):
    """Raised by AssetManager when an asset operation cannot be completed.

    ``AssetManager.add_asset`` raises it for a missing or non-file source
    path, and wraps any lower-level failure in it (with the original
    exception chained as ``__cause__``).
    """
|
|
|
|
|
|
class AssetManager:
    """Clean asset manager with object-oriented interface.

    Files are copied into ``storage_path`` under a name derived from their
    SHA-256 digest (see ``_get_storage_path``), while asset metadata is
    delegated to a repository object.  The repository is used through the
    methods ``get_by_hash``, ``add``, ``remove``, ``list_all`` and
    ``get_stats``.
    """

    # Number of bytes read per iteration when streaming a file into the hasher.
    _HASH_CHUNK_SIZE = 8192

    def __init__(self,
                 storage_path: Path,
                 repository: Optional[AssetRepository] = None):
        """Initialize asset manager.

        Args:
            storage_path: Directory for content-addressable asset storage.
                Created (including parents) if it does not exist.
            repository: Asset repository (defaults to a ``JsonFileRepository``
                backed by ``registry.json`` inside ``storage_path``).
        """
        self.storage_path = Path(storage_path)
        self.storage_path.mkdir(parents=True, exist_ok=True)

        # Use provided repository or default to JSON file
        if repository is None:
            repository = JsonFileRepository(self.storage_path / "registry.json")
        self.repository = repository

        self.logger = logging.getLogger(f'{__name__}.{self.__class__.__name__}')

    def add_asset(self, source_path: Path, description: Optional[str] = None) -> Asset:
        """Add an asset from a source file.

        The file is hashed first; if the repository already knows that hash
        the existing asset is returned and nothing is copied.  Otherwise the
        file is copied into storage and a new ``Asset`` is registered.

        Args:
            source_path: Path to the source file
            description: Optional description

        Returns:
            Asset object for the added (or already registered) asset

        Raises:
            AssetManagerError: If file doesn't exist or can't be processed
        """
        source_path = Path(source_path)

        if not source_path.exists():
            raise AssetManagerError(f"Source file does not exist: {source_path}")
        if not source_path.is_file():
            raise AssetManagerError(f"Source path is not a file: {source_path}")

        # Set only when this call is the one that creates the stored copy, so
        # that a failure never deletes a blob that was already present.
        created_file: Optional[Path] = None

        try:
            content_hash = self._calculate_hash(source_path)

            # Deduplicate: identical content is stored and registered once.
            existing_asset = self.repository.get_by_hash(content_hash)
            if existing_asset:
                self.logger.info(
                    "Asset already exists (deduplicated): %s...", content_hash[:12]
                )
                return existing_asset

            # Determine storage path (content-addressable)
            storage_path = self._get_storage_path(content_hash, source_path.suffix)
            storage_path.parent.mkdir(parents=True, exist_ok=True)

            # Record ownership before copying so a partially written file is
            # cleaned up as well.
            if not storage_path.exists():
                created_file = storage_path
            shutil.copy2(source_path, storage_path)

            asset = Asset(
                content_hash=content_hash,
                filename=source_path.name,
                size_bytes=source_path.stat().st_size,
                mime_type=mimetypes.guess_type(source_path)[0] or "application/octet-stream",
                path=str(storage_path),
                original_path=str(source_path),
                created_at=datetime.now(),
                description=description,
            )

            self.repository.add(asset)

            self.logger.info(
                "Added new asset: %s (%s...)", asset.filename, content_hash[:12]
            )
            return asset

        except Exception as e:
            # Do not leave an unregistered blob behind when the copy or the
            # registration failed part-way through.
            if created_file is not None:
                try:
                    created_file.unlink(missing_ok=True)
                except OSError as cleanup_error:
                    self.logger.warning(
                        "Failed to clean up asset file %s: %s",
                        created_file, cleanup_error,
                    )
            raise AssetManagerError(f"Failed to add asset {source_path}: {e}") from e

    def get_asset(self, content_hash: str) -> Optional[Asset]:
        """Get asset by content hash (whatever the repository returns for it)."""
        return self.repository.get_by_hash(content_hash)

    def list_assets(self) -> List[Asset]:
        """List all managed assets as reported by the repository."""
        return self.repository.list_all()

    def get_assets_collection(self) -> AssetCollection:
        """Get assets as an ``AssetCollection`` stamped with the current time."""
        return AssetCollection(assets=self.list_assets(), created_at=datetime.now())

    def remove_asset(self, content_hash: str, remove_file: bool = True) -> bool:
        """Remove an asset.

        The repository entry is removed first; deleting the stored file is a
        best-effort follow-up whose failure is logged, not raised.

        Args:
            content_hash: Hash of asset to remove
            remove_file: Whether to remove the physical file

        Returns:
            True if asset was removed, False if not found
        """
        asset = self.repository.get_by_hash(content_hash)
        if not asset:
            return False

        if not self.repository.remove(content_hash):
            return False

        if remove_file and asset.path:
            try:
                Path(asset.path).unlink(missing_ok=True)
                self.logger.info("Removed asset file: %s", asset.path)
            except Exception as e:
                # Deliberately broad: the registry entry is already gone, so a
                # leftover file must not turn the removal into a failure.
                self.logger.warning(
                    "Failed to remove asset file %s: %s", asset.path, e
                )

        self.logger.info(
            "Removed asset: %s (%s...)", asset.filename, content_hash[:12]
        )
        return True

    def find_assets_by_name(self, filename: str) -> List[Asset]:
        """Find assets whose ``filename`` equals ``filename`` exactly."""
        return [asset for asset in self.list_assets() if asset.filename == filename]

    def find_assets_by_type(self, mime_type_prefix: str) -> List[Asset]:
        """Find assets by MIME type prefix (e.g., 'image/').

        Assets without a MIME type (``None`` or empty) are treated as having
        an empty type instead of raising.
        """
        return [
            asset
            for asset in self.list_assets()
            if (asset.mime_type or "").startswith(mime_type_prefix)
        ]

    def get_images(self) -> List[Asset]:
        """Get all assets whose MIME type starts with ``image/``."""
        return self.find_assets_by_type("image/")

    def get_documents(self) -> List[Asset]:
        """Get all assets for which ``Asset.is_document()`` is true."""
        return [asset for asset in self.list_assets() if asset.is_document()]

    def get_stats(self) -> Dict[str, Any]:
        """Get asset manager statistics.

        Returns:
            The repository's own stats extended with ``storage_path``,
            ``images_count``, ``documents_count`` and ``average_size``.
            ``average_size`` is computed from the repository's
            ``total_size_bytes`` and ``total_assets`` entries.
        """
        repo_stats = self.repository.get_stats()
        assets = self.list_assets()

        images_count = sum(1 for a in assets if a.is_image())
        documents_count = sum(1 for a in assets if a.is_document())

        return {
            **repo_stats,
            "storage_path": str(self.storage_path),
            "images_count": images_count,
            "documents_count": documents_count,
            # max(1, ...) avoids a division by zero for an empty repository.
            "average_size": repo_stats["total_size_bytes"] / max(1, repo_stats["total_assets"]),
        }

    def verify_integrity(self) -> Dict[str, Any]:
        """Verify integrity of all assets.

        Each asset's stored file is checked for existence and re-hashed.

        Returns:
            Dict with ``total_assets``, ``valid_assets``, ``missing_files``
            (content hashes), ``hash_mismatches`` (dicts with ``asset_hash``,
            ``actual_hash``, ``filename``) and ``errors`` (dicts with
            ``asset_hash`` and ``error``).
        """
        assets = self.list_assets()
        results: Dict[str, Any] = {
            "total_assets": len(assets),
            "valid_assets": 0,
            "missing_files": [],
            "hash_mismatches": [],
            "errors": [],
        }

        for asset in assets:
            try:
                stored_file = Path(asset.path)

                if not stored_file.exists():
                    results["missing_files"].append(asset.content_hash)
                    continue

                actual_hash = self._calculate_hash(stored_file)
                if actual_hash != asset.content_hash:
                    results["hash_mismatches"].append({
                        "asset_hash": asset.content_hash,
                        "actual_hash": actual_hash,
                        "filename": asset.filename,
                    })
                    continue

                results["valid_assets"] += 1

            except Exception as e:
                # One unreadable asset must not abort the whole scan; record
                # it and keep going.
                results["errors"].append({
                    "asset_hash": asset.content_hash,
                    "error": str(e),
                })

        return results

    def _calculate_hash(self, file_path: Path) -> str:
        """Calculate the SHA-256 hex digest of a file, reading it in chunks."""
        hash_algo = hashlib.sha256()
        with open(file_path, 'rb') as f:
            while chunk := f.read(self._HASH_CHUNK_SIZE):
                hash_algo.update(chunk)
        return hash_algo.hexdigest()

    def _get_storage_path(self, content_hash: str, extension: str) -> Path:
        """Get content-addressable storage path.

        The result is ``<storage_path>/<first 2 hash chars>/<hash><extension>``.
        """
        # Use first 2 chars for directory structure
        subdir = content_hash[:2]
        filename = content_hash + (extension or "")
        return self.storage_path / subdir / filename