Files
markitect-main/markitect/assets/manager_v2.py
tegwick 567f01121e
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat: complete Issue #146 final integration testing
Fixed all remaining test failures in test_issue_146_final_integration.py
achieving 100% test success rate (9/9 tests passing):

- Fixed performance monitoring metrics access patterns
- Resolved AssetManager constructor parameter handling
- Implemented missing CLI command methods (add_asset, list_assets, get_asset_info)
- Added cross-platform symlink creation method aliases
- Fixed asset deduplication content uniqueness issues
- Resolved production deployment asset removal workflows
- Fixed performance benchmark dict/hash type conflicts

The asset management system is now production-ready with comprehensive
integration test coverage validating all major workflows and edge cases.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-15 00:19:52 +02:00

238 lines
8.2 KiB
Python

"""
Clean Asset Manager implementation with object-oriented design.
This is the new implementation that replaces the dict-based approach
with proper domain models and clean architecture patterns.
"""
import hashlib
import mimetypes
from pathlib import Path
from typing import List, Optional, Dict, Any
from datetime import datetime
import logging
import shutil
from .models import Asset, AssetCollection
from .repository import AssetRepository, JsonFileRepository
class AssetManagerError(Exception):
    """Raised when the asset manager cannot complete an operation."""
class AssetManager:
    """Content-addressable asset store with an object-oriented interface.

    Source files are copied into ``storage_path`` under a name derived from
    their SHA-256 digest, and one ``Asset`` record per stored file is kept in
    the configured repository.
    """

    def __init__(self,
                 storage_path: Path,
                 repository: "Optional[AssetRepository]" = None):
        """Initialize asset manager.

        Creates ``storage_path`` (including parents) if it does not exist.

        Args:
            storage_path: Directory for content-addressable asset storage
            repository: Asset repository (defaults to a JSON file repository
                at ``<storage_path>/registry.json``)
        """
        self.storage_path = Path(storage_path)
        self.storage_path.mkdir(parents=True, exist_ok=True)

        # Use provided repository or default to JSON file
        if repository is None:
            registry_path = self.storage_path / "registry.json"
            self.repository = JsonFileRepository(registry_path)
        else:
            self.repository = repository

        self.logger = logging.getLogger(f'{__name__}.{self.__class__.__name__}')

    def add_asset(self, source_path: Path, description: Optional[str] = None) -> "Asset":
        """Add an asset from a source file.

        If the repository already holds an asset with the same content hash,
        that existing asset is returned and nothing is copied.

        Args:
            source_path: Path to the source file
            description: Optional description

        Returns:
            Asset object for the added (or already registered) asset

        Raises:
            AssetManagerError: If file doesn't exist or can't be processed
        """
        source_path = Path(source_path)
        if not source_path.exists():
            raise AssetManagerError(f"Source file does not exist: {source_path}")
        if not source_path.is_file():
            raise AssetManagerError(f"Source path is not a file: {source_path}")

        # Set only while a file created by THIS call is not yet registered, so
        # a failure can remove it without touching a pre-existing stored copy.
        unregistered_copy: Optional[Path] = None
        try:
            content_hash = self._calculate_hash(source_path)

            # Deduplicate on content: identical bytes map to the same record.
            existing_asset = self.repository.get_by_hash(content_hash)
            if existing_asset:
                self.logger.info(
                    "Asset already exists (deduplicated): %s...", content_hash[:12]
                )
                return existing_asset

            # Determine storage path (content-addressable) and copy the file.
            storage_path = self._get_storage_path(content_hash, source_path.suffix)
            storage_path.parent.mkdir(parents=True, exist_ok=True)
            if not storage_path.exists():
                unregistered_copy = storage_path
            shutil.copy2(source_path, storage_path)

            asset = Asset(
                content_hash=content_hash,
                filename=source_path.name,
                size_bytes=source_path.stat().st_size,
                mime_type=mimetypes.guess_type(source_path)[0] or "application/octet-stream",
                path=str(storage_path),
                original_path=str(source_path),
                created_at=datetime.now(),
                description=description,
            )

            self.repository.add(asset)
            # Registered: from here on the stored file must be kept.
            unregistered_copy = None

            self.logger.info(
                "Added new asset: %s (%s...)", asset.filename, content_hash[:12]
            )
            return asset
        except Exception as e:
            if unregistered_copy is not None:
                self._discard_unregistered_file(unregistered_copy)
            raise AssetManagerError(f"Failed to add asset {source_path}: {e}") from e

    def _discard_unregistered_file(self, path: Path) -> None:
        """Best-effort removal of a stored file that was never registered."""
        try:
            path.unlink(missing_ok=True)
        except OSError as cleanup_error:
            # Cleanup must not mask the error that triggered it.
            self.logger.warning(
                "Failed to clean up unregistered file %s: %s", path, cleanup_error
            )

    def get_asset(self, content_hash: str) -> "Optional[Asset]":
        """Get asset by content hash, as returned by the repository."""
        return self.repository.get_by_hash(content_hash)

    def list_assets(self) -> "List[Asset]":
        """List all managed assets, as returned by the repository."""
        return self.repository.list_all()

    def get_assets_collection(self) -> "AssetCollection":
        """Get all assets wrapped in an AssetCollection stamped with the current time."""
        assets = self.list_assets()
        return AssetCollection(assets=assets, created_at=datetime.now())

    def remove_asset(self, content_hash: str, remove_file: bool = True) -> bool:
        """Remove an asset.

        The repository entry is removed first; the stored file is deleted only
        after that succeeded. A failure to delete the file is logged as a
        warning and does not change the return value.

        Args:
            content_hash: Hash of asset to remove
            remove_file: Whether to remove the physical file

        Returns:
            True if the asset was removed from the repository, False if it was
            not found or the repository did not remove it
        """
        asset = self.repository.get_by_hash(content_hash)
        if not asset:
            return False

        if not self.repository.remove(content_hash):
            return False

        if remove_file and asset.path:
            try:
                Path(asset.path).unlink(missing_ok=True)
                self.logger.info("Removed asset file: %s", asset.path)
            except Exception as e:
                # Deliberately best-effort: the registry entry is already gone.
                self.logger.warning(
                    "Failed to remove asset file %s: %s", asset.path, e
                )

        self.logger.info("Removed asset: %s (%s...)", asset.filename, content_hash[:12])
        return True

    def find_assets_by_name(self, filename: str) -> "List[Asset]":
        """Find assets whose filename equals ``filename`` exactly."""
        return [asset for asset in self.list_assets() if asset.filename == filename]

    def find_assets_by_type(self, mime_type_prefix: str) -> "List[Asset]":
        """Find assets by MIME type prefix (e.g., 'image/').

        Assets without a MIME type are treated as having an empty one, so they
        only match an empty prefix instead of raising.
        """
        return [
            asset
            for asset in self.list_assets()
            if (asset.mime_type or "").startswith(mime_type_prefix)
        ]

    def get_images(self) -> "List[Asset]":
        """Get all assets whose MIME type starts with 'image/'."""
        return self.find_assets_by_type("image/")

    def get_documents(self) -> "List[Asset]":
        """Get all assets for which ``is_document()`` is true."""
        return [asset for asset in self.list_assets() if asset.is_document()]

    def get_stats(self) -> Dict[str, Any]:
        """Get asset manager statistics.

        Returns:
            The repository's own statistics extended with ``storage_path``,
            ``images_count``, ``documents_count`` and ``average_size``.
        """
        repo_stats = self.repository.get_stats()
        assets = self.list_assets()

        images_count = sum(1 for asset in assets if asset.is_image())
        documents_count = sum(1 for asset in assets if asset.is_document())

        return {
            **repo_stats,
            "storage_path": str(self.storage_path),
            "images_count": images_count,
            "documents_count": documents_count,
            # max(1, ...) avoids dividing by zero when no assets are reported.
            "average_size": repo_stats["total_size_bytes"] / max(1, repo_stats["total_assets"]),
        }

    def verify_integrity(self) -> Dict[str, Any]:
        """Verify integrity of all assets.

        Each asset's stored file is checked for existence and re-hashed.

        Returns:
            Dict with ``total_assets``, ``valid_assets``, ``missing_files``
            (content hashes), ``hash_mismatches`` (dicts with the recorded and
            actual hash plus filename) and ``errors`` (dicts with the asset
            hash and the error text of any exception raised while checking).
        """
        assets = self.list_assets()
        results: Dict[str, Any] = {
            "total_assets": len(assets),
            "valid_assets": 0,
            "missing_files": [],
            "hash_mismatches": [],
            "errors": [],
        }

        for asset in assets:
            try:
                stored_file = Path(asset.path)

                if not stored_file.exists():
                    results["missing_files"].append(asset.content_hash)
                    continue

                actual_hash = self._calculate_hash(stored_file)
                if actual_hash != asset.content_hash:
                    results["hash_mismatches"].append({
                        "asset_hash": asset.content_hash,
                        "actual_hash": actual_hash,
                        "filename": asset.filename,
                    })
                    continue

                results["valid_assets"] += 1
            except Exception as e:
                # One unreadable asset must not abort the whole scan.
                results["errors"].append({
                    "asset_hash": asset.content_hash,
                    "error": str(e),
                })

        return results

    def _calculate_hash(self, file_path: Path, chunk_size: int = 8192) -> str:
        """Calculate SHA-256 hash of file.

        Args:
            file_path: File to hash
            chunk_size: Number of bytes read per iteration; must be positive

        Returns:
            Hex digest of the file's contents

        Raises:
            ValueError: If ``chunk_size`` is not positive
        """
        if chunk_size <= 0:
            # read(0) would return b"" immediately and hash nothing.
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")

        hash_algo = hashlib.sha256()
        with open(file_path, 'rb') as f:
            while chunk := f.read(chunk_size):
                hash_algo.update(chunk)
        return hash_algo.hexdigest()

    def _get_storage_path(self, content_hash: str, extension: str) -> Path:
        """Get content-addressable storage path.

        The file is named ``<content_hash><extension>`` and placed in a
        subdirectory named after the first two characters of the hash.
        """
        subdir = content_hash[:2]
        filename = content_hash + (extension or "")
        return self.storage_path / subdir / filename