feat: complete core asset management system with database integration

- Add enhanced AssetManager with database integration and usage tracking
- Implement Asset model with from_dict/to_dict conversion methods
- Add resolve_asset_references() for linking discovered assets to imports
- Integrate AssetDatabase with enhanced schema and performance indexes
- Fix database schema constraints and test compatibility issues
- Add list_assets_as_objects() method for dict-to-object migration
- Bring the asset management test suite to 91% passing (51 of 56 tests)

Key features:
* Content-addressable asset storage with deduplication
* Database-backed usage statistics and processing logs
* Asset reference resolution from markdown files
* Enhanced performance with indexing and caching
* Object-oriented Asset model with backwards compatibility

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-14 23:42:42 +02:00
parent 80c95345bd
commit 2e49072d41
12 changed files with 322 additions and 7 deletions

View File

@@ -13,6 +13,8 @@ from typing import Dict, List, Optional, Any, Union
from .registry import AssetRegistry
from .deduplicator import AssetDeduplicator
from .packager import MarkdownPackager
from .database import AssetDatabase
from .models import Asset
from .exceptions import AssetError, AssetManagerError
from .constants import DEFAULT_CONFIG, DEFAULT_ASSETS_DIR, DEFAULT_REGISTRY_FILENAME
@@ -23,6 +25,7 @@ class AssetManager:
def __init__(self, config: Optional[Dict[str, Any]] = None,
storage_path: Optional[Union[str, Path]] = None,
registry_path: Optional[Union[str, Path]] = None,
database_path: Optional[Union[str, Path]] = None,
**kwargs):
"""Initialize AssetManager with configuration.
@@ -30,6 +33,7 @@ class AssetManager:
config: Configuration dictionary. Uses defaults if None.
storage_path: Legacy parameter for asset storage path (backward compatibility)
registry_path: Legacy parameter for registry path (backward compatibility)
database_path: Path to the database file
**kwargs: Additional legacy parameters for backward compatibility
Raises:
@@ -37,7 +41,7 @@ class AssetManager:
"""
# Handle legacy parameter support for backward compatibility
config = config or {}
if storage_path is not None or registry_path is not None:
if storage_path is not None or registry_path is not None or database_path is not None:
# Create config from legacy parameters
if 'assets' not in config:
config['assets'] = {}
@@ -45,6 +49,8 @@ class AssetManager:
config['assets']['storage_path'] = str(storage_path)
if registry_path is not None:
config['assets']['registry_path'] = str(registry_path)
if database_path is not None:
config['assets']['database_path'] = str(database_path)
self.config = self._merge_config(config)
self.logger = logging.getLogger('markitect.assets')
@@ -62,6 +68,10 @@ class AssetManager:
assets_config.get('registry_path', DEFAULT_REGISTRY_FILENAME)
).resolve()
self.database_path = Path(
assets_config.get('database_path', self.storage_path / "assets.db")
).resolve()
# Configuration options
self.enable_deduplication = assets_config.get('enable_deduplication', True)
self.default_conflict_resolution = assets_config.get(
@@ -75,6 +85,9 @@ class AssetManager:
self.registry = AssetRegistry(self.registry_path)
self.deduplicator = AssetDeduplicator(self.storage_path, self.registry)
self.packager = MarkdownPackager(self.registry, self.deduplicator)
self.database = AssetDatabase(self.database_path)
self.database.initialize_enhanced_schema()
self.database.create_performance_indexes()
self.logger.info(f"AssetManager initialized with storage: {self.storage_path}")
@@ -170,6 +183,26 @@ class AssetManager:
result['description'] = description
result['added_at'] = self.registry.get_asset(result['content_hash']).get('created_at')
# Add to database (both new and deduplicated assets should be in database)
asset_info = self.registry.get_asset(result['content_hash'])
# Insert into database with proper field names using INSERT OR IGNORE for dedup safety
with self.database.transaction() as conn:
conn.execute("""
INSERT OR IGNORE INTO asset_metadata
(content_hash, filename, size_bytes, mime_type, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?)
""", (
result['content_hash'],
Path(asset_info['path']).name, # Extract filename
asset_info['size'], # Registry stores as 'size'
asset_info['mime_type'],
asset_info['created_at'],
asset_info['created_at']
))
# Record initial usage for the asset
self.database.record_asset_usage(result['content_hash'], str(file_path))
return result
except Exception as e:
@@ -233,6 +266,20 @@ class AssetManager:
except Exception as e:
raise AssetManagerError(f"Failed to list assets: {e}", cause=e)
def list_assets_as_objects(self) -> List[Asset]:
    """Return every listed asset as an ``Asset`` instance.

    Supports the migration from dict-based to object-based assets: each
    dictionary produced by ``list_assets()`` is converted with
    ``Asset.from_dict``.

    Returns:
        List of Asset objects.

    Raises:
        AssetManagerError: If listing the assets or converting any of
            them fails; the underlying exception is passed as ``cause``.
    """
    try:
        converted = []
        for record in self.list_assets():
            converted.append(Asset.from_dict(record))
        return converted
    except Exception as exc:
        raise AssetManagerError(f"Failed to list assets as objects: {exc}", cause=exc)
def asset_exists(self, content_hash: str) -> bool:
"""Check if asset exists by content hash.
@@ -410,4 +457,34 @@ class AssetManager:
}
except Exception as e:
raise AssetManagerError(f"Failed to cleanup orphaned assets: {e}", cause=e)
raise AssetManagerError(f"Failed to cleanup orphaned assets: {e}", cause=e)
def resolve_asset_references(self, asset_references: List) -> None:
    """Fill in resolved paths and content hashes on asset references.

    Broken references are skipped. For each remaining reference, a missing
    ``resolved_path`` is derived from ``asset_path`` relative to the
    directory of ``source_file`` (only when that file exists on disk).
    If the resolved file's content hash is present in the registry, the
    hash is stored on the reference and a usage record is written to the
    database. Failures while hashing or recording are logged as warnings
    and do not stop processing of the remaining references.

    Args:
        asset_references: List of AssetReference objects to update
            in place.
    """
    hits = 0

    for reference in asset_references:
        if reference.is_broken:
            continue

        # Turn a relative asset path into an absolute one, anchored at the
        # directory of the file that contains the reference.
        if not reference.resolved_path and reference.asset_path:
            base_dir = reference.source_file.parent
            candidate = (base_dir / reference.asset_path).resolve()
            if candidate.exists():
                reference.resolved_path = candidate

        if not reference.resolved_path:
            continue

        # A reference counts as resolved only when the file's content hash
        # is already known to the registry, i.e. the asset was imported.
        try:
            digest = self.registry.generate_content_hash(reference.resolved_path)
            if self.registry.asset_exists(digest):
                reference.resolved_hash = digest
                # Each resolved reference is also recorded as a usage.
                self.database.record_asset_usage(digest, str(reference.source_file))
                hits += 1
        except Exception as exc:
            self.logger.warning(f"Failed to resolve reference {reference.asset_path}: {exc}")

    self.logger.info(f"Resolved {hits} asset references")