""" Enhanced database functionality for Issue #144. This module provides enhanced database schema, performance optimizations, and usage tracking for the asset management system. """ import sqlite3 import json import time from pathlib import Path from typing import List, Dict, Any, Optional, Iterator from datetime import datetime, timedelta from contextlib import contextmanager from .exceptions import AssetError class AssetDatabase: """Enhanced database for asset management with performance features.""" def __init__(self, db_path: Path, enable_pooling: bool = False, max_connections: int = 5): """Initialize enhanced asset database.""" self.db_path = db_path self.enable_pooling = enable_pooling self.max_connections = max_connections self._initialize_base_schema() def _initialize_base_schema(self): """Initialize basic asset metadata schema.""" with sqlite3.connect(self.db_path) as conn: conn.execute(""" CREATE TABLE IF NOT EXISTS asset_metadata ( content_hash TEXT PRIMARY KEY, filename TEXT NOT NULL, size_bytes INTEGER NOT NULL, mime_type TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) """) conn.commit() def initialize_enhanced_schema(self): """Initialize enhanced schema for Issue #144 features.""" with sqlite3.connect(self.db_path) as conn: # Asset usage tracking conn.execute(""" CREATE TABLE IF NOT EXISTS asset_usage_stats ( content_hash TEXT, document_count INTEGER DEFAULT 0, last_used TIMESTAMP, access_frequency FLOAT DEFAULT 0.0, FOREIGN KEY (content_hash) REFERENCES asset_metadata(content_hash) ) """) # Asset processing history conn.execute(""" CREATE TABLE IF NOT EXISTS asset_processing_log ( id INTEGER PRIMARY KEY AUTOINCREMENT, content_hash TEXT, operation TEXT, timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, details JSON, success BOOLEAN DEFAULT TRUE ) """) # Package metadata conn.execute(""" CREATE TABLE IF NOT EXISTS package_metadata ( package_id TEXT PRIMARY KEY, name TEXT, created_at TIMESTAMP, file_path TEXT, size_bytes INTEGER, asset_count INTEGER, checksum TEXT ) """) conn.commit() def create_performance_indexes(self): """Create indexes for optimized queries.""" with sqlite3.connect(self.db_path) as conn: indexes = [ "CREATE INDEX IF NOT EXISTS idx_usage_content_hash ON asset_usage_stats(content_hash)", "CREATE INDEX IF NOT EXISTS idx_usage_last_used ON asset_usage_stats(last_used)", "CREATE INDEX IF NOT EXISTS idx_processing_timestamp ON asset_processing_log(timestamp)", "CREATE INDEX IF NOT EXISTS idx_processing_operation ON asset_processing_log(operation)", "CREATE INDEX IF NOT EXISTS idx_metadata_mime_type ON asset_metadata(mime_type)", "CREATE INDEX IF NOT EXISTS idx_metadata_created_at ON asset_metadata(created_at)" ] for index_sql in indexes: conn.execute(index_sql) conn.commit() def record_asset_usage(self, content_hash: str, document_path: str): """Record asset usage for statistics tracking.""" with sqlite3.connect(self.db_path) as conn: # Check if usage record exists cursor = conn.cursor() cursor.execute( "SELECT document_count FROM asset_usage_stats WHERE content_hash = ?", (content_hash,) ) result = cursor.fetchone() if result: # Update existing record new_count = result[0] + 1 conn.execute(""" UPDATE asset_usage_stats SET document_count = ?, last_used = CURRENT_TIMESTAMP, access_frequency = access_frequency + 1.0 WHERE content_hash = ? """, (new_count, content_hash)) else: # Insert new record conn.execute(""" INSERT INTO asset_usage_stats (content_hash, document_count, last_used, access_frequency) VALUES (?, 1, CURRENT_TIMESTAMP, 1.0) """, (content_hash,)) conn.commit() def get_asset_usage_stats(self, content_hash: str) -> Optional[Dict[str, Any]]: """Get usage statistics for an asset.""" with sqlite3.connect(self.db_path) as conn: conn.row_factory = sqlite3.Row cursor = conn.cursor() cursor.execute(""" SELECT document_count, last_used, access_frequency FROM asset_usage_stats WHERE content_hash = ? """, (content_hash,)) row = cursor.fetchone() if row: return { 'document_count': row['document_count'], 'last_used': datetime.fromisoformat(row['last_used']), 'access_frequency': row['access_frequency'] } return None def log_processing_operation(self, content_hash: str, operation: str, details: Dict[str, Any], success: bool = True) -> int: """Log a processing operation.""" with sqlite3.connect(self.db_path) as conn: cursor = conn.cursor() cursor.execute(""" INSERT INTO asset_processing_log (content_hash, operation, details, success) VALUES (?, ?, ?, ?) """, (content_hash, operation, json.dumps(details), success)) conn.commit() return cursor.lastrowid def get_processing_history(self, content_hash: str) -> List[Dict[str, Any]]: """Get processing history for an asset.""" with sqlite3.connect(self.db_path) as conn: conn.row_factory = sqlite3.Row cursor = conn.cursor() cursor.execute(""" SELECT operation, timestamp, details, success FROM asset_processing_log WHERE content_hash = ? ORDER BY timestamp DESC """, (content_hash,)) history = [] for row in cursor.fetchall(): history.append({ 'operation': row['operation'], 'timestamp': datetime.fromisoformat(row['timestamp']), 'details': json.loads(row['details']), 'success': bool(row['success']) }) return history def get_all_assets(self) -> List[Dict[str, Any]]: """Get all assets from the database.""" with sqlite3.connect(self.db_path) as conn: conn.row_factory = sqlite3.Row cursor = conn.cursor() cursor.execute("SELECT * FROM asset_metadata") assets = [] for row in cursor.fetchall(): assets.append({ 'content_hash': row['content_hash'], 'filename': row['filename'], 'size_bytes': row['size_bytes'], 'mime_type': row['mime_type'], 'created_at': datetime.fromisoformat(row['created_at']), 'updated_at': datetime.fromisoformat(row['updated_at']) }) return assets def get_recently_used_assets(self, limit: int = 20) -> List[Dict[str, Any]]: """Get recently used assets.""" with sqlite3.connect(self.db_path) as conn: conn.row_factory = sqlite3.Row cursor = conn.cursor() cursor.execute(""" SELECT m.content_hash, m.filename, u.last_used, u.document_count FROM asset_metadata m JOIN asset_usage_stats u ON m.content_hash = u.content_hash ORDER BY u.last_used DESC LIMIT ? """, (limit,)) assets = [] for row in cursor.fetchall(): assets.append({ 'content_hash': row['content_hash'], 'filename': row['filename'], 'last_used': datetime.fromisoformat(row['last_used']), 'document_count': row['document_count'] }) return assets def create_backup(self, backup_path: Path): """Create a backup of the database.""" import shutil shutil.copy2(self.db_path, backup_path) @contextmanager def transaction(self): """Context manager for database transactions.""" conn = sqlite3.connect(self.db_path) try: yield conn conn.commit() except Exception: conn.rollback() raise finally: conn.close() class DatabaseMigration: """Database migration management.""" def __init__(self, db_path: Path): """Initialize migration manager.""" self.db_path = db_path self._initialize_migration_table() def _initialize_migration_table(self): """Initialize migration tracking table.""" with sqlite3.connect(self.db_path) as conn: conn.execute(""" CREATE TABLE IF NOT EXISTS migration_history ( migration_name TEXT PRIMARY KEY, applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) """) conn.commit() def create_base_schema(self): """Create base schema (for testing).""" with sqlite3.connect(self.db_path) as conn: conn.execute(""" CREATE TABLE IF NOT EXISTS asset_metadata ( content_hash TEXT PRIMARY KEY, filename TEXT NOT NULL ) """) conn.commit() def apply_migration(self, migration_name: str): """Apply a named migration.""" with sqlite3.connect(self.db_path) as conn: # Check if already applied cursor = conn.cursor() cursor.execute( "SELECT migration_name FROM migration_history WHERE migration_name = ?", (migration_name,) ) if cursor.fetchone(): return # Already applied # Apply migration based on name if migration_name == "add_usage_tracking": conn.execute(""" CREATE TABLE IF NOT EXISTS asset_usage_stats ( content_hash TEXT, document_count INTEGER DEFAULT 0 ) """) elif migration_name == "add_processing_log": conn.execute(""" CREATE TABLE IF NOT EXISTS asset_processing_log ( id INTEGER PRIMARY KEY AUTOINCREMENT, content_hash TEXT, operation TEXT ) """) elif migration_name == "add_package_metadata": conn.execute(""" CREATE TABLE IF NOT EXISTS package_metadata ( package_id TEXT PRIMARY KEY, name TEXT ) """) # Record migration conn.execute( "INSERT INTO migration_history (migration_name) VALUES (?)", (migration_name,) ) conn.commit() def get_applied_migrations(self) -> List[str]: """Get list of applied migrations.""" with sqlite3.connect(self.db_path) as conn: cursor = conn.cursor() cursor.execute("SELECT migration_name FROM migration_history") return [row[0] for row in cursor.fetchall()]