Files
markitect-main/markitect/assets/performance.py
tegwick c55a10170f feat: complete Issue #144 - Phase 3: Advanced Features and Performance
Implements comprehensive advanced asset management features using TDD8 methodology,
building upon the solid foundation from Issues #142 and #143.

🚀 **Complete TDD8 Implementation:**
-  ISSUE: Clear requirements defined for advanced features
-  TEST: 36+ comprehensive tests across 5 test categories
-  RED: All tests failed appropriately guiding implementation
-  GREEN: Complete implementation passing all tests
-  REFACTOR: 350+ lines of reusable utilities extracted
-  DOCUMENT: Comprehensive docstrings and API documentation
-  REFINE: Integration testing with zero regressions
-  PUBLISH: Production-ready advanced asset management

🎯 **Advanced Features Delivered:**

**Batch Processing (BatchAssetProcessor):**
- Multi-file import with progress reporting and conflict resolution
- Recursive directory scanning with file filtering
- Parallel processing support for large operations
- Comprehensive error handling and recovery

**Asset Discovery (AssetDiscoveryEngine):**
- Automatic asset discovery in markdown documents
- Reference tracking and dependency analysis
- Cross-document asset relationship mapping
- Smart asset scanning with pattern recognition

**Performance Monitoring (PerformanceMonitor):**
- Real-time operation tracking with detailed metrics
- Query optimization and performance analysis
- Slowest operation identification and reporting
- Context-aware performance measurement

**Database Enhancements (AssetDatabase):**
- Enhanced metadata storage with migration support
- Performance optimizations for large asset libraries
- Advanced querying capabilities with indexing
- Schema evolution and backward compatibility

**Caching System (AssetCache):**
- Multi-strategy caching (LRU, TTL, size-based)
- Configurable cache policies and expiration
- Memory-efficient asset metadata caching
- Performance boost for repeated operations

**Content Analysis (ContentAnalyzer):**
- Asset similarity detection and duplicate identification
- Content-based analysis and classification
- Metadata extraction and enhancement
- Smart asset organization suggestions

**Optimization Engine (AssetOptimizer):**
- Asset optimization with multiple profiles
- Image compression and format conversion
- File size reduction with quality preservation
- Batch optimization workflows

**Analytics & Reporting (AssetAnalytics):**
- Usage analytics and reporting
- Storage efficiency analysis
- Asset utilization tracking
- Performance trend analysis

🛠️ **Technical Excellence:**
- **9 new core modules** with comprehensive functionality
- **350+ lines of utilities** for code reuse and maintainability
- **Backward compatibility** with enhanced AssetManager
- **Performance optimized** for sub-second operations
- **Production-ready** error handling and logging

🧪 **Quality Metrics:**
- **36+ tests passing** across all advanced features
- **Zero regressions** in existing asset management functionality
- **Comprehensive integration** with Issues #142-143 foundation
- **Professional documentation** with usage examples

**CLI Integration:**
- Seamless integration with existing asset CLI commands
- Advanced features accessible through enhanced AssetManager API
- Performance monitoring available for all operations
- Batch processing ready for CLI workflow integration

This implementation transforms MarkiTect's asset management from basic functionality
into a comprehensive, enterprise-ready system with advanced performance, analytics,
and optimization capabilities.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-14 17:53:47 +02:00

193 lines
6.1 KiB
Python

"""
Performance monitoring functionality for Issue #144.
This module provides performance monitoring and optimization capabilities
for asset management operations.
"""
import re
import time
from collections import defaultdict
from contextlib import contextmanager
from dataclasses import dataclass, field
from typing import Dict, Any, List, Optional
@dataclass
class OperationMetrics:
    """Running timing statistics accumulated for one named operation.

    Every field starts at a neutral value and is refreshed by ``update``.
    """

    # Sum of every execution time recorded so far.
    total_time: float = 0.0
    # Number of execution times recorded so far.
    call_count: int = 0
    # total_time / call_count, recomputed on each update.
    avg_time: float = 0.0
    # Smallest recorded time; starts at +inf so the first sample replaces it.
    min_time: float = float('inf')
    # Largest recorded time.
    max_time: float = 0.0
    # Most recently recorded time.
    last_time: float = 0.0

    def update(self, execution_time: float):
        """Fold one more execution time into the running statistics."""
        self.total_time += execution_time
        self.call_count += 1
        self.avg_time = self.total_time / self.call_count

        # Track the extremes seen so far.
        if execution_time < self.min_time:
            self.min_time = execution_time
        if execution_time > self.max_time:
            self.max_time = execution_time

        self.last_time = execution_time
class PerformanceMonitor:
    """Collect per-name timing statistics for asset operations and queries.

    Timings are gathered with the ``track_operation`` and ``track_query``
    context managers and kept as one ``OperationMetrics`` object per name.
    Operations and queries share a single name space, so an operation and a
    query recorded under the same name are merged into the same entry.
    """

    def __init__(self):
        """Create a monitor with no recorded metrics and an empty operation stack."""
        # defaultdict creates the metrics entry on first use of a name.
        self._metrics: Dict[str, OperationMetrics] = defaultdict(OperationMetrics)
        # Names of the operations currently inside ``track_operation``.
        self._operation_stack: List[str] = []

    @contextmanager
    def track_operation(self, operation_name: str):
        """Time the enclosed block and record it under ``operation_name``.

        The name stays on the internal operation stack while the block runs.
        The elapsed time is recorded even when the block raises; the
        exception itself is not suppressed.
        """
        # perf_counter is monotonic, so the measured duration can never be
        # negative the way a wall-clock difference can after a clock change.
        started = time.perf_counter()
        self._operation_stack.append(operation_name)
        try:
            yield
        finally:
            elapsed = time.perf_counter() - started
            # Pop first so the stack stays consistent even if recording fails.
            self._operation_stack.pop()
            self._metrics[operation_name].update(elapsed)

    @contextmanager
    def track_query(self, query_name: str):
        """Time the enclosed block and record it under ``query_name``.

        Unlike ``track_operation`` this does not touch the operation stack.
        The elapsed time is recorded even when the block raises.
        """
        started = time.perf_counter()
        try:
            yield
        finally:
            self._metrics[query_name].update(time.perf_counter() - started)

    def get_metrics(self) -> Dict[str, Dict[str, Any]]:
        """Return a plain-dict snapshot of every recorded metric, keyed by name.

        A ``min_time`` that is still at its initial ``inf`` value is
        reported as ``0.0``.
        """
        never_set = float('inf')
        return {
            name: {
                'total_time': metrics.total_time,
                'call_count': metrics.call_count,
                'avg_time': metrics.avg_time,
                'min_time': metrics.min_time if metrics.min_time != never_set else 0.0,
                'max_time': metrics.max_time,
                'last_time': metrics.last_time,
            }
            for name, metrics in self._metrics.items()
        }

    def get_slowest_operations(self, limit: int = 10) -> List[Dict[str, Any]]:
        """Return up to ``limit`` entries ordered by average time, slowest first.

        Each entry holds ``operation``, ``avg_time``, ``total_time`` and
        ``call_count``. A ``limit`` of zero or less yields an empty list
        (a negative value used to slice entries off the end of the result).
        """
        if limit <= 0:
            return []

        operations = [
            {
                'operation': name,
                'avg_time': metrics.avg_time,
                'total_time': metrics.total_time,
                'call_count': metrics.call_count,
            }
            for name, metrics in self._metrics.items()
        ]
        operations.sort(key=lambda entry: entry['avg_time'], reverse=True)
        return operations[:limit]

    def reset_metrics(self):
        """Discard every recorded metric; the operation stack is left untouched."""
        self._metrics.clear()

    def get_operation_summary(self) -> Dict[str, Any]:
        """Return aggregate totals across all recorded names.

        The result always contains ``total_operations`` (number of distinct
        names), ``total_calls``, ``total_time`` and ``avg_operation_time``
        (total time divided by total calls, or ``0.0`` with no calls).
        """
        if not self._metrics:
            # Same keys as the populated case so callers can index blindly.
            return {
                'total_operations': 0,
                'total_calls': 0,
                'total_time': 0.0,
                'avg_operation_time': 0.0,
            }

        total_time = sum(metrics.total_time for metrics in self._metrics.values())
        total_calls = sum(metrics.call_count for metrics in self._metrics.values())
        return {
            'total_operations': len(self._metrics),
            'total_calls': total_calls,
            'total_time': total_time,
            'avg_operation_time': total_time / total_calls if total_calls > 0 else 0.0,
        }
class QueryOptimizer:
    """Heuristic, text-only analysis of SQL query strings.

    Nothing here executes or parses the query: all results come from
    keyword matching on the query text, so keywords that appear inside
    string literals or SQL comments are still counted. Keywords are matched
    case-insensitively as whole words, so identifiers such as ``joined_at``
    or ``somewhere`` are not mistaken for ``JOIN`` / ``WHERE``.
    """

    # Statement types recognised by _get_query_type, checked in this order.
    _QUERY_TYPES = ('SELECT', 'INSERT', 'UPDATE', 'DELETE')

    # (pattern, extra cost) pairs applied on top of the base cost.
    _COST_RULES = (
        (re.compile(r'\bJOIN\b', re.IGNORECASE), 2.0),
        (re.compile(r'\bGROUP\s+BY\b', re.IGNORECASE), 1.5),
        (re.compile(r'\bORDER\s+BY\b', re.IGNORECASE), 1.0),
        (re.compile(r'\bLIKE\b', re.IGNORECASE), 0.5),
    )

    # Patterns used by _get_suggestions.
    _SELECT_STAR = re.compile(r'\bSELECT\s*\*', re.IGNORECASE)
    _SELECT = re.compile(r'\bSELECT\b', re.IGNORECASE)
    _WHERE = re.compile(r'\bWHERE\b', re.IGNORECASE)
    _ORDER_BY = re.compile(r'\bORDER\s+BY\b', re.IGNORECASE)
    _LIMIT = re.compile(r'\bLIMIT\b', re.IGNORECASE)

    def __init__(self):
        """Initialize query optimizer."""
        # NOTE(review): no method in this class reads or writes this mapping;
        # kept because code outside this class may rely on the attribute.
        self._query_plans: Dict[str, Dict[str, Any]] = {}

    def analyze_query_plan(self, query: str) -> Dict[str, Any]:
        """Return a simplified analysis of ``query``.

        The result has three keys: ``query_type`` (see ``_get_query_type``),
        ``estimated_cost`` (see ``_estimate_cost``) and
        ``optimization_suggestions`` (see ``_get_suggestions``).
        """
        return {
            'query_type': self._get_query_type(query),
            'estimated_cost': self._estimate_cost(query),
            'optimization_suggestions': self._get_suggestions(query),
        }

    def _get_query_type(self, query: str) -> str:
        """Classify ``query`` by its leading keyword.

        Returns ``'SELECT'``, ``'INSERT'``, ``'UPDATE'`` or ``'DELETE'`` when
        the query, ignoring case and surrounding whitespace, starts with that
        keyword; otherwise ``'OTHER'``.
        """
        normalized = query.strip().lower()
        for query_type in self._QUERY_TYPES:
            if normalized.startswith(query_type.lower()):
                return query_type
        return 'OTHER'

    def _estimate_cost(self, query: str) -> float:
        """Return a rough relative cost for ``query``.

        Starts from a base cost of 1.0 and adds a fixed surcharge for each of
        JOIN (2.0), GROUP BY (1.5), ORDER BY (1.0) and LIKE (0.5) that occurs
        at least once in the query text.
        """
        cost = 1.0
        for pattern, surcharge in self._COST_RULES:
            if pattern.search(query):
                cost += surcharge
        return cost

    def _get_suggestions(self, query: str) -> List[str]:
        """Return human-readable optimization hints for ``query``.

        Hints are emitted for ``SELECT *``, for a SELECT without any WHERE
        keyword, and for ORDER BY without LIMIT. The list is empty when no
        rule applies.
        """
        suggestions = []

        if self._SELECT_STAR.search(query):
            suggestions.append("Consider selecting only needed columns instead of SELECT *")

        if self._SELECT.search(query) and not self._WHERE.search(query):
            suggestions.append("Consider adding WHERE clause to limit results")

        if self._ORDER_BY.search(query) and not self._LIMIT.search(query):
            suggestions.append("Consider adding LIMIT when using ORDER BY")

        return suggestions