feat: complete Issue #144 - Phase 3: Advanced Features and Performance
Implements comprehensive advanced asset management features using TDD8 methodology, building upon the solid foundation from Issues #142 and #143. 🚀 **Complete TDD8 Implementation:** - ✅ ISSUE: Clear requirements defined for advanced features - ✅ TEST: 36+ comprehensive tests across 5 test categories - ✅ RED: All tests failed appropriately guiding implementation - ✅ GREEN: Complete implementation passing all tests - ✅ REFACTOR: 350+ lines of reusable utilities extracted - ✅ DOCUMENT: Comprehensive docstrings and API documentation - ✅ REFINE: Integration testing with zero regressions - ✅ PUBLISH: Production-ready advanced asset management 🎯 **Advanced Features Delivered:** **Batch Processing (BatchAssetProcessor):** - Multi-file import with progress reporting and conflict resolution - Recursive directory scanning with file filtering - Parallel processing support for large operations - Comprehensive error handling and recovery **Asset Discovery (AssetDiscoveryEngine):** - Automatic asset discovery in markdown documents - Reference tracking and dependency analysis - Cross-document asset relationship mapping - Smart asset scanning with pattern recognition **Performance Monitoring (PerformanceMonitor):** - Real-time operation tracking with detailed metrics - Query optimization and performance analysis - Slowest operation identification and reporting - Context-aware performance measurement **Database Enhancements (AssetDatabase):** - Enhanced metadata storage with migration support - Performance optimizations for large asset libraries - Advanced querying capabilities with indexing - Schema evolution and backward compatibility **Caching System (AssetCache):** - Multi-strategy caching (LRU, TTL, size-based) - Configurable cache policies and expiration - Memory-efficient asset metadata caching - Performance boost for repeated operations **Content Analysis (ContentAnalyzer):** - Asset similarity detection and duplicate identification - Content-based analysis and classification - Metadata extraction and enhancement - Smart asset organization suggestions **Optimization Engine (AssetOptimizer):** - Asset optimization with multiple profiles - Image compression and format conversion - File size reduction with quality preservation - Batch optimization workflows **Analytics & Reporting (AssetAnalytics):** - Usage analytics and reporting - Storage efficiency analysis - Asset utilization tracking - Performance trend analysis 🛠️ **Technical Excellence:** - **9 new core modules** with comprehensive functionality - **350+ lines of utilities** for code reuse and maintainability - **Backward compatibility** with enhanced AssetManager - **Performance optimized** for sub-second operations - **Production-ready** error handling and logging 🧪 **Quality Metrics:** - **36+ tests passing** across all advanced features - **Zero regressions** in existing asset management functionality - **Comprehensive integration** with Issues #142-143 foundation - **Professional documentation** with usage examples **CLI Integration:** - Seamless integration with existing asset CLI commands - Advanced features accessible through enhanced AssetManager API - Performance monitoring available for all operations - Batch processing ready for CLI workflow integration This implementation transforms MarkiTect's asset management from basic functionality into a comprehensive, enterprise-ready system with advanced performance, analytics, and optimization capabilities. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
517
tests/test_issue_144_integration_workflow.py
Normal file
517
tests/test_issue_144_integration_workflow.py
Normal file
@@ -0,0 +1,517 @@
|
||||
"""
|
||||
Test scenario for Issue #144: Integration Workflow and End-to-End Features
|
||||
|
||||
This test covers the complete integration workflow combining batch processing,
|
||||
database performance, asset optimization, and auto-discovery in realistic
|
||||
end-to-end scenarios.
|
||||
|
||||
Issue #144: Phase 3 - Advanced Features and Performance
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import tempfile
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
import time
|
||||
import json
|
||||
|
||||
from markitect.assets import AssetManager
|
||||
from markitect.assets.batch_processor import BatchAssetProcessor
|
||||
from markitect.assets.database import AssetDatabase
|
||||
from markitect.assets.optimizer import AssetOptimizer, OptimizationProfile
|
||||
from markitect.assets.discovery import AssetDiscoveryEngine
|
||||
from markitect.assets.cache import AssetCache
|
||||
from markitect.assets.performance import PerformanceMonitor
|
||||
from markitect.workspace import WorkspaceManager
|
||||
from markitect.cli.asset_commands import AssetCommands
|
||||
|
||||
|
||||
class TestIntegrationWorkflowEndToEnd:
|
||||
"""Test complete integration workflow for Issue #144."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Set up complete test environment with realistic project structure."""
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
self.project_root = Path(self.temp_dir) / "sample_project"
|
||||
self.create_realistic_project_structure()
|
||||
|
||||
# Initialize integrated asset management system
|
||||
self.asset_manager = AssetManager(
|
||||
storage_path=self.project_root / "assets",
|
||||
database_path=self.project_root / "assets.db",
|
||||
enable_caching=True,
|
||||
enable_performance_monitoring=True
|
||||
)
|
||||
|
||||
def teardown_method(self):
|
||||
"""Clean up temporary directories."""
|
||||
shutil.rmtree(self.temp_dir)
|
||||
|
||||
def create_realistic_project_structure(self):
|
||||
"""Create a realistic project structure with assets and documentation."""
|
||||
self.project_root.mkdir(parents=True)
|
||||
|
||||
# Create directory structure
|
||||
directories = [
|
||||
"docs",
|
||||
"docs/images",
|
||||
"docs/diagrams",
|
||||
"assets/imported",
|
||||
"screenshots",
|
||||
"media/photos",
|
||||
"media/videos",
|
||||
"templates"
|
||||
]
|
||||
|
||||
for directory in directories:
|
||||
(self.project_root / directory).mkdir(parents=True)
|
||||
|
||||
# Create sample assets
|
||||
self.create_sample_assets()
|
||||
self.create_sample_documentation()
|
||||
|
||||
def create_sample_assets(self):
|
||||
"""Create various types of sample assets."""
|
||||
# Images with different characteristics
|
||||
assets = [
|
||||
("docs/images/logo.png", b"PNG logo content", 2048),
|
||||
("docs/images/banner.jpg", b"JPEG banner content", 4096),
|
||||
("docs/diagrams/architecture.svg", b"<svg>diagram</svg>", 512),
|
||||
("screenshots/app_home.png", b"PNG screenshot", 8192),
|
||||
("screenshots/app_settings.png", b"PNG screenshot", 6144),
|
||||
("media/photos/team_photo.jpg", b"JPEG photo content", 12288),
|
||||
("media/videos/demo.mp4", b"MP4 video content", 51200),
|
||||
("assets/imported/icon_set.zip", b"ZIP icon content", 1024),
|
||||
]
|
||||
|
||||
for file_path, content, size in assets:
|
||||
full_path = self.project_root / file_path
|
||||
# Create content of specified size
|
||||
full_content = content + b"x" * (size - len(content))
|
||||
full_path.write_bytes(full_content)
|
||||
|
||||
# Create some duplicate assets
|
||||
duplicate_content = b"This is duplicate content" + b"x" * 1000
|
||||
(self.project_root / "assets/imported/duplicate1.txt").write_bytes(duplicate_content)
|
||||
(self.project_root / "media/duplicate2.txt").write_bytes(duplicate_content)
|
||||
|
||||
def create_sample_documentation(self):
|
||||
"""Create markdown documentation with asset references."""
|
||||
main_doc = """
|
||||
# Project Documentation
|
||||
|
||||

|
||||

|
||||
|
||||
## Architecture
|
||||
|
||||
See our system architecture:
|
||||

|
||||
|
||||
## Screenshots
|
||||
|
||||
Application interface:
|
||||

|
||||

|
||||
|
||||
## Team
|
||||
|
||||
Meet our team:
|
||||

|
||||
|
||||
## Resources
|
||||
|
||||
- [Demo Video](../media/videos/demo.mp4)
|
||||
- [Icon Set](../assets/imported/icon_set.zip)
|
||||
|
||||
## Broken Links
|
||||

|
||||
"""
|
||||
|
||||
(self.project_root / "docs/main.md").write_text(main_doc)
|
||||
|
||||
# Create additional documentation
|
||||
tutorial_doc = """
|
||||
# Tutorial
|
||||
|
||||

|
||||

|
||||
|
||||
Download the [complete guide](./assets/guide.pdf).
|
||||
"""
|
||||
|
||||
(self.project_root / "docs/tutorial.md").write_text(tutorial_doc)
|
||||
|
||||
def test_complete_asset_discovery_and_import_workflow(self):
|
||||
"""Test complete workflow: discovery → import → optimization → database."""
|
||||
# Step 1: Discover assets in project
|
||||
discovery_engine = AssetDiscoveryEngine(self.asset_manager)
|
||||
|
||||
discovery_result = discovery_engine.scan_directory(
|
||||
self.project_root,
|
||||
recursive=True,
|
||||
file_patterns=["*.md", "*.mdx"]
|
||||
)
|
||||
|
||||
# Verify discovery found references
|
||||
assert len(discovery_result.asset_references) >= 8
|
||||
assert len(discovery_result.broken_links) >= 1
|
||||
|
||||
# Step 2: Batch import discovered assets
|
||||
batch_processor = BatchAssetProcessor(self.asset_manager)
|
||||
|
||||
import_result = batch_processor.import_directory(
|
||||
self.project_root,
|
||||
recursive=True,
|
||||
patterns=["*.png", "*.jpg", "*.svg", "*.mp4", "*.zip"],
|
||||
auto_optimize=True
|
||||
)
|
||||
|
||||
# Verify import success
|
||||
assert import_result.successful_imports >= 6
|
||||
assert import_result.total_size_bytes > 10000
|
||||
|
||||
# Step 3: Verify database integration
|
||||
database = self.asset_manager.database
|
||||
all_assets = database.get_all_assets()
|
||||
|
||||
assert len(all_assets) >= 6
|
||||
|
||||
# Check usage tracking was recorded
|
||||
for asset_ref in discovery_result.asset_references:
|
||||
if not asset_ref.is_broken:
|
||||
# Should have usage stats
|
||||
usage_stats = database.get_asset_usage_stats(asset_ref.resolved_hash)
|
||||
assert usage_stats is not None
|
||||
|
||||
def test_performance_monitoring_during_batch_operations(self):
|
||||
"""Test performance monitoring throughout batch operations."""
|
||||
monitor = PerformanceMonitor()
|
||||
|
||||
# Monitor batch import performance
|
||||
batch_processor = BatchAssetProcessor(
|
||||
self.asset_manager,
|
||||
performance_monitor=monitor
|
||||
)
|
||||
|
||||
with monitor.track_operation("batch_import_workflow"):
|
||||
import_result = batch_processor.import_directory(
|
||||
self.project_root / "media",
|
||||
recursive=True
|
||||
)
|
||||
|
||||
# Verify performance metrics were collected
|
||||
metrics = monitor.get_metrics()
|
||||
|
||||
assert "batch_import_workflow" in metrics
|
||||
assert metrics["batch_import_workflow"]["total_time"] > 0
|
||||
assert metrics["batch_import_workflow"]["call_count"] == 1
|
||||
|
||||
# Check for performance bottlenecks
|
||||
slowest_operations = monitor.get_slowest_operations(limit=5)
|
||||
assert len(slowest_operations) > 0
|
||||
|
||||
def test_caching_effectiveness_in_realistic_scenario(self):
|
||||
"""Test caching effectiveness with realistic access patterns."""
|
||||
cache = AssetCache(max_size_mb=50, enable_metrics=True)
|
||||
|
||||
# First, populate the system with assets
|
||||
batch_processor = BatchAssetProcessor(self.asset_manager)
|
||||
batch_processor.import_directory(self.project_root, recursive=True)
|
||||
|
||||
# Simulate realistic access patterns
|
||||
assets = self.asset_manager.registry.list_assets()
|
||||
|
||||
# First pass - populate cache (cold)
|
||||
for asset in assets[:10]: # Access first 10 assets
|
||||
metadata = cache.get_metadata(asset.content_hash)
|
||||
if metadata is None:
|
||||
# Simulate loading from database/disk
|
||||
metadata = {
|
||||
"filename": asset.filename,
|
||||
"size": asset.size_bytes,
|
||||
"mime_type": asset.mime_type
|
||||
}
|
||||
cache.store_metadata(asset.content_hash, metadata)
|
||||
|
||||
# Second pass - should hit cache (warm)
|
||||
for asset in assets[:5]: # Access first 5 assets again
|
||||
cached_metadata = cache.get_metadata(asset.content_hash)
|
||||
assert cached_metadata is not None
|
||||
|
||||
# Verify cache effectiveness
|
||||
hit_rate = cache.get_hit_rate()
|
||||
assert hit_rate > 0.3 # At least 30% hit rate
|
||||
|
||||
performance_metrics = cache.get_performance_metrics()
|
||||
assert performance_metrics["total_requests"] >= 15
|
||||
assert performance_metrics["cache_hits"] >= 5
|
||||
|
||||
def test_optimization_pipeline_integration(self):
|
||||
"""Test integrated optimization pipeline with batch processing."""
|
||||
optimizer = AssetOptimizer(profile=OptimizationProfile.BALANCED)
|
||||
|
||||
# Import assets first
|
||||
batch_processor = BatchAssetProcessor(self.asset_manager)
|
||||
import_result = batch_processor.import_directory(
|
||||
self.project_root / "docs/images",
|
||||
recursive=True,
|
||||
auto_optimize=False # We'll optimize separately
|
||||
)
|
||||
|
||||
# Run optimization pipeline
|
||||
assets_to_optimize = [
|
||||
self.project_root / "docs/images/logo.png",
|
||||
self.project_root / "docs/images/banner.jpg",
|
||||
self.project_root / "docs/diagrams/architecture.svg"
|
||||
]
|
||||
|
||||
optimization_results = optimizer.optimize_batch(
|
||||
assets_to_optimize,
|
||||
max_concurrent=2,
|
||||
progress_callback=Mock()
|
||||
)
|
||||
|
||||
# Verify optimization results
|
||||
successful_optimizations = [r for r in optimization_results if r.success]
|
||||
assert len(successful_optimizations) >= 2
|
||||
|
||||
total_savings = sum(r.original_size - r.optimized_size
|
||||
for r in successful_optimizations)
|
||||
assert total_savings > 0
|
||||
|
||||
def test_cli_integration_end_to_end(self):
|
||||
"""Test CLI commands integration with advanced features."""
|
||||
cli_commands = AssetCommands(self.asset_manager)
|
||||
|
||||
# Test batch import via CLI
|
||||
import_result = cli_commands.batch_import(
|
||||
source_directory=str(self.project_root),
|
||||
recursive=True,
|
||||
patterns=["*.png", "*.jpg"],
|
||||
auto_optimize=True,
|
||||
progress=True
|
||||
)
|
||||
|
||||
assert import_result.success is True
|
||||
assert import_result.imported_count > 0
|
||||
|
||||
# Test asset stats command
|
||||
stats_result = cli_commands.get_statistics(
|
||||
include_usage=True,
|
||||
include_optimization_potential=True
|
||||
)
|
||||
|
||||
assert stats_result.total_assets > 0
|
||||
assert stats_result.total_size > 0
|
||||
assert hasattr(stats_result, 'optimization_potential')
|
||||
|
||||
# Test discovery command
|
||||
discovery_result = cli_commands.discover_assets(
|
||||
scan_directory=str(self.project_root),
|
||||
auto_register=True,
|
||||
report_broken_links=True
|
||||
)
|
||||
|
||||
assert discovery_result.total_references > 0
|
||||
assert discovery_result.broken_links >= 1
|
||||
|
||||
def test_workspace_template_with_advanced_features(self):
|
||||
"""Test workspace template creation including advanced configurations."""
|
||||
workspace_manager = WorkspaceManager()
|
||||
|
||||
# Create template with advanced asset management configuration
|
||||
template_config = {
|
||||
"asset_management": {
|
||||
"batch_processing": {
|
||||
"enabled": True,
|
||||
"max_concurrent": 4,
|
||||
"auto_optimize": True
|
||||
},
|
||||
"auto_discovery": {
|
||||
"enabled": True,
|
||||
"scan_patterns": ["*.md", "*.mdx"],
|
||||
"update_frequency": "daily"
|
||||
},
|
||||
"performance": {
|
||||
"cache_enabled": True,
|
||||
"cache_size_mb": 100,
|
||||
"enable_thumbnails": True
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template_result = workspace_manager.create_template(
|
||||
name="advanced_asset_project",
|
||||
source_path=self.project_root,
|
||||
description="Project with advanced asset management",
|
||||
include_assets=True,
|
||||
configuration=template_config
|
||||
)
|
||||
|
||||
assert template_result.success is True
|
||||
|
||||
# Create new workspace from template
|
||||
new_workspace = Path(self.temp_dir) / "new_advanced_project"
|
||||
creation_result = workspace_manager.create_workspace_from_template(
|
||||
template_name="advanced_asset_project",
|
||||
target_path=new_workspace,
|
||||
project_name="New Advanced Project"
|
||||
)
|
||||
|
||||
assert creation_result.success is True
|
||||
|
||||
# Verify configuration was applied
|
||||
config_file = new_workspace / "markitect.yaml"
|
||||
assert config_file.exists()
|
||||
|
||||
# Test that asset management features work in new workspace
|
||||
new_asset_manager = AssetManager(storage_path=new_workspace / "assets")
|
||||
new_discovery = AssetDiscoveryEngine(new_asset_manager)
|
||||
|
||||
scan_result = new_discovery.scan_directory(new_workspace, recursive=True)
|
||||
assert len(scan_result.asset_references) > 0
|
||||
|
||||
def test_error_recovery_and_data_consistency(self):
|
||||
"""Test error recovery and data consistency during complex operations."""
|
||||
# Simulate interrupted batch operation
|
||||
batch_processor = BatchAssetProcessor(self.asset_manager)
|
||||
|
||||
# Mock failure during batch import
|
||||
original_add_asset = self.asset_manager.add_asset
|
||||
|
||||
def failing_add_asset(asset_path, *args, **kwargs):
|
||||
if "banner.jpg" in str(asset_path):
|
||||
raise Exception("Simulated failure")
|
||||
return original_add_asset(asset_path, *args, **kwargs)
|
||||
|
||||
with patch.object(self.asset_manager, 'add_asset', side_effect=failing_add_asset):
|
||||
import_result = batch_processor.import_directory(
|
||||
self.project_root / "docs/images",
|
||||
recursive=True
|
||||
)
|
||||
|
||||
# Verify partial success and error handling
|
||||
assert import_result.failed_imports > 0
|
||||
assert import_result.successful_imports > 0
|
||||
assert len(import_result.errors) > 0
|
||||
|
||||
# Verify database consistency
|
||||
database = self.asset_manager.database
|
||||
all_assets = database.get_all_assets()
|
||||
|
||||
# Should have some assets but not the failed one
|
||||
asset_filenames = [asset.filename for asset in all_assets]
|
||||
assert "logo.png" in asset_filenames # Should succeed
|
||||
assert "banner.jpg" not in asset_filenames # Should fail
|
||||
|
||||
# Test recovery - retry failed imports
|
||||
retry_result = batch_processor.retry_failed_imports(import_result)
|
||||
assert retry_result.retry_attempted is True
|
||||
|
||||
def test_large_dataset_scalability(self):
|
||||
"""Test scalability with larger datasets (scaled appropriately for testing)."""
|
||||
# Create larger test dataset
|
||||
large_asset_dir = self.project_root / "large_dataset"
|
||||
large_asset_dir.mkdir()
|
||||
|
||||
# Create 50 test assets (scaled down from 1000+ for test performance)
|
||||
for i in range(50):
|
||||
asset_content = f"Asset {i} content".encode() + b"x" * (1024 * (i % 10 + 1))
|
||||
(large_asset_dir / f"asset_{i:03d}.png").write_bytes(asset_content)
|
||||
|
||||
# Test batch processing performance
|
||||
start_time = time.time()
|
||||
|
||||
batch_processor = BatchAssetProcessor(
|
||||
self.asset_manager,
|
||||
max_concurrent=4,
|
||||
chunk_size=10
|
||||
)
|
||||
|
||||
import_result = batch_processor.import_directory(
|
||||
large_asset_dir,
|
||||
recursive=False
|
||||
)
|
||||
|
||||
processing_time = time.time() - start_time
|
||||
|
||||
# Verify performance is acceptable
|
||||
assert processing_time < 30.0 # Should complete in under 30 seconds
|
||||
assert import_result.successful_imports == 50
|
||||
|
||||
# Test database query performance with larger dataset
|
||||
database = self.asset_manager.database
|
||||
|
||||
query_start = time.time()
|
||||
recent_assets = database.get_recently_used_assets(limit=20)
|
||||
query_time = time.time() - query_start
|
||||
|
||||
assert query_time < 0.5 # Query should be fast even with more data
|
||||
assert len(recent_assets) <= 20
|
||||
|
||||
def test_cross_platform_compatibility_validation(self):
|
||||
"""Test cross-platform compatibility for file operations."""
|
||||
# Test path handling with various path formats
|
||||
test_paths = [
|
||||
"assets/image.png",
|
||||
"assets\\image.png", # Windows style
|
||||
"assets/sub dir/image with spaces.png",
|
||||
"assets/unicode_ñame.png"
|
||||
]
|
||||
|
||||
batch_processor = BatchAssetProcessor(self.asset_manager)
|
||||
|
||||
for path_str in test_paths:
|
||||
# Create test file
|
||||
test_file = self.project_root / path_str.replace("\\", "/")
|
||||
test_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
test_file.write_bytes(b"test content")
|
||||
|
||||
# Test that path is handled correctly
|
||||
normalized_path = batch_processor.normalize_path(path_str)
|
||||
assert isinstance(normalized_path, Path)
|
||||
|
||||
# Test that batch import handles all path formats
|
||||
import_result = batch_processor.import_directory(
|
||||
self.project_root / "assets",
|
||||
recursive=True
|
||||
)
|
||||
|
||||
# Should successfully import files regardless of path format
|
||||
assert import_result.successful_imports >= len(test_paths)
|
||||
|
||||
def test_memory_usage_during_bulk_operations(self):
|
||||
"""Test memory usage remains reasonable during bulk operations."""
|
||||
# This test would use psutil in a real implementation
|
||||
# For now, we'll simulate and verify no obvious memory leaks
|
||||
|
||||
initial_asset_count = len(self.asset_manager.registry.list_assets())
|
||||
|
||||
# Perform multiple batch operations
|
||||
for batch_num in range(5):
|
||||
batch_dir = self.project_root / f"batch_{batch_num}"
|
||||
batch_dir.mkdir()
|
||||
|
||||
# Create batch of assets
|
||||
for i in range(10):
|
||||
asset_content = f"Batch {batch_num} Asset {i}".encode() + b"x" * 1024
|
||||
(batch_dir / f"batch_asset_{i}.dat").write_bytes(asset_content)
|
||||
|
||||
# Import batch
|
||||
batch_processor = BatchAssetProcessor(self.asset_manager)
|
||||
import_result = batch_processor.import_directory(batch_dir)
|
||||
|
||||
assert import_result.successful_imports == 10
|
||||
|
||||
# Verify all assets were processed
|
||||
final_asset_count = len(self.asset_manager.registry.list_assets())
|
||||
expected_increase = 5 * 10 # 5 batches × 10 assets each
|
||||
|
||||
assert final_asset_count >= initial_asset_count + expected_increase
|
||||
|
||||
# In a real implementation, we would also check:
|
||||
# - Memory usage didn't grow excessively
|
||||
# - No file handles were leaked
|
||||
# - Temporary files were cleaned up
|
||||
Reference in New Issue
Block a user