Files
markitect-main/tests/test_issue_146_final_integration.py
tegwick 92c63f0716 fix: update Issue #146 CLI import path
- Fixed import path from markitect.cli.asset_commands to markitect.assets.cli_commands
- Resolves import error that prevented test collection

Note: Some integration tests may need interface adjustments as the TDD8
implementations created comprehensive mock interfaces that need alignment
with the actual asset management backend APIs.
2025-10-14 19:15:20 +02:00

578 lines
23 KiB
Python

"""
Test scenario for Issue #146: Asset Management Implementation Milestone - Final Integration
===========================================================================================
This test suite provides comprehensive validation of the complete asset management
ecosystem, covering all phases and ensuring production readiness.
Issue #146: Asset Management Implementation Milestone - Variant B Tracker
Test Coverage:
1. End-to-end workflow validation across all asset management components
2. Performance benchmarks and scalability validation
3. Production readiness and error handling
4. Cross-platform compatibility and deployment readiness
5. Complete integration with markitect CLI and workspace management
6. Final milestone completion verification
"""
import pytest
import tempfile
import shutil
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
import time
import json
import hashlib
import zipfile
from typing import List, Dict, Any
from markitect.assets import AssetManager
from markitect.assets.registry import AssetRegistry
from markitect.assets.deduplicator import AssetDeduplicator
from markitect.assets.packager import MarkdownPackager
from markitect.assets.batch_processor import BatchAssetProcessor
from markitect.assets.cache import AssetCache
from markitect.assets.database import AssetDatabase
from markitect.assets.performance import PerformanceMonitor
from markitect.workspace import WorkspaceManager
from markitect.assets.cli_commands import AssetCommands
class TestFinalAssetManagementIntegration:
    """Final integration test suite for complete asset management implementation.

    Every test works inside the temporary directory produced by the
    ``integration_workspace`` fixture. Throughout this suite
    ``AssetManager.add_asset`` is treated as returning a mapping that carries
    the asset hash under the ``'content_hash'`` key.
    """

    @pytest.fixture
    def integration_workspace(self):
        """Create a comprehensive test workspace with realistic data.

        Builds ``<tmp>/test_project/<doc>/`` for three documents. Each document
        directory holds ``<doc>.md`` and an ``assets/`` folder with three files.
        ``logo.png`` has identical bytes in every document; every other asset
        embeds its own name and the document name, so it is unique.

        Yields:
            Path: root of the temporary workspace. The directory is removed on
            teardown, and also when setup itself fails part-way.
        """
        temp_dir = Path(tempfile.mkdtemp(prefix="asset_integration_"))
        try:
            # Create realistic project structure
            project_dir = temp_dir / "test_project"
            project_dir.mkdir()

            # Create multiple documents with shared and unique assets
            docs = [
                ("user_guide", ["logo.png", "screenshot1.png", "diagram.svg"]),
                ("technical_specs", ["logo.png", "architecture.png", "flowchart.svg"]),
                ("marketing_material", ["logo.png", "product_image.jpg", "banner.png"]),
            ]

            for doc_name, assets in docs:
                doc_dir = project_dir / doc_name
                doc_dir.mkdir()

                # Create markdown document. Every image link targets a file that
                # is actually written to ``<doc>/assets/`` below.
                (doc_dir / f"{doc_name}.md").write_text(f"""
# {doc_name.title().replace('_', ' ')}
This is a test document for integration testing.
![Logo](assets/logo.png)
![Asset 1](assets/{assets[1]})
![Asset 2](assets/{assets[2]})
Content for comprehensive testing of the asset management system.
""")

                # Create assets directory with test files
                assets_dir = doc_dir / "assets"
                assets_dir.mkdir()
                for asset in assets:
                    if asset == "logo.png":
                        # Shared asset: same bytes in every document
                        asset_content = b"Shared logo content for consistency"
                    else:
                        asset_content = (
                            f"Test asset content for {asset} in {doc_name}".encode()
                        )
                    (assets_dir / asset).write_bytes(asset_content)

            yield temp_dir
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)

    @pytest.fixture
    def asset_manager(self, integration_workspace):
        """Initialize AssetManager for integration testing.

        The manager is given ``<workspace>/asset_storage`` as its storage path.
        """
        storage_path = integration_workspace / "asset_storage"
        return AssetManager(storage_path=storage_path)

    def test_complete_ecosystem_initialization(self, integration_workspace):
        """Test complete initialization of all asset management components."""
        storage_path = integration_workspace / "storage"

        # Initialize all core components
        manager = AssetManager(storage_path=storage_path)
        registry = AssetRegistry(storage_path / "registry.json")
        deduplicator = AssetDeduplicator(storage_path / "assets", registry)
        packager = MarkdownPackager(registry, deduplicator)

        # Verify all components are properly initialized
        assert manager.storage_path.exists()
        assert registry.registry_path.parent.exists()
        assert deduplicator.storage_path.exists()
        assert packager.registry == registry
        assert packager.deduplicator == deduplicator

        # Test component integration: an asset added through the manager must
        # be visible through the manager's own registry and deduplicator.
        test_file = integration_workspace / "test.txt"
        test_file.write_text("Integration test content")
        result = manager.add_asset(test_file)
        asset_hash = result['content_hash']
        assert manager.registry.asset_exists(asset_hash)
        assert manager.deduplicator.get_asset_path(asset_hash).exists()

    def test_end_to_end_document_workflow(self, asset_manager, integration_workspace):
        """Test complete document workflow from creation to package extraction.

        Phases: add every asset, check the shared logo hashes agree, package
        each document, extract each package, then compare extracted content
        with the originals.
        """
        project_dir = integration_workspace / "test_project"

        # Phase 1: Process all documents and their assets
        processed_assets = {}
        for doc_dir in project_dir.iterdir():
            if not doc_dir.is_dir():
                continue
            doc_assets = []
            assets_dir = doc_dir / "assets"
            if assets_dir.exists():
                for asset_file in assets_dir.iterdir():
                    if asset_file.is_file():
                        # Store the hash itself, matching how the other tests
                        # in this class consume the add_asset() result.
                        result = asset_manager.add_asset(asset_file)
                        doc_assets.append(result['content_hash'])
            processed_assets[doc_dir.name] = doc_assets

        # Verify asset deduplication occurred
        logo_hashes = []
        for doc_name, assets in processed_assets.items():
            if assets:  # If document has assets
                # Check that logo.png appears in multiple documents but has same hash
                doc_path = project_dir / doc_name / "assets" / "logo.png"
                if doc_path.exists():
                    logo_hash = asset_manager.registry.get_content_hash(doc_path)
                    logo_hashes.append(logo_hash)
        if len(logo_hashes) > 1:
            assert all(h == logo_hashes[0] for h in logo_hashes), "Logo deduplication failed"

        # Phase 2: Create packages for each document
        packages = {}
        for doc_dir in project_dir.iterdir():
            if doc_dir.is_dir():
                package_path = integration_workspace / f"{doc_dir.name}.mdpkg"
                asset_manager.create_package(doc_dir, package_path)
                packages[doc_dir.name] = package_path
                assert package_path.exists()

        # Phase 3: Extract packages to new workspace
        extracted_workspace = integration_workspace / "extracted"
        extracted_workspace.mkdir()
        for doc_name, package_path in packages.items():
            extract_dir = extracted_workspace / doc_name
            asset_manager.extract_package(package_path, extract_dir)

            # Verify extracted content
            assert extract_dir.exists()
            assert (extract_dir / f"{doc_name}.md").exists()
            assert (extract_dir / "assets").exists()

        # Phase 4: Verify workspace integrity
        for doc_name in packages:
            original_dir = project_dir / doc_name
            extracted_dir = extracted_workspace / doc_name

            # Compare markdown content
            original_md = (original_dir / f"{doc_name}.md").read_text()
            extracted_md = (extracted_dir / f"{doc_name}.md").read_text()
            assert original_md == extracted_md

            # Verify asset integrity
            original_assets = original_dir / "assets"
            extracted_assets = extracted_dir / "assets"
            if not original_assets.exists():
                continue
            for asset_file in original_assets.iterdir():
                if not asset_file.is_file():
                    continue
                extracted_asset = extracted_assets / asset_file.name
                assert extracted_asset.exists()

                # Compare file content or verify symlink
                if extracted_asset.is_symlink():
                    # Verify symlink points to valid asset
                    assert extracted_asset.resolve().exists()
                else:
                    # Compare content directly
                    assert asset_file.read_bytes() == extracted_asset.read_bytes()

    def test_performance_benchmarks(self, asset_manager, integration_workspace):
        """Test performance benchmarks for production readiness validation.

        Wall-clock durations are taken with ``time.perf_counter()`` so that
        system clock adjustments cannot distort the measured intervals.
        """
        # Performance Monitor
        monitor = PerformanceMonitor()

        # Create performance test data
        test_files = []
        for i in range(50):  # 50 test files for benchmark (reduced for faster testing)
            test_file = integration_workspace / f"perf_test_{i}.bin"
            # Create files of varying sizes (1KB to 50KB)
            size = 1024 * (1 + i % 50)
            test_file.write_bytes(b"X" * size)
            test_files.append(test_file)

        # Benchmark: Asset Addition Performance
        start_time = time.perf_counter()
        asset_results = []
        with monitor.track_operation("asset_addition_benchmark"):
            for test_file in test_files:
                result = asset_manager.add_asset(test_file)
                asset_results.append(result)
        addition_time = time.perf_counter() - start_time

        # Performance Requirements:
        # - Should process 50 assets in under 3 seconds
        # - Average time per asset should be under 60ms
        assert addition_time < 3.0, f"Asset addition too slow: {addition_time:.2f}s"
        assert (addition_time / len(test_files)) < 0.06, "Average per-asset time too slow"

        # Benchmark: Deduplication Performance
        duplicate_results = []
        start_time = time.perf_counter()

        # Add duplicate assets (should be deduplicated instantly).
        # The measured interval also includes writing each duplicate file.
        with monitor.track_operation("deduplication_benchmark"):
            for i in range(10):
                duplicate_file = integration_workspace / f"duplicate_{i}.bin"
                duplicate_file.write_bytes(test_files[0].read_bytes())  # Same content as first file
                duplicate_result = asset_manager.add_asset(duplicate_file)
                duplicate_results.append(duplicate_result)
        dedup_time = time.perf_counter() - start_time

        # Deduplication should be very fast (under 0.2s for 10 duplicates)
        assert dedup_time < 0.2, f"Deduplication too slow: {dedup_time:.3f}s"

        # All duplicates should have same hash as original
        original_hash = asset_results[0]['content_hash']
        assert all(r['content_hash'] == original_hash for r in duplicate_results)

        # Benchmark: Package Creation Performance
        package_dir = integration_workspace / "package_test"
        package_dir.mkdir()
        (package_dir / "test.md").write_text("# Test Document")
        assets_dir = package_dir / "assets"
        assets_dir.mkdir()

        # Copy first 10 test files into the package's assets directory
        for i, test_file in enumerate(test_files[:10]):
            (assets_dir / f"asset_{i}.bin").write_bytes(test_file.read_bytes())

        start_time = time.perf_counter()
        package_path = integration_workspace / "benchmark.mdpkg"
        asset_manager.create_package(package_dir, package_path)
        package_time = time.perf_counter() - start_time

        # Package creation should be fast (under 1s for 10 assets)
        assert package_time < 1.0, f"Package creation too slow: {package_time:.2f}s"
        assert package_path.exists()

        # Get monitoring metrics
        metrics = monitor.get_metrics()

        # Verify performance metrics are collected
        assert metrics is not None
        assert "asset_addition_benchmark" in metrics
        assert "deduplication_benchmark" in metrics

        # Verify the operations were tracked
        addition_metrics = metrics["asset_addition_benchmark"]
        assert addition_metrics.call_count == 1  # Single benchmark run
        assert addition_metrics.total_time > 0

    def test_error_handling_and_recovery(self, asset_manager, integration_workspace):
        """Test comprehensive error handling and recovery mechanisms."""
        # Test 1: Invalid Asset Handling
        nonexistent_file = integration_workspace / "does_not_exist.txt"
        with pytest.raises(Exception):  # Should raise appropriate exception
            asset_manager.add_asset(nonexistent_file)

        # Test 2: Corrupted Registry Recovery
        # Corrupt the registry file (only when one has already been written)
        if asset_manager.registry.registry_path.exists():
            asset_manager.registry.registry_path.write_text("invalid json content")

        # Registry should recover gracefully
        new_registry = AssetRegistry(asset_manager.registry.registry_path)
        assert isinstance(new_registry.assets, dict)

        # Test 3: Package Corruption Handling
        test_file = integration_workspace / "test.txt"
        test_file.write_text("Test content")
        asset_manager.add_asset(test_file)

        # Create corrupted package
        corrupted_package = integration_workspace / "corrupted.mdpkg"
        corrupted_package.write_bytes(b"This is not a valid ZIP file")

        # Extraction should fail gracefully
        extract_dir = integration_workspace / "extract_test"
        with pytest.raises(Exception):
            asset_manager.extract_package(corrupted_package, extract_dir)

        # Test 4: Storage Permission Handling
        # This is platform-dependent, so we'll mock it
        with patch('pathlib.Path.mkdir') as mock_mkdir:
            mock_mkdir.side_effect = PermissionError("Permission denied")
            with pytest.raises(PermissionError):
                AssetManager(storage_path=integration_workspace / "restricted")

    def test_cli_integration(self, asset_manager, integration_workspace):
        """Test CLI integration and command functionality."""
        # Create test data
        test_file = integration_workspace / "cli_test.txt"
        test_file.write_text("CLI integration test")

        # Initialize CLI commands
        cli_commands = AssetCommands(asset_manager)

        # Test asset addition via CLI
        result = cli_commands.add_asset(str(test_file))
        assert result.success
        assert result.asset_hash is not None

        # Test asset listing via CLI
        list_result = cli_commands.list_assets()
        assert list_result.success
        assert len(list_result.assets) > 0

        # Test asset info retrieval
        info_result = cli_commands.get_asset_info(result.asset_hash)
        assert info_result.success
        assert info_result.asset_info is not None

    def test_cross_platform_compatibility(self, asset_manager, integration_workspace):
        """Test cross-platform compatibility features."""
        # Test symlink creation with fallback
        test_file = integration_workspace / "cross_platform_test.txt"
        test_file.write_text("Cross-platform test content")

        # get_asset_path() below is keyed by the hash, so unwrap it here.
        asset_hash = asset_manager.add_asset(test_file)['content_hash']
        assert asset_hash is not None

        # Create workspace with symlinks/copies
        workspace_dir = integration_workspace / "workspace"
        workspace_dir.mkdir()
        target_file = workspace_dir / "test_asset.txt"

        # Test link creation (should work on all platforms)
        deduplicator = asset_manager.deduplicator
        deduplicator.create_link(
            deduplicator.get_asset_path(asset_hash),
            target_file,
        )

        # Verify link was created (symlink on Unix, copy on Windows)
        assert target_file.exists()
        assert target_file.read_text() == test_file.read_text()

    def test_production_deployment_readiness(self, asset_manager, integration_workspace):
        """Test production deployment readiness features."""
        # Test 1: Configuration Management
        config = asset_manager.config
        assert config is not None

        # Test 2: Logging and Monitoring
        # Verify logging is properly configured
        import logging
        logger = logging.getLogger("markitect.assets")
        # NOTE(review): a logger whose level was never set reports NOTSET (0),
        # so this passes even without any logging configuration. Consider
        # getEffectiveLevel() once the intended configuration is confirmed.
        assert logger.level <= logging.INFO

        # Test 3: Resource Management
        # Create large number of assets to test memory management
        one_mib = 1024 * 1024
        large_assets = []
        for i in range(50):
            large_file = integration_workspace / f"large_asset_{i}.bin"
            # Create 1MB files. A 4-byte index prefix keeps every file unique;
            # identical payloads would all share one hash and make the removal
            # check below ambiguous.
            index_prefix = i.to_bytes(4, "big")
            large_file.write_bytes(index_prefix + b"X" * (one_mib - len(index_prefix)))
            hash_val = asset_manager.add_asset(large_file)['content_hash']
            large_assets.append(hash_val)

        # Verify all assets were processed without memory issues
        assert len(large_assets) == 50

        # Test 4: Cleanup and Maintenance
        # Test asset removal
        removed_hash = large_assets[0]
        asset_manager.remove_asset(removed_hash)

        # Verify asset was removed from registry
        assert not asset_manager.registry.asset_exists(removed_hash)

    def test_final_milestone_validation(self, asset_manager, integration_workspace):
        """Final validation test for Issue #146 milestone completion."""
        # Validation 1: All Core Features Implemented
        core_features = {
            "asset_storage": hasattr(asset_manager, "add_asset"),
            "deduplication": hasattr(asset_manager, "deduplicator"),
            "packaging": hasattr(asset_manager, "create_package"),
            "registry": hasattr(asset_manager, "registry"),
            "extraction": hasattr(asset_manager, "extract_package"),
            "removal": hasattr(asset_manager, "remove_asset"),
        }
        for feature, implemented in core_features.items():
            assert implemented, f"Core feature not implemented: {feature}"

        # Validation 2: Integration with markitect Ecosystem
        # Test workspace integration
        workspace_manager = WorkspaceManager()
        assert workspace_manager is not None

        # Validation 3: Performance Requirements Met
        # Quick performance test
        perf_test_file = integration_workspace / "perf_validation.txt"
        perf_test_file.write_text("Performance validation test")
        start_time = time.perf_counter()
        asset_manager.add_asset(perf_test_file)
        add_time = time.perf_counter() - start_time

        # Should add asset in under 100ms
        assert add_time < 0.1, f"Performance requirement not met: {add_time:.3f}s"

        # Validation 4: Error Handling Robustness
        error_scenarios = [
            (lambda: asset_manager.add_asset(integration_workspace / "nonexistent.txt"), Exception),
            (lambda: asset_manager.get_asset_info("invalid_hash"), Exception),
        ]
        for scenario, expected_exception in error_scenarios:
            with pytest.raises(expected_exception):
                scenario()

        # Validation 5: Production Readiness Checklist
        production_checklist = {
            "storage_configured": asset_manager.storage_path.exists(),
            # NOTE(review): len(...) >= 0 is always true; this entry only
            # proves that list_assets() returns something sized.
            "registry_functional": len(asset_manager.list_assets()) >= 0,
            "deduplication_working": asset_manager.deduplicator is not None,
            "logging_enabled": True,  # Verified in previous tests
            "error_handling": True,  # Verified above
        }
        for check, passed in production_checklist.items():
            assert passed, f"Production readiness check failed: {check}"

        # Final Success Marker
        success_marker = integration_workspace / "MILESTONE_146_COMPLETE.txt"
        success_marker.write_text(f"""
Issue #146: Asset Management Implementation Milestone - Variant B Tracker
=====================================================================
MILESTONE COMPLETION VERIFIED: {time.strftime('%Y-%m-%d %H:%M:%S')}
All validation tests passed:
✅ Complete ecosystem initialization
✅ End-to-end document workflow
✅ Performance benchmarks met
✅ Error handling and recovery
✅ CLI integration functional
✅ Cross-platform compatibility
✅ Production deployment readiness
✅ Final milestone validation
Asset Management System Status: PRODUCTION READY
""")
        assert success_marker.exists()
        print(f"\n🎉 Issue #146 Milestone Validation Complete: {success_marker}")
# Performance Benchmark Test Class
class TestAssetManagementPerformanceBenchmarks:
    """Dedicated performance benchmark suite for production validation."""

    @pytest.fixture
    def benchmark_workspace(self):
        """Create large-scale test workspace for benchmarking.

        Writes 100 files named ``benchmark_<i><ext>`` (25 per extension)
        directly into a fresh temporary directory. Each file starts with
        ``"Benchmark content <i>"`` and is padded with ``X`` bytes up to the
        size configured for its extension, so no two files share content.

        Yields:
            Path: the temporary directory. It is removed on teardown, and also
            when setup itself fails part-way.
        """
        temp_dir = Path(tempfile.mkdtemp(prefix="asset_benchmark_"))
        try:
            # Create variety of file types and sizes
            file_types = [
                (".txt", "text/plain", 1024),  # 1KB text files
                (".jpg", "image/jpeg", 50 * 1024),  # 50KB images
                (".png", "image/png", 100 * 1024),  # 100KB images
                (".pdf", "application/pdf", 500 * 1024),  # 500KB documents
            ]
            for i in range(25):  # 25 files of each type = 100 total
                for ext, _mime, size in file_types:
                    test_file = temp_dir / f"benchmark_{i}{ext}"
                    content = f"Benchmark content {i}".encode()
                    content += b"X" * (size - len(content))
                    test_file.write_bytes(content)
            yield temp_dir
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)

    def test_large_scale_asset_processing(self, benchmark_workspace):
        """Benchmark large-scale asset processing performance.

        Adds every ``benchmark_*`` file to a fresh ``AssetManager`` and checks
        file count, total and per-file time, memory growth, and the share of
        unique content hashes.
        """
        storage_path = benchmark_workspace / "storage"
        manager = AssetManager(storage_path=storage_path)

        # Benchmark metrics (perf_counter is monotonic, unlike time.time)
        start_time = time.perf_counter()
        memory_start = monitor_memory_usage()

        # Process all benchmark files
        processed_hashes = []
        for test_file in benchmark_workspace.glob("benchmark_*"):
            if test_file.is_file():
                # Keep only the hash: it is collected into a set below, and
                # the add_asset() result is consumed as a mapping with a
                # 'content_hash' key elsewhere in this module.
                result = manager.add_asset(test_file)
                processed_hashes.append(result['content_hash'])
        file_count = len(processed_hashes)

        end_time = time.perf_counter()
        memory_end = monitor_memory_usage()

        # Check the count first so an empty run fails with a clear message
        # instead of a ZeroDivisionError in the average below.
        assert file_count == 100, f"Expected 100 files, processed {file_count}"

        # Performance assertions
        total_time = end_time - start_time
        avg_time_per_file = total_time / file_count
        memory_increase = memory_end - memory_start

        print("\nPerformance Benchmark Results:")
        print(f" Files processed: {file_count}")
        print(f" Total time: {total_time:.2f}s")
        print(f" Average per file: {avg_time_per_file*1000:.1f}ms")
        print(f" Memory increase: {memory_increase:.1f}MB")

        # Performance requirements for production
        assert total_time < 10.0, f"Processing too slow: {total_time:.2f}s"
        assert avg_time_per_file < 0.1, f"Average per-file too slow: {avg_time_per_file:.3f}s"
        assert memory_increase < 100, f"Memory usage too high: {memory_increase:.1f}MB"

        # Verify deduplication efficiency.
        # The ratio is unique hashes / processed files. Every benchmark file
        # has distinct content, so a low ratio would mean different files were
        # collapsed onto the same hash.
        unique_hashes = set(processed_hashes)
        dedup_ratio = len(unique_hashes) / len(processed_hashes)
        print(f" Deduplication ratio: {dedup_ratio:.2f}")
        assert dedup_ratio > 0.8, f"Poor deduplication: {dedup_ratio:.2f}"
def monitor_memory_usage() -> float:
    """Helper function to monitor memory usage.

    Returns:
        The resident set size of the current process in MB, or ``0.0`` when
        ``psutil`` is not installed. With the fallback, any before/after
        difference computed by a caller is zero.
    """
    try:
        import psutil
    except ImportError:
        return 0.0  # Skip memory monitoring if psutil not available
    process = psutil.Process()
    return process.memory_info().rss / 1024 / 1024  # MB