Some checks failed
Test Suite / code-quality (push) Has been cancelled
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
- Create tmp/test_artifacts/ directory for test storage - Add tmp/ to .gitignore to exclude test artifacts from version control - Update test files to use project tmp directory instead of system temp - Add test-specific path constants for consistent configuration - Prevent asset_registry.json from being overwritten by tests 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
593 lines
24 KiB
Python
593 lines
24 KiB
Python
"""
|
|
Test scenario for Issue #146: Asset Management Implementation Milestone - Final Integration
|
|
===========================================================================================
|
|
|
|
This test suite provides comprehensive validation of the complete asset management
|
|
ecosystem, covering all phases and ensuring production readiness.
|
|
|
|
Issue #146: Asset Management Implementation Milestone - Variant B Tracker
|
|
|
|
Test Coverage:
|
|
1. End-to-end workflow validation across all asset management components
|
|
2. Performance benchmarks and scalability validation
|
|
3. Production readiness and error handling
|
|
4. Cross-platform compatibility and deployment readiness
|
|
5. Complete integration with markitect CLI and workspace management
|
|
6. Final milestone completion verification
|
|
"""
|
|
|
|
import pytest
|
|
import tempfile
|
|
import shutil
|
|
from pathlib import Path
|
|
from unittest.mock import Mock, patch, MagicMock
|
|
import time
|
|
import json
|
|
import hashlib
|
|
import zipfile
|
|
from typing import List, Dict, Any
|
|
|
|
from markitect.assets import AssetManager
|
|
from markitect.assets.registry import AssetRegistry
|
|
from markitect.assets.deduplicator import AssetDeduplicator
|
|
from markitect.assets.packager import MarkdownPackager
|
|
from markitect.assets.batch_processor import BatchAssetProcessor
|
|
from markitect.assets.cache import AssetCache
|
|
from markitect.assets.database import AssetDatabase
|
|
from markitect.assets.performance import PerformanceMonitor
|
|
from markitect.workspace import WorkspaceManager
|
|
from markitect.assets.cli_commands import AssetCommands
|
|
|
|
|
|
class TestFinalAssetManagementIntegration:
    """Final integration test suite for complete asset management implementation.

    Every test receives a freshly created workspace directory (see
    ``integration_workspace``).  Tests that request ``asset_manager`` get an
    ``AssetManager`` whose storage and registry both live inside that
    workspace, so nothing outside the workspace is written.
    """

    @pytest.fixture
    def integration_workspace(self):
        """Create a comprehensive test workspace with realistic data.

        Yields:
            Path of a unique directory below ``<project>/tmp/test_artifacts``.
            It contains ``test_project`` with three document folders; each
            folder holds ``<name>.md`` and an ``assets`` directory.
            ``logo.png`` has identical bytes in every document so that
            deduplication can be exercised.  The directory is removed on
            teardown, including when the setup itself fails.
        """
        # Use project tmp directory instead of system temp
        artifacts_root = Path(__file__).parent.parent / "tmp" / "test_artifacts"
        artifacts_root.mkdir(parents=True, exist_ok=True)

        # mkdtemp guarantees a unique directory.  A name derived from
        # int(time.time()) collides whenever two workspaces are created within
        # the same second (e.g. parallel test workers).
        temp_dir = Path(tempfile.mkdtemp(prefix="integration_", dir=artifacts_root))

        try:
            # Create realistic project structure
            project_dir = temp_dir / "test_project"
            project_dir.mkdir()

            # Create multiple documents with shared and unique assets
            docs = [
                ("user_guide", ["logo.png", "screenshot1.png", "diagram.svg"]),
                ("technical_specs", ["logo.png", "architecture.png", "flowchart.svg"]),
                ("marketing_material", ["logo.png", "product_image.jpg", "banner.png"]),
            ]

            for doc_name, assets in docs:
                doc_dir = project_dir / doc_name
                doc_dir.mkdir()

                # NOTE(review): the three image-reference lines of this
                # document were blank in the reviewed copy of the file; they
                # are regenerated here from the asset list so the document
                # actually references the assets it ships with.  Confirm the
                # exact link format against the repository version.
                image_links = "\n".join(
                    f"![{Path(asset).stem}](assets/{asset})" for asset in assets
                )
                markdown = (
                    f"\n# {doc_name.title().replace('_', ' ')}\n"
                    "\n"
                    "This is a test document for integration testing.\n"
                    "\n"
                    f"{image_links}\n"
                    "\n"
                    "Content for comprehensive testing of the asset management system.\n"
                )
                (doc_dir / f"{doc_name}.md").write_text(markdown, encoding="utf-8")

                # Create assets directory with test files
                assets_dir = doc_dir / "assets"
                assets_dir.mkdir()

                for asset in assets:
                    if asset == "logo.png":  # Shared asset: same bytes everywhere
                        asset_content = b"Shared logo content for consistency"
                    else:
                        asset_content = (
                            f"Test asset content for {asset} in {doc_name}".encode()
                        )
                    (assets_dir / asset).write_bytes(asset_content)

            yield temp_dir
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)

    @pytest.fixture
    def asset_manager(self, integration_workspace):
        """Return an AssetManager whose storage and registry live in the workspace."""
        return AssetManager(
            storage_path=integration_workspace / "asset_storage",
            registry_path=integration_workspace / "test_registry.json",
        )

    def test_complete_ecosystem_initialization(self, integration_workspace):
        """Test complete initialization of all asset management components."""
        storage_path = integration_workspace / "storage"

        # Initialize AssetManager (it creates its own internal components).
        # The registry path is pinned to the workspace, as in the
        # ``asset_manager`` fixture, so this test never touches a registry
        # file outside its own temporary directory.
        manager = AssetManager(
            storage_path=storage_path,
            registry_path=integration_workspace / "ecosystem_registry.json",
        )

        # Verify all internal components are properly initialized
        assert manager.storage_path.exists()
        assert manager.registry.registry_path.parent.exists()
        assert manager.deduplicator.storage_path.exists()

        # Test component integration with unique content to avoid deduplication issues
        test_file = integration_workspace / "test.txt"
        test_file.write_text(f"Integration test content {time.time()}")

        result = manager.add_asset(test_file)
        asset_hash = result['content_hash']
        assert manager.registry.asset_exists(asset_hash)
        assert manager.deduplicator.get_asset_path(asset_hash).exists()

    def test_end_to_end_document_workflow(self, asset_manager, integration_workspace):
        """Test complete document workflow from creation to package extraction."""
        project_dir = integration_workspace / "test_project"
        doc_dirs = [entry for entry in project_dir.iterdir() if entry.is_dir()]

        # Phase 1: Process all documents and their assets
        processed_assets = {}
        for doc_dir in doc_dirs:
            doc_assets = []
            assets_dir = doc_dir / "assets"
            if assets_dir.exists():
                for asset_file in assets_dir.iterdir():
                    if asset_file.is_file():
                        # add_asset returns a result mapping; keep only the
                        # hash, as every other test in this class does.
                        result = asset_manager.add_asset(asset_file)
                        doc_assets.append(result['content_hash'])
            processed_assets[doc_dir.name] = doc_assets

        # Verify asset deduplication occurred: logo.png appears in multiple
        # documents but must always produce the same hash.
        logo_hashes = []
        for doc_name, assets in processed_assets.items():
            if not assets:
                continue
            logo_path = project_dir / doc_name / "assets" / "logo.png"
            if logo_path.exists():
                logo_hashes.append(
                    asset_manager.registry.generate_content_hash(logo_path)
                )

        # The fixture puts logo.png into every document, so finding fewer than
        # two means the check below would pass without testing anything.
        assert len(logo_hashes) > 1, "Expected logo.png in more than one document"
        assert all(h == logo_hashes[0] for h in logo_hashes), "Logo deduplication failed"

        # Phase 2: Create packages for each document
        packages = {}
        for doc_dir in doc_dirs:
            package_path = integration_workspace / f"{doc_dir.name}.mdpkg"
            asset_manager.create_package(doc_dir, package_path)
            packages[doc_dir.name] = package_path
            assert package_path.exists()

        # Phase 3: Extract packages to new workspace
        extracted_workspace = integration_workspace / "extracted"
        extracted_workspace.mkdir()

        for doc_name, package_path in packages.items():
            extract_dir = extracted_workspace / doc_name
            asset_manager.extract_package(package_path, extract_dir)

            # Verify extracted content
            assert extract_dir.exists()
            assert (extract_dir / f"{doc_name}.md").exists()
            assert (extract_dir / "assets").exists()

        # Phase 4: Verify workspace integrity
        for doc_name in packages:
            original_dir = project_dir / doc_name
            extracted_dir = extracted_workspace / doc_name

            # Compare markdown content
            original_md = (original_dir / f"{doc_name}.md").read_text(encoding="utf-8")
            extracted_md = (extracted_dir / f"{doc_name}.md").read_text(encoding="utf-8")
            assert original_md == extracted_md

            # Verify asset integrity
            original_assets = original_dir / "assets"
            extracted_assets = extracted_dir / "assets"
            if not original_assets.exists():
                continue

            for asset_file in original_assets.iterdir():
                if not asset_file.is_file():
                    continue
                extracted_asset = extracted_assets / asset_file.name
                assert extracted_asset.exists()

                # Compare file content or verify symlink
                if extracted_asset.is_symlink():
                    # Verify symlink points to valid asset
                    assert extracted_asset.resolve().exists()
                else:
                    # Compare content directly
                    assert asset_file.read_bytes() == extracted_asset.read_bytes()

    def test_performance_benchmarks(self, asset_manager, integration_workspace):
        """Test performance benchmarks for production readiness validation.

        Durations are measured with ``time.perf_counter`` (monotonic), so a
        wall-clock adjustment during the run cannot distort the results.
        """
        # Performance Monitor
        monitor = PerformanceMonitor()

        # Create performance test data: 50 files of 1KB to 50KB.  Every file
        # has a different length, so every file has different content.
        test_files = []
        for i in range(50):
            test_file = integration_workspace / f"perf_test_{i}.bin"
            size = 1024 * (1 + i % 50)
            test_file.write_bytes(b"X" * size)
            test_files.append(test_file)

        # Benchmark: Asset Addition Performance
        start_time = time.perf_counter()
        with monitor.track_operation("asset_addition_benchmark"):
            asset_results = [asset_manager.add_asset(path) for path in test_files]
        addition_time = time.perf_counter() - start_time

        # Performance Requirements:
        # - Should process 50 assets in under 3 seconds
        # - Average time per asset should be under 60ms
        per_asset_time = addition_time / len(test_files)
        assert addition_time < 3.0, f"Asset addition too slow: {addition_time:.2f}s"
        assert per_asset_time < 0.06, (
            f"Average per-asset time too slow: {per_asset_time:.3f}s"
        )

        # Benchmark: Deduplication Performance
        # The duplicate files are written before the clock starts so that only
        # the add_asset calls are measured, not the test's own file creation.
        original_bytes = test_files[0].read_bytes()
        duplicate_files = []
        for i in range(10):
            duplicate_file = integration_workspace / f"duplicate_{i}.bin"
            duplicate_file.write_bytes(original_bytes)  # Same content as first file
            duplicate_files.append(duplicate_file)

        start_time = time.perf_counter()
        with monitor.track_operation("deduplication_benchmark"):
            duplicate_results = [
                asset_manager.add_asset(path) for path in duplicate_files
            ]
        dedup_time = time.perf_counter() - start_time

        # Deduplication should be very fast (under 0.2s for 10 duplicates)
        assert dedup_time < 0.2, f"Deduplication too slow: {dedup_time:.3f}s"

        # All duplicates should have same hash as original
        original_hash = asset_results[0]['content_hash']
        assert all(r['content_hash'] == original_hash for r in duplicate_results)

        # Benchmark: Package Creation Performance
        package_dir = integration_workspace / "package_test"
        package_dir.mkdir()
        (package_dir / "test.md").write_text("# Test Document")

        assets_dir = package_dir / "assets"
        assets_dir.mkdir()

        # Copy the first 10 test files into the package's assets directory
        for i, test_file in enumerate(test_files[:10]):
            (assets_dir / f"asset_{i}.bin").write_bytes(test_file.read_bytes())

        package_path = integration_workspace / "benchmark.mdpkg"
        start_time = time.perf_counter()
        asset_manager.create_package(package_dir, package_path)
        package_time = time.perf_counter() - start_time

        # Package creation should be fast (under 1s for 10 assets)
        assert package_time < 1.0, f"Package creation too slow: {package_time:.2f}s"
        assert package_path.exists()

        # Get monitoring metrics
        metrics = monitor.get_metrics()

        # Verify performance metrics are collected
        assert metrics is not None
        assert "asset_addition_benchmark" in metrics
        assert "deduplication_benchmark" in metrics

        # Verify the operations were tracked
        addition_metrics = metrics["asset_addition_benchmark"]
        assert addition_metrics["call_count"] == 1  # Single benchmark run
        assert addition_metrics["total_time"] > 0

    def test_error_handling_and_recovery(self, asset_manager, integration_workspace):
        """Test comprehensive error handling and recovery mechanisms."""
        # Imported before any patching below so that the import itself never
        # runs while Path.mkdir is replaced by a failing mock.
        from markitect.assets.exceptions import AssetManagerError

        # Test 1: Invalid Asset Handling
        nonexistent_file = integration_workspace / "does_not_exist.txt"
        # NOTE(review): the concrete exception type is not visible from this
        # file; narrow ``Exception`` once the contract of add_asset is known.
        with pytest.raises(Exception):
            asset_manager.add_asset(nonexistent_file)

        # Test 2: Corrupted Registry Recovery
        # Write the corrupt content unconditionally.  Guarding this with
        # ``exists()`` lets the test pass without corrupting anything when the
        # registry file has not been created yet.
        registry_path = asset_manager.registry.registry_path
        registry_path.write_text("invalid json content")

        # Registry should recover gracefully
        new_registry = AssetRegistry(registry_path)
        # Registry should have empty assets list after corruption recovery
        assets_list = new_registry.list_assets()
        assert isinstance(assets_list, list)
        assert len(assets_list) == 0  # Should be empty after recovering from corruption

        # Test 3: Package Corruption Handling
        test_file = integration_workspace / "test.txt"
        test_file.write_text("Test content")
        asset_manager.add_asset(test_file)

        # Create corrupted package
        corrupted_package = integration_workspace / "corrupted.mdpkg"
        corrupted_package.write_bytes(b"This is not a valid ZIP file")

        # Extraction should fail gracefully
        extract_dir = integration_workspace / "extract_test"
        with pytest.raises(Exception):
            asset_manager.extract_package(corrupted_package, extract_dir)

        # Test 4: Storage Permission Handling
        # This is platform-dependent, so we'll mock it
        with patch(
            'pathlib.Path.mkdir',
            side_effect=PermissionError("Permission denied"),
        ):
            with pytest.raises(AssetManagerError):
                AssetManager(storage_path=integration_workspace / "restricted")

    def test_cli_integration(self, asset_manager, integration_workspace):
        """Test CLI integration and command functionality."""
        # Create test data
        test_file = integration_workspace / "cli_test.txt"
        test_file.write_text("CLI integration test")

        # Initialize CLI commands
        cli_commands = AssetCommands(asset_manager)

        # Test asset addition via CLI
        result = cli_commands.add_asset(str(test_file))
        assert result.success
        assert result.asset_hash is not None

        # Test asset listing via CLI
        list_result = cli_commands.list_assets()
        assert list_result.success
        assert len(list_result.assets) > 0

        # Test asset info retrieval
        info_result = cli_commands.get_asset_info(result.asset_hash)
        assert info_result.success
        assert info_result.asset_info is not None

    def test_cross_platform_compatibility(self, asset_manager, integration_workspace):
        """Test cross-platform compatibility features."""
        # Test symlink creation with fallback
        test_file = integration_workspace / "cross_platform_test.txt"
        test_file.write_text(f"Cross-platform test content - {time.time()}")

        asset_result = asset_manager.add_asset(test_file)
        assert asset_result is not None
        asset_hash = asset_result['content_hash']

        # Create workspace with symlinks/copies
        workspace_dir = integration_workspace / "workspace"
        workspace_dir.mkdir()
        target_file = workspace_dir / "test_asset.txt"

        # Test link creation (should work on all platforms)
        deduplicator = asset_manager.deduplicator
        deduplicator.create_link(
            deduplicator.get_asset_path(asset_hash),
            target_file,
        )

        # Verify link was created (symlink on Unix, copy on Windows)
        assert target_file.exists()
        assert target_file.read_text() == test_file.read_text()

    def test_production_deployment_readiness(self, asset_manager, integration_workspace):
        """Test production deployment readiness features."""
        # Test 1: Configuration Management
        config = asset_manager.config
        assert config is not None

        # Test 2: Logging and Monitoring
        # Verify logging is properly configured
        import logging

        logger = logging.getLogger("markitect.assets")
        # NOTE(review): a logger that was never configured has level NOTSET
        # (0), so this assertion also passes when no level was set at all.
        # Consider checking ``logger.getEffectiveLevel()`` once the intended
        # logging configuration is confirmed.
        assert logger.level <= logging.INFO

        # Test 3: Resource Management
        # Create large number of assets to test memory management
        one_mib = 1024 * 1024
        large_assets = []
        for i in range(50):
            large_file = integration_workspace / f"large_asset_{i}.bin"
            # Create 1MB files with unique content to avoid deduplication
            prefix = f"Asset {i} - ".encode()
            large_file.write_bytes(prefix + b"X" * (one_mib - len(prefix)))
            result = asset_manager.add_asset(large_file)
            large_assets.append(result['content_hash'])

        # Verify all assets were processed without memory issues
        assert len(large_assets) == 50

        # Test 4: Cleanup and Maintenance
        # Test asset removal
        removed_hash = large_assets[0]
        asset_manager.remove_asset(removed_hash)

        # Verify asset was removed from registry
        assert not asset_manager.registry.asset_exists(removed_hash)

    def test_final_milestone_validation(self, asset_manager, integration_workspace):
        """Final validation test for Issue #146 milestone completion."""
        # Validation 1: All Core Features Implemented
        core_features = {
            "asset_storage": hasattr(asset_manager, "add_asset"),
            "deduplication": hasattr(asset_manager, "deduplicator"),
            "packaging": hasattr(asset_manager, "create_package"),
            "registry": hasattr(asset_manager, "registry"),
            "extraction": hasattr(asset_manager, "extract_package"),
            "removal": hasattr(asset_manager, "remove_asset"),
        }
        for feature, implemented in core_features.items():
            assert implemented, f"Core feature not implemented: {feature}"

        # Validation 2: Integration with markitect Ecosystem
        # Test workspace integration
        workspace_manager = WorkspaceManager()
        assert workspace_manager is not None

        # Validation 3: Performance Requirements Met
        # Quick performance test
        perf_test_file = integration_workspace / "perf_validation.txt"
        perf_test_file.write_text("Performance validation test")

        start_time = time.perf_counter()
        asset_manager.add_asset(perf_test_file)
        add_time = time.perf_counter() - start_time

        # Should add asset in under 100ms
        assert add_time < 0.1, f"Performance requirement not met: {add_time:.3f}s"

        # Validation 4: Error Handling Robustness
        error_scenarios = [
            (lambda: asset_manager.add_asset(integration_workspace / "nonexistent.txt"), Exception),
            (lambda: asset_manager.get_asset_info("invalid_hash"), Exception),
        ]
        for scenario, expected_exception in error_scenarios:
            with pytest.raises(expected_exception):
                scenario()

        # Validation 5: Production Readiness Checklist
        production_checklist = {
            "storage_configured": asset_manager.storage_path.exists(),
            # ``>= 0`` is always true for a sized result; this entry only
            # proves that list_assets() can be called and returns a sized value.
            "registry_functional": len(asset_manager.list_assets()) >= 0,
            "deduplication_working": asset_manager.deduplicator is not None,
            "logging_enabled": True,  # Verified in previous tests
            "error_handling": True,  # Verified above
        }
        for check, passed in production_checklist.items():
            assert passed, f"Production readiness check failed: {check}"

        # Final Success Marker
        success_marker = integration_workspace / "MILESTONE_146_COMPLETE.txt"
        marker_lines = [
            "",
            "Issue #146: Asset Management Implementation Milestone - Variant B Tracker",
            "=====================================================================",
            "",
            f"MILESTONE COMPLETION VERIFIED: {time.strftime('%Y-%m-%d %H:%M:%S')}",
            "",
            "All validation tests passed:",
            "✅ Complete ecosystem initialization",
            "✅ End-to-end document workflow",
            "✅ Performance benchmarks met",
            "✅ Error handling and recovery",
            "✅ CLI integration functional",
            "✅ Cross-platform compatibility",
            "✅ Production deployment readiness",
            "✅ Final milestone validation",
            "",
            "Asset Management System Status: PRODUCTION READY",
            "",
        ]
        # Explicit UTF-8: the check-mark characters cannot be encoded by some
        # platform default encodings (e.g. cp1252), which would raise
        # UnicodeEncodeError in write_text.
        success_marker.write_text("\n".join(marker_lines), encoding="utf-8")

        assert success_marker.exists()
        print(f"\n🎉 Issue #146 Milestone Validation Complete: {success_marker}")
|
|
|
|
|
|
# Performance Benchmark Test Class
|
|
class TestAssetManagementPerformanceBenchmarks:
    """Dedicated performance benchmark suite for production validation."""

    @pytest.fixture
    def benchmark_workspace(self):
        """Create large-scale test workspace for benchmarking.

        Yields:
            Path of a unique directory below ``<project>/tmp/test_artifacts``
            holding 100 files named ``benchmark_<i><ext>``: 25 each of
            ``.txt`` (1KB), ``.jpg`` (50KB), ``.png`` (100KB) and ``.pdf``
            (500KB).  The directory is removed on teardown, including when the
            setup itself fails.
        """
        # Use project tmp directory instead of system temp
        artifacts_root = Path(__file__).parent.parent / "tmp" / "test_artifacts"
        artifacts_root.mkdir(parents=True, exist_ok=True)

        # mkdtemp guarantees a unique directory.  A name derived from
        # int(time.time()) collides whenever two workspaces are created within
        # the same second (e.g. parallel test workers).
        temp_dir = Path(tempfile.mkdtemp(prefix="benchmark_", dir=artifacts_root))

        try:
            # Create variety of file types and sizes
            file_types = [
                (".txt", "text/plain", 1024),  # 1KB text files
                (".jpg", "image/jpeg", 50 * 1024),  # 50KB images
                (".png", "image/png", 100 * 1024),  # 100KB images
                (".pdf", "application/pdf", 500 * 1024),  # 500KB documents
            ]

            for i in range(25):  # 25 files of each type = 100 total
                header = f"Benchmark content {i}".encode()
                for ext, _mime, size in file_types:
                    test_file = temp_dir / f"benchmark_{i}{ext}"
                    # Pad the header with filler bytes up to the target size
                    test_file.write_bytes(header + b"X" * (size - len(header)))

            yield temp_dir
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)

    def test_large_scale_asset_processing(self, benchmark_workspace):
        """Benchmark large-scale asset processing performance.

        Adds every ``benchmark_*`` file of the workspace to a fresh
        AssetManager and checks total time, average time per file, memory
        growth and the share of unique content hashes.
        """
        # Storage and registry both live inside the workspace so the benchmark
        # never writes a registry file elsewhere.
        manager = AssetManager(
            storage_path=benchmark_workspace / "storage",
            registry_path=benchmark_workspace / "benchmark_registry.json",
        )

        # Benchmark metrics (perf_counter is monotonic, unlike time.time)
        memory_start = monitor_memory_usage()
        start_time = time.perf_counter()

        # Process all benchmark files
        processed_hashes = []
        for test_file in benchmark_workspace.glob("benchmark_*"):
            if test_file.is_file():
                asset_result = manager.add_asset(test_file)
                processed_hashes.append(asset_result['content_hash'])

        total_time = time.perf_counter() - start_time
        memory_end = monitor_memory_usage()
        file_count = len(processed_hashes)

        # Checked before any division by file_count: an empty workspace should
        # fail with this message rather than with ZeroDivisionError.
        assert file_count == 100, f"Expected 100 files, processed {file_count}"

        avg_time_per_file = total_time / file_count
        memory_increase = memory_end - memory_start

        print("\nPerformance Benchmark Results:")
        print(f" Files processed: {file_count}")
        print(f" Total time: {total_time:.2f}s")
        print(f" Average per file: {avg_time_per_file*1000:.1f}ms")
        print(f" Memory increase: {memory_increase:.1f}MB")

        # Performance requirements for production
        assert total_time < 10.0, f"Processing too slow: {total_time:.2f}s"
        assert avg_time_per_file < 0.1, f"Average per-file too slow: {avg_time_per_file:.3f}s"
        assert memory_increase < 100, f"Memory usage too high: {memory_increase:.1f}MB"

        # Verify deduplication efficiency
        unique_hashes = set(processed_hashes)
        dedup_ratio = len(unique_hashes) / len(processed_hashes)
        print(f" Deduplication ratio: {dedup_ratio:.2f}")

        # The ratio is unique hashes / processed files.  Every generated file
        # differs in its header or its length, so almost all hashes should be
        # distinct; a low ratio would mean different files were given the
        # same hash.
        assert dedup_ratio > 0.8, f"Poor deduplication: {dedup_ratio:.2f}"
|
|
|
|
|
|
def monitor_memory_usage():
    """Return the resident set size of the current process in MB.

    Returns 0 when ``psutil`` cannot be imported, so callers that compute a
    before/after difference see no memory growth instead of failing.
    """
    try:
        import psutil
    except ImportError:
        # Skip memory monitoring if psutil not available
        return 0

    rss_bytes = psutil.Process().memory_info().rss
    return rss_bytes / 1024 / 1024  # bytes -> MB