""" Test scenarios for AssetDeduplicator symlink and deduplication functionality. This module tests the AssetDeduplicator class for Issue #142: Phase 1 - Core Asset Management Module. Tests cover content-based asset deduplication, symlink creation with relative paths, Windows fallback to file copying, and conflict resolution. Requirements: - Content-based asset deduplication - Symlink creation with relative paths - Windows fallback to file copying - Conflict resolution for existing assets """ import os import platform import shutil import tempfile from pathlib import Path from unittest.mock import Mock, patch, MagicMock import pytest from markitect.assets.deduplicator import AssetDeduplicator from markitect.assets.registry import AssetRegistry from markitect.assets.exceptions import AssetError, DeduplicationError class TestAssetDeduplicatorInitialization: """Test AssetDeduplicator initialization and setup.""" def test_deduplicator_initialization(self): """Test AssetDeduplicator can be initialized with storage path and registry.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) assert deduplicator.storage_path == storage_path assert deduplicator.registry == registry assert storage_path.exists() # Should create storage directory def test_deduplicator_creates_storage_directory(self): """Test that AssetDeduplicator creates storage directory if it doesn't exist.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "nonexistent" / "assets" registry_path = Path(temp_dir) / "registry.json" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) assert storage_path.exists() assert storage_path.is_dir() class TestAssetDeduplication: """Test content-based asset deduplication functionality.""" def test_deduplicate_identical_files(self): """Test that identical files are deduplicated properly.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" # Create two identical files file1 = Path(temp_dir) / "file1.txt" file2 = Path(temp_dir) / "file2.txt" content = "Identical content for deduplication test" file1.write_text(content) file2.write_text(content) registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) # Store first file result1 = deduplicator.store_asset(file1) # Store second identical file - should be deduplicated result2 = deduplicator.store_asset(file2) # Both should reference the same stored file assert result1["content_hash"] == result2["content_hash"] assert result1["stored_path"] == result2["stored_path"] def test_different_files_stored_separately(self): """Test that different files are stored separately.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" # Create two different files file1 = Path(temp_dir) / "file1.txt" file2 = Path(temp_dir) / "file2.txt" file1.write_text("Content of first file") file2.write_text("Content of second file") registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) result1 = deduplicator.store_asset(file1) result2 = deduplicator.store_asset(file2) # Should have different hashes and storage paths assert result1["content_hash"] != result2["content_hash"] assert result1["stored_path"] != result2["stored_path"] class TestSymlinkCreation: """Test symlink creation functionality with relative paths.""" def test_create_symlink_unix(self): """Test symlink creation on Unix-like systems.""" if platform.system() == "Windows": pytest.skip("Skipping Unix symlink test on Windows") with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" source_file = Path(temp_dir) / "source.txt" source_file.write_text("Source file content") target_dir = Path(temp_dir) / "target_dir" target_dir.mkdir() registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) # Store asset first store_result = deduplicator.store_asset(source_file) stored_path = Path(store_result["stored_path"]) # Create symlink to stored asset link_path = target_dir / "linked_asset.txt" deduplicator.create_asset_link(stored_path, link_path) assert link_path.is_symlink() assert link_path.resolve() == stored_path.resolve() # Test that symlink uses relative path assert not link_path.readlink().is_absolute() def test_symlink_uses_relative_path(self): """Test that created symlinks use relative paths.""" if platform.system() == "Windows": pytest.skip("Skipping relative symlink test on Windows") with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" source_file = Path(temp_dir) / "source.txt" source_file.write_text("Source file for relative symlink test") registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) store_result = deduplicator.store_asset(source_file) stored_path = Path(store_result["stored_path"]) # Create symlink in subdirectory link_dir = Path(temp_dir) / "workspace" / "subdir" link_dir.mkdir(parents=True) link_path = link_dir / "asset_link.txt" deduplicator.create_asset_link(stored_path, link_path) # Verify symlink target is relative link_target = link_path.readlink() assert not link_target.is_absolute() assert str(link_target).startswith("..") class TestWindowsFallbackCopying: """Test Windows fallback to file copying.""" def test_file_copy_fallback_on_symlink_failure(self): """Test that file copying is used when symlink creation fails.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" source_file = Path(temp_dir) / "source.txt" content = "Content for copy fallback test" source_file.write_text(content) registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) store_result = deduplicator.store_asset(source_file) stored_path = Path(store_result["stored_path"]) target_path = Path(temp_dir) / "copied_asset.txt" # Mock symlink creation to fail with patch('os.symlink', side_effect=OSError("Symlink not supported")): deduplicator.create_asset_link(stored_path, target_path) # Should fallback to copying assert target_path.exists() assert not target_path.is_symlink() assert target_path.read_text() == content @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test") def test_windows_uses_file_copying_by_default(self): """Test that Windows uses file copying by default.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" source_file = Path(temp_dir) / "source.txt" content = "Content for Windows copy test" source_file.write_text(content) registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) store_result = deduplicator.store_asset(source_file) stored_path = Path(store_result["stored_path"]) target_path = Path(temp_dir) / "windows_asset.txt" deduplicator.create_asset_link(stored_path, target_path) # On Windows, should use copying instead of symlinks assert target_path.exists() assert not target_path.is_symlink() assert target_path.read_text() == content class TestConflictResolution: """Test conflict resolution for existing assets.""" def test_existing_file_conflict_resolution(self): """Test handling of conflicts when target file already exists.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" source_file = Path(temp_dir) / "source.txt" source_file.write_text("Source content") # Create existing target file target_path = Path(temp_dir) / "existing_target.txt" target_path.write_text("Existing content") registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) store_result = deduplicator.store_asset(source_file) stored_path = Path(store_result["stored_path"]) # Should handle conflict gracefully deduplicator.create_asset_link(stored_path, target_path, conflict_resolution="overwrite") # Target should now link to stored asset if platform.system() != "Windows": assert target_path.is_symlink() def test_backup_conflict_resolution(self): """Test backup creation during conflict resolution.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" source_file = Path(temp_dir) / "source.txt" source_file.write_text("New content") # Create existing target file target_path = Path(temp_dir) / "target.txt" original_content = "Original content to backup" target_path.write_text(original_content) registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) store_result = deduplicator.store_asset(source_file) stored_path = Path(store_result["stored_path"]) # Create link with backup resolution deduplicator.create_asset_link(stored_path, target_path, conflict_resolution="backup") # Should create backup file backup_path = target_path.with_suffix(target_path.suffix + ".bak") assert backup_path.exists() assert backup_path.read_text() == original_content def test_skip_conflict_resolution(self): """Test skipping operation when file exists and resolution is 'skip'.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" source_file = Path(temp_dir) / "source.txt" source_file.write_text("Source content") # Create existing target file target_path = Path(temp_dir) / "existing.txt" original_content = "Original content" target_path.write_text(original_content) registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) store_result = deduplicator.store_asset(source_file) stored_path = Path(store_result["stored_path"]) # Skip operation for existing file result = deduplicator.create_asset_link(stored_path, target_path, conflict_resolution="skip") # Original file should remain unchanged assert target_path.read_text() == original_content assert result["skipped"] is True class TestAssetDeduplicatorErrorHandling: """Test error handling scenarios.""" def test_store_nonexistent_file_raises_error(self): """Test that storing non-existent file raises appropriate error.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) nonexistent_file = Path(temp_dir) / "does_not_exist.txt" with pytest.raises(AssetError): deduplicator.store_asset(nonexistent_file) def test_invalid_storage_path_raises_error(self): """Test that invalid storage path raises appropriate error.""" with tempfile.TemporaryDirectory() as temp_dir: # Try to use a file as storage path (should be directory) file_path = Path(temp_dir) / "not_a_directory.txt" file_path.write_text("This is a file, not a directory") registry_path = Path(temp_dir) / "registry.json" registry = AssetRegistry(registry_path) with pytest.raises(DeduplicationError): AssetDeduplicator(file_path, registry) def test_permission_error_handling(self): """Test handling of permission errors during asset storage.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) source_file = Path(temp_dir) / "source.txt" source_file.write_text("Test content") # Mock shutil.copy2 to raise PermissionError with patch('shutil.copy2', side_effect=PermissionError("Permission denied")): with pytest.raises(DeduplicationError): deduplicator.store_asset(source_file) class TestAssetRetrieval: """Test asset retrieval and verification functionality.""" def test_retrieve_stored_asset(self): """Test retrieving stored asset by content hash.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" source_file = Path(temp_dir) / "source.txt" content = "Content for retrieval test" source_file.write_text(content) registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) store_result = deduplicator.store_asset(source_file) content_hash = store_result["content_hash"] # Retrieve asset retrieved_path = deduplicator.get_asset_path(content_hash) assert retrieved_path.exists() assert retrieved_path.read_text() == content def test_verify_asset_integrity(self): """Test verifying stored asset integrity.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" source_file = Path(temp_dir) / "source.txt" source_file.write_text("Content for integrity test") registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) store_result = deduplicator.store_asset(source_file) content_hash = store_result["content_hash"] # Verify integrity is_valid = deduplicator.verify_asset_integrity(content_hash) assert is_valid is True def test_detect_corrupted_asset(self): """Test detection of corrupted stored assets.""" with tempfile.TemporaryDirectory() as temp_dir: storage_path = Path(temp_dir) / "assets" registry_path = Path(temp_dir) / "registry.json" source_file = Path(temp_dir) / "source.txt" source_file.write_text("Original content") registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) store_result = deduplicator.store_asset(source_file) content_hash = store_result["content_hash"] stored_path = Path(store_result["stored_path"]) # Corrupt the stored file stored_path.write_text("Corrupted content") # Verify should detect corruption is_valid = deduplicator.verify_asset_integrity(content_hash) assert is_valid is False