feat: comprehensive asset management system and testing improvements
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Asset Management System (Issue #142): - Add complete asset management framework with deduplication - Implement AssetManager, AssetRegistry, and AssetDeduplicator classes - Add AssetPackager for markdown document packaging - Create comprehensive test suite for all asset management components - Add asset constants and custom exceptions for robust error handling Markdown Processing Enhancements: - Update markdown_commands.py with improved functionality - Enhanced parsing and content aggregation capabilities - Improved filename encoding/decoding for special characters Test Suite Improvements: - Add comprehensive tests for Issue #138 markdown parsing - Enhance Issue #139 content aggregation and end-to-end testing - Complete test coverage for new asset management features Examples and Documentation: - Update BildungsKanonJon.md example with enhanced content - Generate corresponding HTML output for documentation - Add asset registry configuration Development Tools: - Add install script for simplified setup This commit represents a major enhancement to MarkiTect's asset handling capabilities with full test coverage and improved markdown processing. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
430
tests/test_issue_142_asset_deduplicator.py
Normal file
430
tests/test_issue_142_asset_deduplicator.py
Normal file
@@ -0,0 +1,430 @@
|
||||
"""
|
||||
Test scenarios for AssetDeduplicator symlink and deduplication functionality.
|
||||
|
||||
This module tests the AssetDeduplicator class for Issue #142: Phase 1 - Core Asset Management Module.
|
||||
Tests cover content-based asset deduplication, symlink creation with relative paths,
|
||||
Windows fallback to file copying, and conflict resolution.
|
||||
|
||||
Requirements:
|
||||
- Content-based asset deduplication
|
||||
- Symlink creation with relative paths
|
||||
- Windows fallback to file copying
|
||||
- Conflict resolution for existing assets
|
||||
"""
|
||||
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
import pytest
|
||||
|
||||
from markitect.assets.deduplicator import AssetDeduplicator
|
||||
from markitect.assets.registry import AssetRegistry
|
||||
from markitect.assets.exceptions import AssetError, DeduplicationError
|
||||
|
||||
|
||||
class TestAssetDeduplicatorInitialization:
    """Checks what constructing an AssetDeduplicator sets up."""

    def test_deduplicator_initialization(self):
        """Constructor exposes the given path and registry and creates the directory."""
        with tempfile.TemporaryDirectory() as sandbox:
            root = Path(sandbox)
            storage_path = root / "assets"
            registry = AssetRegistry(root / "registry.json")

            deduplicator = AssetDeduplicator(storage_path, registry)

            assert deduplicator.storage_path == storage_path
            assert deduplicator.registry == registry
            # "assets" was never created by the test itself, so its presence
            # here is attributed to the constructor.
            assert storage_path.exists()

    def test_deduplicator_creates_storage_directory(self):
        """A nested, not-yet-existing storage path exists as a directory afterwards."""
        with tempfile.TemporaryDirectory() as sandbox:
            root = Path(sandbox)
            storage_path = root / "nonexistent" / "assets"
            registry = AssetRegistry(root / "registry.json")

            # Only the constructor's side effect matters; the instance is unused.
            AssetDeduplicator(storage_path, registry)

            assert storage_path.exists()
            assert storage_path.is_dir()
|
||||
|
||||
|
||||
class TestAssetDeduplication:
    """Checks how store_asset treats identical versus differing file contents."""

    @staticmethod
    def _store_pair(root, first_text, second_text):
        """Write two files under *root*, store both, and return both results.

        The files are written before the registry and deduplicator are
        created, and are stored in order (file1 then file2).
        """
        first = root / "file1.txt"
        second = root / "file2.txt"
        first.write_text(first_text)
        second.write_text(second_text)

        registry = AssetRegistry(root / "registry.json")
        deduplicator = AssetDeduplicator(root / "assets", registry)

        return deduplicator.store_asset(first), deduplicator.store_asset(second)

    def test_deduplicate_identical_files(self):
        """Two files with the same text report the same hash and stored path."""
        with tempfile.TemporaryDirectory() as sandbox:
            content = "Identical content for deduplication test"
            first_result, second_result = self._store_pair(
                Path(sandbox), content, content
            )

            assert first_result["content_hash"] == second_result["content_hash"]
            assert first_result["stored_path"] == second_result["stored_path"]

    def test_different_files_stored_separately(self):
        """Two files with different text report different hashes and stored paths."""
        with tempfile.TemporaryDirectory() as sandbox:
            first_result, second_result = self._store_pair(
                Path(sandbox),
                "Content of first file",
                "Content of second file",
            )

            assert first_result["content_hash"] != second_result["content_hash"]
            assert first_result["stored_path"] != second_result["stored_path"]
|
||||
|
||||
|
||||
class TestSymlinkCreation:
    """Test symlink creation functionality with relative paths.

    Both tests are skipped on Windows. The skip is declared with
    ``pytest.mark.skipif`` (the same mechanism the Windows-only test in this
    file uses) so the condition is visible at collection time instead of
    being buried in the test body.
    """

    @pytest.mark.skipif(
        platform.system() == "Windows",
        reason="Skipping Unix symlink test on Windows",
    )
    def test_create_symlink_unix(self):
        """Test symlink creation on Unix-like systems."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("Source file content")

            target_dir = Path(temp_dir) / "target_dir"
            target_dir.mkdir()

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            # Store asset first so there is a stored file to link to.
            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Create symlink to stored asset.
            link_path = target_dir / "linked_asset.txt"
            deduplicator.create_asset_link(stored_path, link_path)

            assert link_path.is_symlink()
            assert link_path.resolve() == stored_path.resolve()
            # The raw link target (not the resolved path) must be relative.
            assert not link_path.readlink().is_absolute()

    @pytest.mark.skipif(
        platform.system() == "Windows",
        reason="Skipping relative symlink test on Windows",
    )
    def test_symlink_uses_relative_path(self):
        """Test that created symlinks use relative paths."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("Source file for relative symlink test")

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Create the link two directory levels below temp_dir, so a
            # relative target has to climb out of the link's directory.
            link_dir = Path(temp_dir) / "workspace" / "subdir"
            link_dir.mkdir(parents=True)
            link_path = link_dir / "asset_link.txt"

            deduplicator.create_asset_link(stored_path, link_path)

            # Verify symlink target is relative and starts by going upwards.
            link_target = link_path.readlink()
            assert not link_target.is_absolute()
            assert str(link_target).startswith("..")
|
||||
|
||||
|
||||
class TestWindowsFallbackCopying:
    """Checks that create_asset_link yields a plain file copy when symlinks are unavailable."""

    @staticmethod
    def _stored_asset(root, text):
        """Write ``source.txt`` under *root*, store it, and return (deduplicator, stored path)."""
        source_file = root / "source.txt"
        source_file.write_text(text)

        registry = AssetRegistry(root / "registry.json")
        deduplicator = AssetDeduplicator(root / "assets", registry)

        stored = deduplicator.store_asset(source_file)
        return deduplicator, Path(stored["stored_path"])

    def test_file_copy_fallback_on_symlink_failure(self):
        """With os.symlink forced to raise OSError, the target is a regular file with the same text."""
        with tempfile.TemporaryDirectory() as sandbox:
            root = Path(sandbox)
            content = "Content for copy fallback test"
            deduplicator, stored_path = self._stored_asset(root, content)

            target_path = root / "copied_asset.txt"

            # Simulate a platform where creating symlinks is refused.
            symlink_failure = OSError("Symlink not supported")
            with patch('os.symlink', side_effect=symlink_failure):
                deduplicator.create_asset_link(stored_path, target_path)

            assert target_path.exists()
            assert not target_path.is_symlink()
            assert target_path.read_text() == content

    @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
    def test_windows_uses_file_copying_by_default(self):
        """On Windows, with nothing mocked, the target is a regular file with the same text."""
        with tempfile.TemporaryDirectory() as sandbox:
            root = Path(sandbox)
            content = "Content for Windows copy test"
            deduplicator, stored_path = self._stored_asset(root, content)

            target_path = root / "windows_asset.txt"
            deduplicator.create_asset_link(stored_path, target_path)

            assert target_path.exists()
            assert not target_path.is_symlink()
            assert target_path.read_text() == content
|
||||
|
||||
|
||||
class TestConflictResolution:
    """Test conflict resolution for existing assets.

    Each test pre-creates the link target as a regular file and then calls
    ``create_asset_link`` with one of the ``conflict_resolution`` modes
    ("overwrite", "backup", "skip").
    """

    def test_existing_file_conflict_resolution(self):
        """Test handling of conflicts when target file already exists."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_content = "Source content"
            source_file.write_text(source_content)

            # Create existing target file
            target_path = Path(temp_dir) / "existing_target.txt"
            target_path.write_text("Existing content")

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Should handle conflict gracefully
            deduplicator.create_asset_link(stored_path, target_path,
                                           conflict_resolution="overwrite")

            # Platform-independent check: whether the target is now a symlink
            # or a copy, reading it must yield the stored asset's content and
            # no longer the pre-existing text. Without this the test asserted
            # nothing at all on Windows.
            assert target_path.read_text() == source_content

            # Target should now link to stored asset
            if platform.system() != "Windows":
                assert target_path.is_symlink()

    def test_backup_conflict_resolution(self):
        """Test backup creation during conflict resolution."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("New content")

            # Create existing target file
            target_path = Path(temp_dir) / "target.txt"
            original_content = "Original content to backup"
            target_path.write_text(original_content)

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Create link with backup resolution
            deduplicator.create_asset_link(stored_path, target_path,
                                           conflict_resolution="backup")

            # The backup is expected next to the target with ".bak" appended
            # to the full name (target.txt -> target.txt.bak).
            backup_path = target_path.with_suffix(target_path.suffix + ".bak")
            assert backup_path.exists()
            assert backup_path.read_text() == original_content

    def test_skip_conflict_resolution(self):
        """Test skipping operation when file exists and resolution is 'skip'."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("Source content")

            # Create existing target file
            target_path = Path(temp_dir) / "existing.txt"
            original_content = "Original content"
            target_path.write_text(original_content)

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Skip operation for existing file
            result = deduplicator.create_asset_link(stored_path, target_path,
                                                    conflict_resolution="skip")

            # Original file should remain unchanged and the call must report
            # the skip through the "skipped" key of its return value.
            assert target_path.read_text() == original_content
            assert result["skipped"] is True
|
||||
|
||||
|
||||
class TestAssetDeduplicatorErrorHandling:
    """Checks which exceptions AssetDeduplicator raises for bad inputs and I/O failures."""

    def test_store_nonexistent_file_raises_error(self):
        """Storing a path that was never created raises AssetError."""
        with tempfile.TemporaryDirectory() as sandbox:
            root = Path(sandbox)
            registry = AssetRegistry(root / "registry.json")
            deduplicator = AssetDeduplicator(root / "assets", registry)

            missing = root / "does_not_exist.txt"

            with pytest.raises(AssetError):
                deduplicator.store_asset(missing)

    def test_invalid_storage_path_raises_error(self):
        """Passing an existing regular file as the storage path raises DeduplicationError."""
        with tempfile.TemporaryDirectory() as sandbox:
            root = Path(sandbox)

            # A plain file stands in where a storage directory is expected.
            not_a_dir = root / "not_a_directory.txt"
            not_a_dir.write_text("This is a file, not a directory")

            registry = AssetRegistry(root / "registry.json")

            with pytest.raises(DeduplicationError):
                AssetDeduplicator(not_a_dir, registry)

    def test_permission_error_handling(self):
        """A PermissionError from shutil.copy2 surfaces from store_asset as DeduplicationError."""
        with tempfile.TemporaryDirectory() as sandbox:
            root = Path(sandbox)
            registry = AssetRegistry(root / "registry.json")
            deduplicator = AssetDeduplicator(root / "assets", registry)

            source_file = root / "source.txt"
            source_file.write_text("Test content")

            # Force the copy step to fail as if the destination were not writable.
            denied = PermissionError("Permission denied")
            with patch('shutil.copy2', side_effect=denied), \
                    pytest.raises(DeduplicationError):
                deduplicator.store_asset(source_file)
|
||||
|
||||
|
||||
class TestAssetRetrieval:
    """Checks lookup of stored assets by hash and integrity verification."""

    @staticmethod
    def _store_text(root, text):
        """Write ``source.txt`` under *root*, store it, and return (deduplicator, store result)."""
        source_file = root / "source.txt"
        source_file.write_text(text)

        registry = AssetRegistry(root / "registry.json")
        deduplicator = AssetDeduplicator(root / "assets", registry)

        return deduplicator, deduplicator.store_asset(source_file)

    def test_retrieve_stored_asset(self):
        """get_asset_path for a stored hash points at an existing file with the same text."""
        with tempfile.TemporaryDirectory() as sandbox:
            content = "Content for retrieval test"
            deduplicator, stored = self._store_text(Path(sandbox), content)

            retrieved_path = deduplicator.get_asset_path(stored["content_hash"])

            assert retrieved_path.exists()
            assert retrieved_path.read_text() == content

    def test_verify_asset_integrity(self):
        """verify_asset_integrity returns True for a freshly stored, untouched asset."""
        with tempfile.TemporaryDirectory() as sandbox:
            deduplicator, stored = self._store_text(
                Path(sandbox), "Content for integrity test"
            )

            is_valid = deduplicator.verify_asset_integrity(stored["content_hash"])

            assert is_valid is True

    def test_detect_corrupted_asset(self):
        """verify_asset_integrity returns False once the stored file's text is overwritten."""
        with tempfile.TemporaryDirectory() as sandbox:
            deduplicator, stored = self._store_text(
                Path(sandbox), "Original content"
            )
            content_hash = stored["content_hash"]

            # Tamper with the stored copy directly, bypassing the deduplicator.
            Path(stored["stored_path"]).write_text("Corrupted content")

            is_valid = deduplicator.verify_asset_integrity(content_hash)

            assert is_valid is False
|
||||
Reference in New Issue
Block a user