Files
markitect-main/tests/test_issue_142_asset_deduplicator.py
tegwick 81d3da5fe7
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat: comprehensive asset management system and testing improvements
Asset Management System (Issue #142):
- Add complete asset management framework with deduplication
- Implement AssetManager, AssetRegistry, and AssetDeduplicator classes
- Add AssetPackager for markdown document packaging
- Create comprehensive test suite for all asset management components
- Add asset constants and custom exceptions for robust error handling

Markdown Processing Enhancements:
- Update markdown_commands.py with improved functionality
- Enhanced parsing and content aggregation capabilities
- Improved filename encoding/decoding for special characters

Test Suite Improvements:
- Add comprehensive tests for Issue #138 markdown parsing
- Enhance Issue #139 content aggregation and end-to-end testing
- Complete test coverage for new asset management features

Examples and Documentation:
- Update BildungsKanonJon.md example with enhanced content
- Generate corresponding HTML output for documentation
- Add asset registry configuration

Development Tools:
- Add install script for simplified setup

This commit represents a major enhancement to MarkiTect's asset handling
capabilities with full test coverage and improved markdown processing.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-12 19:57:31 +02:00

430 lines
17 KiB
Python

"""
Test scenarios for AssetDeduplicator symlink and deduplication functionality.
This module tests the AssetDeduplicator class for Issue #142: Phase 1 - Core Asset Management Module.
Tests cover content-based asset deduplication, symlink creation with relative paths,
Windows fallback to file copying, and conflict resolution.
Requirements:
- Content-based asset deduplication
- Symlink creation with relative paths
- Windows fallback to file copying
- Conflict resolution for existing assets
"""
import os
import platform
import shutil
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
import pytest
from markitect.assets.deduplicator import AssetDeduplicator
from markitect.assets.registry import AssetRegistry
from markitect.assets.exceptions import AssetError, DeduplicationError
class TestAssetDeduplicatorInitialization:
    """Construction-time checks for AssetDeduplicator."""

    def test_deduplicator_initialization(self):
        """The constructor exposes the given storage path and registry and creates the directory."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_dir = root / "assets"
            registry_file = root / "registry.json"
            asset_registry = AssetRegistry(registry_file)

            dedup = AssetDeduplicator(assets_dir, asset_registry)

            assert dedup.storage_path == assets_dir
            assert dedup.registry == asset_registry
            # The storage directory did not exist before construction.
            assert assets_dir.exists()

    def test_deduplicator_creates_storage_directory(self):
        """A nested, not-yet-existing storage path is expected to be created as a directory."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            # The parent "nonexistent" is missing too, so intermediate
            # directories must be created for the assertions to pass.
            assets_dir = root / "nonexistent" / "assets"
            registry_file = root / "registry.json"
            asset_registry = AssetRegistry(registry_file)

            dedup = AssetDeduplicator(assets_dir, asset_registry)

            assert assets_dir.exists()
            assert assets_dir.is_dir()
class TestAssetDeduplication:
    """Checks that storage is keyed on file content rather than file name."""

    def test_deduplicate_identical_files(self):
        """Two files with the same content must share one hash and one stored path."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_dir = root / "assets"
            registry_file = root / "registry.json"

            # Two differently named files carrying byte-identical text.
            first = root / "file1.txt"
            second = root / "file2.txt"
            content = "Identical content for deduplication test"
            for candidate in (first, second):
                candidate.write_text(content)

            asset_registry = AssetRegistry(registry_file)
            dedup = AssetDeduplicator(assets_dir, asset_registry)

            stored_first = dedup.store_asset(first)
            # Storing the duplicate should resolve to the entry created above.
            stored_second = dedup.store_asset(second)

            assert stored_first["content_hash"] == stored_second["content_hash"]
            assert stored_first["stored_path"] == stored_second["stored_path"]

    def test_different_files_stored_separately(self):
        """Files with different content must get distinct hashes and stored paths."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_dir = root / "assets"
            registry_file = root / "registry.json"

            first = root / "file1.txt"
            second = root / "file2.txt"
            first.write_text("Content of first file")
            second.write_text("Content of second file")

            asset_registry = AssetRegistry(registry_file)
            dedup = AssetDeduplicator(assets_dir, asset_registry)

            stored_first = dedup.store_asset(first)
            stored_second = dedup.store_asset(second)

            assert stored_first["content_hash"] != stored_second["content_hash"]
            assert stored_first["stored_path"] != stored_second["stored_path"]
class TestSymlinkCreation:
    """Checks that asset links are symlinks with relative targets (non-Windows only)."""

    def test_create_symlink_unix(self):
        """A link to a stored asset is a symlink that resolves to that asset."""
        on_windows = platform.system() == "Windows"
        if on_windows:
            pytest.skip("Skipping Unix symlink test on Windows")

        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_dir = root / "assets"
            registry_file = root / "registry.json"

            original = root / "source.txt"
            original.write_text("Source file content")

            link_parent = root / "target_dir"
            link_parent.mkdir()

            asset_registry = AssetRegistry(registry_file)
            dedup = AssetDeduplicator(assets_dir, asset_registry)

            # The asset has to be in storage before it can be linked to.
            stored = Path(dedup.store_asset(original)["stored_path"])

            link = link_parent / "linked_asset.txt"
            dedup.create_asset_link(stored, link)

            assert link.is_symlink()
            assert link.resolve() == stored.resolve()
            # The raw link target (not the resolved one) must be relative.
            assert not link.readlink().is_absolute()

    def test_symlink_uses_relative_path(self):
        """A link created two directories deep points upwards via a relative target."""
        on_windows = platform.system() == "Windows"
        if on_windows:
            pytest.skip("Skipping relative symlink test on Windows")

        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_dir = root / "assets"
            registry_file = root / "registry.json"

            original = root / "source.txt"
            original.write_text("Source file for relative symlink test")

            asset_registry = AssetRegistry(registry_file)
            dedup = AssetDeduplicator(assets_dir, asset_registry)

            stored = Path(dedup.store_asset(original)["stored_path"])

            # The link lives in workspace/subdir, which is not an ancestor of
            # the storage directory, so a relative target has to climb first.
            link_parent = root / "workspace" / "subdir"
            link_parent.mkdir(parents=True)
            link = link_parent / "asset_link.txt"

            dedup.create_asset_link(stored, link)

            raw_target = link.readlink()
            assert not raw_target.is_absolute()
            assert str(raw_target).startswith("..")
class TestWindowsFallbackCopying:
    """Checks the plain-copy path used when a symlink is not created."""

    def test_file_copy_fallback_on_symlink_failure(self):
        """With os.symlink forced to fail, the link target must end up as a regular copy."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_dir = root / "assets"
            registry_file = root / "registry.json"

            original = root / "source.txt"
            content = "Content for copy fallback test"
            original.write_text(content)

            asset_registry = AssetRegistry(registry_file)
            dedup = AssetDeduplicator(assets_dir, asset_registry)

            stored = Path(dedup.store_asset(original)["stored_path"])
            destination = root / "copied_asset.txt"

            # Simulate a platform or filesystem without symlink support.
            failing_symlink = patch(
                'os.symlink', side_effect=OSError("Symlink not supported")
            )
            with failing_symlink:
                dedup.create_asset_link(stored, destination)

            assert destination.exists()
            assert not destination.is_symlink()
            assert destination.read_text() == content

    @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
    def test_windows_uses_file_copying_by_default(self):
        """On Windows the link target is expected to be a regular file with the asset's content."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_dir = root / "assets"
            registry_file = root / "registry.json"

            original = root / "source.txt"
            content = "Content for Windows copy test"
            original.write_text(content)

            asset_registry = AssetRegistry(registry_file)
            dedup = AssetDeduplicator(assets_dir, asset_registry)

            stored = Path(dedup.store_asset(original)["stored_path"])
            destination = root / "windows_asset.txt"

            dedup.create_asset_link(stored, destination)

            assert destination.exists()
            assert not destination.is_symlink()
            assert destination.read_text() == content
class TestConflictResolution:
    """Test conflict resolution when the link target already exists.

    Each test pre-creates a regular file at the target path and then calls
    ``create_asset_link`` with one of the ``conflict_resolution`` modes
    exercised here: ``"overwrite"``, ``"backup"`` and ``"skip"``.
    """

    def test_existing_file_conflict_resolution(self):
        """With ``"overwrite"``, the existing target is replaced by the stored asset."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_content = "Source content"
            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text(source_content)

            # Create existing target file
            target_path = Path(temp_dir) / "existing_target.txt"
            target_path.write_text("Existing content")

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Should handle conflict gracefully
            deduplicator.create_asset_link(stored_path, target_path,
                                           conflict_resolution="overwrite")

            # Platform-independent check: whether the target is now a symlink
            # or a copy, reading it must yield the stored asset's content.
            # Without this the test asserted nothing at all on Windows.
            assert target_path.read_text() == source_content

            # Target should now link to stored asset
            if platform.system() != "Windows":
                assert target_path.is_symlink()

    def test_backup_conflict_resolution(self):
        """With ``"backup"``, the previous target content is kept in a ``.bak`` sibling."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("New content")

            # Create existing target file
            target_path = Path(temp_dir) / "target.txt"
            original_content = "Original content to backup"
            target_path.write_text(original_content)

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Create link with backup resolution
            deduplicator.create_asset_link(stored_path, target_path,
                                           conflict_resolution="backup")

            # The backup name appends ".bak" to the full original suffix,
            # i.e. "target.txt" -> "target.txt.bak".
            backup_path = target_path.with_suffix(target_path.suffix + ".bak")
            assert backup_path.exists()
            assert backup_path.read_text() == original_content

    def test_skip_conflict_resolution(self):
        """With ``"skip"``, the existing target is untouched and the result reports ``skipped``."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("Source content")

            # Create existing target file
            target_path = Path(temp_dir) / "existing.txt"
            original_content = "Original content"
            target_path.write_text(original_content)

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Skip operation for existing file
            result = deduplicator.create_asset_link(stored_path, target_path,
                                                    conflict_resolution="skip")

            # Original file should remain unchanged
            assert target_path.read_text() == original_content
            assert result["skipped"] is True
class TestAssetDeduplicatorErrorHandling:
    """Checks which exception types surface for bad inputs and I/O failures."""

    def test_store_nonexistent_file_raises_error(self):
        """Storing a path that does not exist must raise AssetError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_dir = root / "assets"
            registry_file = root / "registry.json"

            asset_registry = AssetRegistry(registry_file)
            dedup = AssetDeduplicator(assets_dir, asset_registry)

            missing = root / "does_not_exist.txt"
            with pytest.raises(AssetError):
                dedup.store_asset(missing)

    def test_invalid_storage_path_raises_error(self):
        """Passing a regular file as the storage path must raise DeduplicationError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)

            # A regular file occupies the place where a directory is required.
            not_a_dir = root / "not_a_directory.txt"
            not_a_dir.write_text("This is a file, not a directory")

            registry_file = root / "registry.json"
            asset_registry = AssetRegistry(registry_file)

            with pytest.raises(DeduplicationError):
                AssetDeduplicator(not_a_dir, asset_registry)

    def test_permission_error_handling(self):
        """A PermissionError from shutil.copy2 must surface as DeduplicationError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_dir = root / "assets"
            registry_file = root / "registry.json"

            asset_registry = AssetRegistry(registry_file)
            dedup = AssetDeduplicator(assets_dir, asset_registry)

            original = root / "source.txt"
            original.write_text("Test content")

            # Force the copy step to fail as if the destination were read-only.
            denied_copy = patch(
                'shutil.copy2', side_effect=PermissionError("Permission denied")
            )
            with denied_copy, pytest.raises(DeduplicationError):
                dedup.store_asset(original)
class TestAssetRetrieval:
    """Checks lookup of stored assets by content hash and integrity verification."""

    def test_retrieve_stored_asset(self):
        """The path returned for a stored hash exists and holds the original text."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_dir = root / "assets"
            registry_file = root / "registry.json"

            original = root / "source.txt"
            content = "Content for retrieval test"
            original.write_text(content)

            asset_registry = AssetRegistry(registry_file)
            dedup = AssetDeduplicator(assets_dir, asset_registry)

            digest = dedup.store_asset(original)["content_hash"]

            located = dedup.get_asset_path(digest)
            assert located.exists()
            assert located.read_text() == content

    def test_verify_asset_integrity(self):
        """An untouched stored asset must verify as valid."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_dir = root / "assets"
            registry_file = root / "registry.json"

            original = root / "source.txt"
            original.write_text("Content for integrity test")

            asset_registry = AssetRegistry(registry_file)
            dedup = AssetDeduplicator(assets_dir, asset_registry)

            digest = dedup.store_asset(original)["content_hash"]

            verdict = dedup.verify_asset_integrity(digest)
            assert verdict is True

    def test_detect_corrupted_asset(self):
        """Overwriting the stored file must make verification return False."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_dir = root / "assets"
            registry_file = root / "registry.json"

            original = root / "source.txt"
            original.write_text("Original content")

            asset_registry = AssetRegistry(registry_file)
            dedup = AssetDeduplicator(assets_dir, asset_registry)

            outcome = dedup.store_asset(original)
            digest = outcome["content_hash"]
            stored = Path(outcome["stored_path"])

            # Tamper with the stored copy so it no longer matches the hash
            # recorded when the asset was stored.
            stored.write_text("Corrupted content")

            verdict = dedup.verify_asset_integrity(digest)
            assert verdict is False