Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Asset Management System (Issue #142): - Add complete asset management framework with deduplication - Implement AssetManager, AssetRegistry, and AssetDeduplicator classes - Add AssetPackager for markdown document packaging - Create comprehensive test suite for all asset management components - Add asset constants and custom exceptions for robust error handling Markdown Processing Enhancements: - Update markdown_commands.py with improved functionality - Enhanced parsing and content aggregation capabilities - Improved filename encoding/decoding for special characters Test Suite Improvements: - Add comprehensive tests for Issue #138 markdown parsing - Enhance Issue #139 content aggregation and end-to-end testing - Complete test coverage for new asset management features Examples and Documentation: - Update BildungsKanonJon.md example with enhanced content - Generate corresponding HTML output for documentation - Add asset registry configuration Development Tools: - Add install script for simplified setup This commit represents a major enhancement to MarkiTect's asset handling capabilities with full test coverage and improved markdown processing. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
430 lines
17 KiB
Python
430 lines
17 KiB
Python
"""
|
|
Test scenarios for AssetDeduplicator symlink and deduplication functionality.

This module tests the AssetDeduplicator class for Issue #142: Phase 1 - Core Asset Management Module.
Tests cover content-based asset deduplication, symlink creation with relative paths,
Windows fallback to file copying, and conflict resolution.

Requirements:
- Content-based asset deduplication
- Symlink creation with relative paths
- Windows fallback to file copying
- Conflict resolution for existing assets
|
|
"""
|
|
|
|
import os
|
|
import platform
|
|
import shutil
|
|
import tempfile
|
|
from pathlib import Path
|
|
from unittest.mock import Mock, patch, MagicMock
|
|
import pytest
|
|
|
|
from markitect.assets.deduplicator import AssetDeduplicator
|
|
from markitect.assets.registry import AssetRegistry
|
|
from markitect.assets.exceptions import AssetError, DeduplicationError
|
|
|
|
|
|
class TestAssetDeduplicatorInitialization:
    """Construction-time behaviour of AssetDeduplicator."""

    def test_deduplicator_initialization(self):
        """The constructor exposes its arguments and the storage directory exists."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)
            assets_dir = root / "assets"
            asset_registry = AssetRegistry(root / "registry.json")

            dedup = AssetDeduplicator(assets_dir, asset_registry)

            assert dedup.storage_path == assets_dir
            assert dedup.registry == asset_registry
            # The test itself never creates ``assets_dir``.
            assert assets_dir.exists()

    def test_deduplicator_creates_storage_directory(self):
        """A storage path with a missing parent exists as a directory afterwards."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)
            nested_dir = root / "nonexistent" / "assets"
            asset_registry = AssetRegistry(root / "registry.json")

            # Only the constructor's side effect on the filesystem is checked.
            _ = AssetDeduplicator(nested_dir, asset_registry)

            assert nested_dir.exists()
            assert nested_dir.is_dir()
|
|
|
|
|
|
class TestAssetDeduplication:
    """Content-based deduplication as observed through ``store_asset``."""

    def test_deduplicate_identical_files(self):
        """Two files with the same text yield the same hash and stored path."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)
            payload = "Identical content for deduplication test"

            first = root / "file1.txt"
            second = root / "file2.txt"
            first.write_text(payload)
            second.write_text(payload)

            asset_registry = AssetRegistry(root / "registry.json")
            dedup = AssetDeduplicator(root / "assets", asset_registry)

            stored_first = dedup.store_asset(first)
            stored_second = dedup.store_asset(second)

            # Both results must describe one and the same stored asset.
            for key in ("content_hash", "stored_path"):
                assert stored_first[key] == stored_second[key]

    def test_different_files_stored_separately(self):
        """Files with different text yield different hashes and stored paths."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)

            first = root / "file1.txt"
            second = root / "file2.txt"
            first.write_text("Content of first file")
            second.write_text("Content of second file")

            asset_registry = AssetRegistry(root / "registry.json")
            dedup = AssetDeduplicator(root / "assets", asset_registry)

            stored_first = dedup.store_asset(first)
            stored_second = dedup.store_asset(second)

            # Neither the hash nor the storage location may be shared.
            for key in ("content_hash", "stored_path"):
                assert stored_first[key] != stored_second[key]
|
|
|
|
|
|
class TestSymlinkCreation:
    """Test symlink creation functionality with relative paths.

    Both tests are skipped on Windows through ``pytest.mark.skipif`` so the
    skip condition is declared on the test instead of inside its body.
    """

    @pytest.mark.skipif(
        platform.system() == "Windows",
        reason="Skipping Unix symlink test on Windows",
    )
    def test_create_symlink_unix(self):
        """Test symlink creation on Unix-like systems.

        Stores an asset, links to it from a sibling directory and checks that
        the link is a symlink that resolves to the stored file and whose
        target is not absolute.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("Source file content")

            target_dir = Path(temp_dir) / "target_dir"
            target_dir.mkdir()

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            # Store asset first
            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Create symlink to stored asset
            link_path = target_dir / "linked_asset.txt"
            deduplicator.create_asset_link(stored_path, link_path)

            assert link_path.is_symlink()
            assert link_path.resolve() == stored_path.resolve()
            # Test that symlink uses relative path
            assert not link_path.readlink().is_absolute()

    @pytest.mark.skipif(
        platform.system() == "Windows",
        reason="Skipping relative symlink test on Windows",
    )
    def test_symlink_uses_relative_path(self):
        """Test that created symlinks use relative paths.

        The link lives two directories below the temporary root while the
        asset store sits directly under it, so a relative target has to
        start by walking up with a ``..`` component.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("Source file for relative symlink test")

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Create symlink in subdirectory
            link_dir = Path(temp_dir) / "workspace" / "subdir"
            link_dir.mkdir(parents=True)
            link_path = link_dir / "asset_link.txt"

            deduplicator.create_asset_link(stored_path, link_path)

            # Verify symlink target is relative
            link_target = link_path.readlink()
            assert not link_target.is_absolute()
            # Compare the first path component rather than a string prefix:
            # a prefix check would also accept a name such as "..assets".
            # The slice keeps an empty target a plain assertion failure.
            assert link_target.parts[:1] == ("..",)
|
|
|
|
|
|
class TestWindowsFallbackCopying:
    """Copy-based fallback used when a symlink cannot be created."""

    def test_file_copy_fallback_on_symlink_failure(self):
        """With ``os.symlink`` failing, the link target is a regular copy."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)
            payload = "Content for copy fallback test"

            original = root / "source.txt"
            original.write_text(payload)

            asset_registry = AssetRegistry(root / "registry.json")
            dedup = AssetDeduplicator(root / "assets", asset_registry)

            stored = Path(dedup.store_asset(original)["stored_path"])
            destination = root / "copied_asset.txt"

            # Force every symlink attempt to fail for the duration of the call.
            symlink_failure = OSError("Symlink not supported")
            with patch('os.symlink', side_effect=symlink_failure):
                dedup.create_asset_link(stored, destination)

            # A real file with the asset's content must exist instead of a link.
            assert destination.exists()
            assert not destination.is_symlink()
            assert destination.read_text() == payload

    @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
    def test_windows_uses_file_copying_by_default(self):
        """On Windows the link target is a regular copy without any patching."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)
            payload = "Content for Windows copy test"

            original = root / "source.txt"
            original.write_text(payload)

            asset_registry = AssetRegistry(root / "registry.json")
            dedup = AssetDeduplicator(root / "assets", asset_registry)

            stored = Path(dedup.store_asset(original)["stored_path"])
            destination = root / "windows_asset.txt"
            dedup.create_asset_link(stored, destination)

            # The result must be a plain file carrying the asset's content.
            assert destination.exists()
            assert not destination.is_symlink()
            assert destination.read_text() == payload
|
|
|
|
|
|
class TestConflictResolution:
    """Test conflict resolution for existing assets.

    Each test pre-creates the link target as a regular file and then calls
    ``create_asset_link`` with one of the ``conflict_resolution`` modes
    ("overwrite", "backup", "skip").
    """

    def test_existing_file_conflict_resolution(self):
        """Test handling of conflicts when target file already exists.

        With ``conflict_resolution="overwrite"`` the pre-existing file must be
        replaced by the stored asset. The content check runs on every
        platform; the symlink check only where symlinks are expected.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_content = "Source content"
            source_file.write_text(source_content)

            # Create existing target file
            target_path = Path(temp_dir) / "existing_target.txt"
            target_path.write_text("Existing content")

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Should handle conflict gracefully
            deduplicator.create_asset_link(stored_path, target_path,
                                           conflict_resolution="overwrite")

            # Whether the target is a symlink or a copy, reading it must now
            # give the asset's content rather than "Existing content". Without
            # this check the test asserted nothing at all on Windows.
            assert target_path.read_text() == source_content

            # Target should now link to stored asset
            if platform.system() != "Windows":
                assert target_path.is_symlink()

    def test_backup_conflict_resolution(self):
        """Test backup creation during conflict resolution.

        With ``conflict_resolution="backup"`` the original file content must
        be found in a sibling file whose name is the target name plus ".bak".
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("New content")

            # Create existing target file
            target_path = Path(temp_dir) / "target.txt"
            original_content = "Original content to backup"
            target_path.write_text(original_content)

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Create link with backup resolution
            deduplicator.create_asset_link(stored_path, target_path,
                                           conflict_resolution="backup")

            # Should create backup file ("target.txt" -> "target.txt.bak")
            backup_path = target_path.with_suffix(target_path.suffix + ".bak")
            assert backup_path.exists()
            assert backup_path.read_text() == original_content

    def test_skip_conflict_resolution(self):
        """Test skipping operation when file exists and resolution is 'skip'.

        The existing file must keep its content and the returned mapping must
        report ``"skipped"`` as ``True``.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("Source content")

            # Create existing target file
            target_path = Path(temp_dir) / "existing.txt"
            original_content = "Original content"
            target_path.write_text(original_content)

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Skip operation for existing file
            result = deduplicator.create_asset_link(stored_path, target_path,
                                                    conflict_resolution="skip")

            # Original file should remain unchanged
            assert target_path.read_text() == original_content
            assert result["skipped"] is True
|
|
|
|
|
|
class TestAssetDeduplicatorErrorHandling:
    """Failure paths: which exception type each bad input produces."""

    def test_store_nonexistent_file_raises_error(self):
        """Storing a path that does not exist raises ``AssetError``."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)
            asset_registry = AssetRegistry(root / "registry.json")
            dedup = AssetDeduplicator(root / "assets", asset_registry)

            # This path is never created.
            missing = root / "does_not_exist.txt"

            with pytest.raises(AssetError):
                dedup.store_asset(missing)

    def test_invalid_storage_path_raises_error(self):
        """A regular file as storage path raises ``DeduplicationError``."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)

            # Occupy the would-be storage location with a plain file.
            blocker = root / "not_a_directory.txt"
            blocker.write_text("This is a file, not a directory")

            asset_registry = AssetRegistry(root / "registry.json")

            with pytest.raises(DeduplicationError):
                AssetDeduplicator(blocker, asset_registry)

    def test_permission_error_handling(self):
        """A ``PermissionError`` from ``shutil.copy2`` surfaces as ``DeduplicationError``."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)
            asset_registry = AssetRegistry(root / "registry.json")
            dedup = AssetDeduplicator(root / "assets", asset_registry)

            original = root / "source.txt"
            original.write_text("Test content")

            # Make every copy attempt fail while the asset is being stored.
            denied = PermissionError("Permission denied")
            with patch('shutil.copy2', side_effect=denied), \
                    pytest.raises(DeduplicationError):
                dedup.store_asset(original)
|
|
|
|
|
|
class TestAssetRetrieval:
    """Looking up stored assets by hash and checking their integrity."""

    def test_retrieve_stored_asset(self):
        """``get_asset_path`` returns an existing file holding the stored text."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)
            payload = "Content for retrieval test"

            original = root / "source.txt"
            original.write_text(payload)

            asset_registry = AssetRegistry(root / "registry.json")
            dedup = AssetDeduplicator(root / "assets", asset_registry)

            digest = dedup.store_asset(original)["content_hash"]

            # Look the asset up again using only its hash.
            located = dedup.get_asset_path(digest)
            assert located.exists()
            assert located.read_text() == payload

    def test_verify_asset_integrity(self):
        """An untouched stored asset verifies as ``True``."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)

            original = root / "source.txt"
            original.write_text("Content for integrity test")

            asset_registry = AssetRegistry(root / "registry.json")
            dedup = AssetDeduplicator(root / "assets", asset_registry)

            digest = dedup.store_asset(original)["content_hash"]

            assert dedup.verify_asset_integrity(digest) is True

    def test_detect_corrupted_asset(self):
        """Overwriting the stored file makes verification return ``False``."""
        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)

            original = root / "source.txt"
            original.write_text("Original content")

            asset_registry = AssetRegistry(root / "registry.json")
            dedup = AssetDeduplicator(root / "assets", asset_registry)

            outcome = dedup.store_asset(original)
            digest = outcome["content_hash"]
            stored = Path(outcome["stored_path"])

            # Change the stored bytes behind the deduplicator's back.
            stored.write_text("Corrupted content")

            assert dedup.verify_asset_integrity(digest) is False