feat: comprehensive asset management system and testing improvements
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Asset Management System (Issue #142):
- Add complete asset management framework with deduplication
- Implement AssetManager, AssetRegistry, and AssetDeduplicator classes
- Add AssetPackager for markdown document packaging
- Create comprehensive test suite for all asset management components
- Add asset constants and custom exceptions for robust error handling

Markdown Processing Enhancements:
- Update markdown_commands.py with improved functionality
- Enhanced parsing and content aggregation capabilities
- Improved filename encoding/decoding for special characters

Test Suite Improvements:
- Add comprehensive tests for Issue #138 markdown parsing
- Enhance Issue #139 content aggregation and end-to-end testing
- Complete test coverage for new asset management features

Examples and Documentation:
- Update BildungsKanonJon.md example with enhanced content
- Generate corresponding HTML output for documentation
- Add asset registry configuration

Development Tools:
- Add install script for simplified setup

This commit represents a major enhancement to MarkiTect's asset handling capabilities with full test coverage and improved markdown processing.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -50,13 +50,14 @@ Detailed content here.
|
||||
|
||||
try:
|
||||
# This should fail initially (RED phase)
|
||||
structure = parse_markdown_structure(temp_file)
|
||||
structure, front_matter = parse_markdown_structure(temp_file)
|
||||
|
||||
# Verify structure
|
||||
assert len(structure) == 1 # One part
|
||||
assert structure[0].level == 1
|
||||
assert structure[0].title == "Part 1: Introduction"
|
||||
assert len(structure[0].children) == 2 # Two chapters
|
||||
assert front_matter is None # No front matter in this test
|
||||
|
||||
# Check chapters
|
||||
assert structure[0].children[0].level == 2
|
||||
@@ -154,12 +155,14 @@ Section content.
|
||||
|
||||
try:
|
||||
# This should fail initially (RED phase)
|
||||
structure = parse_markdown_structure(temp_file)
|
||||
structure, front_matter = parse_markdown_structure(temp_file)
|
||||
|
||||
# Front matter should be handled appropriately
|
||||
# Front matter should be extracted and structure parsed
|
||||
assert len(structure) == 1
|
||||
assert structure[0].title == "Chapter 1"
|
||||
assert structure[0].level == 1
|
||||
assert front_matter is not None
|
||||
assert 'title: "My Document"' in front_matter
|
||||
|
||||
finally:
|
||||
temp_file.unlink()
|
||||
@@ -178,10 +181,11 @@ Some more content.
|
||||
|
||||
try:
|
||||
# This should fail initially (RED phase)
|
||||
structure = parse_markdown_structure(temp_file)
|
||||
structure, front_matter = parse_markdown_structure(temp_file)
|
||||
|
||||
# Should return empty structure or handle gracefully
|
||||
assert structure == [] or structure is None
|
||||
assert structure == []
|
||||
assert front_matter is None
|
||||
|
||||
finally:
|
||||
temp_file.unlink()
|
||||
@@ -204,10 +208,11 @@ Back to level 2.
|
||||
|
||||
try:
|
||||
# This should fail initially (RED phase)
|
||||
structure = parse_markdown_structure(temp_file)
|
||||
structure, front_matter = parse_markdown_structure(temp_file)
|
||||
|
||||
# Should handle inconsistent levels gracefully
|
||||
assert len(structure) == 1 # Main title
|
||||
assert front_matter is None
|
||||
assert structure[0].level == 1
|
||||
assert len(structure[0].children) >= 1 # Should have children
|
||||
|
||||
|
||||
@@ -365,7 +365,7 @@ More content""")
|
||||
file_path.write_text(content)
|
||||
files.append(file_path)
|
||||
|
||||
aggregated = aggregate_content(files, preserve_front_matter=True)
|
||||
aggregated = aggregate_content(self.temp_dir, preserve_front_matter=True)
|
||||
|
||||
# Should have front matter at the beginning
|
||||
lines = aggregated.split('\n')
|
||||
|
||||
@@ -334,7 +334,7 @@ class TestBookLikeStructureProcessing:
|
||||
assert "```python" in content
|
||||
assert "| Feature | Description |" in content
|
||||
assert "" in content
|
||||
assert "- Step 1" in content
|
||||
assert "1. First step" in content
|
||||
|
||||
def _create_book_structure(self):
|
||||
"""Create a realistic book directory structure."""
|
||||
@@ -552,7 +552,7 @@ Advanced topics.
|
||||
|
||||
# Verify exploded structure exists
|
||||
assert exploded_dir.exists()
|
||||
assert (exploded_dir / "getting_started").exists()
|
||||
assert (exploded_dir / "user_guide" / "getting_started").exists()
|
||||
|
||||
# Now implode it back
|
||||
imploded_file = self.temp_dir / "reconstructed.md"
|
||||
|
||||
430
tests/test_issue_142_asset_deduplicator.py
Normal file
430
tests/test_issue_142_asset_deduplicator.py
Normal file
@@ -0,0 +1,430 @@
|
||||
"""
|
||||
Test scenarios for AssetDeduplicator symlink and deduplication functionality.
|
||||
|
||||
This module tests the AssetDeduplicator class for Issue #142: Phase 1 - Core Asset Management Module.
|
||||
Tests cover content-based asset deduplication, symlink creation with relative paths,
|
||||
Windows fallback to file copying, and conflict resolution.
|
||||
|
||||
Requirements:
|
||||
- Content-based asset deduplication
|
||||
- Symlink creation with relative paths
|
||||
- Windows fallback to file copying
|
||||
- Conflict resolution for existing assets
|
||||
"""
|
||||
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
import pytest
|
||||
|
||||
from markitect.assets.deduplicator import AssetDeduplicator
|
||||
from markitect.assets.registry import AssetRegistry
|
||||
from markitect.assets.exceptions import AssetError, DeduplicationError
|
||||
|
||||
|
||||
class TestAssetDeduplicatorInitialization:
    """Construction-time behaviour of AssetDeduplicator."""

    def test_deduplicator_initialization(self):
        """The constructor exposes its arguments and creates the storage directory."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            storage_path = root / "assets"
            registry = AssetRegistry(root / "registry.json")

            deduplicator = AssetDeduplicator(storage_path, registry)

            # The instance must expose exactly what it was given ...
            assert deduplicator.storage_path == storage_path
            assert deduplicator.registry == registry
            # ... and the storage directory must exist afterwards.
            assert storage_path.exists()  # Should create storage directory

    def test_deduplicator_creates_storage_directory(self):
        """A storage path with a missing parent is created as a directory."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            # "nonexistent" does not exist yet, so the parent must be created too.
            storage_path = root / "nonexistent" / "assets"
            registry = AssetRegistry(root / "registry.json")

            deduplicator = AssetDeduplicator(storage_path, registry)

            assert storage_path.exists()
            assert storage_path.is_dir()
|
||||
|
||||
|
||||
class TestAssetDeduplication:
    """Content-based deduplication of assets handed to ``store_asset``."""

    @staticmethod
    def _store_two_files(root, first_text, second_text):
        """Write file1.txt/file2.txt under *root*, store both, return both results.

        The files are written before the registry and deduplicator are
        created, and file1 is stored before file2.
        """
        file1 = root / "file1.txt"
        file2 = root / "file2.txt"
        file1.write_text(first_text)
        file2.write_text(second_text)

        registry = AssetRegistry(root / "registry.json")
        deduplicator = AssetDeduplicator(root / "assets", registry)

        first_result = deduplicator.store_asset(file1)
        second_result = deduplicator.store_asset(file2)
        return first_result, second_result

    def test_deduplicate_identical_files(self):
        """Two files with the same bytes resolve to one stored asset."""
        with tempfile.TemporaryDirectory() as temp_dir:
            content = "Identical content for deduplication test"
            result1, result2 = self._store_two_files(Path(temp_dir), content, content)

            # Same content -> same hash and same location in storage.
            assert result1["content_hash"] == result2["content_hash"]
            assert result1["stored_path"] == result2["stored_path"]

    def test_different_files_stored_separately(self):
        """Files with different content get distinct hashes and storage paths."""
        with tempfile.TemporaryDirectory() as temp_dir:
            result1, result2 = self._store_two_files(
                Path(temp_dir),
                "Content of first file",
                "Content of second file",
            )

            assert result1["content_hash"] != result2["content_hash"]
            assert result1["stored_path"] != result2["stored_path"]
|
||||
|
||||
|
||||
class TestSymlinkCreation:
    """Test symlink creation functionality with relative paths.

    Both tests require working symlinks, so they are skipped on Windows.
    The skip is declared with ``pytest.mark.skipif`` rather than an in-body
    ``pytest.skip`` call, so the condition is visible on the test itself.
    """

    @pytest.mark.skipif(platform.system() == "Windows",
                        reason="Skipping Unix symlink test on Windows")
    def test_create_symlink_unix(self):
        """Test symlink creation on Unix-like systems."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("Source file content")

            target_dir = Path(temp_dir) / "target_dir"
            target_dir.mkdir()

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            # Store asset first
            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Create symlink to stored asset
            link_path = target_dir / "linked_asset.txt"
            deduplicator.create_asset_link(stored_path, link_path)

            assert link_path.is_symlink()
            # Compare resolved paths so the check is independent of how the
            # link target is spelled.
            assert link_path.resolve() == stored_path.resolve()
            # Test that symlink uses relative path
            assert not link_path.readlink().is_absolute()

    @pytest.mark.skipif(platform.system() == "Windows",
                        reason="Skipping relative symlink test on Windows")
    def test_symlink_uses_relative_path(self):
        """Test that created symlinks use relative paths."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("Source file for relative symlink test")

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Create symlink in a nested subdirectory, so a relative target
            # has to climb out of it before reaching the storage directory.
            link_dir = Path(temp_dir) / "workspace" / "subdir"
            link_dir.mkdir(parents=True)
            link_path = link_dir / "asset_link.txt"

            deduplicator.create_asset_link(stored_path, link_path)

            # Verify symlink target is relative
            link_target = link_path.readlink()
            assert not link_target.is_absolute()
            assert str(link_target).startswith("..")
|
||||
|
||||
|
||||
class TestWindowsFallbackCopying:
    """Linking falls back to a plain file copy when symlinks are unavailable."""

    @staticmethod
    def _prepare_stored_asset(root, content):
        """Write source.txt with *content*, store it, return (deduplicator, stored path)."""
        source_file = root / "source.txt"
        source_file.write_text(content)

        registry = AssetRegistry(root / "registry.json")
        deduplicator = AssetDeduplicator(root / "assets", registry)

        store_result = deduplicator.store_asset(source_file)
        return deduplicator, Path(store_result["stored_path"])

    def test_file_copy_fallback_on_symlink_failure(self):
        """A failing ``os.symlink`` results in a regular copied file."""
        with tempfile.TemporaryDirectory() as temp_dir:
            content = "Content for copy fallback test"
            deduplicator, stored_path = self._prepare_stored_asset(Path(temp_dir), content)
            target_path = Path(temp_dir) / "copied_asset.txt"

            # Simulate a platform where creating symlinks raises OSError.
            failing_symlink = patch('os.symlink', side_effect=OSError("Symlink not supported"))
            with failing_symlink:
                deduplicator.create_asset_link(stored_path, target_path)

            # The target must be a real file carrying the stored content.
            assert target_path.exists()
            assert not target_path.is_symlink()
            assert target_path.read_text() == content

    @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
    def test_windows_uses_file_copying_by_default(self):
        """On Windows, linking an asset yields a copied file without any patching."""
        with tempfile.TemporaryDirectory() as temp_dir:
            content = "Content for Windows copy test"
            deduplicator, stored_path = self._prepare_stored_asset(Path(temp_dir), content)

            target_path = Path(temp_dir) / "windows_asset.txt"
            deduplicator.create_asset_link(stored_path, target_path)

            assert target_path.exists()
            assert not target_path.is_symlink()
            assert target_path.read_text() == content
|
||||
|
||||
|
||||
class TestConflictResolution:
    """Test conflict resolution for existing assets.

    Each test pre-creates the link target as a regular file and then calls
    ``create_asset_link`` with one of the ``conflict_resolution`` modes
    exercised here: "overwrite", "backup" and "skip".
    """

    def test_existing_file_conflict_resolution(self):
        """Test handling of conflicts when target file already exists."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_content = "Source content"
            source_file.write_text(source_content)

            # Create existing target file
            target_path = Path(temp_dir) / "existing_target.txt"
            target_path.write_text("Existing content")

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Should handle conflict gracefully
            deduplicator.create_asset_link(stored_path, target_path,
                                           conflict_resolution="overwrite")

            # Target should now link to stored asset
            if platform.system() != "Windows":
                assert target_path.is_symlink()

            # Platform-independent check: whether the target is a symlink or
            # a copy, reading it must now yield the stored asset's content.
            # Without this the test asserts nothing at all on Windows.
            assert target_path.read_text() == source_content

    def test_backup_conflict_resolution(self):
        """Test backup creation during conflict resolution."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("New content")

            # Create existing target file
            target_path = Path(temp_dir) / "target.txt"
            original_content = "Original content to backup"
            target_path.write_text(original_content)

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Create link with backup resolution
            deduplicator.create_asset_link(stored_path, target_path,
                                           conflict_resolution="backup")

            # Should create backup file: the suffix is appended, so
            # "target.txt" is expected to be preserved as "target.txt.bak".
            backup_path = target_path.with_suffix(target_path.suffix + ".bak")
            assert backup_path.exists()
            assert backup_path.read_text() == original_content

    def test_skip_conflict_resolution(self):
        """Test skipping operation when file exists and resolution is 'skip'."""
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_path = Path(temp_dir) / "assets"
            registry_path = Path(temp_dir) / "registry.json"

            source_file = Path(temp_dir) / "source.txt"
            source_file.write_text("Source content")

            # Create existing target file
            target_path = Path(temp_dir) / "existing.txt"
            original_content = "Original content"
            target_path.write_text(original_content)

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            store_result = deduplicator.store_asset(source_file)
            stored_path = Path(store_result["stored_path"])

            # Skip operation for existing file
            result = deduplicator.create_asset_link(stored_path, target_path,
                                                    conflict_resolution="skip")

            # Original file should remain unchanged
            assert target_path.read_text() == original_content
            assert result["skipped"] is True
|
||||
|
||||
|
||||
class TestAssetDeduplicatorErrorHandling:
    """Failure modes of AssetDeduplicator and the exceptions they raise."""

    def test_store_nonexistent_file_raises_error(self):
        """Storing a path that does not exist raises AssetError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            registry = AssetRegistry(root / "registry.json")
            deduplicator = AssetDeduplicator(root / "assets", registry)

            missing = root / "does_not_exist.txt"

            with pytest.raises(AssetError):
                deduplicator.store_asset(missing)

    def test_invalid_storage_path_raises_error(self):
        """Passing a regular file as the storage path raises DeduplicationError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)

            # The storage path points at a file, not a directory.
            not_a_dir = root / "not_a_directory.txt"
            not_a_dir.write_text("This is a file, not a directory")

            registry = AssetRegistry(root / "registry.json")

            with pytest.raises(DeduplicationError):
                AssetDeduplicator(not_a_dir, registry)

    def test_permission_error_handling(self):
        """A PermissionError from ``shutil.copy2`` surfaces as DeduplicationError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            registry = AssetRegistry(root / "registry.json")
            deduplicator = AssetDeduplicator(root / "assets", registry)

            source_file = root / "source.txt"
            source_file.write_text("Test content")

            # Make the copy step fail, then expect the wrapped exception.
            denied_copy = patch('shutil.copy2', side_effect=PermissionError("Permission denied"))
            with denied_copy, pytest.raises(DeduplicationError):
                deduplicator.store_asset(source_file)
|
||||
|
||||
|
||||
class TestAssetRetrieval:
    """Looking up stored assets by hash and checking their integrity."""

    @staticmethod
    def _store_source(root, content):
        """Write source.txt with *content*, store it, return (deduplicator, store result)."""
        source_file = root / "source.txt"
        source_file.write_text(content)

        registry = AssetRegistry(root / "registry.json")
        deduplicator = AssetDeduplicator(root / "assets", registry)

        return deduplicator, deduplicator.store_asset(source_file)

    def test_retrieve_stored_asset(self):
        """``get_asset_path`` returns an existing file holding the stored content."""
        with tempfile.TemporaryDirectory() as temp_dir:
            content = "Content for retrieval test"
            deduplicator, store_result = self._store_source(Path(temp_dir), content)

            retrieved_path = deduplicator.get_asset_path(store_result["content_hash"])

            assert retrieved_path.exists()
            assert retrieved_path.read_text() == content

    def test_verify_asset_integrity(self):
        """An untouched stored asset verifies as valid."""
        with tempfile.TemporaryDirectory() as temp_dir:
            deduplicator, store_result = self._store_source(
                Path(temp_dir), "Content for integrity test"
            )

            is_valid = deduplicator.verify_asset_integrity(store_result["content_hash"])

            assert is_valid is True

    def test_detect_corrupted_asset(self):
        """Overwriting the stored file makes integrity verification fail."""
        with tempfile.TemporaryDirectory() as temp_dir:
            deduplicator, store_result = self._store_source(
                Path(temp_dir), "Original content"
            )
            content_hash = store_result["content_hash"]
            stored_path = Path(store_result["stored_path"])

            # Tamper with the file in storage so it no longer matches its hash.
            stored_path.write_text("Corrupted content")

            is_valid = deduplicator.verify_asset_integrity(content_hash)

            assert is_valid is False
|
||||
574
tests/test_issue_142_asset_manager.py
Normal file
574
tests/test_issue_142_asset_manager.py
Normal file
@@ -0,0 +1,574 @@
|
||||
"""
|
||||
Test scenarios for AssetManager high-level API coordination functionality.
|
||||
|
||||
This module tests the AssetManager class for Issue #142: Phase 1 - Core Asset Management Module.
|
||||
Tests cover high-level API coordination, integration with existing markitect patterns,
|
||||
error handling and logging, and configuration management integration.
|
||||
|
||||
Requirements:
|
||||
- High-level API coordinating all operations
|
||||
- Integration with existing markitect patterns
|
||||
- Error handling and logging
|
||||
- Configuration management integration
|
||||
"""
|
||||
|
||||
import tempfile
|
||||
import json
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
import pytest
|
||||
import logging
|
||||
|
||||
from markitect.assets.manager import AssetManager
|
||||
from markitect.assets.registry import AssetRegistry
|
||||
from markitect.assets.deduplicator import AssetDeduplicator
|
||||
from markitect.assets.packager import MarkdownPackager
|
||||
from markitect.assets.exceptions import AssetError, AssetManagerError
|
||||
from markitect.config_manager import ConfigurationManager
|
||||
|
||||
|
||||
class TestAssetManagerInitialization:
    """How AssetManager is constructed from explicit, default and managed config."""

    def test_manager_initialization_with_config(self):
        """Values under the "assets" config section end up on the manager."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_section = {
                "storage_path": str(root / "assets"),
                "registry_path": str(root / "registry.json"),
                "enable_deduplication": True,
                "default_conflict_resolution": "backup"
            }

            manager = AssetManager({"assets": assets_section})

            assert manager.storage_path == root / "assets"
            assert manager.registry_path == root / "registry.json"
            assert manager.enable_deduplication is True

    def test_manager_initialization_with_defaults(self):
        """Without a config the manager falls back to its built-in defaults."""
        manager = AssetManager()

        # Only the final path components are checked, not their parent directory.
        assert manager.storage_path.name == "assets"
        assert manager.registry_path.name == "asset_registry.json"
        assert manager.enable_deduplication is True

    def test_manager_creates_required_components(self):
        """The manager exposes registry, deduplicator and packager instances."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            assets_section = {
                "storage_path": str(root / "assets"),
                "registry_path": str(root / "registry.json")
            }

            manager = AssetManager({"assets": assets_section})

            assert isinstance(manager.registry, AssetRegistry)
            assert isinstance(manager.deduplicator, AssetDeduplicator)
            assert isinstance(manager.packager, MarkdownPackager)

    def test_manager_integration_with_config_manager(self):
        """``from_config_manager`` builds a manager from ConfigurationManager's config."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            config_data = {
                "assets": {
                    "storage_path": str(root / "custom_assets"),
                    "enable_deduplication": False
                }
            }

            # A config file is written to disk, but the values under test are
            # supplied through the patched ``get_current_config`` below.
            config_file = root / ".markitect.json"
            config_file.write_text(json.dumps(config_data))

            patched_config = patch.object(
                ConfigurationManager, 'get_current_config', return_value=config_data
            )
            with patched_config:
                manager = AssetManager.from_config_manager()

                # Assertions run while the patch is still active.
                assert str(manager.storage_path).endswith("custom_assets")
                assert manager.enable_deduplication is False
|
||||
|
||||
|
||||
class TestAssetManagerHighLevelOperations:
    """Add, list, inspect and remove assets through the AssetManager API."""

    @staticmethod
    def _manager_in(root):
        """Return an AssetManager whose storage and registry live under *root*."""
        return AssetManager({
            "assets": {
                "storage_path": str(root / "assets"),
                "registry_path": str(root / "registry.json")
            }
        })

    def test_add_asset_with_deduplication(self):
        """``add_asset`` reports hash, stored path, dedup flag and description."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            manager = self._manager_in(root)

            asset_file = root / "test_asset.txt"
            asset_file.write_text("Test asset content")

            result = manager.add_asset(asset_file, "Test asset")

            for expected_key in ("content_hash", "stored_path", "deduplicated"):
                assert expected_key in result
            assert result["description"] == "Test asset"

    def test_add_duplicate_asset_detected(self):
        """Adding the same content twice flags the second add as deduplicated."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            manager = self._manager_in(root)

            content = "Identical content for deduplication"
            asset1 = root / "asset1.txt"
            asset2 = root / "asset2.txt"
            asset1.write_text(content)
            asset2.write_text(content)

            result1 = manager.add_asset(asset1, "First asset")
            result2 = manager.add_asset(asset2, "Second asset")

            assert result1["content_hash"] == result2["content_hash"]
            assert result2["deduplicated"] is True

    def test_list_assets_with_metadata(self):
        """``list_assets`` returns one metadata entry per distinct asset added."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            manager = self._manager_in(root)

            # Three assets with distinct content, so none is deduplicated away.
            assets = []
            for index in range(3):
                asset_file = root / f"asset_{index}.txt"
                asset_file.write_text(f"Content for asset {index}")
                assets.append(manager.add_asset(asset_file, f"Asset {index}"))

            asset_list = manager.list_assets()

            assert len(asset_list) == 3
            for asset in asset_list:
                assert "content_hash" in asset
                assert "description" in asset
                assert "size" in asset
                assert "mime_type" in asset

    def test_get_asset_info_by_hash(self):
        """``get_asset_info`` returns detailed metadata for a known content hash."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            manager = self._manager_in(root)

            asset_file = root / "info_test.txt"
            asset_file.write_text("Information test content")
            content_hash = manager.add_asset(asset_file, "Info test asset")["content_hash"]

            asset_info = manager.get_asset_info(content_hash)

            assert asset_info["content_hash"] == content_hash
            assert asset_info["description"] == "Info test asset"
            assert "created_at" in asset_info
            assert "file_path" in asset_info

    def test_remove_asset_by_hash(self):
        """``remove_asset`` reports success and the asset no longer exists afterwards."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            manager = self._manager_in(root)

            asset_file = root / "remove_test.txt"
            asset_file.write_text("Content to be removed")
            content_hash = manager.add_asset(asset_file)["content_hash"]

            # Precondition: the asset is known before removal.
            assert manager.asset_exists(content_hash)

            removal_result = manager.remove_asset(content_hash)

            assert removal_result["removed"] is True
            assert not manager.asset_exists(content_hash)
|
||||
|
||||
|
||||
class TestAssetManagerPackaging:
    """Test high-level package creation and extraction operations."""

    @staticmethod
    def _make_manager(root):
        """Return an AssetManager whose storage and registry live under *root*."""
        return AssetManager({
            "assets": {
                "storage_path": str(root / "assets"),
                "registry_path": str(root / "registry.json"),
            }
        })

    def test_create_document_package(self):
        """Test creating complete document package with assets."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            manager = self._make_manager(root)

            # Create document structure
            doc_dir = root / "document"
            doc_dir.mkdir()

            # The markdown links both assets created below: one image and
            # one data file.
            md_content = "\n".join([
                "# Test Document",
                "",
                "This document has assets:",
                "- Image: ![Test Image](images/test.png)",
                "- Data: [CSV File](data/test.csv)",
                "",
            ])
            (doc_dir / "document.md").write_text(md_content)

            # Create assets
            (doc_dir / "images").mkdir()
            (doc_dir / "data").mkdir()
            (doc_dir / "images" / "test.png").write_bytes(b"PNG content")
            (doc_dir / "data" / "test.csv").write_text("col1,col2\n1,2")

            # Create package
            package_path = root / "test_document.mdpkg"
            result = manager.create_package(
                doc_dir, package_path, description="Test document package"
            )

            assert package_path.exists()
            assert result["package_path"] == str(package_path)
            assert "assets_processed" in result
            assert result["assets_processed"] == 2

    def test_extract_document_package_to_workspace(self):
        """Test extracting package to workspace with proper asset linking."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            manager = self._make_manager(root)

            # Create and package a document first; the readme links the logo
            # that is written next to it.
            doc_dir = root / "source_doc"
            doc_dir.mkdir()
            (doc_dir / "readme.md").write_text("# README\n\n![Logo](logo.png)")
            (doc_dir / "logo.png").write_bytes(b"Logo content")

            package_path = root / "source.mdpkg"
            manager.create_package(doc_dir, package_path)

            # Extract to workspace
            workspace_dir = root / "workspace"
            result = manager.extract_package(
                package_path, workspace_dir, restore_assets=True
            )

            assert workspace_dir.exists()
            assert (workspace_dir / "readme.md").exists()
            assert (workspace_dir / "logo.png").exists()
            assert result["extracted_files"] >= 1
            assert "asset_links_created" in result

    def test_package_with_custom_options(self):
        """Test package creation with custom options and exclude patterns."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            manager = self._make_manager(root)

            # Create document with files to exclude
            doc_dir = root / "document"
            doc_dir.mkdir()
            (doc_dir / "document.md").write_text("# Document")
            (doc_dir / "important.txt").write_text("Important content")
            (doc_dir / "temp.tmp").write_text("Temporary file")
            (doc_dir / ".hidden").write_text("Hidden file")

            package_path = root / "custom.mdpkg"

            # Create package with custom options; only the archive contents
            # are inspected, so the return value is not kept.
            manager.create_package(
                doc_dir,
                package_path,
                exclude_patterns=["*.tmp", ".*"],
                description="Custom package",
                metadata={"author": "Test", "version": "1.0"},
            )

            # Verify exclusions worked. The import stays local because this
            # module's top-level imports are not visible from here.
            import zipfile

            with zipfile.ZipFile(package_path, 'r') as zf:
                file_list = zf.namelist()

            assert "document.md" in file_list
            assert "important.txt" in file_list
            assert "temp.tmp" not in file_list
            assert ".hidden" not in file_list
|
||||
|
||||
|
||||
class TestAssetManagerErrorHandling:
    """Test error handling and logging functionality."""

    @staticmethod
    def _make_config(root):
        """Return a manager configuration rooted at the directory *root*."""
        return {
            "assets": {
                "storage_path": str(root / "assets"),
                "registry_path": str(root / "registry.json"),
            }
        }

    def test_add_nonexistent_asset_raises_error(self):
        """Test that adding non-existent asset raises appropriate error."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            manager = AssetManager(self._make_config(root))

            nonexistent_file = root / "does_not_exist.txt"

            with pytest.raises(AssetError):
                manager.add_asset(nonexistent_file)

    def test_get_info_for_nonexistent_asset_raises_error(self):
        """Test that getting info for non-existent asset raises error."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = AssetManager(self._make_config(Path(temp_dir)))

            with pytest.raises(AssetManagerError):
                manager.get_asset_info("nonexistent_hash_12345")

    def test_manager_logs_operations(self):
        """Test that AssetManager logs important operations.

        A temporary handler is attached to the ``markitect.assets`` logger for
        the duration of the test. The handler is detached and the previous
        level restored afterwards, so the shared logger is left untouched for
        later tests.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)

            # Set up logging capture
            import logging

            log_messages = []

            class _CollectingHandler(logging.Handler):
                """Record every formatted log message in ``log_messages``."""

                def emit(self, record):
                    log_messages.append(record.getMessage())

            capture_handler = _CollectingHandler()
            logger = logging.getLogger('markitect.assets')
            previous_level = logger.level
            logger.addHandler(capture_handler)
            logger.setLevel(logging.INFO)

            try:
                manager = AssetManager(self._make_config(root))

                # Add an asset (should be logged)
                asset_file = root / "log_test.txt"
                asset_file.write_text("Test content for logging")
                manager.add_asset(asset_file, "Log test asset")
            finally:
                # Loggers are process-wide; undo both changes even on failure.
                logger.removeHandler(capture_handler)
                logger.setLevel(previous_level)

            # Check that operation was logged
            assert any("Adding asset" in msg for msg in log_messages)

    def test_configuration_validation_errors(self):
        """Test that invalid configuration raises appropriate errors."""
        # Invalid storage path (file instead of directory)
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            invalid_file = root / "not_a_directory.txt"
            invalid_file.write_text("This is a file")

            config = {
                "assets": {
                    "storage_path": str(invalid_file),
                    "registry_path": str(root / "registry.json"),
                }
            }

            with pytest.raises(AssetManagerError):
                AssetManager(config)
|
||||
|
||||
|
||||
class TestAssetManagerWorkflows:
    """Test complete workflows and integration scenarios."""

    @staticmethod
    def _make_manager(root):
        """Return an AssetManager whose storage and registry live under *root*."""
        return AssetManager({
            "assets": {
                "storage_path": str(root / "assets"),
                "registry_path": str(root / "registry.json"),
            }
        })

    def test_complete_document_workflow(self):
        """Test complete workflow: add assets, create package, extract elsewhere."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            manager = self._make_manager(root)

            # 1. Create document with assets
            doc_dir = root / "project"
            doc_dir.mkdir()

            # Main document; it links the chart and the data file created below.
            (doc_dir / "project.md").write_text("\n".join([
                "# Project Document",
                "",
                "Assets:",
                "![Performance Chart](charts/performance.png)",
                "[Data](data/results.json)",
                "",
            ]))

            # Assets
            (doc_dir / "charts").mkdir()
            (doc_dir / "data").mkdir()
            (doc_dir / "charts" / "performance.png").write_bytes(b"Chart data")
            (doc_dir / "data" / "results.json").write_text('{"status": "success"}')

            # 2. Create package
            package_path = root / "project.mdpkg"
            package_result = manager.create_package(doc_dir, package_path)

            assert package_result["assets_processed"] == 2

            # 3. Extract to new location
            extract_dir = root / "extracted_project"
            manager.extract_package(package_path, extract_dir, restore_assets=True)

            # Verify complete extraction
            assert (extract_dir / "project.md").exists()
            assert (extract_dir / "charts" / "performance.png").exists()
            assert (extract_dir / "data" / "results.json").exists()

            # Verify content integrity
            extracted_json = (extract_dir / "data" / "results.json").read_text()
            assert '{"status": "success"}' == extracted_json

    def test_asset_sharing_between_packages(self):
        """Test that assets can be shared between different packages."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            manager = self._make_manager(root)

            logo_bytes = b"Shared logo content"

            # Create shared asset and add it under a recognisable description
            shared_asset = root / "shared_logo.png"
            shared_asset.write_bytes(logo_bytes)
            manager.add_asset(shared_asset, "Company logo")

            # Create first document using shared asset
            doc1_dir = root / "doc1"
            doc1_dir.mkdir()
            (doc1_dir / "doc1.md").write_text("# Doc 1\n\n![Logo](logo.png)")
            # Copy shared asset to doc structure
            (doc1_dir / "logo.png").write_bytes(logo_bytes)

            # Create second document using same asset
            doc2_dir = root / "doc2"
            doc2_dir.mkdir()
            (doc2_dir / "doc2.md").write_text("# Doc 2\n\n![Logo](logo.png)")
            (doc2_dir / "logo.png").write_bytes(logo_bytes)

            # Create packages
            pkg1_result = manager.create_package(doc1_dir, root / "doc1.mdpkg")
            pkg2_result = manager.create_package(doc2_dir, root / "doc2.mdpkg")

            # Both should reference the same deduplicated asset
            assert pkg1_result["assets_processed"] >= 1
            assert pkg2_result["assets_processed"] >= 1

            # Asset should only be stored once in the asset store
            logo_assets = [
                asset for asset in manager.list_assets()
                if asset.get("description") == "Company logo"
            ]
            assert len(logo_assets) == 1  # Only one copy stored

    def test_performance_requirements_met(self):
        """Test that operations complete within performance requirements (<100ms)."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            manager = self._make_manager(root)

            # Create reasonably sized test asset (1MB)
            asset_file = root / "performance_test.bin"
            asset_file.write_bytes(b"x" * (1024 * 1024))

            # Time the operation with the monotonic high-resolution clock;
            # time.time() can jump when the system clock is adjusted.
            import time

            start_time = time.perf_counter()
            result = manager.add_asset(asset_file, "Performance test asset")
            operation_time = (time.perf_counter() - start_time) * 1000  # Convert to ms

            # Should complete in under 100ms for 1MB file
            assert operation_time < 100, f"Operation took {operation_time}ms, expected <100ms"
            assert result["content_hash"] is not None
|
||||
270
tests/test_issue_142_asset_registry.py
Normal file
270
tests/test_issue_142_asset_registry.py
Normal file
@@ -0,0 +1,270 @@
|
||||
"""
|
||||
Test scenarios for AssetRegistry JSON persistence functionality.
|
||||
|
||||
This module tests the AssetRegistry class for Issue #142: Phase 1 - Core Asset Management Module.
|
||||
Tests cover JSON-based metadata persistence, SHA-256 content hashing, MIME type detection,
|
||||
and thread-safe registry operations.
|
||||
|
||||
Requirements:
|
||||
- JSON-based asset metadata persistence
|
||||
- SHA-256 content hashing for deduplication
|
||||
- MIME type detection and file size tracking
|
||||
- Thread-safe registry operations
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch
|
||||
import pytest
|
||||
|
||||
from markitect.assets.registry import AssetRegistry
|
||||
from markitect.assets.exceptions import AssetError, RegistryError
|
||||
|
||||
|
||||
class TestAssetRegistryCore:
    """Construction and loading behaviour of AssetRegistry."""

    def test_registry_initialization(self):
        """A registry built on a fresh path records the path and creates the file."""
        with tempfile.TemporaryDirectory() as temp_dir:
            target = Path(temp_dir) / "test_registry.json"

            registry = AssetRegistry(target)

            assert registry.registry_path == target
            assert target.exists()  # Should create empty registry

    def test_registry_loads_existing_json(self):
        """A registry built on an existing JSON file exposes its stored assets."""
        with tempfile.TemporaryDirectory() as temp_dir:
            target = Path(temp_dir) / "existing_registry.json"

            # Seed the file with one pre-registered asset before loading.
            seeded_entry = {
                "path": "/test/file.txt",
                "content_hash": "hash123",
                "mime_type": "text/plain",
                "size": 100,
            }
            target.write_text(json.dumps({"assets": {"hash123": seeded_entry}}))

            listed = AssetRegistry(target).list_assets()

            assert len(listed) == 1
            assert listed[0]["content_hash"] == "hash123"
|
||||
|
||||
|
||||
class TestAssetRegistryHashing:
    """Test SHA-256 content hashing functionality."""

    def test_generate_content_hash_from_file(self):
        """Test generating SHA-256 hash from file content."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            test_file = Path(temp_dir) / "test.txt"
            test_file.write_text("Hello, World!")

            registry = AssetRegistry(registry_path)
            content_hash = registry.generate_content_hash(test_file)

            # SHA-256 of "Hello, World!" should be consistent
            expected_hash = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"
            assert content_hash == expected_hash

    def test_generate_content_hash_from_bytes(self):
        """Test generating SHA-256 hash from byte content."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            registry = AssetRegistry(registry_path)

            test_content = b"Binary content test"
            content_hash = registry.generate_content_hash(test_content)

            # The result has the shape of a SHA-256 hex digest.
            assert isinstance(content_hash, str)
            assert len(content_hash) == 64  # SHA-256 hex length

            # Should generate consistent hash for same content
            assert registry.generate_content_hash(test_content) == content_hash
|
||||
|
||||
|
||||
class TestAssetRegistryMimeTypes:
    """MIME type detection through AssetRegistry.detect_mime_type."""

    def test_detect_mime_type_text_file(self):
        """A ``.txt`` file is reported with a ``text/`` MIME type."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            registry_path = root / "registry.json"
            sample = root / "test.txt"
            sample.write_text("Plain text content")

            detected = AssetRegistry(registry_path).detect_mime_type(sample)

            assert detected.startswith("text/")

    def test_detect_mime_type_image_file(self):
        """A ``.png`` file carrying the PNG signature is reported as ``image/png``."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            registry_path = root / "registry.json"

            # Minimal PNG prefix: the 8-byte signature followed by the start
            # of an IHDR chunk.
            sample = root / "test.png"
            sample.write_bytes(b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR')

            detected = AssetRegistry(registry_path).detect_mime_type(sample)

            assert detected == "image/png"
|
||||
|
||||
|
||||
class TestAssetRegistryOperations:
    """Registering assets and reading them back from the registry."""

    def test_register_asset(self):
        """Registering a file returns a record with hash, MIME type, size and path."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            registry_path = root / "registry.json"
            source = root / "asset.txt"
            source.write_text("Test asset content")

            record = AssetRegistry(registry_path).register_asset(source)

            for key in ("content_hash", "mime_type", "size"):
                assert key in record
            assert record["path"] == str(source)

    def test_get_asset_by_hash(self):
        """A registered asset can be fetched again through its content hash."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            registry_path = root / "registry.json"
            source = root / "asset.txt"
            source.write_text("Test content for retrieval")

            registry = AssetRegistry(registry_path)
            content_hash = registry.register_asset(source)["content_hash"]

            fetched = registry.get_asset(content_hash)

            assert fetched["content_hash"] == content_hash
            assert fetched["path"] == str(source)

    def test_asset_exists_check(self):
        """``asset_exists`` is true for a registered hash and false for an unknown one."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            registry_path = root / "registry.json"
            source = root / "asset.txt"
            source.write_text("Existence test content")

            registry = AssetRegistry(registry_path)
            content_hash = registry.register_asset(source)["content_hash"]

            assert registry.asset_exists(content_hash)
            assert not registry.asset_exists("nonexistent_hash")
|
||||
|
||||
|
||||
class TestAssetRegistryPersistence:
    """On-disk JSON behaviour of the registry file."""

    def test_registry_persists_to_json(self):
        """Registering an asset leaves exactly one entry in the JSON file."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            registry_path = root / "registry.json"
            source = root / "asset.txt"
            source.write_text("Content to persist")

            AssetRegistry(registry_path).register_asset(source)

            # Read the file directly rather than going through the registry API.
            with open(registry_path) as handle:
                on_disk = json.load(handle)

            assert "assets" in on_disk
            assert len(on_disk["assets"]) == 1

    def test_registry_handles_corrupted_json(self):
        """A registry file with invalid JSON yields an empty asset list."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "corrupted_registry.json"
            registry_path.write_text("{ invalid json content")

            # Construction must not raise on the unparsable file.
            listed = AssetRegistry(registry_path).list_assets()

            assert listed == []
|
||||
|
||||
|
||||
class TestAssetRegistryThreadSafety:
    """Concurrent use of a single AssetRegistry instance."""

    def test_concurrent_asset_registration(self):
        """Five threads each register a distinct file on one shared registry."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            registry = AssetRegistry(root / "registry.json")

            results, errors = [], []

            def worker(thread_id):
                # Each worker writes its own file, so contents (and hashes) differ.
                try:
                    source = root / f"asset_{thread_id}.txt"
                    source.write_text(f"Content for thread {thread_id}")
                    results.append(registry.register_asset(source))
                except Exception as exc:
                    # Collected rather than raised so the main thread can report it.
                    errors.append(exc)

            # Launch all workers, then wait for every one of them.
            workers = []
            for thread_id in range(5):
                current = threading.Thread(target=worker, args=(thread_id,))
                workers.append(current)
                current.start()

            for current in workers:
                current.join()

            assert len(errors) == 0, f"Thread safety errors: {errors}"
            assert len(results) == 5
            unique_hashes = {record["content_hash"] for record in results}
            assert len(unique_hashes) == 5  # All unique hashes
|
||||
|
||||
|
||||
class TestAssetRegistryErrorHandling:
    """Test error handling and exception scenarios."""

    def test_register_nonexistent_file_raises_error(self):
        """Test that registering non-existent file raises appropriate error."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            nonexistent_file = Path(temp_dir) / "does_not_exist.txt"

            registry = AssetRegistry(registry_path)

            with pytest.raises(AssetError):
                registry.register_asset(nonexistent_file)

    def test_get_nonexistent_asset_raises_error(self):
        """Test that getting non-existent asset raises appropriate error."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            registry = AssetRegistry(registry_path)

            with pytest.raises(RegistryError):
                registry.get_asset("nonexistent_hash_12345")

    def test_invalid_registry_path_raises_error(self):
        """Test that invalid registry path raises appropriate error.

        The registry file is placed *below a regular file*, so its parent can
        never be created as a directory. Unlike a hard-coded protected system
        path, this does not depend on the user running the tests (root can
        write almost anywhere) or on the operating system's directory layout.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            blocker = Path(temp_dir) / "not_a_directory"
            blocker.write_text("This is a file")
            invalid_path = blocker / "cannot_write.json"

            # NOTE(review): assumes AssetRegistry converts any OSError raised
            # while creating the registry file into RegistryError - confirm.
            with pytest.raises(RegistryError):
                AssetRegistry(invalid_path)
|
||||
580
tests/test_issue_142_markdown_packager.py
Normal file
580
tests/test_issue_142_markdown_packager.py
Normal file
@@ -0,0 +1,580 @@
|
||||
"""
|
||||
Test scenarios for MarkdownPackager ZIP package creation/extraction functionality.
|
||||
|
||||
This module tests the MarkdownPackager class for Issue #142: Phase 1 - Core Asset Management Module.
|
||||
Tests cover .mdpkg ZIP package creation, package extraction with symlink restoration,
|
||||
manifest generation and validation, and asset resolution during packaging.
|
||||
|
||||
Requirements:
|
||||
- .mdpkg ZIP package creation
|
||||
- Package extraction with symlink restoration
|
||||
- Manifest generation and validation
|
||||
- Asset resolution during packaging
|
||||
"""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
import pytest
|
||||
|
||||
from markitect.assets.packager import MarkdownPackager
|
||||
from markitect.assets.registry import AssetRegistry
|
||||
from markitect.assets.deduplicator import AssetDeduplicator
|
||||
from markitect.assets.exceptions import AssetError, PackagingError
|
||||
|
||||
|
||||
class TestMarkdownPackagerInitialization:
    """Construction of MarkdownPackager from its collaborators."""

    def test_packager_initialization(self):
        """The packager keeps the registry and deduplicator it was given."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            registry_path = root / "registry.json"
            storage_path = root / "assets"

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            packager = MarkdownPackager(registry, deduplicator)

            assert packager.registry == registry
            assert packager.deduplicator == deduplicator

    def test_packager_with_custom_manifest_filename(self):
        """A ``manifest_filename`` keyword argument is stored on the packager."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            registry_path = root / "registry.json"
            storage_path = root / "assets"

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)

            packager = MarkdownPackager(
                registry,
                deduplicator,
                manifest_filename="custom_manifest.json",
            )

            assert packager.manifest_filename == "custom_manifest.json"
|
||||
|
||||
|
||||
class TestPackageCreation:
    """Test .mdpkg ZIP package creation functionality."""

    @staticmethod
    def _make_packager(root):
        """Return a MarkdownPackager backed by a registry and store under *root*."""
        registry = AssetRegistry(root / "registry.json")
        deduplicator = AssetDeduplicator(root / "assets", registry)
        return MarkdownPackager(registry, deduplicator)

    def test_create_package_with_markdown_and_assets(self):
        """Test creating package with markdown file and referenced assets."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)

            # Create test document structure
            doc_dir = root / "document"
            doc_dir.mkdir()

            # The markdown links the image and the CSV file created below.
            markdown_content = "\n".join([
                "# Test Document",
                "",
                "Here is an image: ![Test Image](images/test.png)",
                "",
                "And a link to a file: [Data File](data/test.csv)",
                "",
            ])
            (doc_dir / "document.md").write_text(markdown_content)

            # Create asset directories and files
            (doc_dir / "images").mkdir()
            (doc_dir / "data").mkdir()
            (doc_dir / "images" / "test.png").write_bytes(b"PNG_fake_content")
            (doc_dir / "data" / "test.csv").write_text("col1,col2\nval1,val2")

            # Create packager
            packager = self._make_packager(root)

            # Create package
            package_path = root / "test_package.mdpkg"
            result = packager.create_package(doc_dir, package_path)

            assert package_path.exists()
            assert result["package_path"] == str(package_path)
            assert "assets" in result
            assert len(result["assets"]) == 2  # Image and CSV file

    def test_package_contains_manifest(self):
        """Test that created package contains proper manifest."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)

            # Create simple document
            doc_dir = root / "document"
            doc_dir.mkdir()
            (doc_dir / "document.md").write_text("# Simple Document\n\nNo assets.")

            # Create package
            packager = self._make_packager(root)
            package_path = root / "simple_package.mdpkg"
            packager.create_package(doc_dir, package_path)

            # Verify manifest exists in package
            with zipfile.ZipFile(package_path, 'r') as zf:
                manifest = json.loads(zf.read("manifest.json"))

            assert "package_info" in manifest
            assert "files" in manifest
            assert "assets" in manifest
            assert manifest["package_info"]["format_version"] == "1.0"

    def test_package_asset_deduplication(self):
        """Test that identical assets are deduplicated in package."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)

            # Create document with duplicate assets
            doc_dir = root / "document"
            doc_dir.mkdir()

            # Both references point at byte-identical files in different folders.
            markdown_content = "\n".join([
                "# Document with Duplicates",
                "",
                "First reference: ![Image](copy1/image.png)",
                "Second reference: ![Image](copy2/image.png)",
                "",
            ])
            (doc_dir / "document.md").write_text(markdown_content)

            # Create identical files in different locations
            identical_content = b"Identical PNG content"
            for folder in ("copy1", "copy2"):
                (doc_dir / folder).mkdir()
                (doc_dir / folder / "image.png").write_bytes(identical_content)

            # Create package
            packager = self._make_packager(root)
            package_path = root / "dedup_package.mdpkg"
            result = packager.create_package(doc_dir, package_path)

            # Should have 3 files (markdown + 2 duplicate assets) but only 1 unique asset hash
            assert len(result["files"]) == 3  # Markdown file + two asset files
            unique_hashes = {asset["content_hash"] for asset in result["assets"]}
            assert len(unique_hashes) == 1  # One unique asset

    def test_exclude_patterns_respected(self):
        """Test that exclude patterns prevent files from being packaged."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)

            # Create document with various files
            doc_dir = root / "document"
            doc_dir.mkdir()
            (doc_dir / "document.md").write_text("# Document")

            # Create files that should be excluded
            (doc_dir / ".DS_Store").write_text("Mac metadata")
            (doc_dir / "Thumbs.db").write_text("Windows thumbnails")
            (doc_dir / "temp").mkdir()
            (doc_dir / "temp" / "temp.txt").write_text("Temporary file")

            # Create package with exclude patterns; only the archive listing
            # is inspected, so the return value is not kept.
            packager = self._make_packager(root)
            package_path = root / "filtered_package.mdpkg"
            exclude_patterns = [".DS_Store", "Thumbs.db", "temp/*"]
            packager.create_package(
                doc_dir, package_path, exclude_patterns=exclude_patterns
            )

            # Verify excluded files are not in package
            with zipfile.ZipFile(package_path, 'r') as zf:
                file_list = zf.namelist()

            assert ".DS_Store" not in file_list
            assert "Thumbs.db" not in file_list
            assert "temp/temp.txt" not in file_list
|
||||
|
||||
|
||||
class TestPackageExtraction:
|
||||
"""Test package extraction and symlink restoration."""
|
||||
|
||||
def test_extract_package_with_assets(self):
    """Test extracting package and restoring asset structure."""
    with tempfile.TemporaryDirectory() as temp_dir:
        root = Path(temp_dir)
        registry_path = root / "registry.json"
        storage_path = root / "assets"

        # Create and package a document first; the markdown links the image
        # that is written next to it.
        doc_dir = root / "original_document"
        doc_dir.mkdir()

        markdown_file = doc_dir / "document.md"
        markdown_file.write_text("# Test Document\n\n![Test Image](test.png)")

        asset_file = doc_dir / "test.png"
        asset_file.write_bytes(b"PNG test content")

        registry = AssetRegistry(registry_path)
        deduplicator = AssetDeduplicator(storage_path, registry)
        packager = MarkdownPackager(registry, deduplicator)

        package_path = root / "test.mdpkg"
        packager.create_package(doc_dir, package_path)

        # Extract to new location; only the resulting files are checked, so
        # the return value is not kept.
        extract_dir = root / "extracted"
        packager.extract_package(package_path, extract_dir)

        assert extract_dir.exists()
        assert (extract_dir / "document.md").exists()
        assert (extract_dir / "test.png").exists()

        # Verify content matches
        extracted_md = (extract_dir / "document.md").read_text()
        assert "# Test Document" in extracted_md
|
||||
|
||||
def test_extract_with_symlink_restoration(self):
|
||||
"""Test that extraction creates appropriate symlinks to asset store."""
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
registry_path = Path(temp_dir) / "registry.json"
|
||||
storage_path = Path(temp_dir) / "assets"
|
||||
|
||||
# Create document and package
|
||||
doc_dir = Path(temp_dir) / "document"
|
||||
doc_dir.mkdir()
|
||||
(doc_dir / "document.md").write_text("# Doc\n\n")
|
||||
(doc_dir / "image.png").write_bytes(b"Image content")
|
||||
|
||||
registry = AssetRegistry(registry_path)
|
||||
deduplicator = AssetDeduplicator(storage_path, registry)
|
||||
packager = MarkdownPackager(registry, deduplicator)
|
||||
|
||||
package_path = Path(temp_dir) / "test.mdpkg"
|
||||
packager.create_package(doc_dir, package_path)
|
||||
|
||||
# Extract with symlink restoration
|
||||
extract_dir = Path(temp_dir) / "workspace"
|
||||
result = packager.extract_package(package_path, extract_dir,
|
||||
restore_symlinks=True)
|
||||
|
||||
extracted_asset = extract_dir / "image.png"
|
||||
assert extracted_asset.exists()
|
||||
|
||||
# On Unix systems, should be symlink to asset store
|
||||
import platform
|
||||
if platform.system() != "Windows":
|
||||
assert extracted_asset.is_symlink()
|
||||
|
||||
def test_extract_package_validates_manifest(self):
|
||||
"""Test that package extraction validates manifest structure."""
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# Create invalid package with malformed manifest
|
||||
package_path = Path(temp_dir) / "invalid.mdpkg"
|
||||
|
||||
with zipfile.ZipFile(package_path, 'w') as zf:
|
||||
# Add invalid manifest
|
||||
invalid_manifest = {"invalid": "structure"}
|
||||
zf.writestr("manifest.json", json.dumps(invalid_manifest))
|
||||
|
||||
registry_path = Path(temp_dir) / "registry.json"
|
||||
storage_path = Path(temp_dir) / "assets"
|
||||
|
||||
registry = AssetRegistry(registry_path)
|
||||
deduplicator = AssetDeduplicator(storage_path, registry)
|
||||
packager = MarkdownPackager(registry, deduplicator)
|
||||
|
||||
extract_dir = Path(temp_dir) / "extract"
|
||||
|
||||
with pytest.raises(PackagingError):
|
||||
packager.extract_package(package_path, extract_dir)
|
||||
|
||||
def test_extract_missing_assets_handled_gracefully(self):
|
||||
"""Test that extraction handles missing assets gracefully."""
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# Create package with reference to missing asset
|
||||
package_path = Path(temp_dir) / "missing_asset.mdpkg"
|
||||
|
||||
manifest = {
|
||||
"package_info": {"format_version": "1.0"},
|
||||
"files": ["document.md"],
|
||||
"assets": [{
|
||||
"path": "missing_asset.png",
|
||||
"content_hash": "nonexistent_hash_12345",
|
||||
"mime_type": "image/png"
|
||||
}]
|
||||
}
|
||||
|
||||
with zipfile.ZipFile(package_path, 'w') as zf:
|
||||
zf.writestr("manifest.json", json.dumps(manifest))
|
||||
zf.writestr("document.md", "# Doc with missing asset\n\n")
|
||||
|
||||
registry_path = Path(temp_dir) / "registry.json"
|
||||
storage_path = Path(temp_dir) / "assets"
|
||||
|
||||
registry = AssetRegistry(registry_path)
|
||||
deduplicator = AssetDeduplicator(storage_path, registry)
|
||||
packager = MarkdownPackager(registry, deduplicator)
|
||||
|
||||
extract_dir = Path(temp_dir) / "extract"
|
||||
result = packager.extract_package(package_path, extract_dir,
|
||||
restore_symlinks=True,
|
||||
missing_asset_handling="warn")
|
||||
|
||||
# Should extract what it can and warn about missing assets
|
||||
assert (extract_dir / "document.md").exists()
|
||||
assert "warnings" in result
|
||||
assert len(result["warnings"]) > 0
|
||||
|
||||
|
||||
class TestManifestGeneration:
    """Test manifest generation and validation."""

    @staticmethod
    def _new_packager(workdir):
        """Construct a MarkdownPackager using a registry file and asset dir inside *workdir*."""
        registry = AssetRegistry(workdir / "registry.json")
        return MarkdownPackager(registry, AssetDeduplicator(workdir / "assets", registry))

    def test_generate_manifest_structure(self):
        """Generated manifest exposes package_info, files and assets sections."""
        with tempfile.TemporaryDirectory() as temp_dir:
            packager = self._new_packager(Path(temp_dir))

            # Inputs handed to the manifest generator
            file_names = ["document.md", "readme.txt"]
            asset_entries = [
                {"path": "image.png", "content_hash": "hash123", "mime_type": "image/png"},
                {"path": "data.csv", "content_hash": "hash456", "mime_type": "text/csv"},
            ]

            manifest = packager.generate_manifest(file_names, asset_entries)

            for section in ("package_info", "files", "assets"):
                assert section in manifest
            assert manifest["package_info"]["format_version"] == "1.0"
            assert manifest["files"] == file_names
            assert len(manifest["assets"]) == 2

    def test_manifest_includes_creation_timestamp(self):
        """Manifest package_info carries a parseable ISO-format created_at value."""
        from datetime import datetime

        with tempfile.TemporaryDirectory() as temp_dir:
            packager = self._new_packager(Path(temp_dir))

            package_info = packager.generate_manifest([], [])["package_info"]

            assert "created_at" in package_info
            # fromisoformat raises ValueError if the value is not ISO formatted
            parsed = datetime.fromisoformat(package_info["created_at"])
            assert isinstance(parsed, datetime)

    def test_validate_manifest_structure(self):
        """validate_manifest returns True for a complete manifest and False otherwise."""
        with tempfile.TemporaryDirectory() as temp_dir:
            packager = self._new_packager(Path(temp_dir))

            # Manifest carrying every section the other tests rely on
            complete = {
                "package_info": {
                    "format_version": "1.0",
                    "created_at": "2023-01-01T12:00:00",
                },
                "files": ["document.md"],
                "assets": [],
            }
            # Manifest with none of those sections
            incomplete = {"incomplete": "structure"}

            assert packager.validate_manifest(complete) is True
            assert packager.validate_manifest(incomplete) is False
|
||||
|
||||
|
||||
class TestAssetResolution:
    """Test asset resolution during packaging."""

    @staticmethod
    def _make_packager(root):
        """Return a MarkdownPackager wired to a registry file and asset dir under *root*."""
        registry = AssetRegistry(root / "registry.json")
        deduplicator = AssetDeduplicator(root / "assets", registry)
        return MarkdownPackager(registry, deduplicator)

    def test_resolve_markdown_asset_references(self):
        """Test resolving asset references in markdown files."""
        with tempfile.TemporaryDirectory() as temp_dir:
            doc_dir = Path(temp_dir)
            packager = self._make_packager(doc_dir)

            # Create markdown with various asset references. The fixture must
            # contain one reference per entry in expected_paths below: two
            # images and two links.
            markdown_content = (
                "# Document\n"
                "\n"
                "Images:\n"
                "![Photo](images/photo.jpg)\n"
                "![Diagram](relative/path/image.png)\n"
                "\n"
                "Links:\n"
                "[Download PDF](documents/guide.pdf)\n"
                "[Data file](./data/results.csv)\n"
            )

            asset_paths = packager.resolve_asset_references(markdown_content, doc_dir)

            expected_paths = [
                "images/photo.jpg",
                "relative/path/image.png",
                "documents/guide.pdf",
                "data/results.csv",  # Should be normalized to remove ./
            ]

            assert len(asset_paths) == len(expected_paths)
            for path in expected_paths:
                assert path in asset_paths

    def test_resolve_html_asset_references(self):
        """Test resolving asset references in HTML content."""
        with tempfile.TemporaryDirectory() as temp_dir:
            doc_dir = Path(temp_dir)
            packager = self._make_packager(doc_dir)

            # HTML content with asset references
            html_content = (
                "\n"
                '<img src="images/banner.png" alt="Banner">\n'
                '<link rel="stylesheet" href="styles/main.css">\n'
                '<script src="js/script.js"></script>\n'
                '<a href="downloads/file.zip">Download</a>\n'
            )

            asset_paths = packager.resolve_asset_references(html_content, doc_dir)

            expected_paths = [
                "images/banner.png",
                "styles/main.css",
                "js/script.js",
                "downloads/file.zip",
            ]

            for path in expected_paths:
                assert path in asset_paths

    def test_ignore_external_urls(self):
        """Test that external URLs are ignored during asset resolution."""
        with tempfile.TemporaryDirectory() as temp_dir:
            doc_dir = Path(temp_dir)
            packager = self._make_packager(doc_dir)

            # Content with mix of local and external references: one local and
            # one external image, one local and one external link.
            content = (
                "\n"
                "![Local image](local_image.png)\n"
                "![External image](https://example.com/image.png)\n"
                "[Local file](document.pdf)\n"
                "[External link](http://example.com/page.html)\n"
            )

            asset_paths = packager.resolve_asset_references(content, doc_dir)

            # Should only include local references
            assert "local_image.png" in asset_paths
            assert "document.pdf" in asset_paths
            assert "https://example.com/image.png" not in asset_paths
            assert "http://example.com/page.html" not in asset_paths
|
||||
|
||||
|
||||
class TestPackageErrorHandling:
    """Test error handling scenarios in packaging operations."""

    @staticmethod
    def _new_packager(workdir):
        """Construct a MarkdownPackager using a registry file and asset dir inside *workdir*."""
        registry = AssetRegistry(workdir / "registry.json")
        return MarkdownPackager(registry, AssetDeduplicator(workdir / "assets", registry))

    def test_create_package_with_missing_source_directory(self):
        """Packaging a source directory that does not exist raises PackagingError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            workdir = Path(temp_dir)
            packager = self._new_packager(workdir)

            missing_source = workdir / "does_not_exist"
            target = workdir / "test.mdpkg"

            with pytest.raises(PackagingError):
                packager.create_package(missing_source, target)

    def test_extract_corrupted_package(self):
        """Extracting a file that is not a ZIP archive raises PackagingError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            workdir = Path(temp_dir)
            packager = self._new_packager(workdir)

            # Plain text written under a package file name
            bogus_package = workdir / "corrupted.mdpkg"
            bogus_package.write_text("This is not a valid ZIP file")

            destination = workdir / "extract"

            with pytest.raises(PackagingError):
                packager.extract_package(bogus_package, destination)

    def test_permission_error_during_extraction(self):
        """A PermissionError raised by ZipFile.extractall surfaces as PackagingError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            workdir = Path(temp_dir)
            packager = self._new_packager(workdir)

            # Build a small package by hand: a manifest plus one file
            package_path = workdir / "test.mdpkg"
            manifest = {
                "package_info": {"format_version": "1.0"},
                "files": ["test.txt"],
                "assets": [],
            }
            with zipfile.ZipFile(package_path, "w") as archive:
                archive.writestr("manifest.json", json.dumps(manifest))
                archive.writestr("test.txt", "Test content")

            # The destination already exists; the failure is simulated by
            # patching extractall rather than by changing directory permissions.
            destination = workdir / "extract"
            destination.mkdir()

            denied = PermissionError("Access denied")
            with patch("zipfile.ZipFile.extractall", side_effect=denied), \
                    pytest.raises(PackagingError):
                packager.extract_package(package_path, destination)
|
||||
Reference in New Issue
Block a user