feat: comprehensive asset management system and testing improvements
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled

Asset Management System (Issue #142):
- Add complete asset management framework with deduplication
- Implement AssetManager, AssetRegistry, and AssetDeduplicator classes
- Add AssetPackager for markdown document packaging
- Create comprehensive test suite for all asset management components
- Add asset constants and custom exceptions for robust error handling

Markdown Processing Enhancements:
- Update markdown_commands.py with improved functionality
- Enhanced parsing and content aggregation capabilities
- Improved filename encoding/decoding for special characters

Test Suite Improvements:
- Add comprehensive tests for Issue #138 markdown parsing
- Enhance Issue #139 content aggregation and end-to-end testing
- Complete test coverage for new asset management features

Examples and Documentation:
- Update BildungsKanonJon.md example with enhanced content
- Generate corresponding HTML output for documentation
- Add asset registry configuration

Development Tools:
- Add install script for simplified setup

This commit represents a major enhancement to MarkiTect's asset handling
capabilities with full test coverage and improved markdown processing.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-12 19:57:31 +02:00
parent 88787d903d
commit 81d3da5fe7
19 changed files with 4040 additions and 84 deletions

View File

@@ -50,13 +50,14 @@ Detailed content here.
try:
# This should fail initially (RED phase)
structure = parse_markdown_structure(temp_file)
structure, front_matter = parse_markdown_structure(temp_file)
# Verify structure
assert len(structure) == 1 # One part
assert structure[0].level == 1
assert structure[0].title == "Part 1: Introduction"
assert len(structure[0].children) == 2 # Two chapters
assert front_matter is None # No front matter in this test
# Check chapters
assert structure[0].children[0].level == 2
@@ -154,12 +155,14 @@ Section content.
try:
# This should fail initially (RED phase)
structure = parse_markdown_structure(temp_file)
structure, front_matter = parse_markdown_structure(temp_file)
# Front matter should be handled appropriately
# Front matter should be extracted and structure parsed
assert len(structure) == 1
assert structure[0].title == "Chapter 1"
assert structure[0].level == 1
assert front_matter is not None
assert 'title: "My Document"' in front_matter
finally:
temp_file.unlink()
@@ -178,10 +181,11 @@ Some more content.
try:
# This should fail initially (RED phase)
structure = parse_markdown_structure(temp_file)
structure, front_matter = parse_markdown_structure(temp_file)
# Should return empty structure or handle gracefully
assert structure == [] or structure is None
assert structure == []
assert front_matter is None
finally:
temp_file.unlink()
@@ -204,10 +208,11 @@ Back to level 2.
try:
# This should fail initially (RED phase)
structure = parse_markdown_structure(temp_file)
structure, front_matter = parse_markdown_structure(temp_file)
# Should handle inconsistent levels gracefully
assert len(structure) == 1 # Main title
assert front_matter is None
assert structure[0].level == 1
assert len(structure[0].children) >= 1 # Should have children

View File

@@ -365,7 +365,7 @@ More content""")
file_path.write_text(content)
files.append(file_path)
aggregated = aggregate_content(files, preserve_front_matter=True)
aggregated = aggregate_content(self.temp_dir, preserve_front_matter=True)
# Should have front matter at the beginning
lines = aggregated.split('\n')

View File

@@ -334,7 +334,7 @@ class TestBookLikeStructureProcessing:
assert "```python" in content
assert "| Feature | Description |" in content
assert "![Architecture](diagram.png)" in content
assert "- Step 1" in content
assert "1. First step" in content
def _create_book_structure(self):
"""Create a realistic book directory structure."""
@@ -552,7 +552,7 @@ Advanced topics.
# Verify exploded structure exists
assert exploded_dir.exists()
assert (exploded_dir / "getting_started").exists()
assert (exploded_dir / "user_guide" / "getting_started").exists()
# Now implode it back
imploded_file = self.temp_dir / "reconstructed.md"

View File

@@ -0,0 +1,430 @@
"""
Test scenarios for AssetDeduplicator symlink and deduplication functionality.
This module tests the AssetDeduplicator class for Issue #142: Phase 1 - Core Asset Management Module.
Tests cover content-based asset deduplication, symlink creation with relative paths,
Windows fallback to file copying, and conflict resolution.
Requirements:
- Content-based asset deduplication
- Symlink creation with relative paths
- Windows fallback to file copying
- Conflict resolution for existing assets
"""
import os
import platform
import shutil
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
import pytest
from markitect.assets.deduplicator import AssetDeduplicator
from markitect.assets.registry import AssetRegistry
from markitect.assets.exceptions import AssetError, DeduplicationError
class TestAssetDeduplicatorInitialization:
"""Test AssetDeduplicator initialization and setup."""
def test_deduplicator_initialization(self):
"""Test AssetDeduplicator can be initialized with storage path and registry."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
assert deduplicator.storage_path == storage_path
assert deduplicator.registry == registry
assert storage_path.exists() # Should create storage directory
def test_deduplicator_creates_storage_directory(self):
"""Test that AssetDeduplicator creates storage directory if it doesn't exist."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "nonexistent" / "assets"
registry_path = Path(temp_dir) / "registry.json"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
assert storage_path.exists()
assert storage_path.is_dir()
class TestAssetDeduplication:
"""Test content-based asset deduplication functionality."""
def test_deduplicate_identical_files(self):
"""Test that identical files are deduplicated properly."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
# Create two identical files
file1 = Path(temp_dir) / "file1.txt"
file2 = Path(temp_dir) / "file2.txt"
content = "Identical content for deduplication test"
file1.write_text(content)
file2.write_text(content)
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
# Store first file
result1 = deduplicator.store_asset(file1)
# Store second identical file - should be deduplicated
result2 = deduplicator.store_asset(file2)
# Both should reference the same stored file
assert result1["content_hash"] == result2["content_hash"]
assert result1["stored_path"] == result2["stored_path"]
def test_different_files_stored_separately(self):
"""Test that different files are stored separately."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
# Create two different files
file1 = Path(temp_dir) / "file1.txt"
file2 = Path(temp_dir) / "file2.txt"
file1.write_text("Content of first file")
file2.write_text("Content of second file")
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
result1 = deduplicator.store_asset(file1)
result2 = deduplicator.store_asset(file2)
# Should have different hashes and storage paths
assert result1["content_hash"] != result2["content_hash"]
assert result1["stored_path"] != result2["stored_path"]
class TestSymlinkCreation:
"""Test symlink creation functionality with relative paths."""
def test_create_symlink_unix(self):
"""Test symlink creation on Unix-like systems."""
if platform.system() == "Windows":
pytest.skip("Skipping Unix symlink test on Windows")
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
source_file = Path(temp_dir) / "source.txt"
source_file.write_text("Source file content")
target_dir = Path(temp_dir) / "target_dir"
target_dir.mkdir()
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
# Store asset first
store_result = deduplicator.store_asset(source_file)
stored_path = Path(store_result["stored_path"])
# Create symlink to stored asset
link_path = target_dir / "linked_asset.txt"
deduplicator.create_asset_link(stored_path, link_path)
assert link_path.is_symlink()
assert link_path.resolve() == stored_path.resolve()
# Test that symlink uses relative path
assert not link_path.readlink().is_absolute()
def test_symlink_uses_relative_path(self):
"""Test that created symlinks use relative paths."""
if platform.system() == "Windows":
pytest.skip("Skipping relative symlink test on Windows")
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
source_file = Path(temp_dir) / "source.txt"
source_file.write_text("Source file for relative symlink test")
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
store_result = deduplicator.store_asset(source_file)
stored_path = Path(store_result["stored_path"])
# Create symlink in subdirectory
link_dir = Path(temp_dir) / "workspace" / "subdir"
link_dir.mkdir(parents=True)
link_path = link_dir / "asset_link.txt"
deduplicator.create_asset_link(stored_path, link_path)
# Verify symlink target is relative
link_target = link_path.readlink()
assert not link_target.is_absolute()
assert str(link_target).startswith("..")
class TestWindowsFallbackCopying:
"""Test Windows fallback to file copying."""
def test_file_copy_fallback_on_symlink_failure(self):
"""Test that file copying is used when symlink creation fails."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
source_file = Path(temp_dir) / "source.txt"
content = "Content for copy fallback test"
source_file.write_text(content)
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
store_result = deduplicator.store_asset(source_file)
stored_path = Path(store_result["stored_path"])
target_path = Path(temp_dir) / "copied_asset.txt"
# Mock symlink creation to fail
with patch('os.symlink', side_effect=OSError("Symlink not supported")):
deduplicator.create_asset_link(stored_path, target_path)
# Should fallback to copying
assert target_path.exists()
assert not target_path.is_symlink()
assert target_path.read_text() == content
@pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
def test_windows_uses_file_copying_by_default(self):
"""Test that Windows uses file copying by default."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
source_file = Path(temp_dir) / "source.txt"
content = "Content for Windows copy test"
source_file.write_text(content)
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
store_result = deduplicator.store_asset(source_file)
stored_path = Path(store_result["stored_path"])
target_path = Path(temp_dir) / "windows_asset.txt"
deduplicator.create_asset_link(stored_path, target_path)
# On Windows, should use copying instead of symlinks
assert target_path.exists()
assert not target_path.is_symlink()
assert target_path.read_text() == content
class TestConflictResolution:
"""Test conflict resolution for existing assets."""
def test_existing_file_conflict_resolution(self):
"""Test handling of conflicts when target file already exists."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
source_file = Path(temp_dir) / "source.txt"
source_file.write_text("Source content")
# Create existing target file
target_path = Path(temp_dir) / "existing_target.txt"
target_path.write_text("Existing content")
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
store_result = deduplicator.store_asset(source_file)
stored_path = Path(store_result["stored_path"])
# Should handle conflict gracefully
deduplicator.create_asset_link(stored_path, target_path,
conflict_resolution="overwrite")
# Target should now link to stored asset
if platform.system() != "Windows":
assert target_path.is_symlink()
def test_backup_conflict_resolution(self):
"""Test backup creation during conflict resolution."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
source_file = Path(temp_dir) / "source.txt"
source_file.write_text("New content")
# Create existing target file
target_path = Path(temp_dir) / "target.txt"
original_content = "Original content to backup"
target_path.write_text(original_content)
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
store_result = deduplicator.store_asset(source_file)
stored_path = Path(store_result["stored_path"])
# Create link with backup resolution
deduplicator.create_asset_link(stored_path, target_path,
conflict_resolution="backup")
# Should create backup file
backup_path = target_path.with_suffix(target_path.suffix + ".bak")
assert backup_path.exists()
assert backup_path.read_text() == original_content
def test_skip_conflict_resolution(self):
"""Test skipping operation when file exists and resolution is 'skip'."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
source_file = Path(temp_dir) / "source.txt"
source_file.write_text("Source content")
# Create existing target file
target_path = Path(temp_dir) / "existing.txt"
original_content = "Original content"
target_path.write_text(original_content)
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
store_result = deduplicator.store_asset(source_file)
stored_path = Path(store_result["stored_path"])
# Skip operation for existing file
result = deduplicator.create_asset_link(stored_path, target_path,
conflict_resolution="skip")
# Original file should remain unchanged
assert target_path.read_text() == original_content
assert result["skipped"] is True
class TestAssetDeduplicatorErrorHandling:
"""Test error handling scenarios."""
def test_store_nonexistent_file_raises_error(self):
"""Test that storing non-existent file raises appropriate error."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
nonexistent_file = Path(temp_dir) / "does_not_exist.txt"
with pytest.raises(AssetError):
deduplicator.store_asset(nonexistent_file)
def test_invalid_storage_path_raises_error(self):
"""Test that invalid storage path raises appropriate error."""
with tempfile.TemporaryDirectory() as temp_dir:
# Try to use a file as storage path (should be directory)
file_path = Path(temp_dir) / "not_a_directory.txt"
file_path.write_text("This is a file, not a directory")
registry_path = Path(temp_dir) / "registry.json"
registry = AssetRegistry(registry_path)
with pytest.raises(DeduplicationError):
AssetDeduplicator(file_path, registry)
def test_permission_error_handling(self):
"""Test handling of permission errors during asset storage."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
source_file = Path(temp_dir) / "source.txt"
source_file.write_text("Test content")
# Mock shutil.copy2 to raise PermissionError
with patch('shutil.copy2', side_effect=PermissionError("Permission denied")):
with pytest.raises(DeduplicationError):
deduplicator.store_asset(source_file)
class TestAssetRetrieval:
"""Test asset retrieval and verification functionality."""
def test_retrieve_stored_asset(self):
"""Test retrieving stored asset by content hash."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
source_file = Path(temp_dir) / "source.txt"
content = "Content for retrieval test"
source_file.write_text(content)
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
store_result = deduplicator.store_asset(source_file)
content_hash = store_result["content_hash"]
# Retrieve asset
retrieved_path = deduplicator.get_asset_path(content_hash)
assert retrieved_path.exists()
assert retrieved_path.read_text() == content
def test_verify_asset_integrity(self):
"""Test verifying stored asset integrity."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
source_file = Path(temp_dir) / "source.txt"
source_file.write_text("Content for integrity test")
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
store_result = deduplicator.store_asset(source_file)
content_hash = store_result["content_hash"]
# Verify integrity
is_valid = deduplicator.verify_asset_integrity(content_hash)
assert is_valid is True
def test_detect_corrupted_asset(self):
"""Test detection of corrupted stored assets."""
with tempfile.TemporaryDirectory() as temp_dir:
storage_path = Path(temp_dir) / "assets"
registry_path = Path(temp_dir) / "registry.json"
source_file = Path(temp_dir) / "source.txt"
source_file.write_text("Original content")
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
store_result = deduplicator.store_asset(source_file)
content_hash = store_result["content_hash"]
stored_path = Path(store_result["stored_path"])
# Corrupt the stored file
stored_path.write_text("Corrupted content")
# Verify should detect corruption
is_valid = deduplicator.verify_asset_integrity(content_hash)
assert is_valid is False

View File

@@ -0,0 +1,574 @@
"""
Test scenarios for AssetManager high-level API coordination functionality.
This module tests the AssetManager class for Issue #142: Phase 1 - Core Asset Management Module.
Tests cover high-level API coordination, integration with existing markitect patterns,
error handling and logging, and configuration management integration.
Requirements:
- High-level API coordinating all operations
- Integration with existing markitect patterns
- Error handling and logging
- Configuration management integration
"""
import tempfile
import json
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
import pytest
import logging
from markitect.assets.manager import AssetManager
from markitect.assets.registry import AssetRegistry
from markitect.assets.deduplicator import AssetDeduplicator
from markitect.assets.packager import MarkdownPackager
from markitect.assets.exceptions import AssetError, AssetManagerError
from markitect.config_manager import ConfigurationManager
class TestAssetManagerInitialization:
"""Test AssetManager initialization and configuration."""
def test_manager_initialization_with_config(self):
"""Test AssetManager can be initialized with configuration."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json"),
"enable_deduplication": True,
"default_conflict_resolution": "backup"
}
}
manager = AssetManager(config)
assert manager.storage_path == Path(temp_dir) / "assets"
assert manager.registry_path == Path(temp_dir) / "registry.json"
assert manager.enable_deduplication is True
def test_manager_initialization_with_defaults(self):
"""Test AssetManager initialization with default configuration."""
manager = AssetManager()
# Should use reasonable defaults
assert manager.storage_path.name == "assets"
assert manager.registry_path.name == "asset_registry.json"
assert manager.enable_deduplication is True
def test_manager_creates_required_components(self):
"""Test that AssetManager creates required component instances."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
assert isinstance(manager.registry, AssetRegistry)
assert isinstance(manager.deduplicator, AssetDeduplicator)
assert isinstance(manager.packager, MarkdownPackager)
def test_manager_integration_with_config_manager(self):
"""Test AssetManager integration with ConfigurationManager."""
with tempfile.TemporaryDirectory() as temp_dir:
# Create config file
config_file = Path(temp_dir) / ".markitect.json"
config_data = {
"assets": {
"storage_path": str(Path(temp_dir) / "custom_assets"),
"enable_deduplication": False
}
}
config_file.write_text(json.dumps(config_data))
# Mock ConfigurationManager to return our config
with patch.object(ConfigurationManager, 'get_current_config', return_value=config_data):
manager = AssetManager.from_config_manager()
assert str(manager.storage_path).endswith("custom_assets")
assert manager.enable_deduplication is False
class TestAssetManagerHighLevelOperations:
"""Test high-level asset management operations."""
def test_add_asset_with_deduplication(self):
"""Test adding asset with automatic deduplication."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
# Create test asset
asset_file = Path(temp_dir) / "test_asset.txt"
asset_file.write_text("Test asset content")
# Add asset
result = manager.add_asset(asset_file, "Test asset")
assert "content_hash" in result
assert "stored_path" in result
assert "deduplicated" in result
assert result["description"] == "Test asset"
def test_add_duplicate_asset_detected(self):
"""Test that duplicate assets are properly detected and handled."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
# Create identical assets
asset1 = Path(temp_dir) / "asset1.txt"
asset2 = Path(temp_dir) / "asset2.txt"
content = "Identical content for deduplication"
asset1.write_text(content)
asset2.write_text(content)
# Add first asset
result1 = manager.add_asset(asset1, "First asset")
# Add second identical asset
result2 = manager.add_asset(asset2, "Second asset")
# Should be deduplicated
assert result1["content_hash"] == result2["content_hash"]
assert result2["deduplicated"] is True
def test_list_assets_with_metadata(self):
"""Test listing all assets with their metadata."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
# Add multiple assets
assets = []
for i in range(3):
asset_file = Path(temp_dir) / f"asset_{i}.txt"
asset_file.write_text(f"Content for asset {i}")
result = manager.add_asset(asset_file, f"Asset {i}")
assets.append(result)
# List all assets
asset_list = manager.list_assets()
assert len(asset_list) == 3
for asset in asset_list:
assert "content_hash" in asset
assert "description" in asset
assert "size" in asset
assert "mime_type" in asset
def test_get_asset_info_by_hash(self):
"""Test retrieving detailed asset information by content hash."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
# Add asset
asset_file = Path(temp_dir) / "info_test.txt"
asset_file.write_text("Information test content")
result = manager.add_asset(asset_file, "Info test asset")
content_hash = result["content_hash"]
# Get detailed info
asset_info = manager.get_asset_info(content_hash)
assert asset_info["content_hash"] == content_hash
assert asset_info["description"] == "Info test asset"
assert "created_at" in asset_info
assert "file_path" in asset_info
def test_remove_asset_by_hash(self):
"""Test removing asset by content hash."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
# Add asset
asset_file = Path(temp_dir) / "remove_test.txt"
asset_file.write_text("Content to be removed")
result = manager.add_asset(asset_file)
content_hash = result["content_hash"]
# Verify asset exists
assert manager.asset_exists(content_hash)
# Remove asset
removal_result = manager.remove_asset(content_hash)
assert removal_result["removed"] is True
assert not manager.asset_exists(content_hash)
class TestAssetManagerPackaging:
"""Test high-level package creation and extraction operations."""
def test_create_document_package(self):
"""Test creating complete document package with assets."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
# Create document structure
doc_dir = Path(temp_dir) / "document"
doc_dir.mkdir()
# Create markdown document
md_file = doc_dir / "document.md"
md_content = """# Test Document
This document has assets:
- Image: ![Test](images/test.png)
- Data: [CSV File](data/test.csv)
"""
md_file.write_text(md_content)
# Create assets
(doc_dir / "images").mkdir()
(doc_dir / "data").mkdir()
(doc_dir / "images" / "test.png").write_bytes(b"PNG content")
(doc_dir / "data" / "test.csv").write_text("col1,col2\n1,2")
# Create package
package_path = Path(temp_dir) / "test_document.mdpkg"
result = manager.create_package(doc_dir, package_path,
description="Test document package")
assert package_path.exists()
assert result["package_path"] == str(package_path)
assert "assets_processed" in result
assert result["assets_processed"] == 2
def test_extract_document_package_to_workspace(self):
"""Test extracting package to workspace with proper asset linking."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
# Create and package a document first
doc_dir = Path(temp_dir) / "source_doc"
doc_dir.mkdir()
(doc_dir / "readme.md").write_text("# README\n\n![Logo](logo.png)")
(doc_dir / "logo.png").write_bytes(b"Logo content")
package_path = Path(temp_dir) / "source.mdpkg"
manager.create_package(doc_dir, package_path)
# Extract to workspace
workspace_dir = Path(temp_dir) / "workspace"
result = manager.extract_package(package_path, workspace_dir,
restore_assets=True)
assert workspace_dir.exists()
assert (workspace_dir / "readme.md").exists()
assert (workspace_dir / "logo.png").exists()
assert result["extracted_files"] >= 1
assert "asset_links_created" in result
def test_package_with_custom_options(self):
"""Test package creation with custom options and exclude patterns."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
# Create document with files to exclude
doc_dir = Path(temp_dir) / "document"
doc_dir.mkdir()
(doc_dir / "document.md").write_text("# Document")
(doc_dir / "important.txt").write_text("Important content")
(doc_dir / "temp.tmp").write_text("Temporary file")
(doc_dir / ".hidden").write_text("Hidden file")
package_path = Path(temp_dir) / "custom.mdpkg"
# Create package with custom options
result = manager.create_package(
doc_dir, package_path,
exclude_patterns=["*.tmp", ".*"],
description="Custom package",
metadata={"author": "Test", "version": "1.0"}
)
# Verify exclusions worked
import zipfile
with zipfile.ZipFile(package_path, 'r') as zf:
file_list = zf.namelist()
assert "document.md" in file_list
assert "important.txt" in file_list
assert "temp.tmp" not in file_list
assert ".hidden" not in file_list
class TestAssetManagerErrorHandling:
"""Test error handling and logging functionality."""
def test_add_nonexistent_asset_raises_error(self):
"""Test that adding non-existent asset raises appropriate error."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
nonexistent_file = Path(temp_dir) / "does_not_exist.txt"
with pytest.raises(AssetError):
manager.add_asset(nonexistent_file)
def test_get_info_for_nonexistent_asset_raises_error(self):
"""Test that getting info for non-existent asset raises error."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
with pytest.raises(AssetManagerError):
manager.get_asset_info("nonexistent_hash_12345")
def test_manager_logs_operations(self):
"""Test that AssetManager logs important operations."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
# Set up logging capture
import logging
log_messages = []
class TestHandler(logging.Handler):
def emit(self, record):
log_messages.append(record.getMessage())
test_handler = TestHandler()
logger = logging.getLogger('markitect.assets')
logger.addHandler(test_handler)
logger.setLevel(logging.INFO)
manager = AssetManager(config)
# Add an asset (should be logged)
asset_file = Path(temp_dir) / "log_test.txt"
asset_file.write_text("Test content for logging")
manager.add_asset(asset_file, "Log test asset")
# Check that operation was logged
assert any("Adding asset" in msg for msg in log_messages)
def test_configuration_validation_errors(self):
"""Test that invalid configuration raises appropriate errors."""
# Invalid storage path (file instead of directory)
with tempfile.TemporaryDirectory() as temp_dir:
invalid_file = Path(temp_dir) / "not_a_directory.txt"
invalid_file.write_text("This is a file")
config = {
"assets": {
"storage_path": str(invalid_file),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
with pytest.raises(AssetManagerError):
AssetManager(config)
class TestAssetManagerWorkflows:
"""Test complete workflows and integration scenarios."""
def test_complete_document_workflow(self):
"""Test complete workflow: add assets, create package, extract elsewhere."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
# 1. Create document with assets
doc_dir = Path(temp_dir) / "project"
doc_dir.mkdir()
# Main document
(doc_dir / "project.md").write_text("""# Project Document
Assets:
![Chart](charts/performance.png)
[Data](data/results.json)
""")
# Assets
(doc_dir / "charts").mkdir()
(doc_dir / "data").mkdir()
(doc_dir / "charts" / "performance.png").write_bytes(b"Chart data")
(doc_dir / "data" / "results.json").write_text('{"status": "success"}')
# 2. Create package
package_path = Path(temp_dir) / "project.mdpkg"
package_result = manager.create_package(doc_dir, package_path)
assert package_result["assets_processed"] == 2
# 3. Extract to new location
extract_dir = Path(temp_dir) / "extracted_project"
extract_result = manager.extract_package(package_path, extract_dir,
restore_assets=True)
# Verify complete extraction
assert (extract_dir / "project.md").exists()
assert (extract_dir / "charts" / "performance.png").exists()
assert (extract_dir / "data" / "results.json").exists()
# Verify content integrity
extracted_json = (extract_dir / "data" / "results.json").read_text()
assert '{"status": "success"}' == extracted_json
def test_asset_sharing_between_packages(self):
"""Test that assets can be shared between different packages."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
# Create shared asset
shared_asset = Path(temp_dir) / "shared_logo.png"
shared_asset.write_bytes(b"Shared logo content")
# Add shared asset
asset_result = manager.add_asset(shared_asset, "Company logo")
shared_hash = asset_result["content_hash"]
# Create first document using shared asset
doc1_dir = Path(temp_dir) / "doc1"
doc1_dir.mkdir()
(doc1_dir / "doc1.md").write_text("# Doc 1\n\n![Logo](../shared_logo.png)")
# Copy shared asset to doc structure
(doc1_dir / "logo.png").write_bytes(b"Shared logo content")
# Create second document using same asset
doc2_dir = Path(temp_dir) / "doc2"
doc2_dir.mkdir()
(doc2_dir / "doc2.md").write_text("# Doc 2\n\n![Logo](../shared_logo.png)")
(doc2_dir / "logo.png").write_bytes(b"Shared logo content")
# Create packages
pkg1_path = Path(temp_dir) / "doc1.mdpkg"
pkg2_path = Path(temp_dir) / "doc2.mdpkg"
pkg1_result = manager.create_package(doc1_dir, pkg1_path)
pkg2_result = manager.create_package(doc2_dir, pkg2_path)
# Both should reference the same deduplicated asset
assert pkg1_result["assets_processed"] >= 1
assert pkg2_result["assets_processed"] >= 1
# Asset should only be stored once in the asset store
asset_list = manager.list_assets()
logo_assets = [a for a in asset_list if a.get("description") == "Company logo"]
assert len(logo_assets) == 1 # Only one copy stored
def test_performance_requirements_met(self):
"""Test that operations complete within performance requirements (<100ms)."""
with tempfile.TemporaryDirectory() as temp_dir:
config = {
"assets": {
"storage_path": str(Path(temp_dir) / "assets"),
"registry_path": str(Path(temp_dir) / "registry.json")
}
}
manager = AssetManager(config)
# Create reasonably sized test asset (1MB)
test_content = b"x" * (1024 * 1024) # 1MB
asset_file = Path(temp_dir) / "performance_test.bin"
asset_file.write_bytes(test_content)
# Time the operation
import time
start_time = time.time()
result = manager.add_asset(asset_file, "Performance test asset")
end_time = time.time()
operation_time = (end_time - start_time) * 1000 # Convert to ms
# Should complete in under 100ms for 1MB file
assert operation_time < 100, f"Operation took {operation_time}ms, expected <100ms"
assert result["content_hash"] is not None

View File

@@ -0,0 +1,270 @@
"""
Test scenarios for AssetRegistry JSON persistence functionality.
This module tests the AssetRegistry class for Issue #142: Phase 1 - Core Asset Management Module.
Tests cover JSON-based metadata persistence, SHA-256 content hashing, MIME type detection,
and thread-safe registry operations.
Requirements:
- JSON-based asset metadata persistence
- SHA-256 content hashing for deduplication
- MIME type detection and file size tracking
- Thread-safe registry operations
"""
import json
import os
import tempfile
import threading
import time
from pathlib import Path
from unittest.mock import Mock, patch
import pytest
from markitect.assets.registry import AssetRegistry
from markitect.assets.exceptions import AssetError, RegistryError
class TestAssetRegistryCore:
"""Core functionality tests for AssetRegistry."""
def test_registry_initialization(self):
"""Test AssetRegistry can be initialized with registry path."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "test_registry.json"
registry = AssetRegistry(registry_path)
assert registry.registry_path == registry_path
assert registry_path.exists() # Should create empty registry
def test_registry_loads_existing_json(self):
"""Test AssetRegistry loads existing JSON registry file."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "existing_registry.json"
# Create existing registry with test data
test_data = {
"assets": {
"hash123": {
"path": "/test/file.txt",
"content_hash": "hash123",
"mime_type": "text/plain",
"size": 100
}
}
}
registry_path.write_text(json.dumps(test_data))
registry = AssetRegistry(registry_path)
assets = registry.list_assets()
assert len(assets) == 1
assert assets[0]["content_hash"] == "hash123"
class TestAssetRegistryHashing:
"""Test SHA-256 content hashing functionality."""
def test_generate_content_hash_from_file(self):
"""Test generating SHA-256 hash from file content."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
test_file = Path(temp_dir) / "test.txt"
test_file.write_text("Hello, World!")
registry = AssetRegistry(registry_path)
content_hash = registry.generate_content_hash(test_file)
# SHA-256 of "Hello, World!" should be consistent
expected_hash = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"
assert content_hash == expected_hash
def test_generate_content_hash_from_bytes(self):
"""Test generating SHA-256 hash from byte content."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
registry = AssetRegistry(registry_path)
test_content = b"Binary content test"
content_hash = registry.generate_content_hash(test_content)
# Should generate consistent hash for same content
assert len(content_hash) == 64 # SHA-256 hex length
assert isinstance(content_hash, str)
class TestAssetRegistryMimeTypes:
"""Test MIME type detection functionality."""
def test_detect_mime_type_text_file(self):
"""Test MIME type detection for text files."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
test_file = Path(temp_dir) / "test.txt"
test_file.write_text("Plain text content")
registry = AssetRegistry(registry_path)
mime_type = registry.detect_mime_type(test_file)
assert mime_type.startswith("text/")
def test_detect_mime_type_image_file(self):
"""Test MIME type detection for image files."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
# Create minimal PNG file (8-byte PNG signature + IHDR)
png_data = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR'
test_file = Path(temp_dir) / "test.png"
test_file.write_bytes(png_data)
registry = AssetRegistry(registry_path)
mime_type = registry.detect_mime_type(test_file)
assert mime_type == "image/png"
class TestAssetRegistryOperations:
"""Test asset registration and retrieval operations."""
def test_register_asset(self):
"""Test registering a new asset in the registry."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
test_file = Path(temp_dir) / "asset.txt"
test_file.write_text("Test asset content")
registry = AssetRegistry(registry_path)
asset_info = registry.register_asset(test_file)
assert "content_hash" in asset_info
assert "mime_type" in asset_info
assert "size" in asset_info
assert asset_info["path"] == str(test_file)
def test_get_asset_by_hash(self):
"""Test retrieving asset information by content hash."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
test_file = Path(temp_dir) / "asset.txt"
test_file.write_text("Test content for retrieval")
registry = AssetRegistry(registry_path)
asset_info = registry.register_asset(test_file)
content_hash = asset_info["content_hash"]
retrieved_asset = registry.get_asset(content_hash)
assert retrieved_asset["content_hash"] == content_hash
assert retrieved_asset["path"] == str(test_file)
def test_asset_exists_check(self):
"""Test checking if asset exists by hash."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
test_file = Path(temp_dir) / "asset.txt"
test_file.write_text("Existence test content")
registry = AssetRegistry(registry_path)
asset_info = registry.register_asset(test_file)
content_hash = asset_info["content_hash"]
assert registry.asset_exists(content_hash)
assert not registry.asset_exists("nonexistent_hash")
class TestAssetRegistryPersistence:
"""Test JSON persistence and file operations."""
def test_registry_persists_to_json(self):
"""Test that registry changes are persisted to JSON file."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
test_file = Path(temp_dir) / "asset.txt"
test_file.write_text("Content to persist")
registry = AssetRegistry(registry_path)
registry.register_asset(test_file)
# Verify JSON file contains our asset
with open(registry_path) as f:
data = json.load(f)
assert "assets" in data
assert len(data["assets"]) == 1
def test_registry_handles_corrupted_json(self):
"""Test registry handles corrupted JSON gracefully."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "corrupted_registry.json"
registry_path.write_text("{ invalid json content")
# Should handle corrupted JSON and create new registry
registry = AssetRegistry(registry_path)
assets = registry.list_assets()
assert assets == []
class TestAssetRegistryThreadSafety:
"""Test thread-safe registry operations."""
def test_concurrent_asset_registration(self):
"""Test that multiple threads can register assets simultaneously."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
registry = AssetRegistry(registry_path)
results = []
errors = []
def register_asset_thread(thread_id):
try:
test_file = Path(temp_dir) / f"asset_{thread_id}.txt"
test_file.write_text(f"Content for thread {thread_id}")
asset_info = registry.register_asset(test_file)
results.append(asset_info)
except Exception as e:
errors.append(e)
# Start multiple threads
threads = []
for i in range(5):
thread = threading.Thread(target=register_asset_thread, args=(i,))
threads.append(thread)
thread.start()
# Wait for all threads to complete
for thread in threads:
thread.join()
assert len(errors) == 0, f"Thread safety errors: {errors}"
assert len(results) == 5
assert len(set(r["content_hash"] for r in results)) == 5 # All unique hashes
class TestAssetRegistryErrorHandling:
"""Test error handling and exception scenarios."""
def test_register_nonexistent_file_raises_error(self):
"""Test that registering non-existent file raises appropriate error."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
nonexistent_file = Path(temp_dir) / "does_not_exist.txt"
registry = AssetRegistry(registry_path)
with pytest.raises(AssetError):
registry.register_asset(nonexistent_file)
def test_get_nonexistent_asset_raises_error(self):
"""Test that getting non-existent asset raises appropriate error."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
registry = AssetRegistry(registry_path)
with pytest.raises(RegistryError):
registry.get_asset("nonexistent_hash_12345")
def test_invalid_registry_path_raises_error(self):
"""Test that invalid registry path raises appropriate error."""
invalid_path = Path("/root/protected/cannot_write.json")
with pytest.raises(RegistryError):
AssetRegistry(invalid_path)

View File

@@ -0,0 +1,580 @@
"""
Test scenarios for MarkdownPackager ZIP package creation/extraction functionality.
This module tests the MarkdownPackager class for Issue #142: Phase 1 - Core Asset Management Module.
Tests cover .mdpkg ZIP package creation, package extraction with symlink restoration,
manifest generation and validation, and asset resolution during packaging.
Requirements:
- .mdpkg ZIP package creation
- Package extraction with symlink restoration
- Manifest generation and validation
- Asset resolution during packaging
"""
import json
import tempfile
import zipfile
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
import pytest
from markitect.assets.packager import MarkdownPackager
from markitect.assets.registry import AssetRegistry
from markitect.assets.deduplicator import AssetDeduplicator
from markitect.assets.exceptions import AssetError, PackagingError
class TestMarkdownPackagerInitialization:
"""Test MarkdownPackager initialization and setup."""
def test_packager_initialization(self):
"""Test MarkdownPackager can be initialized with dependencies."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
assert packager.registry == registry
assert packager.deduplicator == deduplicator
def test_packager_with_custom_manifest_filename(self):
"""Test MarkdownPackager accepts custom manifest filename."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator,
manifest_filename="custom_manifest.json")
assert packager.manifest_filename == "custom_manifest.json"
class TestPackageCreation:
"""Test .mdpkg ZIP package creation functionality."""
def test_create_package_with_markdown_and_assets(self):
"""Test creating package with markdown file and referenced assets."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
# Create test document structure
doc_dir = Path(temp_dir) / "document"
doc_dir.mkdir()
markdown_file = doc_dir / "document.md"
markdown_content = """# Test Document
Here is an image: ![Test Image](images/test.png)
And a link to a file: [Data File](data/test.csv)
"""
markdown_file.write_text(markdown_content)
# Create asset directories and files
(doc_dir / "images").mkdir()
(doc_dir / "data").mkdir()
image_file = doc_dir / "images" / "test.png"
image_file.write_bytes(b"PNG_fake_content")
data_file = doc_dir / "data" / "test.csv"
data_file.write_text("col1,col2\nval1,val2")
# Create packager
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
# Create package
package_path = Path(temp_dir) / "test_package.mdpkg"
result = packager.create_package(doc_dir, package_path)
assert package_path.exists()
assert result["package_path"] == str(package_path)
assert "assets" in result
assert len(result["assets"]) == 2 # Image and CSV file
def test_package_contains_manifest(self):
"""Test that created package contains proper manifest."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
# Create simple document
doc_dir = Path(temp_dir) / "document"
doc_dir.mkdir()
markdown_file = doc_dir / "document.md"
markdown_file.write_text("# Simple Document\n\nNo assets.")
# Create package
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
package_path = Path(temp_dir) / "simple_package.mdpkg"
packager.create_package(doc_dir, package_path)
# Verify manifest exists in package
with zipfile.ZipFile(package_path, 'r') as zf:
manifest_content = zf.read("manifest.json")
manifest = json.loads(manifest_content)
assert "package_info" in manifest
assert "files" in manifest
assert "assets" in manifest
assert manifest["package_info"]["format_version"] == "1.0"
def test_package_asset_deduplication(self):
"""Test that identical assets are deduplicated in package."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
# Create document with duplicate assets
doc_dir = Path(temp_dir) / "document"
doc_dir.mkdir()
markdown_file = doc_dir / "document.md"
markdown_content = """# Document with Duplicates
First reference: ![Image 1](copy1/image.png)
Second reference: ![Image 2](copy2/image.png)
"""
markdown_file.write_text(markdown_content)
# Create identical files in different locations
(doc_dir / "copy1").mkdir()
(doc_dir / "copy2").mkdir()
identical_content = b"Identical PNG content"
(doc_dir / "copy1" / "image.png").write_bytes(identical_content)
(doc_dir / "copy2" / "image.png").write_bytes(identical_content)
# Create package
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
package_path = Path(temp_dir) / "dedup_package.mdpkg"
result = packager.create_package(doc_dir, package_path)
# Should have 3 files (markdown + 2 duplicate assets) but only 1 unique asset hash
assert len(result["files"]) == 3 # Markdown file + two asset files
assert len(set(asset["content_hash"] for asset in result["assets"])) == 1 # One unique asset
def test_exclude_patterns_respected(self):
"""Test that exclude patterns prevent files from being packaged."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
# Create document with various files
doc_dir = Path(temp_dir) / "document"
doc_dir.mkdir()
markdown_file = doc_dir / "document.md"
markdown_file.write_text("# Document")
# Create files that should be excluded
(doc_dir / ".DS_Store").write_text("Mac metadata")
(doc_dir / "Thumbs.db").write_text("Windows thumbnails")
(doc_dir / "temp").mkdir()
(doc_dir / "temp" / "temp.txt").write_text("Temporary file")
# Create package with exclude patterns
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
package_path = Path(temp_dir) / "filtered_package.mdpkg"
exclude_patterns = [".DS_Store", "Thumbs.db", "temp/*"]
result = packager.create_package(doc_dir, package_path,
exclude_patterns=exclude_patterns)
# Verify excluded files are not in package
with zipfile.ZipFile(package_path, 'r') as zf:
file_list = zf.namelist()
assert ".DS_Store" not in file_list
assert "Thumbs.db" not in file_list
assert "temp/temp.txt" not in file_list
class TestPackageExtraction:
"""Test package extraction and symlink restoration."""
def test_extract_package_with_assets(self):
"""Test extracting package and restoring asset structure."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
# Create and package a document first
doc_dir = Path(temp_dir) / "original_document"
doc_dir.mkdir()
markdown_file = doc_dir / "document.md"
markdown_file.write_text("# Test Document\n\n![Image](test.png)")
asset_file = doc_dir / "test.png"
asset_file.write_bytes(b"PNG test content")
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
package_path = Path(temp_dir) / "test.mdpkg"
packager.create_package(doc_dir, package_path)
# Extract to new location
extract_dir = Path(temp_dir) / "extracted"
result = packager.extract_package(package_path, extract_dir)
assert extract_dir.exists()
assert (extract_dir / "document.md").exists()
assert (extract_dir / "test.png").exists()
# Verify content matches
extracted_md = (extract_dir / "document.md").read_text()
assert "# Test Document" in extracted_md
def test_extract_with_symlink_restoration(self):
"""Test that extraction creates appropriate symlinks to asset store."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
# Create document and package
doc_dir = Path(temp_dir) / "document"
doc_dir.mkdir()
(doc_dir / "document.md").write_text("# Doc\n\n![](image.png)")
(doc_dir / "image.png").write_bytes(b"Image content")
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
package_path = Path(temp_dir) / "test.mdpkg"
packager.create_package(doc_dir, package_path)
# Extract with symlink restoration
extract_dir = Path(temp_dir) / "workspace"
result = packager.extract_package(package_path, extract_dir,
restore_symlinks=True)
extracted_asset = extract_dir / "image.png"
assert extracted_asset.exists()
# On Unix systems, should be symlink to asset store
import platform
if platform.system() != "Windows":
assert extracted_asset.is_symlink()
def test_extract_package_validates_manifest(self):
"""Test that package extraction validates manifest structure."""
with tempfile.TemporaryDirectory() as temp_dir:
# Create invalid package with malformed manifest
package_path = Path(temp_dir) / "invalid.mdpkg"
with zipfile.ZipFile(package_path, 'w') as zf:
# Add invalid manifest
invalid_manifest = {"invalid": "structure"}
zf.writestr("manifest.json", json.dumps(invalid_manifest))
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
extract_dir = Path(temp_dir) / "extract"
with pytest.raises(PackagingError):
packager.extract_package(package_path, extract_dir)
def test_extract_missing_assets_handled_gracefully(self):
"""Test that extraction handles missing assets gracefully."""
with tempfile.TemporaryDirectory() as temp_dir:
# Create package with reference to missing asset
package_path = Path(temp_dir) / "missing_asset.mdpkg"
manifest = {
"package_info": {"format_version": "1.0"},
"files": ["document.md"],
"assets": [{
"path": "missing_asset.png",
"content_hash": "nonexistent_hash_12345",
"mime_type": "image/png"
}]
}
with zipfile.ZipFile(package_path, 'w') as zf:
zf.writestr("manifest.json", json.dumps(manifest))
zf.writestr("document.md", "# Doc with missing asset\n\n![](missing_asset.png)")
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
extract_dir = Path(temp_dir) / "extract"
result = packager.extract_package(package_path, extract_dir,
restore_symlinks=True,
missing_asset_handling="warn")
# Should extract what it can and warn about missing assets
assert (extract_dir / "document.md").exists()
assert "warnings" in result
assert len(result["warnings"]) > 0
class TestManifestGeneration:
"""Test manifest generation and validation."""
def test_generate_manifest_structure(self):
"""Test that generated manifest has proper structure."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
# Create test files list
files = ["document.md", "readme.txt"]
assets = [
{"path": "image.png", "content_hash": "hash123", "mime_type": "image/png"},
{"path": "data.csv", "content_hash": "hash456", "mime_type": "text/csv"}
]
manifest = packager.generate_manifest(files, assets)
assert "package_info" in manifest
assert "files" in manifest
assert "assets" in manifest
assert manifest["package_info"]["format_version"] == "1.0"
assert manifest["files"] == files
assert len(manifest["assets"]) == 2
def test_manifest_includes_creation_timestamp(self):
"""Test that manifest includes creation timestamp."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
manifest = packager.generate_manifest([], [])
assert "created_at" in manifest["package_info"]
# Should be ISO format timestamp
from datetime import datetime
created_at = datetime.fromisoformat(manifest["package_info"]["created_at"])
assert isinstance(created_at, datetime)
def test_validate_manifest_structure(self):
"""Test manifest validation functionality."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
# Valid manifest
valid_manifest = {
"package_info": {
"format_version": "1.0",
"created_at": "2023-01-01T12:00:00"
},
"files": ["document.md"],
"assets": []
}
assert packager.validate_manifest(valid_manifest) is True
# Invalid manifest missing required fields
invalid_manifest = {"incomplete": "structure"}
assert packager.validate_manifest(invalid_manifest) is False
class TestAssetResolution:
"""Test asset resolution during packaging."""
def test_resolve_markdown_asset_references(self):
"""Test resolving asset references in markdown files."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
# Create markdown with various asset references
markdown_content = """# Document
Images:
![Alt text](images/photo.jpg)
![](relative/path/image.png)
Links:
[Download PDF](documents/guide.pdf)
[Data file](./data/results.csv)
"""
doc_dir = Path(temp_dir)
asset_paths = packager.resolve_asset_references(markdown_content, doc_dir)
expected_paths = [
"images/photo.jpg",
"relative/path/image.png",
"documents/guide.pdf",
"data/results.csv" # Should be normalized to remove ./
]
assert len(asset_paths) == len(expected_paths)
for path in expected_paths:
assert path in asset_paths
def test_resolve_html_asset_references(self):
"""Test resolving asset references in HTML content."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
# HTML content with asset references
html_content = """
<img src="images/banner.png" alt="Banner">
<link rel="stylesheet" href="styles/main.css">
<script src="js/script.js"></script>
<a href="downloads/file.zip">Download</a>
"""
doc_dir = Path(temp_dir)
asset_paths = packager.resolve_asset_references(html_content, doc_dir)
expected_paths = [
"images/banner.png",
"styles/main.css",
"js/script.js",
"downloads/file.zip"
]
for path in expected_paths:
assert path in asset_paths
def test_ignore_external_urls(self):
"""Test that external URLs are ignored during asset resolution."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
# Content with mix of local and external references
content = """
![Local](local_image.png)
![External](https://example.com/image.png)
[Local file](document.pdf)
[External link](http://example.com/page.html)
"""
doc_dir = Path(temp_dir)
asset_paths = packager.resolve_asset_references(content, doc_dir)
# Should only include local references
assert "local_image.png" in asset_paths
assert "document.pdf" in asset_paths
assert "https://example.com/image.png" not in asset_paths
assert "http://example.com/page.html" not in asset_paths
class TestPackageErrorHandling:
"""Test error handling scenarios in packaging operations."""
def test_create_package_with_missing_source_directory(self):
"""Test handling of missing source directory during package creation."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
nonexistent_dir = Path(temp_dir) / "does_not_exist"
package_path = Path(temp_dir) / "test.mdpkg"
with pytest.raises(PackagingError):
packager.create_package(nonexistent_dir, package_path)
def test_extract_corrupted_package(self):
"""Test handling of corrupted package files."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
# Create corrupted package file
corrupted_package = Path(temp_dir) / "corrupted.mdpkg"
corrupted_package.write_text("This is not a valid ZIP file")
extract_dir = Path(temp_dir) / "extract"
with pytest.raises(PackagingError):
packager.extract_package(corrupted_package, extract_dir)
def test_permission_error_during_extraction(self):
"""Test handling of permission errors during extraction."""
with tempfile.TemporaryDirectory() as temp_dir:
registry_path = Path(temp_dir) / "registry.json"
storage_path = Path(temp_dir) / "assets"
registry = AssetRegistry(registry_path)
deduplicator = AssetDeduplicator(storage_path, registry)
packager = MarkdownPackager(registry, deduplicator)
# Create valid package
package_path = Path(temp_dir) / "test.mdpkg"
with zipfile.ZipFile(package_path, 'w') as zf:
manifest = {
"package_info": {"format_version": "1.0"},
"files": ["test.txt"],
"assets": []
}
zf.writestr("manifest.json", json.dumps(manifest))
zf.writestr("test.txt", "Test content")
# Mock permission error during extraction (by making extract_dir read-only)
extract_dir = Path(temp_dir) / "extract"
# Create the directory but make it read-only to simulate permission error
extract_dir.mkdir()
with patch('zipfile.ZipFile.extractall', side_effect=PermissionError("Access denied")):
with pytest.raises(PackagingError):
packager.extract_package(package_path, extract_dir)