"""
Test scenarios for MarkdownPackager ZIP package creation/extraction functionality.
This module tests the MarkdownPackager class for Issue #142: Phase 1 - Core Asset Management Module.
Tests cover .mdpkg ZIP package creation, package extraction with symlink restoration,
manifest generation and validation, and asset resolution during packaging.
Requirements:
- .mdpkg ZIP package creation
- Package extraction with symlink restoration
- Manifest generation and validation
- Asset resolution during packaging
"""
import json
import tempfile
import zipfile
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
import pytest
from markitect.assets.packager import MarkdownPackager
from markitect.assets.registry import AssetRegistry
from markitect.assets.deduplicator import AssetDeduplicator
from markitect.assets.exceptions import AssetError, PackagingError
class TestMarkdownPackagerInitialization:
    """Construction checks for MarkdownPackager."""

    @staticmethod
    def _dependencies(workspace):
        """Build a registry and a deduplicator rooted in ``workspace``.

        Returns:
            A ``(registry, deduplicator)`` tuple; the registry is backed by
            ``registry.json`` and the deduplicator by the ``assets``
            sub-path of ``workspace``.
        """
        root = Path(workspace)
        asset_registry = AssetRegistry(root / "registry.json")
        asset_deduplicator = AssetDeduplicator(root / "assets", asset_registry)
        return asset_registry, asset_deduplicator

    def test_packager_initialization(self):
        """The packager exposes the registry and deduplicator it was given."""
        with tempfile.TemporaryDirectory() as workspace:
            asset_registry, asset_deduplicator = self._dependencies(workspace)

            built = MarkdownPackager(asset_registry, asset_deduplicator)

            assert built.registry == asset_registry
            assert built.deduplicator == asset_deduplicator

    def test_packager_with_custom_manifest_filename(self):
        """A ``manifest_filename`` keyword is stored on the packager."""
        with tempfile.TemporaryDirectory() as workspace:
            asset_registry, asset_deduplicator = self._dependencies(workspace)

            built = MarkdownPackager(
                asset_registry,
                asset_deduplicator,
                manifest_filename="custom_manifest.json",
            )

            assert built.manifest_filename == "custom_manifest.json"
class TestPackageCreation:
    """Test .mdpkg ZIP package creation functionality."""

    def test_create_package_with_markdown_and_assets(self):
        """Test creating package with markdown file and referenced assets."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            storage_path = Path(temp_dir) / "assets"

            # Create test document structure
            doc_dir = Path(temp_dir) / "document"
            doc_dir.mkdir()
            markdown_file = doc_dir / "document.md"
            # The document must actually reference both assets created below;
            # the image reference was missing from the fixture.
            markdown_content = (
                "# Test Document\n"
                "Here is an image: ![Test Image](images/test.png)\n"
                "And a link to a file: [Data File](data/test.csv)\n"
            )
            markdown_file.write_text(markdown_content)

            # Create asset directories and files
            (doc_dir / "images").mkdir()
            (doc_dir / "data").mkdir()
            image_file = doc_dir / "images" / "test.png"
            image_file.write_bytes(b"PNG_fake_content")
            data_file = doc_dir / "data" / "test.csv"
            data_file.write_text("col1,col2\nval1,val2")

            # Create packager
            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)
            packager = MarkdownPackager(registry, deduplicator)

            # Create package
            package_path = Path(temp_dir) / "test_package.mdpkg"
            result = packager.create_package(doc_dir, package_path)

            assert package_path.exists()
            assert result["package_path"] == str(package_path)
            assert "assets" in result
            assert len(result["assets"]) == 2  # Image and CSV file

    def test_package_contains_manifest(self):
        """Test that created package contains proper manifest."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            storage_path = Path(temp_dir) / "assets"

            # Create simple document
            doc_dir = Path(temp_dir) / "document"
            doc_dir.mkdir()
            markdown_file = doc_dir / "document.md"
            markdown_file.write_text("# Simple Document\n\nNo assets.")

            # Create package
            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)
            packager = MarkdownPackager(registry, deduplicator)
            package_path = Path(temp_dir) / "simple_package.mdpkg"
            packager.create_package(doc_dir, package_path)

            # Verify manifest exists in package
            with zipfile.ZipFile(package_path, 'r') as zf:
                manifest_content = zf.read("manifest.json")
            manifest = json.loads(manifest_content)

            assert "package_info" in manifest
            assert "files" in manifest
            assert "assets" in manifest
            assert manifest["package_info"]["format_version"] == "1.0"

    def test_package_asset_deduplication(self):
        """Test that identical assets are deduplicated in package."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            storage_path = Path(temp_dir) / "assets"

            # Create document with duplicate assets
            doc_dir = Path(temp_dir) / "document"
            doc_dir.mkdir()
            markdown_file = doc_dir / "document.md"
            # Both references point at byte-identical files stored under
            # different directories; the references were missing from the
            # fixture.
            markdown_content = (
                "# Document with Duplicates\n"
                "First reference: ![Image 1](copy1/image.png)\n"
                "Second reference: ![Image 2](copy2/image.png)\n"
            )
            markdown_file.write_text(markdown_content)

            # Create identical files in different locations
            (doc_dir / "copy1").mkdir()
            (doc_dir / "copy2").mkdir()
            identical_content = b"Identical PNG content"
            (doc_dir / "copy1" / "image.png").write_bytes(identical_content)
            (doc_dir / "copy2" / "image.png").write_bytes(identical_content)

            # Create package
            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)
            packager = MarkdownPackager(registry, deduplicator)
            package_path = Path(temp_dir) / "dedup_package.mdpkg"
            result = packager.create_package(doc_dir, package_path)

            # Should have 3 files (markdown + 2 duplicate assets) but only
            # 1 unique asset hash
            assert len(result["files"]) == 3  # Markdown file + two asset files
            unique_hashes = {asset["content_hash"] for asset in result["assets"]}
            assert len(unique_hashes) == 1  # One unique asset

    def test_exclude_patterns_respected(self):
        """Test that exclude patterns prevent files from being packaged."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            storage_path = Path(temp_dir) / "assets"

            # Create document with various files
            doc_dir = Path(temp_dir) / "document"
            doc_dir.mkdir()
            markdown_file = doc_dir / "document.md"
            markdown_file.write_text("# Document")

            # Create files that should be excluded
            (doc_dir / ".DS_Store").write_text("Mac metadata")
            (doc_dir / "Thumbs.db").write_text("Windows thumbnails")
            (doc_dir / "temp").mkdir()
            (doc_dir / "temp" / "temp.txt").write_text("Temporary file")

            # Create package with exclude patterns
            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)
            packager = MarkdownPackager(registry, deduplicator)
            package_path = Path(temp_dir) / "filtered_package.mdpkg"
            exclude_patterns = [".DS_Store", "Thumbs.db", "temp/*"]
            # Only the archive contents are inspected, so the return value
            # is not kept.
            packager.create_package(doc_dir, package_path,
                                    exclude_patterns=exclude_patterns)

            # Verify excluded files are not in package
            with zipfile.ZipFile(package_path, 'r') as zf:
                file_list = zf.namelist()

            assert ".DS_Store" not in file_list
            assert "Thumbs.db" not in file_list
            assert "temp/temp.txt" not in file_list
class TestPackageExtraction:
    """Test package extraction and symlink restoration."""

    def test_extract_package_with_assets(self):
        """Test extracting package and restoring asset structure."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            storage_path = Path(temp_dir) / "assets"

            # Create and package a document first
            doc_dir = Path(temp_dir) / "original_document"
            doc_dir.mkdir()
            markdown_file = doc_dir / "document.md"
            # Reference the asset so the document and the asset file are
            # linked; the reference was missing from the fixture.
            markdown_file.write_text("# Test Document\n\n![Test Image](test.png)")
            asset_file = doc_dir / "test.png"
            asset_file.write_bytes(b"PNG test content")

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)
            packager = MarkdownPackager(registry, deduplicator)
            package_path = Path(temp_dir) / "test.mdpkg"
            packager.create_package(doc_dir, package_path)

            # Extract to new location
            extract_dir = Path(temp_dir) / "extracted"
            packager.extract_package(package_path, extract_dir)

            assert extract_dir.exists()
            assert (extract_dir / "document.md").exists()
            assert (extract_dir / "test.png").exists()

            # Verify content matches
            extracted_md = (extract_dir / "document.md").read_text()
            assert "# Test Document" in extracted_md

    def test_extract_with_symlink_restoration(self):
        """Test that extraction creates appropriate symlinks to asset store."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            storage_path = Path(temp_dir) / "assets"

            # Create document and package
            doc_dir = Path(temp_dir) / "document"
            doc_dir.mkdir()
            # The image reference was missing from the fixture.
            (doc_dir / "document.md").write_text("# Doc\n\n![Image](image.png)")
            (doc_dir / "image.png").write_bytes(b"Image content")

            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)
            packager = MarkdownPackager(registry, deduplicator)
            package_path = Path(temp_dir) / "test.mdpkg"
            packager.create_package(doc_dir, package_path)

            # Extract with symlink restoration
            extract_dir = Path(temp_dir) / "workspace"
            packager.extract_package(package_path, extract_dir,
                                     restore_symlinks=True)

            extracted_asset = extract_dir / "image.png"
            assert extracted_asset.exists()

            # The symlink assertion is skipped on Windows.
            import platform
            if platform.system() != "Windows":
                assert extracted_asset.is_symlink()

    def test_extract_package_validates_manifest(self):
        """Test that package extraction validates manifest structure."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create invalid package with malformed manifest
            package_path = Path(temp_dir) / "invalid.mdpkg"
            with zipfile.ZipFile(package_path, 'w') as zf:
                # Add invalid manifest
                invalid_manifest = {"invalid": "structure"}
                zf.writestr("manifest.json", json.dumps(invalid_manifest))

            registry_path = Path(temp_dir) / "registry.json"
            storage_path = Path(temp_dir) / "assets"
            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)
            packager = MarkdownPackager(registry, deduplicator)

            extract_dir = Path(temp_dir) / "extract"
            with pytest.raises(PackagingError):
                packager.extract_package(package_path, extract_dir)

    def test_extract_missing_assets_handled_gracefully(self):
        """Test that extraction handles missing assets gracefully."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create package with reference to missing asset: the manifest
            # lists an asset that is never written into the archive.
            package_path = Path(temp_dir) / "missing_asset.mdpkg"
            manifest = {
                "package_info": {"format_version": "1.0"},
                "files": ["document.md"],
                "assets": [{
                    "path": "missing_asset.png",
                    "content_hash": "nonexistent_hash_12345",
                    "mime_type": "image/png"
                }]
            }
            with zipfile.ZipFile(package_path, 'w') as zf:
                zf.writestr("manifest.json", json.dumps(manifest))
                # The reference to the missing asset was absent from the
                # fixture.
                zf.writestr(
                    "document.md",
                    "# Doc with missing asset\n\n![Missing](missing_asset.png)",
                )

            registry_path = Path(temp_dir) / "registry.json"
            storage_path = Path(temp_dir) / "assets"
            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)
            packager = MarkdownPackager(registry, deduplicator)

            extract_dir = Path(temp_dir) / "extract"
            result = packager.extract_package(package_path, extract_dir,
                                              restore_symlinks=True,
                                              missing_asset_handling="warn")

            # Should extract what it can and warn about missing assets
            assert (extract_dir / "document.md").exists()
            assert "warnings" in result
            assert len(result["warnings"]) > 0
class TestManifestGeneration:
    """Checks for manifest creation and manifest validation."""

    @staticmethod
    def _packager_in(workspace):
        """Return a MarkdownPackager whose registry and store live in ``workspace``."""
        root = Path(workspace)
        asset_registry = AssetRegistry(root / "registry.json")
        asset_deduplicator = AssetDeduplicator(root / "assets", asset_registry)
        return MarkdownPackager(asset_registry, asset_deduplicator)

    def test_generate_manifest_structure(self):
        """A generated manifest carries the three top-level sections."""
        with tempfile.TemporaryDirectory() as workspace:
            builder = self._packager_in(workspace)

            # Inputs handed to the generator
            file_names = ["document.md", "readme.txt"]
            asset_entries = [
                {"path": "image.png", "content_hash": "hash123", "mime_type": "image/png"},
                {"path": "data.csv", "content_hash": "hash456", "mime_type": "text/csv"}
            ]

            produced = builder.generate_manifest(file_names, asset_entries)

            assert "package_info" in produced
            assert "files" in produced
            assert "assets" in produced
            assert produced["package_info"]["format_version"] == "1.0"
            assert produced["files"] == file_names
            assert len(produced["assets"]) == 2

    def test_manifest_includes_creation_timestamp(self):
        """``package_info`` holds a ``created_at`` value parseable as ISO format."""
        with tempfile.TemporaryDirectory() as workspace:
            builder = self._packager_in(workspace)

            produced = builder.generate_manifest([], [])
            info = produced["package_info"]
            assert "created_at" in info

            # fromisoformat raises if the value is not an ISO timestamp
            from datetime import datetime
            parsed = datetime.fromisoformat(produced["package_info"]["created_at"])
            assert isinstance(parsed, datetime)

    def test_validate_manifest_structure(self):
        """Validation returns True for a complete manifest, False otherwise."""
        with tempfile.TemporaryDirectory() as workspace:
            builder = self._packager_in(workspace)

            # Complete manifest
            well_formed = {
                "package_info": {
                    "format_version": "1.0",
                    "created_at": "2023-01-01T12:00:00"
                },
                "files": ["document.md"],
                "assets": []
            }
            assert builder.validate_manifest(well_formed) is True

            # Manifest without any of the required sections
            malformed = {"incomplete": "structure"}
            assert builder.validate_manifest(malformed) is False
class TestAssetResolution:
    """Test asset resolution during packaging."""

    def test_resolve_markdown_asset_references(self):
        """Test resolving asset references in markdown files."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            storage_path = Path(temp_dir) / "assets"
            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)
            packager = MarkdownPackager(registry, deduplicator)

            # Create markdown with various asset references. The two image
            # references were missing from the fixture, which made the
            # four-path expectation below impossible to satisfy.
            markdown_content = (
                "# Document\n"
                "Images:\n"
                "![Photo](images/photo.jpg)\n"
                "![Relative image](relative/path/image.png)\n"
                "Links:\n"
                "[Download PDF](documents/guide.pdf)\n"
                "[Data file](./data/results.csv)\n"
            )
            doc_dir = Path(temp_dir)

            asset_paths = packager.resolve_asset_references(markdown_content, doc_dir)

            expected_paths = [
                "images/photo.jpg",
                "relative/path/image.png",
                "documents/guide.pdf",
                "data/results.csv"  # Should be normalized to remove ./
            ]
            assert len(asset_paths) == len(expected_paths)
            for path in expected_paths:
                assert path in asset_paths

    def test_resolve_html_asset_references(self):
        """Test resolving asset references in HTML content."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            storage_path = Path(temp_dir) / "assets"
            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)
            packager = MarkdownPackager(registry, deduplicator)

            # HTML content with asset references. The tags were missing from
            # the fixture (only the anchor text "Download" remained), so each
            # expected path is given one element that carries it.
            html_content = (
                "\n"
                '<img src="images/banner.png" alt="Banner">\n'
                '<link rel="stylesheet" href="styles/main.css">\n'
                '<script src="js/script.js"></script>\n'
                '<a href="downloads/file.zip">Download</a>\n'
            )
            doc_dir = Path(temp_dir)

            asset_paths = packager.resolve_asset_references(html_content, doc_dir)

            expected_paths = [
                "images/banner.png",
                "styles/main.css",
                "js/script.js",
                "downloads/file.zip"
            ]
            for path in expected_paths:
                assert path in asset_paths

    def test_ignore_external_urls(self):
        """Test that external URLs are ignored during asset resolution."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            storage_path = Path(temp_dir) / "assets"
            registry = AssetRegistry(registry_path)
            deduplicator = AssetDeduplicator(storage_path, registry)
            packager = MarkdownPackager(registry, deduplicator)

            # Content with mix of local and external references. The local
            # and external image references were missing from the fixture.
            content = (
                "\n"
                "![Local image](local_image.png)\n"
                "![External image](https://example.com/image.png)\n"
                "[Local file](document.pdf)\n"
                "[External link](http://example.com/page.html)\n"
            )
            doc_dir = Path(temp_dir)

            asset_paths = packager.resolve_asset_references(content, doc_dir)

            # Should only include local references
            assert "local_image.png" in asset_paths
            assert "document.pdf" in asset_paths
            assert "https://example.com/image.png" not in asset_paths
            assert "http://example.com/page.html" not in asset_paths
class TestPackageErrorHandling:
    """Failure-path checks for package creation and extraction."""

    @staticmethod
    def _packager_in(workspace):
        """Return a MarkdownPackager whose registry and store live in ``workspace``."""
        root = Path(workspace)
        asset_registry = AssetRegistry(root / "registry.json")
        asset_deduplicator = AssetDeduplicator(root / "assets", asset_registry)
        return MarkdownPackager(asset_registry, asset_deduplicator)

    def test_create_package_with_missing_source_directory(self):
        """Packaging a directory that does not exist raises PackagingError."""
        with tempfile.TemporaryDirectory() as workspace:
            builder = self._packager_in(workspace)
            root = Path(workspace)
            absent_source = root / "does_not_exist"
            target = root / "test.mdpkg"

            with pytest.raises(PackagingError):
                builder.create_package(absent_source, target)

    def test_extract_corrupted_package(self):
        """Extracting a file that is not a ZIP archive raises PackagingError."""
        with tempfile.TemporaryDirectory() as workspace:
            builder = self._packager_in(workspace)
            root = Path(workspace)

            # Plain text masquerading as a package
            bogus_archive = root / "corrupted.mdpkg"
            bogus_archive.write_text("This is not a valid ZIP file")
            destination = root / "extract"

            with pytest.raises(PackagingError):
                builder.extract_package(bogus_archive, destination)

    def test_permission_error_during_extraction(self):
        """A PermissionError from ``extractall`` surfaces as PackagingError."""
        with tempfile.TemporaryDirectory() as workspace:
            builder = self._packager_in(workspace)
            root = Path(workspace)

            # Build a well-formed package by hand
            archive_path = root / "test.mdpkg"
            manifest_data = {
                "package_info": {"format_version": "1.0"},
                "files": ["test.txt"],
                "assets": []
            }
            with zipfile.ZipFile(archive_path, 'w') as archive:
                archive.writestr("manifest.json", json.dumps(manifest_data))
                archive.writestr("test.txt", "Test content")

            # The destination exists up front; the failure is injected by
            # patching ZipFile.extractall rather than by changing the
            # directory's permissions.
            destination = root / "extract"
            destination.mkdir()

            denied = PermissionError("Access denied")
            with patch('zipfile.ZipFile.extractall', side_effect=denied), \
                    pytest.raises(PackagingError):
                builder.extract_package(archive_path, destination)