""" Test scenarios for MarkdownPackager ZIP package creation/extraction functionality. This module tests the MarkdownPackager class for Issue #142: Phase 1 - Core Asset Management Module. Tests cover .mdpkg ZIP package creation, package extraction with symlink restoration, manifest generation and validation, and asset resolution during packaging. Requirements: - .mdpkg ZIP package creation - Package extraction with symlink restoration - Manifest generation and validation - Asset resolution during packaging """ import json import tempfile import zipfile from pathlib import Path from unittest.mock import Mock, patch, MagicMock import pytest from markitect.assets.packager import MarkdownPackager from markitect.assets.registry import AssetRegistry from markitect.assets.deduplicator import AssetDeduplicator from markitect.assets.exceptions import AssetError, PackagingError class TestMarkdownPackagerInitialization: """Test MarkdownPackager initialization and setup.""" def test_packager_initialization(self): """Test MarkdownPackager can be initialized with dependencies.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) assert packager.registry == registry assert packager.deduplicator == deduplicator def test_packager_with_custom_manifest_filename(self): """Test MarkdownPackager accepts custom manifest filename.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator, manifest_filename="custom_manifest.json") assert packager.manifest_filename == "custom_manifest.json" class TestPackageCreation: """Test .mdpkg ZIP package creation functionality.""" def test_create_package_with_markdown_and_assets(self): """Test creating package with markdown file and referenced assets.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" # Create test document structure doc_dir = Path(temp_dir) / "document" doc_dir.mkdir() markdown_file = doc_dir / "document.md" markdown_content = """# Test Document Here is an image: ![Test Image](images/test.png) And a link to a file: [Data File](data/test.csv) """ markdown_file.write_text(markdown_content) # Create asset directories and files (doc_dir / "images").mkdir() (doc_dir / "data").mkdir() image_file = doc_dir / "images" / "test.png" image_file.write_bytes(b"PNG_fake_content") data_file = doc_dir / "data" / "test.csv" data_file.write_text("col1,col2\nval1,val2") # Create packager registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) # Create package package_path = Path(temp_dir) / "test_package.mdpkg" result = packager.create_package(doc_dir, package_path) assert package_path.exists() assert result["package_path"] == str(package_path) assert "assets" in result assert len(result["assets"]) == 2 # Image and CSV file def test_package_contains_manifest(self): """Test that created package contains proper manifest.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" # Create simple document doc_dir = Path(temp_dir) / "document" doc_dir.mkdir() markdown_file = doc_dir / "document.md" markdown_file.write_text("# Simple Document\n\nNo assets.") # Create package registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) package_path = Path(temp_dir) / "simple_package.mdpkg" packager.create_package(doc_dir, package_path) # Verify manifest exists in package with zipfile.ZipFile(package_path, 'r') as zf: manifest_content = zf.read("manifest.json") manifest = json.loads(manifest_content) assert "package_info" in manifest assert "files" in manifest assert "assets" in manifest assert manifest["package_info"]["format_version"] == "1.0" def test_package_asset_deduplication(self): """Test that identical assets are deduplicated in package.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" # Create document with duplicate assets doc_dir = Path(temp_dir) / "document" doc_dir.mkdir() markdown_file = doc_dir / "document.md" markdown_content = """# Document with Duplicates First reference: ![Image 1](copy1/image.png) Second reference: ![Image 2](copy2/image.png) """ markdown_file.write_text(markdown_content) # Create identical files in different locations (doc_dir / "copy1").mkdir() (doc_dir / "copy2").mkdir() identical_content = b"Identical PNG content" (doc_dir / "copy1" / "image.png").write_bytes(identical_content) (doc_dir / "copy2" / "image.png").write_bytes(identical_content) # Create package registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) package_path = Path(temp_dir) / "dedup_package.mdpkg" result = packager.create_package(doc_dir, package_path) # Should have 3 files (markdown + 2 duplicate assets) but only 1 unique asset hash assert len(result["files"]) == 3 # Markdown file + two asset files assert len(set(asset["content_hash"] for asset in result["assets"])) == 1 # One unique asset def test_exclude_patterns_respected(self): """Test that exclude patterns prevent files from being packaged.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" # Create document with various files doc_dir = Path(temp_dir) / "document" doc_dir.mkdir() markdown_file = doc_dir / "document.md" markdown_file.write_text("# Document") # Create files that should be excluded (doc_dir / ".DS_Store").write_text("Mac metadata") (doc_dir / "Thumbs.db").write_text("Windows thumbnails") (doc_dir / "temp").mkdir() (doc_dir / "temp" / "temp.txt").write_text("Temporary file") # Create package with exclude patterns registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) package_path = Path(temp_dir) / "filtered_package.mdpkg" exclude_patterns = [".DS_Store", "Thumbs.db", "temp/*"] result = packager.create_package(doc_dir, package_path, exclude_patterns=exclude_patterns) # Verify excluded files are not in package with zipfile.ZipFile(package_path, 'r') as zf: file_list = zf.namelist() assert ".DS_Store" not in file_list assert "Thumbs.db" not in file_list assert "temp/temp.txt" not in file_list class TestPackageExtraction: """Test package extraction and symlink restoration.""" def test_extract_package_with_assets(self): """Test extracting package and restoring asset structure.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" # Create and package a document first doc_dir = Path(temp_dir) / "original_document" doc_dir.mkdir() markdown_file = doc_dir / "document.md" markdown_file.write_text("# Test Document\n\n![Image](test.png)") asset_file = doc_dir / "test.png" asset_file.write_bytes(b"PNG test content") registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) package_path = Path(temp_dir) / "test.mdpkg" packager.create_package(doc_dir, package_path) # Extract to new location extract_dir = Path(temp_dir) / "extracted" result = packager.extract_package(package_path, extract_dir) assert extract_dir.exists() assert (extract_dir / "document.md").exists() assert (extract_dir / "test.png").exists() # Verify content matches extracted_md = (extract_dir / "document.md").read_text() assert "# Test Document" in extracted_md def test_extract_with_symlink_restoration(self): """Test that extraction creates appropriate symlinks to asset store.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" # Create document and package doc_dir = Path(temp_dir) / "document" doc_dir.mkdir() (doc_dir / "document.md").write_text("# Doc\n\n![](image.png)") (doc_dir / "image.png").write_bytes(b"Image content") registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) package_path = Path(temp_dir) / "test.mdpkg" packager.create_package(doc_dir, package_path) # Extract with symlink restoration extract_dir = Path(temp_dir) / "workspace" result = packager.extract_package(package_path, extract_dir, restore_symlinks=True) extracted_asset = extract_dir / "image.png" assert extracted_asset.exists() # On Unix systems, should be symlink to asset store import platform if platform.system() != "Windows": assert extracted_asset.is_symlink() def test_extract_package_validates_manifest(self): """Test that package extraction validates manifest structure.""" with tempfile.TemporaryDirectory() as temp_dir: # Create invalid package with malformed manifest package_path = Path(temp_dir) / "invalid.mdpkg" with zipfile.ZipFile(package_path, 'w') as zf: # Add invalid manifest invalid_manifest = {"invalid": "structure"} zf.writestr("manifest.json", json.dumps(invalid_manifest)) registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) extract_dir = Path(temp_dir) / "extract" with pytest.raises(PackagingError): packager.extract_package(package_path, extract_dir) def test_extract_missing_assets_handled_gracefully(self): """Test that extraction handles missing assets gracefully.""" with tempfile.TemporaryDirectory() as temp_dir: # Create package with reference to missing asset package_path = Path(temp_dir) / "missing_asset.mdpkg" manifest = { "package_info": {"format_version": "1.0"}, "files": ["document.md"], "assets": [{ "path": "missing_asset.png", "content_hash": "nonexistent_hash_12345", "mime_type": "image/png" }] } with zipfile.ZipFile(package_path, 'w') as zf: zf.writestr("manifest.json", json.dumps(manifest)) zf.writestr("document.md", "# Doc with missing asset\n\n![](missing_asset.png)") registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) extract_dir = Path(temp_dir) / "extract" result = packager.extract_package(package_path, extract_dir, restore_symlinks=True, missing_asset_handling="warn") # Should extract what it can and warn about missing assets assert (extract_dir / "document.md").exists() assert "warnings" in result assert len(result["warnings"]) > 0 class TestManifestGeneration: """Test manifest generation and validation.""" def test_generate_manifest_structure(self): """Test that generated manifest has proper structure.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) # Create test files list files = ["document.md", "readme.txt"] assets = [ {"path": "image.png", "content_hash": "hash123", "mime_type": "image/png"}, {"path": "data.csv", "content_hash": "hash456", "mime_type": "text/csv"} ] manifest = packager.generate_manifest(files, assets) assert "package_info" in manifest assert "files" in manifest assert "assets" in manifest assert manifest["package_info"]["format_version"] == "1.0" assert manifest["files"] == files assert len(manifest["assets"]) == 2 def test_manifest_includes_creation_timestamp(self): """Test that manifest includes creation timestamp.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) manifest = packager.generate_manifest([], []) assert "created_at" in manifest["package_info"] # Should be ISO format timestamp from datetime import datetime created_at = datetime.fromisoformat(manifest["package_info"]["created_at"]) assert isinstance(created_at, datetime) def test_validate_manifest_structure(self): """Test manifest validation functionality.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) # Valid manifest valid_manifest = { "package_info": { "format_version": "1.0", "created_at": "2023-01-01T12:00:00" }, "files": ["document.md"], "assets": [] } assert packager.validate_manifest(valid_manifest) is True # Invalid manifest missing required fields invalid_manifest = {"incomplete": "structure"} assert packager.validate_manifest(invalid_manifest) is False class TestAssetResolution: """Test asset resolution during packaging.""" def test_resolve_markdown_asset_references(self): """Test resolving asset references in markdown files.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) # Create markdown with various asset references markdown_content = """# Document Images: ![Alt text](images/photo.jpg) ![](relative/path/image.png) Links: [Download PDF](documents/guide.pdf) [Data file](./data/results.csv) """ doc_dir = Path(temp_dir) asset_paths = packager.resolve_asset_references(markdown_content, doc_dir) expected_paths = [ "images/photo.jpg", "relative/path/image.png", "documents/guide.pdf", "data/results.csv" # Should be normalized to remove ./ ] assert len(asset_paths) == len(expected_paths) for path in expected_paths: assert path in asset_paths def test_resolve_html_asset_references(self): """Test resolving asset references in HTML content.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) # HTML content with asset references html_content = """ Banner Download """ doc_dir = Path(temp_dir) asset_paths = packager.resolve_asset_references(html_content, doc_dir) expected_paths = [ "images/banner.png", "styles/main.css", "js/script.js", "downloads/file.zip" ] for path in expected_paths: assert path in asset_paths def test_ignore_external_urls(self): """Test that external URLs are ignored during asset resolution.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) # Content with mix of local and external references content = """ ![Local](local_image.png) ![External](https://example.com/image.png) [Local file](document.pdf) [External link](http://example.com/page.html) """ doc_dir = Path(temp_dir) asset_paths = packager.resolve_asset_references(content, doc_dir) # Should only include local references assert "local_image.png" in asset_paths assert "document.pdf" in asset_paths assert "https://example.com/image.png" not in asset_paths assert "http://example.com/page.html" not in asset_paths class TestPackageErrorHandling: """Test error handling scenarios in packaging operations.""" def test_create_package_with_missing_source_directory(self): """Test handling of missing source directory during package creation.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) nonexistent_dir = Path(temp_dir) / "does_not_exist" package_path = Path(temp_dir) / "test.mdpkg" with pytest.raises(PackagingError): packager.create_package(nonexistent_dir, package_path) def test_extract_corrupted_package(self): """Test handling of corrupted package files.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) # Create corrupted package file corrupted_package = Path(temp_dir) / "corrupted.mdpkg" corrupted_package.write_text("This is not a valid ZIP file") extract_dir = Path(temp_dir) / "extract" with pytest.raises(PackagingError): packager.extract_package(corrupted_package, extract_dir) def test_permission_error_during_extraction(self): """Test handling of permission errors during extraction.""" with tempfile.TemporaryDirectory() as temp_dir: registry_path = Path(temp_dir) / "registry.json" storage_path = Path(temp_dir) / "assets" registry = AssetRegistry(registry_path) deduplicator = AssetDeduplicator(storage_path, registry) packager = MarkdownPackager(registry, deduplicator) # Create valid package package_path = Path(temp_dir) / "test.mdpkg" with zipfile.ZipFile(package_path, 'w') as zf: manifest = { "package_info": {"format_version": "1.0"}, "files": ["test.txt"], "assets": [] } zf.writestr("manifest.json", json.dumps(manifest)) zf.writestr("test.txt", "Test content") # Mock permission error during extraction (by making extract_dir read-only) extract_dir = Path(temp_dir) / "extract" # Create the directory but make it read-only to simulate permission error extract_dir.mkdir() with patch('zipfile.ZipFile.extractall', side_effect=PermissionError("Access denied")): with pytest.raises(PackagingError): packager.extract_package(package_path, extract_dir)