Files
markitect-main/tests/test_issue_142_asset_manager.py
tegwick 81d3da5fe7
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat: comprehensive asset management system and testing improvements
Asset Management System (Issue #142):
- Add complete asset management framework with deduplication
- Implement AssetManager, AssetRegistry, and AssetDeduplicator classes
- Add AssetPackager for markdown document packaging
- Create comprehensive test suite for all asset management components
- Add asset constants and custom exceptions for robust error handling

Markdown Processing Enhancements:
- Update markdown_commands.py with improved functionality
- Enhance parsing and content aggregation capabilities
- Improve filename encoding/decoding for special characters

Test Suite Improvements:
- Add comprehensive tests for Issue #138 markdown parsing
- Enhance Issue #139 content aggregation and end-to-end testing
- Complete test coverage for new asset management features

Examples and Documentation:
- Update BildungsKanonJon.md example with enhanced content
- Generate corresponding HTML output for documentation
- Add asset registry configuration

Development Tools:
- Add install script for simplified setup

This commit represents a major enhancement to MarkiTect's asset handling
capabilities with full test coverage and improved markdown processing.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-12 19:57:31 +02:00

574 lines
22 KiB
Python

"""
Test scenarios for AssetManager high-level API coordination functionality.
This module tests the AssetManager class for Issue #142: Phase 1 - Core Asset Management Module.
Tests cover high-level API coordination, integration with existing markitect patterns,
error handling and logging, and configuration management integration.
Requirements:
- High-level API coordinating all operations
- Integration with existing markitect patterns
- Error handling and logging
- Configuration management integration
"""
import tempfile
import json
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
import pytest
import logging
from markitect.assets.manager import AssetManager
from markitect.assets.registry import AssetRegistry
from markitect.assets.deduplicator import AssetDeduplicator
from markitect.assets.packager import MarkdownPackager
from markitect.assets.exceptions import AssetError, AssetManagerError
from markitect.config_manager import ConfigurationManager
class TestAssetManagerInitialization:
    """Construction of AssetManager from explicit, default and managed config."""

    def test_manager_initialization_with_config(self):
        """Values from an explicit ``assets`` section show up on the manager."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            storage_dir = root / "assets"
            registry_file = root / "registry.json"
            asset_settings = {
                "storage_path": str(storage_dir),
                "registry_path": str(registry_file),
                "enable_deduplication": True,
                "default_conflict_resolution": "backup",
            }

            manager = AssetManager({"assets": asset_settings})

            assert manager.storage_path == storage_dir
            assert manager.registry_path == registry_file
            assert manager.enable_deduplication is True

    def test_manager_initialization_with_defaults(self):
        """A manager built without arguments exposes the expected default names."""
        manager = AssetManager()

        # Only the final path component is checked; the parent directory of
        # the defaults is not asserted by this test.
        assert manager.storage_path.name == "assets"
        assert manager.registry_path.name == "asset_registry.json"
        assert manager.enable_deduplication is True

    def test_manager_creates_required_components(self):
        """The manager exposes registry, deduplicator and packager instances."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            asset_settings = {
                "storage_path": str(root / "assets"),
                "registry_path": str(root / "registry.json"),
            }

            manager = AssetManager({"assets": asset_settings})

            assert isinstance(manager.registry, AssetRegistry)
            assert isinstance(manager.deduplicator, AssetDeduplicator)
            assert isinstance(manager.packager, MarkdownPackager)

    def test_manager_integration_with_config_manager(self):
        """``from_config_manager`` picks up what ConfigurationManager reports."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            config_data = {
                "assets": {
                    "storage_path": str(root / "custom_assets"),
                    "enable_deduplication": False,
                }
            }

            # The same data is also written to disk as a .markitect.json file.
            config_file = root / ".markitect.json"
            config_file.write_text(json.dumps(config_data))

            # ConfigurationManager.get_current_config is patched to hand back
            # config_data directly.
            patched_lookup = patch.object(
                ConfigurationManager,
                'get_current_config',
                return_value=config_data,
            )
            with patched_lookup:
                manager = AssetManager.from_config_manager()

                assert str(manager.storage_path).endswith("custom_assets")
                assert manager.enable_deduplication is False
class TestAssetManagerHighLevelOperations:
    """Exercise add / list / inspect / remove through the AssetManager API.

    Every test runs against a manager whose storage directory and registry
    file are placed inside a throwaway temporary directory.
    """

    @staticmethod
    def _make_manager(temp_dir):
        """Return an AssetManager configured to keep its data under *temp_dir*.

        The name deliberately lacks the ``test_`` prefix so pytest does not
        collect it as a test.
        """
        root = Path(temp_dir)
        return AssetManager({
            "assets": {
                "storage_path": str(root / "assets"),
                "registry_path": str(root / "registry.json"),
            }
        })

    def test_add_asset_with_deduplication(self):
        """Adding a file returns hash, stored path, dedup flag and description."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)

            asset_file = Path(temp_dir) / "test_asset.txt"
            asset_file.write_text("Test asset content")

            result = manager.add_asset(asset_file, "Test asset")

            for key in ("content_hash", "stored_path", "deduplicated"):
                assert key in result
            assert result["description"] == "Test asset"

    def test_add_duplicate_asset_detected(self):
        """Two files with identical bytes share a hash; the second is flagged."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)

            content = "Identical content for deduplication"
            asset1 = Path(temp_dir) / "asset1.txt"
            asset2 = Path(temp_dir) / "asset2.txt"
            asset1.write_text(content)
            asset2.write_text(content)

            result1 = manager.add_asset(asset1, "First asset")
            result2 = manager.add_asset(asset2, "Second asset")

            assert result1["content_hash"] == result2["content_hash"]
            assert result2["deduplicated"] is True

    def test_list_assets_with_metadata(self):
        """``list_assets`` reports every added asset with its metadata keys."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)

            # Three files with distinct content, so none of them deduplicate.
            for i in range(3):
                asset_file = Path(temp_dir) / f"asset_{i}.txt"
                asset_file.write_text(f"Content for asset {i}")
                manager.add_asset(asset_file, f"Asset {i}")

            asset_list = manager.list_assets()

            assert len(asset_list) == 3
            for asset in asset_list:
                assert "content_hash" in asset
                assert "description" in asset
                assert "size" in asset
                assert "mime_type" in asset

    def test_get_asset_info_by_hash(self):
        """``get_asset_info`` returns the record for a previously added hash."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)

            asset_file = Path(temp_dir) / "info_test.txt"
            asset_file.write_text("Information test content")
            content_hash = manager.add_asset(
                asset_file, "Info test asset"
            )["content_hash"]

            asset_info = manager.get_asset_info(content_hash)

            assert asset_info["content_hash"] == content_hash
            assert asset_info["description"] == "Info test asset"
            assert "created_at" in asset_info
            assert "file_path" in asset_info

    def test_remove_asset_by_hash(self):
        """An asset exists after being added and is gone after ``remove_asset``."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)

            asset_file = Path(temp_dir) / "remove_test.txt"
            asset_file.write_text("Content to be removed")
            # No description is passed here, unlike the other tests.
            content_hash = manager.add_asset(asset_file)["content_hash"]

            assert manager.asset_exists(content_hash)

            removal_result = manager.remove_asset(content_hash)

            assert removal_result["removed"] is True
            assert not manager.asset_exists(content_hash)
class TestAssetManagerPackaging:
    """Package creation and extraction through the AssetManager API.

    Every test runs against a manager whose storage directory and registry
    file are placed inside a throwaway temporary directory.
    """

    @staticmethod
    def _make_manager(temp_dir):
        """Return an AssetManager configured to keep its data under *temp_dir*.

        The name deliberately lacks the ``test_`` prefix so pytest does not
        collect it as a test.
        """
        root = Path(temp_dir)
        return AssetManager({
            "assets": {
                "storage_path": str(root / "assets"),
                "registry_path": str(root / "registry.json"),
            }
        })

    def test_create_document_package(self):
        """Packaging a directory with two referenced assets reports both."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)

            doc_dir = Path(temp_dir) / "document"
            doc_dir.mkdir()

            # Markdown that references one image and one data file.
            md_content = (
                "# Test Document\n"
                "This document has assets:\n"
                "- Image: ![Test](images/test.png)\n"
                "- Data: [CSV File](data/test.csv)\n"
            )
            (doc_dir / "document.md").write_text(md_content)

            # The two files the markdown points at.
            (doc_dir / "images").mkdir()
            (doc_dir / "data").mkdir()
            (doc_dir / "images" / "test.png").write_bytes(b"PNG content")
            (doc_dir / "data" / "test.csv").write_text("col1,col2\n1,2")

            package_path = Path(temp_dir) / "test_document.mdpkg"
            result = manager.create_package(
                doc_dir, package_path, description="Test document package"
            )

            assert package_path.exists()
            assert result["package_path"] == str(package_path)
            assert "assets_processed" in result
            assert result["assets_processed"] == 2

    def test_extract_document_package_to_workspace(self):
        """A created package can be extracted into a fresh workspace directory."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)

            # Build and package a small document first.
            doc_dir = Path(temp_dir) / "source_doc"
            doc_dir.mkdir()
            (doc_dir / "readme.md").write_text("# README\n\n![Logo](logo.png)")
            (doc_dir / "logo.png").write_bytes(b"Logo content")

            package_path = Path(temp_dir) / "source.mdpkg"
            manager.create_package(doc_dir, package_path)

            # The workspace directory is not created beforehand; the test
            # asserts that it exists after extraction.
            workspace_dir = Path(temp_dir) / "workspace"
            result = manager.extract_package(
                package_path, workspace_dir, restore_assets=True
            )

            assert workspace_dir.exists()
            assert (workspace_dir / "readme.md").exists()
            assert (workspace_dir / "logo.png").exists()
            assert result["extracted_files"] >= 1
            assert "asset_links_created" in result

    def test_package_with_custom_options(self):
        """Files matching ``exclude_patterns`` are left out of the archive."""
        import zipfile

        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)

            doc_dir = Path(temp_dir) / "document"
            doc_dir.mkdir()
            (doc_dir / "document.md").write_text("# Document")
            (doc_dir / "important.txt").write_text("Important content")
            # These two match the exclude patterns passed below.
            (doc_dir / "temp.tmp").write_text("Temporary file")
            (doc_dir / ".hidden").write_text("Hidden file")

            package_path = Path(temp_dir) / "custom.mdpkg"
            manager.create_package(
                doc_dir, package_path,
                exclude_patterns=["*.tmp", ".*"],
                description="Custom package",
                metadata={"author": "Test", "version": "1.0"},
            )

            # The package is opened as a zip archive to inspect its members.
            with zipfile.ZipFile(package_path, 'r') as zf:
                file_list = zf.namelist()

            assert "document.md" in file_list
            assert "important.txt" in file_list
            assert "temp.tmp" not in file_list
            assert ".hidden" not in file_list
class TestAssetManagerErrorHandling:
    """Error reporting and logging behaviour of AssetManager."""

    @staticmethod
    def _make_manager(temp_dir):
        """Return an AssetManager configured to keep its data under *temp_dir*.

        The name deliberately lacks the ``test_`` prefix so pytest does not
        collect it as a test.
        """
        root = Path(temp_dir)
        return AssetManager({
            "assets": {
                "storage_path": str(root / "assets"),
                "registry_path": str(root / "registry.json"),
            }
        })

    def test_add_nonexistent_asset_raises_error(self):
        """``add_asset`` on a missing path raises AssetError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)

            nonexistent_file = Path(temp_dir) / "does_not_exist.txt"

            with pytest.raises(AssetError):
                manager.add_asset(nonexistent_file)

    def test_get_info_for_nonexistent_asset_raises_error(self):
        """``get_asset_info`` with an unknown hash raises AssetManagerError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)

            with pytest.raises(AssetManagerError):
                manager.get_asset_info("nonexistent_hash_12345")

    def test_manager_logs_operations(self):
        """Adding an asset emits an "Adding asset" message on 'markitect.assets'.

        A capturing handler is attached to the shared logger for the duration
        of the test only. The handler is detached and the previous level is
        restored afterwards so later tests see the logger unchanged.
        """
        log_messages = []

        class _ListHandler(logging.Handler):
            """Collect the formatted message of every record it receives."""

            def emit(self, record):
                log_messages.append(record.getMessage())

        logger = logging.getLogger('markitect.assets')
        capture = _ListHandler()
        previous_level = logger.level
        logger.addHandler(capture)
        logger.setLevel(logging.INFO)
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                # The manager is created after the handler is attached, as in
                # the original ordering of this test.
                manager = self._make_manager(temp_dir)

                asset_file = Path(temp_dir) / "log_test.txt"
                asset_file.write_text("Test content for logging")
                manager.add_asset(asset_file, "Log test asset")
        finally:
            # Undo the global logger mutations even if the body raised.
            logger.removeHandler(capture)
            logger.setLevel(previous_level)

        assert any("Adding asset" in msg for msg in log_messages)

    def test_configuration_validation_errors(self):
        """A storage path that is a regular file makes construction fail."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # storage_path points at a file, not a directory.
            invalid_file = Path(temp_dir) / "not_a_directory.txt"
            invalid_file.write_text("This is a file")

            config = {
                "assets": {
                    "storage_path": str(invalid_file),
                    "registry_path": str(Path(temp_dir) / "registry.json"),
                }
            }

            with pytest.raises(AssetManagerError):
                AssetManager(config)
class TestAssetManagerWorkflows:
    """End-to-end scenarios that combine several AssetManager operations.

    Every test runs against a manager whose storage directory and registry
    file are placed inside a throwaway temporary directory.
    """

    @staticmethod
    def _make_manager(temp_dir):
        """Return an AssetManager configured to keep its data under *temp_dir*.

        The name deliberately lacks the ``test_`` prefix so pytest does not
        collect it as a test.
        """
        root = Path(temp_dir)
        return AssetManager({
            "assets": {
                "storage_path": str(root / "assets"),
                "registry_path": str(root / "registry.json"),
            }
        })

    def test_complete_document_workflow(self):
        """Create a document, package it, extract it elsewhere, compare files."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)

            # 1. Document directory with a markdown file and two assets.
            doc_dir = Path(temp_dir) / "project"
            doc_dir.mkdir()
            (doc_dir / "project.md").write_text(
                "# Project Document\n"
                "Assets:\n"
                "![Chart](charts/performance.png)\n"
                "[Data](data/results.json)\n"
            )
            (doc_dir / "charts").mkdir()
            (doc_dir / "data").mkdir()
            (doc_dir / "charts" / "performance.png").write_bytes(b"Chart data")
            (doc_dir / "data" / "results.json").write_text('{"status": "success"}')

            # 2. Package it.
            package_path = Path(temp_dir) / "project.mdpkg"
            package_result = manager.create_package(doc_dir, package_path)
            assert package_result["assets_processed"] == 2

            # 3. Extract into a different directory. The return value is not
            #    inspected here; only the resulting files are checked.
            extract_dir = Path(temp_dir) / "extracted_project"
            manager.extract_package(package_path, extract_dir, restore_assets=True)

            assert (extract_dir / "project.md").exists()
            assert (extract_dir / "charts" / "performance.png").exists()
            assert (extract_dir / "data" / "results.json").exists()

            # Content must round-trip unchanged.
            extracted_json = (extract_dir / "data" / "results.json").read_text()
            assert extracted_json == '{"status": "success"}'

    def test_asset_sharing_between_packages(self):
        """The same logo bytes used by two packages are listed only once."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)
            logo_bytes = b"Shared logo content"

            # Register the shared asset directly with the manager.
            shared_asset = Path(temp_dir) / "shared_logo.png"
            shared_asset.write_bytes(logo_bytes)
            asset_result = manager.add_asset(shared_asset, "Company logo")
            assert "content_hash" in asset_result

            # Two documents, each carrying its own copy of the same bytes.
            # NOTE(review): the markdown links to ../shared_logo.png while the
            # copy inside each document directory is named logo.png - confirm
            # this mismatch is intentional.
            doc1_dir = Path(temp_dir) / "doc1"
            doc1_dir.mkdir()
            (doc1_dir / "doc1.md").write_text("# Doc 1\n\n![Logo](../shared_logo.png)")
            (doc1_dir / "logo.png").write_bytes(logo_bytes)

            doc2_dir = Path(temp_dir) / "doc2"
            doc2_dir.mkdir()
            (doc2_dir / "doc2.md").write_text("# Doc 2\n\n![Logo](../shared_logo.png)")
            (doc2_dir / "logo.png").write_bytes(logo_bytes)

            pkg1_result = manager.create_package(doc1_dir, Path(temp_dir) / "doc1.mdpkg")
            pkg2_result = manager.create_package(doc2_dir, Path(temp_dir) / "doc2.mdpkg")

            assert pkg1_result["assets_processed"] >= 1
            assert pkg2_result["assets_processed"] >= 1

            # Exactly one listed asset carries the "Company logo" description.
            logo_assets = [
                entry for entry in manager.list_assets()
                if entry.get("description") == "Company logo"
            ]
            assert len(logo_assets) == 1  # Only one copy stored

    def test_performance_requirements_met(self):
        """Adding a 1 MiB file completes in under 100 ms of wall-clock time.

        NOTE: the threshold is an absolute wall-clock budget and therefore
        depends on the machine running the test.
        """
        import time

        with tempfile.TemporaryDirectory() as temp_dir:
            manager = self._make_manager(temp_dir)

            asset_file = Path(temp_dir) / "performance_test.bin"
            asset_file.write_bytes(b"x" * (1024 * 1024))  # 1MB

            # perf_counter is monotonic and high-resolution, so the measured
            # interval cannot be skewed by system clock adjustments.
            started = time.perf_counter()
            result = manager.add_asset(asset_file, "Performance test asset")
            operation_time = (time.perf_counter() - started) * 1000  # ms

            assert operation_time < 100, f"Operation took {operation_time}ms, expected <100ms"
            assert result["content_hash"] is not None