Files
markitect-main/tests/test_issue_142_asset_registry.py
tegwick 81d3da5fe7
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat: comprehensive asset management system and testing improvements
Asset Management System (Issue #142):
- Add complete asset management framework with deduplication
- Implement AssetManager, AssetRegistry, and AssetDeduplicator classes
- Add AssetPackager for markdown document packaging
- Create comprehensive test suite for all asset management components
- Add asset constants and custom exceptions for robust error handling

Markdown Processing Enhancements:
- Update markdown_commands.py with improved functionality
- Enhanced parsing and content aggregation capabilities
- Improved filename encoding/decoding for special characters

Test Suite Improvements:
- Add comprehensive tests for Issue #138 markdown parsing
- Enhance Issue #139 content aggregation and end-to-end testing
- Complete test coverage for new asset management features

Examples and Documentation:
- Update BildungsKanonJon.md example with enhanced content
- Generate corresponding HTML output for documentation
- Add asset registry configuration

Development Tools:
- Add install script for simplified setup

This commit represents a major enhancement to MarkiTect's asset handling
capabilities with full test coverage and improved markdown processing.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-12 19:57:31 +02:00

270 lines
10 KiB
Python

"""
Test scenarios for AssetRegistry JSON persistence functionality.

This module tests the AssetRegistry class for Issue #142: Phase 1 - Core Asset Management Module.
Tests cover JSON-based metadata persistence, SHA-256 content hashing, MIME type detection,
and thread-safe registry operations.

Requirements:
- JSON-based asset metadata persistence
- SHA-256 content hashing for deduplication
- MIME type detection and file size tracking
- Thread-safe registry operations
"""
import hashlib
import json
import os
import tempfile
import threading
import time
from pathlib import Path
from unittest.mock import Mock, patch

import pytest

from markitect.assets.exceptions import AssetError, RegistryError
from markitect.assets.registry import AssetRegistry


class TestAssetRegistryCore:
    """Construction and loading behaviour of AssetRegistry."""

    def test_registry_initialization(self):
        """A new registry keeps the path it was given and creates its file."""
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "test_registry.json"

            created = AssetRegistry(target)

            assert created.registry_path == target
            # Constructing the registry is expected to write an empty file.
            assert target.exists()

    def test_registry_loads_existing_json(self):
        """A registry pointed at an existing JSON file exposes its entries."""
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "existing_registry.json"

            # Seed the file with one pre-recorded asset keyed by its hash.
            seeded_entry = {
                "path": "/test/file.txt",
                "content_hash": "hash123",
                "mime_type": "text/plain",
                "size": 100,
            }
            seeded = {"assets": {"hash123": seeded_entry}}
            target.write_text(json.dumps(seeded))

            loaded = AssetRegistry(target)
            listed = loaded.list_assets()

            assert len(listed) == 1
            first = listed[0]
            assert first["content_hash"] == "hash123"


class TestAssetRegistryHashing:
    """Test SHA-256 content hashing functionality."""

    def test_generate_content_hash_from_file(self):
        """Hashing a file path yields the SHA-256 hex digest of its content."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            test_file = Path(temp_dir) / "test.txt"
            # Write raw bytes so the on-disk content does not depend on the
            # platform's default text encoding.
            test_file.write_bytes(b"Hello, World!")

            registry = AssetRegistry(registry_path)
            content_hash = registry.generate_content_hash(test_file)

            # Well-known SHA-256 digest of b"Hello, World!".
            expected_hash = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"
            assert content_hash == expected_hash

    def test_generate_content_hash_from_bytes(self):
        """Hashing raw bytes yields a stable SHA-256 hex digest of those bytes."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            registry = AssetRegistry(registry_path)
            test_content = b"Binary content test"

            content_hash = registry.generate_content_hash(test_content)

            assert isinstance(content_hash, str)
            assert len(content_hash) == 64  # SHA-256 hex length
            # Same input must always produce the same digest.
            assert registry.generate_content_hash(test_content) == content_hash
            # Shape checks alone would accept any 64-character string, so
            # compare against an independently computed SHA-256 digest.
            assert content_hash == hashlib.sha256(test_content).hexdigest()


class TestAssetRegistryMimeTypes:
    """MIME type detection as reported by AssetRegistry.detect_mime_type."""

    def test_detect_mime_type_text_file(self):
        """A .txt file is reported with a text/* MIME type."""
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            json_file = base / "registry.json"
            sample = base / "test.txt"
            sample.write_text("Plain text content")

            detector = AssetRegistry(json_file)
            detected = detector.detect_mime_type(sample)

            assert detected.startswith("text/")

    def test_detect_mime_type_image_file(self):
        """A .png file carrying a PNG signature is reported as image/png."""
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            json_file = base / "registry.json"

            # Minimal PNG stub: the 8-byte signature followed by the start
            # of an IHDR chunk.
            sample = base / "test.png"
            sample.write_bytes(b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR')

            detector = AssetRegistry(json_file)
            detected = detector.detect_mime_type(sample)

            assert detected == "image/png"


class TestAssetRegistryOperations:
    """Registration, lookup and existence checks on AssetRegistry."""

    def test_register_asset(self):
        """Registering a file returns a record with hash, MIME type, size and path."""
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            json_file = base / "registry.json"
            source_file = base / "asset.txt"
            source_file.write_text("Test asset content")

            store = AssetRegistry(json_file)
            record = store.register_asset(source_file)

            assert "content_hash" in record
            assert "mime_type" in record
            assert "size" in record
            assert record["path"] == str(source_file)

    def test_get_asset_by_hash(self):
        """A registered asset can be fetched back through its content hash."""
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            json_file = base / "registry.json"
            source_file = base / "asset.txt"
            source_file.write_text("Test content for retrieval")

            store = AssetRegistry(json_file)
            digest = store.register_asset(source_file)["content_hash"]

            fetched = store.get_asset(digest)

            assert fetched["content_hash"] == digest
            assert fetched["path"] == str(source_file)

    def test_asset_exists_check(self):
        """asset_exists is truthy for a registered hash and falsy for an unknown one."""
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            json_file = base / "registry.json"
            source_file = base / "asset.txt"
            source_file.write_text("Existence test content")

            store = AssetRegistry(json_file)
            digest = store.register_asset(source_file)["content_hash"]

            assert store.asset_exists(digest)
            assert not store.asset_exists("nonexistent_hash")


class TestAssetRegistryPersistence:
    """JSON persistence of the registry and recovery from a bad file."""

    def test_registry_persists_to_json(self):
        """Registering an asset writes it into the registry's JSON file."""
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            json_file = base / "registry.json"
            source_file = base / "asset.txt"
            source_file.write_text("Content to persist")

            store = AssetRegistry(json_file)
            store.register_asset(source_file)

            # Read the file back directly, bypassing the registry object.
            with json_file.open() as handle:
                on_disk = json.load(handle)

            assert "assets" in on_disk
            assert len(on_disk["assets"]) == 1

    def test_registry_handles_corrupted_json(self):
        """An unparsable registry file results in an empty registry, not an error."""
        with tempfile.TemporaryDirectory() as workdir:
            json_file = Path(workdir) / "corrupted_registry.json"
            json_file.write_text("{ invalid json content")

            # Construction must succeed despite the malformed file.
            store = AssetRegistry(json_file)
            listed = store.list_assets()

            assert listed == []


class TestAssetRegistryThreadSafety:
    """Test thread-safe registry operations."""

    def test_concurrent_asset_registration(self):
        """Five threads register distinct assets; none may fail, hang or be lost."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            registry = AssetRegistry(registry_path)
            results = []
            errors = []

            def register_asset_thread(thread_id):
                # Catch everything on purpose: an exception raised inside a
                # worker thread would otherwise never reach the assertions.
                try:
                    test_file = Path(temp_dir) / f"asset_{thread_id}.txt"
                    test_file.write_text(f"Content for thread {thread_id}")
                    asset_info = registry.register_asset(test_file)
                    results.append(asset_info)
                except Exception as e:
                    errors.append(e)

            # Daemon threads so a worker stuck on a lock cannot keep the
            # interpreter alive after the test has already failed.
            threads = [
                threading.Thread(
                    target=register_asset_thread, args=(i,), daemon=True
                )
                for i in range(5)
            ]
            for thread in threads:
                thread.start()

            # Bounded wait: a deadlock in the registry should fail this test
            # instead of hanging the whole suite.
            for thread in threads:
                thread.join(timeout=30)
            stuck = [thread.name for thread in threads if thread.is_alive()]
            assert not stuck, f"Threads did not finish (possible deadlock): {stuck}"

            assert len(errors) == 0, f"Thread safety errors: {errors}"
            assert len(results) == 5
            assert len({r["content_hash"] for r in results}) == 5  # All unique hashes
            # Each call can return a valid record while a racing
            # read-modify-write still drops entries, so also check the
            # registry's final contents.
            assert len(registry.list_assets()) == 5


class TestAssetRegistryErrorHandling:
    """Test error handling and exception scenarios."""

    def test_register_nonexistent_file_raises_error(self):
        """Test that registering non-existent file raises appropriate error."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            nonexistent_file = Path(temp_dir) / "does_not_exist.txt"
            registry = AssetRegistry(registry_path)

            with pytest.raises(AssetError):
                registry.register_asset(nonexistent_file)

    def test_get_nonexistent_asset_raises_error(self):
        """Test that getting non-existent asset raises appropriate error."""
        with tempfile.TemporaryDirectory() as temp_dir:
            registry_path = Path(temp_dir) / "registry.json"
            registry = AssetRegistry(registry_path)

            with pytest.raises(RegistryError):
                registry.get_asset("nonexistent_hash_12345")

    def test_invalid_registry_path_raises_error(self):
        """Test that an unusable registry path raises RegistryError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # Nest the registry under a regular file. Such a path can never
            # be created, whichever user runs the suite. A hard-coded
            # protected location is writable when running as root and is
            # not portable across operating systems.
            blocker = Path(temp_dir) / "not_a_directory"
            blocker.write_text("")
            invalid_path = blocker / "cannot_write.json"

            # NOTE(review): assumes AssetRegistry converts any OSError raised
            # while creating the file into RegistryError, not only
            # PermissionError -- confirm against the implementation.
            with pytest.raises(RegistryError):
                AssetRegistry(invalid_path)