Files
markitect-main/examples/asset-management/asset_management_concept_b.py
tegwick ed33766c91 refactor: reorganize examples directory with topic-based subdirectories
Reorganize examples directory into logical topic-based subdirectories with
comprehensive documentation:

- templates/: ISO/ARC42 documentation templates
- asset-management/: Asset management prototypes and demos
- essays/: Long-form content examples
- invoicing/: Invoice generation examples
- plugins/: Plugin development examples
- issue-demos/: Issue prevention demonstrations
- design-patterns/: Design pattern examples

Each subdirectory includes a README.txt file with topic description and
contributor signatures based on file creation timestamps.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-29 22:31:52 +01:00

328 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Implementation example for Issue #141 - Concept B: Package + Symlinks Asset Management
This is a working prototype demonstrating the core concepts for handling images
and file includes with automatic deduplication.
"""
import hashlib
import json
import zipfile
import shutil
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple
class AssetRegistry:
    """Manages the shared asset registry for deduplication.

    The registry is a JSON document shaped like
    ``{"assets": {<sha256 hex digest>: <asset info dict>}, "version": "1.0"}``.
    It is loaded once on construction and rewritten to ``registry_path``
    every time an asset is registered.
    """

    # Chunk size used when hashing, so large assets are never held in memory
    # in one piece.
    _HASH_CHUNK_SIZE = 1024 * 1024

    # Extension (lower-case, with leading dot) -> MIME type.
    _MIME_TYPES = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".svg": "image/svg+xml",
        ".pdf": "application/pdf",
        ".txt": "text/plain",
        ".md": "text/markdown",
    }

    def __init__(self, registry_path: Path):
        """Load the registry stored at ``registry_path``.

        The parent directory of ``registry_path`` is created if missing.
        """
        self.registry_path = registry_path
        self.registry_path.parent.mkdir(parents=True, exist_ok=True)
        self.registry = self._load_registry()

    @staticmethod
    def _empty_registry() -> Dict:
        """Return a fresh, empty registry structure."""
        return {"assets": {}, "version": "1.0"}

    def _load_registry(self) -> Dict:
        """Load existing registry or create empty one.

        An unreadable or undecodable file yields an empty registry. A file
        that parses but is not a JSON object also yields an empty registry.
        A JSON object without a usable ``"assets"`` mapping gets an empty
        one, so later lookups never fail on a malformed file.
        """
        if not self.registry_path.exists():
            return self._empty_registry()
        try:
            data = json.loads(self.registry_path.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError; OSError covers read failures.
            return self._empty_registry()
        if not isinstance(data, dict):
            return self._empty_registry()
        if not isinstance(data.get("assets"), dict):
            data["assets"] = {}
        data.setdefault("version", "1.0")
        return data

    def _save_registry(self):
        """Save registry to disk."""
        self.registry_path.write_text(
            json.dumps(self.registry, indent=2), encoding="utf-8"
        )

    def get_content_hash(self, file_path: Path) -> str:
        """Calculate SHA-256 hash of file content.

        Returns the lower-case hex digest. The file is read in chunks.
        """
        digest = hashlib.sha256()
        with file_path.open("rb") as stream:
            for chunk in iter(lambda: stream.read(self._HASH_CHUNK_SIZE), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def register_asset(self, file_path: Path, content_hash: str) -> Dict:
        """Register a new asset in the registry.

        Records metadata for ``file_path`` under ``content_hash`` (replacing
        any previous entry for that hash), persists the registry, and
        returns the stored metadata dict.
        """
        suffix = file_path.suffix
        asset_info = {
            "original_name": file_path.name,
            "size": file_path.stat().st_size,
            "mime_type": self._guess_mime_type(suffix),
            "extension": suffix,
            "created": datetime.now().isoformat(),
            # Path relative to the shared asset root; every asset is filed
            # under "images/" regardless of its type.
            "stored_path": f"images/{content_hash}{suffix}",
        }
        self.registry["assets"][content_hash] = asset_info
        self._save_registry()
        return asset_info

    def find_asset(self, content_hash: str) -> Optional[Dict]:
        """Find asset by content hash; returns ``None`` when unknown."""
        return self.registry["assets"].get(content_hash)

    def _guess_mime_type(self, extension: str) -> str:
        """Simple MIME type guessing based on extension.

        The lookup is case-insensitive; unknown extensions map to
        ``application/octet-stream``.
        """
        return self._MIME_TYPES.get(extension.lower(), "application/octet-stream")
class AssetDeduplicator:
    """Handles asset storage and deduplication using symlinks.

    Each distinct file content is stored once under
    ``<workspace>/shared_assets/images/<sha256><suffix>``. Documents refer to
    it through a link in their own ``assets/`` directory.
    """

    def __init__(self, workspace_path: Path):
        """Set up the shared storage layout and registry under ``workspace_path``."""
        self.workspace = workspace_path
        self.shared_assets = workspace_path / "shared_assets"
        self.shared_images = self.shared_assets / "images"
        self.registry = AssetRegistry(self.shared_assets / "registry.json")
        # Create directory structure
        self.shared_images.mkdir(parents=True, exist_ok=True)

    def add_asset(self, source_path: Path, document_dir: Path, virtual_name: str) -> Tuple[str, Path]:
        """
        Add asset with deduplication. Returns (content_hash, stored_path).

        ``source_path`` is hashed. Content already in the registry is reused,
        otherwise the file is copied into shared storage and registered. In
        both cases ``<document_dir>/assets/<virtual_name>`` is (re)created to
        point at the stored copy.

        Raises:
            FileNotFoundError: if ``source_path`` does not exist.
        """
        if not source_path.exists():
            raise FileNotFoundError(f"Source asset not found: {source_path}")

        content_hash = self.registry.get_content_hash(source_path)
        existing_asset = self.registry.find_asset(content_hash)

        if existing_asset:
            print(f"✓ Deduplication: Found existing asset for {virtual_name}")
            stored_path = self.shared_assets / existing_asset["stored_path"]
            if not stored_path.exists():
                # The registry remembers this hash but the stored file is
                # gone. The source has identical content, so restore it
                # rather than leaving a dangling link.
                stored_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(source_path, stored_path)
                print(f"⚠ Restored missing stored asset: {stored_path.name}")
        else:
            # Store new asset
            stored_path = self.shared_images / f"{content_hash}{source_path.suffix}"
            shutil.copy2(source_path, stored_path)
            self.registry.register_asset(source_path, content_hash)
            print(f"✓ Stored new asset: {virtual_name} -> {stored_path.name}")

        # Create symlink in document assets directory
        self._create_asset_symlink(stored_path, document_dir, virtual_name)
        return content_hash, stored_path

    def _create_asset_symlink(self, stored_path: Path, document_dir: Path, virtual_name: str):
        """Create symlink from document assets directory to shared storage.

        Any existing file or link at the destination is replaced. If a
        relative symlink cannot be created, the stored file is copied
        instead.
        """
        assets_dir = document_dir / "assets"
        assets_dir.mkdir(parents=True, exist_ok=True)
        link_path = assets_dir / virtual_name

        # exists() follows links, so is_symlink() is also needed to catch a
        # dangling link left over from an earlier run.
        if link_path.exists() or link_path.is_symlink():
            link_path.unlink()

        try:
            # A relative target keeps the link valid when the whole
            # workspace is moved.
            relative_target = os.path.relpath(stored_path, link_path.parent)
            link_path.symlink_to(relative_target)
            print(f"✓ Created symlink: {virtual_name} -> {relative_target}")
        except (OSError, ValueError) as e:
            # OSError: symlinks unsupported or not permitted (e.g., on Windows).
            # ValueError: os.path.relpath across different Windows drives.
            shutil.copy2(stored_path, link_path)
            print(f"⚠ Symlink failed, copied file instead: {virtual_name} (reason: {e})")
class MarkdownPackager:
    """Handles creation and extraction of .mdpkg files.

    A ``.mdpkg`` file is a ZIP archive containing ``manifest.json``, the main
    document ``index.md`` (when present) and the document's assets under
    ``assets/``.
    """

    def __init__(self, workspace_path: Path):
        """Use ``<workspace_path>/packages`` as output directory, creating it if needed."""
        self.workspace = workspace_path
        self.packages_dir = workspace_path / "packages"
        self.packages_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _collect_assets(assets_dir: Path) -> List[Tuple[str, Path, bool]]:
        """Return ``(name, real_path, is_symlink)`` for each packable asset.

        Only direct children of ``assets_dir`` that are files or symlinks are
        considered. Symlinks are resolved to their target, and links whose
        target no longer exists are skipped with a warning instead of
        aborting the package. Entries are sorted by path so manifest and
        archive order is deterministic.
        """
        if not assets_dir.exists():
            return []
        collected = []
        for asset_path in sorted(assets_dir.iterdir()):
            is_link = asset_path.is_symlink()
            if not (is_link or asset_path.is_file()):
                continue
            real_path = asset_path.resolve() if is_link else asset_path
            if not real_path.exists():
                print(f"⚠ Skipping broken symlink: {asset_path.name}")
                continue
            collected.append((asset_path.name, real_path, is_link))
        return collected

    def create_package(self, document_dir: Path, package_name: str) -> Path:
        """Create a .mdpkg ZIP package from a document directory.

        Writes ``<packages_dir>/<package_name>.mdpkg`` and returns its path.
        Symlinked assets are stored as regular files (the link target's
        content), so the package is self-contained.
        """
        package_path = self.packages_dir / f"{package_name}.mdpkg"
        main_doc = document_dir / "index.md"
        has_main_doc = main_doc.exists()

        # Scan the assets directory once; the same list feeds both the
        # manifest and the archive so they cannot disagree.
        assets = self._collect_assets(document_dir / "assets")
        assets_info = [
            {
                "name": name,
                "size": real_path.stat().st_size,
                "is_symlink": is_link,
            }
            for name, real_path, is_link in assets
        ]

        manifest = {
            "name": package_name,
            "version": "1.0",
            "created": datetime.now().isoformat(),
            "format": "mdpkg",
            "assets": assets_info,
            "main_document": "index.md"
        }

        with zipfile.ZipFile(package_path, 'w', zipfile.ZIP_DEFLATED) as zf:
            zf.writestr("manifest.json", json.dumps(manifest, indent=2))
            if has_main_doc:
                zf.write(main_doc, "index.md")
            for name, real_path, _ in assets:
                zf.write(real_path, f"assets/{name}")

        print(f"✓ Created package: {package_path}")
        print(f" - Main document: {'✓' if has_main_doc else '✗'}")
        print(f" - Assets: {len(assets_info)}")
        return package_path

    def extract_package(self, package_path: Path, extract_name: str) -> Path:
        """Extract a .mdpkg package to the workspace.

        Extracts ``index.md`` and every ``assets/`` member into
        ``<workspace>/documents/<extract_name>`` and returns that directory.
        Other archive members, including the manifest, are not extracted.

        Raises:
            FileNotFoundError: if ``package_path`` does not exist.
        """
        if not package_path.exists():
            raise FileNotFoundError(f"Package not found: {package_path}")

        extract_dir = self.workspace / "documents" / extract_name
        extract_dir.mkdir(parents=True, exist_ok=True)

        with zipfile.ZipFile(package_path, 'r') as zf:
            member_names = zf.namelist()
            if "index.md" in member_names:
                zf.extract("index.md", extract_dir)
            # Per the zipfile documentation, ZipFile.extract strips absolute
            # paths and ".." components from member names.
            for member in member_names:
                if member.startswith("assets/"):
                    zf.extract(member, extract_dir)

        print(f"✓ Extracted package to: {extract_dir}")
        return extract_dir
def demo_asset_management():
    """Demonstrate the asset management system.

    Builds a throwaway workspace at ``./demo_workspace`` (any existing
    directory of that name is deleted first), adds three test assets to two
    document projects, reports how many duplicates were eliminated, packages
    both projects and prints the resulting directory tree.
    """
    print("🎯 Asset Management Demo - Concept B")
    print("=" * 50)

    # Setup workspace
    demo_workspace = Path("./demo_workspace")
    if demo_workspace.exists():
        shutil.rmtree(demo_workspace)
    deduplicator = AssetDeduplicator(demo_workspace)
    packager = MarkdownPackager(demo_workspace)

    # Create demo assets (simulate duplicate images)
    demo_assets = demo_workspace / "demo_assets"
    demo_assets.mkdir(parents=True, exist_ok=True)

    # Create some test "images" (text files for demo)
    test_image1 = demo_assets / "logo.png"
    test_image2 = demo_assets / "company_logo.png"
    test_image3 = demo_assets / "diagram.png"
    test_image1.write_text("PNG_IMAGE_CONTENT_LOGO")  # Same content
    test_image2.write_text("PNG_IMAGE_CONTENT_LOGO")  # Same content, different name
    test_image3.write_text("PNG_IMAGE_CONTENT_DIAGRAM")  # Different content
    # Single source of truth for the statistics printed below.
    source_assets = [test_image1, test_image2, test_image3]
    print(f"Created test assets: {len(list(demo_assets.iterdir()))} files")

    # Create two document projects
    doc1_dir = demo_workspace / "documents" / "project_a"
    doc2_dir = demo_workspace / "documents" / "project_b"
    for doc_dir in (doc1_dir, doc2_dir):
        doc_dir.mkdir(parents=True, exist_ok=True)

    # Project A uses logo.png and diagram.png
    (doc1_dir / "index.md").write_text("""# Project A
![Logo](assets/logo.png)
![Diagram](assets/diagram.png)
This is Project A documentation.
""")
    print("\n📁 Processing Project A assets...")
    deduplicator.add_asset(test_image1, doc1_dir, "logo.png")
    deduplicator.add_asset(test_image3, doc1_dir, "diagram.png")

    # Project B uses the same logo (different filename) and same diagram
    (doc2_dir / "index.md").write_text("""# Project B
![Company Logo](assets/company_logo.png)
![System Diagram](assets/system_diagram.png)
This is Project B documentation.
""")
    print("\n📁 Processing Project B assets...")
    deduplicator.add_asset(test_image2, doc2_dir, "company_logo.png")  # Same content as logo.png
    deduplicator.add_asset(test_image3, doc2_dir, "system_diagram.png")  # Same content as diagram.png

    # Show deduplication results
    original_count = len(source_assets)
    unique_count = len(deduplicator.registry.registry['assets'])
    print("\n📊 Deduplication Results:")
    print(f" - Original files: {original_count}")
    print(f" - Unique content hashes: {unique_count}")
    print(f" - Storage efficiency: {original_count - unique_count} duplicates eliminated")

    # Create packages
    print("\n📦 Creating packages...")
    packager.create_package(doc1_dir, "project_a")
    packager.create_package(doc2_dir, "project_b")

    print("\n✅ Demo completed successfully!")
    print(f" - Workspace: {demo_workspace}")
    print(f" - Shared assets: {deduplicator.shared_assets}")
    print(f" - Packages: {packager.packages_dir}")

    # Show final directory structure
    print("\n📂 Final directory structure:")
    for root, dirs, files in os.walk(demo_workspace):
        # Depth is the number of path components below the workspace root.
        level = len(Path(root).relative_to(demo_workspace).parts)
        indent = ' ' * 2 * level
        print(f"{indent}{os.path.basename(root)}/")
        subindent = ' ' * 2 * (level + 1)
        for file in files:
            file_path = Path(root) / file
            if file_path.is_symlink():
                target = os.readlink(file_path)
                print(f"{subindent}{file} -> {target}")
            else:
                print(f"{subindent}{file}")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo_asset_management()