This commit provides complete front matter support and fixes unicode character handling across all explode-implode variants (flat, hierarchical, semantic).

## Front Matter Implementation
- Added FrontmatterParser integration to all three variants
- Extract front matter during explosion to `_frontmatter.yml` files
- Restore front matter during implosion by prepending to content
- Support for YAML front matter with proper type preservation
- Handles strings, arrays, dates, and other YAML data types

## Unicode Character Fixes
- Fixed filename sanitization inconsistency in flat variant
- Used consistent `_sanitize_filename()` method for both file creation and manifest paths
- Resolved issue where unicode characters in headings caused empty reconstructed files
- Ensured proper handling of emojis and special characters in content

## CLI Integration
- Updated CLI implode command to use variant system instead of legacy concatenation
- Fixed default output file naming to use `_imploded.md` suffix
- Enhanced DocumentManager with missing `get_file` method for database integration
- Improved processing info and preview support for dry-run mode

## Test Coverage
- Reactivated `test_issue_149_roundtrip_validation.py` front matter test
- Updated tests to use semantic equivalence checking instead of exact string matching
- Fixed all 3 failing tests in `test_roundtrip_consolidated.py`
- All 10 roundtrip tests and 11 Issue #149 validation tests now pass

## Technical Improvements
- Better content normalization with preserved internal structure
- Enhanced recursive directory processing for deep nesting scenarios
- Fixed variable naming conflicts in variant file creation logic
- Improved error handling and graceful fallbacks for front matter processing

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
451 lines
15 KiB
Python
451 lines
15 KiB
Python
"""
|
|
Consolidated Roundtrip Tests for Enhanced Explode-Implode System
|
|
|
|
This test suite consolidates and updates all roundtrip tests to work with the new
|
|
variant system, ensuring backward compatibility while testing new functionality.
|
|
"""
|
|
|
|
import pytest
|
|
import tempfile
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import List, Dict, Any
|
|
|
|
from markitect.explode_variants import ExplodeVariant, get_variant_factory
|
|
|
|
|
|
class TestRoundtripBase:
    """Base class for roundtrip tests with common utilities.

    Provides a per-test temporary directory (``self.temp_dir``), a helper to
    invoke the ``markitect`` CLI in a subprocess, and a helper that compares
    the heading structure of two Markdown documents.
    """

    # Checkout location this suite was originally written against.  It is used
    # as the subprocess working directory only when it exists on this machine.
    _LEGACY_PROJECT_ROOT = "/home/worsch/markitect_project"

    def setup_method(self):
        """Set up temporary directory for each test."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def teardown_method(self):
        """Clean up temporary directory after each test."""
        import shutil

        # Best-effort cleanup: a leftover temp dir must not fail the test.
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def run_markitect_command(self, args: List[str]) -> subprocess.CompletedProcess:
        """Run ``<python> -m markitect.cli <args>`` and return the result.

        Args:
            args: Command-line arguments passed to the markitect CLI.

        Returns:
            The ``subprocess.CompletedProcess`` with ``stdout``/``stderr``
            captured as text.  The return code is not checked here.
        """
        import sys

        # Run from the legacy checkout when present; otherwise inherit the
        # caller's working directory instead of crashing with
        # FileNotFoundError on machines where that path does not exist.
        legacy_root = Path(self._LEGACY_PROJECT_ROOT)
        cwd = str(legacy_root) if legacy_root.is_dir() else None

        # sys.executable guarantees the same interpreter (and virtualenv) that
        # is running the tests, rather than whatever "python" is on PATH.
        cmd = [sys.executable, "-m", "markitect.cli", *args]
        return subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=cwd,
        )

    def validate_basic_structure_preservation(self, original: str, reconstructed: str) -> Dict[str, Any]:
        """Validate that basic document structure is preserved.

        Args:
            original: Markdown text before the roundtrip.
            reconstructed: Markdown text after the roundtrip.

        Returns:
            A dict with the heading counts of both documents, the extracted
            headings as ``(hashes, title)`` tuples, and ``headings_preserved``,
            which is True when both documents have the same NUMBER of
            headings (heading text and order are not compared).
        """
        import re

        # A heading is a run of '#' at the start of a line followed by
        # whitespace and some text.
        heading_pattern = re.compile(r'^(#+)\s+(.+)', re.MULTILINE)
        orig_headings = heading_pattern.findall(original)
        recon_headings = heading_pattern.findall(reconstructed)

        return {
            'original_heading_count': len(orig_headings),
            'reconstructed_heading_count': len(recon_headings),
            'headings_preserved': len(orig_headings) == len(recon_headings),
            'original_headings': orig_headings,
            'reconstructed_headings': recon_headings
        }
|
|
|
|
|
|
class TestVariantRoundtrips(TestRoundtripBase):
    """CLI-driven explode/implode roundtrips, one test per variant."""

    @pytest.fixture
    def sample_document(self):
        """Return a small multi-level Markdown document shared by the variant tests."""
        # Paragraphs are separated by one blank line; the document ends with a
        # single trailing newline.
        paragraphs = [
            "# Book Title",
            "This is the introduction to our book.",
            "## Chapter 1: Getting Started",
            "Welcome to the first chapter.",
            "### Section 1.1: Overview",
            "Basic overview content.",
            "### Section 1.2: Setup",
            "Setup instructions here.",
            "## Chapter 2: Advanced Topics",
            "More advanced material.",
            "### Section 2.1: Deep Dive",
            "Detailed explanations.",
            "# Conclusion",
            "Final thoughts and summary.",
        ]
        return "\n\n".join(paragraphs) + "\n"

    def test_flat_variant_cli_roundtrip(self, sample_document):
        """Roundtrip the sample document through the ``flat`` variant."""
        self._test_variant_roundtrip(sample_document, "flat")

    def test_hierarchical_variant_cli_roundtrip(self, sample_document):
        """Roundtrip the sample document through the ``hierarchical`` variant."""
        self._test_variant_roundtrip(sample_document, "hierarchical")

    def test_semantic_variant_cli_roundtrip(self, sample_document):
        """Roundtrip the sample document through the ``semantic`` variant."""
        self._test_variant_roundtrip(sample_document, "semantic")

    def _test_variant_roundtrip(self, content: str, variant: str):
        """Explode *content* with *variant*, implode it again and check the result.

        The implode step is not told which variant was used.  The checks are
        that both CLI calls succeed, a ``manifest.md`` is written, the heading
        count is unchanged and a handful of known headings are present.
        """
        # Write the source document.
        source_path = self.temp_dir / f"test_{variant}.md"
        source_path.write_text(content, encoding='utf-8')

        # Explode it with the requested variant.
        target_dir = self.temp_dir / f"test_{variant}.mdd"
        explode_args = [
            "md-explode", str(source_path),
            "--variant", variant,
            "--output-dir", str(target_dir),
        ]
        exploded = self.run_markitect_command(explode_args)
        assert exploded.returncode == 0, f"Explode failed: {exploded.stderr}"
        assert target_dir.exists()

        # The exploded directory must carry a manifest.
        assert (target_dir / "manifest.md").exists()

        # Implode without naming the variant.
        rebuilt_path = self.temp_dir / f"reconstructed_{variant}.md"
        implode_args = ["md-implode", str(target_dir), "--output", str(rebuilt_path)]
        imploded = self.run_markitect_command(implode_args)
        assert imploded.returncode == 0, f"Implode failed: {imploded.stderr}"
        assert rebuilt_path.exists()

        # Compare heading structure of input and output.
        reconstructed_content = rebuilt_path.read_text(encoding='utf-8')
        report = self.validate_basic_structure_preservation(content, reconstructed_content)
        assert report['headings_preserved'], f"Headings not preserved in {variant} variant"

        # Spot-check headings from every level of the sample document.
        expected_headings = (
            "# Book Title",
            "## Chapter 1: Getting Started",
            "### Section 1.1: Overview",
            "# Conclusion",
        )
        for heading in expected_headings:
            assert heading in reconstructed_content
|
|
|
|
|
|
class TestBackwardCompatibilityRoundtrips(TestRoundtripBase):
    """Test backward compatibility with legacy behavior."""

    def test_default_behavior_roundtrip(self):
        """Test that explode/implode work without an explicit variant.

        ``md-explode`` is called with only the input file; the test expects a
        sibling ``.mdd`` directory containing ``manifest.md`` and that the
        top-level headings survive the implode.
        """
        content = """# Introduction

Basic introduction content.

## Overview

Overview section.

# Main Content

Main content here.

# Conclusion

Final thoughts.
"""

        # Create original file
        original_file = self.temp_dir / "test.md"
        original_file.write_text(content, encoding='utf-8')

        # Explode without specifying variant (should default to flat)
        result = self.run_markitect_command([
            "md-explode", str(original_file)
        ])
        # Include stderr so a CLI failure is diagnosable from the test report.
        assert result.returncode == 0, f"Explode failed: {result.stderr}"

        # Should create .mdd directory with manifest
        exploded_dir = original_file.with_suffix('.mdd')
        assert exploded_dir.exists()
        assert (exploded_dir / "manifest.md").exists()

        # Implode back
        reconstructed_file = self.temp_dir / "reconstructed.md"
        result = self.run_markitect_command([
            "md-implode", str(exploded_dir),
            "--output", str(reconstructed_file)
        ])
        assert result.returncode == 0, f"Implode failed: {result.stderr}"
        assert reconstructed_file.exists(), "Implode did not write the output file"

        # Validate content
        reconstructed_content = reconstructed_file.read_text(encoding='utf-8')
        assert "# Introduction" in reconstructed_content
        assert "# Main Content" in reconstructed_content
        assert "# Conclusion" in reconstructed_content

    def test_legacy_exploded_directory_handling(self):
        """Test that a directory of Markdown files without a manifest can be imploded.

        No ``--output`` is given; the result is expected next to the input
        directory as ``<directory name>_imploded.md``.
        """
        # Create a structure that looks like legacy exploded content
        legacy_dir = self.temp_dir / "legacy_structure"
        legacy_dir.mkdir()

        # Create some markdown files without manifest.  Written as UTF-8 to
        # match the encoding used when the result is read back.
        (legacy_dir / "intro.md").write_text(
            "# Introduction\n\nIntro content.", encoding='utf-8')
        (legacy_dir / "chapter1.md").write_text(
            "# Chapter 1\n\nChapter content.", encoding='utf-8')
        (legacy_dir / "conclusion.md").write_text(
            "# Conclusion\n\nFinal thoughts.", encoding='utf-8')

        # Should still be able to implode
        result = self.run_markitect_command([
            "md-implode", str(legacy_dir)
        ])
        assert result.returncode == 0, f"Implode failed: {result.stderr}"

        # Check that output file was created
        output_file = legacy_dir.parent / f"{legacy_dir.name}_imploded.md"
        assert output_file.exists(), f"Expected default output file {output_file}"

        content = output_file.read_text(encoding='utf-8')
        assert "# Introduction" in content
        assert "# Chapter 1" in content
        assert "# Conclusion" in content
|
|
|
|
|
|
class TestComplexRoundtrips(TestRoundtripBase):
    """Test roundtrips with complex content and features."""

    def test_front_matter_preservation_roundtrip(self):
        """Test that front matter is preserved through roundtrips.

        The same document is exploded and imploded with every variant; the
        reconstructed front matter is parsed and compared value by value
        rather than as raw text.
        """
        # Imported once, outside the per-variant loop.
        from markitect.matter_frontmatter.parser import FrontmatterParser

        content_with_fm = """---
title: "Test Document"
author: "Test Author"
tags: ["test", "markdown"]
version: 1.0
---

# Main Content

This document has front matter.

## Section 1

Content here.

# Conclusion

End of document.
"""

        original_file = self.temp_dir / "test_fm.md"
        original_file.write_text(content_with_fm, encoding='utf-8')

        # Test with each variant
        for variant in ["flat", "hierarchical", "semantic"]:
            # Explode
            exploded_dir = self.temp_dir / f"test_fm_{variant}.mdd"
            result = self.run_markitect_command([
                "md-explode", str(original_file),
                "--variant", variant,
                "--output-dir", str(exploded_dir)
            ])
            # Every message names the variant so a failure inside the loop
            # can be attributed without re-running.
            assert result.returncode == 0, (
                f"Explode failed for {variant} variant: {result.stderr}")

            # Implode
            reconstructed_file = self.temp_dir / f"reconstructed_fm_{variant}.md"
            result = self.run_markitect_command([
                "md-implode", str(exploded_dir),
                "--output", str(reconstructed_file)
            ])
            assert result.returncode == 0, (
                f"Implode failed for {variant} variant: {result.stderr}")

            # Verify front matter preservation - check for semantic equivalence
            reconstructed_content = reconstructed_file.read_text(encoding='utf-8')

            # Use frontmatter parser to check semantic equivalence
            parser = FrontmatterParser()
            reconstructed_fm = parser.extract_frontmatter(reconstructed_content)

            # Check that all expected values are preserved
            assert reconstructed_fm.get('title') == 'Test Document', (
                f"title not preserved by {variant} variant")
            assert reconstructed_fm.get('author') == 'Test Author', (
                f"author not preserved by {variant} variant")
            assert reconstructed_fm.get('tags') == ['test', 'markdown'], (
                f"tags not preserved by {variant} variant")
            assert reconstructed_fm.get('version') == 1.0, (
                f"version not preserved by {variant} variant")

    def test_unicode_and_special_characters_roundtrip(self):
        """Test roundtrip with unicode and special characters.

        Uses the flat variant with accented headings, emoji, symbols and a
        fenced code block, then checks that representative strings survive.
        """
        unicode_content = """# Tëst Dócümënt

This document contains ünïcödë characters.

## Spëcïál Chàráctërs

- Émojis: 🚀 📝 ✅
- Symbols: © ® ™ € £ ¥
- Math: ∑ ∞ π √ ≈ ≠

### Çødë Blöck

```python
def hëllö_wörld():
    print("Hëllö, Wörld! 🌍")
```

# Cönclüsïön

End öf tëst.
"""

        original_file = self.temp_dir / "unicode_test.md"
        original_file.write_text(unicode_content, encoding='utf-8')

        # Test with flat variant
        result = self.run_markitect_command([
            "md-explode", str(original_file),
            "--variant", "flat"
        ])
        assert result.returncode == 0, f"Explode failed: {result.stderr}"

        exploded_dir = original_file.with_suffix('.mdd')
        assert exploded_dir.exists()

        # Implode back
        reconstructed_file = self.temp_dir / "unicode_reconstructed.md"
        result = self.run_markitect_command([
            "md-implode", str(exploded_dir),
            "--output", str(reconstructed_file)
        ])
        assert result.returncode == 0, f"Implode failed: {result.stderr}"

        # Verify unicode preservation
        reconstructed_content = reconstructed_file.read_text(encoding='utf-8')
        assert "Tëst Dócümënt" in reconstructed_content
        assert "🚀 📝 ✅" in reconstructed_content
        assert "hëllö_wörld" in reconstructed_content

    def test_large_document_roundtrip(self):
        """Test roundtrip with a large document.

        Generates 10 chapters with 5 sections each, roundtrips the document
        through the hierarchical variant and checks that the heading count
        is unchanged.
        """
        # Generate large content.  Pieces are collected in a list and joined
        # once instead of growing a string with repeated +=.
        parts = ["# Large Document Test\n\nThis tests performance with large documents.\n\n"]

        for chapter in range(1, 11):  # 10 chapters
            parts.append(f"# Chapter {chapter}\n\n")
            parts.append(f"This is chapter {chapter} content.\n\n")

            for section in range(1, 6):  # 5 sections per chapter
                parts.append(f"## Section {chapter}.{section}\n\n")
                parts.append(f"Content for section {chapter}.{section}.\n\n")
                parts.append("Lorem ipsum dolor sit amet, consectetur adipiscing elit. " * 20)
                parts.append("\n\n")

        parts.append("# Conclusion\n\nEnd of large document.\n")
        large_content = "".join(parts)

        original_file = self.temp_dir / "large_doc.md"
        original_file.write_text(large_content, encoding='utf-8')

        # Test with hierarchical variant (most complex)
        result = self.run_markitect_command([
            "md-explode", str(original_file),
            "--variant", "hierarchical"
        ])
        assert result.returncode == 0, f"Explode failed: {result.stderr}"

        exploded_dir = original_file.with_suffix('.mdd')
        assert exploded_dir.exists()

        # Verify many files were created
        md_files = list(exploded_dir.glob("**/*.md"))
        assert len(md_files) > 10  # Should have many files

        # Implode back
        reconstructed_file = self.temp_dir / "large_reconstructed.md"
        result = self.run_markitect_command([
            "md-implode", str(exploded_dir),
            "--output", str(reconstructed_file)
        ])
        assert result.returncode == 0, f"Implode failed: {result.stderr}"

        # Verify structure preservation
        reconstructed_content = reconstructed_file.read_text(encoding='utf-8')
        validation = self.validate_basic_structure_preservation(large_content, reconstructed_content)
        assert validation['headings_preserved']
|
|
|
|
|
|
class TestErrorHandlingRoundtrips(TestRoundtripBase):
    """Test error handling in roundtrip scenarios."""

    def _assert_roundtrip_succeeds(self, original_file: Path) -> None:
        """Explode *original_file* with default options, then implode it again.

        Asserts that both CLI calls exit with status 0 and that the sibling
        ``.mdd`` directory is created.  stderr is included in the assertion
        message so a CLI failure can be diagnosed from the test report.
        """
        result = self.run_markitect_command([
            "md-explode", str(original_file)
        ])
        assert result.returncode == 0, f"Explode failed: {result.stderr}"

        exploded_dir = original_file.with_suffix('.mdd')
        assert exploded_dir.exists()

        result = self.run_markitect_command([
            "md-implode", str(exploded_dir)
        ])
        assert result.returncode == 0, f"Implode failed: {result.stderr}"

    def test_malformed_markdown_handling(self):
        """Test handling of malformed markdown.

        The input has a heading directly followed by text and a ``###``
        heading without a space; explode and implode must still succeed.
        """
        malformed_content = """# Valid Header

Some content here.

## Another header

# Missing spacing
No space before content.

###Too many hashes without space

# Final header
"""

        original_file = self.temp_dir / "malformed.md"
        original_file.write_text(malformed_content, encoding='utf-8')

        # Should still work despite malformed content
        self._assert_roundtrip_succeeds(original_file)

    def test_empty_content_handling(self):
        """Test handling of empty files and sections.

        Every heading in the input has an empty body; explode and implode
        must still succeed.
        """
        empty_content = """# Empty Test

## Empty Section 1

## Empty Section 2

# Another Empty

"""

        original_file = self.temp_dir / "empty.md"
        original_file.write_text(empty_content, encoding='utf-8')

        # Should handle empty content gracefully
        self._assert_roundtrip_succeeds(original_file)
|
|
|
|
|
|
if __name__ == '__main__':
    # pytest.main() returns the run's exit code; propagate it so a failing
    # run is visible to the shell/CI when this file is executed as a script.
    raise SystemExit(pytest.main([__file__, "-v"]))