""" Consolidated Roundtrip Tests for Enhanced Explode-Implode System This test suite consolidates and updates all roundtrip tests to work with the new variant system, ensuring backward compatibility while testing new functionality. """ import pytest import tempfile import subprocess from pathlib import Path from typing import List, Dict, Any from markitect.explode_variants import ExplodeVariant, get_variant_factory class TestRoundtripBase: """Base class for roundtrip tests with common utilities.""" def setup_method(self): """Set up temporary directory for each test.""" self.temp_dir = Path(tempfile.mkdtemp()) def teardown_method(self): """Clean up temporary directory after each test.""" import shutil shutil.rmtree(self.temp_dir, ignore_errors=True) def run_markitect_command(self, args: List[str]) -> subprocess.CompletedProcess: """Run a markitect command and return the result.""" cmd = ["python", "-m", "markitect.cli"] + args return subprocess.run( cmd, capture_output=True, text=True, cwd="/home/worsch/markitect_project" ) def validate_basic_structure_preservation(self, original: str, reconstructed: str) -> Dict[str, Any]: """Validate that basic document structure is preserved.""" import re # Extract headings from both documents orig_headings = re.findall(r'^(#+)\s+(.+)', original, re.MULTILINE) recon_headings = re.findall(r'^(#+)\s+(.+)', reconstructed, re.MULTILINE) return { 'original_heading_count': len(orig_headings), 'reconstructed_heading_count': len(recon_headings), 'headings_preserved': len(orig_headings) == len(recon_headings), 'original_headings': orig_headings, 'reconstructed_headings': recon_headings } class TestVariantRoundtrips(TestRoundtripBase): """Test roundtrips with all variants using CLI commands.""" @pytest.fixture def sample_document(self): """Sample document for testing.""" return """# Book Title This is the introduction to our book. ## Chapter 1: Getting Started Welcome to the first chapter. ### Section 1.1: Overview Basic overview content. ### Section 1.2: Setup Setup instructions here. ## Chapter 2: Advanced Topics More advanced material. ### Section 2.1: Deep Dive Detailed explanations. # Conclusion Final thoughts and summary. """ def test_flat_variant_cli_roundtrip(self, sample_document): """Test flat variant roundtrip using CLI commands.""" self._test_variant_roundtrip(sample_document, "flat") def test_hierarchical_variant_cli_roundtrip(self, sample_document): """Test hierarchical variant roundtrip using CLI commands.""" self._test_variant_roundtrip(sample_document, "hierarchical") def test_semantic_variant_cli_roundtrip(self, sample_document): """Test semantic variant roundtrip using CLI commands.""" self._test_variant_roundtrip(sample_document, "semantic") def _test_variant_roundtrip(self, content: str, variant: str): """Generic variant roundtrip test.""" # Step 1: Create original file original_file = self.temp_dir / f"test_{variant}.md" original_file.write_text(content, encoding='utf-8') # Step 2: Explode using specific variant exploded_dir = self.temp_dir / f"test_{variant}.mdd" result = self.run_markitect_command([ "md-explode", str(original_file), "--variant", variant, "--output-dir", str(exploded_dir) ]) assert result.returncode == 0, f"Explode failed: {result.stderr}" assert exploded_dir.exists() # Verify manifest was created manifest_file = exploded_dir / "manifest.md" assert manifest_file.exists() # Step 3: Implode back (should auto-detect variant) reconstructed_file = self.temp_dir / f"reconstructed_{variant}.md" result = self.run_markitect_command([ "md-implode", str(exploded_dir), "--output", str(reconstructed_file) ]) assert result.returncode == 0, f"Implode failed: {result.stderr}" assert reconstructed_file.exists() # Step 4: Validate content preservation reconstructed_content = reconstructed_file.read_text(encoding='utf-8') validation = self.validate_basic_structure_preservation(content, reconstructed_content) assert validation['headings_preserved'], f"Headings not preserved in {variant} variant" # Verify key content is present assert "# Book Title" in reconstructed_content assert "## Chapter 1: Getting Started" in reconstructed_content assert "### Section 1.1: Overview" in reconstructed_content assert "# Conclusion" in reconstructed_content class TestBackwardCompatibilityRoundtrips(TestRoundtripBase): """Test backward compatibility with legacy behavior.""" def test_default_behavior_roundtrip(self): """Test that default behavior (flat variant) works like before.""" content = """# Introduction Basic introduction content. ## Overview Overview section. # Main Content Main content here. # Conclusion Final thoughts. """ # Create original file original_file = self.temp_dir / "test.md" original_file.write_text(content, encoding='utf-8') # Explode without specifying variant (should default to flat) result = self.run_markitect_command([ "md-explode", str(original_file) ]) assert result.returncode == 0 # Should create .mdd directory with manifest exploded_dir = original_file.with_suffix('.mdd') assert exploded_dir.exists() assert (exploded_dir / "manifest.md").exists() # Implode back reconstructed_file = self.temp_dir / "reconstructed.md" result = self.run_markitect_command([ "md-implode", str(exploded_dir), "--output", str(reconstructed_file) ]) assert result.returncode == 0 # Validate content reconstructed_content = reconstructed_file.read_text(encoding='utf-8') assert "# Introduction" in reconstructed_content assert "# Main Content" in reconstructed_content assert "# Conclusion" in reconstructed_content def test_legacy_exploded_directory_handling(self): """Test that legacy exploded directories can still be imploded.""" # Create a structure that looks like legacy exploded content legacy_dir = self.temp_dir / "legacy_structure" legacy_dir.mkdir() # Create some markdown files without manifest (legacy_dir / "intro.md").write_text("# Introduction\n\nIntro content.") (legacy_dir / "chapter1.md").write_text("# Chapter 1\n\nChapter content.") (legacy_dir / "conclusion.md").write_text("# Conclusion\n\nFinal thoughts.") # Should still be able to implode result = self.run_markitect_command([ "md-implode", str(legacy_dir) ]) assert result.returncode == 0 # Check that output file was created output_file = legacy_dir.parent / f"{legacy_dir.name}_imploded.md" assert output_file.exists() content = output_file.read_text(encoding='utf-8') assert "# Introduction" in content assert "# Chapter 1" in content assert "# Conclusion" in content class TestComplexRoundtrips(TestRoundtripBase): """Test roundtrips with complex content and features.""" def test_front_matter_preservation_roundtrip(self): """Test that front matter is preserved through roundtrips.""" content_with_fm = """--- title: "Test Document" author: "Test Author" tags: ["test", "markdown"] version: 1.0 --- # Main Content This document has front matter. ## Section 1 Content here. # Conclusion End of document. """ original_file = self.temp_dir / "test_fm.md" original_file.write_text(content_with_fm, encoding='utf-8') # Test with each variant for variant in ["flat", "hierarchical", "semantic"]: # Explode exploded_dir = self.temp_dir / f"test_fm_{variant}.mdd" result = self.run_markitect_command([ "md-explode", str(original_file), "--variant", variant, "--output-dir", str(exploded_dir) ]) assert result.returncode == 0 # Implode reconstructed_file = self.temp_dir / f"reconstructed_fm_{variant}.md" result = self.run_markitect_command([ "md-implode", str(exploded_dir), "--output", str(reconstructed_file) ]) assert result.returncode == 0 # Verify front matter preservation - check for semantic equivalence reconstructed_content = reconstructed_file.read_text(encoding='utf-8') # Use frontmatter parser to check semantic equivalence from markitect.matter_frontmatter.parser import FrontmatterParser parser = FrontmatterParser() reconstructed_fm = parser.extract_frontmatter(reconstructed_content) # Check that all expected values are preserved assert reconstructed_fm.get('title') == 'Test Document' assert reconstructed_fm.get('author') == 'Test Author' assert reconstructed_fm.get('tags') == ['test', 'markdown'] assert reconstructed_fm.get('version') == 1.0 def test_unicode_and_special_characters_roundtrip(self): """Test roundtrip with unicode and special characters.""" unicode_content = """# Tëst Dócümënt This document contains ünïcödë characters. ## Spëcïál Chàráctërs - Émojis: 🚀 📝 ✅ - Symbols: © ® ™ € £ ¥ - Math: ∑ ∞ π √ ≈ ≠ ### Çødë Blöck ```python def hëllö_wörld(): print("Hëllö, Wörld! 🌍") ``` # Cönclüsïön End öf tëst. """ original_file = self.temp_dir / "unicode_test.md" original_file.write_text(unicode_content, encoding='utf-8') # Test with flat variant result = self.run_markitect_command([ "md-explode", str(original_file), "--variant", "flat" ]) assert result.returncode == 0 exploded_dir = original_file.with_suffix('.mdd') assert exploded_dir.exists() # Implode back reconstructed_file = self.temp_dir / "unicode_reconstructed.md" result = self.run_markitect_command([ "md-implode", str(exploded_dir), "--output", str(reconstructed_file) ]) assert result.returncode == 0 # Verify unicode preservation reconstructed_content = reconstructed_file.read_text(encoding='utf-8') assert "Tëst Dócümënt" in reconstructed_content assert "🚀 📝 ✅" in reconstructed_content assert "hëllö_wörld" in reconstructed_content def test_large_document_roundtrip(self): """Test roundtrip with a large document.""" # Generate large content large_content = "# Large Document Test\n\nThis tests performance with large documents.\n\n" for chapter in range(1, 11): # 10 chapters large_content += f"# Chapter {chapter}\n\n" large_content += f"This is chapter {chapter} content.\n\n" for section in range(1, 6): # 5 sections per chapter large_content += f"## Section {chapter}.{section}\n\n" large_content += f"Content for section {chapter}.{section}.\n\n" large_content += "Lorem ipsum dolor sit amet, consectetur adipiscing elit. " * 20 large_content += "\n\n" large_content += "# Conclusion\n\nEnd of large document.\n" original_file = self.temp_dir / "large_doc.md" original_file.write_text(large_content, encoding='utf-8') # Test with hierarchical variant (most complex) result = self.run_markitect_command([ "md-explode", str(original_file), "--variant", "hierarchical" ]) assert result.returncode == 0 exploded_dir = original_file.with_suffix('.mdd') assert exploded_dir.exists() # Verify many files were created md_files = list(exploded_dir.glob("**/*.md")) assert len(md_files) > 10 # Should have many files # Implode back reconstructed_file = self.temp_dir / "large_reconstructed.md" result = self.run_markitect_command([ "md-implode", str(exploded_dir), "--output", str(reconstructed_file) ]) assert result.returncode == 0 # Verify structure preservation reconstructed_content = reconstructed_file.read_text(encoding='utf-8') validation = self.validate_basic_structure_preservation(large_content, reconstructed_content) assert validation['headings_preserved'] class TestErrorHandlingRoundtrips(TestRoundtripBase): """Test error handling in roundtrip scenarios.""" def test_malformed_markdown_handling(self): """Test handling of malformed markdown.""" malformed_content = """# Valid Header Some content here. ## Another header # Missing spacing No space before content. ###Too many hashes without space # Final header """ original_file = self.temp_dir / "malformed.md" original_file.write_text(malformed_content, encoding='utf-8') # Should still work despite malformed content result = self.run_markitect_command([ "md-explode", str(original_file) ]) assert result.returncode == 0 exploded_dir = original_file.with_suffix('.mdd') assert exploded_dir.exists() # Should be able to implode back result = self.run_markitect_command([ "md-implode", str(exploded_dir) ]) assert result.returncode == 0 def test_empty_content_handling(self): """Test handling of empty files and sections.""" empty_content = """# Empty Test ## Empty Section 1 ## Empty Section 2 # Another Empty """ original_file = self.temp_dir / "empty.md" original_file.write_text(empty_content, encoding='utf-8') # Should handle empty content gracefully result = self.run_markitect_command([ "md-explode", str(original_file) ]) assert result.returncode == 0 exploded_dir = original_file.with_suffix('.mdd') assert exploded_dir.exists() result = self.run_markitect_command([ "md-implode", str(exploded_dir) ]) assert result.returncode == 0 if __name__ == '__main__': pytest.main([__file__, "-v"])