""" Roundtrip validation tests for Issue #149 - Explode-Implode Variants Tests that all variants can successfully explode a markdown file and then implode it back to produce equivalent content, ensuring full reversibility. """ import pytest import tempfile import re from pathlib import Path from typing import List, Dict, Any from markitect.explode_variants import ( ExplodeVariant, ExplodeOptions, ImplodeOptions, get_variant_factory, create_variant ) class RoundtripValidator: """Helper class for validating explode-implode roundtrips.""" @staticmethod def normalize_content(content: str) -> str: """ Normalize markdown content for comparison. Removes excessive whitespace and normalizes line endings. """ # Normalize line endings content = content.replace('\r\n', '\n').replace('\r', '\n') # Remove excessive blank lines (more than 3 consecutive) content = re.sub(r'\n{4,}', '\n\n\n', content) # Strip leading/trailing whitespace content = content.strip() return content @staticmethod def extract_headings(content: str) -> List[Dict[str, Any]]: """Extract headings with their levels and titles for comparison.""" headings = [] lines = content.split('\n') for i, line in enumerate(lines): heading_match = re.match(r'^(#{1,6})\s+(.+)', line.strip()) if heading_match: level = len(heading_match.group(1)) title = heading_match.group(2).strip() headings.append({ 'level': level, 'title': title, 'line': i + 1 }) return headings @staticmethod def validate_heading_structure(original_headings: List[Dict], reconstructed_headings: List[Dict]) -> bool: """Validate that heading structure is preserved.""" if len(original_headings) != len(reconstructed_headings): return False for orig, recon in zip(original_headings, reconstructed_headings): if orig['level'] != recon['level'] or orig['title'] != recon['title']: return False return True @staticmethod def validate_content_preservation(original: str, reconstructed: str) -> Dict[str, Any]: """ Comprehensive validation of content preservation. Returns validation results with details about any differences. """ orig_norm = RoundtripValidator.normalize_content(original) recon_norm = RoundtripValidator.normalize_content(reconstructed) orig_headings = RoundtripValidator.extract_headings(orig_norm) recon_headings = RoundtripValidator.extract_headings(recon_norm) return { 'exact_match': orig_norm == recon_norm, 'heading_structure_preserved': RoundtripValidator.validate_heading_structure(orig_headings, recon_headings), 'original_headings': orig_headings, 'reconstructed_headings': recon_headings, 'original_length': len(orig_norm), 'reconstructed_length': len(recon_norm), 'word_count_original': len(orig_norm.split()), 'word_count_reconstructed': len(recon_norm.split()) } class TestRoundtripValidation: """Test roundtrip validation for all variants.""" @pytest.fixture def sample_content_simple(self): """Simple test content.""" return """# Introduction This is the introduction to the document. ## Overview A brief overview of what's covered. ## Goals - Goal 1 - Goal 2 - Goal 3 # Chapter 1: Getting Started Let's begin with the basics. ## Installation How to install the software. ## Configuration Basic configuration steps. # Chapter 2: Advanced Topics More advanced material. ## Performance Optimization Tips for better performance. ## Security Considerations Important security notes. # Conclusion Final thoughts and summary. """ @pytest.fixture def sample_content_complex(self): """Complex test content with various markdown features.""" return """--- title: "Comprehensive Guide" author: "Test Author" version: "1.0" --- # Introduction Welcome to this **comprehensive guide** with various markdown features. ## What You'll Learn - Basic concepts - Advanced techniques - Best practices ### Prerequisites You should have: 1. Basic knowledge 2. Required software 3. Access to examples # Tutorial: Getting Started This tutorial covers the fundamentals. ## Step 1: Installation ```bash pip install example-package ``` ### System Requirements - Python 3.8+ - 4GB RAM minimum - 10GB disk space ## Step 2: Configuration Create a configuration file: ```yaml settings: debug: false timeout: 30 ``` # Reference Manual Complete API documentation. ## Core Functions ### `initialize()` Initializes the system. **Parameters:** - `config`: Configuration object - `debug`: Enable debug mode **Returns:** - Boolean success status ### `process_data(data)` Processes input data. > **Note:** This function is asynchronous. # Appendix A: Troubleshooting Common issues and solutions. ## Error Messages ### "Connection Failed" Check your network settings. ### "Invalid Configuration" Verify your config file syntax. # Appendix B: Examples Code examples and snippets. ## Basic Usage ```python import example result = example.process("data") ``` # Conclusion Thank you for reading this guide. ## Next Steps 1. Try the examples 2. Read the FAQ 3. Join the community ### Resources - [Documentation](https://docs.example.com) - [GitHub](https://github.com/example/repo) - [Support](mailto:support@example.com) """ def test_flat_variant_roundtrip_simple(self, sample_content_simple): """Test flat variant roundtrip with simple content.""" self._test_variant_roundtrip(ExplodeVariant.FLAT, sample_content_simple) def test_flat_variant_roundtrip_complex(self, sample_content_complex): """Test flat variant roundtrip with complex content.""" self._test_variant_roundtrip(ExplodeVariant.FLAT, sample_content_complex) def test_hierarchical_variant_roundtrip_simple(self, sample_content_simple): """Test hierarchical variant roundtrip with simple content.""" self._test_variant_roundtrip(ExplodeVariant.HIERARCHICAL, sample_content_simple) def test_hierarchical_variant_roundtrip_complex(self, sample_content_complex): """Test hierarchical variant roundtrip with complex content.""" self._test_variant_roundtrip(ExplodeVariant.HIERARCHICAL, sample_content_complex) def test_semantic_variant_roundtrip_simple(self, sample_content_simple): """Test semantic variant roundtrip with simple content.""" self._test_variant_roundtrip(ExplodeVariant.SEMANTIC, sample_content_simple) def test_semantic_variant_roundtrip_complex(self, sample_content_complex): """Test semantic variant roundtrip with complex content.""" self._test_variant_roundtrip(ExplodeVariant.SEMANTIC, sample_content_complex) def _test_variant_roundtrip(self, variant_type: ExplodeVariant, content: str): """Generic roundtrip test for any variant.""" with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) # Step 1: Create original file original_file = temp_path / f"test_{variant_type.value}.md" original_file.write_text(content, encoding='utf-8') # Step 2: Explode the file variant = create_variant(variant_type) explode_options = ExplodeOptions( variant=variant_type, output_dir=temp_path / f"exploded_{variant_type.value}", create_manifest=True ) explode_result = variant.explode(original_file, explode_options) # Validate explosion was successful assert explode_result.success, f"Explosion failed: {explode_result.errors}" assert explode_result.output_directory.exists() assert explode_result.manifest_path is not None assert explode_result.manifest_path.exists() assert len(explode_result.files_created) > 0 # Step 3: Implode the directory back implode_options = ImplodeOptions( output_file=temp_path / f"reconstructed_{variant_type.value}.md", preserve_front_matter=True, section_spacing=2 ) implode_result = variant.implode(explode_result.output_directory, implode_options) # Validate implosion was successful assert implode_result.success, f"Implosion failed: {implode_result.errors}" assert implode_result.output_file.exists() assert len(implode_result.files_processed) > 0 # Step 4: Compare original and reconstructed content reconstructed_content = implode_result.output_file.read_text(encoding='utf-8') validation = RoundtripValidator.validate_content_preservation( content, reconstructed_content ) # Assert key preservation requirements assert validation['heading_structure_preserved'], \ f"Heading structure not preserved for {variant_type.value} variant" # Allow for minor formatting differences but require structural integrity # Note: Front matter and spacing differences can cause small word count variations assert abs(validation['word_count_original'] - validation['word_count_reconstructed']) <= 15, \ f"Significant word count difference for {variant_type.value} variant" def test_all_variants_produce_different_structures(self, sample_content_complex): """Test that different variants produce different directory structures.""" with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) original_file = temp_path / "test.md" original_file.write_text(sample_content_complex, encoding='utf-8') results = {} # Explode using each variant for variant_type in [ExplodeVariant.FLAT, ExplodeVariant.HIERARCHICAL, ExplodeVariant.SEMANTIC]: variant = create_variant(variant_type) options = ExplodeOptions( variant=variant_type, output_dir=temp_path / f"exploded_{variant_type.value}", create_manifest=True ) result = variant.explode(original_file, options) assert result.success # Analyze directory structure subdirs = [d.name for d in result.output_directory.iterdir() if d.is_dir()] results[variant_type] = { 'subdirs': subdirs, 'subdir_count': len(subdirs), 'files_created': len(result.files_created) } # Verify that variants produce different structures flat_subdirs = set(results[ExplodeVariant.FLAT]['subdirs']) hierarchical_subdirs = set(results[ExplodeVariant.HIERARCHICAL]['subdirs']) semantic_subdirs = set(results[ExplodeVariant.SEMANTIC]['subdirs']) # At least one variant should be different from the others assert not (flat_subdirs == hierarchical_subdirs == semantic_subdirs), \ "All variants produced identical directory structures" def test_manifest_enables_accurate_detection(self, sample_content_simple): """Test that manifests enable accurate variant detection during implosion.""" with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) original_file = temp_path / "test.md" original_file.write_text(sample_content_simple, encoding='utf-8') factory = get_variant_factory() # Test each variant for variant_type in [ExplodeVariant.FLAT, ExplodeVariant.HIERARCHICAL, ExplodeVariant.SEMANTIC]: # Explode with manifest variant = create_variant(variant_type) explode_options = ExplodeOptions( variant=variant_type, output_dir=temp_path / f"test_{variant_type.value}", create_manifest=True ) explode_result = variant.explode(original_file, explode_options) assert explode_result.success # Detect variant from directory detection_result = factory.detect_variant(explode_result.output_directory) assert detection_result.variant == variant_type, \ f"Failed to detect {variant_type.value} variant from manifest" assert detection_result.manifest_found, \ f"Manifest not found for {variant_type.value} variant" def test_roundtrip_with_front_matter_preservation(self): """Test roundtrip with front matter preservation.""" content_with_fm = """--- title: "Test Document" author: "Test Author" tags: ["test", "markdown"] published: 2023-01-01 --- # Main Content This document has front matter. ## Section 1 Content here. # Conclusion End of document. """ with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) original_file = temp_path / "test_fm.md" original_file.write_text(content_with_fm, encoding='utf-8') # Test with flat variant (similar for others) variant = create_variant(ExplodeVariant.FLAT) explode_options = ExplodeOptions( variant=ExplodeVariant.FLAT, preserve_front_matter=True, create_manifest=True ) explode_result = variant.explode(original_file, explode_options) assert explode_result.success implode_options = ImplodeOptions( preserve_front_matter=True ) implode_result = variant.implode(explode_result.output_directory, implode_options) assert implode_result.success # Check that front matter is preserved using semantic equivalence reconstructed_content = implode_result.output_file.read_text(encoding='utf-8') # Use frontmatter parser to check semantic equivalence from markitect.matter_frontmatter.parser import FrontmatterParser parser = FrontmatterParser() reconstructed_fm = parser.extract_frontmatter(reconstructed_content) # Check that all expected values are preserved assert reconstructed_fm.get('title') == 'Test Document' assert reconstructed_fm.get('author') == 'Test Author' assert reconstructed_fm.get('tags') == ['test', 'markdown'] # Published date may be parsed as datetime.date object published = reconstructed_fm.get('published') assert published is not None, "Published date should be preserved" # Convert to string for comparison if it's a date object published_str = str(published) if hasattr(published, 'strftime') else published assert '2023-01-01' in str(published_str) def test_roundtrip_error_handling(self): """Test roundtrip error handling with malformed content.""" with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) # Test with empty file empty_file = temp_path / "empty.md" empty_file.write_text("", encoding='utf-8') variant = create_variant(ExplodeVariant.FLAT) options = ExplodeOptions(variant=ExplodeVariant.FLAT) result = variant.explode(empty_file, options) # Should handle gracefully (may succeed with minimal structure) assert isinstance(result.success, bool) # Test with non-existent file nonexistent_file = temp_path / "nonexistent.md" result = variant.explode(nonexistent_file, options) assert not result.success assert len(result.errors) > 0 class TestRoundtripPerformance: """Test performance characteristics of roundtrip operations.""" def test_large_document_roundtrip(self): """Test roundtrip with a large document.""" # Generate large content large_content = "# Introduction\n\nThis is a large document.\n\n" for i in range(1, 21): # 20 chapters large_content += f"# Chapter {i}\n\n" large_content += f"This is chapter {i} content.\n\n" for j in range(1, 6): # 5 sections per chapter large_content += f"## Section {i}.{j}\n\n" large_content += f"Content for section {i}.{j}.\n\n" large_content += "Lorem ipsum dolor sit amet, consectetur adipiscing elit. " * 10 large_content += "\n\n" large_content += "# Conclusion\n\nThe end of the document.\n" with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) original_file = temp_path / "large_doc.md" original_file.write_text(large_content, encoding='utf-8') # Test with hierarchical variant (most complex) variant = create_variant(ExplodeVariant.HIERARCHICAL) explode_options = ExplodeOptions( variant=ExplodeVariant.HIERARCHICAL, create_manifest=True ) explode_result = variant.explode(original_file, explode_options) assert explode_result.success implode_options = ImplodeOptions() implode_result = variant.implode(explode_result.output_directory, implode_options) assert implode_result.success # Verify structure preservation reconstructed_content = implode_result.output_file.read_text(encoding='utf-8') validation = RoundtripValidator.validate_content_preservation( large_content, reconstructed_content ) assert validation['heading_structure_preserved'] if __name__ == '__main__': pytest.main([__file__, "-v"])