"""
Test content aggregation functionality for Issue #139: Implode directory to a markdown file.

This test module covers combining content from multiple files in correct order
while preserving all markdown formatting and handling index files appropriately.
"""

import shutil
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch

import pytest

# Import will fail initially (RED phase) until implementation exists
try:
    from markitect.plugins.builtin.markdown_commands import (
        aggregate_content,
        combine_markdown_files,
        preserve_markdown_formatting,
        handle_index_files,
        process_front_matter,
        ContentAggregator,
        FrontMatterConsolidator,
    )
except ImportError:
    # Expected during RED phase - tests should fail initially
    aggregate_content = None
    combine_markdown_files = None
    preserve_markdown_formatting = None
    handle_index_files = None
    process_front_matter = None
    ContentAggregator = None
    FrontMatterConsolidator = None


def _assert_in_order(text, *markers):
    """Assert that every marker occurs in ``text`` and in the given order.

    ``str.find`` returns -1 for a missing substring, so a bare
    ``a_pos < b_pos`` comparison silently passes when the first marker is
    absent. Checking presence first closes that hole.

    Args:
        text: The aggregated output to inspect.
        *markers: Substrings expected to appear in ``text``, listed in the
            order of their expected first occurrence.
    """
    positions = []
    for marker in markers:
        position = text.find(marker)
        assert position != -1, f"{marker!r} not found in output"
        positions.append(position)

    for (earlier, earlier_pos), (later, later_pos) in zip(
        zip(markers, positions), zip(markers[1:], positions[1:])
    ):
        assert earlier_pos < later_pos, (
            f"{earlier!r} (at {earlier_pos}) should precede "
            f"{later!r} (at {later_pos})"
        )


class TestContentAggregation:
    """Test aggregating content from multiple markdown files."""

    def setup_method(self):
        """Set up temporary directory for each test."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def teardown_method(self):
        """Clean up temporary directory after each test."""
        if self.temp_dir.exists():
            shutil.rmtree(self.temp_dir)

    def test_combine_simple_markdown_files(self):
        """Test combining simple markdown files in correct order."""
        # This should fail initially (RED phase)
        # Create test files
        (self.temp_dir / "01_intro.md").write_text("# Introduction\nIntro content here.")
        (self.temp_dir / "02_chapter1.md").write_text("## Chapter 1\nChapter content here.")
        (self.temp_dir / "03_conclusion.md").write_text("# Conclusion\nConclusion content.")

        files = [
            self.temp_dir / "01_intro.md",
            self.temp_dir / "02_chapter1.md",
            self.temp_dir / "03_conclusion.md",
        ]

        combined_content = combine_markdown_files(files)

        # Should combine in order with proper spacing
        assert "# Introduction" in combined_content
        assert "## Chapter 1" in combined_content
        assert "# Conclusion" in combined_content

        # Check order is maintained
        _assert_in_order(
            combined_content, "# Introduction", "## Chapter 1", "# Conclusion"
        )

    def test_preserve_markdown_formatting(self):
        """Test that all markdown formatting is preserved during aggregation."""
        # This should fail initially (RED phase)
        # Built line by line so the fixture's newlines are explicit and
        # independent of the indentation of this method.
        markdown_content = (
            "# Test Section\n"
            "\n"
            "## Subsection with **bold** and *italic*\n"
            "\n"
            "Here's some code:\n"
            "\n"
            "```python\n"
            "def example():\n"
            '    return "preserved"\n'
            "```\n"
            "\n"
            "| Table | Header |\n"
            "|-------|--------|\n"
            "| Cell  | Data   |\n"
            "\n"
            "- List item 1\n"
            "- List item 2\n"
            "  - Nested item\n"
            "\n"
            "> Blockquote text\n"
            "\n"
            "[Link text](http://example.com)\n"
            "\n"
            "![Image alt](image.png)\n"
        )

        (self.temp_dir / "formatted.md").write_text(markdown_content)

        preserved = preserve_markdown_formatting([self.temp_dir / "formatted.md"])

        # Should preserve all formatting elements
        assert "**bold**" in preserved
        assert "*italic*" in preserved
        assert "```python" in preserved
        assert "| Table | Header |" in preserved
        assert "- List item 1" in preserved
        assert "> Blockquote text" in preserved
        assert "[Link text]" in preserved
        assert "![Image alt]" in preserved

    def test_handle_index_files_as_parent_content(self):
        """Test handling index.md files as parent section content."""
        # This should fail initially (RED phase)
        # Create directory structure with index files
        part_dir = self.temp_dir / "part_1_introduction"
        part_dir.mkdir()
        (part_dir / "index.md").write_text("# Part 1: Introduction\nPart introduction content.")

        chapter_dir = part_dir / "chapter_1_overview"
        chapter_dir.mkdir()
        (chapter_dir / "index.md").write_text("## Chapter 1: Overview\nChapter overview content.")
        (chapter_dir / "section_1_1.md").write_text("### Section 1.1\nSection content.")

        aggregated = handle_index_files(self.temp_dir)

        # Should treat index.md files as parent section content
        assert "# Part 1: Introduction" in aggregated
        assert "Part introduction content." in aggregated
        assert "## Chapter 1: Overview" in aggregated
        assert "Chapter overview content." in aggregated
        assert "### Section 1.1" in aggregated

    def test_maintain_proper_spacing_between_sections(self):
        """Test maintaining appropriate whitespace between combined sections."""
        # This should fail initially (RED phase)
        files_content = [
            ("section1.md", "# Section 1\nContent 1"),
            ("section2.md", "# Section 2\nContent 2"),
            ("section3.md", "# Section 3\nContent 3"),
        ]

        files = []
        for filename, content in files_content:
            file_path = self.temp_dir / filename
            file_path.write_text(content)
            files.append(file_path)

        combined = combine_markdown_files(files)

        # Should have proper spacing between sections
        lines = combined.split('\n')

        # Find section boundaries and check spacing
        section1_end = None
        section2_start = None

        for i, line in enumerate(lines):
            if line == "Content 1":
                section1_end = i
            elif line == "# Section 2":
                section2_start = i
                break

        # Should have appropriate spacing between sections
        assert section2_start is not None
        assert section1_end is not None
        assert section2_start > section1_end + 1  # At least one empty line

    def test_process_files_in_hierarchical_order(self):
        """Test processing files in logical hierarchical order."""
        # This should fail initially (RED phase)
        # Create hierarchical structure
        structure = [
            ("part_1", "index.md", "# Part 1\nPart content"),
            ("part_1/chapter_1", "index.md", "## Chapter 1\nChapter content"),
            ("part_1/chapter_1", "section_1_1.md", "### Section 1.1\nSection content"),
            ("part_1/chapter_1", "section_1_2.md", "### Section 1.2\nMore section content"),
            ("part_1", "chapter_2.md", "## Chapter 2\nChapter 2 content"),
        ]

        for dir_path, filename, content in structure:
            full_dir = self.temp_dir / dir_path
            full_dir.mkdir(parents=True, exist_ok=True)
            (full_dir / filename).write_text(content)

        aggregated = aggregate_content(self.temp_dir)

        # Should maintain hierarchical order; every heading must be present,
        # otherwise a -1 position would let the ordering check pass vacuously.
        _assert_in_order(
            aggregated,
            "# Part 1",
            "## Chapter 1",
            "### Section 1.1",
            "### Section 1.2",
            "## Chapter 2",
        )

    def test_handle_empty_files_gracefully(self):
        """Test handling empty markdown files during aggregation."""
        # This should fail initially (RED phase)
        # Create files with various content states
        (self.temp_dir / "empty.md").write_text("")
        (self.temp_dir / "whitespace_only.md").write_text(" \n\t\n ")
        (self.temp_dir / "content.md").write_text("# Real Content\nActual content here.")

        files = [
            self.temp_dir / "empty.md",
            self.temp_dir / "whitespace_only.md",
            self.temp_dir / "content.md",
        ]

        combined = combine_markdown_files(files)

        # Should handle empty files gracefully
        assert "# Real Content" in combined
        assert "Actual content here." in combined
        # Should not break or include excessive whitespace


class TestFrontMatterHandling:
    """Test front matter detection, extraction, and consolidation."""

    def setup_method(self):
        """Set up temporary directory for each test."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def teardown_method(self):
        """Clean up temporary directory after each test."""
        if self.temp_dir.exists():
            shutil.rmtree(self.temp_dir)

    def test_detect_and_extract_front_matter(self):
        """Test detecting and extracting YAML front matter."""
        # This should fail initially (RED phase)
        content_with_frontmatter = (
            "---\n"
            'title: "Chapter 1"\n'
            'author: "John Doe"\n'
            'date: "2023-01-01"\n'
            "---\n"
            "\n"
            "# Chapter 1 Content\n"
            "\n"
            "Actual markdown content here.\n"
        )

        (self.temp_dir / "chapter1.md").write_text(content_with_frontmatter)

        front_matter, content = process_front_matter(self.temp_dir / "chapter1.md")

        # Should extract front matter correctly
        assert front_matter is not None
        assert "title" in front_matter
        assert front_matter["title"] == "Chapter 1"
        assert front_matter["author"] == "John Doe"

        # Should separate content correctly
        assert content.strip().startswith("# Chapter 1 Content")
        assert "---" not in content

    def test_consolidate_multiple_front_matter_blocks(self):
        """Test consolidating front matter from multiple files."""
        # This should fail initially (RED phase)
        file1_content = (
            "---\n"
            'title: "My Document"\n'
            'author: "Author Name"\n'
            "---\n"
            "\n"
            "# Section 1\n"
            "Content 1"
        )

        file2_content = (
            "---\n"
            'version: "1.0"\n'
            'tags: ["documentation", "guide"]\n'
            "---\n"
            "\n"
            "# Section 2\n"
            "Content 2"
        )

        (self.temp_dir / "file1.md").write_text(file1_content)
        (self.temp_dir / "file2.md").write_text(file2_content)

        files = [self.temp_dir / "file1.md", self.temp_dir / "file2.md"]

        consolidator = FrontMatterConsolidator()
        consolidated_fm, content = consolidator.consolidate(files)

        # Should merge front matter appropriately
        assert "title" in consolidated_fm
        assert "author" in consolidated_fm
        assert "version" in consolidated_fm
        assert "tags" in consolidated_fm

        # Content should be combined without front matter blocks
        assert "# Section 1" in content
        assert "# Section 2" in content
        assert content.count("---") == 0

    def test_handle_conflicting_front_matter(self):
        """Test handling conflicting front matter values."""
        # This should fail initially (RED phase)
        file1_content = (
            "---\n"
            'title: "Document Title"\n'
            'author: "First Author"\n'
            "---\n"
            "\n"
            "# Content 1"
        )

        file2_content = (
            "---\n"
            'title: "Different Title"\n'
            'author: "Second Author"\n'
            "---\n"
            "\n"
            "# Content 2"
        )

        (self.temp_dir / "file1.md").write_text(file1_content)
        (self.temp_dir / "file2.md").write_text(file2_content)

        files = [self.temp_dir / "file1.md", self.temp_dir / "file2.md"]

        consolidator = FrontMatterConsolidator(conflict_strategy="merge")
        consolidated_fm, content = consolidator.consolidate(files)

        # Should handle conflicts according to strategy
        assert "title" in consolidated_fm
        assert "author" in consolidated_fm
        # Could merge into lists, take first value, etc.
        # Exact behavior depends on implementation strategy

    def test_preserve_front_matter_in_output(self):
        """Test that consolidated front matter is properly placed in output."""
        # This should fail initially (RED phase)
        files_with_fm = [
            (
                "file1.md",
                "---\n"
                'title: "Combined Document"\n'
                "---\n"
                "\n"
                "# Section 1\n"
                "Content",
            ),
            (
                "file2.md",
                "---\n"
                'tags: ["test"]\n'
                "---\n"
                "\n"
                "# Section 2\n"
                "More content",
            ),
        ]

        files = []
        for filename, content in files_with_fm:
            file_path = self.temp_dir / filename
            file_path.write_text(content)
            files.append(file_path)

        aggregated = aggregate_content(self.temp_dir, preserve_front_matter=True)

        # Should have front matter at the beginning
        lines = aggregated.split('\n')
        assert lines[0] == "---"

        # Should find closing front matter delimiter
        closing_fm_index = None
        for i, line in enumerate(lines[1:], 1):
            if line == "---":
                closing_fm_index = i
                break

        assert closing_fm_index is not None

        # Content should follow front matter
        content_start = closing_fm_index + 1
        assert content_start < len(lines)


class TestContentAggregator:
    """Test the ContentAggregator class for comprehensive content processing."""

    def setup_method(self):
        """Set up temporary directory for each test."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def teardown_method(self):
        """Clean up temporary directory after each test."""
        if self.temp_dir.exists():
            shutil.rmtree(self.temp_dir)

    def test_content_aggregator_initialization(self):
        """Test creating ContentAggregator instances."""
        # This should fail initially (RED phase)
        aggregator = ContentAggregator()

        assert aggregator is not None
        assert hasattr(aggregator, 'preserve_formatting')
        assert hasattr(aggregator, 'handle_front_matter')
        assert hasattr(aggregator, 'section_spacing')

    def test_aggregator_with_custom_options(self):
        """Test aggregator with custom configuration."""
        # This should fail initially (RED phase)
        aggregator = ContentAggregator(
            preserve_formatting=True,
            handle_front_matter=True,
            section_spacing=2,
            include_toc=True,
        )

        # Create test structure
        (self.temp_dir / "chapter1.md").write_text("# Chapter 1\nContent 1")
        (self.temp_dir / "chapter2.md").write_text("# Chapter 2\nContent 2")

        result = aggregator.aggregate(self.temp_dir)

        assert result is not None
        assert "# Chapter 1" in result
        assert "# Chapter 2" in result

    def test_aggregator_processes_directory_recursively(self):
        """Test that aggregator processes nested directory structures."""
        # This should fail initially (RED phase)
        # Create nested structure
        part_dir = self.temp_dir / "part1"
        part_dir.mkdir()
        (part_dir / "index.md").write_text("# Part 1\nPart content")

        chapter_dir = part_dir / "chapter1"
        chapter_dir.mkdir()
        (chapter_dir / "content.md").write_text("## Chapter 1\nChapter content")

        aggregator = ContentAggregator(recursive=True)
        result = aggregator.aggregate(self.temp_dir)

        # Should process all nested content
        assert "# Part 1" in result
        assert "## Chapter 1" in result
        assert "Part content" in result
        assert "Chapter content" in result

    def test_aggregator_sorts_content_correctly(self):
        """Test that aggregator sorts content in logical order."""
        # This should fail initially (RED phase)
        # Create files that need sorting
        files_data = [
            ("03_conclusion.md", "# Conclusion"),
            ("01_introduction.md", "# Introduction"),
            ("02_main_content.md", "# Main Content"),
        ]

        for filename, content in files_data:
            (self.temp_dir / filename).write_text(content)

        aggregator = ContentAggregator(sort_files=True)
        result = aggregator.aggregate(self.temp_dir)

        # Should be in logical order; a missing heading must fail the test
        # rather than slip through as position -1.
        _assert_in_order(result, "# Introduction", "# Main Content", "# Conclusion")

    def test_aggregator_handles_large_directory_structures(self):
        """Test aggregator performance with larger directory structures."""
        # This should fail initially (RED phase)
        # Create larger structure
        for i in range(10):
            part_dir = self.temp_dir / f"part_{i+1:02d}"
            part_dir.mkdir()
            (part_dir / "index.md").write_text(f"# Part {i+1}\nPart {i+1} content")

            for j in range(5):
                chapter_file = part_dir / f"chapter_{j+1:02d}.md"
                chapter_file.write_text(f"## Chapter {i+1}.{j+1}\nChapter content")

        aggregator = ContentAggregator()
        result = aggregator.aggregate(self.temp_dir)

        # Should process all content
        assert result is not None
        assert len(result) > 0

        # Should contain expected number of parts and chapters
        part_count = result.count("# Part")
        chapter_count = result.count("## Chapter")

        assert part_count >= 10
        assert chapter_count >= 50