""" Tests for schema visualization output formatting and consistency. These tests ensure the visualization scripts produce consistent, well-formatted output in both emoji and ASCII modes, pinning down the exact style and format. """ import sys import subprocess import tempfile from pathlib import Path from textwrap import dedent import pytest # Add the project root to the path for imports sys.path.insert(0, str(Path(__file__).parent.parent)) from markitect.schema_generator import SchemaGenerator class TestSchemaVisualization: """Test schema visualization output formatting.""" @pytest.fixture def sample_markdown_file(self): """Create a sample markdown file for testing.""" content = dedent(""" # Main Title This is the introduction paragraph. ## Section One Content for section one. ### Subsection A - Item 1 - Item 2 ### Subsection B More content here. ```python def example(): return "code block" ``` ## Section Two Final section content. """).strip() with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(content) return Path(f.name) def test_visualize_schema_emoji_mode_output_format(self, sample_markdown_file): """Test that emoji mode produces expected output format.""" try: result = subprocess.run([ sys.executable, 'tools/visualize_schema.py', str(sample_markdown_file) ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) assert result.returncode == 0 output = result.stdout # Check document header assert "πŸ“‹ DOCUMENT STRUCTURE:" in output assert sample_markdown_file.name in output # Check main sections are present assert "πŸ“Š OVERVIEW" in output assert "πŸ“‘ HEADING STRUCTURE" in output assert "πŸ“ CONTENT STRUCTURE" in output assert "πŸ” COMPLEXITY ANALYSIS" in output assert "πŸ—ΊοΈ DOCUMENT MAP" in output # Check content formatting assert "Total Elements:" in output assert "Schema Properties:" in output assert "β”œβ”€ Level 1:" in output assert "│─ Level 2:" in output assert "│─ Level 3:" in output # Check content elements with emoji icons assert "πŸ“„ Paragraphs" in output assert "πŸ“‹ Lists" in output assert "πŸ’» Code Blocks" in output # Check complexity rating with emoji complexity_ratings = ["🟒 Simple", "🟑 Moderate", "πŸ”΄ Complex"] assert any(rating in output for rating in complexity_ratings) # Check document map uses '#' symbols (count matches actual headings) assert "H1: #" in output # 1 level-1 heading = 1 hash assert "H2: ##" in output # 2 level-2 headings = 2 hashes # Level 3 should have 2 headings based on our sample assert "H3: ##" in output # 2 level-3 headings = 2 hashes finally: sample_markdown_file.unlink() def test_visualize_schema_ascii_mode_output_format(self, sample_markdown_file): """Test that ASCII mode produces expected output format.""" try: result = subprocess.run([ sys.executable, 'tools/visualize_schema.py', str(sample_markdown_file), '--ascii' ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) assert result.returncode == 0 output = result.stdout # Check document header (ASCII version) assert "[DOC] DOCUMENT STRUCTURE:" in output assert sample_markdown_file.name in output # Check main sections are present (ASCII version) assert "OVERVIEW" in output assert "HEADING STRUCTURE" in output assert "CONTENT STRUCTURE" in output assert "COMPLEXITY ANALYSIS" in output assert "DOCUMENT MAP" in output # Check content formatting (ASCII tree structure) assert "Total Elements:" in output assert "Schema Properties:" in output assert "+- Level 1:" in output assert "|- Level 2:" in output assert "|- Level 3:" in output # Check content elements with ASCII tags assert "[P] Paragraphs" in output assert "[L] Lists" in output assert "[C] Code Blocks" in output # Check complexity rating (no emoji) complexity_ratings = ["Simple", "Moderate", "Complex"] assert any(rating in output for rating in complexity_ratings) # Ensure no emoji complexity indicators assert "🟒" not in output assert "🟑" not in output assert "πŸ”΄" not in output # Check document map uses '#' symbols (count matches actual headings) assert "H1: #" in output # 1 level-1 heading = 1 hash assert "H2: ##" in output # 2 level-2 headings = 2 hashes # Level 3 should have 2 headings based on our sample assert "H3: ##" in output # 2 level-3 headings = 2 hashes # Ensure no Unicode characters are present assert "πŸ“‹" not in output assert "πŸ“Š" not in output assert "πŸ“‘" not in output assert "πŸ“" not in output assert "πŸ”" not in output assert "πŸ—ΊοΈ" not in output assert "β”œ" not in output assert "β”‚" not in output finally: sample_markdown_file.unlink() def test_visualize_schema_depth_limitation(self, sample_markdown_file): """Test that depth limitation works correctly.""" try: result = subprocess.run([ sys.executable, 'tools/visualize_schema.py', str(sample_markdown_file), '--max-depth', '2' ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) assert result.returncode == 0 output = result.stdout # Should have Level 1 and Level 2 headings assert "β”œβ”€ Level 1:" in output assert "│─ Level 2:" in output # Should NOT have Level 3 headings due to depth limit assert "│─ Level 3:" not in output finally: sample_markdown_file.unlink() def test_schema_summary_emoji_mode_output_format(self, sample_markdown_file): """Test that schema summary emoji mode produces expected format.""" try: result = subprocess.run([ sys.executable, 'tools/schema_summary.py', str(sample_markdown_file) ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) assert result.returncode == 0 output = result.stdout lines = output.strip().split('\n') # Should have exactly 4 lines assert len(lines) == 4 # Check each line format assert lines[0].startswith("πŸ“‹ ") assert sample_markdown_file.name in lines[0] assert lines[1].startswith("πŸ—οΈ Structure:") assert "H1:" in lines[1] and "H2:" in lines[1] and "H3:" in lines[1] assert lines[2].startswith("πŸ“ Content:") assert "Paragraphs:" in lines[2] assert "Lists:" in lines[2] assert lines[3].startswith("πŸ“Š Total:") assert "elements" in lines[3] finally: sample_markdown_file.unlink() def test_schema_summary_ascii_mode_output_format(self, sample_markdown_file): """Test that schema summary ASCII mode produces expected format.""" try: result = subprocess.run([ sys.executable, 'tools/schema_summary.py', str(sample_markdown_file), '--ascii' ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) assert result.returncode == 0 output = result.stdout lines = output.strip().split('\n') # Should have exactly 4 lines assert len(lines) == 4 # Check each line format (ASCII version) assert lines[0].startswith("[DOC] ") assert sample_markdown_file.name in lines[0] assert lines[1].startswith("[STRUCTURE] Structure:") assert "H1:" in lines[1] and "H2:" in lines[1] and "H3:" in lines[1] assert " -> " in lines[1] # ASCII arrow instead of β†’ assert lines[2].startswith("[CONTENT] Content:") assert "Paragraphs:" in lines[2] assert "Lists:" in lines[2] assert lines[3].startswith("[TOTAL] Total:") assert "elements" in lines[3] # Ensure no emoji characters assert "πŸ“‹" not in output assert "πŸ—οΈ" not in output assert "πŸ“" not in output assert "πŸ“Š" not in output finally: sample_markdown_file.unlink() def test_document_map_uses_hash_symbols_consistently(self, sample_markdown_file): """Test that document map consistently uses '#' symbols in both modes.""" try: # Test emoji mode result_emoji = subprocess.run([ sys.executable, 'tools/visualize_schema.py', str(sample_markdown_file) ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) # Test ASCII mode result_ascii = subprocess.run([ sys.executable, 'tools/visualize_schema.py', str(sample_markdown_file), '--ascii' ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) assert result_emoji.returncode == 0 assert result_ascii.returncode == 0 emoji_output = result_emoji.stdout ascii_output = result_ascii.stdout # Both should use '#' symbols in document map, not Unicode blocks for output in [emoji_output, ascii_output]: # Should have '#' symbols (actual count matches headings in sample) assert "H1: #" in output assert "H2: ##" in output # 2 level-2 headings # Should NOT have Unicode block characters assert "β–ˆ" not in output finally: sample_markdown_file.unlink() def test_no_horizontal_lines_or_frames(self, sample_markdown_file): """Test that output doesn't contain horizontal lines or frames.""" try: # Test both modes for args in [[], ['--ascii']]: result = subprocess.run([ sys.executable, 'tools/visualize_schema.py', str(sample_markdown_file) ] + args, capture_output=True, text=True, cwd=Path(__file__).parent.parent) assert result.returncode == 0 output = result.stdout # Should NOT contain frame characters assert "β”Œ" not in output assert "β””" not in output assert "┐" not in output assert "β”˜" not in output assert "β”‚" not in args or "β”‚" not in output # Only check if not in ASCII mode # Should NOT contain horizontal separator lines lines = output.split('\n') separator_lines = [line for line in lines if line.strip() and all(c in '─-' for c in line.strip())] assert len(separator_lines) == 0, f"Found separator lines: {separator_lines}" finally: sample_markdown_file.unlink() def test_visualization_handles_empty_file(self): """Test visualization with empty markdown file.""" with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write("") empty_file = Path(f.name) try: result = subprocess.run([ sys.executable, 'tools/visualize_schema.py', str(empty_file) ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) # Should handle empty file gracefully assert result.returncode == 0 output = result.stdout # Should still show basic structure assert "DOCUMENT STRUCTURE:" in output assert "OVERVIEW" in output finally: empty_file.unlink() def test_visualization_error_handling(self): """Test error handling for non-existent files.""" result = subprocess.run([ sys.executable, 'tools/visualize_schema.py', 'nonexistent_file.md' ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) assert result.returncode == 1 assert "File not found" in result.stdout def test_help_output_format(self): """Test help output contains expected information.""" result = subprocess.run([ sys.executable, 'tools/visualize_schema.py', '--help' ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) assert result.returncode == 0 output = result.stdout assert "Visualize markdown document schema structure" in output assert "--max-depth" in output assert "--ascii" in output assert "Use ASCII characters only" in output class TestOutputConsistency: """Test output consistency and formatting standards.""" def test_ascii_mode_contains_no_unicode(self): """Ensure ASCII mode output contains no Unicode characters.""" content = "# Test\n\nSome content with **bold** text.\n\n## Section\n\n- Item 1\n- Item 2" with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(content) test_file = Path(f.name) try: result = subprocess.run([ sys.executable, 'tools/visualize_schema.py', str(test_file), '--ascii' ], capture_output=True, text=True, cwd=Path(__file__).parent.parent) assert result.returncode == 0 output = result.stdout # Check that all characters are ASCII (code points < 128) for char in output: assert ord(char) < 128, f"Non-ASCII character found: {char} (ord: {ord(char)})" finally: test_file.unlink() def test_section_header_formatting_consistency(self): """Test that section headers are consistently formatted.""" content = "# Title\n\nContent here.\n\n## Section\n\nMore content." with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(content) test_file = Path(f.name) try: for mode_args in [[], ['--ascii']]: result = subprocess.run([ sys.executable, 'tools/visualize_schema.py', str(test_file) ] + mode_args, capture_output=True, text=True, cwd=Path(__file__).parent.parent) assert result.returncode == 0 output = result.stdout lines = output.split('\n') # Find section headers section_headers = [] for line in lines: line = line.strip() if line and any(section in line for section in ['OVERVIEW', 'HEADING STRUCTURE', 'CONTENT STRUCTURE', 'COMPLEXITY ANALYSIS', 'DOCUMENT MAP']): section_headers.append(line) # All section headers should be in uppercase for header in section_headers: # Remove emoji/tags and check the text part text_part = header.split()[-2:] # Get last two words like "HEADING STRUCTURE" text = ' '.join(text_part) if text.isupper(): assert True # Good else: # Allow for cases like "DOCUMENT MAP" where the emoji/tag is separate main_text = ' '.join([word for word in header.split() if word.isupper() or word in ['STRUCTURE', 'ANALYSIS', 'MAP', 'OVERVIEW', 'CONTENT']]) assert main_text, f"Section header not properly formatted: {header}" finally: test_file.unlink()