# markitect-main/tests/test_schema_visualization.py

"""
Tests for schema visualization output formatting and consistency.

These tests ensure the visualization scripts produce consistent, well-formatted
output in both emoji and ASCII modes, pinning down the exact style and format.
"""

import sys
import subprocess
import tempfile
from pathlib import Path
from textwrap import dedent
import pytest

# Add the project root to the path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from markitect.schema_generator import SchemaGenerator


class TestSchemaVisualization:
    """Test schema visualization output formatting.

    Each test launches one of the command-line scripts (``visualize_schema.py``
    or ``schema_summary.py``) in a subprocess whose working directory is the
    parent of this test directory, then inspects the captured stdout.
    """

    @staticmethod
    def _run_script(script, *args):
        """Run a script with the current interpreter and capture its output.

        Args:
            script: File name of the script, resolved relative to the working
                directory (the parent of the directory containing this file).
            *args: Command-line arguments; each one is converted with ``str``.

        Returns:
            The ``subprocess.CompletedProcess`` with ``stdout`` and ``stderr``
            captured as text.
        """
        return subprocess.run(
            [sys.executable, script, *(str(arg) for arg in args)],
            capture_output=True,
            text=True,
            cwd=Path(__file__).parent.parent,
        )

    @pytest.fixture
    def sample_markdown_file(self):
        """Yield the path of a temporary sample markdown file.

        The sample contains one level-1 heading, two level-2 headings, two
        level-3 headings, several paragraphs, a bullet list and a fenced code
        block. The file is removed in the fixture teardown, so tests do not
        need their own cleanup and the file is deleted even when a test fails.
        """
        content = dedent("""
        # Main Title

        This is the introduction paragraph.

        ## Section One

        Content for section one.

        ### Subsection A

        - Item 1
        - Item 2

        ### Subsection B

        More content here.

        ```python
        def example():
            return "code block"
        ```

        ## Section Two

        Final section content.
        """).strip()

        # delete=False: the file must outlive the ``with`` block so that the
        # subprocess can open it by name.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(content)
        path = Path(f.name)

        yield path

        # Teardown: tolerate a file that has already been removed.
        if path.exists():
            path.unlink()

    def test_visualize_schema_emoji_mode_output_format(self, sample_markdown_file):
        """Test that emoji mode produces expected output format."""
        result = self._run_script('visualize_schema.py', sample_markdown_file)

        assert result.returncode == 0, result.stderr
        output = result.stdout

        # Check document header
        assert "📋 DOCUMENT STRUCTURE:" in output
        assert sample_markdown_file.name in output

        # Check main sections are present
        assert "📊 OVERVIEW" in output
        assert "📑 HEADING STRUCTURE" in output
        assert "📝 CONTENT STRUCTURE" in output
        assert "🔍 COMPLEXITY ANALYSIS" in output
        assert "🗺️  DOCUMENT MAP" in output

        # Check content formatting
        assert "Total Elements:" in output
        assert "Schema Properties:" in output
        assert "├─ Level 1:" in output
        assert "│─ Level 2:" in output
        assert "│─ Level 3:" in output

        # Check content elements with emoji icons
        assert "📄 Paragraphs" in output
        assert "📋 Lists" in output
        assert "💻 Code Blocks" in output

        # Check complexity rating with emoji
        complexity_ratings = ["🟢 Simple", "🟡 Moderate", "🔴 Complex"]
        assert any(rating in output for rating in complexity_ratings)

        # Check document map uses '#' symbols (count matches actual headings)
        assert "H1: #" in output  # 1 level-1 heading = 1 hash
        assert "H2: ##" in output  # 2 level-2 headings = 2 hashes
        # Level 3 should have 2 headings based on our sample
        assert "H3: ##" in output  # 2 level-3 headings = 2 hashes

    def test_visualize_schema_ascii_mode_output_format(self, sample_markdown_file):
        """Test that ASCII mode produces expected output format."""
        result = self._run_script('visualize_schema.py', sample_markdown_file, '--ascii')

        assert result.returncode == 0, result.stderr
        output = result.stdout

        # Check document header (ASCII version)
        assert "[DOC] DOCUMENT STRUCTURE:" in output
        assert sample_markdown_file.name in output

        # Check main sections are present (ASCII version)
        assert "OVERVIEW" in output
        assert "HEADING STRUCTURE" in output
        assert "CONTENT STRUCTURE" in output
        assert "COMPLEXITY ANALYSIS" in output
        assert "DOCUMENT MAP" in output

        # Check content formatting (ASCII tree structure)
        assert "Total Elements:" in output
        assert "Schema Properties:" in output
        assert "+- Level 1:" in output
        assert "|- Level 2:" in output
        assert "|- Level 3:" in output

        # Check content elements with ASCII tags
        assert "[P] Paragraphs" in output
        assert "[L] Lists" in output
        assert "[C] Code Blocks" in output

        # Check complexity rating (no emoji)
        complexity_ratings = ["Simple", "Moderate", "Complex"]
        assert any(rating in output for rating in complexity_ratings)
        # Ensure no emoji complexity indicators
        assert "🟢" not in output
        assert "🟡" not in output
        assert "🔴" not in output

        # Check document map uses '#' symbols (count matches actual headings)
        assert "H1: #" in output  # 1 level-1 heading = 1 hash
        assert "H2: ##" in output  # 2 level-2 headings = 2 hashes
        # Level 3 should have 2 headings based on our sample
        assert "H3: ##" in output  # 2 level-3 headings = 2 hashes

        # Ensure no Unicode characters are present
        assert "📋" not in output
        assert "📊" not in output
        assert "📑" not in output
        assert "📝" not in output
        assert "🔍" not in output
        assert "🗺️" not in output
        assert "├" not in output
        assert "│" not in output

    def test_visualize_schema_depth_limitation(self, sample_markdown_file):
        """Test that depth limitation works correctly."""
        result = self._run_script(
            'visualize_schema.py', sample_markdown_file, '--max-depth', '2'
        )

        assert result.returncode == 0, result.stderr
        output = result.stdout

        # Should have Level 1 and Level 2 headings
        assert "├─ Level 1:" in output
        assert "│─ Level 2:" in output

        # Should NOT have Level 3 headings due to depth limit
        assert "│─ Level 3:" not in output

    def test_schema_summary_emoji_mode_output_format(self, sample_markdown_file):
        """Test that schema summary emoji mode produces expected format."""
        result = self._run_script('schema_summary.py', sample_markdown_file)

        assert result.returncode == 0, result.stderr
        output = result.stdout
        lines = output.strip().split('\n')

        # Should have exactly 4 lines
        assert len(lines) == 4

        # Check each line format
        assert lines[0].startswith("📋 ")
        assert sample_markdown_file.name in lines[0]

        assert lines[1].startswith("🏗️  Structure:")
        assert "H1:" in lines[1] and "H2:" in lines[1] and "H3:" in lines[1]

        assert lines[2].startswith("📝 Content:")
        assert "Paragraphs:" in lines[2]
        assert "Lists:" in lines[2]

        assert lines[3].startswith("📊 Total:")
        assert "elements" in lines[3]

    def test_schema_summary_ascii_mode_output_format(self, sample_markdown_file):
        """Test that schema summary ASCII mode produces expected format."""
        result = self._run_script('schema_summary.py', sample_markdown_file, '--ascii')

        assert result.returncode == 0, result.stderr
        output = result.stdout
        lines = output.strip().split('\n')

        # Should have exactly 4 lines
        assert len(lines) == 4

        # Check each line format (ASCII version)
        assert lines[0].startswith("[DOC] ")
        assert sample_markdown_file.name in lines[0]

        assert lines[1].startswith("[STRUCTURE] Structure:")
        assert "H1:" in lines[1] and "H2:" in lines[1] and "H3:" in lines[1]
        assert " -> " in lines[1]  # ASCII arrow instead of →

        assert lines[2].startswith("[CONTENT] Content:")
        assert "Paragraphs:" in lines[2]
        assert "Lists:" in lines[2]

        assert lines[3].startswith("[TOTAL] Total:")
        assert "elements" in lines[3]

        # Ensure no emoji characters
        assert "📋" not in output
        assert "🏗️" not in output
        assert "📝" not in output
        assert "📊" not in output

    def test_document_map_uses_hash_symbols_consistently(self, sample_markdown_file):
        """Test that document map consistently uses '#' symbols in both modes."""
        result_emoji = self._run_script('visualize_schema.py', sample_markdown_file)
        result_ascii = self._run_script(
            'visualize_schema.py', sample_markdown_file, '--ascii'
        )

        assert result_emoji.returncode == 0, result_emoji.stderr
        assert result_ascii.returncode == 0, result_ascii.stderr

        # Both should use '#' symbols in document map, not Unicode blocks
        for output in [result_emoji.stdout, result_ascii.stdout]:
            # Should have '#' symbols (actual count matches headings in sample)
            assert "H1: #" in output
            assert "H2: ##" in output  # 2 level-2 headings

            # Should NOT have Unicode block characters
            assert "█" not in output

    def test_no_horizontal_lines_or_frames(self, sample_markdown_file):
        """Test that output doesn't contain horizontal lines or frames."""
        # Test both modes
        for args in [[], ['--ascii']]:
            result = self._run_script('visualize_schema.py', sample_markdown_file, *args)

            assert result.returncode == 0, result.stderr
            output = result.stdout

            # Should NOT contain frame corner characters in either mode
            for corner in "┌└┐┘":
                assert corner not in output, f"Found frame character {corner!r} (args: {args})"

            # The vertical bar is a legitimate tree connector in emoji mode
            # (e.g. "│─ Level 2:"), so it is only forbidden in ASCII mode.
            if '--ascii' in args:
                assert "│" not in output

            # Should NOT contain horizontal separator lines
            separator_lines = [
                line for line in output.split('\n')
                if line.strip() and all(c in '─-' for c in line.strip())
            ]
            assert len(separator_lines) == 0, f"Found separator lines: {separator_lines}"

    def test_visualization_handles_empty_file(self):
        """Test visualization with empty markdown file."""
        with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write("")
        empty_file = Path(f.name)

        try:
            result = self._run_script('visualize_schema.py', empty_file)

            # Should handle empty file gracefully
            assert result.returncode == 0, result.stderr
            output = result.stdout

            # Should still show basic structure
            assert "DOCUMENT STRUCTURE:" in output
            assert "OVERVIEW" in output

        finally:
            empty_file.unlink()

    def test_visualization_error_handling(self):
        """Test error handling for non-existent files."""
        result = self._run_script('visualize_schema.py', 'nonexistent_file.md')

        assert result.returncode == 1
        assert "File not found" in result.stdout

    def test_help_output_format(self):
        """Test help output contains expected information."""
        result = self._run_script('visualize_schema.py', '--help')

        assert result.returncode == 0, result.stderr
        output = result.stdout

        assert "Visualize markdown document schema structure" in output
        assert "--max-depth" in output
        assert "--ascii" in output
        assert "Use ASCII characters only" in output


class TestOutputConsistency:
    """Test output consistency and formatting standards.

    Both tests write a small markdown document to a temporary file, run
    ``visualize_schema.py`` on it in a subprocess and inspect the stdout.
    """

    # Section titles searched for when locating header lines in the output.
    SECTION_NAMES = (
        'OVERVIEW',
        'HEADING STRUCTURE',
        'CONTENT STRUCTURE',
        'COMPLEXITY ANALYSIS',
        'DOCUMENT MAP',
    )

    @staticmethod
    def _write_temp_markdown(content):
        """Write ``content`` to a new temporary ``.md`` file and return its path.

        The file is created with ``delete=False`` so the subprocess can open it
        by name; the caller is responsible for unlinking it.
        """
        with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
            f.write(content)
        return Path(f.name)

    @staticmethod
    def _run_visualizer(markdown_file, *args):
        """Run ``visualize_schema.py`` on ``markdown_file`` and capture its output.

        Args:
            markdown_file: Path of the markdown file to visualize.
            *args: Extra command-line flags appended after the file path.

        Returns:
            The ``subprocess.CompletedProcess`` with text ``stdout``/``stderr``.
        """
        return subprocess.run(
            [sys.executable, 'visualize_schema.py', str(markdown_file), *args],
            capture_output=True,
            text=True,
            cwd=Path(__file__).parent.parent,
        )

    def test_ascii_mode_contains_no_unicode(self):
        """Ensure ASCII mode output contains no Unicode characters."""
        content = "# Test\n\nSome content with **bold** text.\n\n## Section\n\n- Item 1\n- Item 2"
        test_file = self._write_temp_markdown(content)

        try:
            result = self._run_visualizer(test_file, '--ascii')

            assert result.returncode == 0, result.stderr

            # Collect every offending character (code point >= 128) so that a
            # failure reports all of them instead of only the first one.
            non_ascii = sorted({char for char in result.stdout if ord(char) >= 128})
            assert not non_ascii, "Non-ASCII characters found: " + ", ".join(
                f"{char} (ord: {ord(char)})" for char in non_ascii
            )

        finally:
            test_file.unlink()

    def test_section_header_formatting_consistency(self):
        """Test that section headers are consistently formatted."""
        content = "# Title\n\nContent here.\n\n## Section\n\nMore content."
        test_file = self._write_temp_markdown(content)

        try:
            for mode_args in [[], ['--ascii']]:
                result = self._run_visualizer(test_file, *mode_args)

                assert result.returncode == 0, result.stderr

                # A header is any output line that mentions a known section title.
                section_headers = [
                    stripped
                    for stripped in (line.strip() for line in result.stdout.split('\n'))
                    if any(section in stripped for section in self.SECTION_NAMES)
                ]

                # Without this guard the loop below would pass vacuously when
                # the output contains no section headers at all.
                assert section_headers, f"No section headers found (args: {mode_args})"

                # Every header must carry its title as an uppercase word; a
                # leading emoji or tag has no cased characters of its own and
                # is therefore ignored by str.isupper().
                for header in section_headers:
                    assert any(word.isupper() for word in header.split()), (
                        f"Section header not properly formatted: {header}"
                    )

        finally:
            test_file.unlink()