markitect-main/tools/visualize_schema.py

#!/usr/bin/env python3
"""
Beautiful command-line visualization for markdown schema structure.
"""

import sys
import json
import argparse
import os
from pathlib import Path

# Put the current working directory first on sys.path so the `markitect` import below resolves
sys.path.insert(0, '.')

from markitect.schema_generator import SchemaGenerator
# Issue #37: Import shared emoji/ASCII output mode utilities
from emoji_utils import determine_output_mode, add_emoji_arguments

# Section titles and glyphs used when output must be plain ASCII.
_ASCII_ICONS = {
    'doc': '[DOC]',
    'overview': 'OVERVIEW',
    'headings': 'HEADING STRUCTURE',
    'content': 'CONTENT STRUCTURE',
    'complexity': 'COMPLEXITY ANALYSIS',
    'map': 'DOCUMENT MAP',
    'paragraphs': '[P]',
    'lists': '[L]',
    'code_blocks': '[C]',
    'blockquotes': '[Q]',
    'tables': '[T]',
    'links': '[LINK]',
    'emphasis': '[*]',
    'simple': 'Simple',
    'moderate': 'Moderate',
    'complex': 'Complex',
    'tree_main': '+-',
    'tree_sub': '|-',
    'bar_char': '#',
}

# Emoji / box-drawing counterparts of the ASCII table (same keys).
_EMOJI_ICONS = {
    'doc': '📋',
    'overview': '📊 OVERVIEW',
    'headings': '📑 HEADING STRUCTURE',
    'content': '📝 CONTENT STRUCTURE',
    'complexity': '🔍 COMPLEXITY ANALYSIS',
    'map': '🗺️  DOCUMENT MAP',
    'paragraphs': '📄',
    'lists': '📋',
    'code_blocks': '💻',
    'blockquotes': '💬',
    'tables': '📊',
    'links': '🔗',
    'emphasis': '✨',
    'simple': '🟢 Simple',
    'moderate': '🟡 Moderate',
    'complex': '🔴 Complex',
    'tree_main': '├─',
    'tree_sub': '│─',
    'bar_char': '#',
}

# Schema property keys reported in the content section, in display order.
# Each key is also the key of its icon in the icon tables above.
_CONTENT_ELEMENTS = (
    ('paragraphs', 'Paragraphs'),
    ('lists', 'Lists'),
    ('code_blocks', 'Code Blocks'),
    ('blockquotes', 'Blockquotes'),
    ('tables', 'Tables'),
    ('links', 'Links'),
    ('emphasis', 'Emphasis'),
)

# Longest bar drawn in the document map; larger counts are shown as "+N".
_MAX_BAR_WIDTH = 20


def _iter_heading_levels(properties):
    """Yield ``(label, level, count)`` for each heading entry, ordered by schema key.

    ``label`` is the text after the first underscore of the key, ``level`` is
    that text converted to ``int`` and ``count`` is the entry's ``minItems``
    (0 when absent).  Keys are presumably of the form ``level_<n>`` -- confirm
    against SchemaGenerator.  A key without an underscore or with a
    non-numeric suffix raises (IndexError / ValueError) instead of being
    skipped silently.
    """
    heading_props = properties.get('headings', {}).get('properties', {})
    for key in sorted(heading_props):
        label = key.split('_')[1]
        yield label, int(label), heading_props[key].get('minItems', 0)


def _print_heading_structure(properties, icons):
    """Print one indented line per heading level with its heading count."""
    print(icons['headings'])
    for label, level, count in _iter_heading_levels(properties):
        # Deeper levels are indented further; only level 1 gets the main marker.
        indent = "  " + "  " * (level - 1)
        marker = icons['tree_sub'] if level > 1 else icons['tree_main']
        plural = '' if count == 1 else 's'
        print(f"{indent}{marker} Level {label}: {count} heading{plural}")
    print()


def _print_content_structure(properties, icons):
    """Print the ``minItems`` count of every content element present in the schema."""
    print(icons['content'])
    for key, title in _CONTENT_ELEMENTS:
        if key in properties:
            count = properties[key].get('minItems', 0)
            print(f"  {icons[key]} {title:<15}: {count:>3}")
    print()


def _print_complexity(properties, metadata, total_elements, icons):
    """Print heading depth, content-type count and the derived complexity label."""
    print(icons['complexity'])

    heading_levels = len(properties.get('headings', {}).get('properties', {}))
    content_types = sum(1 for key in properties if key not in ('headings', 'metadata'))

    # Heuristic score: heading levels weigh double, plus one point per 20 elements.
    score = (heading_levels * 2) + content_types + (total_elements // 20)
    if score < 10:
        label = icons['simple']
    elif score < 20:
        label = icons['moderate']
    else:
        label = icons['complex']

    print(f"  Heading Depth: {heading_levels} levels")
    print(f"  Content Types: {content_types}")
    print(f"  Complexity: {label}")

    if metadata.get('structure_types'):
        structure_types = metadata['structure_types'].get('const', [])
        print(f"  Unique AST Types: {len(set(structure_types))}")

    print()


def _print_document_map(properties, icons):
    """Print a bar per heading level (levels 1..max) whose count is positive."""
    print(icons['map'])

    if 'headings' in properties:
        level_counts = {level: count for _, level, count in _iter_heading_levels(properties)}
        max_level = max(level_counts) if level_counts else 1

        for level in range(1, max_level + 1):
            count = level_counts.get(level, 0)
            if count > 0:
                indent = "  " * (level - 1)
                bars = icons['bar_char'] * min(count, _MAX_BAR_WIDTH)
                overflow = max(0, count - _MAX_BAR_WIDTH)
                bar_display = bars + (f" +{overflow}" if overflow > 0 else "")
                print(f"{indent}H{level}: {bar_display} ({count})")

    print()


def visualize_schema_structure(file_path, max_depth=None, ascii_only=False):
    """Print a tree-style summary of a markdown document's schema to stdout.

    The schema is produced by ``SchemaGenerator.generate_schema_from_file`` in
    ``'syntactic'`` mode.  The report has five sections: overview, heading
    structure (only when the schema has a ``headings`` property), content
    structure, complexity analysis and a document map.

    Args:
        file_path: Path of the markdown file; wrapped in ``Path`` before use.
        max_depth: Forwarded unchanged to the schema generator as ``max_depth``.
        ascii_only: When true, use plain-ASCII labels instead of emoji.

    Returns:
        None.  All output is written with ``print``.
    """
    generator = SchemaGenerator()
    schema = generator.generate_schema_from_file(Path(file_path), max_depth=max_depth, mode='syntactic')

    icons = _ASCII_ICONS if ascii_only else _EMOJI_ICONS

    print(f"{icons['doc']} DOCUMENT STRUCTURE: {Path(file_path).name}")
    print()

    properties = schema.get('properties', {})

    # Document overview
    metadata = properties.get('metadata', {}).get('properties', {})
    total_elements = metadata.get('total_elements', {}).get('const', 0)

    print(icons['overview'])
    print(f"  Total Elements: {total_elements}")
    print(f"  Schema Properties: {len(properties)}")
    print()

    # The heading section is omitted entirely when the schema has no headings.
    if 'headings' in properties:
        _print_heading_structure(properties, icons)

    _print_content_structure(properties, icons)
    _print_complexity(properties, metadata, total_elements, icons)
    _print_document_map(properties, icons)

def main():
    """Parse command-line arguments and print the schema visualization.

    Exits with status 1 (after printing a message) when the given path does
    not exist or is a directory; otherwise calls
    ``visualize_schema_structure`` with the parsed options.
    """
    parser = argparse.ArgumentParser(description='Visualize markdown document schema structure')
    parser.add_argument('file_path', help='Path to the markdown file')
    parser.add_argument('--max-depth', type=int, help='Maximum heading depth to include')

    # Issue #37: Add emoji/ASCII output format arguments
    add_emoji_arguments(parser)

    args = parser.parse_args()

    source = Path(args.file_path)
    if not source.exists():
        print(f"File not found: {args.file_path}")
        sys.exit(1)
    # exists() is also true for directories; reject them here with a clear
    # message instead of passing one on to the schema generator.
    if source.is_dir():
        print(f"Not a file: {args.file_path}")
        sys.exit(1)

    # Issue #37: Determine output mode using shared utility
    # Respects CLI flags and MARKITECT_EMOJI environment variable
    use_ascii = determine_output_mode(args)

    visualize_schema_structure(args.file_path, args.max_depth, use_ascii)

# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()