Files
markitect-main/tools/visualize_schema.py
tegwick 60f33443ae
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat(schema): add semantic schema generation as default mode
schema-generate now builds content-aware schemas from the document's
section hierarchy instead of counting markdown syntax elements. Detects
key-value tables, data tables, link lists, and mixed content patterns
to produce schemas that reflect the actual document outline.

Old behavior preserved via --mode syntactic. Validator and visualization
tools pinned to syntactic mode for compatibility.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 18:49:50 +01:00

200 lines
6.6 KiB
Python

#!/usr/bin/env python3
"""
Beautiful command-line visualization for markdown schema structure.
"""
import sys
import json
import argparse
import os
from pathlib import Path
# Add markitect to path
sys.path.insert(0, '.')
from markitect.schema_generator import SchemaGenerator
# Issue #37: Import shared emoji/ASCII output mode utilities
from emoji_utils import determine_output_mode, add_emoji_arguments
def visualize_schema_structure(file_path, max_depth=None, ascii_only=False):
    """Print a tree-style summary of a markdown document's schema to stdout.

    The schema is generated with ``SchemaGenerator`` in ``'syntactic'`` mode
    and rendered as five sections: overview, heading structure, content
    structure, complexity analysis and a bar-style document map.

    Args:
        file_path: Path to the markdown file (anything ``Path`` accepts).
        max_depth: Forwarded unchanged as ``max_depth`` to
            ``generate_schema_from_file``.
        ascii_only: When true, use plain-ASCII labels and tree markers
            instead of the emoji set.

    Raises:
        IndexError, ValueError: If a key under the schema's heading
            properties does not have the form ``<prefix>_<integer>``.
    """
    source = Path(file_path)
    schema = SchemaGenerator().generate_schema_from_file(
        source, max_depth=max_depth, mode='syntactic'
    )

    # Section titles, per-element icons and tree markers for the chosen mode.
    if ascii_only:
        icons = {
            'doc': '[DOC]',
            'overview': 'OVERVIEW',
            'headings': 'HEADING STRUCTURE',
            'content': 'CONTENT STRUCTURE',
            'complexity': 'COMPLEXITY ANALYSIS',
            'map': 'DOCUMENT MAP',
            'paragraphs': '[P]',
            'lists': '[L]',
            'code_blocks': '[C]',
            'blockquotes': '[Q]',
            'tables': '[T]',
            'links': '[LINK]',
            'emphasis': '[*]',
            'simple': 'Simple',
            'moderate': 'Moderate',
            'complex': 'Complex',
            'tree_main': '+-',
            'tree_sub': '|-',
            'bar_char': '#',
        }
    else:
        icons = {
            'doc': '📋',
            'overview': '📊 OVERVIEW',
            'headings': '📑 HEADING STRUCTURE',
            'content': '📝 CONTENT STRUCTURE',
            'complexity': '🔍 COMPLEXITY ANALYSIS',
            'map': '🗺️ DOCUMENT MAP',
            'paragraphs': '📄',
            'lists': '📋',
            'code_blocks': '💻',
            'blockquotes': '💬',
            'tables': '📊',
            'links': '🔗',
            'emphasis': '',
            'simple': '🟢 Simple',
            'moderate': '🟡 Moderate',
            'complex': '🔴 Complex',
            'tree_main': '├─',
            'tree_sub': '│─',
            'bar_char': '#',
        }

    print(f"{icons['doc']} DOCUMENT STRUCTURE: {source.name}")
    print()

    properties = schema.get('properties', {})

    # Document overview
    metadata = properties.get('metadata', {}).get('properties', {})
    total_elements = metadata.get('total_elements', {}).get('const', 0)
    print(icons['overview'])
    print(f" Total Elements: {total_elements}")
    print(f" Schema Properties: {len(properties)}")
    print()

    # Heading structure. Keys are parsed once into (level, count) pairs,
    # ordered by numeric level, and reused by the sections further down.
    # NOTE(review): keys are presumably of the form "level_<n>" -- confirm
    # against SchemaGenerator's syntactic output.
    heading_counts = []
    if 'headings' in properties:
        print(icons['headings'])
        heading_specs = properties['headings'].get('properties', {})
        heading_counts = sorted(
            (
                (int(key.split('_')[1]), spec.get('minItems', 0))
                for key, spec in heading_specs.items()
            ),
            key=lambda pair: pair[0],
        )
        for level, count in heading_counts:
            # Deeper levels are indented further and use the sub-branch marker.
            indent = " " + " " * (level - 1)
            marker = icons['tree_sub'] if level > 1 else icons['tree_main']
            print(f"{indent}{marker} Level {level}: {count} heading{'s' if count != 1 else ''}")
        print()

    # Content structure: one row per element type present in the schema.
    print(icons['content'])
    content_elements = [
        ('paragraphs', icons['paragraphs'], 'Paragraphs'),
        ('lists', icons['lists'], 'Lists'),
        ('code_blocks', icons['code_blocks'], 'Code Blocks'),
        ('blockquotes', icons['blockquotes'], 'Blockquotes'),
        ('tables', icons['tables'], 'Tables'),
        ('links', icons['links'], 'Links'),
        ('emphasis', icons['emphasis'], 'Emphasis'),
    ]
    for element_key, icon, name in content_elements:
        if element_key in properties:
            count = properties[element_key].get('minItems', 0)
            print(f" {icon} {name:<15}: {count:>3}")
    print()

    # Complexity analysis: a score built from heading depth, the number of
    # distinct content property keys and the total element count.
    print(icons['complexity'])
    heading_levels = len(heading_counts)
    content_types = sum(1 for key in properties if key not in ('headings', 'metadata'))
    complexity_score = (heading_levels * 2) + content_types + (total_elements // 20)
    if complexity_score < 10:
        complexity = icons['simple']
    elif complexity_score < 20:
        complexity = icons['moderate']
    else:
        complexity = icons['complex']
    print(f" Heading Depth: {heading_levels} levels")
    print(f" Content Types: {content_types}")
    print(f" Complexity: {complexity}")

    # Structure types summary (only when the metadata carries the list).
    if metadata.get('structure_types'):
        structure_types = metadata['structure_types'].get('const', [])
        print(f" Unique AST Types: {len(set(structure_types))}")
    print()

    # Document map: one bar per heading level, capped at max_bar characters
    # with the overflow shown as "+N".
    print(icons['map'])
    if heading_counts:
        max_bar = 20
        counts_by_level = dict(heading_counts)
        for level in range(1, max(counts_by_level) + 1):
            count = counts_by_level.get(level, 0)
            if count > 0:
                indent = " " * (level - 1)
                bars = icons['bar_char'] * min(count, max_bar)
                remaining = max(0, count - max_bar)
                bar_display = bars + (f" +{remaining}" if remaining > 0 else "")
                print(f"{indent}H{level}: {bar_display} ({count})")
    print()
def main():
    """Command-line entry point: parse arguments and print the visualization.

    Exits with status 1 after printing a message when the given file path
    does not exist.
    """
    cli = argparse.ArgumentParser(description='Visualize markdown document schema structure')
    cli.add_argument('file_path', help='Path to the markdown file')
    cli.add_argument('--max-depth', type=int, help='Maximum heading depth to include')
    # Issue #37: register the shared emoji/ASCII output format arguments
    add_emoji_arguments(cli)
    args = cli.parse_args()

    target = Path(args.file_path)
    if not target.exists():
        print(f"File not found: {args.file_path}")
        raise SystemExit(1)

    # Issue #37: the shared utility resolves the output mode from the CLI
    # flags and the MARKITECT_EMOJI environment variable
    use_ascii = determine_output_mode(args)
    visualize_schema_structure(
        args.file_path,
        max_depth=args.max_depth,
        ascii_only=use_ascii,
    )
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()