# markitect-main/markitect/core/parser.py

"""
Markdown AST Parser.

This module provides functionality to parse markdown content into an
Abstract Syntax Tree (AST) using the markdown-it library.
"""

from markdown_it import MarkdownIt


def parse_markdown_to_ast(md_content: str) -> list:
    """
    Parse markdown content into a JSON-serializable AST.

    The text is tokenized with markdown-it using the "commonmark" preset
    with the ``table`` rule enabled. Every token is converted to a plain
    dictionary; a token's children are converted the same way and nested
    under the ``'children'`` key. Fields whose value is ``None`` are left
    out of the dictionary, and so is ``'children'`` when a token has no
    children.

    Args:
        md_content: Markdown text to parse

    Returns:
        List of token dictionaries representing the AST

    Example:
        ast = parse_markdown_to_ast("# Hello\\n\\nWorld")
    """
    # Table parsing is switched on by rule name on top of the "commonmark"
    # preset; no extra parser options are needed for it.
    md = MarkdownIt("commonmark").enable(['table'])
    tokens = md.parse(md_content)

    def token_to_dict(token):
        """Convert one token, including any children, to a plain dict."""
        children = token.children
        fields = {
            'type': token.type,
            'tag': token.tag,
            'attrs': token.attrs,
            'map': token.map,
            'nesting': token.nesting,
            'level': token.level,
            # A missing or empty children list is mapped to None so that the
            # filter below drops the key entirely.
            'children': [token_to_dict(child) for child in children] if children else None,
            'content': token.content,
            'markup': token.markup,
            'info': token.info,
            'meta': token.meta,
            'block': token.block,
            'hidden': token.hidden,
        }
        # Only None is dropped; falsy values such as '' or 0 or False are kept.
        return {key: value for key, value in fields.items() if value is not None}

    return [token_to_dict(token) for token in tokens]