Files
markitect-main/markitect/serializer.py
tegwick 8d4a73b6e3
Some checks failed
Test Suite / code-quality (push) Has been cancelled
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat: optimize code quality with pylint analysis and critical fixes - Issue #130
- Fixed critical CLI function redefinition (E0102): renamed duplicate list() to list_paradigms()
- Fixed CLI parameter passing errors (E1120): updated main() calls with standalone_mode=False
- Removed 20+ unused imports across 6 files (W0611 optimization)
- Added missing final newlines to 10 files (C0304 compliance)
- Optimized control flow patterns: removed unnecessary else-after-return
- Enhanced string comparisons using 'in' operator for better readability
- Maintained pylint score at 8.34/10 while eliminating critical runtime risks

Created follow-up Issue #131 for remaining optimizations:
- 200 broad exception handling instances
- 106 variable shadowing cases
- 278 import organization improvements
- 391 line length standardizations

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-06 03:03:38 +02:00

360 lines
12 KiB
Python

"""
AST to Markdown Serialization - Issue #2 Completion
This module provides functionality to serialize markdown-it AST tokens back into
markdown format, enabling roundtrip validation and document manipulation.
Key Features:
- Convert AST tokens back to markdown text
- Preserve front matter during serialization
- Support for content manipulation operations
- Roundtrip integrity validation
"""
from typing import List, Dict, Any, Optional
import yaml
class ASTSerializer:
    """
    Serializes markdown-it AST tokens back to markdown format.

    Tokens are plain dictionaries (``type``, ``tag``, ``nesting``, ``level``,
    ``content``, ``markup``, ``info``, ``attrs``, ``hidden``, ``children``).
    Missing keys are tolerated and treated as empty.

    Provides roundtrip capability: markdown -> AST -> markdown.
    Supports front matter preservation and content manipulation.
    """

    # Indentation added per list nesting level. Four spaces is enough for a
    # child list to nest under both "- " (content offset 2) and "1. "
    # (content offset 3) parent items.
    _LIST_INDENT = "    "

    # Fence marker used when a token does not carry a usable ``markup``.
    _DEFAULT_FENCE = "```"

    def __init__(self):
        """Initialize the AST serializer; it keeps no per-instance state."""

    def serialize_to_markdown(self, ast: List[Dict[str, Any]], front_matter: Optional[Dict[str, Any]] = None) -> str:
        """
        Convert AST tokens back to markdown format.

        Args:
            ast: List of markdown-it AST tokens (``None`` is treated as empty).
            front_matter: Optional YAML front matter dictionary. Anything that
                is not a non-empty dict is ignored.

        Returns:
            Markdown text, prefixed by a ``---`` delimited YAML block when
            front matter is supplied. The result has no trailing newline.

        Example:
            serializer = ASTSerializer()
            markdown = serializer.serialize_to_markdown(ast, front_matter)
        """
        markdown_parts = []

        # Add front matter if present
        if isinstance(front_matter, dict) and front_matter:
            yaml_content = yaml.dump(front_matter, default_flow_style=False).strip()
            markdown_parts.append(f"---\n{yaml_content}\n---\n\n")

        # Process AST tokens
        markdown_parts.append(self._process_tokens(ast or []))
        return ''.join(markdown_parts)

    @staticmethod
    def _heading_level(tag: str) -> int:
        """Return the heading depth (1-6) encoded in an ``hN`` tag, else 1."""
        if len(tag) == 2 and tag[0] == 'h' and tag[1] in '123456':
            return int(tag[1])
        return 1

    @staticmethod
    def _get_attr(attrs: Any, name: str) -> str:
        """Look up *name* in token attrs given as a dict or a list of pairs."""
        if not attrs:
            return ""
        if isinstance(attrs, dict):
            value = attrs.get(name, "")
        else:
            value = ""
            for attr in attrs:
                if len(attr) >= 2 and attr[0] == name:
                    value = attr[1]
                    break
        return "" if value is None else str(value)

    @staticmethod
    def _link_target(destination: str, title: str) -> str:
        """Build the ``(...)`` payload of a link or image, with optional title."""
        if title:
            escaped = title.replace('"', '\\"')
            return f'{destination} "{escaped}"'
        return destination

    def _process_tokens(self, tokens: List[Dict[str, Any]]) -> str:
        """
        Process a list of AST tokens into markdown text.

        Args:
            tokens: List of markdown-it block tokens.

        Returns:
            Markdown text representation with trailing blank lines removed.
            Token types that are not recognized are skipped.
        """
        markdown_lines = []
        current_line = ""
        list_level = 0

        for token in tokens or []:
            token_type = token.get('type', '')
            content = token.get('content', '') or ''
            markup = token.get('markup', '') or ''
            tag = token.get('tag', '') or ''
            info = token.get('info', '') or ''
            nesting = token.get('nesting', 0)
            level = token.get('level', 0) or 0

            if token_type == 'heading_open':
                current_line = '#' * self._heading_level(tag) + ' '

            elif token_type in ('heading_close', 'paragraph_close'):
                if current_line:
                    markdown_lines.append(current_line.rstrip())
                    current_line = ""
                # Paragraphs flagged hidden belong to tight list items; a
                # blank line after them would turn the list into a loose one.
                if not token.get('hidden', False):
                    markdown_lines.append("")

            elif token_type == 'paragraph_open':
                pass  # Start of paragraph

            elif token_type == 'inline':
                # Prefer the raw inline source; fall back to the children.
                if content:
                    current_line += content
                elif token.get('children'):
                    current_line += self._process_inline_children(token['children'])

            elif token_type == 'list_item_open':
                indent = self._LIST_INDENT * (level // 2)
                if markup in ('-', '*', '+'):
                    # Bullet markers are normalized to '-'.
                    current_line = indent + '- '
                elif markup in ('.', ')'):
                    # Ordered items carry the delimiter in ``markup`` and the
                    # item number in ``info``.
                    number = info if info.isdigit() else '1'
                    current_line = f"{indent}{number}{markup} "
                elif markup.isdigit():
                    current_line = indent + '1. '

            elif token_type == 'list_item_close':
                if current_line:
                    markdown_lines.append(current_line.rstrip())
                    current_line = ""

            elif token_type in ('bullet_list_open', 'ordered_list_open'):
                list_level += 1

            elif token_type in ('bullet_list_close', 'ordered_list_close'):
                list_level -= 1
                if list_level == 0:
                    markdown_lines.append("")  # Empty line after list

            elif token_type == 'blockquote_open':
                pass

            elif token_type == 'blockquote_close':
                markdown_lines.append("")

            elif token_type == 'code_block':
                markdown_lines.append(f"```{info}")
                markdown_lines.append(content.rstrip())
                markdown_lines.append("```")
                markdown_lines.append("")

            elif token_type == 'fence':
                if markup.startswith(('```', '~~~')):
                    fence_marker = markup
                else:
                    fence_marker = self._DEFAULT_FENCE
                if nesting == 1:  # Opening half of a split fence
                    markdown_lines.append(f"{fence_marker}{info}")
                elif nesting == -1:  # Closing half of a split fence
                    markdown_lines.append(fence_marker)
                    markdown_lines.append("")
                else:
                    # Self-contained fence: the code lives in ``content``.
                    markdown_lines.append(f"{fence_marker}{info}")
                    if content:
                        # Drop only the final newline so blank lines inside
                        # the code survive.
                        body = content[:-1] if content.endswith('\n') else content
                        markdown_lines.append(body)
                    markdown_lines.append(fence_marker)
                    markdown_lines.append("")

            elif token_type == 'hr':
                markdown_lines.append("---")
                markdown_lines.append("")

            elif token_type == 'text':
                current_line += content

        # Add any remaining content
        if current_line:
            markdown_lines.append(current_line.rstrip())

        # Clean up extra empty lines at the end
        while markdown_lines and markdown_lines[-1] == "":
            markdown_lines.pop()

        return '\n'.join(markdown_lines)

    def _process_inline_children(self, children: List[Dict[str, Any]]) -> str:
        """
        Process inline children tokens (emphasis, strong, links, etc.).

        Args:
            children: List of inline token children (``None`` is treated as
                empty).

        Returns:
            Processed inline markdown text. Unrecognized token types are
            skipped.
        """
        pieces = []
        # Destinations of links that are currently open, innermost last, so
        # that each link_close can emit the target of its own link_open.
        open_links = []

        for child in children or []:
            token_type = child.get('type', '')
            content = child.get('content', '') or ''
            markup = child.get('markup', '') or ''

            if token_type == 'text':
                pieces.append(content)
            elif token_type == 'code_inline':
                tick = markup or '`'
                pieces.append(f"{tick}{content}{tick}")
            elif token_type in ('em_open', 'em_close'):
                pieces.append(markup or '*')
            elif token_type in ('strong_open', 'strong_close'):
                pieces.append(markup or '**')
            elif token_type == 'link_open':
                attrs = child.get('attrs')
                open_links.append(
                    (self._get_attr(attrs, 'href'), self._get_attr(attrs, 'title'))
                )
                pieces.append("[")
            elif token_type == 'link_close':
                href, title = open_links.pop() if open_links else ("", "")
                # '#' is kept as the placeholder when no destination is known.
                pieces.append(f"]({self._link_target(href or '#', title)})")
            elif token_type == 'image':
                attrs = child.get('attrs')
                target = self._link_target(
                    self._get_attr(attrs, 'src'), self._get_attr(attrs, 'title')
                )
                pieces.append(f"![{content}]({target})")
            elif token_type == 'softbreak':
                pieces.append('\n')
            elif token_type == 'hardbreak':
                # Two trailing spaces are required for a hard line break.
                pieces.append('  \n')

        return ''.join(pieces)

    @staticmethod
    def _make_token(token_type: str, tag: str = "", nesting: int = 0, level: int = 0,
                    content: str = "", markup: str = "", block: bool = True,
                    children: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
        """Build one token dict with fresh ``attrs``/``meta`` containers."""
        token: Dict[str, Any] = {
            "type": token_type,
            "tag": tag,
            "attrs": {},
            "map": None,
            "nesting": nesting,
            "level": level,
        }
        if children is not None:
            token["children"] = children
        token.update({
            "content": content,
            "markup": markup,
            "info": "",
            "meta": {},
            "block": block,
            "hidden": False,
        })
        return token

    def modify_ast_content(self, ast: List[Dict[str, Any]], modifications: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Modify AST content based on provided modifications.

        Args:
            ast: Original AST tokens. The list itself is not mutated; the
                returned list shares the same token dicts (shallow copy).
            modifications: Dictionary of modifications to apply.

        Returns:
            Modified AST tokens.

        Supported modifications:
            - add_section: dict with optional ``title`` (default
              "New Section"), ``content`` (default "") and ``level``
              (default 2, clamped to 1-6). A heading, plus a paragraph when
              ``content`` is non-empty, is appended to the end of the AST.

        Note:
            ``update_front_matter`` is not handled here: front matter is not
            part of the token list this method receives.
        """
        modified_ast = list(ast) if ast else []

        section_data = (modifications or {}).get('add_section')
        if section_data is None:
            return modified_ast

        title = section_data.get('title', 'New Section')
        content = section_data.get('content', '')
        try:
            level = int(section_data.get('level', 2))
        except (TypeError, ValueError):
            level = 2
        # Only h1-h6 exist, and the serializer reads a single digit from the
        # tag, so anything outside that range would serialize incorrectly.
        level = min(max(level, 1), 6)

        make = self._make_token
        heading_tag = f"h{level}"
        heading_markup = "#" * level

        # Create new section tokens
        new_tokens = [
            make("heading_open", tag=heading_tag, nesting=1, markup=heading_markup),
            make("inline", level=1, content=title,
                 children=[make("text", content=title, block=False)]),
            make("heading_close", tag=heading_tag, nesting=-1, markup=heading_markup),
        ]

        if content:
            new_tokens.extend([
                make("paragraph_open", tag="p", nesting=1),
                make("inline", level=1, content=content,
                     children=[make("text", content=content, block=False)]),
                make("paragraph_close", tag="p", nesting=-1),
            ])

        # Add to end of AST
        modified_ast.extend(new_tokens)
        return modified_ast