Files
markitect-main/markitect/document_manager.py
tegwick a350b96dd2 feat: implement graceful degradation and error reporting for md-render --edit
Complete redesign of edit mode using progressive enhancement principles:

ALWAYS WORKS:
- Content is rendered server-side first (like regular mode)
- Visible even if JavaScript completely fails
- Fallback rendering if CDN is blocked

USER-FRIENDLY ERROR REPORTING:
- Visual status indicator shows edit mode state
- Clear error messages displayed on page (not just console)
- Browser info and GitHub issue link for bug reports
- Helps users understand what's happening and how to help

PROGRESSIVE ENHANCEMENT:
- Step 1: Render content (guaranteed to work)
- Step 2: Try to add edit capabilities (bonus feature)
- If Step 2 fails, users still get full content + clear explanation

This solves the core issue where users got blank pages when JavaScript
failed, and provides much better debugging information for future issues.

Addresses feedback on #154: Html generated by "md-render --edit" does not show in firefox

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-15 00:55:46 +02:00

822 lines
30 KiB
Python

"""
Document manager for high-performance markdown file ingestion and AST caching.
This module implements the core functionality for Issue #2: Fast Document Loading & CLI Manipulation.
It provides performance-optimized document processing through AST caching and database integration.
Key Features:
- Parse once, access many times architecture
- AST cache loading < 50% of markdown parsing time
- Seamless integration with Issue #1 database foundation
- Comprehensive error handling and validation
"""
import json
import time
from pathlib import Path
from typing import Dict, Any, Optional
from .parser import parse_markdown_to_ast
from .frontmatter import FrontMatterParser
class DocumentManager:
"""
High-performance document manager for markdown file processing.
Implements the "parse once, manipulate many times" architecture by creating
fast-loading AST cache files alongside database metadata storage.
Architecture:
markdown file → AST parsing → cache file + database metadata
Performance Goal:
Cache loading must be < 50% of original parsing time
Attributes:
db_manager: Database manager for metadata storage
cache_dir: Directory for AST cache files
frontmatter_parser: YAML front matter processor
"""
def __init__(self, database_manager, cache_dir: Optional[Path] = None):
"""
Initialize document manager with database and cache configuration.
Args:
database_manager: DatabaseManager instance for metadata storage
cache_dir: Directory for AST cache files (default: .ast_cache)
"""
self.db_manager = database_manager
self.cache_dir = Path(cache_dir) if cache_dir else Path(".ast_cache")
self.cache_dir.mkdir(exist_ok=True)
self.frontmatter_parser = FrontMatterParser()
def ingest_file(self, file_path: Path) -> Dict[str, Any]:
"""
Ingest a markdown file with performance-optimized AST caching.
Implements the core "parse once, manipulate many times" workflow:
1. Validates file existence
2. Parses markdown content to AST
3. Creates fast-loading AST cache file
4. Stores metadata in database
5. Returns processing results with performance metrics
Args:
file_path: Path to markdown file to ingest
Returns:
Dictionary containing:
- ast: Parsed AST representation
- metadata: File metadata (filename, title, etc.)
- ast_cache_path: Path to created cache file
- parse_time: Time spent parsing markdown (seconds)
- cache_time: Time spent creating cache (seconds)
Raises:
FileNotFoundError: If the specified file doesn't exist
Performance:
Initial parse creates overhead, but subsequent cache loads
will be < 50% of this parse time.
"""
# Validate file exists
if not file_path.exists():
raise FileNotFoundError(f"File not found: {file_path}")
# Read file content
content = self._read_file_content(file_path)
# Parse front matter for metadata extraction
front_matter, markdown_content = self.frontmatter_parser.parse(content)
# Parse to AST with performance timing
ast, parse_time = self._parse_content_to_ast(content)
# Create cache file with performance timing
cache_file, cache_time = self._create_performance_cache(file_path.name, ast)
# Store in database (handles front matter parsing internally)
self._store_in_database(file_path.name, content)
# Return comprehensive result
return self._build_ingestion_result(
ast=ast,
filename=file_path.name,
front_matter=front_matter,
cache_file=cache_file,
parse_time=parse_time,
cache_time=cache_time
)
def _read_file_content(self, file_path: Path) -> str:
"""
Read file content with proper encoding.
Args:
file_path: Path to file to read
Returns:
File content as string
"""
return file_path.read_text(encoding='utf-8')
def _parse_content_to_ast(self, content: str) -> tuple[list, float]:
"""
Parse markdown content to AST with performance timing.
Args:
content: Raw markdown content
Returns:
Tuple of (AST tokens, parse_time_seconds)
"""
start_time = time.time()
ast = parse_markdown_to_ast(content)
parse_time = time.time() - start_time
return ast, parse_time
def _create_performance_cache(self, filename: str, ast: list) -> tuple[Path, float]:
"""
Create AST cache file with performance timing.
Args:
filename: Source filename for cache naming
ast: AST tokens to cache
Returns:
Tuple of (cache_file_path, cache_time_seconds)
"""
start_time = time.time()
cache_file = self._create_ast_cache(filename, ast)
cache_time = time.time() - start_time
return cache_file, cache_time
def _store_in_database(self, filename: str, content: str) -> None:
"""
Store document in database using existing API.
Args:
filename: Name of the file
content: Full markdown content (including front matter)
Note:
The database manager handles front matter parsing internally.
"""
self.db_manager.store_markdown_file(filename, content)
def _build_ingestion_result(self, ast: list, filename: str, front_matter: dict,
cache_file: Path, parse_time: float, cache_time: float) -> Dict[str, Any]:
"""
Build comprehensive ingestion result dictionary.
Args:
ast: Parsed AST tokens
filename: Source filename
front_matter: Parsed front matter metadata
cache_file: Path to created cache file
parse_time: Time spent parsing (seconds)
cache_time: Time spent caching (seconds)
Returns:
Structured result dictionary with all ingestion data
"""
return {
'ast': ast,
'metadata': {
'filename': filename,
'title': front_matter.get('title', ''),
},
'ast_cache_path': cache_file,
'parse_time': parse_time,
'cache_time': cache_time
}
def _create_ast_cache(self, filename: str, ast: list) -> Path:
"""
Create AST cache file in JSON format.
Args:
filename: Source filename for cache naming
ast: AST tokens to serialize
Returns:
Path to created cache file
"""
cache_filename = f"{filename}.ast.json"
cache_path = self.cache_dir / cache_filename
with open(cache_path, 'w', encoding='utf-8') as f:
json.dump(ast, f, indent=2, ensure_ascii=False)
return cache_path
def list_files(self) -> list:
"""
List all markdown files in the system.
Returns:
List of dictionaries containing file metadata including filename,
size, and modification date information.
"""
# Get files from database
db_files = self.db_manager.list_markdown_files()
# Enhance with file system information
enhanced_files = []
for file_info in db_files:
enhanced_info = {
'filename': file_info['filename'],
'id': file_info['id'],
'created_at': file_info['created_at'],
'front_matter': file_info['front_matter']
}
# Try to get file system stats if file exists
try:
file_path = Path(file_info['filename'])
if file_path.exists():
stat = file_path.stat()
enhanced_info['size'] = f"{stat.st_size} bytes"
enhanced_info['modified'] = stat.st_mtime
else:
enhanced_info['size'] = 'unknown'
enhanced_info['modified'] = 'file not found'
except Exception:
enhanced_info['size'] = 'unknown'
enhanced_info['modified'] = 'unknown'
enhanced_files.append(enhanced_info)
return enhanced_files
def get_file(self, file_path: str) -> Dict[str, Any]:
"""
Retrieve a markdown file from the database.
Args:
file_path: Path to the markdown file to retrieve
Returns:
Dictionary containing file content and metadata
Raises:
FileNotFoundError: If file is not found in database
"""
if not self.db_manager:
raise ValueError("Database manager not initialized")
# Get file from database
file_data = self.db_manager.get_markdown_file(file_path)
if file_data is None:
raise FileNotFoundError(f"File '{file_path}' not found in database")
return {
'content': file_data.get('content', ''),
'metadata': {
'filename': file_data.get('filename', file_path),
'front_matter': file_data.get('front_matter'),
'size': len(file_data.get('content', '')),
'modified': file_data.get('modified')
}
}
def render_file(self, input_file: str, output_file: str, template: str = None, css: str = None,
edit_mode: bool = False, editor_theme: str = 'github', keyboard_shortcuts: bool = True) -> Dict[str, Any]:
"""
Render a markdown file to HTML with client-side rendering capabilities.
Creates an HTML file with embedded markdown content that is rendered
client-side using JavaScript markdown parser.
Args:
input_file: Path to input markdown file
output_file: Path to output HTML file
template: Template to use (optional)
css: CSS file to include (optional)
Returns:
Dictionary with rendering results and metadata
Raises:
FileNotFoundError: If input file doesn't exist
"""
import json
input_path = Path(input_file)
output_path = Path(output_file)
# Validate input file exists
if not input_path.exists():
raise FileNotFoundError(f"Input file not found: {input_path}")
# Read markdown content
markdown_content = input_path.read_text(encoding='utf-8')
# Extract title from markdown (first h1 heading)
title = self._extract_title_from_markdown(markdown_content)
# Generate HTML content
html_content = self._generate_html_template(
markdown_content=markdown_content,
title=title,
css=css,
template=template,
edit_mode=edit_mode,
editor_theme=editor_theme,
keyboard_shortcuts=keyboard_shortcuts
)
# Write HTML file
output_path.parent.mkdir(parents=True, exist_ok=True)
output_path.write_text(html_content, encoding='utf-8')
return {
'input_file': str(input_path),
'output_file': str(output_path),
'title': title,
'template': template,
'css': css
}
def _extract_title_from_markdown(self, content: str) -> str:
"""Extract title from markdown content (first h1 heading)."""
import re
# Look for first h1 heading
match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
if match:
return match.group(1).strip()
return "Markdown Document"
def _generate_html_template(self, markdown_content: str, title: str, css: str = None, template: str = None,
edit_mode: bool = False, editor_theme: str = 'github', keyboard_shortcuts: bool = True) -> str:
"""Generate HTML template with embedded markdown and client-side rendering."""
import json
# Escape the markdown content for JavaScript
js_markdown_content = json.dumps(markdown_content)
# Handle CSS styles
css_content = ""
if css:
# Try to read CSS file content and embed it
try:
css_path = Path(css)
if css_path.exists():
css_file_content = css_path.read_text(encoding='utf-8')
css_content = f"<style>\n{css_file_content}\n</style>"
else:
# Fallback to link if file doesn't exist
css_content = f'<link rel="stylesheet" href="{css}">'
except Exception:
# Fallback to link on any error
css_content = f'<link rel="stylesheet" href="{css}">'
# Get template-specific CSS
template_css = self._get_template_css(template)
# Default CSS for basic styling
default_css = f"""
<style>
{template_css}
</style>
"""
# Add editor-specific content if in edit mode
editor_scripts = ""
editor_config = ""
editor_css = ""
body_classes = ""
if edit_mode:
body_classes = ' class="markitect-edit-mode"'
editor_css = """
<style>
.markitect-floating-header {
position: fixed;
top: 0;
left: 0;
right: 0;
background: rgba(255, 255, 255, 0.95);
border-bottom: 1px solid #ddd;
padding: 10px;
z-index: 1000;
backdrop-filter: blur(5px);
}
.markitect-section-editable {
border: 1px dashed transparent;
padding: 8px;
margin: 4px 0;
border-radius: 4px;
cursor: pointer;
}
.markitect-section-editable:hover {
border-color: #007acc;
background: rgba(0, 122, 204, 0.05);
}
.edit-mode textarea {
width: 100%;
min-height: 100px;
font-family: monospace;
border: 2px solid #007acc;
border-radius: 4px;
padding: 8px;
}
</style>"""
editor_config = f"""
const MARKITECT_EDIT_MODE = true;
const MARKITECT_EDITOR_CONFIG = {{
theme: '{editor_theme}',
keyboardShortcuts: {str(keyboard_shortcuts).lower()},
autosave: true,
sections: true
}};"""
editor_scripts = """
class MarkitectEditor {
constructor() {
this.initializeEditor();
this.setupKeyboardShortcuts();
}
initializeEditor() {
const header = document.createElement('div');
header.className = 'markitect-floating-header';
header.innerHTML = `
<button onclick="markitectEditor.save()">Save</button>
<button onclick="markitectEditor.togglePreview()">Toggle Preview</button>
<span id="save-status">Ready</span>
`;
document.body.insertBefore(header, document.body.firstChild);
this.makeContentEditable();
}
makeContentEditable() {
const content = document.getElementById('markdown-content');
if (content) {
content.addEventListener('click', this.handleSectionClick.bind(this));
this.markSections(content);
}
}
markSections(element) {
const sections = element.querySelectorAll('h1, h2, h3, h4, h5, h6, p, blockquote, pre, ul, ol');
sections.forEach((section, index) => {
section.classList.add('markitect-section-editable');
section.setAttribute('data-section', index);
});
}
handleSectionClick(event) {
const section = event.target.closest('.markitect-section-editable');
if (section && !section.querySelector('textarea')) {
this.editSection(section);
}
}
editSection(section) {
const originalContent = section.innerHTML;
const textarea = document.createElement('textarea');
textarea.value = this.htmlToMarkdown(originalContent);
textarea.className = 'edit-mode';
textarea.addEventListener('blur', () => {
section.innerHTML = marked.parse(textarea.value);
this.markSections(section.parentElement);
});
section.innerHTML = '';
section.appendChild(textarea);
textarea.focus();
}
htmlToMarkdown(html) {
// Simple HTML to Markdown conversion
return html.replace(/<[^>]*>/g, '').trim();
}
setupKeyboardShortcuts() {
if (MARKITECT_EDITOR_CONFIG.keyboardShortcuts) {
document.addEventListener('keydown', (event) => {
if (event.ctrlKey || event.metaKey) {
switch(event.key) {
case 's':
event.preventDefault();
this.save();
break;
case 'e':
event.preventDefault();
this.togglePreview();
break;
}
}
});
}
}
save() {
document.getElementById('save-status').textContent = 'Saved!';
setTimeout(() => {
document.getElementById('save-status').textContent = 'Ready';
}, 2000);
}
togglePreview() {
console.log('Toggle preview mode');
}
}
let markitectEditor;"""
# Edit mode status and error reporting section
edit_mode_html = ""
if edit_mode:
edit_mode_html = f"""
<div id="markitect-status" style="background: #e3f2fd; border-left: 4px solid #2196f3; padding: 12px; margin-bottom: 20px; font-family: monospace; font-size: 14px;">
<div style="font-weight: bold; color: #1976d2;">📝 Markitect Edit Mode</div>
<div id="status-message" style="margin-top: 8px;">Loading edit capabilities...</div>
<div id="error-details" style="display: none; background: #ffebee; border: 1px solid #f44336; padding: 8px; margin-top: 8px; border-radius: 4px;">
<div style="font-weight: bold; color: #c62828;">❌ Edit Mode Failed</div>
<div id="error-text" style="margin-top: 4px; color: #666;"></div>
<details style="margin-top: 8px;">
<summary style="cursor: pointer; color: #1976d2;">🐛 Help us fix this issue</summary>
<div style="margin-top: 8px; font-size: 12px; color: #666;">
Please report this error with your browser info:
<br>📋 Browser: <span id="browser-info"></span>
<br>🔗 Create issue: <a href="https://github.com/anthropics/markitect/issues/new" target="_blank" style="color: #1976d2;">GitHub Issues</a>
</div>
</details>
</div>
</div>"""
html_template = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title}</title>
{css_content}
{default_css}
{editor_css}
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"
onload="window.markitectMarkedLoaded = true"
onerror="window.markitectMarkedError = true"></script>
</head>
<body{body_classes}>
{edit_mode_html}
<div id="markdown-content"></div>
<script>
const markdownContent = {js_markdown_content};
{editor_config}
// Error reporting utility
function reportEditModeError(errorMsg, technicalDetails) {{
const statusDiv = document.getElementById('markitect-status');
const errorDiv = document.getElementById('error-details');
const errorText = document.getElementById('error-text');
const statusMsg = document.getElementById('status-message');
const browserInfo = document.getElementById('browser-info');
if (statusMsg) statusMsg.textContent = 'Edit mode unavailable - content displayed in read-only mode';
if (errorDiv) errorDiv.style.display = 'block';
if (errorText) errorText.textContent = errorMsg + (technicalDetails ? ' (' + technicalDetails + ')' : '');
if (browserInfo) browserInfo.textContent = navigator.userAgent.split(' ').slice(-2).join(' ');
}}
// Always render content first (graceful degradation)
document.addEventListener('DOMContentLoaded', function() {{
const contentDiv = document.getElementById('markdown-content');
// Step 1: Ensure content is always displayed
if (contentDiv) {{
if (typeof marked !== 'undefined') {{
try {{
contentDiv.innerHTML = marked.parse(markdownContent);
console.log('✓ Markdown rendered successfully');
}} catch (error) {{
contentDiv.innerHTML = '<p>Error rendering markdown: ' + error.message + '</p>';
{'reportEditModeError("Markdown parsing failed", error.message);' if edit_mode else ''}
}}
}} else {{
// Fallback: display raw markdown with basic formatting
const fallbackHtml = markdownContent
.replace(/^# (.*$)/gim, '<h1>$1</h1>')
.replace(/^## (.*$)/gim, '<h2>$1</h2>')
.replace(/^### (.*$)/gim, '<h3>$1</h3>')
.replace(/\\*\\*(.*?)\\*\\*/g, '<strong>$1</strong>')
.replace(/\\*(.*?)\\*/g, '<em>$1</em>')
.replace(/^- (.*$)/gim, '<li>$1</li>')
.replace(/\\n\\n/g, '<br><br>')
.replace(/\\n/g, '<br>');
contentDiv.innerHTML = '<div style="white-space: pre-wrap;">' + fallbackHtml + '</div>';
{'reportEditModeError("CDN library failed to load", "Using basic fallback rendering");' if edit_mode else ''}
}}
}}
// Step 2: Try to enhance with edit capabilities (if in edit mode)
{'if (typeof MARKITECT_EDIT_MODE !== \'undefined\' && MARKITECT_EDIT_MODE) {' if edit_mode else ''}
{'try {' if edit_mode else ''}
{editor_scripts if edit_mode else ''}
{'markitectEditor = new MarkitectEditor();' if edit_mode else ''}
{'document.getElementById("status-message").textContent = "✓ Edit mode active - click any section to edit";' if edit_mode else ''}
{'console.log("✓ Edit mode initialized successfully");' if edit_mode else ''}
{'} catch (error) {' if edit_mode else ''}
{'reportEditModeError("Edit mode initialization failed", error.message);' if edit_mode else ''}
{'console.error("Edit mode error:", error);' if edit_mode else ''}
{'}}' if edit_mode else ''}
{'}}' if edit_mode else ''}
}});
// Handle CDN loading errors
window.addEventListener('load', function() {{
if (window.markitectMarkedError) {{
{'reportEditModeError("CDN library failed to load", "Network or firewall blocking marked.js");' if edit_mode else ''}
}}
}});
</script>
</body>
</html>"""
return html_template
def _get_template_css(self, template: str = None) -> str:
"""Get CSS styles for the specified template theme."""
if template == 'github':
return """
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif;
max-width: 900px;
margin: 0 auto;
padding: 2rem;
line-height: 1.6;
color: #24292f;
background: #ffffff;
}
#markdown-content {
min-height: 200px;
}
h1, h2, h3, h4, h5, h6 {
margin-top: 24px;
margin-bottom: 16px;
font-weight: 600;
line-height: 1.25;
}
h1 { border-bottom: 1px solid #d0d7de; padding-bottom: .3em; }
h2 { border-bottom: 1px solid #d0d7de; padding-bottom: .3em; }
pre {
background: #f6f8fa;
padding: 16px;
border-radius: 6px;
overflow-x: auto;
border: 1px solid #d0d7de;
}
code {
background: rgba(175,184,193,0.2);
padding: 0.2em 0.4em;
border-radius: 6px;
font-size: 0.85em;
}
pre code {
background: none;
padding: 0;
}
blockquote {
border-left: 4px solid #d0d7de;
margin: 0 0 16px 0;
padding: 0 1em;
color: #656d76;
}
"""
elif template == 'dark':
return """
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 2rem;
line-height: 1.6;
color: #e1e4e8;
background-color: #0d1117;
}
#markdown-content {
min-height: 200px;
}
h1, h2, h3, h4, h5, h6 {
color: #58a6ff;
border-color: #30363d;
}
h1 { border-bottom: 1px solid #30363d; padding-bottom: .3em; }
h2 { border-bottom: 1px solid #30363d; padding-bottom: .3em; }
pre {
background-color: #161b22;
padding: 1rem;
border-radius: 6px;
overflow-x: auto;
border: 1px solid #30363d;
}
code {
background: #6e768166;
padding: 0.2em 0.4em;
border-radius: 3px;
font-size: 0.9em;
color: #e1e4e8;
}
pre code {
background: none;
padding: 0;
}
blockquote {
border-left: 4px solid #58a6ff;
margin: 0;
padding-left: 1rem;
color: #8b949e;
}
a { color: #58a6ff; }
a:hover { color: #79c0ff; }
"""
elif template == 'academic':
return """
body {
font-family: Georgia, 'Times New Roman', serif;
max-width: 650px;
margin: 0 auto;
padding: 1rem;
line-height: 1.8;
color: #333;
background: #fff;
}
#markdown-content {
min-height: 200px;
}
h1, h2, h3, h4, h5, h6 {
font-family: -apple-system, BlinkMacSystemFont, sans-serif;
margin-top: 2rem;
margin-bottom: 1rem;
}
pre {
background: #f8f8f8;
padding: 1rem;
border-left: 4px solid #ccc;
overflow-x: auto;
font-family: 'Courier New', monospace;
}
code {
background: #f0f0f0;
padding: 0.1em 0.3em;
font-family: 'Courier New', monospace;
font-size: 0.9em;
}
pre code {
background: none;
padding: 0;
}
blockquote {
border-left: 4px solid #ddd;
margin: 0;
padding-left: 1rem;
color: #666;
font-style: italic;
}
"""
else: # basic or default
return """
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 2rem;
line-height: 1.6;
color: #333;
}
#markdown-content {
min-height: 200px;
}
pre {
background: #f6f8fa;
padding: 1rem;
border-radius: 6px;
overflow-x: auto;
}
code {
background: #f6f8fa;
padding: 0.2em 0.4em;
border-radius: 3px;
font-size: 0.9em;
}
pre code {
background: none;
padding: 0;
}
blockquote {
border-left: 4px solid #dfe2e5;
margin: 0;
padding-left: 1rem;
color: #6a737d;
}
"""