Files
markitect-main/markitect/document_manager.py
tegwick 36e113903d
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
fix: resolve JavaScript syntax errors preventing edit mode initialization in Issue #154
- Fixed fragmented conditional blocks that were generating invalid JavaScript syntax
- Consolidated edit mode initialization logic into cohesive if/try/catch blocks
- Added proper class definition placement at script top level
- Implemented progressive enhancement with graceful degradation (content always displays)
- Added step-by-step status reporting and user-friendly error messaging
- Fixed timeout functionality for edit mode initialization tracking

The edit mode now properly initializes with transparent error reporting while maintaining
content visibility even when JavaScript fails, addressing user feedback for better
debugging and user experience.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-15 01:06:03 +02:00

850 lines
31 KiB
Python

"""
Document manager for high-performance markdown file ingestion and AST caching.
This module implements the core functionality for Issue #2: Fast Document Loading & CLI Manipulation.
It provides performance-optimized document processing through AST caching and database integration.
Key Features:
- Parse once, access many times architecture
- AST cache loading < 50% of markdown parsing time
- Seamless integration with Issue #1 database foundation
- Comprehensive error handling and validation
"""
import json
import time
from pathlib import Path
from typing import Dict, Any, Optional
from .parser import parse_markdown_to_ast
from .frontmatter import FrontMatterParser
class DocumentManager:
    """
    High-performance document manager for markdown file processing.

    Implements the "parse once, manipulate many times" architecture by creating
    fast-loading AST cache files alongside database metadata storage. The class
    also renders markdown files to standalone HTML pages (see ``render_file``).

    Architecture:
        markdown file → AST parsing → cache file + database metadata

    Performance Goal:
        Cache loading must be < 50% of original parsing time

    Attributes:
        db_manager: Database manager for metadata storage
        cache_dir: Directory for AST cache files
        frontmatter_parser: YAML front matter processor
    """
def __init__(self, database_manager, cache_dir: Optional[Path] = None):
    """
    Initialize document manager with database and cache configuration.

    Args:
        database_manager: DatabaseManager instance for metadata storage
        cache_dir: Directory for AST cache files (default: .ast_cache,
            relative to the current working directory)

    Side effects:
        Creates the cache directory (and any missing parent directories)
        if it does not exist yet.
    """
    self.db_manager = database_manager
    self.cache_dir = Path(cache_dir) if cache_dir else Path(".ast_cache")
    # parents=True: a nested cache location such as "build/cache/ast" must
    # work on first use instead of raising FileNotFoundError.
    self.cache_dir.mkdir(parents=True, exist_ok=True)
    self.frontmatter_parser = FrontMatterParser()
def ingest_file(self, file_path: Path) -> Dict[str, Any]:
    """
    Ingest a markdown file with performance-optimized AST caching.

    Implements the core "parse once, manipulate many times" workflow:
        1. Validates file existence
        2. Parses markdown content to AST
        3. Creates fast-loading AST cache file
        4. Stores metadata in database
        5. Returns processing results with performance metrics

    Args:
        file_path: Path to markdown file to ingest. A plain string path is
            accepted as well and converted to ``Path``.

    Returns:
        Dictionary containing:
            - ast: Parsed AST representation
            - metadata: File metadata (filename, title, etc.)
            - ast_cache_path: Path to created cache file
            - parse_time: Time spent parsing markdown (seconds)
            - cache_time: Time spent creating cache (seconds)

    Raises:
        FileNotFoundError: If the specified file doesn't exist
    """
    # Callers coming from a CLI frequently hand over a str; normalise once so
    # the .exists() / .name accesses below cannot fail with AttributeError.
    file_path = Path(file_path)

    # Validate file exists
    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    # Read file content
    content = self._read_file_content(file_path)

    # Only the metadata half of the front matter split is used here.
    front_matter, _ = self.frontmatter_parser.parse(content)

    # Parse to AST with performance timing.
    # NOTE(review): the AST is built from the full content, front matter
    # included, not from the body returned by the front matter parser —
    # confirm this is intended.
    ast, parse_time = self._parse_content_to_ast(content)

    # Create cache file with performance timing
    cache_file, cache_time = self._create_performance_cache(file_path.name, ast)

    # Store in database (handles front matter parsing internally)
    self._store_in_database(file_path.name, content)

    # Return comprehensive result
    return self._build_ingestion_result(
        ast=ast,
        filename=file_path.name,
        front_matter=front_matter,
        cache_file=cache_file,
        parse_time=parse_time,
        cache_time=cache_time,
    )
def _read_file_content(self, file_path: Path) -> str:
"""
Read file content with proper encoding.
Args:
file_path: Path to file to read
Returns:
File content as string
"""
return file_path.read_text(encoding='utf-8')
def _parse_content_to_ast(self, content: str) -> tuple[list, float]:
    """
    Parse markdown content to AST with performance timing.

    Args:
        content: Raw markdown content

    Returns:
        Tuple of (AST tokens, parse_time_seconds)
    """
    # perf_counter() is the monotonic, high-resolution clock meant for
    # measuring durations; time.time() is a wall clock that can jump and has
    # coarse resolution on some platforms.
    start_time = time.perf_counter()
    ast = parse_markdown_to_ast(content)
    parse_time = time.perf_counter() - start_time
    return ast, parse_time
def _create_performance_cache(self, filename: str, ast: list) -> tuple[Path, float]:
"""
Create AST cache file with performance timing.
Args:
filename: Source filename for cache naming
ast: AST tokens to cache
Returns:
Tuple of (cache_file_path, cache_time_seconds)
"""
start_time = time.time()
cache_file = self._create_ast_cache(filename, ast)
cache_time = time.time() - start_time
return cache_file, cache_time
def _store_in_database(self, filename: str, content: str) -> None:
"""
Store document in database using existing API.
Args:
filename: Name of the file
content: Full markdown content (including front matter)
Note:
The database manager handles front matter parsing internally.
"""
self.db_manager.store_markdown_file(filename, content)
def _build_ingestion_result(self, ast: list, filename: str, front_matter: dict,
cache_file: Path, parse_time: float, cache_time: float) -> Dict[str, Any]:
"""
Build comprehensive ingestion result dictionary.
Args:
ast: Parsed AST tokens
filename: Source filename
front_matter: Parsed front matter metadata
cache_file: Path to created cache file
parse_time: Time spent parsing (seconds)
cache_time: Time spent caching (seconds)
Returns:
Structured result dictionary with all ingestion data
"""
return {
'ast': ast,
'metadata': {
'filename': filename,
'title': front_matter.get('title', ''),
},
'ast_cache_path': cache_file,
'parse_time': parse_time,
'cache_time': cache_time
}
def _create_ast_cache(self, filename: str, ast: list) -> Path:
"""
Create AST cache file in JSON format.
Args:
filename: Source filename for cache naming
ast: AST tokens to serialize
Returns:
Path to created cache file
"""
cache_filename = f"{filename}.ast.json"
cache_path = self.cache_dir / cache_filename
with open(cache_path, 'w', encoding='utf-8') as f:
json.dump(ast, f, indent=2, ensure_ascii=False)
return cache_path
def list_files(self) -> list:
    """
    Report every markdown file known to the database.

    Each database record is copied (filename, id, created_at, front_matter)
    and extended with 'size' and 'modified' taken from the file system, where
    the stored filename is interpreted as a path. When the file is not on
    disk, 'size' is 'unknown' and 'modified' is 'file not found'; when the
    lookup itself fails, both are 'unknown'.

    Returns:
        List of dictionaries, one per database record
    """
    copied_keys = ('filename', 'id', 'created_at', 'front_matter')
    listing = []
    for record in self.db_manager.list_markdown_files():
        entry = {key: record[key] for key in copied_keys}

        # Best-effort file system lookup: a failure here must never prevent
        # the database record from being listed.
        try:
            on_disk = Path(record['filename'])
            if on_disk.exists():
                stats = on_disk.stat()
                size, modified = f"{stats.st_size} bytes", stats.st_mtime
            else:
                size, modified = 'unknown', 'file not found'
        except Exception:
            size, modified = 'unknown', 'unknown'

        entry['size'] = size
        entry['modified'] = modified
        listing.append(entry)
    return listing
def get_file(self, file_path: str) -> Dict[str, Any]:
    """
    Look up a stored markdown file in the database.

    Args:
        file_path: Identifier of the file, passed to the database manager

    Returns:
        Dictionary with 'content' (the stored text, '' when absent) and
        'metadata' (filename, front_matter, size as the content length,
        modified)

    Raises:
        ValueError: If no database manager is set
        FileNotFoundError: If the database has no entry for the file
    """
    manager = self.db_manager
    if not manager:
        raise ValueError("Database manager not initialized")

    record = manager.get_markdown_file(file_path)
    if record is None:
        raise FileNotFoundError(f"File '{file_path}' not found in database")

    body = record.get('content', '')
    metadata = {
        'filename': record.get('filename', file_path),
        'front_matter': record.get('front_matter'),
        'size': len(body),
        'modified': record.get('modified'),
    }
    return {'content': body, 'metadata': metadata}
def render_file(self, input_file: str, output_file: str, template: Optional[str] = None, css: Optional[str] = None,
                edit_mode: bool = False, editor_theme: str = 'github', keyboard_shortcuts: bool = True) -> Dict[str, Any]:
    """
    Render a markdown file to HTML with client-side rendering capabilities.

    Creates an HTML file with embedded markdown content that is rendered
    client-side using a JavaScript markdown parser. Missing parent
    directories of the output file are created.

    Args:
        input_file: Path to input markdown file
        output_file: Path to output HTML file
        template: Name of the built-in style theme to use (optional)
        css: CSS file to include (optional)
        edit_mode: When True, the page also contains the in-browser editor
        editor_theme: Theme name written into the editor configuration
        keyboard_shortcuts: Whether the editor registers keyboard shortcuts

    Returns:
        Dictionary with the keys 'input_file', 'output_file', 'title',
        'template' and 'css'

    Raises:
        FileNotFoundError: If input file doesn't exist
    """
    input_path = Path(input_file)
    output_path = Path(output_file)

    # Validate input file exists
    if not input_path.exists():
        raise FileNotFoundError(f"Input file not found: {input_path}")

    # Read markdown content
    markdown_content = input_path.read_text(encoding='utf-8')

    # Extract title from markdown (first h1 heading)
    title = self._extract_title_from_markdown(markdown_content)

    # Generate HTML content
    html_content = self._generate_html_template(
        markdown_content=markdown_content,
        title=title,
        css=css,
        template=template,
        edit_mode=edit_mode,
        editor_theme=editor_theme,
        keyboard_shortcuts=keyboard_shortcuts,
    )

    # Write HTML file, creating the destination directory on demand
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(html_content, encoding='utf-8')

    return {
        'input_file': str(input_path),
        'output_file': str(output_path),
        'title': title,
        'template': template,
        'css': css,
    }
def _extract_title_from_markdown(self, content: str) -> str:
"""Extract title from markdown content (first h1 heading)."""
import re
# Look for first h1 heading
match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
if match:
return match.group(1).strip()
return "Markdown Document"
def _generate_html_template(self, markdown_content: str, title: str, css: str = None, template: str = None,
edit_mode: bool = False, editor_theme: str = 'github', keyboard_shortcuts: bool = True) -> str:
"""Generate HTML template with embedded markdown and client-side rendering."""
import json
# Escape the markdown content for JavaScript
js_markdown_content = json.dumps(markdown_content)
# Handle CSS styles
css_content = ""
if css:
# Try to read CSS file content and embed it
try:
css_path = Path(css)
if css_path.exists():
css_file_content = css_path.read_text(encoding='utf-8')
css_content = f"<style>\n{css_file_content}\n</style>"
else:
# Fallback to link if file doesn't exist
css_content = f'<link rel="stylesheet" href="{css}">'
except Exception:
# Fallback to link on any error
css_content = f'<link rel="stylesheet" href="{css}">'
# Get template-specific CSS
template_css = self._get_template_css(template)
# Default CSS for basic styling
default_css = f"""
<style>
{template_css}
</style>
"""
# Add editor-specific content if in edit mode
editor_scripts = ""
editor_config = ""
editor_css = ""
body_classes = ""
if edit_mode:
body_classes = ' class="markitect-edit-mode"'
editor_css = """
<style>
.markitect-floating-header {
position: fixed;
top: 0;
left: 0;
right: 0;
background: rgba(255, 255, 255, 0.95);
border-bottom: 1px solid #ddd;
padding: 10px;
z-index: 1000;
backdrop-filter: blur(5px);
}
.markitect-section-editable {
border: 1px dashed transparent;
padding: 8px;
margin: 4px 0;
border-radius: 4px;
cursor: pointer;
}
.markitect-section-editable:hover {
border-color: #007acc;
background: rgba(0, 122, 204, 0.05);
}
.edit-mode textarea {
width: 100%;
min-height: 100px;
font-family: monospace;
border: 2px solid #007acc;
border-radius: 4px;
padding: 8px;
}
</style>"""
editor_config = f"""
const MARKITECT_EDIT_MODE = true;
const MARKITECT_EDITOR_CONFIG = {{
theme: '{editor_theme}',
keyboardShortcuts: {str(keyboard_shortcuts).lower()},
autosave: true,
sections: true
}};"""
editor_scripts = """
class MarkitectEditor {
constructor() {
this.initializeEditor();
this.setupKeyboardShortcuts();
}
initializeEditor() {
const header = document.createElement('div');
header.className = 'markitect-floating-header';
header.innerHTML = `
<button onclick="markitectEditor.save()">Save</button>
<button onclick="markitectEditor.togglePreview()">Toggle Preview</button>
<span id="save-status">Ready</span>
`;
document.body.insertBefore(header, document.body.firstChild);
this.makeContentEditable();
}
makeContentEditable() {
const content = document.getElementById('markdown-content');
if (content) {
content.addEventListener('click', this.handleSectionClick.bind(this));
this.markSections(content);
}
}
markSections(element) {
const sections = element.querySelectorAll('h1, h2, h3, h4, h5, h6, p, blockquote, pre, ul, ol');
sections.forEach((section, index) => {
section.classList.add('markitect-section-editable');
section.setAttribute('data-section', index);
});
}
handleSectionClick(event) {
const section = event.target.closest('.markitect-section-editable');
if (section && !section.querySelector('textarea')) {
this.editSection(section);
}
}
editSection(section) {
const originalContent = section.innerHTML;
const textarea = document.createElement('textarea');
textarea.value = this.htmlToMarkdown(originalContent);
textarea.className = 'edit-mode';
textarea.addEventListener('blur', () => {
section.innerHTML = marked.parse(textarea.value);
this.markSections(section.parentElement);
});
section.innerHTML = '';
section.appendChild(textarea);
textarea.focus();
}
htmlToMarkdown(html) {
// Simple HTML to Markdown conversion
return html.replace(/<[^>]*>/g, '').trim();
}
setupKeyboardShortcuts() {
if (MARKITECT_EDITOR_CONFIG.keyboardShortcuts) {
document.addEventListener('keydown', (event) => {
if (event.ctrlKey || event.metaKey) {
switch(event.key) {
case 's':
event.preventDefault();
this.save();
break;
case 'e':
event.preventDefault();
this.togglePreview();
break;
}
}
});
}
}
save() {
document.getElementById('save-status').textContent = 'Saved!';
setTimeout(() => {
document.getElementById('save-status').textContent = 'Ready';
}, 2000);
}
togglePreview() {
console.log('Toggle preview mode');
}
}
let markitectEditor;"""
# Edit mode status and error reporting section
edit_mode_html = ""
if edit_mode:
edit_mode_html = f"""
<div id="markitect-status" style="background: #e3f2fd; border-left: 4px solid #2196f3; padding: 12px; margin-bottom: 20px; font-family: monospace; font-size: 14px;">
<div style="font-weight: bold; color: #1976d2;">📝 Markitect Edit Mode</div>
<div id="status-message" style="margin-top: 8px;">Loading edit capabilities...</div>
<div id="error-details" style="display: none; background: #ffebee; border: 1px solid #f44336; padding: 8px; margin-top: 8px; border-radius: 4px;">
<div style="font-weight: bold; color: #c62828;">❌ Edit Mode Failed</div>
<div id="error-text" style="margin-top: 4px; color: #666;"></div>
<details style="margin-top: 8px;">
<summary style="cursor: pointer; color: #1976d2;">🐛 Help us fix this issue</summary>
<div style="margin-top: 8px; font-size: 12px; color: #666;">
Please report this error with your browser info:
<br>📋 Browser: <span id="browser-info"></span>
<br>🔗 Create issue: <a href="https://github.com/anthropics/markitect/issues/new" target="_blank" style="color: #1976d2;">GitHub Issues</a>
</div>
</details>
</div>
</div>"""
html_template = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title}</title>
{css_content}
{default_css}
{editor_css}
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"
onload="window.markitectMarkedLoaded = true"
onerror="window.markitectMarkedError = true"></script>
</head>
<body{body_classes}>
{edit_mode_html}
<div id="markdown-content"></div>
<script>
const markdownContent = {js_markdown_content};
{editor_config}
// Define editor class first (if in edit mode)
{editor_scripts if edit_mode else ''}
// Error reporting utility
function reportEditModeError(errorMsg, technicalDetails) {{
const statusDiv = document.getElementById('markitect-status');
const errorDiv = document.getElementById('error-details');
const errorText = document.getElementById('error-text');
const statusMsg = document.getElementById('status-message');
const browserInfo = document.getElementById('browser-info');
if (statusMsg) statusMsg.textContent = 'Edit mode unavailable - content displayed in read-only mode';
if (errorDiv) errorDiv.style.display = 'block';
if (errorText) errorText.textContent = errorMsg + (technicalDetails ? ' (' + technicalDetails + ')' : '');
if (browserInfo) browserInfo.textContent = navigator.userAgent.split(' ').slice(-2).join(' ');
}}
// Status update utility
function updateStatus(message, isError = false) {{
const statusMsg = document.getElementById('status-message');
if (statusMsg) {{
statusMsg.textContent = message;
statusMsg.style.color = isError ? '#c62828' : '#1976d2';
}}
}}
// Always render content first (graceful degradation)
document.addEventListener('DOMContentLoaded', function() {{
updateStatus('Rendering content...');
const contentDiv = document.getElementById('markdown-content');
// Step 1: Ensure content is always displayed
if (contentDiv) {{
if (typeof marked !== 'undefined') {{
try {{
contentDiv.innerHTML = marked.parse(markdownContent);
updateStatus('Content rendered successfully ✓');
console.log('✓ Markdown rendered successfully');
}} catch (error) {{
contentDiv.innerHTML = '<p>Error rendering markdown: ' + error.message + '</p>';
updateStatus('Content rendered with errors', true);
{'reportEditModeError("Markdown parsing failed", error.message);' if edit_mode else ''}
}}
}} else {{
// Fallback: display raw markdown with basic formatting
const fallbackHtml = markdownContent
.replace(/^# (.*$)/gim, '<h1>$1</h1>')
.replace(/^## (.*$)/gim, '<h2>$1</h2>')
.replace(/^### (.*$)/gim, '<h3>$1</h3>')
.replace(/\\*\\*(.*?)\\*\\*/g, '<strong>$1</strong>')
.replace(/\\*(.*?)\\*/g, '<em>$1</em>')
.replace(/^- (.*$)/gim, '<li>$1</li>')
.replace(/\\n\\n/g, '<br><br>')
.replace(/\\n/g, '<br>');
contentDiv.innerHTML = '<div style="white-space: pre-wrap;">' + fallbackHtml + '</div>';
updateStatus('Content rendered with fallback parser', true);
{'reportEditModeError("CDN library failed to load", "Using basic fallback rendering");' if edit_mode else ''}
}}
}}
// Step 2: Try to enhance with edit capabilities (if in edit mode)
{'''if (typeof MARKITECT_EDIT_MODE !== 'undefined' && MARKITECT_EDIT_MODE) {
updateStatus("Initializing edit capabilities...");
try {
updateStatus("Creating editor instance...");
markitectEditor = new MarkitectEditor();
updateStatus("✓ Edit mode active - click any section to edit");
console.log("✓ Edit mode initialized successfully");
} catch (error) {
updateStatus("Edit mode failed to initialize", true);
reportEditModeError("Edit mode initialization failed", error.message);
console.error("Edit mode error:", error);
}
}''' if edit_mode else ''}
}});
// Handle CDN loading errors
window.addEventListener('load', function() {{
if (window.markitectMarkedError) {{
{'reportEditModeError("CDN library failed to load", "Network or firewall blocking marked.js");' if edit_mode else ''}
}}
}});
// Safety timeout for edit mode initialization
{'''setTimeout(function() {
const statusMsg = document.getElementById("status-message");
if (statusMsg && (statusMsg.textContent.includes("Loading") || statusMsg.textContent.includes("Initializing"))) {
updateStatus("Edit mode initialization timeout", true);
reportEditModeError("Edit mode took too long to initialize", "Possible JavaScript performance issue");
}
}, 5000);''' if edit_mode else ''} // 5 second timeout
</script>
</body>
</html>"""
return html_template
def _get_template_css(self, template: str = None) -> str:
"""Get CSS styles for the specified template theme."""
if template == 'github':
return """
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif;
max-width: 900px;
margin: 0 auto;
padding: 2rem;
line-height: 1.6;
color: #24292f;
background: #ffffff;
}
#markdown-content {
min-height: 200px;
}
h1, h2, h3, h4, h5, h6 {
margin-top: 24px;
margin-bottom: 16px;
font-weight: 600;
line-height: 1.25;
}
h1 { border-bottom: 1px solid #d0d7de; padding-bottom: .3em; }
h2 { border-bottom: 1px solid #d0d7de; padding-bottom: .3em; }
pre {
background: #f6f8fa;
padding: 16px;
border-radius: 6px;
overflow-x: auto;
border: 1px solid #d0d7de;
}
code {
background: rgba(175,184,193,0.2);
padding: 0.2em 0.4em;
border-radius: 6px;
font-size: 0.85em;
}
pre code {
background: none;
padding: 0;
}
blockquote {
border-left: 4px solid #d0d7de;
margin: 0 0 16px 0;
padding: 0 1em;
color: #656d76;
}
"""
elif template == 'dark':
return """
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 2rem;
line-height: 1.6;
color: #e1e4e8;
background-color: #0d1117;
}
#markdown-content {
min-height: 200px;
}
h1, h2, h3, h4, h5, h6 {
color: #58a6ff;
border-color: #30363d;
}
h1 { border-bottom: 1px solid #30363d; padding-bottom: .3em; }
h2 { border-bottom: 1px solid #30363d; padding-bottom: .3em; }
pre {
background-color: #161b22;
padding: 1rem;
border-radius: 6px;
overflow-x: auto;
border: 1px solid #30363d;
}
code {
background: #6e768166;
padding: 0.2em 0.4em;
border-radius: 3px;
font-size: 0.9em;
color: #e1e4e8;
}
pre code {
background: none;
padding: 0;
}
blockquote {
border-left: 4px solid #58a6ff;
margin: 0;
padding-left: 1rem;
color: #8b949e;
}
a { color: #58a6ff; }
a:hover { color: #79c0ff; }
"""
elif template == 'academic':
return """
body {
font-family: Georgia, 'Times New Roman', serif;
max-width: 650px;
margin: 0 auto;
padding: 1rem;
line-height: 1.8;
color: #333;
background: #fff;
}
#markdown-content {
min-height: 200px;
}
h1, h2, h3, h4, h5, h6 {
font-family: -apple-system, BlinkMacSystemFont, sans-serif;
margin-top: 2rem;
margin-bottom: 1rem;
}
pre {
background: #f8f8f8;
padding: 1rem;
border-left: 4px solid #ccc;
overflow-x: auto;
font-family: 'Courier New', monospace;
}
code {
background: #f0f0f0;
padding: 0.1em 0.3em;
font-family: 'Courier New', monospace;
font-size: 0.9em;
}
pre code {
background: none;
padding: 0;
}
blockquote {
border-left: 4px solid #ddd;
margin: 0;
padding-left: 1rem;
color: #666;
font-style: italic;
}
"""
else: # basic or default
return """
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 2rem;
line-height: 1.6;
color: #333;
}
#markdown-content {
min-height: 200px;
}
pre {
background: #f6f8fa;
padding: 1rem;
border-radius: 6px;
overflow-x: auto;
}
code {
background: #f6f8fa;
padding: 0.2em 0.4em;
border-radius: 3px;
font-size: 0.9em;
}
pre code {
background: none;
padding: 0;
}
blockquote {
border-left: 4px solid #dfe2e5;
margin: 0;
padding-left: 1rem;
color: #6a737d;
}
"""