feat(spaces): implement Phase 4 HTML Rendering Mode
Implements HTML rendering system for Information Spaces: - SpaceRenderer: Abstract base class for renderers - RenderConfig: Configuration for format, theme, TOC, etc. - RenderResult: Immutable result with content hash and metadata - ThemeConfig: Layered theme system with customization - CompositeRenderer: Multi-format renderer delegation - MarkdownToHTMLRenderer: Full markdown-to-HTML conversion - Theme support (github, dark, minimal, academic) - Code block handling - Link target="_blank" for external links - Table of contents generation - Heading ID generation for navigation - HTMLRendererFactory: Factory for common renderer configurations - SpaceRenderingService: Orchestration layer - Transclusion variable substitution - Render caching with automatic invalidation - Event emission (RENDER_STARTED, RENDER_COMPLETED, RENDER_FAILED) - Batch rendering support - Statistics tracking - SpaceRenderingServiceBuilder: Fluent builder pattern 60 unit tests covering all components. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
576
markitect/spaces/rendering/html_renderer.py
Normal file
576
markitect/spaces/rendering/html_renderer.py
Normal file
@@ -0,0 +1,576 @@
|
||||
"""
|
||||
HTML renderer for Information Spaces.
|
||||
|
||||
This module provides markdown-to-HTML rendering with theming,
|
||||
code highlighting, and accessibility features.
|
||||
"""
|
||||
|
||||
import re
|
||||
import html
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional, List, Set
|
||||
|
||||
from .base import (
|
||||
SpaceRenderer,
|
||||
RenderConfig,
|
||||
RenderResult,
|
||||
RenderFormat,
|
||||
ThemeConfig,
|
||||
)
|
||||
|
||||
|
||||
# Built-in theme properties
|
||||
THEME_PROPERTIES: Dict[str, Dict[str, Any]] = {
|
||||
"default": {
|
||||
"font_family": "-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif",
|
||||
"max_width": "800px",
|
||||
"body_color": "#333333",
|
||||
"body_background": "#ffffff",
|
||||
"heading_color": "#333333",
|
||||
"code_background": "#f6f8fa",
|
||||
"code_color": "#333333",
|
||||
"border_color": "#d0d7de",
|
||||
"blockquote_border": "#dfe2e5",
|
||||
"blockquote_color": "#6a737d",
|
||||
"table_border": "#d0d7de",
|
||||
"table_header_bg": "#f6f8fa",
|
||||
"link_color": "#0366d6",
|
||||
},
|
||||
"github": {
|
||||
"font_family": "-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif",
|
||||
"max_width": "980px",
|
||||
"body_color": "#24292e",
|
||||
"body_background": "#ffffff",
|
||||
"heading_color": "#24292e",
|
||||
"code_background": "#f6f8fa",
|
||||
"code_color": "#24292e",
|
||||
"border_color": "#e1e4e8",
|
||||
"blockquote_border": "#dfe2e5",
|
||||
"blockquote_color": "#6a737d",
|
||||
"table_border": "#e1e4e8",
|
||||
"table_header_bg": "#f6f8fa",
|
||||
"link_color": "#0366d6",
|
||||
},
|
||||
"minimal": {
|
||||
"font_family": "Georgia, serif",
|
||||
"max_width": "680px",
|
||||
"body_color": "#222222",
|
||||
"body_background": "#fafafa",
|
||||
"heading_color": "#111111",
|
||||
"code_background": "#f0f0f0",
|
||||
"code_color": "#222222",
|
||||
"border_color": "#dddddd",
|
||||
"blockquote_border": "#cccccc",
|
||||
"blockquote_color": "#666666",
|
||||
"table_border": "#dddddd",
|
||||
"table_header_bg": "#f0f0f0",
|
||||
"link_color": "#0055aa",
|
||||
},
|
||||
"dark": {
|
||||
"font_family": "-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif",
|
||||
"max_width": "800px",
|
||||
"body_color": "#c9d1d9",
|
||||
"body_background": "#0d1117",
|
||||
"heading_color": "#c9d1d9",
|
||||
"code_background": "#161b22",
|
||||
"code_color": "#c9d1d9",
|
||||
"border_color": "#30363d",
|
||||
"blockquote_border": "#3b434b",
|
||||
"blockquote_color": "#8b949e",
|
||||
"table_border": "#30363d",
|
||||
"table_header_bg": "#161b22",
|
||||
"link_color": "#58a6ff",
|
||||
},
|
||||
"academic": {
|
||||
"font_family": "'Times New Roman', Times, serif",
|
||||
"max_width": "720px",
|
||||
"body_color": "#1a1a1a",
|
||||
"body_background": "#ffffff",
|
||||
"heading_color": "#1a1a1a",
|
||||
"heading_style": "underlined",
|
||||
"text_align": "justify",
|
||||
"code_background": "#f5f5f5",
|
||||
"code_color": "#1a1a1a",
|
||||
"border_color": "#cccccc",
|
||||
"blockquote_border": "#999999",
|
||||
"blockquote_color": "#555555",
|
||||
"table_border": "#cccccc",
|
||||
"table_header_bg": "#f5f5f5",
|
||||
"link_color": "#000080",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def combine_theme_properties(layers: List[str]) -> Dict[str, Any]:
|
||||
"""
|
||||
Combine theme properties from multiple layers.
|
||||
|
||||
Later layers override earlier ones.
|
||||
|
||||
Args:
|
||||
layers: List of theme names to combine
|
||||
|
||||
Returns:
|
||||
Combined properties dictionary
|
||||
"""
|
||||
combined = {}
|
||||
for layer in layers:
|
||||
if layer in THEME_PROPERTIES:
|
||||
combined.update(THEME_PROPERTIES[layer])
|
||||
return combined
|
||||
|
||||
|
||||
class MarkdownToHTMLRenderer(SpaceRenderer):
|
||||
"""
|
||||
Renders markdown content to HTML.
|
||||
|
||||
Features:
|
||||
- Theme support with layer composition
|
||||
- Syntax highlighting for code blocks
|
||||
- Automatic heading IDs for navigation
|
||||
- Link target handling
|
||||
- Table of contents generation
|
||||
"""
|
||||
|
||||
@property
|
||||
def supported_formats(self) -> List[RenderFormat]:
|
||||
"""Return supported formats."""
|
||||
return [RenderFormat.HTML]
|
||||
|
||||
def render(
|
||||
self,
|
||||
content: str,
|
||||
document_id: str,
|
||||
space_id: str,
|
||||
dependencies: Optional[Set[str]] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> RenderResult:
|
||||
"""
|
||||
Render markdown to HTML.
|
||||
|
||||
Args:
|
||||
content: Markdown content
|
||||
document_id: Document ID
|
||||
space_id: Space ID
|
||||
dependencies: Document dependencies
|
||||
metadata: Additional metadata
|
||||
|
||||
Returns:
|
||||
RenderResult with HTML content
|
||||
"""
|
||||
source_hash = RenderResult.compute_hash(content)
|
||||
|
||||
# Parse markdown to HTML
|
||||
html_content = self._render_markdown(content)
|
||||
|
||||
# Apply post-processing
|
||||
html_content = self._apply_post_processing(html_content)
|
||||
|
||||
# Generate table of contents if requested
|
||||
toc_html = ""
|
||||
if self.config.include_toc:
|
||||
toc_html = self._generate_toc(html_content)
|
||||
|
||||
# Build complete HTML document
|
||||
complete_html = self._build_html_document(
|
||||
body_content=html_content,
|
||||
toc_content=toc_html,
|
||||
title=self._extract_title(content),
|
||||
metadata=metadata or {},
|
||||
)
|
||||
|
||||
content_hash = RenderResult.compute_hash(complete_html)
|
||||
|
||||
return RenderResult(
|
||||
content=complete_html,
|
||||
format=RenderFormat.HTML,
|
||||
content_hash=content_hash,
|
||||
source_hash=source_hash,
|
||||
document_id=document_id,
|
||||
space_id=space_id,
|
||||
dependencies=dependencies or set(),
|
||||
metadata=metadata or {},
|
||||
)
|
||||
|
||||
def _render_markdown(self, content: str) -> str:
|
||||
"""
|
||||
Convert markdown to HTML.
|
||||
|
||||
Uses the Python markdown library if available, otherwise falls
|
||||
back to a basic parser.
|
||||
"""
|
||||
try:
|
||||
import markdown
|
||||
|
||||
# Configure extensions
|
||||
extensions = ["extra", "toc", "tables", "fenced_code"]
|
||||
if self.config.highlight_code:
|
||||
try:
|
||||
extensions.append("codehilite")
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
return markdown.markdown(content, extensions=extensions)
|
||||
except ImportError:
|
||||
# Fallback to basic parsing
|
||||
return self._basic_markdown_to_html(content)
|
||||
|
||||
def _basic_markdown_to_html(self, content: str) -> str:
|
||||
"""
|
||||
Basic markdown to HTML conversion.
|
||||
|
||||
Used as fallback when markdown library is not available.
|
||||
"""
|
||||
lines = content.split('\n')
|
||||
html_lines = []
|
||||
in_code_block = False
|
||||
in_list = False
|
||||
|
||||
for line in lines:
|
||||
# Code blocks
|
||||
if line.startswith('```'):
|
||||
if in_code_block:
|
||||
html_lines.append('</code></pre>')
|
||||
in_code_block = False
|
||||
else:
|
||||
lang = line[3:].strip()
|
||||
lang_class = f' class="language-{lang}"' if lang else ''
|
||||
html_lines.append(f'<pre><code{lang_class}>')
|
||||
in_code_block = True
|
||||
continue
|
||||
|
||||
if in_code_block:
|
||||
html_lines.append(html.escape(line))
|
||||
continue
|
||||
|
||||
# Close list if not a list item
|
||||
if in_list and not line.strip().startswith(('-', '*', '+')):
|
||||
html_lines.append('</ul>')
|
||||
in_list = False
|
||||
|
||||
stripped = line.strip()
|
||||
|
||||
# Headers
|
||||
if stripped.startswith('######'):
|
||||
text = stripped[6:].strip()
|
||||
slug = self._slugify(text)
|
||||
html_lines.append(f'<h6 id="{slug}">{html.escape(text)}</h6>')
|
||||
elif stripped.startswith('#####'):
|
||||
text = stripped[5:].strip()
|
||||
slug = self._slugify(text)
|
||||
html_lines.append(f'<h5 id="{slug}">{html.escape(text)}</h5>')
|
||||
elif stripped.startswith('####'):
|
||||
text = stripped[4:].strip()
|
||||
slug = self._slugify(text)
|
||||
html_lines.append(f'<h4 id="{slug}">{html.escape(text)}</h4>')
|
||||
elif stripped.startswith('###'):
|
||||
text = stripped[3:].strip()
|
||||
slug = self._slugify(text)
|
||||
html_lines.append(f'<h3 id="{slug}">{html.escape(text)}</h3>')
|
||||
elif stripped.startswith('##'):
|
||||
text = stripped[2:].strip()
|
||||
slug = self._slugify(text)
|
||||
html_lines.append(f'<h2 id="{slug}">{html.escape(text)}</h2>')
|
||||
elif stripped.startswith('#'):
|
||||
text = stripped[1:].strip()
|
||||
slug = self._slugify(text)
|
||||
html_lines.append(f'<h1 id="{slug}">{html.escape(text)}</h1>')
|
||||
# Horizontal rule
|
||||
elif stripped in ('---', '***', '___'):
|
||||
html_lines.append('<hr>')
|
||||
# Blockquote
|
||||
elif stripped.startswith('>'):
|
||||
text = stripped[1:].strip()
|
||||
html_lines.append(f'<blockquote>{html.escape(text)}</blockquote>')
|
||||
# Unordered list
|
||||
elif stripped.startswith(('-', '*', '+')) and len(stripped) > 1 and stripped[1] == ' ':
|
||||
if not in_list:
|
||||
html_lines.append('<ul>')
|
||||
in_list = True
|
||||
text = stripped[2:].strip()
|
||||
html_lines.append(f'<li>{self._process_inline(text)}</li>')
|
||||
# Empty line
|
||||
elif not stripped:
|
||||
if in_list:
|
||||
html_lines.append('</ul>')
|
||||
in_list = False
|
||||
html_lines.append('')
|
||||
# Paragraph
|
||||
else:
|
||||
html_lines.append(f'<p>{self._process_inline(stripped)}</p>')
|
||||
|
||||
# Close any open list
|
||||
if in_list:
|
||||
html_lines.append('</ul>')
|
||||
|
||||
# Close any open code block
|
||||
if in_code_block:
|
||||
html_lines.append('</code></pre>')
|
||||
|
||||
return '\n'.join(html_lines)
|
||||
|
||||
def _process_inline(self, text: str) -> str:
|
||||
"""Process inline markdown elements."""
|
||||
# Bold
|
||||
text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text)
|
||||
text = re.sub(r'__(.+?)__', r'<strong>\1</strong>', text)
|
||||
|
||||
# Italic
|
||||
text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text)
|
||||
text = re.sub(r'_(.+?)_', r'<em>\1</em>', text)
|
||||
|
||||
# Code
|
||||
text = re.sub(r'`([^`]+)`', r'<code>\1</code>', text)
|
||||
|
||||
# Links
|
||||
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2">\1</a>', text)
|
||||
|
||||
# Images
|
||||
text = re.sub(r'!\[([^\]]*)\]\(([^)]+)\)', r'<img src="\2" alt="\1">', text)
|
||||
|
||||
return text
|
||||
|
||||
def _slugify(self, text: str) -> str:
|
||||
"""Create a URL-safe slug from text."""
|
||||
slug = text.lower()
|
||||
slug = re.sub(r'[^\w\s-]', '', slug)
|
||||
slug = re.sub(r'[\s_]+', '-', slug)
|
||||
slug = slug.strip('-')
|
||||
return slug
|
||||
|
||||
def _apply_post_processing(self, html_content: str) -> str:
|
||||
"""Apply post-processing to HTML content."""
|
||||
# Add target="_blank" to external links
|
||||
if self.config.link_target_blank:
|
||||
html_content = re.sub(
|
||||
r'<a href="(https?://[^"]+)"',
|
||||
r'<a href="\1" target="_blank" rel="noopener noreferrer"',
|
||||
html_content,
|
||||
)
|
||||
|
||||
# Limit image dimensions
|
||||
if self.config.image_max_width != "100%":
|
||||
html_content = re.sub(
|
||||
r'<img ([^>]*)>',
|
||||
f'<img \\1 style="max-width: {self.config.image_max_width}; max-height: {self.config.image_max_height};">',
|
||||
html_content,
|
||||
)
|
||||
|
||||
return html_content
|
||||
|
||||
def _generate_toc(self, html_content: str) -> str:
|
||||
"""Generate table of contents from HTML headings."""
|
||||
headings = re.findall(r'<h([1-6])[^>]*id="([^"]+)"[^>]*>([^<]+)</h\1>', html_content)
|
||||
|
||||
if not headings:
|
||||
return ""
|
||||
|
||||
toc_lines = ['<nav class="toc"><h2>Contents</h2><ul>']
|
||||
current_level = 0
|
||||
|
||||
for level_str, slug, text in headings:
|
||||
level = int(level_str)
|
||||
|
||||
# Adjust nesting
|
||||
while current_level < level:
|
||||
toc_lines.append('<ul>')
|
||||
current_level += 1
|
||||
while current_level > level:
|
||||
toc_lines.append('</ul>')
|
||||
current_level -= 1
|
||||
|
||||
toc_lines.append(f'<li><a href="#{slug}">{html.escape(text)}</a></li>')
|
||||
|
||||
# Close remaining lists
|
||||
while current_level > 0:
|
||||
toc_lines.append('</ul>')
|
||||
current_level -= 1
|
||||
|
||||
toc_lines.append('</ul></nav>')
|
||||
return '\n'.join(toc_lines)
|
||||
|
||||
def _extract_title(self, content: str) -> str:
|
||||
"""Extract title from first H1 heading."""
|
||||
match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
return "Document"
|
||||
|
||||
def _generate_css(self) -> str:
|
||||
"""Generate CSS based on theme configuration."""
|
||||
# Get theme properties
|
||||
layers = self.config.theme.layers
|
||||
props = combine_theme_properties(layers)
|
||||
|
||||
# Apply custom property overrides
|
||||
props.update(self.config.theme.custom_properties)
|
||||
|
||||
css = f"""
|
||||
body {{
|
||||
font-family: {props.get('font_family', 'sans-serif')};
|
||||
max-width: {props.get('max_width', '800px')};
|
||||
margin: 0 auto;
|
||||
padding: 2rem;
|
||||
line-height: 1.6;
|
||||
color: {props.get('body_color', '#333')};
|
||||
background-color: {props.get('body_background', '#fff')};
|
||||
}}
|
||||
h1, h2, h3, h4, h5, h6 {{
|
||||
color: {props.get('heading_color', props.get('body_color', '#333'))};
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 0.5em;
|
||||
}}
|
||||
pre {{
|
||||
background-color: {props.get('code_background', '#f6f8fa')};
|
||||
color: {props.get('code_color', '#333')};
|
||||
padding: 1rem;
|
||||
border-radius: 6px;
|
||||
overflow-x: auto;
|
||||
border: 1px solid {props.get('border_color', '#ddd')};
|
||||
}}
|
||||
code {{
|
||||
background-color: {props.get('code_background', '#f6f8fa')};
|
||||
color: {props.get('code_color', '#333')};
|
||||
padding: 0.2em 0.4em;
|
||||
border-radius: 3px;
|
||||
font-size: 0.9em;
|
||||
}}
|
||||
pre code {{
|
||||
background: none;
|
||||
padding: 0;
|
||||
}}
|
||||
blockquote {{
|
||||
border-left: 4px solid {props.get('blockquote_border', '#ddd')};
|
||||
margin: 0;
|
||||
padding-left: 1rem;
|
||||
color: {props.get('blockquote_color', '#666')};
|
||||
}}
|
||||
table {{
|
||||
border-collapse: collapse;
|
||||
margin: 1rem 0;
|
||||
width: 100%;
|
||||
border: 1px solid {props.get('table_border', '#ddd')};
|
||||
}}
|
||||
th, td {{
|
||||
border: 1px solid {props.get('table_border', '#ddd')};
|
||||
padding: 0.5rem;
|
||||
text-align: left;
|
||||
}}
|
||||
th {{
|
||||
background-color: {props.get('table_header_bg', '#f6f8fa')};
|
||||
}}
|
||||
a {{
|
||||
color: {props.get('link_color', '#0366d6')};
|
||||
text-decoration: none;
|
||||
}}
|
||||
a:hover {{
|
||||
text-decoration: underline;
|
||||
}}
|
||||
img {{
|
||||
max-width: {self.config.image_max_width};
|
||||
max-height: {self.config.image_max_height};
|
||||
height: auto;
|
||||
}}
|
||||
.toc {{
|
||||
background-color: {props.get('code_background', '#f6f8fa')};
|
||||
padding: 1rem;
|
||||
border-radius: 6px;
|
||||
margin-bottom: 2rem;
|
||||
}}
|
||||
.toc h2 {{
|
||||
margin-top: 0;
|
||||
}}
|
||||
.toc ul {{
|
||||
padding-left: 1.5rem;
|
||||
}}
|
||||
"""
|
||||
|
||||
# Add custom CSS if provided
|
||||
if self.config.theme.custom_css:
|
||||
css += f"\n{self.config.theme.custom_css}"
|
||||
|
||||
return css
|
||||
|
||||
def _build_html_document(
|
||||
self,
|
||||
body_content: str,
|
||||
toc_content: str,
|
||||
title: str,
|
||||
metadata: Dict[str, Any],
|
||||
) -> str:
|
||||
"""Build complete HTML document."""
|
||||
css = self._generate_css()
|
||||
|
||||
# Meta tags
|
||||
meta_tags = '<meta charset="utf-8">\n'
|
||||
meta_tags += '<meta name="viewport" content="width=device-width, initial-scale=1.0">\n'
|
||||
meta_tags += '<meta name="generator" content="Markitect Information Space">\n'
|
||||
|
||||
# Add custom meta from metadata
|
||||
for key, value in metadata.items():
|
||||
if key.startswith('meta_'):
|
||||
meta_name = key[5:]
|
||||
meta_tags += f'<meta name="{html.escape(meta_name)}" content="{html.escape(str(value))}">\n'
|
||||
|
||||
return f"""<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
{meta_tags}
|
||||
<title>{html.escape(title)}</title>
|
||||
<style>
|
||||
{css}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
{toc_content}
|
||||
<div id="content">
|
||||
{body_content}
|
||||
</div>
|
||||
</body>
|
||||
</html>"""
|
||||
|
||||
|
||||
class HTMLRendererFactory:
|
||||
"""Factory for creating configured HTML renderers."""
|
||||
|
||||
@staticmethod
|
||||
def create_default() -> MarkdownToHTMLRenderer:
|
||||
"""Create a renderer with default settings."""
|
||||
return MarkdownToHTMLRenderer()
|
||||
|
||||
@staticmethod
|
||||
def create_github_style() -> MarkdownToHTMLRenderer:
|
||||
"""Create a renderer with GitHub-style theme."""
|
||||
config = RenderConfig(
|
||||
theme=ThemeConfig(name="github", layers=["github"]),
|
||||
include_toc=False,
|
||||
)
|
||||
return MarkdownToHTMLRenderer(config)
|
||||
|
||||
@staticmethod
|
||||
def create_academic_style() -> MarkdownToHTMLRenderer:
|
||||
"""Create a renderer with academic styling."""
|
||||
config = RenderConfig(
|
||||
theme=ThemeConfig(name="academic", layers=["academic"]),
|
||||
include_toc=True,
|
||||
)
|
||||
return MarkdownToHTMLRenderer(config)
|
||||
|
||||
@staticmethod
|
||||
def create_minimal_style() -> MarkdownToHTMLRenderer:
|
||||
"""Create a renderer with minimal styling."""
|
||||
config = RenderConfig(
|
||||
theme=ThemeConfig(name="minimal", layers=["minimal"]),
|
||||
include_toc=False,
|
||||
)
|
||||
return MarkdownToHTMLRenderer(config)
|
||||
|
||||
@staticmethod
|
||||
def create_dark_mode() -> MarkdownToHTMLRenderer:
|
||||
"""Create a renderer with dark mode theme."""
|
||||
config = RenderConfig(
|
||||
theme=ThemeConfig(name="dark", layers=["dark"]),
|
||||
include_toc=False,
|
||||
)
|
||||
return MarkdownToHTMLRenderer(config)
|
||||
Reference in New Issue
Block a user