Files
markitect-main/markitect/spaces/rendering/html_renderer.py
tegwick 2a5c265458 feat(spaces): implement Phase 4 HTML Rendering Mode
Implements HTML rendering system for Information Spaces:

- SpaceRenderer: Abstract base class for renderers
- RenderConfig: Configuration for format, theme, TOC, etc.
- RenderResult: Immutable result with content hash and metadata
- ThemeConfig: Layered theme system with customization
- CompositeRenderer: Multi-format renderer delegation

- MarkdownToHTMLRenderer: Full markdown-to-HTML conversion
  - Theme support (github, dark, minimal, academic)
  - Code block handling
  - Link target="_blank" for external links
  - Table of contents generation
  - Heading ID generation for navigation
- HTMLRendererFactory: Factory for common renderer configurations

- SpaceRenderingService: Orchestration layer
  - Transclusion variable substitution
  - Render caching with automatic invalidation
  - Event emission (RENDER_STARTED, RENDER_COMPLETED, RENDER_FAILED)
  - Batch rendering support
  - Statistics tracking
- SpaceRenderingServiceBuilder: Fluent builder pattern

60 unit tests covering all components.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-08 08:42:27 +01:00

577 lines
18 KiB
Python

"""
HTML renderer for Information Spaces.
This module provides markdown-to-HTML rendering with theming,
code highlighting, and accessibility features.
"""
import re
import html
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Any, Optional, List, Set
from .base import (
SpaceRenderer,
RenderConfig,
RenderResult,
RenderFormat,
ThemeConfig,
)
# Built-in theme properties.
#
# Maps a theme name to a flat dictionary of presentation properties. Every
# value is a string that is interpolated verbatim into generated CSS (fonts,
# widths and colours). ``combine_theme_properties`` looks themes up in this
# table by name and merges them.
THEME_PROPERTIES: Dict[str, Dict[str, Any]] = {
    # Neutral light theme.
    "default": {
        "font_family": "-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif",
        "max_width": "800px",
        "body_color": "#333333",
        "body_background": "#ffffff",
        "heading_color": "#333333",
        "code_background": "#f6f8fa",
        "code_color": "#333333",
        "border_color": "#d0d7de",
        "blockquote_border": "#dfe2e5",
        "blockquote_color": "#6a737d",
        "table_border": "#d0d7de",
        "table_header_bg": "#f6f8fa",
        "link_color": "#0366d6",
    },
    # Light theme with a wider content column (980px).
    "github": {
        "font_family": "-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif",
        "max_width": "980px",
        "body_color": "#24292e",
        "body_background": "#ffffff",
        "heading_color": "#24292e",
        "code_background": "#f6f8fa",
        "code_color": "#24292e",
        "border_color": "#e1e4e8",
        "blockquote_border": "#dfe2e5",
        "blockquote_color": "#6a737d",
        "table_border": "#e1e4e8",
        "table_header_bg": "#f6f8fa",
        "link_color": "#0366d6",
    },
    # Serif theme with a narrow column (680px) on an off-white background.
    "minimal": {
        "font_family": "Georgia, serif",
        "max_width": "680px",
        "body_color": "#222222",
        "body_background": "#fafafa",
        "heading_color": "#111111",
        "code_background": "#f0f0f0",
        "code_color": "#222222",
        "border_color": "#dddddd",
        "blockquote_border": "#cccccc",
        "blockquote_color": "#666666",
        "table_border": "#dddddd",
        "table_header_bg": "#f0f0f0",
        "link_color": "#0055aa",
    },
    # Light text on a dark background.
    "dark": {
        "font_family": "-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif",
        "max_width": "800px",
        "body_color": "#c9d1d9",
        "body_background": "#0d1117",
        "heading_color": "#c9d1d9",
        "code_background": "#161b22",
        "code_color": "#c9d1d9",
        "border_color": "#30363d",
        "blockquote_border": "#3b434b",
        "blockquote_color": "#8b949e",
        "table_border": "#30363d",
        "table_header_bg": "#161b22",
        "link_color": "#58a6ff",
    },
    # Times-based serif theme. This is the only built-in theme that sets
    # "heading_style" and "text_align".
    "academic": {
        "font_family": "'Times New Roman', Times, serif",
        "max_width": "720px",
        "body_color": "#1a1a1a",
        "body_background": "#ffffff",
        "heading_color": "#1a1a1a",
        "heading_style": "underlined",
        "text_align": "justify",
        "code_background": "#f5f5f5",
        "code_color": "#1a1a1a",
        "border_color": "#cccccc",
        "blockquote_border": "#999999",
        "blockquote_color": "#555555",
        "table_border": "#cccccc",
        "table_header_bg": "#f5f5f5",
        "link_color": "#000080",
    },
}
def combine_theme_properties(layers: List[str]) -> Dict[str, Any]:
    """
    Merge the built-in property sets of several themes into one dictionary.

    Layers are applied from left to right, so a property defined by a later
    layer replaces the value contributed by an earlier one. Names that have
    no entry in ``THEME_PROPERTIES`` contribute nothing.

    Args:
        layers: Theme names, lowest priority first.

    Returns:
        A new dictionary holding the merged properties.
    """
    merged: Dict[str, Any] = {}
    for theme_name in layers:
        overrides = THEME_PROPERTIES.get(theme_name)
        if overrides is not None:
            merged.update(overrides)
    return merged
class MarkdownToHTMLRenderer(SpaceRenderer):
"""
Renders markdown content to HTML.
Features:
- Theme support with layer composition
- Syntax highlighting for code blocks
- Automatic heading IDs for navigation
- Link target handling
- Table of contents generation
"""
@property
def supported_formats(self) -> List[RenderFormat]:
    """List the output formats this renderer can produce (HTML only)."""
    html_only = [RenderFormat.HTML]
    return html_only
def render(
    self,
    content: str,
    document_id: str,
    space_id: str,
    dependencies: Optional[Set[str]] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> RenderResult:
    """
    Render a markdown document into a complete HTML page.

    The markdown is converted to an HTML fragment, post-processed, optionally
    paired with a table of contents (when ``config.include_toc`` is set) and
    finally wrapped in a full HTML document whose title is taken from the
    markdown source.

    Args:
        content: Markdown source text.
        document_id: Identifier stored on the result.
        space_id: Identifier of the owning space, stored on the result.
        dependencies: Document dependencies; an empty set is used when omitted.
        metadata: Extra metadata; an empty dict is used when omitted. It is
            forwarded to the document builder and stored on the result.

    Returns:
        RenderResult carrying the HTML page plus hashes of the source and of
        the generated page.
    """
    meta = metadata or {}
    deps = dependencies or set()

    # Hash the input before any transformation happens.
    source_digest = RenderResult.compute_hash(content)

    # markdown -> HTML fragment -> post-processed fragment
    fragment = self._apply_post_processing(self._render_markdown(content))

    # The table of contents is built from the headings of the final fragment.
    toc_fragment = self._generate_toc(fragment) if self.config.include_toc else ""

    page = self._build_html_document(
        body_content=fragment,
        toc_content=toc_fragment,
        title=self._extract_title(content),
        metadata=meta,
    )

    return RenderResult(
        content=page,
        format=RenderFormat.HTML,
        content_hash=RenderResult.compute_hash(page),
        source_hash=source_digest,
        document_id=document_id,
        space_id=space_id,
        dependencies=deps,
        metadata=meta,
    )
def _render_markdown(self, content: str) -> str:
    """
    Convert markdown to an HTML fragment.

    Uses the third-party ``markdown`` package when it can be imported and
    falls back to ``_basic_markdown_to_html`` otherwise.

    Only the import itself is guarded: an ImportError raised while the
    library is converting the document is allowed to propagate instead of
    silently switching to the much more limited fallback parser.

    Args:
        content: Markdown source text.

    Returns:
        The rendered HTML fragment.
    """
    try:
        import markdown
    except ImportError:
        # Library not installed: use the built-in line-based converter.
        return self._basic_markdown_to_html(content)

    extensions = ["extra", "toc", "tables", "fenced_code"]
    if self.config.highlight_code:
        # Adding the extension name cannot raise, so no guard is needed here.
        extensions.append("codehilite")
    return markdown.markdown(content, extensions=extensions)
def _basic_markdown_to_html(self, content: str) -> str:
    """
    Convert markdown to HTML with a minimal line-based parser.

    Used as a fallback when the ``markdown`` library is not available.
    Each input line is classified on its own; supported constructs are:

    - fenced code blocks opened and closed by a line starting with three
      backticks (contents are HTML-escaped; the optional language becomes a
      ``language-*`` class)
    - ATX headings ``#`` .. ``######``, which must be followed by whitespace
      or the end of the line, so ``#hashtag`` stays a paragraph
    - horizontal rules (``---``, ``***``, ``___``)
    - single-line blockquotes (``> text``)
    - unordered list items (``-``, ``*`` or ``+`` followed by a space)
    - everything else becomes one ``<p>`` per line

    Heading ids come from ``_slugify`` and are made unique within the
    document (``intro``, ``intro-1``, ...); a heading whose slug is empty
    gets the base id ``section``. Inline markup in paragraphs and list
    items is delegated to ``_process_inline``.

    Args:
        content: Markdown source text.

    Returns:
        The generated HTML fragment, one element per line.
    """
    heading_re = re.compile(r'^(#{1,6})(?:[ \t]+(.*))?$')
    list_item_re = re.compile(r'^[-*+][ \t]+(.*)$')

    html_lines: List[str] = []
    used_ids: Set[str] = set()
    code_lines: Optional[List[str]] = None  # None while outside a fence
    code_attr = ''
    in_list = False

    def unique_id(base: str) -> str:
        """Return *base*, suffixed with -1, -2, ... until it is unused."""
        base = base or 'section'
        candidate = base
        counter = 1
        while candidate in used_ids:
            candidate = f'{base}-{counter}'
            counter += 1
        used_ids.add(candidate)
        return candidate

    def code_block(attr: str, body: List[str]) -> str:
        """Build one <pre><code> element without a stray leading newline."""
        return f'<pre><code{attr}>' + '\n'.join(body) + '</code></pre>'

    for line in content.split('\n'):
        # Fence lines toggle code mode and are never rendered themselves.
        if line.startswith('```'):
            if code_lines is None:
                # A code block ends any list that is still open.
                if in_list:
                    html_lines.append('</ul>')
                    in_list = False
                lang = line[3:].strip()
                # The language comes from the document, so escape it before
                # placing it inside an attribute value.
                code_attr = (
                    f' class="language-{html.escape(lang, quote=True)}"'
                    if lang else ''
                )
                code_lines = []
            else:
                html_lines.append(code_block(code_attr, code_lines))
                code_lines = None
            continue

        if code_lines is not None:
            code_lines.append(html.escape(line))
            continue

        stripped = line.strip()
        item = list_item_re.match(stripped)

        # Anything that is not a list item terminates the current list, so
        # rules, headings and paragraphs never end up inside <ul>.
        if in_list and item is None:
            html_lines.append('</ul>')
            in_list = False

        heading = heading_re.match(stripped)
        if heading:
            level = len(heading.group(1))
            text = (heading.group(2) or '').strip()
            anchor = unique_id(self._slugify(text))
            html_lines.append(
                f'<h{level} id="{anchor}">{html.escape(text)}</h{level}>'
            )
        elif stripped in ('---', '***', '___'):
            html_lines.append('<hr>')
        elif stripped.startswith('>'):
            quoted = stripped[1:].strip()
            html_lines.append(f'<blockquote>{html.escape(quoted)}</blockquote>')
        elif item:
            if not in_list:
                html_lines.append('<ul>')
                in_list = True
            entry = item.group(1).strip()
            html_lines.append(f'<li>{self._process_inline(entry)}</li>')
        elif not stripped:
            html_lines.append('')
        else:
            html_lines.append(f'<p>{self._process_inline(stripped)}</p>')

    # Close whatever is still open at the end of the input.
    if in_list:
        html_lines.append('</ul>')
    if code_lines is not None:
        html_lines.append(code_block(code_attr, code_lines))

    return '\n'.join(html_lines)
def _process_inline(self, text: str) -> str:
    """
    Convert inline markdown in a single line of text to HTML.

    Handles code spans, images, links, bold (``**`` / ``__``) and italic
    (``*`` / ``_``). Processing order matters:

    1. The text is HTML-escaped, so literal ``<``, ``&`` and quotes from
       the document cannot inject markup or break out of attributes.
    2. Code spans, images and link tags are rendered and swapped for
       opaque placeholders so the emphasis patterns cannot rewrite their
       contents (for example underscores inside a URL).
    3. Images are matched before links; otherwise the link pattern would
       consume the ``[alt](src)`` part of ``![alt](src)``.
    4. Emphasis is applied to the remaining text. Underscore emphasis must
       not be flanked by word characters, so ``snake_case_name`` is left
       untouched.
    5. Placeholders are replaced by the stored HTML.

    Args:
        text: One line of markdown text (no block-level markup).

    Returns:
        The HTML for that text.
    """
    stash: List[str] = []

    def protect(fragment: str) -> str:
        """Store finished HTML and return a placeholder standing in for it."""
        stash.append(fragment)
        return f'\x00{len(stash) - 1}\x01'

    # The control characters used as placeholder delimiters must not occur
    # in the input, otherwise restoring could be confused.
    text = text.replace('\x00', '').replace('\x01', '')
    text = html.escape(text, quote=True)

    # Code spans
    text = re.sub(
        r'`([^`]+)`',
        lambda m: protect(f'<code>{m.group(1)}</code>'),
        text,
    )
    # Images (must run before links)
    text = re.sub(
        r'!\[([^\]]*)\]\(([^)]+)\)',
        lambda m: protect(f'<img src="{m.group(2)}" alt="{m.group(1)}">'),
        text,
    )
    # Links: only the tags are protected, so the link text can still be
    # emphasised below.
    text = re.sub(
        r'\[([^\]]+)\]\(([^)]+)\)',
        lambda m: protect(f'<a href="{m.group(2)}">') + m.group(1) + protect('</a>'),
        text,
    )

    # Bold
    text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text)
    text = re.sub(r'(?<!\w)__(.+?)__(?!\w)', r'<strong>\1</strong>', text)
    # Italic
    text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text)
    text = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'<em>\1</em>', text)

    # A stored fragment may itself contain an earlier placeholder (e.g. a
    # code span inside image alt text), so restore until none are left.
    placeholder = re.compile(r'\x00(\d+)\x01')
    while placeholder.search(text):
        text = placeholder.sub(lambda m: stash[int(m.group(1))], text)
    return text
def _slugify(self, text: str) -> str:
    """
    Turn heading text into a lowercase, hyphen-separated anchor slug.

    Characters other than word characters, whitespace and hyphens are
    dropped; runs of whitespace and underscores collapse into one hyphen;
    leading and trailing hyphens are removed.
    """
    cleaned = re.sub(r'[^\w\s-]', '', text.lower())
    hyphenated = re.sub(r'[\s_]+', '-', cleaned)
    return hyphenated.strip('-')
def _apply_post_processing(self, html_content: str) -> str:
    """
    Apply configuration-driven tweaks to a rendered HTML fragment.

    - When ``config.link_target_blank`` is set, links whose ``href`` starts
      with ``http://`` or ``https://`` get ``target="_blank"`` and
      ``rel="noopener noreferrer"``.
    - When ``config.image_max_width`` is anything other than ``"100%"``,
      every ``<img>`` tag receives an inline ``style`` with the configured
      maximum width and height. Both ``<img ...>`` and self-closing
      ``<img ... />`` are supported; the style is inserted before the
      closing slash so the tag stays well-formed.

    Args:
        html_content: HTML fragment to process.

    Returns:
        The processed HTML fragment.
    """
    # Add target="_blank" to external links
    if self.config.link_target_blank:
        html_content = re.sub(
            r'<a href="(https?://[^"]+)"',
            r'<a href="\1" target="_blank" rel="noopener noreferrer"',
            html_content,
        )

    # Limit image dimensions
    if self.config.image_max_width != "100%":
        # Escaped because the values end up inside a double-quoted attribute.
        style = html.escape(
            f'max-width: {self.config.image_max_width}; '
            f'max-height: {self.config.image_max_height};',
            quote=True,
        )

        def add_style(match: 're.Match') -> str:
            attrs, slash = match.group(1), match.group(2)
            closing = ' />' if slash else '>'
            return f'<img{attrs} style="{style}"{closing}'

        # A replacement function (not a template string) keeps backslashes
        # in the configured values from being read as group references.
        html_content = re.sub(r'<img\b([^>]*?)\s*(/?)>', add_style, html_content)

    return html_content
def _generate_toc(self, html_content: str) -> str:
    """
    Generate a table of contents from the headings of an HTML fragment.

    Every ``<h1>``..``<h6>`` element that carries a non-empty ``id``
    attribute becomes a link to ``#<id>``. Inline tags inside a heading
    (such as ``<code>``) are stripped from the link text rather than
    causing the heading to be skipped. Heading text is already
    HTML-escaped in the fragment, so it is unescaped before being escaped
    again to avoid output like ``&amp;amp;``.

    Deeper headings are nested as a ``<ul>`` inside the ``<li>`` of the
    preceding shallower heading, which keeps the list markup valid.

    Args:
        html_content: Rendered HTML fragment.

    Returns:
        A ``<nav class="toc">`` element, or an empty string when the
        fragment has no usable headings.
    """
    found = re.findall(
        r'<h([1-6])\b[^>]*\sid="([^"]+)"[^>]*>(.*?)</h\1>',
        html_content,
        re.DOTALL,
    )

    entries = []
    for level_str, anchor, inner in found:
        # Drop nested tags, collapse whitespace, normalise escaping.
        plain = ' '.join(re.sub(r'<[^>]+>', '', inner).split())
        if plain:
            label = html.escape(html.unescape(plain))
            entries.append((int(level_str), anchor, label))

    if not entries:
        return ""

    toc_lines = ['<nav class="toc"><h2>Contents</h2>']
    open_levels: List[int] = []  # heading level of each currently open <ul>

    for level, anchor, label in entries:
        # Moving back up: close the open item together with its list.
        while open_levels and level < open_levels[-1]:
            toc_lines.append('</li></ul>')
            open_levels.pop()

        if open_levels and level == open_levels[-1]:
            # Sibling of the previous entry: just close that item.
            toc_lines.append('</li>')
        else:
            # First entry, or deeper than the enclosing item: open a list
            # (inside the still-open <li>, if there is one).
            toc_lines.append('<ul>')
            open_levels.append(level)

        toc_lines.append(f'<li><a href="#{anchor}">{label}</a>')

    # Close remaining items and lists
    while open_levels:
        toc_lines.append('</li></ul>')
        open_levels.pop()

    toc_lines.append('</nav>')
    return '\n'.join(toc_lines)
def _extract_title(self, content: str) -> str:
    """
    Extract the document title from the first level-1 heading.

    A title line starts with a single ``#`` followed by a space or tab and
    some text on the same line. Lines inside fenced code blocks (delimited
    by three backticks or three tildes) are ignored, so a ``# comment`` in
    a code sample is never mistaken for the title.

    Args:
        content: Markdown source text.

    Returns:
        The heading text, or ``"Document"`` when there is no such heading.
    """
    open_fence = None  # marker that opened the current fence, if any

    for line in content.splitlines():
        marker = line.lstrip()[:3]
        if marker in ('```', '~~~'):
            if open_fence is None:
                open_fence = marker
            elif marker == open_fence:
                open_fence = None
            continue
        if open_fence is not None:
            continue

        # [ \t]+ rather than \s+: the heading text must be on this line.
        match = re.match(r'#[ \t]+(.+)$', line)
        if match:
            title = match.group(1).strip()
            if title:
                return title

    return "Document"
def _generate_css(self) -> str:
    """
    Generate the page stylesheet from the theme configuration.

    Properties are resolved in three steps: the theme layers named in
    ``config.theme.layers`` are merged by ``combine_theme_properties``,
    then ``config.theme.custom_properties`` (if any) override the result,
    and any property still missing falls back to the default written next
    to its use in the template below.

    Two optional properties add rules after the base stylesheet, so themes
    that do not define them produce the same base CSS as before:

    - ``text_align``: emitted as ``text-align`` on ``body``.
    - ``heading_style`` equal to ``"underlined"``: headings get a bottom
      border in ``border_color``.

    ``config.theme.custom_css`` is appended last when it is non-empty.

    Returns:
        The CSS text to embed in the document's ``<style>`` element.
    """
    # Get theme properties
    layers = self.config.theme.layers
    props = combine_theme_properties(layers)

    # Apply custom property overrides (tolerating an unset value)
    props.update(self.config.theme.custom_properties or {})

    css = f"""
body {{
font-family: {props.get('font_family', 'sans-serif')};
max-width: {props.get('max_width', '800px')};
margin: 0 auto;
padding: 2rem;
line-height: 1.6;
color: {props.get('body_color', '#333')};
background-color: {props.get('body_background', '#fff')};
}}
h1, h2, h3, h4, h5, h6 {{
color: {props.get('heading_color', props.get('body_color', '#333'))};
margin-top: 1.5em;
margin-bottom: 0.5em;
}}
pre {{
background-color: {props.get('code_background', '#f6f8fa')};
color: {props.get('code_color', '#333')};
padding: 1rem;
border-radius: 6px;
overflow-x: auto;
border: 1px solid {props.get('border_color', '#ddd')};
}}
code {{
background-color: {props.get('code_background', '#f6f8fa')};
color: {props.get('code_color', '#333')};
padding: 0.2em 0.4em;
border-radius: 3px;
font-size: 0.9em;
}}
pre code {{
background: none;
padding: 0;
}}
blockquote {{
border-left: 4px solid {props.get('blockquote_border', '#ddd')};
margin: 0;
padding-left: 1rem;
color: {props.get('blockquote_color', '#666')};
}}
table {{
border-collapse: collapse;
margin: 1rem 0;
width: 100%;
border: 1px solid {props.get('table_border', '#ddd')};
}}
th, td {{
border: 1px solid {props.get('table_border', '#ddd')};
padding: 0.5rem;
text-align: left;
}}
th {{
background-color: {props.get('table_header_bg', '#f6f8fa')};
}}
a {{
color: {props.get('link_color', '#0366d6')};
text-decoration: none;
}}
a:hover {{
text-decoration: underline;
}}
img {{
max-width: {self.config.image_max_width};
max-height: {self.config.image_max_height};
height: auto;
}}
.toc {{
background-color: {props.get('code_background', '#f6f8fa')};
padding: 1rem;
border-radius: 6px;
margin-bottom: 2rem;
}}
.toc h2 {{
margin-top: 0;
}}
.toc ul {{
padding-left: 1.5rem;
}}
"""

    # Optional theme properties that the base template does not cover.
    extra_rules = []
    if props.get('text_align'):
        extra_rules.append(
            f"body {{\ntext-align: {props['text_align']};\n}}"
        )
    if props.get('heading_style') == 'underlined':
        extra_rules.append(
            "h1, h2, h3, h4, h5, h6 {\n"
            f"border-bottom: 1px solid {props.get('border_color', '#ddd')};\n"
            "padding-bottom: 0.3em;\n"
            "}"
        )
    if extra_rules:
        css += '\n'.join(extra_rules) + '\n'

    # Add custom CSS if provided
    if self.config.theme.custom_css:
        css += f"\n{self.config.theme.custom_css}"

    return css
def _build_html_document(
    self,
    body_content: str,
    toc_content: str,
    title: str,
    metadata: Dict[str, Any],
) -> str:
    """
    Wrap a rendered fragment in a complete HTML5 document.

    The ``<head>`` holds three fixed meta tags (charset, viewport,
    generator), one extra ``<meta>`` per metadata key that starts with
    ``meta_`` (the prefix is removed to form the tag name), the escaped
    title and the generated stylesheet. The ``<body>`` holds the table of
    contents followed by the content inside ``<div id="content">``.

    Args:
        body_content: Rendered HTML for the document body.
        toc_content: Table-of-contents HTML, or an empty string.
        title: Plain-text document title; HTML-escaped here.
        metadata: Metadata mapping; only ``meta_*`` keys are used.

    Returns:
        The full HTML document as a string.
    """
    stylesheet = self._generate_css()

    # Fixed tags first, then one tag per "meta_*" metadata entry.
    tags = [
        '<meta charset="utf-8">',
        '<meta name="viewport" content="width=device-width, initial-scale=1.0">',
        '<meta name="generator" content="Markitect Information Space">',
    ]
    for key, value in metadata.items():
        if not key.startswith('meta_'):
            continue
        tag_name = html.escape(key[5:])
        tag_value = html.escape(str(value))
        tags.append(f'<meta name="{tag_name}" content="{tag_value}">')

    # Every tag is terminated by its own newline.
    meta_tags = ''.join(f'{tag}\n' for tag in tags)
    safe_title = html.escape(title)

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
{meta_tags}
<title>{safe_title}</title>
<style>
{stylesheet}
</style>
</head>
<body>
{toc_content}
<div id="content">
{body_content}
</div>
</body>
</html>"""
class HTMLRendererFactory:
    """Convenience constructors for pre-configured ``MarkdownToHTMLRenderer`` objects."""

    @staticmethod
    def _themed(theme_name: str, include_toc: bool) -> MarkdownToHTMLRenderer:
        """Build a renderer whose theme has *theme_name* as its only layer."""
        theme = ThemeConfig(name=theme_name, layers=[theme_name])
        settings = RenderConfig(theme=theme, include_toc=include_toc)
        return MarkdownToHTMLRenderer(settings)

    @staticmethod
    def create_default() -> MarkdownToHTMLRenderer:
        """Return a renderer constructed without an explicit configuration."""
        return MarkdownToHTMLRenderer()

    @staticmethod
    def create_github_style() -> MarkdownToHTMLRenderer:
        """Return a renderer using the "github" theme, without a table of contents."""
        return HTMLRendererFactory._themed("github", False)

    @staticmethod
    def create_academic_style() -> MarkdownToHTMLRenderer:
        """Return a renderer using the "academic" theme, with a table of contents."""
        return HTMLRendererFactory._themed("academic", True)

    @staticmethod
    def create_minimal_style() -> MarkdownToHTMLRenderer:
        """Return a renderer using the "minimal" theme, without a table of contents."""
        return HTMLRendererFactory._themed("minimal", False)

    @staticmethod
    def create_dark_mode() -> MarkdownToHTMLRenderer:
        """Return a renderer using the "dark" theme, without a table of contents."""
        return HTMLRendererFactory._themed("dark", False)