""" HTML renderer for Information Spaces. This module provides markdown-to-HTML rendering with theming, code highlighting, and accessibility features. """ import re import html from dataclasses import dataclass from pathlib import Path from typing import Dict, Any, Optional, List, Set from .base import ( SpaceRenderer, RenderConfig, RenderResult, RenderFormat, ThemeConfig, ) # Built-in theme properties THEME_PROPERTIES: Dict[str, Dict[str, Any]] = { "default": { "font_family": "-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif", "max_width": "800px", "body_color": "#333333", "body_background": "#ffffff", "heading_color": "#333333", "code_background": "#f6f8fa", "code_color": "#333333", "border_color": "#d0d7de", "blockquote_border": "#dfe2e5", "blockquote_color": "#6a737d", "table_border": "#d0d7de", "table_header_bg": "#f6f8fa", "link_color": "#0366d6", }, "github": { "font_family": "-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif", "max_width": "980px", "body_color": "#24292e", "body_background": "#ffffff", "heading_color": "#24292e", "code_background": "#f6f8fa", "code_color": "#24292e", "border_color": "#e1e4e8", "blockquote_border": "#dfe2e5", "blockquote_color": "#6a737d", "table_border": "#e1e4e8", "table_header_bg": "#f6f8fa", "link_color": "#0366d6", }, "minimal": { "font_family": "Georgia, serif", "max_width": "680px", "body_color": "#222222", "body_background": "#fafafa", "heading_color": "#111111", "code_background": "#f0f0f0", "code_color": "#222222", "border_color": "#dddddd", "blockquote_border": "#cccccc", "blockquote_color": "#666666", "table_border": "#dddddd", "table_header_bg": "#f0f0f0", "link_color": "#0055aa", }, "dark": { "font_family": "-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif", "max_width": "800px", "body_color": "#c9d1d9", "body_background": "#0d1117", "heading_color": "#c9d1d9", "code_background": "#161b22", "code_color": "#c9d1d9", "border_color": "#30363d", "blockquote_border": "#3b434b", "blockquote_color": "#8b949e", "table_border": "#30363d", "table_header_bg": "#161b22", "link_color": "#58a6ff", }, "academic": { "font_family": "'Times New Roman', Times, serif", "max_width": "720px", "body_color": "#1a1a1a", "body_background": "#ffffff", "heading_color": "#1a1a1a", "heading_style": "underlined", "text_align": "justify", "code_background": "#f5f5f5", "code_color": "#1a1a1a", "border_color": "#cccccc", "blockquote_border": "#999999", "blockquote_color": "#555555", "table_border": "#cccccc", "table_header_bg": "#f5f5f5", "link_color": "#000080", }, } def combine_theme_properties(layers: List[str]) -> Dict[str, Any]: """ Combine theme properties from multiple layers. Later layers override earlier ones. Args: layers: List of theme names to combine Returns: Combined properties dictionary """ combined = {} for layer in layers: if layer in THEME_PROPERTIES: combined.update(THEME_PROPERTIES[layer]) return combined class MarkdownToHTMLRenderer(SpaceRenderer): """ Renders markdown content to HTML. Features: - Theme support with layer composition - Syntax highlighting for code blocks - Automatic heading IDs for navigation - Link target handling - Table of contents generation """ @property def supported_formats(self) -> List[RenderFormat]: """Return supported formats.""" return [RenderFormat.HTML] def render( self, content: str, document_id: str, space_id: str, dependencies: Optional[Set[str]] = None, metadata: Optional[Dict[str, Any]] = None, ) -> RenderResult: """ Render markdown to HTML. Args: content: Markdown content document_id: Document ID space_id: Space ID dependencies: Document dependencies metadata: Additional metadata Returns: RenderResult with HTML content """ source_hash = RenderResult.compute_hash(content) # Parse markdown to HTML html_content = self._render_markdown(content) # Apply post-processing html_content = self._apply_post_processing(html_content) # Generate table of contents if requested toc_html = "" if self.config.include_toc: toc_html = self._generate_toc(html_content) # Build complete HTML document complete_html = self._build_html_document( body_content=html_content, toc_content=toc_html, title=self._extract_title(content), metadata=metadata or {}, ) content_hash = RenderResult.compute_hash(complete_html) return RenderResult( content=complete_html, format=RenderFormat.HTML, content_hash=content_hash, source_hash=source_hash, document_id=document_id, space_id=space_id, dependencies=dependencies or set(), metadata=metadata or {}, ) def _render_markdown(self, content: str) -> str: """ Convert markdown to HTML. Uses the Python markdown library if available, otherwise falls back to a basic parser. """ try: import markdown # Configure extensions extensions = ["extra", "toc", "tables", "fenced_code"] if self.config.highlight_code: try: extensions.append("codehilite") except ImportError: pass return markdown.markdown(content, extensions=extensions) except ImportError: # Fallback to basic parsing return self._basic_markdown_to_html(content) def _basic_markdown_to_html(self, content: str) -> str: """ Basic markdown to HTML conversion. Used as fallback when markdown library is not available. """ lines = content.split('\n') html_lines = [] in_code_block = False in_list = False for line in lines: # Code blocks if line.startswith('```'): if in_code_block: html_lines.append('') in_code_block = False else: lang = line[3:].strip() lang_class = f' class="language-{lang}"' if lang else '' html_lines.append(f'
')
in_code_block = True
continue
if in_code_block:
html_lines.append(html.escape(line))
continue
# Close list if not a list item
if in_list and not line.strip().startswith(('-', '*', '+')):
html_lines.append('')
in_list = False
stripped = line.strip()
# Headers
if stripped.startswith('######'):
text = stripped[6:].strip()
slug = self._slugify(text)
html_lines.append(f'{html.escape(text)}
')
elif stripped.startswith('#####'):
text = stripped[5:].strip()
slug = self._slugify(text)
html_lines.append(f'{html.escape(text)}
')
elif stripped.startswith('####'):
text = stripped[4:].strip()
slug = self._slugify(text)
html_lines.append(f'{html.escape(text)}
')
elif stripped.startswith('###'):
text = stripped[3:].strip()
slug = self._slugify(text)
html_lines.append(f'{html.escape(text)}
')
elif stripped.startswith('##'):
text = stripped[2:].strip()
slug = self._slugify(text)
html_lines.append(f'{html.escape(text)}
')
elif stripped.startswith('#'):
text = stripped[1:].strip()
slug = self._slugify(text)
html_lines.append(f'{html.escape(text)}
')
# Horizontal rule
elif stripped in ('---', '***', '___'):
html_lines.append('
')
# Blockquote
elif stripped.startswith('>'):
text = stripped[1:].strip()
html_lines.append(f'{html.escape(text)}
')
# Unordered list
elif stripped.startswith(('-', '*', '+')) and len(stripped) > 1 and stripped[1] == ' ':
if not in_list:
html_lines.append('')
in_list = True
text = stripped[2:].strip()
html_lines.append(f'- {self._process_inline(text)}
')
# Empty line
elif not stripped:
if in_list:
html_lines.append('
')
in_list = False
html_lines.append('')
# Paragraph
else:
html_lines.append(f'{self._process_inline(stripped)}
')
# Close any open list
if in_list:
html_lines.append('')
# Close any open code block
if in_code_block:
html_lines.append('')
return '\n'.join(html_lines)
def _process_inline(self, text: str) -> str:
"""Process inline markdown elements."""
# Bold
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
text = re.sub(r'__(.+?)__', r'\1', text)
# Italic
text = re.sub(r'\*(.+?)\*', r'\1', text)
text = re.sub(r'_(.+?)_', r'\1', text)
# Code
text = re.sub(r'`([^`]+)`', r'\1', text)
# Links
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'\1', text)
# Images
text = re.sub(r'!\[([^\]]*)\]\(([^)]+)\)', r'