""" HTML renderer for Information Spaces. This module provides markdown-to-HTML rendering with theming, code highlighting, and accessibility features. """ import re import html from dataclasses import dataclass from pathlib import Path from typing import Dict, Any, Optional, List, Set from .base import ( SpaceRenderer, RenderConfig, RenderResult, RenderFormat, ThemeConfig, ) # Built-in theme properties THEME_PROPERTIES: Dict[str, Dict[str, Any]] = { "default": { "font_family": "-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif", "max_width": "800px", "body_color": "#333333", "body_background": "#ffffff", "heading_color": "#333333", "code_background": "#f6f8fa", "code_color": "#333333", "border_color": "#d0d7de", "blockquote_border": "#dfe2e5", "blockquote_color": "#6a737d", "table_border": "#d0d7de", "table_header_bg": "#f6f8fa", "link_color": "#0366d6", }, "github": { "font_family": "-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif", "max_width": "980px", "body_color": "#24292e", "body_background": "#ffffff", "heading_color": "#24292e", "code_background": "#f6f8fa", "code_color": "#24292e", "border_color": "#e1e4e8", "blockquote_border": "#dfe2e5", "blockquote_color": "#6a737d", "table_border": "#e1e4e8", "table_header_bg": "#f6f8fa", "link_color": "#0366d6", }, "minimal": { "font_family": "Georgia, serif", "max_width": "680px", "body_color": "#222222", "body_background": "#fafafa", "heading_color": "#111111", "code_background": "#f0f0f0", "code_color": "#222222", "border_color": "#dddddd", "blockquote_border": "#cccccc", "blockquote_color": "#666666", "table_border": "#dddddd", "table_header_bg": "#f0f0f0", "link_color": "#0055aa", }, "dark": { "font_family": "-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif", "max_width": "800px", "body_color": "#c9d1d9", "body_background": "#0d1117", "heading_color": "#c9d1d9", "code_background": "#161b22", "code_color": "#c9d1d9", "border_color": "#30363d", "blockquote_border": "#3b434b", "blockquote_color": "#8b949e", "table_border": "#30363d", "table_header_bg": "#161b22", "link_color": "#58a6ff", }, "academic": { "font_family": "'Times New Roman', Times, serif", "max_width": "720px", "body_color": "#1a1a1a", "body_background": "#ffffff", "heading_color": "#1a1a1a", "heading_style": "underlined", "text_align": "justify", "code_background": "#f5f5f5", "code_color": "#1a1a1a", "border_color": "#cccccc", "blockquote_border": "#999999", "blockquote_color": "#555555", "table_border": "#cccccc", "table_header_bg": "#f5f5f5", "link_color": "#000080", }, } def combine_theme_properties(layers: List[str]) -> Dict[str, Any]: """ Combine theme properties from multiple layers. Later layers override earlier ones. Args: layers: List of theme names to combine Returns: Combined properties dictionary """ combined = {} for layer in layers: if layer in THEME_PROPERTIES: combined.update(THEME_PROPERTIES[layer]) return combined class MarkdownToHTMLRenderer(SpaceRenderer): """ Renders markdown content to HTML. Features: - Theme support with layer composition - Syntax highlighting for code blocks - Automatic heading IDs for navigation - Link target handling - Table of contents generation """ @property def supported_formats(self) -> List[RenderFormat]: """Return supported formats.""" return [RenderFormat.HTML] def render( self, content: str, document_id: str, space_id: str, dependencies: Optional[Set[str]] = None, metadata: Optional[Dict[str, Any]] = None, ) -> RenderResult: """ Render markdown to HTML. Args: content: Markdown content document_id: Document ID space_id: Space ID dependencies: Document dependencies metadata: Additional metadata Returns: RenderResult with HTML content """ source_hash = RenderResult.compute_hash(content) # Parse markdown to HTML html_content = self._render_markdown(content) # Apply post-processing html_content = self._apply_post_processing(html_content) # Generate table of contents if requested toc_html = "" if self.config.include_toc: toc_html = self._generate_toc(html_content) # Build complete HTML document complete_html = self._build_html_document( body_content=html_content, toc_content=toc_html, title=self._extract_title(content), metadata=metadata or {}, ) content_hash = RenderResult.compute_hash(complete_html) return RenderResult( content=complete_html, format=RenderFormat.HTML, content_hash=content_hash, source_hash=source_hash, document_id=document_id, space_id=space_id, dependencies=dependencies or set(), metadata=metadata or {}, ) def _render_markdown(self, content: str) -> str: """ Convert markdown to HTML. Uses the Python markdown library if available, otherwise falls back to a basic parser. """ try: import markdown # Configure extensions extensions = ["extra", "toc", "tables", "fenced_code"] if self.config.highlight_code: try: extensions.append("codehilite") except ImportError: pass return markdown.markdown(content, extensions=extensions) except ImportError: # Fallback to basic parsing return self._basic_markdown_to_html(content) def _basic_markdown_to_html(self, content: str) -> str: """ Basic markdown to HTML conversion. Used as fallback when markdown library is not available. """ lines = content.split('\n') html_lines = [] in_code_block = False in_list = False for line in lines: # Code blocks if line.startswith('```'): if in_code_block: html_lines.append('') in_code_block = False else: lang = line[3:].strip() lang_class = f' class="language-{lang}"' if lang else '' html_lines.append(f'
')
                    in_code_block = True
                continue

            if in_code_block:
                html_lines.append(html.escape(line))
                continue

            # Close list if not a list item
            if in_list and not line.strip().startswith(('-', '*', '+')):
                html_lines.append('')
                in_list = False

            stripped = line.strip()

            # Headers
            if stripped.startswith('######'):
                text = stripped[6:].strip()
                slug = self._slugify(text)
                html_lines.append(f'
{html.escape(text)}
') elif stripped.startswith('#####'): text = stripped[5:].strip() slug = self._slugify(text) html_lines.append(f'
{html.escape(text)}
') elif stripped.startswith('####'): text = stripped[4:].strip() slug = self._slugify(text) html_lines.append(f'

{html.escape(text)}

') elif stripped.startswith('###'): text = stripped[3:].strip() slug = self._slugify(text) html_lines.append(f'

{html.escape(text)}

') elif stripped.startswith('##'): text = stripped[2:].strip() slug = self._slugify(text) html_lines.append(f'

{html.escape(text)}

') elif stripped.startswith('#'): text = stripped[1:].strip() slug = self._slugify(text) html_lines.append(f'

{html.escape(text)}

') # Horizontal rule elif stripped in ('---', '***', '___'): html_lines.append('
') # Blockquote elif stripped.startswith('>'): text = stripped[1:].strip() html_lines.append(f'
{html.escape(text)}
') # Unordered list elif stripped.startswith(('-', '*', '+')) and len(stripped) > 1 and stripped[1] == ' ': if not in_list: html_lines.append('') in_list = False html_lines.append('') # Paragraph else: html_lines.append(f'

{self._process_inline(stripped)}

') # Close any open list if in_list: html_lines.append('') # Close any open code block if in_code_block: html_lines.append('
') return '\n'.join(html_lines) def _process_inline(self, text: str) -> str: """Process inline markdown elements.""" # Bold text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) text = re.sub(r'__(.+?)__', r'\1', text) # Italic text = re.sub(r'\*(.+?)\*', r'\1', text) text = re.sub(r'_(.+?)_', r'\1', text) # Code text = re.sub(r'`([^`]+)`', r'\1', text) # Links text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'\1', text) # Images text = re.sub(r'!\[([^\]]*)\]\(([^)]+)\)', r'\1', text) return text def _slugify(self, text: str) -> str: """Create a URL-safe slug from text.""" slug = text.lower() slug = re.sub(r'[^\w\s-]', '', slug) slug = re.sub(r'[\s_]+', '-', slug) slug = slug.strip('-') return slug def _apply_post_processing(self, html_content: str) -> str: """Apply post-processing to HTML content.""" # Add target="_blank" to external links if self.config.link_target_blank: html_content = re.sub( r']*)>', f'', html_content, ) return html_content def _generate_toc(self, html_content: str) -> str: """Generate table of contents from HTML headings.""" headings = re.findall(r']*id="([^"]+)"[^>]*>([^<]+)', html_content) if not headings: return "" toc_lines = ['') return '\n'.join(toc_lines) def _extract_title(self, content: str) -> str: """Extract title from first H1 heading.""" match = re.search(r'^#\s+(.+)$', content, re.MULTILINE) if match: return match.group(1).strip() return "Document" def _generate_css(self) -> str: """Generate CSS based on theme configuration.""" # Get theme properties layers = self.config.theme.layers props = combine_theme_properties(layers) # Apply custom property overrides props.update(self.config.theme.custom_properties) css = f""" body {{ font-family: {props.get('font_family', 'sans-serif')}; max-width: {props.get('max_width', '800px')}; margin: 0 auto; padding: 2rem; line-height: 1.6; color: {props.get('body_color', '#333')}; background-color: {props.get('body_background', '#fff')}; }} h1, h2, h3, h4, h5, h6 {{ color: {props.get('heading_color', props.get('body_color', '#333'))}; margin-top: 1.5em; margin-bottom: 0.5em; }} pre {{ background-color: {props.get('code_background', '#f6f8fa')}; color: {props.get('code_color', '#333')}; padding: 1rem; border-radius: 6px; overflow-x: auto; border: 1px solid {props.get('border_color', '#ddd')}; }} code {{ background-color: {props.get('code_background', '#f6f8fa')}; color: {props.get('code_color', '#333')}; padding: 0.2em 0.4em; border-radius: 3px; font-size: 0.9em; }} pre code {{ background: none; padding: 0; }} blockquote {{ border-left: 4px solid {props.get('blockquote_border', '#ddd')}; margin: 0; padding-left: 1rem; color: {props.get('blockquote_color', '#666')}; }} table {{ border-collapse: collapse; margin: 1rem 0; width: 100%; border: 1px solid {props.get('table_border', '#ddd')}; }} th, td {{ border: 1px solid {props.get('table_border', '#ddd')}; padding: 0.5rem; text-align: left; }} th {{ background-color: {props.get('table_header_bg', '#f6f8fa')}; }} a {{ color: {props.get('link_color', '#0366d6')}; text-decoration: none; }} a:hover {{ text-decoration: underline; }} img {{ max-width: {self.config.image_max_width}; max-height: {self.config.image_max_height}; height: auto; }} .toc {{ background-color: {props.get('code_background', '#f6f8fa')}; padding: 1rem; border-radius: 6px; margin-bottom: 2rem; }} .toc h2 {{ margin-top: 0; }} .toc ul {{ padding-left: 1.5rem; }} """ # Add custom CSS if provided if self.config.theme.custom_css: css += f"\n{self.config.theme.custom_css}" return css def _build_html_document( self, body_content: str, toc_content: str, title: str, metadata: Dict[str, Any], ) -> str: """Build complete HTML document.""" css = self._generate_css() # Meta tags meta_tags = '\n' meta_tags += '\n' meta_tags += '\n' # Add custom meta from metadata for key, value in metadata.items(): if key.startswith('meta_'): meta_name = key[5:] meta_tags += f'\n' return f""" {meta_tags} {html.escape(title)} {toc_content}
{body_content}
""" class HTMLRendererFactory: """Factory for creating configured HTML renderers.""" @staticmethod def create_default() -> MarkdownToHTMLRenderer: """Create a renderer with default settings.""" return MarkdownToHTMLRenderer() @staticmethod def create_github_style() -> MarkdownToHTMLRenderer: """Create a renderer with GitHub-style theme.""" config = RenderConfig( theme=ThemeConfig(name="github", layers=["github"]), include_toc=False, ) return MarkdownToHTMLRenderer(config) @staticmethod def create_academic_style() -> MarkdownToHTMLRenderer: """Create a renderer with academic styling.""" config = RenderConfig( theme=ThemeConfig(name="academic", layers=["academic"]), include_toc=True, ) return MarkdownToHTMLRenderer(config) @staticmethod def create_minimal_style() -> MarkdownToHTMLRenderer: """Create a renderer with minimal styling.""" config = RenderConfig( theme=ThemeConfig(name="minimal", layers=["minimal"]), include_toc=False, ) return MarkdownToHTMLRenderer(config) @staticmethod def create_dark_mode() -> MarkdownToHTMLRenderer: """Create a renderer with dark mode theme.""" config = RenderConfig( theme=ThemeConfig(name="dark", layers=["dark"]), include_toc=False, ) return MarkdownToHTMLRenderer(config)