diff --git a/markitect/clean_document_manager.py b/markitect/clean_document_manager.py index 7663c614..d5e1e23b 100644 --- a/markitect/clean_document_manager.py +++ b/markitect/clean_document_manager.py @@ -56,7 +56,8 @@ class CleanDocumentManager: } def render_file(self, input_file: str, output_file: str, template: str = None, css: str = None, - edit_mode: bool = False, insert_mode: bool = False, editor_theme: str = 'github', keyboard_shortcuts: bool = True, nodogtag: bool = False) -> Dict[str, Any]: + edit_mode: bool = False, insert_mode: bool = False, editor_theme: str = 'github', keyboard_shortcuts: bool = True, nodogtag: bool = False, + image_max_width: str = '12cm', image_max_height: str = '20cm') -> Dict[str, Any]: """ Render a markdown file to HTML with optional clean editing capabilities. """ @@ -67,7 +68,10 @@ class CleanDocumentManager: raise FileNotFoundError(f"Input file not found: {input_file}") # Read markdown content - markdown_content = input_path.read_text(encoding='utf-8') + raw_markdown_content = input_path.read_text(encoding='utf-8') + + # Process base64 images - relocate payloads to document end + markdown_content, base64_references = self._process_base64_images(raw_markdown_content) # Extract title from markdown (first h1 heading) title = self._extract_title_from_markdown(markdown_content) @@ -90,7 +94,10 @@ class CleanDocumentManager: keyboard_shortcuts=keyboard_shortcuts, original_filename=original_filename, version_info=version_info, - nodogtag=nodogtag + nodogtag=nodogtag, + image_max_width=image_max_width, + image_max_height=image_max_height, + base64_references=base64_references ) # Write HTML file @@ -112,6 +119,69 @@ class CleanDocumentManager: return match.group(1).strip() return "Markdown Document" + def _process_base64_images(self, markdown_content: str) -> tuple: + """ + Process base64 encoded images in markdown content. + + - Extracts base64 image data URLs + - Replaces them with reference links + - Returns processed content and reference mapping + + Returns: + tuple: (processed_markdown, base64_references_dict) + """ + import re + import uuid + + # Pattern to match base64 image data URLs + base64_pattern = r'!\[([^\]]*)\]\(data:image/([^;]+);base64,([^)]+)\)' + + base64_references = {} + reference_definitions = [] + processed_content = markdown_content + + # Find all base64 images + matches = list(re.finditer(base64_pattern, markdown_content)) + + for i, match in enumerate(matches): + alt_text = match.group(1) + image_type = match.group(2) # png, jpeg, svg+xml, etc. + base64_data = match.group(3) + + # Generate a unique reference ID + ref_id = f"base64-image-{i+1}" + + # Store the mapping + base64_references[ref_id] = { + 'alt': alt_text, + 'type': image_type, + 'data': base64_data, + 'full_data_url': f"data:image/{image_type};base64,{base64_data}" + } + + # Replace the inline base64 with reference + original_match = match.group(0) + reference_link = f"![{alt_text}][{ref_id}]" + processed_content = processed_content.replace(original_match, reference_link, 1) + + # Create reference definition for the end of document + reference_definitions.append(f"[{ref_id}]: data:image/{image_type};base64,{base64_data}") + + # Add reference definitions to the end of the document if any base64 images were found + if reference_definitions: + # Ensure there's a blank line before the references + if not processed_content.endswith('\n\n'): + if processed_content.endswith('\n'): + processed_content += '\n' + else: + processed_content += '\n\n' + + # Add a comment to indicate the base64 reference section + processed_content += "\n" + processed_content += '\n'.join(reference_definitions) + '\n' + + return processed_content, base64_references + def _get_version_info(self) -> dict: """Get repository name and version information.""" from .__version__ import get_version_info @@ -125,7 +195,7 @@ class CleanDocumentManager: 'git_info': '' # Already included in full_version } - def _get_template_css(self, template: str = None) -> str: + def _get_template_css(self, template: str = None, image_max_width: str = '12cm', image_max_height: str = '20cm') -> str: """Generate layered theme CSS styles.""" # Import layered theme functions from markitect.plugins.builtin.markdown_commands import ( @@ -137,21 +207,21 @@ class CleanDocumentManager: # New layered theme system theme_list = parse_theme_string(template) combined_props = combine_theme_properties(theme_list) - return self._generate_layered_css(combined_props) + return self._generate_layered_css(combined_props, image_max_width, image_max_height) else: # Legacy single theme or fallback if not template or template not in TEMPLATE_STYLES: # Use default layered themes or the specified theme theme_list = parse_theme_string(template or 'basic') combined_props = combine_theme_properties(theme_list) - return self._generate_layered_css(combined_props) + return self._generate_layered_css(combined_props, image_max_width, image_max_height) else: # Legacy theme - convert to layered theme_list = parse_theme_string(template) combined_props = combine_theme_properties(theme_list) - return self._generate_layered_css(combined_props) + return self._generate_layered_css(combined_props, image_max_width, image_max_height) - def _generate_layered_css(self, properties: dict) -> str: + def _generate_layered_css(self, properties: dict, image_max_width: str = '12cm', image_max_height: str = '20cm') -> str: """Generate CSS from combined theme properties.""" # Set defaults for missing properties (properties override defaults) @@ -852,9 +922,19 @@ class CleanDocumentManager: }} """ - return f"" + # Image size CSS with configurable limits + image_css = f""" + img {{ + max-width: {image_max_width}; + max-height: {image_max_height}; + height: auto; + display: block; + margin: 1rem auto; + }}""" - def _get_legacy_template_css(self, template: str) -> str: + return f"" + + def _get_legacy_template_css(self, template: str, image_max_width: str = '12cm', image_max_height: str = '20cm') -> str: """Legacy CSS generation - kept for backward compatibility.""" # Import template styles from markitect.plugins.builtin.markdown_commands import TEMPLATE_STYLES @@ -886,10 +966,11 @@ class CleanDocumentManager: 'max_width': legacy_config['max_width'], 'body_color': legacy_config['body_color'], } - return self._generate_layered_css(layered_props) + return self._generate_layered_css(layered_props, image_max_width, image_max_height) def _generate_html_template(self, markdown_content: str, title: str, css: str = None, template: str = None, - edit_mode: bool = False, insert_mode: bool = False, editor_theme: str = 'github', keyboard_shortcuts: bool = True, original_filename: str = 'document', version_info: dict = None, nodogtag: bool = False) -> str: + edit_mode: bool = False, insert_mode: bool = False, editor_theme: str = 'github', keyboard_shortcuts: bool = True, original_filename: str = 'document', version_info: dict = None, nodogtag: bool = False, + image_max_width: str = '12cm', image_max_height: str = '20cm', base64_references: dict = None) -> str: """Generate clean HTML template.""" # Add dogtag to markdown content if not disabled @@ -914,9 +995,14 @@ class CleanDocumentManager: markdown_content_with_dogtag = markdown_content + dogtag else: markdown_content_with_dogtag = markdown_content + dogtag = "" - # Escape the markdown content for JavaScript - js_markdown_content = json.dumps(markdown_content_with_dogtag) + # Pass original markdown content to editor (without dogtag for editing) + # But make dogtag available separately for protected display in editor + js_markdown_content = json.dumps(markdown_content) + js_markdown_content_with_dogtag = json.dumps(markdown_content_with_dogtag) + js_dogtag_content = json.dumps(dogtag) + js_base64_references = json.dumps(base64_references or {}) # Handle CSS styles css_content = "" @@ -932,7 +1018,7 @@ class CleanDocumentManager: css_content = f'' # Generate template-specific CSS - default_css = self._get_template_css(template) + default_css = self._get_template_css(template, image_max_width, image_max_height) # Load clean editor JavaScript files editor_scripts = "" @@ -984,6 +1070,8 @@ class CleanDocumentManager: # Load clean editor architecture for both edit and insert modes if edit_mode or insert_mode: editor_scripts = self._get_clean_editor_scripts() + else: + editor_scripts = "" # Generate the complete HTML template html_template = f""" @@ -1005,6 +1093,9 @@ class CleanDocumentManager: