""" Markdown commands plugin for MarkiTect. This plugin provides the core markdown file operations with md- prefixes, replacing the legacy unprefixed commands for better namespace consistency. """ import click import json import os import re import tempfile import unicodedata from pathlib import Path from typing import Dict, Any from markitect.plugins.base import CommandPlugin, PluginMetadata, PluginType from markitect.plugins.decorators import register_plugin from markitect.document_manager import DocumentManager from markitect.serializer import ASTSerializer # Simple helper function - avoiding circular imports def get_default_format(available_formats=['table', 'json', 'yaml', 'simple'], fallback='simple'): """Get the default output format - simplified version for plugin.""" return fallback @register_plugin("markdown_commands") class MarkdownCommandsPlugin(CommandPlugin): """Plugin providing core markdown file operations.""" @property def metadata(self) -> PluginMetadata: return PluginMetadata( name="markdown_commands", version="1.0.0", description="Core markdown file operations (ingest, get, list) with md- prefixes", author="MarkiTect Core Team", plugin_type=PluginType.COMMAND, markitect_version=">=0.1.0" ) def get_commands(self) -> Dict[str, Any]: """Return the markdown commands with md- prefixes.""" return { 'md-ingest': md_ingest_command, 'md-get': md_get_command, 'md-list': md_list_command, 'md-render': md_render_command, 'md-index': md_index_command, 'md-explode': md_explode_command } # Define commands as standalone functions @click.command() @click.argument('file_path', type=click.Path(exists=True)) @click.pass_context def md_ingest_command(ctx, file_path): """ Process and store a markdown file. Ingests a markdown file into the MarkiTect system, parsing its content, extracting front matter, generating AST cache, and storing metadata in the database. FILE_PATH: Path to the markdown file to process Examples: markitect md-ingest README.md markitect md-ingest docs/guide.md """ config = ctx.obj or {} try: if config.get('verbose', False): click.echo(f"Processing file: {file_path}") # Initialize document manager with database manager doc_manager = DocumentManager(config.get('db_manager')) # Process the file result = doc_manager.ingest_file(file_path) if config.get('verbose', False): click.echo(f"Processing results:") click.echo(f" File: {result['metadata']['filename']}") click.echo(f" AST nodes: {len(result['ast'])} nodes") click.echo(f" Cache file: {result['ast_cache_path']}") click.echo(f" Parse time: {result['parse_time']:.2f}s") click.echo(f" Cache time: {result['cache_time']:.2f}s") click.echo(f"✓ Successfully ingested: {Path(file_path).name}") except Exception as e: click.echo(f"Error processing file: {e}", err=True) raise click.Abort() @click.command() @click.argument('file_path', type=str) @click.option('--output', '-o', type=click.Path(), help='Output file path (default: stdout)') @click.pass_context def md_get_command(ctx, file_path, output): """ Retrieve and output a processed markdown file. Loads the file from the database and AST cache, then serializes it back to markdown format. Supports outputting to file or stdout. FILE_PATH: Name of the file to retrieve Examples: markitect md-get README.md markitect md-get docs/guide.md --output modified_guide.md """ config = ctx.obj or {} try: if config.get('verbose', False): click.echo(f"Retrieving file: {file_path}") db_manager = config.get('db_manager') # Get file information from database file_info = db_manager.get_markdown_file(file_path) if not file_info: click.echo(f"File not found in database: {file_path}", err=True) click.echo("Use 'markitect md-ingest' to process the file first.", err=True) raise click.Abort() # Load AST from cache cache_filename = f"{file_path}.ast.json" cache_path = Path('.ast_cache') / cache_filename if not cache_path.exists(): click.echo(f"AST cache not found: {cache_path}", err=True) click.echo("Try re-ingesting the file to regenerate cache.", err=True) raise click.Abort() # Read AST from cache import json with open(cache_path, 'r', encoding='utf-8') as f: ast = json.load(f) # Parse front matter from database front_matter = None if file_info.get('front_matter'): try: front_matter = eval(file_info['front_matter']) except (ValueError, TypeError, SyntaxError): if config.get('verbose', False): click.echo("Warning: Could not parse front matter", err=True) # Serialize AST back to markdown serializer = ASTSerializer() markdown_content = serializer.serialize_to_markdown(ast, front_matter) # Output to file or stdout if output: output_path = Path(output) output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, 'w', encoding='utf-8') as f: f.write(markdown_content) click.echo(f"✓ File written to: {output_path}") else: click.echo(markdown_content) if config.get('verbose', False): click.echo(f"Retrieved {len(ast)} AST tokens", err=True) except Exception as e: click.echo(f"Error retrieving file: {e}", err=True) raise click.Abort() @click.command() @click.option('--format', 'output_format', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @click.option('--names-only', is_flag=True, help='Show only filenames (no metadata)') @click.pass_context def md_list_command(ctx, output_format, names_only): """ List all stored markdown files and their status. Shows all markdown files that have been processed and stored in the MarkiTect database with their basic metadata. Examples: markitect md-list markitect md-list --format table markitect md-list --format json markitect md-list --names-only """ config = ctx.obj or {} try: if config.get('verbose', False): click.echo("Retrieving all stored files...") db_manager = config.get('db_manager') files = db_manager.list_markdown_files() if not files: click.echo("No files found in database.") click.echo("Use 'markitect md-ingest ' to add files.") return # Handle names-only option if names_only: for file_info in files: click.echo(file_info['filename']) return # Handle different output formats if output_format == 'simple': # Original emoji format click.echo(f"Found {len(files)} file(s):") click.echo() for file_info in files: click.echo(f"📄 {file_info['filename']}") if config.get('verbose', False): click.echo(f" Created: {file_info['created_at']}") if file_info.get('front_matter'): try: front_matter = eval(file_info['front_matter']) if front_matter: click.echo(f" Front matter: {list(front_matter.keys())}") except (ValueError, TypeError, SyntaxError): click.echo(f" Front matter: (parsing error)") click.echo() else: # Use structured format (table, json, yaml) if output_format == 'json': import json click.echo(json.dumps(files, indent=2, default=str)) elif output_format == 'yaml': import yaml click.echo(yaml.dump(files, default_flow_style=False)) else: # table format (default) # Simple table output click.echo(f"Found {len(files)} file(s):") click.echo(f"{'Filename':<30} {'Created':<20}") click.echo("-" * 50) for file_info in files: click.echo(f"{file_info['filename']:<30} {file_info['created_at']:<20}") except Exception as e: click.echo(f"Error listing files: {e}", err=True) raise click.Abort() @click.command() @click.argument('input_file', type=click.Path(exists=True)) @click.option('--output', '-o', type=click.Path(), help='Output HTML file path (defaults to input filename with .html extension)') @click.option('--template', type=click.Choice(['basic', 'github', 'academic', 'dark']), default='basic', help='HTML template: basic (default), github, academic, or dark theme') @click.option('--css', type=click.Path(exists=True), help='Custom CSS file to inject into the template') @click.option('--edit', is_flag=True, help='Enable instant markdown editing capabilities in the generated HTML') @click.option('--editor-theme', type=click.Choice(['light', 'dark']), default='light', help='Editor interface theme (light or dark)') @click.option('--keyboard-shortcuts', is_flag=True, help='Enable keyboard shortcuts for editing actions') @click.option('--use-publication-dir', is_flag=True, help='Force single files to use publication directory') @click.option('--dont-use-publication-dir', is_flag=True, help='Force directory processing to place HTML next to MD files') @click.pass_context def md_render_command(ctx, input_file, output, template, css, edit, editor_theme, keyboard_shortcuts, use_publication_dir, dont_use_publication_dir): """ Generate HTML with client-side JavaScript markdown rendering. Creates self-contained HTML files that include markdown content as JavaScript data and render in the browser using client-side markdown parsing with marked.js. Supports both single files and directory processing. The generated HTML includes: • Embedded markdown content as JavaScript payload • Client-side rendering with marked.js from CDN • YAML front matter support and metadata extraction • Multiple responsive template options • Custom CSS injection capability • Optional instant editing capabilities with --edit flag • Graceful fallback if JavaScript fails INPUT_FILE: Path to the markdown file or directory to render Publication Directory: • Default publication directory: ~/Notes/ • Override with MARKITECT_PUBLICATION_DIR environment variable • Single files: HTML generated next to MD file by default • Directories: HTML generated in publication directory with preserved structure Flags: • --use-publication-dir: Force single files to use publication directory • --dont-use-publication-dir: Force directory processing to place HTML next to MD files Available Templates: • basic (default) - Clean, minimal design with system fonts • github - GitHub-style appearance with heading underlines • academic - Academic paper style with serif fonts and justified text • dark - GitHub dark mode inspired theme with dark background Examples: # Single file - HTML next to MD file markitect md-render README.md # Single file - HTML in publication directory markitect md-render README.md --use-publication-dir # Directory - HTML in publication directory with structure markitect md-render docs/ # Directory - HTML next to each MD file markitect md-render docs/ --dont-use-publication-dir # Custom publication directory MARKITECT_PUBLICATION_DIR=/tmp/pub markitect md-render docs/ # Directory with custom template markitect md-render docs/ --template github --edit """ config = ctx.obj or {} try: input_path = Path(input_file) # Validate flags if use_publication_dir and dont_use_publication_dir: click.echo("Error: Cannot use both --use-publication-dir and --dont-use-publication-dir flags together", err=True) raise click.Abort() # Get publication directory publication_dir = get_publication_directory() if config.get('verbose', False): click.echo(f"Input: {input_path}") click.echo(f"Publication directory: {publication_dir}") # Check if input is a directory or file if input_path.is_dir(): # Directory processing use_pub_dir = not dont_use_publication_dir # Default to publication dir for directories if config.get('verbose', False): click.echo(f"Processing directory: {input_path}") click.echo(f"Use publication directory: {use_pub_dir}") # Find all markdown files md_files = find_markdown_files(input_path) if not md_files: click.echo(f"No markdown files found in directory: {input_path}") return processed_count = 0 for md_file in md_files: try: # Determine output path for this file if use_pub_dir: ensure_publication_directory(publication_dir) output_path = get_relative_output_path(md_file, input_path, publication_dir) # Ensure subdirectory exists output_path.parent.mkdir(parents=True, exist_ok=True) else: output_path = md_file.with_suffix('.html') # Process the markdown file _render_single_markdown_file( md_file, output_path, template, css, edit, editor_theme, keyboard_shortcuts, config ) processed_count += 1 if config.get('verbose', False): click.echo(f" ✓ {md_file} → {output_path}") except Exception as e: click.echo(f" ✗ Error processing {md_file}: {e}", err=True) click.echo(f"✓ Processed {processed_count} markdown file(s)") else: # Single file processing use_pub_dir = use_publication_dir # Default to next to file for single files if config.get('verbose', False): click.echo(f"Processing single file: {input_path}") click.echo(f"Use publication directory: {use_pub_dir}") # Determine output path if output: output_path = Path(output) elif use_pub_dir: ensure_publication_directory(publication_dir) output_path = publication_dir / get_output_filename(input_path) else: output_path = input_path.with_suffix('.html') # Process the single file _render_single_markdown_file( input_path, output_path, template, css, edit, editor_theme, keyboard_shortcuts, config ) click.echo(f"✓ HTML generated: {output_path}") except Exception as e: click.echo(f"Error: {e}", err=True) raise click.Abort() @click.command() @click.argument('directory', type=click.Path(exists=True)) @click.option('--output', '-o', type=click.Path(), help='Output index file path (defaults to directory/index.html)') @click.option('--template', type=click.Choice(['basic', 'github', 'academic', 'dark']), default='basic', help='HTML template: basic (default), github, academic, or dark theme') @click.option('--recursive', '-r', is_flag=True, help='Include HTML files from subdirectories') @click.pass_context def md_index_command(ctx, directory, output, template, recursive): """ Generate an index page for HTML files in a directory. Creates an HTML index page that lists all HTML files found in the specified directory, providing navigation links to each file. The index page uses the same template system as md-render for consistent styling. DIRECTORY: Path to the directory containing HTML files Examples: # Generate index for current directory markitect md-index . # Generate index with custom output file markitect md-index docs/ --output docs/contents.html # Generate index with GitHub template markitect md-index notes/ --template github # Include subdirectories recursively markitect md-index docs/ --recursive """ config = ctx.obj or {} try: directory_path = Path(directory) if config.get('verbose', False): click.echo(f"Generating index for directory: {directory_path}") # Determine output file if output: output_path = Path(output) else: output_path = directory_path / "index.html" # Find and filter HTML files html_files = find_html_files(directory_path, recursive=recursive) html_files = [f for f in html_files if f != output_path] if config.get('verbose', False): click.echo(f"Found {len(html_files)} HTML file(s)") # Prepare file info for template file_infos = _prepare_file_infos(html_files, output_path) # Generate and write index HTML directory_name = directory_path.name or "Directory" index_title = f"{directory_name} - Index" index_html = generate_index_html(file_infos, index_title, template) # Ensure output directory exists and write file output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(index_html, encoding='utf-8') click.echo(f"✓ Index generated: {output_path}") if config.get('verbose', False): click.echo(f" Template: {template}") click.echo(f" Files indexed: {len(file_infos)}") if recursive: click.echo(f" Recursive: enabled") except Exception as e: click.echo(f"Error generating index: {e}", err=True) raise click.Abort() def _render_single_markdown_file(input_path, output_path, template, css, edit, editor_theme, keyboard_shortcuts, config): """Render a single markdown file to HTML.""" # Read markdown file markdown_content = input_path.read_text(encoding='utf-8') # Extract front matter if present front_matter = {} if markdown_content.startswith('---\n'): parts = markdown_content.split('---\n', 2) if len(parts) >= 3: try: import yaml front_matter = yaml.safe_load(parts[1]) or {} markdown_content = parts[2] except ImportError: # Fallback without yaml parsing pass # Generate title from first heading or filename title = front_matter.get('title', input_path.stem) lines = markdown_content.strip().split('\n') for line in lines: if line.startswith('# '): title = line[2:].strip() break # Load custom CSS if provided css_content = "" if css: css_path = Path(css) css_content = css_path.read_text(encoding='utf-8') # Generate HTML with embedded markdown html_content = generate_html_with_embedded_markdown( markdown_content, title, template, css_content, front_matter, edit, editor_theme, keyboard_shortcuts ) # Ensure output directory exists output_path.parent.mkdir(parents=True, exist_ok=True) # Write HTML file output_path.write_text(html_content, encoding='utf-8') # Template definitions for cleaner code organization TEMPLATE_STYLES = { 'basic': { 'body_color': '#333', 'body_bg': '', 'heading_color': '#2c3e50', 'heading_border': '', 'code_bg': '#f4f4f4', 'code_border': '', 'blockquote_border': '#ddd', 'blockquote_color': '#666', 'font_family': '-apple-system, BlinkMacSystemFont, \'Segoe UI\', \'Roboto\', \'Helvetica\', \'Arial\', sans-serif', 'max_width': '800px', 'text_align': '' }, 'github': { 'body_color': '#24292e', 'body_bg': 'background-color: #ffffff;', 'heading_color': '#1f2328', 'heading_border': 'border-bottom: 1px solid #d0d7de; padding-bottom: 0.3em;', 'code_bg': '#f4f4f4', 'code_border': '', 'blockquote_border': '#ddd', 'blockquote_color': '#666', 'font_family': '-apple-system, BlinkMacSystemFont, \'Segoe UI\', \'Roboto\', \'Helvetica\', \'Arial\', sans-serif', 'max_width': '800px', 'text_align': '' }, 'academic': { 'body_color': '#333', 'body_bg': '', 'heading_color': '#2c3e50', 'heading_border': '', 'code_bg': '#f4f4f4', 'code_border': '', 'blockquote_border': '#ddd', 'blockquote_color': '#666', 'font_family': '"Times New Roman", Times, serif', 'max_width': '900px', 'text_align': 'text-align: justify;' }, 'dark': { 'body_color': '#e1e4e8', 'body_bg': 'background-color: #0d1117;', 'heading_color': '#58a6ff', 'heading_border': 'border-bottom: 1px solid #21262d; padding-bottom: 0.3em;', 'code_bg': '#161b22', 'code_border': 'border: 1px solid #21262d;', 'blockquote_border': '#58a6ff', 'blockquote_color': '#8b949e', 'font_family': '-apple-system, BlinkMacSystemFont, \'Segoe UI\', \'Roboto\', \'Helvetica\', \'Arial\', sans-serif', 'max_width': '800px', 'text_align': '' } } def generate_html_with_embedded_markdown(markdown_content, title, template, css_content, front_matter, edit=False, editor_theme='light', keyboard_shortcuts=False): """Generate HTML with embedded markdown content for client-side rendering. Args: markdown_content: The markdown content to embed title: Page title template: Template name (basic, github, academic, dark) css_content: Custom CSS content to inject front_matter: YAML front matter dictionary edit: Enable editing capabilities editor_theme: Editor theme (light or dark) keyboard_shortcuts: Enable keyboard shortcuts """ # Get template styles or default to basic styles = TEMPLATE_STYLES.get(template, TEMPLATE_STYLES['basic']) # Build editor styles if editing is enabled editor_styles = "" if edit: editor_styles = ''' /* Markitect Editor Styles */ .markitect-floating-header {{ position: fixed; top: 10px; right: 10px; background: rgba(0, 123, 255, 0.9); color: white; padding: 10px 20px; border-radius: 20px; font-size: 14px; font-weight: bold; box-shadow: 0 2px 10px rgba(0,0,0,0.2); z-index: 1000; display: none; }} .markitect-floating-header.show {{ display: block; }} .markitect-section-editable {{ position: relative; cursor: pointer; transition: background-color 0.2s; }} .markitect-section-editable:hover {{ background-color: rgba(0, 123, 255, 0.1); }} .markitect-section-modified {{ border-left: 4px solid #007bff; padding-left: 16px; }} .markitect-edit-interface {{ margin: 15px 0; padding: 20px; border: 2px dashed #007bff; border-radius: 8px; background: #f8f9fa; }} .markitect-edit-textarea {{ width: 100%; min-height: 150px; font-family: 'Courier New', Consolas, monospace; font-size: 14px; padding: 10px; border: 1px solid #ddd; border-radius: 4px; resize: vertical; }} .markitect-edit-actions {{ margin-top: 10px; text-align: right; }} .markitect-edit-btn {{ margin-left: 10px; padding: 8px 16px; border: none; border-radius: 4px; cursor: pointer; font-size: 14px; }} .markitect-btn-apply {{ background-color: #28a745; color: white; }} .markitect-btn-reset {{ background-color: #ffc107; color: #212529; }} .markitect-btn-cancel {{ background-color: #6c757d; color: white; }} .markitect-btn-save {{ background-color: #007bff; color: white; padding: 10px 20px; margin-left: 15px; }} ''' if editor_theme == 'dark': editor_styles += ''' /* Dark theme overrides */ .markitect-edit-interface {{ background: #2d2d2d; border-color: #666; }} .markitect-edit-textarea {{ background: #1a1a1a; color: #f0f0f0; border-color: #666; }} ''' # HTML template with style variables html_template = ''' {title}
{editor_html} {editor_scripts} ''' # Build editor HTML components if editing is enabled editor_html = "" editor_scripts = "" editor_config = "" if edit: editor_config = ''' // Editor configuration window.MARKITECT_EDIT_MODE = true; window.MARKITECT_EDITOR_CONFIG = { theme: \'''' + editor_theme + '''\', keyboardShortcuts: ''' + ('true' if keyboard_shortcuts else 'false') + ''' };''' editor_html = '''
0 sections changed
''' # Basic JavaScript editor implementation editor_scripts = ''' ''' # Format template with styles and content return html_template.format( title=title, css_content=css_content, editor_styles=editor_styles, editor_html=editor_html, editor_scripts=editor_scripts, editor_config=editor_config, markdown_json=json.dumps(markdown_content), front_matter_json=json.dumps(front_matter), **styles ) # Publication directory management functions for Issue #135 def get_publication_directory(): """Get the publication directory from environment variable or default.""" pub_dir = os.environ.get('MARKITECT_PUBLICATION_DIR') if pub_dir: return normalize_publication_path(pub_dir) return Path.home() / "Notes" def normalize_publication_path(path_str): """Normalize publication directory path with tilde expansion and absolute resolution.""" path = Path(path_str) if str(path).startswith('~'): path = path.expanduser() return path.resolve() def ensure_publication_directory(pub_dir): """Ensure publication directory exists, creating it if necessary.""" pub_dir = Path(pub_dir) pub_dir.mkdir(parents=True, exist_ok=True) return pub_dir def get_output_filename(input_file): """Get HTML output filename from markdown input filename.""" return input_file.stem + ".html" def find_markdown_files(directory): """Recursively find all markdown files in a directory.""" directory = Path(directory) md_files = [] for pattern in ['*.md', '*.markdown']: md_files.extend(directory.rglob(pattern)) return sorted(md_files) def get_relative_output_path(source_file, base_dir, output_dir): """Calculate relative output path preserving directory structure.""" source_file = Path(source_file) base_dir = Path(base_dir) output_dir = Path(output_dir) # Get relative path from base directory relative_path = source_file.relative_to(base_dir) # Change extension to .html relative_path = relative_path.with_suffix('.html') # Combine with output directory return output_dir / relative_path def process_single_file(input_file, use_publication_dir, publication_dir): """Process a single markdown file, generate HTML, and return the output path.""" input_file = Path(input_file) if not input_file.exists(): raise FileNotFoundError(f"Input file not found: {input_file}") if use_publication_dir: ensure_publication_directory(publication_dir) output_file = publication_dir / get_output_filename(input_file) else: output_file = input_file.with_suffix('.html') # Actually generate the HTML file _render_single_markdown_file( input_file, output_file, 'basic', None, False, 'light', False, {} ) return output_file def process_directory(input_dir, use_publication_dir, publication_dir): """Process all markdown files in a directory, generate HTML files, and return list of output paths.""" input_dir = Path(input_dir) if not input_dir.exists() or not input_dir.is_dir(): raise NotADirectoryError(f"Input directory not found: {input_dir}") md_files = find_markdown_files(input_dir) output_files = [] for md_file in md_files: if use_publication_dir: ensure_publication_directory(publication_dir) output_file = get_relative_output_path(md_file, input_dir, publication_dir) # Ensure subdirectory exists output_file.parent.mkdir(parents=True, exist_ok=True) else: output_file = md_file.with_suffix('.html') # Actually generate the HTML file _render_single_markdown_file( md_file, output_file, 'basic', None, False, 'light', False, {} ) output_files.append(output_file) return output_files # Index generation functions for Issue #136 def find_html_files(directory, recursive=False): """Find all HTML files in a directory.""" directory = Path(directory) html_files = [] if recursive: for pattern in ['*.html', '*.htm']: html_files.extend(directory.rglob(pattern)) else: for pattern in ['*.html', '*.htm']: html_files.extend(directory.glob(pattern)) return sorted(html_files) # HTML parsing patterns for index generation HTML_TITLE_PATTERN = re.compile(r']*>(.*?)', re.IGNORECASE | re.DOTALL) HTML_H1_PATTERN = re.compile(r']*>(.*?)', re.IGNORECASE | re.DOTALL) HTML_TAG_PATTERN = re.compile(r'<[^>]+>') def extract_html_title(html_file): """Extract title from HTML file, falling back to H1 tag or filename.""" try: content = html_file.read_text(encoding='utf-8') # Try to extract from title tag title_match = HTML_TITLE_PATTERN.search(content) if title_match: return title_match.group(1).strip() # Try to extract from H1 tag h1_match = HTML_H1_PATTERN.search(content) if h1_match: # Remove HTML tags from H1 content h1_text = HTML_TAG_PATTERN.sub('', h1_match.group(1)) return h1_text.strip() # Fallback to filename return html_file.stem except Exception: # If any error occurs, fallback to filename return html_file.stem def generate_index_html(html_files, title, template="basic"): """Generate HTML index page with links to HTML files.""" # Get template styles from existing TEMPLATE_STYLES styles = TEMPLATE_STYLES.get(template, TEMPLATE_STYLES['basic']) # Generate links list links_html = "" if html_files: links_html = "
    \n" for file_info in html_files: relative_path = file_info['relative_path'] file_title = file_info['title'] links_html += f'
  • {file_title}
  • \n' links_html += "
" else: links_html = "

No HTML files found in this directory.

" # Generate HTML template html_template = ''' {title}

{title}

📁 Directory Index - Navigate through the available HTML pages

Available Pages

{links_html}

Generated with MarkiTect • {file_count} file(s)

''' return html_template.format( title=title, links_html=links_html, file_count=len(html_files), **styles ) def _prepare_file_infos(html_files, output_path): """Prepare file information for template generation.""" file_infos = [] for html_file in html_files: title = extract_html_title(html_file) # Calculate relative path from output directory to HTML file try: relative_path = html_file.relative_to(output_path.parent) except ValueError: # If files are in different directory trees, use filename relative_path = html_file.name file_infos.append({ 'path': html_file, 'title': title, 'relative_path': str(relative_path) }) return file_infos def process_directory_for_index(directory, index_filename="index.html", template="basic", recursive=False): """Process directory and generate index file.""" directory = Path(directory) output_path = directory / index_filename if not directory.exists() or not directory.is_dir(): raise FileNotFoundError(f"Directory not found: {directory}") # Find and filter HTML files html_files = find_html_files(directory, recursive=recursive) html_files = [f for f in html_files if f != output_path] # Prepare file info for template file_infos = _prepare_file_infos(html_files, output_path) # Generate and write index HTML directory_name = directory.name or "Directory" index_title = f"{directory_name} - Index" index_html = generate_index_html(file_infos, index_title, template) # Ensure output directory exists and write file output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(index_html, encoding='utf-8') return output_path # ============================================================================== # Markdown Explosion Functions for Issue #138 # ============================================================================== class MarkdownSection: """ Represents a section of markdown content with hierarchical structure. This class models a single section from a markdown document, identified by a heading (# ## ### etc.), along with its content and child sections. Attributes: level (int): Heading level (1 for #, 2 for ##, etc.) title (str): Section title text (without # markers) content (str): Full markdown content for this section line_start (int): Starting line number in original document line_end (int): Ending line number in original document children (list): List of child MarkdownSection objects parent (MarkdownSection): Parent section (None for top-level) """ def __init__(self, level, title, content="", line_start=0, line_end=0): """ Initialize a new MarkdownSection. Args: level (int): Heading level (1-6) title (str): Section title content (str): Section content including the heading line_start (int): Starting line in source document line_end (int): Ending line in source document """ self.level = level self.title = title self.content = content self.line_start = line_start self.line_end = line_end self.children = [] self.parent = None def add_child(self, child_section): """ Add a child section to this section. Validates that the child section has the correct heading level (exactly one level deeper than the parent). Args: child_section (MarkdownSection): The section to add as a child Raises: ValueError: If the child section's level is not exactly parent_level + 1 """ # Only allow direct child levels (no skipping levels) if child_section.level == self.level + 1: child_section.parent = self self.children.append(child_section) else: raise ValueError("Invalid heading hierarchy") def extract_headings(markdown_content): """ Extract headings with their levels from markdown content. Parses a markdown text and identifies all headings (# ## ### etc.), returning their level, title, and line position. Args: markdown_content (str): The markdown text to parse Returns: list: List of dictionaries with keys: - level (int): Heading level (1-6) - title (str): Heading text (without # markers) - line (int): Line number in the content Example: >>> content = "# Title\\n## Section\\nContent" >>> headings = extract_headings(content) >>> headings[0] {'level': 1, 'title': 'Title', 'line': 0} """ headings = [] lines = markdown_content.split('\n') for i, line in enumerate(lines): stripped_line = line.strip() if stripped_line.startswith('#'): # Count the number of # characters level = 0 for char in stripped_line: if char == '#': level += 1 else: break # Extract title (remove # and whitespace) title = stripped_line[level:].strip() if title: # Only add if there's actual content after the # headings.append({ 'level': level, 'title': title, 'line': i }) return headings def extract_section_content(markdown_content, headings, section_index): """Extract content that belongs to a specific section.""" if section_index >= len(headings): return "" lines = markdown_content.split('\n') current_heading = headings[section_index] start_line = current_heading['line'] # Find end line (next heading at same or higher level) end_line = len(lines) for i in range(section_index + 1, len(headings)): next_heading = headings[i] if next_heading['level'] <= current_heading['level']: end_line = next_heading['line'] break # Extract content including the heading section_lines = lines[start_line:end_line] return '\n'.join(section_lines) def _remove_front_matter(content): """Remove YAML front matter from markdown content.""" if content.startswith('---\n'): parts = content.split('---\n', 2) if len(parts) >= 3: return parts[2] # Content after front matter return content def parse_markdown_structure(markdown_file): """Parse markdown file and create hierarchical structure.""" content = markdown_file.read_text(encoding='utf-8') content = _remove_front_matter(content) headings = extract_headings(content) if not headings: return [] # No structure found # Build hierarchical structure root_sections = [] stack = [] # Stack to track current parent at each level for i, heading in enumerate(headings): section_content = extract_section_content(content, headings, i) section = MarkdownSection( level=heading['level'], title=heading['title'], content=section_content, line_start=heading['line'], line_end=headings[i + 1]['line'] if i + 1 < len(headings) else len(content.split('\n')) ) # Find appropriate parent # Pop stack until we find a valid parent (lower level) while stack and stack[-1].level >= section.level: stack.pop() if stack: # Add as child to current parent parent = stack[-1] parent.children.append(section) section.parent = parent else: # Top-level section root_sections.append(section) stack.append(section) return root_sections def sanitize_heading_text(text): """Remove markdown formatting from heading text.""" # Remove markdown formatting text = re.sub(r'\*\*(.*?)\*\*', r'\1', text) # Bold text = re.sub(r'\*(.*?)\*', r'\1', text) # Italic text = re.sub(r'`(.*?)`', r'\1', text) # Code text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) # Links return text.strip() def generate_safe_filename(heading_text, max_length=100): """Generate filesystem-safe filename from heading text.""" # Use FilenameGenerator for consistent behavior generator = FilenameGenerator(max_length=max_length) return generator._apply_filename_rules(heading_text, max_length) class FilenameGenerator: """Manages filename generation with conflict resolution.""" def __init__(self, max_length=100, separator="_", case_style="lower", preserve_numbers=False): self.max_length = max_length self.separator = separator self.case_style = case_style self.preserve_numbers = preserve_numbers self.used_names = set() def generate(self, heading_text): """Generate a unique filename from heading text.""" base_name = self._generate_base_name(heading_text) unique_name = self._resolve_conflicts(base_name) self.used_names.add(unique_name) return unique_name def _generate_base_name(self, heading_text): """Generate base filename without conflict resolution.""" if self.preserve_numbers: # Extract leading numbers and format them match = re.match(r'^(\d+)\.?\s*(.+)', heading_text) if match: number, rest = match.groups() number_part = f"{int(number):02d}" text_part = self._apply_filename_rules(rest, self.max_length - len(number_part) - len(self.separator)) return f"{number_part}{self.separator}{text_part}" return self._apply_filename_rules(heading_text, self.max_length) def _apply_filename_rules(self, text, max_length): """Apply filename generation rules with custom settings.""" if not text or not text.strip(): return "untitled" # Sanitize markdown formatting first text = sanitize_heading_text(text) # Handle numbered sections specially (e.g., "Section 1.1.1" -> "section_1_1_1") while re.search(r'(\d+)\.(\d+)', text): text = re.sub(r'(\d+)\.(\d+)', r'\1_\2', text) # Apply case style if self.case_style == "lower": text = text.lower() elif self.case_style == "upper": text = text.upper() elif self.case_style == "title": text = text.title() elif self.case_style == "camel": # Split into words and camelCase them words = re.split(r'[-\s]+', text.lower()) if words: text = words[0] + ''.join(word.capitalize() for word in words[1:]) # Replace path separators with separators first text = re.sub(r'[/\\]', self.separator, text) if self.separator else re.sub(r'[/\\]', '', text) # Convert Unicode characters to ASCII equivalents text = unicodedata.normalize('NFKD', text) text = ''.join(c for c in text if not unicodedata.combining(c)) # Remove other special characters and replace spaces with separators safe_name = re.sub(r'[^\w\s-]', '', text) if self.separator: safe_name = re.sub(r'[-\s]+', self.separator, safe_name) else: safe_name = re.sub(r'[-\s]+', '', safe_name) # Remove leading/trailing separators if self.separator: safe_name = safe_name.strip(self.separator) # Handle empty result after sanitization if not safe_name: return "untitled" # Truncate if too long if len(safe_name) > max_length: if self.separator: safe_name = safe_name[:max_length].rstrip(self.separator) else: safe_name = safe_name[:max_length] return safe_name def _resolve_conflicts(self, base_name): """Resolve filename conflicts by adding numbers.""" if base_name not in self.used_names: return base_name counter = 2 while True: candidate = f"{base_name}{self.separator}{counter}" if candidate not in self.used_names: return candidate counter += 1 def reset(self): """Reset the used names tracking.""" self.used_names.clear() def resolve_filename_conflicts(filename, existing_files): """Resolve conflicts with existing files.""" existing_basenames = {Path(f).stem for f in existing_files} if filename not in existing_basenames: return filename counter = 2 while True: candidate = f"{filename}_{counter}" if candidate not in existing_basenames: return candidate counter += 1 class DirectoryStructureBuilder: """Builds directory structures from markdown sections.""" def __init__(self, output_dir, max_depth=10, file_extension=".md"): self.output_dir = Path(output_dir) self.max_depth = max_depth self.file_extension = file_extension self.filename_generator = FilenameGenerator() def build(self, sections): """Build directory structure from sections.""" self.output_dir.mkdir(parents=True, exist_ok=True) for section in sections: self._process_section(section, self.output_dir, 1) return self.output_dir def _process_section(self, section, parent_dir, current_depth): """Process a single section and its children.""" if current_depth > self.max_depth: return safe_name = self.filename_generator.generate(section.title) if section.children and current_depth < self.max_depth: # Create directory for sections with children section_dir = parent_dir / safe_name section_dir.mkdir(exist_ok=True) # Create an index file for the section content if section.content.strip(): index_file = section_dir / f"index{self.file_extension}" index_file.write_text(section.content, encoding='utf-8') # Process children for child in section.children: self._process_section(child, section_dir, current_depth + 1) else: # Create file for leaf sections section_file = parent_dir / f"{safe_name}{self.file_extension}" section_file.write_text(section.content, encoding='utf-8') def create_directory_structure(sections, output_dir): """Create directory structure from parsed markdown sections.""" builder = DirectoryStructureBuilder(output_dir) builder.build(sections) return True def explode_markdown_file(input_file, output_dir): """ Explode a markdown file into a directory structure. Takes a markdown file with hierarchical headings and creates a directory structure where each heading becomes a directory or file, preserving the document's organization and all content. Args: input_file (Path or str): Path to the input markdown file output_dir (Path or str): Directory where exploded structure will be created Returns: Path: Path to the created output directory Raises: FileNotFoundError: If the input file doesn't exist ValueError: If no heading structure is found in the file PermissionError: If unable to write to the output directory Example: >>> explode_markdown_file("book.md", "chapters/") PosixPath('/path/to/chapters') """ input_path = Path(input_file) output_path = Path(output_dir) if not input_path.exists(): raise FileNotFoundError(f"Input file not found: {input_path}") # Parse the markdown structure sections = parse_markdown_structure(input_path) if not sections: raise ValueError("No heading structure found in markdown file") # Create the directory structure create_directory_structure(sections, output_path) return output_path # CLI Command for markdown explosion @click.command() @click.argument('input_file', type=click.Path(exists=True)) @click.option('--output-dir', '-o', type=click.Path(), help='Output directory for exploded files (default: _exploded)') @click.option('--max-depth', type=int, default=10, help='Maximum directory nesting depth (default: 10)') @click.option('--dry-run', is_flag=True, help='Show what would be done without creating files') @click.option('--verbose', '-v', is_flag=True, help='Show detailed output during processing') @click.pass_context def md_explode_command(ctx, input_file, output_dir, max_depth, dry_run, verbose): """ Explode a markdown file into a directory structure. Takes a markdown file with hierarchical headings (# ## ### etc.) and creates a directory structure where each heading becomes a directory or file, with content distributed appropriately. INPUT_FILE: Path to the markdown file to explode Examples: # Explode book.md into book_exploded/ directory markitect md-explode book.md # Explode into custom output directory markitect md-explode book.md --output-dir /path/to/chapters # Preview what would be created markitect md-explode book.md --dry-run --verbose """ config = ctx.obj or {} try: input_path = Path(input_file) # Determine output directory if output_dir: output_path = Path(output_dir) else: output_path = input_path.parent / f"{input_path.stem}_exploded" is_verbose = verbose or config.get('verbose', False) if dry_run: if is_verbose: _show_verbose_output(input_path, output_path, max_depth, None) _handle_dry_run(input_path, output_path, max_depth) return # Actually explode the file result_dir = explode_markdown_file(input_path, output_path) click.echo(f"✅ Successfully exploded markdown file!") click.echo(f"📁 Created structure in: {result_dir}") if is_verbose: _show_verbose_output(input_path, output_path, max_depth, result_dir) except Exception as e: click.echo(f"❌ Error exploding markdown file: {e}", err=True) raise click.Abort() def _show_section_structure(section, indent=""): """Helper to show section structure for dry-run.""" click.echo(f"{indent}📁 {section.title} (Level {section.level})") for child in section.children: _show_section_structure(child, indent + " ") def _count_sections(sections): """Helper to count total sections.""" count = len(sections) for section in sections: count += _count_sections(section.children) return count def _handle_dry_run(input_path, output_path, max_depth): """Handle dry-run mode for md-explode command.""" sections = parse_markdown_structure(input_path) if not sections: click.echo("❌ No heading structure found in file") return click.echo(f"📋 Would create structure:") for section in sections: _show_section_structure(section) click.echo(f"📁 Total sections: {_count_sections(sections)}") def _show_verbose_output(input_path, output_path, max_depth, result_dir=None): """Show verbose output after successful explosion.""" click.echo(f"Exploding markdown file: {input_path}") click.echo(f"Output directory: {output_path}") click.echo(f"Maximum depth: {max_depth}") if result_dir: # Show created files (only for actual explosion, not dry-run) md_files = list(result_dir.rglob("*.md")) click.echo(f"📄 Created {len(md_files)} markdown files:") for md_file in sorted(md_files): relative_path = md_file.relative_to(result_dir) click.echo(f" {relative_path}")