""" Markdown commands plugin for MarkiTect. This plugin provides the core markdown file operations with md- prefixes, replacing the legacy unprefixed commands for better namespace consistency. """ import click import json import os import re import tempfile import unicodedata from pathlib import Path from typing import Dict, Any from markitect.plugins.base import CommandPlugin, PluginMetadata, PluginType from markitect.plugins.decorators import register_plugin from markitect.document_manager import DocumentManager from markitect.serializer import ASTSerializer # Simple helper function - avoiding circular imports def get_default_format(available_formats=['table', 'json', 'yaml', 'simple'], fallback='simple'): """Get the default output format - simplified version for plugin.""" return fallback @register_plugin("markdown_commands") class MarkdownCommandsPlugin(CommandPlugin): """Plugin providing core markdown file operations.""" @property def metadata(self) -> PluginMetadata: return PluginMetadata( name="markdown_commands", version="1.0.0", description="Core markdown file operations (ingest, get, list) with md- prefixes", author="MarkiTect Core Team", plugin_type=PluginType.COMMAND, markitect_version=">=0.1.0" ) def get_commands(self) -> Dict[str, Any]: """Return the markdown commands with md- prefixes.""" return { 'md-ingest': md_ingest_command, 'md-get': md_get_command, 'md-list': md_list_command, 'md-render': md_render_command, 'md-index': md_index_command, 'md-explode': md_explode_command, 'md-implode': md_implode_command } # Define commands as standalone functions @click.command() @click.argument('file_path', type=click.Path(exists=True)) @click.pass_context def md_ingest_command(ctx, file_path): """ Process and store a markdown file. Ingests a markdown file into the MarkiTect system, parsing its content, extracting front matter, generating AST cache, and storing metadata in the database. FILE_PATH: Path to the markdown file to process Examples: markitect md-ingest README.md markitect md-ingest docs/guide.md """ config = ctx.obj or {} try: if config.get('verbose', False): click.echo(f"Processing file: {file_path}") # Initialize document manager with database manager doc_manager = DocumentManager(config.get('db_manager')) # Process the file result = doc_manager.ingest_file(file_path) if config.get('verbose', False): click.echo(f"Processing results:") click.echo(f" File: {result['metadata']['filename']}") click.echo(f" AST nodes: {len(result['ast'])} nodes") click.echo(f" Cache file: {result['ast_cache_path']}") click.echo(f" Parse time: {result['parse_time']:.2f}s") click.echo(f" Cache time: {result['cache_time']:.2f}s") click.echo(f"āœ“ Successfully ingested: {Path(file_path).name}") except Exception as e: click.echo(f"Error processing file: {e}", err=True) raise click.Abort() @click.command() @click.argument('file_path', type=str) @click.option('--output', '-o', type=click.Path(), help='Output file path (default: stdout)') @click.pass_context def md_get_command(ctx, file_path, output): """ Retrieve and output a processed markdown file. Loads the file from the database and AST cache, then serializes it back to markdown format. Supports outputting to file or stdout. FILE_PATH: Name of the file to retrieve Examples: markitect md-get README.md markitect md-get docs/guide.md --output modified_guide.md """ config = ctx.obj or {} try: if config.get('verbose', False): click.echo(f"Retrieving file: {file_path}") db_manager = config.get('db_manager') # Get file information from database file_info = db_manager.get_markdown_file(file_path) if not file_info: click.echo(f"File not found in database: {file_path}", err=True) click.echo("Use 'markitect md-ingest' to process the file first.", err=True) raise click.Abort() # Load AST from cache cache_filename = f"{file_path}.ast.json" cache_path = Path('.ast_cache') / cache_filename if not cache_path.exists(): click.echo(f"AST cache not found: {cache_path}", err=True) click.echo("Try re-ingesting the file to regenerate cache.", err=True) raise click.Abort() # Read AST from cache import json with open(cache_path, 'r', encoding='utf-8') as f: ast = json.load(f) # Parse front matter from database front_matter = None if file_info.get('front_matter'): try: front_matter = eval(file_info['front_matter']) except (ValueError, TypeError, SyntaxError): if config.get('verbose', False): click.echo("Warning: Could not parse front matter", err=True) # Serialize AST back to markdown serializer = ASTSerializer() markdown_content = serializer.serialize_to_markdown(ast, front_matter) # Output to file or stdout if output: output_path = Path(output) output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, 'w', encoding='utf-8') as f: f.write(markdown_content) click.echo(f"āœ“ File written to: {output_path}") else: click.echo(markdown_content) if config.get('verbose', False): click.echo(f"Retrieved {len(ast)} AST tokens", err=True) except Exception as e: click.echo(f"Error retrieving file: {e}", err=True) raise click.Abort() @click.command() @click.option('--format', 'output_format', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @click.option('--names-only', is_flag=True, help='Show only filenames (no metadata)') @click.pass_context def md_list_command(ctx, output_format, names_only): """ List all stored markdown files and their status. Shows all markdown files that have been processed and stored in the MarkiTect database with their basic metadata. Examples: markitect md-list markitect md-list --format table markitect md-list --format json markitect md-list --names-only """ config = ctx.obj or {} try: if config.get('verbose', False): click.echo("Retrieving all stored files...") db_manager = config.get('db_manager') files = db_manager.list_markdown_files() if not files: click.echo("No files found in database.") click.echo("Use 'markitect md-ingest ' to add files.") return # Handle names-only option if names_only: for file_info in files: click.echo(file_info['filename']) return # Handle different output formats if output_format == 'simple': # Original emoji format click.echo(f"Found {len(files)} file(s):") click.echo() for file_info in files: click.echo(f"šŸ“„ {file_info['filename']}") if config.get('verbose', False): click.echo(f" Created: {file_info['created_at']}") if file_info.get('front_matter'): try: front_matter = eval(file_info['front_matter']) if front_matter: click.echo(f" Front matter: {list(front_matter.keys())}") except (ValueError, TypeError, SyntaxError): click.echo(f" Front matter: (parsing error)") click.echo() else: # Use structured format (table, json, yaml) if output_format == 'json': import json click.echo(json.dumps(files, indent=2, default=str)) elif output_format == 'yaml': import yaml click.echo(yaml.dump(files, default_flow_style=False)) else: # table format (default) # Simple table output click.echo(f"Found {len(files)} file(s):") click.echo(f"{'Filename':<30} {'Created':<20}") click.echo("-" * 50) for file_info in files: click.echo(f"{file_info['filename']:<30} {file_info['created_at']:<20}") except Exception as e: click.echo(f"Error listing files: {e}", err=True) raise click.Abort() @click.command() @click.argument('input_file', type=click.Path(exists=True)) @click.option('--output', '-o', type=click.Path(), help='Output HTML file path (defaults to input filename with .html extension)') @click.option('--template', type=click.Choice(['basic', 'github', 'academic', 'dark']), default='basic', help='HTML template: basic (default), github, academic, or dark theme') @click.option('--css', type=click.Path(exists=True), help='Custom CSS file to inject into the template') @click.option('--edit', is_flag=True, help='Enable instant markdown editing capabilities in the generated HTML') @click.option('--editor-theme', type=click.Choice(['light', 'dark']), default='light', help='Editor interface theme (light or dark)') @click.option('--keyboard-shortcuts', is_flag=True, help='Enable keyboard shortcuts for editing actions') @click.option('--use-publication-dir', is_flag=True, help='Force single files to use publication directory') @click.option('--dont-use-publication-dir', is_flag=True, help='Force directory processing to place HTML next to MD files') @click.pass_context def md_render_command(ctx, input_file, output, template, css, edit, editor_theme, keyboard_shortcuts, use_publication_dir, dont_use_publication_dir): """ Generate HTML with client-side JavaScript markdown rendering. Creates self-contained HTML files that include markdown content as JavaScript data and render in the browser using client-side markdown parsing with marked.js. Supports both single files and directory processing. The generated HTML includes: • Embedded markdown content as JavaScript payload • Client-side rendering with marked.js from CDN • YAML front matter support and metadata extraction • Multiple responsive template options • Custom CSS injection capability • Optional instant editing capabilities with --edit flag • Graceful fallback if JavaScript fails INPUT_FILE: Path to the markdown file or directory to render Publication Directory: • Default publication directory: ~/Notes/ • Override with MARKITECT_PUBLICATION_DIR environment variable • Single files: HTML generated next to MD file by default • Directories: HTML generated in publication directory with preserved structure Flags: • --use-publication-dir: Force single files to use publication directory • --dont-use-publication-dir: Force directory processing to place HTML next to MD files Available Templates: • basic (default) - Clean, minimal design with system fonts • github - GitHub-style appearance with heading underlines • academic - Academic paper style with serif fonts and justified text • dark - GitHub dark mode inspired theme with dark background Examples: # Single file - HTML next to MD file markitect md-render README.md # Single file - HTML in publication directory markitect md-render README.md --use-publication-dir # Directory - HTML in publication directory with structure markitect md-render docs/ # Directory - HTML next to each MD file markitect md-render docs/ --dont-use-publication-dir # Custom publication directory MARKITECT_PUBLICATION_DIR=/tmp/pub markitect md-render docs/ # Directory with custom template markitect md-render docs/ --template github --edit """ config = ctx.obj or {} try: input_path = Path(input_file) # Validate flags if use_publication_dir and dont_use_publication_dir: click.echo("Error: Cannot use both --use-publication-dir and --dont-use-publication-dir flags together", err=True) raise click.Abort() # Get publication directory publication_dir = get_publication_directory() if config.get('verbose', False): click.echo(f"Input: {input_path}") click.echo(f"Publication directory: {publication_dir}") # Check if input is a directory or file if input_path.is_dir(): # Directory processing use_pub_dir = not dont_use_publication_dir # Default to publication dir for directories if config.get('verbose', False): click.echo(f"Processing directory: {input_path}") click.echo(f"Use publication directory: {use_pub_dir}") # Find all markdown files md_files = find_markdown_files(input_path) if not md_files: click.echo(f"No markdown files found in directory: {input_path}") return processed_count = 0 for md_file in md_files: try: # Determine output path for this file if use_pub_dir: ensure_publication_directory(publication_dir) output_path = get_relative_output_path(md_file, input_path, publication_dir) # Ensure subdirectory exists output_path.parent.mkdir(parents=True, exist_ok=True) else: output_path = md_file.with_suffix('.html') # Process the markdown file _render_single_markdown_file( md_file, output_path, template, css, edit, editor_theme, keyboard_shortcuts, config ) processed_count += 1 if config.get('verbose', False): click.echo(f" āœ“ {md_file} → {output_path}") except Exception as e: click.echo(f" āœ— Error processing {md_file}: {e}", err=True) click.echo(f"āœ“ Processed {processed_count} markdown file(s)") else: # Single file processing use_pub_dir = use_publication_dir # Default to next to file for single files if config.get('verbose', False): click.echo(f"Processing single file: {input_path}") click.echo(f"Use publication directory: {use_pub_dir}") # Determine output path if output: output_path = Path(output) elif use_pub_dir: ensure_publication_directory(publication_dir) output_path = publication_dir / get_output_filename(input_path) else: output_path = input_path.with_suffix('.html') # Process the single file _render_single_markdown_file( input_path, output_path, template, css, edit, editor_theme, keyboard_shortcuts, config ) click.echo(f"āœ“ HTML generated: {output_path}") except Exception as e: click.echo(f"Error: {e}", err=True) raise click.Abort() @click.command() @click.argument('directory', type=click.Path(exists=True)) @click.option('--output', '-o', type=click.Path(), help='Output index file path (defaults to directory/index.html)') @click.option('--template', type=click.Choice(['basic', 'github', 'academic', 'dark']), default='basic', help='HTML template: basic (default), github, academic, or dark theme') @click.option('--recursive', '-r', is_flag=True, help='Include HTML files from subdirectories') @click.pass_context def md_index_command(ctx, directory, output, template, recursive): """ Generate an index page for HTML files in a directory. Creates an HTML index page that lists all HTML files found in the specified directory, providing navigation links to each file. The index page uses the same template system as md-render for consistent styling. DIRECTORY: Path to the directory containing HTML files Examples: # Generate index for current directory markitect md-index . # Generate index with custom output file markitect md-index docs/ --output docs/contents.html # Generate index with GitHub template markitect md-index notes/ --template github # Include subdirectories recursively markitect md-index docs/ --recursive """ config = ctx.obj or {} try: directory_path = Path(directory) if config.get('verbose', False): click.echo(f"Generating index for directory: {directory_path}") # Determine output file if output: output_path = Path(output) else: output_path = directory_path / "index.html" # Find and filter HTML files html_files = find_html_files(directory_path, recursive=recursive) html_files = [f for f in html_files if f != output_path] if config.get('verbose', False): click.echo(f"Found {len(html_files)} HTML file(s)") # Prepare file info for template file_infos = _prepare_file_infos(html_files, output_path) # Generate and write index HTML directory_name = directory_path.name or "Directory" index_title = f"{directory_name} - Index" index_html = generate_index_html(file_infos, index_title, template) # Ensure output directory exists and write file output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(index_html, encoding='utf-8') click.echo(f"āœ“ Index generated: {output_path}") if config.get('verbose', False): click.echo(f" Template: {template}") click.echo(f" Files indexed: {len(file_infos)}") if recursive: click.echo(f" Recursive: enabled") except Exception as e: click.echo(f"Error generating index: {e}", err=True) raise click.Abort() def _render_single_markdown_file(input_path, output_path, template, css, edit, editor_theme, keyboard_shortcuts, config): """Render a single markdown file to HTML.""" # Read markdown file markdown_content = input_path.read_text(encoding='utf-8') # Extract front matter if present front_matter = {} if markdown_content.startswith('---\n'): parts = markdown_content.split('---\n', 2) if len(parts) >= 3: try: import yaml front_matter = yaml.safe_load(parts[1]) or {} markdown_content = parts[2] except ImportError: # Fallback without yaml parsing pass # Generate title from first heading or filename title = front_matter.get('title', input_path.stem) lines = markdown_content.strip().split('\n') for line in lines: if line.startswith('# '): title = line[2:].strip() break # Load custom CSS if provided css_content = "" if css: css_path = Path(css) css_content = css_path.read_text(encoding='utf-8') # Generate HTML with embedded markdown html_content = generate_html_with_embedded_markdown( markdown_content, title, template, css_content, front_matter, edit, editor_theme, keyboard_shortcuts ) # Ensure output directory exists output_path.parent.mkdir(parents=True, exist_ok=True) # Write HTML file output_path.write_text(html_content, encoding='utf-8') # Template definitions for cleaner code organization TEMPLATE_STYLES = { 'basic': { 'body_color': '#333', 'body_bg': '', 'heading_color': '#2c3e50', 'heading_border': '', 'code_bg': '#f4f4f4', 'code_border': '', 'blockquote_border': '#ddd', 'blockquote_color': '#666', 'font_family': '-apple-system, BlinkMacSystemFont, \'Segoe UI\', \'Roboto\', \'Helvetica\', \'Arial\', sans-serif', 'max_width': '800px', 'text_align': '' }, 'github': { 'body_color': '#24292e', 'body_bg': 'background-color: #ffffff;', 'heading_color': '#1f2328', 'heading_border': 'border-bottom: 1px solid #d0d7de; padding-bottom: 0.3em;', 'code_bg': '#f4f4f4', 'code_border': '', 'blockquote_border': '#ddd', 'blockquote_color': '#666', 'font_family': '-apple-system, BlinkMacSystemFont, \'Segoe UI\', \'Roboto\', \'Helvetica\', \'Arial\', sans-serif', 'max_width': '800px', 'text_align': '' }, 'academic': { 'body_color': '#333', 'body_bg': '', 'heading_color': '#2c3e50', 'heading_border': '', 'code_bg': '#f4f4f4', 'code_border': '', 'blockquote_border': '#ddd', 'blockquote_color': '#666', 'font_family': '"Times New Roman", Times, serif', 'max_width': '900px', 'text_align': 'text-align: justify;' }, 'dark': { 'body_color': '#e1e4e8', 'body_bg': 'background-color: #0d1117;', 'heading_color': '#58a6ff', 'heading_border': 'border-bottom: 1px solid #21262d; padding-bottom: 0.3em;', 'code_bg': '#161b22', 'code_border': 'border: 1px solid #21262d;', 'blockquote_border': '#58a6ff', 'blockquote_color': '#8b949e', 'font_family': '-apple-system, BlinkMacSystemFont, \'Segoe UI\', \'Roboto\', \'Helvetica\', \'Arial\', sans-serif', 'max_width': '800px', 'text_align': '' } } def generate_html_with_embedded_markdown(markdown_content, title, template, css_content, front_matter, edit=False, editor_theme='light', keyboard_shortcuts=False): """Generate HTML with embedded markdown content for client-side rendering. Args: markdown_content: The markdown content to embed title: Page title template: Template name (basic, github, academic, dark) css_content: Custom CSS content to inject front_matter: YAML front matter dictionary edit: Enable editing capabilities editor_theme: Editor theme (light or dark) keyboard_shortcuts: Enable keyboard shortcuts """ # Get template styles or default to basic styles = TEMPLATE_STYLES.get(template, TEMPLATE_STYLES['basic']) # Build editor styles if editing is enabled editor_styles = "" if edit: editor_styles = ''' /* Markitect Editor Styles */ .markitect-floating-header {{ position: fixed; top: 10px; right: 10px; background: rgba(0, 123, 255, 0.9); color: white; padding: 10px 20px; border-radius: 20px; font-size: 14px; font-weight: bold; box-shadow: 0 2px 10px rgba(0,0,0,0.2); z-index: 1000; display: none; }} .markitect-floating-header.show {{ display: block; }} .markitect-section-editable {{ position: relative; cursor: pointer; transition: background-color 0.2s; }} .markitect-section-editable:hover {{ background-color: rgba(0, 123, 255, 0.1); }} .markitect-section-modified {{ border-left: 4px solid #007bff; padding-left: 16px; }} .markitect-edit-interface {{ margin: 15px 0; padding: 20px; border: 2px dashed #007bff; border-radius: 8px; background: #f8f9fa; }} .markitect-edit-textarea {{ width: 100%; min-height: 150px; font-family: 'Courier New', Consolas, monospace; font-size: 14px; padding: 10px; border: 1px solid #ddd; border-radius: 4px; resize: vertical; }} .markitect-edit-actions {{ margin-top: 10px; text-align: right; }} .markitect-edit-btn {{ margin-left: 10px; padding: 8px 16px; border: none; border-radius: 4px; cursor: pointer; font-size: 14px; }} .markitect-btn-apply {{ background-color: #28a745; color: white; }} .markitect-btn-reset {{ background-color: #ffc107; color: #212529; }} .markitect-btn-cancel {{ background-color: #6c757d; color: white; }} .markitect-btn-save {{ background-color: #007bff; color: white; padding: 10px 20px; margin-left: 15px; }} ''' if editor_theme == 'dark': editor_styles += ''' /* Dark theme overrides */ .markitect-edit-interface {{ background: #2d2d2d; border-color: #666; }} .markitect-edit-textarea {{ background: #1a1a1a; color: #f0f0f0; border-color: #666; }} ''' # HTML template with style variables html_template = ''' {title}
{editor_html} {editor_scripts} ''' # Build editor HTML components if editing is enabled editor_html = "" editor_scripts = "" editor_config = "" if edit: editor_config = ''' // Editor configuration window.MARKITECT_EDIT_MODE = true; window.MARKITECT_EDITOR_CONFIG = { theme: \'''' + editor_theme + '''\', keyboardShortcuts: ''' + ('true' if keyboard_shortcuts else 'false') + ''' };''' editor_html = '''
0 sections changed
''' # Basic JavaScript editor implementation editor_scripts = ''' ''' # Format template with styles and content return html_template.format( title=title, css_content=css_content, editor_styles=editor_styles, editor_html=editor_html, editor_scripts=editor_scripts, editor_config=editor_config, markdown_json=json.dumps(markdown_content), front_matter_json=json.dumps(front_matter), **styles ) # Publication directory management functions for Issue #135 def get_publication_directory(): """Get the publication directory from environment variable or default.""" pub_dir = os.environ.get('MARKITECT_PUBLICATION_DIR') if pub_dir: return normalize_publication_path(pub_dir) return Path.home() / "Notes" def normalize_publication_path(path_str): """Normalize publication directory path with tilde expansion and absolute resolution.""" path = Path(path_str) if str(path).startswith('~'): path = path.expanduser() return path.resolve() def ensure_publication_directory(pub_dir): """Ensure publication directory exists, creating it if necessary.""" pub_dir = Path(pub_dir) pub_dir.mkdir(parents=True, exist_ok=True) return pub_dir def get_output_filename(input_file): """Get HTML output filename from markdown input filename.""" return input_file.stem + ".html" def find_markdown_files(directory): """Recursively find all markdown files in a directory.""" directory = Path(directory) md_files = [] for pattern in ['*.md', '*.markdown']: md_files.extend(directory.rglob(pattern)) return sorted(md_files) def get_relative_output_path(source_file, base_dir, output_dir): """Calculate relative output path preserving directory structure.""" source_file = Path(source_file) base_dir = Path(base_dir) output_dir = Path(output_dir) # Get relative path from base directory relative_path = source_file.relative_to(base_dir) # Change extension to .html relative_path = relative_path.with_suffix('.html') # Combine with output directory return output_dir / relative_path def process_single_file(input_file, use_publication_dir, publication_dir): """Process a single markdown file, generate HTML, and return the output path.""" input_file = Path(input_file) if not input_file.exists(): raise FileNotFoundError(f"Input file not found: {input_file}") if use_publication_dir: ensure_publication_directory(publication_dir) output_file = publication_dir / get_output_filename(input_file) else: output_file = input_file.with_suffix('.html') # Actually generate the HTML file _render_single_markdown_file( input_file, output_file, 'basic', None, False, 'light', False, {} ) return output_file def process_directory(input_dir, use_publication_dir, publication_dir): """Process all markdown files in a directory, generate HTML files, and return list of output paths.""" input_dir = Path(input_dir) if not input_dir.exists() or not input_dir.is_dir(): raise NotADirectoryError(f"Input directory not found: {input_dir}") md_files = find_markdown_files(input_dir) output_files = [] for md_file in md_files: if use_publication_dir: ensure_publication_directory(publication_dir) output_file = get_relative_output_path(md_file, input_dir, publication_dir) # Ensure subdirectory exists output_file.parent.mkdir(parents=True, exist_ok=True) else: output_file = md_file.with_suffix('.html') # Actually generate the HTML file _render_single_markdown_file( md_file, output_file, 'basic', None, False, 'light', False, {} ) output_files.append(output_file) return output_files # Index generation functions for Issue #136 def find_html_files(directory, recursive=False): """Find all HTML files in a directory.""" directory = Path(directory) html_files = [] if recursive: for pattern in ['*.html', '*.htm']: html_files.extend(directory.rglob(pattern)) else: for pattern in ['*.html', '*.htm']: html_files.extend(directory.glob(pattern)) return sorted(html_files) # HTML parsing patterns for index generation HTML_TITLE_PATTERN = re.compile(r']*>(.*?)', re.IGNORECASE | re.DOTALL) HTML_H1_PATTERN = re.compile(r']*>(.*?)', re.IGNORECASE | re.DOTALL) HTML_TAG_PATTERN = re.compile(r'<[^>]+>') def extract_html_title(html_file): """Extract title from HTML file, falling back to H1 tag or filename.""" try: content = html_file.read_text(encoding='utf-8') # Try to extract from title tag title_match = HTML_TITLE_PATTERN.search(content) if title_match: return title_match.group(1).strip() # Try to extract from H1 tag h1_match = HTML_H1_PATTERN.search(content) if h1_match: # Remove HTML tags from H1 content h1_text = HTML_TAG_PATTERN.sub('', h1_match.group(1)) return h1_text.strip() # Fallback to filename return html_file.stem except Exception: # If any error occurs, fallback to filename return html_file.stem def generate_index_html(html_files, title, template="basic"): """Generate HTML index page with links to HTML files.""" # Get template styles from existing TEMPLATE_STYLES styles = TEMPLATE_STYLES.get(template, TEMPLATE_STYLES['basic']) # Generate links list links_html = "" if html_files: links_html = "
    \n" for file_info in html_files: relative_path = file_info['relative_path'] file_title = file_info['title'] links_html += f'
  • {file_title}
  • \n' links_html += "
" else: links_html = "

No HTML files found in this directory.

" # Generate HTML template html_template = ''' {title}

{title}

šŸ“ Directory Index - Navigate through the available HTML pages

Available Pages

{links_html}

Generated with MarkiTect • {file_count} file(s)

''' return html_template.format( title=title, links_html=links_html, file_count=len(html_files), **styles ) def _prepare_file_infos(html_files, output_path): """Prepare file information for template generation.""" file_infos = [] for html_file in html_files: title = extract_html_title(html_file) # Calculate relative path from output directory to HTML file try: relative_path = html_file.relative_to(output_path.parent) except ValueError: # If files are in different directory trees, use filename relative_path = html_file.name file_infos.append({ 'path': html_file, 'title': title, 'relative_path': str(relative_path) }) return file_infos def process_directory_for_index(directory, index_filename="index.html", template="basic", recursive=False): """Process directory and generate index file.""" directory = Path(directory) output_path = directory / index_filename if not directory.exists() or not directory.is_dir(): raise FileNotFoundError(f"Directory not found: {directory}") # Find and filter HTML files html_files = find_html_files(directory, recursive=recursive) html_files = [f for f in html_files if f != output_path] # Prepare file info for template file_infos = _prepare_file_infos(html_files, output_path) # Generate and write index HTML directory_name = directory.name or "Directory" index_title = f"{directory_name} - Index" index_html = generate_index_html(file_infos, index_title, template) # Ensure output directory exists and write file output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(index_html, encoding='utf-8') return output_path # ============================================================================== # Markdown Explosion Functions for Issue #138 # ============================================================================== class MarkdownSection: """ Represents a section of markdown content with hierarchical structure. This class models a single section from a markdown document, identified by a heading (# ## ### etc.), along with its content and child sections. Attributes: level (int): Heading level (1 for #, 2 for ##, etc.) title (str): Section title text (without # markers) content (str): Full markdown content for this section line_start (int): Starting line number in original document line_end (int): Ending line number in original document children (list): List of child MarkdownSection objects parent (MarkdownSection): Parent section (None for top-level) """ def __init__(self, level, title, content="", line_start=0, line_end=0): """ Initialize a new MarkdownSection. Args: level (int): Heading level (1-6) title (str): Section title content (str): Section content including the heading line_start (int): Starting line in source document line_end (int): Ending line in source document """ self.level = level self.title = title self.content = content self.line_start = line_start self.line_end = line_end self.children = [] self.parent = None def add_child(self, child_section): """ Add a child section to this section. Validates that the child section has the correct heading level (exactly one level deeper than the parent). Args: child_section (MarkdownSection): The section to add as a child Raises: ValueError: If the child section's level is not exactly parent_level + 1 """ # Only allow direct child levels (no skipping levels) if child_section.level == self.level + 1: child_section.parent = self self.children.append(child_section) else: raise ValueError("Invalid heading hierarchy") def extract_headings(markdown_content): """ Extract headings with their levels from markdown content. Parses a markdown text and identifies all headings (# ## ### etc.), returning their level, title, and line position. Args: markdown_content (str): The markdown text to parse Returns: list: List of dictionaries with keys: - level (int): Heading level (1-6) - title (str): Heading text (without # markers) - line (int): Line number in the content Example: >>> content = "# Title\\n## Section\\nContent" >>> headings = extract_headings(content) >>> headings[0] {'level': 1, 'title': 'Title', 'line': 0} """ headings = [] lines = markdown_content.split('\n') for i, line in enumerate(lines): stripped_line = line.strip() if stripped_line.startswith('#'): # Count the number of # characters level = 0 for char in stripped_line: if char == '#': level += 1 else: break # Extract title (remove # and whitespace) title = stripped_line[level:].strip() if title: # Only add if there's actual content after the # headings.append({ 'level': level, 'title': title, 'line': i }) return headings def extract_section_content(markdown_content, headings, section_index): """Extract content that belongs to a specific section.""" if section_index >= len(headings): return "" lines = markdown_content.split('\n') current_heading = headings[section_index] start_line = current_heading['line'] # Find end line (next heading at same or higher level) end_line = len(lines) for i in range(section_index + 1, len(headings)): next_heading = headings[i] if next_heading['level'] <= current_heading['level']: end_line = next_heading['line'] break # Extract content including the heading section_lines = lines[start_line:end_line] return '\n'.join(section_lines) def _remove_front_matter(content): """Remove YAML front matter from markdown content.""" if content.startswith('---\n'): parts = content.split('---\n', 2) if len(parts) >= 3: return parts[2] # Content after front matter return content def parse_markdown_structure(markdown_file): """Parse markdown file and create hierarchical structure.""" content = markdown_file.read_text(encoding='utf-8') content = _remove_front_matter(content) headings = extract_headings(content) if not headings: return [] # No structure found # Build hierarchical structure root_sections = [] stack = [] # Stack to track current parent at each level for i, heading in enumerate(headings): section_content = extract_section_content(content, headings, i) section = MarkdownSection( level=heading['level'], title=heading['title'], content=section_content, line_start=heading['line'], line_end=headings[i + 1]['line'] if i + 1 < len(headings) else len(content.split('\n')) ) # Find appropriate parent # Pop stack until we find a valid parent (lower level) while stack and stack[-1].level >= section.level: stack.pop() if stack: # Add as child to current parent parent = stack[-1] parent.children.append(section) section.parent = parent else: # Top-level section root_sections.append(section) stack.append(section) return root_sections def sanitize_heading_text(text): """Remove markdown formatting from heading text.""" # Remove markdown formatting text = re.sub(r'\*\*(.*?)\*\*', r'\1', text) # Bold text = re.sub(r'\*(.*?)\*', r'\1', text) # Italic text = re.sub(r'`(.*?)`', r'\1', text) # Code text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) # Links return text.strip() def generate_safe_filename(heading_text, max_length=100): """Generate filesystem-safe filename from heading text.""" # Use FilenameGenerator for consistent behavior generator = FilenameGenerator(max_length=max_length) return generator._apply_filename_rules(heading_text, max_length) class FilenameGenerator: """Manages filename generation with conflict resolution.""" def __init__(self, max_length=100, separator="_", case_style="lower", preserve_numbers=False): self.max_length = max_length self.separator = separator self.case_style = case_style self.preserve_numbers = preserve_numbers self.used_names = set() def generate(self, heading_text): """Generate a unique filename from heading text.""" base_name = self._generate_base_name(heading_text) unique_name = self._resolve_conflicts(base_name) self.used_names.add(unique_name) return unique_name def _generate_base_name(self, heading_text): """Generate base filename without conflict resolution.""" if self.preserve_numbers: # Extract leading numbers and format them match = re.match(r'^(\d+)\.?\s*(.+)', heading_text) if match: number, rest = match.groups() number_part = f"{int(number):02d}" text_part = self._apply_filename_rules(rest, self.max_length - len(number_part) - len(self.separator)) return f"{number_part}{self.separator}{text_part}" return self._apply_filename_rules(heading_text, self.max_length) def _apply_filename_rules(self, text, max_length): """Apply filename generation rules with custom settings.""" if not text or not text.strip(): return "untitled" # Sanitize markdown formatting first text = sanitize_heading_text(text) # Handle numbered sections specially (e.g., "Section 1.1.1" -> "section_1_1_1") while re.search(r'(\d+)\.(\d+)', text): text = re.sub(r'(\d+)\.(\d+)', r'\1_\2', text) # Apply case style if self.case_style == "lower": text = text.lower() elif self.case_style == "upper": text = text.upper() elif self.case_style == "title": text = text.title() elif self.case_style == "camel": # Split into words and camelCase them words = re.split(r'[-\s]+', text.lower()) if words: text = words[0] + ''.join(word.capitalize() for word in words[1:]) # Replace path separators with separators first text = re.sub(r'[/\\]', self.separator, text) if self.separator else re.sub(r'[/\\]', '', text) # Convert Unicode characters to ASCII equivalents text = unicodedata.normalize('NFKD', text) text = ''.join(c for c in text if not unicodedata.combining(c)) # Remove other special characters and replace spaces with separators safe_name = re.sub(r'[^\w\s-]', '', text) if self.separator: safe_name = re.sub(r'[-\s]+', self.separator, safe_name) else: safe_name = re.sub(r'[-\s]+', '', safe_name) # Remove leading/trailing separators if self.separator: safe_name = safe_name.strip(self.separator) # Handle empty result after sanitization if not safe_name: return "untitled" # Truncate if too long if len(safe_name) > max_length: if self.separator: safe_name = safe_name[:max_length].rstrip(self.separator) else: safe_name = safe_name[:max_length] return safe_name def _resolve_conflicts(self, base_name): """Resolve filename conflicts by adding numbers.""" if base_name not in self.used_names: return base_name counter = 2 while True: candidate = f"{base_name}{self.separator}{counter}" if candidate not in self.used_names: return candidate counter += 1 def reset(self): """Reset the used names tracking.""" self.used_names.clear() def resolve_filename_conflicts(filename, existing_files): """Resolve conflicts with existing files.""" existing_basenames = {Path(f).stem for f in existing_files} if filename not in existing_basenames: return filename counter = 2 while True: candidate = f"{filename}_{counter}" if candidate not in existing_basenames: return candidate counter += 1 class DirectoryStructureBuilder: """Builds directory structures from markdown sections.""" def __init__(self, output_dir, max_depth=10, file_extension=".md"): self.output_dir = Path(output_dir) self.max_depth = max_depth self.file_extension = file_extension self.filename_generator = FilenameGenerator() def build(self, sections): """Build directory structure from sections.""" self.output_dir.mkdir(parents=True, exist_ok=True) for section in sections: self._process_section(section, self.output_dir, 1) return self.output_dir def _process_section(self, section, parent_dir, current_depth): """Process a single section and its children.""" if current_depth > self.max_depth: return safe_name = self.filename_generator.generate(section.title) if section.children and current_depth < self.max_depth: # Create directory for sections with children section_dir = parent_dir / safe_name section_dir.mkdir(exist_ok=True) # Create an index file for the section content if section.content.strip(): index_file = section_dir / f"index{self.file_extension}" index_file.write_text(section.content, encoding='utf-8') # Process children for child in section.children: self._process_section(child, section_dir, current_depth + 1) else: # Create file for leaf sections section_file = parent_dir / f"{safe_name}{self.file_extension}" section_file.write_text(section.content, encoding='utf-8') def create_directory_structure(sections, output_dir): """Create directory structure from parsed markdown sections.""" builder = DirectoryStructureBuilder(output_dir) builder.build(sections) return True def explode_markdown_file(input_file, output_dir): """ Explode a markdown file into a directory structure. Takes a markdown file with hierarchical headings and creates a directory structure where each heading becomes a directory or file, preserving the document's organization and all content. Args: input_file (Path or str): Path to the input markdown file output_dir (Path or str): Directory where exploded structure will be created Returns: Path: Path to the created output directory Raises: FileNotFoundError: If the input file doesn't exist ValueError: If no heading structure is found in the file PermissionError: If unable to write to the output directory Example: >>> explode_markdown_file("book.md", "chapters/") PosixPath('/path/to/chapters') """ input_path = Path(input_file) output_path = Path(output_dir) if not input_path.exists(): raise FileNotFoundError(f"Input file not found: {input_path}") # Parse the markdown structure sections = parse_markdown_structure(input_path) if not sections: raise ValueError("No heading structure found in markdown file") # Create the directory structure create_directory_structure(sections, output_path) return output_path # CLI Command for markdown explosion @click.command() @click.argument('input_file', type=click.Path(exists=True)) @click.option('--output-dir', '-o', type=click.Path(), help='Output directory for exploded files (default: _exploded)') @click.option('--max-depth', type=int, default=10, help='Maximum directory nesting depth (default: 10)') @click.option('--dry-run', is_flag=True, help='Show what would be done without creating files') @click.option('--verbose', '-v', is_flag=True, help='Show detailed output during processing') @click.pass_context def md_explode_command(ctx, input_file, output_dir, max_depth, dry_run, verbose): """ Explode a markdown file into a directory structure. Takes a markdown file with hierarchical headings (# ## ### etc.) and creates a directory structure where each heading becomes a directory or file, with content distributed appropriately. INPUT_FILE: Path to the markdown file to explode Examples: # Explode book.md into book_exploded/ directory markitect md-explode book.md # Explode into custom output directory markitect md-explode book.md --output-dir /path/to/chapters # Preview what would be created markitect md-explode book.md --dry-run --verbose """ config = ctx.obj or {} try: input_path = Path(input_file) # Determine output directory if output_dir: output_path = Path(output_dir) else: output_path = input_path.parent / f"{input_path.stem}_exploded" is_verbose = verbose or config.get('verbose', False) if dry_run: if is_verbose: _show_verbose_output(input_path, output_path, max_depth, None) _handle_dry_run(input_path, output_path, max_depth) return # Actually explode the file result_dir = explode_markdown_file(input_path, output_path) click.echo(f"āœ… Successfully exploded markdown file!") click.echo(f"šŸ“ Created structure in: {result_dir}") if is_verbose: _show_verbose_output(input_path, output_path, max_depth, result_dir) except Exception as e: click.echo(f"āŒ Error exploding markdown file: {e}", err=True) raise click.Abort() def _show_section_structure(section, indent=""): """Helper to show section structure for dry-run.""" click.echo(f"{indent}šŸ“ {section.title} (Level {section.level})") for child in section.children: _show_section_structure(child, indent + " ") def _count_sections(sections): """Helper to count total sections.""" count = len(sections) for section in sections: count += _count_sections(section.children) return count def _handle_dry_run(input_path, output_path, max_depth): """Handle dry-run mode for md-explode command.""" sections = parse_markdown_structure(input_path) if not sections: click.echo("āŒ No heading structure found in file") return click.echo(f"šŸ“‹ Would create structure:") for section in sections: _show_section_structure(section) click.echo(f"šŸ“ Total sections: {_count_sections(sections)}") def _show_verbose_output(input_path, output_path, max_depth, result_dir=None): """Show verbose output after successful explosion.""" click.echo(f"Exploding markdown file: {input_path}") click.echo(f"Output directory: {output_path}") click.echo(f"Maximum depth: {max_depth}") if result_dir: # Show created files (only for actual explosion, not dry-run) md_files = list(result_dir.rglob("*.md")) click.echo(f"šŸ“„ Created {len(md_files)} markdown files:") for md_file in sorted(md_files): relative_path = md_file.relative_to(result_dir) click.echo(f" {relative_path}") # ============================================================================== # Markdown Implosion Functions for Issue #139 # ============================================================================== class DirectoryNode: """ Represents a node in the directory structure for implosion. This class models a directory or file node that can be processed during the implosion process, reconstructing the original markdown structure. Attributes: path (Path): Path to the directory or file name (str): Name of the directory or file depth (int): Depth level in the directory structure is_directory (bool): Whether this node represents a directory children (list): List of child DirectoryNode objects markdown_files (list): List of markdown files in this directory parent (DirectoryNode): Parent directory node """ def __init__(self, path, name, depth, is_directory): """ Initialize a new DirectoryNode. Args: path (Path): Path to the directory or file name (str): Name of the directory or file depth (int): Depth level (0 for root level) is_directory (bool): Whether this is a directory """ self.path = Path(path) self.name = name self.depth = depth self.is_directory = is_directory self.children = [] self.markdown_files = [] self.parent = None def add_child(self, child_node): """Add a child node to this directory node.""" child_node.parent = self self.children.append(child_node) def add_markdown_file(self, file_path): """Add a markdown file to this directory node.""" self.markdown_files.append(Path(file_path)) class DirectoryStructure: """Represents the complete directory structure for implosion.""" def __init__(self): self.root_nodes = [] self.all_nodes = [] def add_root_node(self, node): """Add a root-level node to the structure.""" self.root_nodes.append(node) self.all_nodes.append(node) self._collect_all_nodes(node) def _collect_all_nodes(self, node): """Recursively collect all nodes from the tree.""" for child in node.children: self.all_nodes.append(child) self._collect_all_nodes(child) def scan_markdown_files(directory, recursive=True): """ Scan directory for markdown files. Args: directory (Path): Directory to scan recursive (bool): Whether to scan recursively Returns: list: List of Path objects for markdown files """ directory = Path(directory) markdown_files = [] if recursive: markdown_files.extend(directory.rglob("*.md")) markdown_files.extend(directory.rglob("*.markdown")) else: markdown_files.extend(directory.glob("*.md")) markdown_files.extend(directory.glob("*.markdown")) return sorted(markdown_files) def detect_hierarchy_from_structure(directory): """ Detect hierarchical organization from directory structure. Args: directory (Path): Root directory to analyze Returns: list: List of DirectoryNode objects representing hierarchy """ directory = Path(directory) hierarchy = [] def _process_directory(dir_path, depth=0): """Recursively process directories.""" nodes = [] # Process markdown files in this directory for md_file in dir_path.glob("*.md"): node = DirectoryNode(md_file, md_file.name, depth, False) nodes.append(node) # Process subdirectories for subdir in dir_path.iterdir(): if subdir.is_dir(): node = DirectoryNode(subdir, subdir.name, depth, True) # Add markdown files in subdirectory for md_file in subdir.glob("*.md"): node.add_markdown_file(md_file) # Process children recursively children = _process_directory(subdir, depth + 1) for child in children: node.add_child(child) nodes.append(node) return nodes return _process_directory(directory) def analyze_directory_structure(directory): """ Analyze directory structure and create comprehensive structure representation. Args: directory (Path): Directory to analyze Returns: DirectoryStructure: Complete structure analysis """ directory = Path(directory) structure = DirectoryStructure() # Get all items in the directory for item in sorted(directory.iterdir()): if item.is_dir(): node = DirectoryNode(item, item.name, 1, True) _analyze_subdirectory(node, item, 2) structure.add_root_node(node) elif item.suffix.lower() in ['.md', '.markdown']: node = DirectoryNode(item, item.name, 0, False) structure.add_root_node(node) return structure def _analyze_subdirectory(parent_node, directory, depth): """Recursively analyze subdirectories.""" for item in sorted(directory.iterdir()): if item.is_dir(): child_node = DirectoryNode(item, item.name, depth, True) parent_node.add_child(child_node) _analyze_subdirectory(child_node, item, depth + 1) elif item.suffix.lower() in ['.md', '.markdown']: parent_node.add_markdown_file(item) class DirectoryAnalysis: """Analysis result for a directory containing index and content files.""" def __init__(self): self.index_file = None self.content_files = [] def identify_index_files(directory): """ Identify index.md files vs regular content files in a directory. Args: directory (Path): Directory to analyze Returns: DirectoryAnalysis: Analysis of index vs content files """ directory = Path(directory) analysis = DirectoryAnalysis() for md_file in directory.glob("*.md"): if md_file.name.lower() == "index.md": analysis.index_file = md_file else: analysis.content_files.append(md_file) analysis.content_files = sorted(analysis.content_files) return analysis def decode_filename_to_heading(filename): """ Decode filesystem-safe filename back to readable heading. Args: filename (str): Filename to decode Returns: str: Decoded heading text """ if isinstance(filename, Path): filename = filename.name # Remove .md extension if filename.endswith('.md'): filename = filename[:-3] # Skip index files if filename.lower() == 'index': return "" decoder = FilenameDecoder() return decoder.decode(filename) def decode_directory_name_to_heading(dirname): """ Decode directory name back to heading text. Args: dirname (str): Directory name to decode Returns: str: Decoded heading text """ decoder = FilenameDecoder() return decoder.decode(dirname) class FilenameDecoder: """Decodes filesystem-safe filenames back to readable headings.""" def __init__(self, preserve_acronyms=True, title_case_enabled=True, number_format_reconstruction=True, context_aware=False, flexible_parsing=False): self.preserve_acronyms = preserve_acronyms self.title_case_enabled = title_case_enabled self.number_format_reconstruction = number_format_reconstruction self.context_aware = context_aware self.flexible_parsing = flexible_parsing def decode(self, filename, parent_context=None): """ Decode a filename back to heading text. Args: filename (str or Path): Filename to decode parent_context (str): Optional parent directory context Returns: str: Decoded heading text """ if isinstance(filename, Path): filename = filename.name # Remove extension if '.' in filename: filename = filename.rsplit('.', 1)[0] # Skip index files if filename.lower() == 'index': return "" # Basic decoding steps decoded = filename.replace('_', ' ') # Add colons after numbers in structured headings decoded = self._add_structural_colons(decoded) # Reconstruct number formats if self.number_format_reconstruction: decoded = reconstruct_number_format(decoded) # Restore special characters decoded = restore_special_characters(decoded) # Apply title case if self.title_case_enabled: decoded = apply_title_case(decoded) return decoded def _add_structural_colons(self, text): """Add colons to structured headings like 'Chapter 1 Title'.""" import re # Pattern for "chapter/section/part number rest_of_title" pattern = r'\b(chapter|section|part|appendix)\s+(\d+(?:\.\d+)?)\s+(.+)' def add_colon(match): prefix = match.group(1) number = match.group(2) title = match.group(3) return f"{prefix} {number}: {title}" return re.sub(pattern, add_colon, text, flags=re.IGNORECASE) def decode_batch(self, filenames): """Decode multiple filenames in batch.""" return [self.decode(f) for f in filenames] def restore_special_characters(text): """ Restore special characters that were encoded for filesystem safety. Args: text (str): Text with encoded characters Returns: str: Text with restored special characters """ # Common transformations from filesystem-safe to readable replacements = { 'whats': "What's", 'file path': "File/Path", 'and': "&", 'colon': ":", 'parentheses': "(", 'brackets': "[" } # Apply some basic transformations for encoded, decoded in replacements.items(): if encoded in text.lower(): # This is a simplified implementation - real implementation would be more sophisticated pass return text def reconstruct_number_format(text): """ Reconstruct proper number formats from encoded versions. Args: text (str): Text with encoded number formats Returns: str: Text with proper number formatting """ # Convert patterns like "section 1 1 1" to "Section 1.1.1" # This is a simplified implementation import re # Handle numbered sections like "section 1 2 3" -> "Section 1.2.3" pattern = r'\b(section|chapter|part|appendix|figure|table)\s+(\d+(?:\s+\d+)*)\b' def replace_numbers(match): prefix = match.group(1) numbers = match.group(2).split() if len(numbers) > 1: number_part = '.'.join(numbers) return f"{prefix.title()} {number_part}" return match.group(0) result = re.sub(pattern, replace_numbers, text, flags=re.IGNORECASE) return result def apply_title_case(text): """ Apply appropriate title case to reconstructed headings. Args: text (str): Text to apply title case to Returns: str: Text with proper title case """ # Handle common acronyms that should stay uppercase acronyms = {'API', 'SQL', 'HTTP', 'JSON', 'XML', 'CSS', 'HTML', 'REST', 'URL'} words = text.split() result_words = [] for word in words: word_upper = word.upper() if word_upper in acronyms: result_words.append(word_upper) else: result_words.append(word.capitalize()) return ' '.join(result_words) def combine_markdown_files(files, section_spacing=2): """ Combine multiple markdown files into a single content string. Args: files (list): List of Path objects for markdown files section_spacing (int): Number of blank lines between sections Returns: str: Combined markdown content """ combined_content = [] spacing = '\n' * section_spacing for file_path in files: try: content = file_path.read_text(encoding='utf-8') if content.strip(): # Only add non-empty content combined_content.append(content.strip()) except Exception: # Skip files that can't be read continue return spacing.join(combined_content) def preserve_markdown_formatting(files): """ Preserve all markdown formatting during aggregation. Args: files (list): List of markdown files to process Returns: str: Combined content with preserved formatting """ return combine_markdown_files(files) def handle_index_files(directory): """ Handle index.md files as parent section content. Args: directory (Path): Directory to process Returns: str: Aggregated content with index files handled properly """ directory = Path(directory) content_parts = [] def _process_directory(dir_path, depth=0): """Recursively process directories.""" # Check for index file first index_file = dir_path / "index.md" if index_file.exists(): index_content = index_file.read_text(encoding='utf-8') if index_content.strip(): content_parts.append(index_content.strip()) # Process other markdown files for md_file in sorted(dir_path.glob("*.md")): if md_file.name != "index.md": content = md_file.read_text(encoding='utf-8') if content.strip(): content_parts.append(content.strip()) # Process subdirectories for subdir in sorted(dir_path.iterdir()): if subdir.is_dir(): _process_directory(subdir, depth + 1) _process_directory(directory) return '\n\n'.join(content_parts) class FrontMatterConsolidator: """Consolidates front matter from multiple markdown files.""" def __init__(self, conflict_strategy="merge"): self.conflict_strategy = conflict_strategy def consolidate(self, files): """ Consolidate front matter from multiple files. Args: files (list): List of markdown file paths Returns: tuple: (consolidated_front_matter_dict, combined_content) """ import yaml consolidated_fm = {} content_parts = [] for file_path in files: try: content = file_path.read_text(encoding='utf-8') fm, body = self._extract_front_matter(content) if fm: self._merge_front_matter(consolidated_fm, fm) if body.strip(): content_parts.append(body.strip()) except Exception: # Skip problematic files continue combined_content = '\n\n'.join(content_parts) return consolidated_fm, combined_content def _extract_front_matter(self, content): """Extract YAML front matter from markdown content.""" if not content.startswith('---\n'): return None, content try: parts = content.split('---\n', 2) if len(parts) >= 3: import yaml front_matter = yaml.safe_load(parts[1]) body = parts[2] return front_matter, body except Exception: pass return None, content def _merge_front_matter(self, target, source): """Merge source front matter into target.""" for key, value in source.items(): if key not in target: target[key] = value elif self.conflict_strategy == "merge" and isinstance(target[key], list): if isinstance(value, list): target[key].extend(value) else: target[key].append(value) # Other conflict strategies could be implemented here def process_front_matter(file_path): """ Extract front matter and content from a markdown file. Args: file_path (Path): Path to markdown file Returns: tuple: (front_matter_dict, content_string) """ consolidator = FrontMatterConsolidator() return consolidator._extract_front_matter(file_path.read_text(encoding='utf-8')) def aggregate_content(input_dir, preserve_front_matter=True, section_spacing=2): """ Aggregate content from directory structure. Args: input_dir (Path): Directory containing markdown files preserve_front_matter (bool): Whether to preserve front matter section_spacing (int): Lines between sections Returns: str: Aggregated markdown content """ aggregator = ContentAggregator( preserve_formatting=True, handle_front_matter=preserve_front_matter, section_spacing=section_spacing ) return aggregator.aggregate(input_dir) class ContentAggregator: """Comprehensive content aggregation for markdown implosion.""" def __init__(self, preserve_formatting=True, handle_front_matter=True, section_spacing=2, include_toc=False, recursive=True, sort_files=True): self.preserve_formatting = preserve_formatting self.handle_front_matter = handle_front_matter self.section_spacing = section_spacing self.include_toc = include_toc self.recursive = recursive self.sort_files = sort_files def aggregate(self, directory): """ Aggregate all content from directory structure. Args: directory (Path): Root directory to process Returns: str: Aggregated markdown content """ directory = Path(directory) content_parts = [] # Process the directory structure recursively structure = analyze_directory_structure(directory) # Extract content in hierarchical order for root_node in structure.root_nodes: content = self._process_node(root_node) if content.strip(): content_parts.append(content.strip()) # Combine with proper spacing spacing = '\n' * self.section_spacing return spacing.join(content_parts) def _process_node(self, node): """Process a single directory node.""" content_parts = [] if node.is_directory: # Process index file first if it exists index_file = node.path / "index.md" if index_file.exists(): try: content = index_file.read_text(encoding='utf-8') # Decode directory name to heading heading = decode_directory_name_to_heading(node.name) if heading and not content.strip().startswith('#'): # Add appropriate heading level based on depth heading_prefix = '#' * (node.depth) content = f"{heading_prefix} {heading}\n\n{content}" content_parts.append(content.strip()) except Exception: pass # Process other markdown files in this directory for md_file in node.markdown_files: if md_file.name != "index.md": try: content = md_file.read_text(encoding='utf-8') # Decode filename to heading if needed heading = decode_filename_to_heading(md_file.name) if heading and not content.strip().startswith('#'): heading_prefix = '#' * (node.depth + 1) content = f"{heading_prefix} {heading}\n\n{content}" content_parts.append(content.strip()) except Exception: pass # Process child directories for child in sorted(node.children, key=lambda x: x.name): child_content = self._process_node(child) if child_content.strip(): content_parts.append(child_content.strip()) else: # This is a file node try: content = node.path.read_text(encoding='utf-8') heading = decode_filename_to_heading(node.name) if heading and not content.strip().startswith('#'): heading_prefix = '#' * max(1, node.depth) content = f"{heading_prefix} {heading}\n\n{content}" content_parts.append(content.strip()) except Exception: pass return '\n\n'.join(content_parts) def implode_directory(input_dir, output_file=None, preserve_front_matter=True, section_spacing=2, sort_content=True): """ Main function to implode a directory structure back to a single markdown file. Args: input_dir (Path): Directory to implode output_file (Path): Output file path preserve_front_matter (bool): Whether to preserve front matter section_spacing (int): Lines between sections sort_content (bool): Whether to sort content logically Returns: Path: Path to the created output file """ input_dir = Path(input_dir) if not input_dir.exists() or not input_dir.is_dir(): raise FileNotFoundError(f"Input directory not found: {input_dir}") # Check if directory has markdown files markdown_files = scan_markdown_files(input_dir) if not markdown_files: raise ValueError("No markdown files found in directory") # Default output file if output_file is None: output_file = input_dir.parent / f"{input_dir.name}_imploded.md" else: output_file = Path(output_file) # Aggregate content aggregated_content = aggregate_content( input_dir, preserve_front_matter=preserve_front_matter, section_spacing=section_spacing ) # Write output file output_file.parent.mkdir(parents=True, exist_ok=True) output_file.write_text(aggregated_content, encoding='utf-8') return output_file class ImplodeOptions: """Configuration options for the implode operation.""" def __init__(self, input_dir=None, output_file=None, dry_run=False, verbose=False, preserve_front_matter=True, section_spacing=2, sort_content=True, overwrite=False): self.input_dir = input_dir self.output_file = output_file self.dry_run = dry_run self.verbose = verbose self.preserve_front_matter = preserve_front_matter self.section_spacing = section_spacing self.sort_content = sort_content self.overwrite = overwrite class ValidationResult: """Result of validating implode arguments.""" def __init__(self, is_valid=True, errors=None): self.is_valid = is_valid self.errors = errors or [] def validate_implode_arguments(options): """ Validate implode operation arguments. Args: options (ImplodeOptions): Options to validate Returns: ValidationResult: Validation result """ errors = [] if not options.input_dir: errors.append("Input directory is required") elif not Path(options.input_dir).exists(): errors.append(f"Input directory does not exist: {options.input_dir}") if options.output_file: output_path = Path(options.output_file) if output_path.exists() and not options.overwrite: errors.append(f"Output file already exists: {options.output_file}") return ValidationResult(is_valid=len(errors) == 0, errors=errors) class ImplodeResult: """Result of an implode operation.""" def __init__(self, success=False, output_file=None, error_message=None, preview=None, processing_info=None, warning=None): self.success = success self.output_file = output_file self.error_message = error_message self.preview = preview self.processing_info = processing_info or [] self.warning = warning def cli_implode_directory(input_dir, output_file, dry_run=False, verbose=False, overwrite=False, preserve_front_matter=True, section_spacing=2): """ CLI function for directory implosion. Args: input_dir (Path): Input directory output_file (Path): Output file path dry_run (bool): Whether to run in dry-run mode verbose (bool): Whether to show verbose output overwrite (bool): Whether to overwrite existing files preserve_front_matter (bool): Whether to preserve front matter section_spacing (int): Number of lines between sections Returns: ImplodeResult: Result of the operation """ try: options = ImplodeOptions( input_dir=input_dir, output_file=output_file, dry_run=dry_run, verbose=verbose, overwrite=overwrite, preserve_front_matter=preserve_front_matter, section_spacing=section_spacing ) # Validate arguments validation = validate_implode_arguments(options) if not validation.is_valid: return ImplodeResult( success=False, error_message='; '.join(validation.errors) ) # Check for markdown files (excluding output file if in same directory) all_markdown_files = scan_markdown_files(input_dir) output_path = Path(output_file) markdown_files = [f for f in all_markdown_files if f.resolve() != output_path.resolve()] if not markdown_files: return ImplodeResult( success=False, error_message="No markdown files found in directory" ) processing_info = [] if verbose: processing_info.append(f"Found {len(markdown_files)} markdown files") processing_info.append(f"Processing directory: {input_dir}") if dry_run: # Generate preview try: # Create aggregator with filtered files aggregator = ContentAggregator( preserve_formatting=True, handle_front_matter=preserve_front_matter, section_spacing=section_spacing ) # Generate content only from filtered files in hierarchical order def sort_key(file_path): # Sort by path depth (fewer levels first), then by path relative_path = file_path.relative_to(input_dir) depth = len(relative_path.parts) - 1 # Prioritize index.md files at each level name_priority = 0 if relative_path.name == 'index.md' else 1 return (depth, name_priority, str(relative_path)) sorted_files = sorted(markdown_files, key=sort_key) content_parts = [] for file_path in sorted_files: try: content = file_path.read_text(encoding='utf-8') if content.strip(): content_parts.append(content.strip()) except Exception: pass preview_content = f"\n\n{''.join(['\n'] * section_spacing)}\n\n".join(content_parts) return ImplodeResult( success=True, preview=preview_content[:500] + "..." if len(preview_content) > 500 else preview_content, processing_info=processing_info ) except Exception as e: return ImplodeResult( success=False, error_message=f"Error generating preview: {e}" ) # Actually implode the directory using filtered files # Generate content only from filtered files in hierarchical order def sort_key(file_path): # Sort by path depth (fewer levels first), then by path relative_path = file_path.relative_to(input_dir) depth = len(relative_path.parts) - 1 # Prioritize index.md files at each level name_priority = 0 if relative_path.name == 'index.md' else 1 return (depth, name_priority, str(relative_path)) sorted_files = sorted(markdown_files, key=sort_key) content_parts = [] for file_path in sorted_files: try: content = file_path.read_text(encoding='utf-8') if content.strip(): content_parts.append(content.strip()) except Exception: pass aggregated_content = f"\n\n{''.join(['\n'] * section_spacing)}\n\n".join(content_parts) # Write output file output_file = Path(output_file) output_file.parent.mkdir(parents=True, exist_ok=True) output_file.write_text(aggregated_content, encoding='utf-8') result_file = output_file if verbose: processing_info.append(f"Created output file: {result_file}") return ImplodeResult( success=True, output_file=result_file, processing_info=processing_info ) except Exception as e: return ImplodeResult( success=False, error_message=str(e) ) # CLI Command for markdown implosion @click.command() @click.argument('input_dir', type=click.Path(exists=True, file_okay=False, dir_okay=True)) @click.option('--output', '-o', type=click.Path(), help='Output markdown file (default: _imploded.md)') @click.option('--dry-run', is_flag=True, help='Preview what would be created without writing files') @click.option('--verbose', '-v', is_flag=True, help='Show detailed processing information') @click.option('--overwrite', is_flag=True, help='Overwrite existing output file') @click.option('--section-spacing', type=int, default=2, help='Number of blank lines between sections (default: 2)') @click.option('--preserve-front-matter/--no-front-matter', default=True, help='Preserve YAML front matter from files (default: preserve)') @click.pass_context def md_implode_command(ctx, input_dir, output, dry_run, verbose, overwrite, section_spacing, preserve_front_matter): """ Implode a directory structure back into a single markdown file. Takes a directory structure (like one created by md-explode) and combines all markdown files back into a single document, reconstructing the original hierarchical heading structure. INPUT_DIR: Path to the directory to implode Examples: # Implode exploded directory back to markdown markitect md-implode book_exploded/ # Specify custom output file markitect md-implode chapters/ --output reconstructed.md # Preview what would be created markitect md-implode content/ --dry-run --verbose """ config = ctx.obj or {} try: input_path = Path(input_dir) # Determine output file if output: output_path = Path(output) else: output_path = input_path.parent / f"{input_path.name}_imploded.md" is_verbose = verbose or config.get('verbose', False) # Perform the implosion result = cli_implode_directory( input_dir=input_path, output_file=output_path, dry_run=dry_run, verbose=is_verbose, overwrite=overwrite, preserve_front_matter=preserve_front_matter, section_spacing=section_spacing ) if not result.success: click.echo(f"āŒ Error imploding directory: {result.error_message}", err=True) raise click.Abort() if dry_run: click.echo(f"šŸ“‹ Would implode directory: {input_path}") click.echo(f"šŸ“„ Would create file: {output_path}") if result.preview: click.echo(f"\nšŸ“ Content preview:") click.echo("-" * 50) click.echo(result.preview) click.echo("-" * 50) if result.processing_info: click.echo(f"\nā„¹ļø Processing details:") for info in result.processing_info: click.echo(f" {info}") else: click.echo(f"āœ… Successfully imploded directory structure!") click.echo(f"šŸ“ Source directory: {input_path}") click.echo(f"šŸ“„ Created file: {result.output_file}") if is_verbose and result.processing_info: click.echo(f"\nā„¹ļø Processing details:") for info in result.processing_info: click.echo(f" {info}") if result.warning: click.echo(f"āš ļø Warning: {result.warning}") except Exception as e: click.echo(f"āŒ Error imploding directory: {e}", err=True) raise click.Abort()