""" Markdown commands plugin for MarkiTect. This plugin provides the core markdown file operations with md- prefixes, using the new explode-implode variant system for enhanced functionality. """ import click import json import os import re import tempfile import unicodedata from pathlib import Path from typing import Dict, Any from markitect.plugins.base import CommandPlugin, PluginMetadata, PluginType from markitect.plugins.decorators import register_plugin # DocumentManager removed - using CleanDocumentManager directly from markitect.serializer import ASTSerializer # Simple helper function - avoiding circular imports def get_default_format(available_formats=['table', 'json', 'yaml', 'simple'], fallback='simple'): """Get the default output format - simplified version for plugin.""" return fallback # Layered theme system - themes can be combined across different scopes LAYERED_THEMES = { # Mode Themes - Light/dark color schemes 'light': { 'scope': 'mode', 'properties': { 'body_background': '#ffffff', 'body_color': '#333333', 'heading_color': '#24292f', 'code_background': '#f6f8fa', 'code_color': '#24292e', 'border_color': '#d0d7de', 'blockquote_border': '#dfe2e5', 'blockquote_color': '#6a737d', 'table_border': '#d0d7de', 'table_header_bg': '#f6f8fa', 'link_color': '#0969da', 'link_hover_color': '#0550ae' } }, 'dark': { 'scope': 'mode', 'properties': { 'body_background': '#0d1117', 'body_color': '#e1e4e8', 'heading_color': '#58a6ff', 'code_background': '#161b22', 'code_color': '#e1e4e8', 'border_color': '#30363d', 'blockquote_border': '#58a6ff', 'blockquote_color': '#8b949e', 'table_border': '#30363d', 'table_header_bg': '#161b22', 'link_color': '#79c0ff', 'link_hover_color': '#a5d6ff' } }, # UI Themes - Editor interface elements (floating panels, buttons, editing frames) 'standard': { 'scope': 'ui', 'properties': { 'editor_panel_bg': '#f8f9fa', 'editor_panel_border': '#dee2e6', 'editor_button_bg': '#ffffff', 'editor_button_hover': '#e9ecef', 'editor_button_active': '#dee2e6', 'editor_text_color': '#212529', 'editor_focus_color': '#0066cc', 'editor_shadow': 'rgba(0,0,0,0.1)', 'editor_danger_button': '#dc3545', 'editor_danger_button_hover': '#c82333', 'editor_secondary_button': '#6c757d', 'editor_secondary_button_hover': '#545b62', 'editor_warning_bg': '#fff3cd', 'editor_warning_border': '#ffeaa7', 'editor_warning_text': '#856404' } }, 'greyscale': { 'scope': 'ui', 'properties': { 'editor_panel_bg': '#f5f5f5', 'editor_panel_border': '#d0d0d0', 'editor_button_bg': '#ffffff', 'editor_button_hover': '#e8e8e8', 'editor_button_active': '#d4d4d4', 'editor_text_color': '#333333', 'editor_focus_color': '#666666', 'editor_shadow': 'rgba(0,0,0,0.15)', 'editor_accept_bg': '#888888', 'editor_accept_hover': '#777777', 'editor_cancel_bg': '#999999', 'editor_cancel_hover': '#808080', 'editor_danger_button': '#8b0000', 'editor_danger_button_hover': '#700000', 'editor_secondary_button': '#666666', 'editor_secondary_button_hover': '#555555', 'editor_warning_bg': '#f0f0f0', 'editor_warning_border': '#cccccc', 'editor_warning_text': '#555555' } }, 'electric': { 'scope': 'ui', 'properties': { 'editor_panel_bg': '#001122', 'editor_panel_border': '#00ffff', 'editor_button_bg': '#003366', 'editor_button_hover': '#0066cc', 'editor_button_active': '#0099ff', 'editor_text_color': '#00ffff', 'editor_focus_color': '#ffff00', 'editor_shadow': '0 0 20px rgba(0,255,255,0.5), 0 0 40px rgba(255,255,0,0.2)', 'editor_danger_button': '#ff3366', 'editor_danger_button_hover': '#ff0033', 'editor_secondary_button': '#006699', 'editor_secondary_button_hover': '#004d73', 'editor_warning_bg': '#003366', 'editor_warning_border': '#00ffff', 'editor_warning_text': '#ffff00' } }, 'psychedelic': { 'scope': 'ui', 'properties': { 'editor_panel_bg': 'linear-gradient(45deg, #ff6b35, #f7931e, #ffd23f, #06ffa5)', 'editor_panel_border': '#ff1493', 'editor_button_bg': 'rgba(255,255,255,0.2)', 'editor_button_hover': 'rgba(255,20,147,0.3)', 'editor_button_active': 'rgba(255,20,147,0.5)', 'editor_text_color': '#ffffff', 'editor_focus_color': '#ff1493', 'editor_shadow': 'rgba(255,20,147,0.4)', 'editor_danger_button': 'linear-gradient(45deg, #ff0066, #cc0044)', 'editor_danger_button_hover': 'linear-gradient(45deg, #ff3388, #dd1155)', 'editor_secondary_button': 'linear-gradient(45deg, #8a2be2, #4b0082)', 'editor_secondary_button_hover': 'linear-gradient(45deg, #9932cc, #6a1a9a)', 'editor_warning_bg': 'linear-gradient(45deg, #ffa500, #ff8c00)', 'editor_warning_border': '#ff1493', 'editor_warning_text': '#ffffff' } }, # Document Themes - Typography and layout 'basic': { 'scope': 'document', 'properties': { 'font_family': '-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif', 'max_width': '800px', 'heading_style': 'simple', 'text_align': 'left' } }, 'github': { 'scope': 'document', 'properties': { 'font_family': '-apple-system, BlinkMacSystemFont, Segoe UI, Roboto, Helvetica Neue, Arial, sans-serif', 'max_width': '900px', 'heading_style': 'underlined', 'text_align': 'left' } }, 'academic': { 'scope': 'document', 'properties': { 'font_family': 'Georgia, Times New Roman, serif', 'max_width': '650px', 'heading_style': 'centered', 'text_align': 'justify', 'link_color': '#777777', 'link_hover_color': '#999999' } }, # Branding Themes - Company/personal styling 'corporate': { 'scope': 'branding', 'properties': { 'accent_color': '#0066cc', 'secondary_color': '#f8f9fa', 'brand_font': 'inherit' } }, 'startup': { 'scope': 'branding', 'properties': { 'accent_color': '#ff6b35', 'secondary_color': '#f4f4f4', 'brand_font': 'inherit' } } } # Legacy compatibility - map old theme names to new layered equivalents LEGACY_THEME_MAPPING = { 'basic': ['light', 'standard', 'basic'], 'github': ['light', 'standard', 'github'], 'dark': ['dark', 'standard', 'basic'], 'academic': ['light', 'standard', 'academic'] } # Keep TEMPLATE_STYLES for backward compatibility in tests TEMPLATE_STYLES = { 'basic': { 'body_color': '#333', 'font_family': '-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif', 'max_width': '800px' }, 'github': { 'body_color': '#24292f', 'font_family': '-apple-system, BlinkMacSystemFont, Segoe UI, Roboto, Helvetica Neue, Arial, sans-serif', 'max_width': '900px' }, 'dark': { 'body_color': '#e1e4e8', 'font_family': '-apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif', 'max_width': '800px' }, 'academic': { 'body_color': '#333', 'font_family': 'Georgia, Times New Roman, serif', 'max_width': '650px' } } def parse_theme_string(theme_string: str) -> list: """ Parse theme string into list of individual themes. Supports: - Single theme: "dark" - Multiple themes: "dark,academic" or "dark, academic" - Legacy theme mapping: "basic" -> ["light", "basic"] Args: theme_string: Comma-separated theme names Returns: List of theme names in order """ if not theme_string: return ['light', 'basic'] # Default themes # Split by comma and clean up whitespace themes = [theme.strip() for theme in theme_string.split(',')] # Expand legacy themes only if they don't exist in the new layered system expanded_themes = [] for theme in themes: if theme in LAYERED_THEMES: # Theme exists in new system, use as-is expanded_themes.append(theme) elif theme in LEGACY_THEME_MAPPING: # Legacy theme, expand it expanded_themes.extend(LEGACY_THEME_MAPPING[theme]) else: # Unknown theme, add as-is (will be warned about later) expanded_themes.append(theme) return expanded_themes class ThemeType(click.ParamType): """Custom click type for theme validation.""" name = "theme" def convert(self, value, param, ctx): if value is None: return value try: validate_theme_string(value) return value except click.BadParameter as e: self.fail(str(e), param, ctx) def validate_theme_string(theme_string: str) -> None: """ Validate that all themes in a theme string are known themes. Args: theme_string: Comma-separated theme names Raises: click.BadParameter: If any theme is unknown """ if not theme_string: return # Allow empty/None themes themes = parse_theme_string(theme_string) unknown_themes = [] for theme_name in themes: if theme_name not in LAYERED_THEMES and theme_name not in LEGACY_THEME_MAPPING: unknown_themes.append(theme_name) if unknown_themes: available_themes = list(LAYERED_THEMES.keys()) + list(LEGACY_THEME_MAPPING.keys()) raise click.BadParameter( f"Unknown theme(s): {', '.join(unknown_themes)}. " f"Available themes: {', '.join(sorted(set(available_themes)))}" ) def combine_theme_properties(theme_list: list) -> dict: """ Combine properties from multiple themes, with later themes overriding earlier ones. Args: theme_list: List of theme names in order of application Returns: Combined properties dictionary """ combined_properties = {} for theme_name in theme_list: if theme_name in LAYERED_THEMES: theme_data = LAYERED_THEMES[theme_name] # Later themes override earlier ones combined_properties.update(theme_data['properties']) elif theme_name in LEGACY_THEME_MAPPING: # Handle legacy themes by expanding them expanded_themes = LEGACY_THEME_MAPPING[theme_name] for expanded_theme in expanded_themes: if expanded_theme in LAYERED_THEMES: theme_data = LAYERED_THEMES[expanded_theme] combined_properties.update(theme_data['properties']) else: # This should not happen if validation is working print(f"Warning: Unknown theme '{theme_name}' - skipping") return combined_properties def generate_html_with_embedded_markdown(markdown_content, title, theme, css_content, template_vars): """ Generate HTML with embedded markdown content for testing. This function is used by tests to validate template functionality. """ # Create a temporary document manager for rendering from markitect.clean_document_manager import CleanDocumentManager doc_manager = CleanDocumentManager(None) # Generate HTML template html_content = doc_manager._generate_html_template( markdown_content=markdown_content, title=title, css=css_content, template=theme ) return html_content # Publication directory management functions def get_publication_directory() -> Path: """ Get the publication directory path. Returns the path specified by MARKITECT_PUBLICATION_DIR environment variable, or defaults to ~/Notes if not set. """ pub_dir = os.environ.get('MARKITECT_PUBLICATION_DIR') if pub_dir: return Path(pub_dir) return Path.home() / "Notes" def ensure_publication_directory(pub_dir: Path) -> None: """ Ensure the publication directory exists, creating it if necessary. Args: pub_dir: Path to the publication directory """ pub_dir.mkdir(parents=True, exist_ok=True) def normalize_publication_path(path_str: str) -> Path: """ Normalize a publication directory path. Handles tilde expansion and resolves relative paths to absolute paths. Args: path_str: String path that may contain ~ or relative components Returns: Absolute Path object """ path = Path(path_str).expanduser().resolve() return path def get_output_filename(input_file: Path) -> str: """ Get the output filename for a markdown file. Args: input_file: Path to the input markdown file Returns: Output filename with .html extension """ return input_file.stem + ".html" def find_markdown_files(directory: Path) -> list[Path]: """ Find all markdown files in a directory recursively. Args: directory: Directory to search in Returns: List of Path objects for found markdown files """ if not directory.exists(): return [] markdown_files = [] for md_file in directory.rglob("*.md"): if md_file.is_file(): markdown_files.append(md_file) return sorted(markdown_files) def get_relative_output_path(source_file: Path, base_dir: Path, pub_dir: Path) -> Path: """ Get the output path for a source file, preserving directory structure. Args: source_file: Path to the source markdown file base_dir: Base directory (to calculate relative path from) pub_dir: Publication directory (destination base) Returns: Full output path in publication directory """ # Get relative path from base directory relative_path = source_file.relative_to(base_dir) # Change extension to .html html_relative = relative_path.with_suffix('.html') # Combine with publication directory return pub_dir / html_relative def process_single_file(input_file: Path, use_publication_dir: bool, publication_dir: Path) -> Path: """ Process a single markdown file. Args: input_file: Path to the input markdown file use_publication_dir: Whether to use publication directory publication_dir: Publication directory path Returns: Path to the output HTML file Raises: FileNotFoundError: If input file doesn't exist """ if not input_file.exists(): raise FileNotFoundError(f"Input file does not exist: {input_file}") # Determine output path if use_publication_dir: ensure_publication_directory(publication_dir) output_file = publication_dir / get_output_filename(input_file) else: output_file = input_file.with_suffix('.html') # Create document manager and render from markitect.clean_document_manager import CleanDocumentManager doc_manager = CleanDocumentManager(None) doc_manager.render_file(str(input_file), str(output_file)) return output_file def process_directory(input_dir: Path, use_publication_dir: bool, publication_dir: Path) -> list[Path]: """ Process all markdown files in a directory. Args: input_dir: Directory containing markdown files use_publication_dir: Whether to use publication directory publication_dir: Publication directory path Returns: List of paths to generated HTML files """ markdown_files = find_markdown_files(input_dir) output_files = [] from markitect.clean_document_manager import CleanDocumentManager doc_manager = CleanDocumentManager(None) for md_file in markdown_files: if use_publication_dir: ensure_publication_directory(publication_dir) output_file = get_relative_output_path(md_file, input_dir, publication_dir) # Ensure subdirectories exist output_file.parent.mkdir(parents=True, exist_ok=True) else: output_file = md_file.with_suffix('.html') # Render the file doc_manager.render_file(str(md_file), str(output_file)) output_files.append(output_file) return output_files # Index generation functions def find_html_files(directory: Path, recursive: bool = False) -> list[Path]: """ Find all HTML files in a directory. Args: directory: Directory to search in recursive: Whether to search recursively in subdirectories Returns: List of Path objects for found HTML files """ if not directory.exists(): return [] html_files = [] if recursive: # Search recursively for html_file in directory.rglob("*.html"): if html_file.is_file(): html_files.append(html_file) else: # Search only in current directory for html_file in directory.glob("*.html"): if html_file.is_file(): html_files.append(html_file) return sorted(html_files) def extract_html_title(html_file: Path) -> str: """ Extract title from an HTML file. Tries to extract the title from tag first, then from <h1> tag, and finally falls back to the filename. Args: html_file: Path to the HTML file Returns: Extracted title string """ try: content = html_file.read_text(encoding='utf-8', errors='ignore') # Try to extract from <title> tag import re title_match = re.search(r'<title[^>]*>(.*?)', content, re.IGNORECASE | re.DOTALL) if title_match: title = title_match.group(1).strip() # Clean up any HTML entities or extra whitespace title = re.sub(r'\s+', ' ', title) if title: return title # Try to extract from

tag h1_match = re.search(r']*>(.*?)

', content, re.IGNORECASE | re.DOTALL) if h1_match: h1_title = h1_match.group(1).strip() # Remove any HTML tags within the h1 h1_title = re.sub(r'<[^>]+>', '', h1_title) h1_title = re.sub(r'\s+', ' ', h1_title) if h1_title: return h1_title except Exception: # If anything goes wrong reading/parsing the file, fall back to filename pass # Fallback to filename without extension return html_file.stem def generate_index_html(html_files: list, title: str, theme: str = None) -> str: """ Generate HTML content for an index page. Args: html_files: List of dictionaries with 'path', 'title', and 'relative_path' keys title: Title for the index page theme: Theme to use Returns: HTML content string """ # Get template CSS from markitect.clean_document_manager import CleanDocumentManager doc_manager = CleanDocumentManager(None) template_css = doc_manager._get_template_css(theme) # Generate file list HTML if not html_files: file_list_html = '

No HTML files found in this directory.

' else: file_items = [] for file_info in html_files: href = file_info['relative_path'] link_title = file_info['title'] file_items.append(f'
  • {link_title}
  • ') file_list_html = f""" """ # Generate complete HTML html_content = f""" {title}

    {title}

    {file_list_html}
    """ return html_content def process_directory_for_index(directory: Path, index_filename: str = "index.html") -> Path: """ Process a directory and create an index HTML file. Args: directory: Directory to process index_filename: Name of the index file to create Returns: Path to the created index file Raises: FileNotFoundError: If directory doesn't exist """ if not directory.exists(): raise FileNotFoundError(f"Directory does not exist: {directory}") # Find all HTML files except the index file itself html_files = find_html_files(directory, recursive=False) # Create file info list, excluding the index file file_info_list = [] for html_file in html_files: if html_file.name != index_filename: title = extract_html_title(html_file) relative_path = html_file.name # Since we're not doing recursive, just use filename file_info_list.append({ 'path': html_file, 'title': title, 'relative_path': relative_path }) # Generate index page title index_title = f"Index - {directory.name}" # Generate HTML content html_content = generate_index_html(file_info_list, index_title) # Write index file index_path = directory / index_filename index_path.write_text(html_content, encoding='utf-8') return index_path # Markdown parsing functions - decoupled utilities class MarkdownSection: """ Represents a section of markdown content with hierarchical structure. This is a simple data class that doesn't depend on any external systems, making it easily reusable and testable. """ def __init__(self, level: int, title: str, content: str = "", line_start: int = 0, line_end: int = 0): self.level = level self.title = title self.content = content self.line_start = line_start self.line_end = line_end self.children = [] self.parent = None def add_child(self, child: 'MarkdownSection'): """Add a child section with hierarchy validation.""" # Validate hierarchy - child level should be exactly one level deeper if child.level != self.level + 1: raise ValueError(f"Invalid heading hierarchy: level {child.level} cannot be child of level {self.level}") child.parent = self self.children.append(child) def __repr__(self): return f"MarkdownSection(level={self.level}, title='{self.title}', children={len(self.children)})" def extract_headings(markdown_content: str) -> list[dict]: """ Extract all headings from markdown content with their positions. Decoupled function that only requires markdown text as input. Returns a simple list of dictionaries for easy processing. Args: markdown_content: Raw markdown text Returns: List of dictionaries with 'level', 'title', and 'line' keys """ import re headings = [] lines = markdown_content.split('\n') for line_num, line in enumerate(lines): # Match ATX-style headings (### Title) heading_match = re.match(r'^(#{1,6})\s+(.+)$', line.strip()) if heading_match: level = len(heading_match.group(1)) title = heading_match.group(2).strip() headings.append({ 'level': level, 'title': title, 'line': line_num }) return headings def extract_section_content(markdown_content: str, headings: list[dict], section_index: int) -> str: """ Extract content for a specific section between headings. Decoupled function that operates on simple data structures. Args: markdown_content: Raw markdown text headings: List of heading dictionaries from extract_headings() section_index: Index of the heading to extract content for Returns: Markdown content for the specified section """ if not headings or section_index >= len(headings): return "" lines = markdown_content.split('\n') current_heading = headings[section_index] start_line = current_heading['line'] # Find the end line (next heading at same or higher level) end_line = len(lines) current_level = current_heading['level'] for next_heading in headings[section_index + 1:]: if next_heading['level'] <= current_level: end_line = next_heading['line'] break # Extract the section content section_lines = lines[start_line:end_line] return '\n'.join(section_lines) def parse_markdown_structure(file_path: Path) -> tuple[list[MarkdownSection], dict]: """ Parse a markdown file into hierarchical structure with front matter. Decoupled function that works with file paths and returns simple objects. Args: file_path: Path to the markdown file Returns: Tuple of (list of root MarkdownSection objects, front_matter dict or None) """ import re # Read file content try: content = file_path.read_text(encoding='utf-8') except Exception as e: raise FileNotFoundError(f"Could not read markdown file: {file_path}") from e # Extract front matter if present front_matter = None markdown_content = content # Check for YAML front matter front_matter_match = re.match(r'^---\n(.*?)\n---\n(.*)$', content, re.DOTALL) if front_matter_match: # Return raw YAML string as tests expect front_matter = front_matter_match.group(1) markdown_content = front_matter_match.group(2) # Extract headings headings = extract_headings(markdown_content) if not headings: return [], front_matter # Build hierarchical structure root_sections = [] section_stack = [] for i, heading in enumerate(headings): # Extract content for this section section_content = extract_section_content(markdown_content, headings, i) # Create section object section = MarkdownSection( level=heading['level'], title=heading['title'], content=section_content, line_start=heading['line'] ) # Find the right place in hierarchy while section_stack and section_stack[-1].level >= section.level: section_stack.pop() if section_stack: # Add as child to the last section in stack # Use direct assignment to handle hierarchy gaps gracefully during parsing parent = section_stack[-1] section.parent = parent parent.children.append(section) else: # This is a root level section root_sections.append(section) section_stack.append(section) return root_sections, front_matter def title_to_filesystem_name(title: str) -> str: """Convert a markdown heading title to a filesystem-safe name. Args: title: The markdown heading title Returns: A filesystem-safe name (lowercase, spaces/punctuation to underscores) """ import re # Remove any markdown formatting cleaned = re.sub(r'[#*`\[\](){}]', '', title) # Convert to lowercase cleaned = cleaned.lower() # Remove non-alphanumeric chars except spaces, hyphens, periods, colons, slashes cleaned = re.sub(r'[^\w\s.-:/]', '', cleaned) # Replace dots, spaces, hyphens, colons, and slashes with underscores cleaned = re.sub(r'[.\s:/\-]', '_', cleaned) # Collapse multiple underscores into single underscore cleaned = re.sub(r'_+', '_', cleaned) # Remove leading/trailing underscores cleaned = cleaned.strip('_') return cleaned or 'untitled' def create_directory_structure(sections: list[MarkdownSection], target_dir: Path) -> list[Path]: """Create directory structure from markdown sections. Args: sections: List of root-level MarkdownSection objects target_dir: Target directory to create structure in Returns: List of created paths (files and directories) """ target_dir = Path(target_dir) target_dir.mkdir(parents=True, exist_ok=True) created_paths = [] used_names = set() def get_unique_name(base_name: str, is_file: bool = False) -> str: """Get a unique name, adding numeric suffix if needed.""" extension = '.md' if is_file else '' name = base_name counter = 2 while name + extension in used_names: name = f"{base_name}_{counter}" counter += 1 used_names.add(name + extension) return name def create_structure_recursive(sections: list[MarkdownSection], parent_dir: Path): """Recursively create directory structure.""" for section in sections: safe_name = title_to_filesystem_name(section.title) if section.children: # Create directory for sections with children unique_name = get_unique_name(safe_name) section_dir = parent_dir / unique_name section_dir.mkdir(exist_ok=True) created_paths.append(section_dir) # Create README.md for the section content if it exists if section.content.strip(): readme_path = section_dir / 'README.md' readme_path.write_text(section.content) created_paths.append(readme_path) # Recursively create children create_structure_recursive(section.children, section_dir) else: # Create markdown file for leaf sections unique_name = get_unique_name(safe_name, is_file=True) file_path = parent_dir / f"{unique_name}.md" file_path.write_text(section.content) created_paths.append(file_path) create_structure_recursive(sections, target_dir) return created_paths def explode_markdown_file(input_file: Path, output_dir: Path) -> Path: """Explode a markdown file into a directory structure. Args: input_file: Path to input markdown file output_dir: Path to output directory Returns: Path to the created output directory Raises: FileNotFoundError: If input file doesn't exist PermissionError: If can't create output directory """ input_file = Path(input_file) output_dir = Path(output_dir) if not input_file.exists(): raise FileNotFoundError(f"Input file not found: {input_file}") try: # Parse the markdown file structure sections, front_matter = parse_markdown_structure(input_file) # Create the directory structure created_paths = create_directory_structure(sections, output_dir) # Create front matter file if present if front_matter: front_matter_file = output_dir / '_frontmatter.yml' front_matter_file.write_text(front_matter) return output_dir except PermissionError as e: raise PermissionError(f"Cannot create output directory: {e}") class DirectoryStructureBuilder: """Builder class for creating directory structures from markdown sections.""" def __init__(self, output_dir: Path = None, target_dir: Path = None, max_depth: int = None, file_extension: str = '.md'): # Support both output_dir and target_dir for backward compatibility self.target_dir = Path(output_dir or target_dir) self.output_dir = self.target_dir # Alias for tests self.max_depth = max_depth self.file_extension = file_extension self.created_paths = [] def build(self, sections: list[MarkdownSection]) -> list[Path]: """Build directory structure from sections.""" # Apply depth limiting if specified if self.max_depth is not None: sections = self._limit_depth(sections, self.max_depth) self.created_paths = create_directory_structure(sections, self.target_dir) return self.created_paths def _limit_depth(self, sections: list[MarkdownSection], max_depth: int) -> list[MarkdownSection]: """Recursively limit section depth.""" if max_depth <= 0: return [] limited_sections = [] for section in sections: if section.level <= max_depth: # Create a shallow copy and limit children limited_section = MarkdownSection( level=section.level, title=section.title, content=section.content, line_start=getattr(section, 'line_start', 0), line_end=getattr(section, 'line_end', 0) ) if section.level < max_depth: limited_section.children = self._limit_depth(section.children, max_depth) limited_sections.append(limited_section) return limited_sections def sanitize_heading_text(heading_text: str) -> str: """Remove markdown formatting from heading text. Args: heading_text: Raw heading text with potential markdown formatting Returns: Clean text with markdown formatting removed """ import re # Remove bold and italic formatting cleaned = re.sub(r'\*\*([^*]+)\*\*', r'\1', heading_text) # **bold** cleaned = re.sub(r'\*([^*]+)\*', r'\1', cleaned) # *italic* cleaned = re.sub(r'__([^_]+)__', r'\1', cleaned) # __bold__ cleaned = re.sub(r'_([^_]+)_', r'\1', cleaned) # _italic_ # Remove code formatting cleaned = re.sub(r'`([^`]+)`', r'\1', cleaned) # `code` # Remove links but keep text cleaned = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', cleaned) # [text](url) # Remove other markdown elements cleaned = re.sub(r'[#]+\s*', '', cleaned) # heading markers cleaned = cleaned.strip() return cleaned def generate_safe_filename(heading: str, max_length: int = 100) -> str: """Generate a filesystem-safe filename from a heading. Args: heading: The heading text to convert max_length: Maximum length for the filename Returns: A safe filename suitable for use across platforms """ import re import unicodedata if not heading or not heading.strip(): return 'untitled' # First sanitize markdown formatting cleaned = sanitize_heading_text(heading) # Normalize unicode characters (café -> cafe) cleaned = unicodedata.normalize('NFKD', cleaned) cleaned = ''.join(c for c in cleaned if not unicodedata.combining(c)) # Convert to lowercase cleaned = cleaned.lower() # Remove non-alphanumeric chars except spaces, hyphens, periods, colons, slashes cleaned = re.sub(r'[^\w\s.-:/\\]', '', cleaned) # Replace dots, spaces, hyphens, colons, slashes, backslashes with underscores cleaned = re.sub(r'[.\s:/\\\-]', '_', cleaned) # Collapse multiple underscores into single underscore cleaned = re.sub(r'_+', '_', cleaned) # Remove leading/trailing underscores cleaned = cleaned.strip('_') # Handle empty result if not cleaned: return 'untitled' # Apply length limit, but try to break at word boundaries if len(cleaned) > max_length: truncated = cleaned[:max_length] # Find last underscore before limit last_underscore = truncated.rfind('_') if last_underscore > max_length // 2: # Only if it's not too early truncated = truncated[:last_underscore] cleaned = truncated.rstrip('_') return cleaned or 'untitled' def resolve_filename_conflicts(base_filename: str, existing_files: list[str]) -> str: """Resolve filename conflicts by adding numeric suffixes. Args: base_filename: The desired filename (without extension) existing_files: List of already existing filenames (may include extensions) Returns: A unique filename that doesn't conflict with existing ones """ # Normalize existing files to remove extensions for comparison existing_basenames = set() for filename in existing_files: # Remove common extensions for comparison base = filename for ext in ['.md', '.txt', '.html']: if base.endswith(ext): base = base[:-len(ext)] break existing_basenames.add(base) if base_filename not in existing_basenames: return base_filename # Try adding numeric suffixes counter = 2 while True: candidate = f"{base_filename}_{counter}" if candidate not in existing_basenames: return candidate counter += 1 class FilenameGenerator: """Generator for creating unique, filesystem-safe filenames from headings.""" def __init__(self, max_length: int = 100, separator: str = '_', case_style: str = 'lower', preserve_numbers: bool = False): self.max_length = max_length self.separator = separator self.case_style = case_style self.preserve_numbers = preserve_numbers self.used_filenames = set() def generate(self, heading: str) -> str: """Generate a unique safe filename from a heading.""" import re # Handle numbered headings if preserve_numbers is enabled processed_heading = heading if self.preserve_numbers: # Look for patterns like "1. Introduction" or "10. Advanced Topics" match = re.match(r'^(\d+)\.\s*(.+)$', heading.strip()) if match: number = match.group(1).zfill(2) # Zero-pad to 2 digits title = match.group(2) processed_heading = f"{number}. {title}" # Use the existing generate_safe_filename function base_filename = generate_safe_filename(processed_heading, self.max_length) # Apply case style and separator customization if self.case_style == 'camel': # For camelCase, split on underscores, capitalize each word after first, join without separator parts = base_filename.split('_') if parts: camel_cased = parts[0].lower() for part in parts[1:]: if part: camel_cased += part.capitalize() base_filename = camel_cased else: # Apply separator customization for other styles if self.separator != '_': base_filename = base_filename.replace('_', self.separator) # Apply case style if self.case_style == 'upper': base_filename = base_filename.upper() elif self.case_style == 'title': base_filename = base_filename.title().replace(self.separator, self.separator.lower()) # 'lower' is already default unique_filename = resolve_filename_conflicts(base_filename, list(self.used_filenames)) self.used_filenames.add(unique_filename) return unique_filename def reset(self): """Reset the internal state of used filenames.""" self.used_filenames.clear() class ImplodeOptions: """Options for the implode operation.""" def __init__(self, input_dir: Path = None, output_file: Path = None, preserve_front_matter: bool = True, section_spacing: int = 2, overwrite: bool = False, dry_run: bool = False, verbose: bool = False, preserve_heading_levels: bool = False, include_readme_files: bool = False): self.input_dir = input_dir self.output_file = output_file self.preserve_front_matter = preserve_front_matter self.section_spacing = section_spacing self.overwrite = overwrite self.dry_run = dry_run self.verbose = verbose self.preserve_heading_levels = preserve_heading_levels self.include_readme_files = include_readme_files class ValidationResult: """Result of validation operation.""" def __init__(self, is_valid: bool, errors: list = None): self.is_valid = is_valid self.errors = errors or [] def validate_implode_arguments(options: ImplodeOptions) -> ValidationResult: """Validate arguments for the implode operation. Args: options: Implode options Returns: ValidationResult with is_valid flag and any errors """ errors = [] if not options.input_dir: errors.append("Input directory is required") elif not options.input_dir.exists(): errors.append(f"Input directory does not exist: {options.input_dir}") elif not options.input_dir.is_dir(): errors.append(f"Input path is not a directory: {options.input_dir}") if options.output_file and not options.overwrite: try: if options.output_file.exists(): errors.append(f"Output file already exists: {options.output_file}") except (PermissionError, OSError) as e: errors.append(f"Cannot access output file: {e}") return ValidationResult(is_valid=len(errors) == 0, errors=errors) class ImplodeResult: """Result of implode operation.""" def __init__(self, success: bool, output_file: Path = None, errors: list = None, preview: str = None, processing_info: list = None): self.success = success self.output_file = output_file self.errors = errors or [] self.preview = preview self.processing_info = processing_info or [] @property def error_message(self) -> str: """Get the first error message or None.""" return self.errors[0] if self.errors else None def cli_implode_directory(input_dir: Path = None, output_file: Path = None, options: ImplodeOptions = None, dry_run: bool = False, verbose: bool = False, overwrite: bool = False, **kwargs) -> ImplodeResult: """Implode a directory structure back into a markdown file using variant system. Args: input_dir: Directory containing markdown files to implode options: Options for the implode operation output_file: Output file path (alternative to options.output_file) dry_run: Preview mode without creating files verbose: Provide detailed processing information overwrite: Overwrite existing output file **kwargs: Additional arguments for compatibility Returns: ImplodeResult with success flag and output file path (legacy format) """ from markitect.explode_variants import get_variant_factory # Handle different calling patterns if options is None: options = ImplodeOptions( output_file=output_file, preserve_front_matter=True, section_spacing=2, dry_run=dry_run ) else: # Update options with any provided keyword arguments if output_file and not options.output_file: options.output_file = output_file if dry_run: options.dry_run = dry_run # Determine input directory if input_dir is None: return ImplodeResult(success=False, errors=["Input directory is required"]) input_dir = Path(input_dir) if not input_dir.exists() or not input_dir.is_dir(): return ImplodeResult(success=False, errors=[f"Input directory does not exist: {input_dir}"]) # Determine output file if options.output_file is None: options.output_file = input_dir.parent / f"{input_dir.name}_imploded.md" processing_info = [] preview_content = None try: # Use variant factory to auto-detect and implode factory = get_variant_factory() # Detect variant from directory structure detection_result = factory.detect_variant(input_dir) processing_info.append(f"Processing directory: {input_dir}") processing_info.append(f"Detected variant: {detection_result.variant.value}") processing_info.append(f"Confidence: {detection_result.confidence}") processing_info.append(f"Manifest found: {detection_result.manifest_found}") # Get the appropriate variant variant = factory.create_variant(detection_result.variant) # Count files for verbose output md_files = list(input_dir.rglob("*.md")) # Exclude manifest.md from count md_files = [f for f in md_files if f.name != "manifest.md"] processing_info.append(f"Found {len(md_files)} markdown files in directory") # Handle dry run mode differently if dry_run: # For dry run, temporarily disable dry_run to generate content options.dry_run = False variant_result = variant.implode(input_dir, options) if not variant_result.success: return ImplodeResult( success=False, errors=variant_result.errors, processing_info=processing_info ) # Read the generated content for preview if options.output_file.exists(): preview_content = options.output_file.read_text(encoding='utf-8') # Remove the file since this is dry run options.output_file.unlink() else: preview_content = "No content generated" return ImplodeResult( success=True, output_file=options.output_file, preview=preview_content, processing_info=processing_info ) # Normal mode - perform the implode operation variant_result = variant.implode(input_dir, options) if not variant_result.success: return ImplodeResult( success=False, errors=variant_result.errors, processing_info=processing_info ) # Return successful result in legacy format return ImplodeResult( success=True, output_file=variant_result.output_file, processing_info=processing_info ) except Exception as e: processing_info.append(f"Error during implode: {e}") return ImplodeResult( success=False, errors=[f"Error during implode: {e}"], processing_info=processing_info ) def _adjust_heading_levels(content: str, base_level: int) -> str: """Adjust heading levels in markdown content. Args: content: Markdown content base_level: Base level to add to existing headings Returns: Content with adjusted heading levels """ import re def adjust_heading(match): current_level = len(match.group(1)) new_level = min(current_level + base_level, 6) # Max 6 heading levels return '#' * new_level + ' ' + match.group(2) return re.sub(r'^(#{1,6})\s+(.+)$', adjust_heading, content, flags=re.MULTILINE) def combine_markdown_files(file_paths: list[Path], section_spacing: int = 2) -> str: """Combine multiple markdown files into a single content string. Args: file_paths: List of markdown file paths to combine section_spacing: Number of blank lines between sections Returns: Combined markdown content as a string """ combined_parts = [] for file_path in file_paths: if file_path.exists() and file_path.is_file(): content = file_path.read_text().strip() if content: combined_parts.append(content) spacing = "\n" * (section_spacing + 1) # +1 for the natural line break return spacing.join(combined_parts) def preserve_markdown_formatting(file_paths: list[Path]) -> str: """Preserve markdown formatting while combining files. Args: file_paths: List of markdown file paths Returns: Combined content with all formatting preserved """ # This function focuses on preserving formatting during combination # For now, it's equivalent to combine_markdown_files but could be extended # with specific formatting preservation logic return combine_markdown_files(file_paths, section_spacing=2) def handle_index_files(directory: Path) -> str: """Handle index.md files as parent section content. Args: directory: Directory to scan for index files Returns: Combined content from all index files and other markdown files """ all_content = [] # Collect all markdown files including index files markdown_files = [] # First, collect index files and regular files separately for path in directory.rglob("*.md"): if path.is_file(): markdown_files.append(path) # Sort files hierarchically: depth-first traversal with index.md files first in each directory def hierarchical_sort_key(path: Path): # Calculate relative path from the root directory try: rel_path = path.relative_to(directory) except ValueError: rel_path = path # Build path components for hierarchical ordering path_parts = list(rel_path.parts) # Index files come first within their directory is_index = path.name == "index.md" # For depth-first traversal with index.md first: # 1. Sort by directory path components # 2. Within each directory, index.md comes first (priority 0), others come after (priority 1) # 3. For non-index files, sort alphabetically by filename if is_index: # Index files: replace filename with empty string and priority 0 sort_parts = path_parts[:-1] + ['', 0] else: # Regular files: keep full path with priority 1 sort_parts = path_parts[:-1] + [path_parts[-1], 1] return sort_parts markdown_files.sort(key=hierarchical_sort_key) # Combine all content for file_path in markdown_files: content = file_path.read_text().strip() if content: all_content.append(content) # Combine with proper spacing return "\n\n\n".join(all_content) def process_front_matter(content_or_path) -> tuple[dict, str]: """Process YAML front matter from markdown content or file. Args: content_or_path: Markdown content string or Path to markdown file Returns: Tuple of (front_matter_dict, content_without_front_matter) """ import re import yaml from pathlib import Path # Handle both string content and file paths if isinstance(content_or_path, (str, Path)): if isinstance(content_or_path, Path): if content_or_path.exists(): content = content_or_path.read_text() else: return {}, "" else: content = content_or_path else: content = str(content_or_path) # Match YAML front matter fm_match = re.match(r'^---\n(.*?)\n---\n(.*)$', content, re.DOTALL) if fm_match: front_matter_yaml = fm_match.group(1) content_without_fm = fm_match.group(2).strip() try: front_matter = yaml.safe_load(front_matter_yaml) return front_matter or {}, content_without_fm except yaml.YAMLError: # If YAML parsing fails, return content as-is return {}, content else: return {}, content def aggregate_content(directory: Path, output_file: Path = None, preserve_structure: bool = True, preserve_front_matter: bool = False) -> str: """Aggregate content from a directory structure into a single markdown document. Args: directory: Source directory containing markdown files output_file: Optional output file path preserve_structure: Whether to preserve hierarchical structure preserve_front_matter: Whether to preserve and consolidate front matter Returns: Aggregated markdown content """ # Collect all markdown files markdown_files = [] for path in directory.rglob("*.md"): if path.is_file() and path.name.lower() not in ["readme.md"]: # Exclude output file if specified if output_file and path == output_file: continue markdown_files.append(path) # Sort files for consistent ordering markdown_files.sort() if preserve_front_matter: # Handle front matter consolidation consolidator = FrontMatterConsolidator(conflict_strategy="merge") consolidated_fm, combined_content = consolidator.consolidate(markdown_files) if consolidated_fm: import yaml # Add front matter to the beginning front_matter_yaml = yaml.dump(consolidated_fm, default_flow_style=False).strip() return f"---\n{front_matter_yaml}\n---\n\n{combined_content}" else: return combined_content elif preserve_structure: # Handle index files and hierarchy - use the comprehensive approach return handle_index_files(directory) else: return combine_markdown_files(markdown_files) class ContentAggregator: """Aggregator for combining markdown content from multiple sources.""" def __init__(self, section_spacing: int = 2, preserve_formatting: bool = True, handle_front_matter: bool = True, include_toc: bool = False, recursive: bool = True, sort_files: bool = True): self.section_spacing = section_spacing self.preserve_formatting = preserve_formatting self.handle_front_matter = handle_front_matter self.include_toc = include_toc self.recursive = recursive self.sort_files = sort_files self.aggregated_content = [] def add_file(self, file_path: Path): """Add a file to the aggregation.""" if file_path.exists() and file_path.is_file(): content = file_path.read_text().strip() if content: self.aggregated_content.append(content) def add_content(self, content: str): """Add raw content to the aggregation.""" if content.strip(): self.aggregated_content.append(content.strip()) def get_combined_content(self) -> str: """Get the combined content.""" spacing = "\n" * (self.section_spacing + 1) return spacing.join(self.aggregated_content) def aggregate(self, directory: Path) -> str: """Aggregate content from a directory. Args: directory: Directory to aggregate content from Returns: Aggregated content string """ # Use the existing aggregate_content function but with our settings return aggregate_content( directory, preserve_structure=True, preserve_front_matter=self.handle_front_matter ) def reset(self): """Reset the aggregator.""" self.aggregated_content.clear() class FrontMatterConsolidator: """Consolidator for handling front matter from multiple files.""" def __init__(self, conflict_strategy: str = "merge"): self.front_matters = [] self.consolidated = {} self.conflict_strategy = conflict_strategy def add_front_matter(self, front_matter: dict): """Add front matter from a file.""" if front_matter: self.front_matters.append(front_matter) def consolidate(self, files: list[Path] = None) -> tuple[dict, str]: """Consolidate front matter from files and return combined content. Args: files: List of file paths to process (optional if front matter already added) Returns: Tuple of (consolidated_front_matter, combined_content) """ if files: # Process files and extract front matter all_content = [] for file_path in files: front_matter, content = process_front_matter(file_path) if front_matter: self.add_front_matter(front_matter) if content.strip(): all_content.append(content.strip()) combined_content = "\n\n\n".join(all_content) else: combined_content = "" # Consolidate front matter consolidated = {} for fm in self.front_matters: for key, value in fm.items(): if key in consolidated: # Handle conflicts - for now, use list aggregation if not isinstance(consolidated[key], list): consolidated[key] = [consolidated[key]] if isinstance(value, list): consolidated[key].extend(value) else: consolidated[key].append(value) else: consolidated[key] = value self.consolidated = consolidated return consolidated, combined_content def to_yaml(self) -> str: """Convert consolidated front matter to YAML string.""" import yaml if self.consolidated: return yaml.dump(self.consolidated, default_flow_style=False) return "" @register_plugin("markdown_commands") class MarkdownCommandsPlugin(CommandPlugin): """Plugin providing core markdown file operations.""" @property def metadata(self) -> PluginMetadata: return PluginMetadata( name="markdown_commands", version="1.0.0", description="Core markdown file operations with md- prefixes", author="MarkiTect Core Team", plugin_type=PluginType.COMMAND, markitect_version=">=0.1.0" ) def get_commands(self) -> Dict[str, Any]: """Return the markdown commands with md- prefixes.""" return { 'md-ingest': md_ingest_command, 'md-get': md_get_command, 'md-list': md_list_command, 'md-render': md_render_command, 'themes': themes_list_command, 'md-index': md_index_command, 'md-explode': md_explode_command, 'md-implode': md_implode_command, 'md-package': md_package_command, 'md-transclude': md_transclude_command } # Define commands as standalone functions @click.command() @click.argument('file_path', type=click.Path(exists=True)) @click.pass_context def md_ingest_command(ctx, file_path): """ Process and store a markdown file. Ingests a markdown file into the MarkiTect system, parsing its content, extracting front matter, generating AST cache, and storing metadata in the database. FILE_PATH: Path to the markdown file to process Examples: markitect md-ingest README.md markitect md-ingest docs/guide.md """ config = ctx.obj or {} try: if config.get('verbose', False): click.echo(f"Processing file: {file_path}") # Initialize document manager with database manager from markitect.clean_document_manager import CleanDocumentManager doc_manager = CleanDocumentManager(config.get('db_manager')) # Process the file result = doc_manager.ingest_file(Path(file_path)) if config.get('verbose', False): click.echo(f"Processing results:") click.echo(f" File: {result['metadata']['filename']}") click.echo(f" AST nodes: {len(result['ast'])} nodes") click.echo(f" Cache file: {result['ast_cache_path']}") click.echo(f" Parse time: {result['parse_time']:.2f}s") click.echo(f" Cache time: {result['cache_time']:.2f}s") click.echo(f"✓ Successfully ingested: {Path(file_path).name}") except Exception as e: click.echo(f"Error processing file: {e}", err=True) raise click.Abort() @click.command() @click.argument('file_path', type=str) @click.option('--output', '-o', default='-', help='Output file (default: stdout)') @click.pass_context def md_get_command(ctx, file_path, output): """ Retrieve content from a markdown file with metadata. Fetches a markdown file from the MarkiTect system, returning its content along with metadata, front matter, and optional AST information. FILE_PATH: Path to the markdown file to retrieve Examples: markitect md-get README.md markitect md-get docs/guide.md --output processed.md """ config = ctx.obj or {} try: # Initialize document manager from markitect.clean_document_manager import CleanDocumentManager doc_manager = CleanDocumentManager(config.get('db_manager')) # Get file information result = doc_manager.get_file(file_path) # Output to file or stdout if output == '-': click.echo(result['content']) else: output_path = Path(output) output_path.write_text(result['content'], encoding='utf-8') click.echo(f"✓ Content written to: {output_path}") if config.get('verbose', False): metadata = result['metadata'] click.echo(f"File: {metadata['filename']}", err=True) click.echo(f"Size: {metadata.get('size', 'unknown')} bytes", err=True) click.echo(f"Modified: {metadata.get('modified', 'unknown')}", err=True) except FileNotFoundError as e: click.echo(f"Error: File not found in database - {e}", err=True) raise click.Abort() except Exception as e: click.echo(f"Error retrieving file: {e}", err=True) raise click.Abort() @click.command() @click.option('--output-format', '-f', default='table', type=click.Choice(['table', 'json', 'yaml', 'simple']), help='Output format (default: table)') @click.option('--names-only', is_flag=True, help='Show only filenames, no metadata') @click.pass_context def md_list_command(ctx, output_format, names_only): """ List all markdown files in the MarkiTect system. Shows a list of all ingested markdown files with their metadata, including file sizes, modification dates, and processing status. Examples: markitect md-list markitect md-list --output-format json markitect md-list --names-only """ config = ctx.obj or {} try: # Initialize document manager from markitect.clean_document_manager import CleanDocumentManager doc_manager = CleanDocumentManager(config.get('db_manager')) # Get file listing files = doc_manager.list_files() if not files: click.echo("No markdown files found in the system.") return if names_only: for file_info in files: click.echo(file_info['filename']) elif output_format == 'json': click.echo(json.dumps(files, indent=2)) elif output_format == 'yaml': import yaml click.echo(yaml.dump(files, default_flow_style=False)) else: # table or simple click.echo(f"{'Filename':<40} {'Size':<10} {'Modified':<20}") click.echo("-" * 72) for file_info in files: size = file_info.get('size', 'unknown') modified = file_info.get('modified', 'unknown') click.echo(f"{file_info['filename']:<40} {size:<10} {modified:<20}") except Exception as e: click.echo(f"Error listing files: {e}", err=True) raise click.Abort() @click.command() @click.argument('input_file', type=click.Path(exists=True)) @click.option('--output', '-o', type=click.Path(), help='Output HTML file (default: .html)') @click.option('--theme', type=ThemeType(), help='Theme(s) to apply. Single: dark or layered: dark,academic or light,github,corporate. Available: basic, github, dark, academic, light, corporate, startup') @click.option('--css', type=click.Path(), help='Custom CSS file to include') @click.option('--edit', is_flag=True, help='Open in interactive edit mode with stable section editing') @click.option('--insert', is_flag=True, help='Open in interactive insert mode with heading protection (levels 1-3 read-only)') @click.option('--editor-theme', default='github', type=click.Choice(['github', 'monokai', 'tomorrow', 'dark']), help='Editor theme for live edit mode (default: github)') @click.option('--keyboard-shortcuts', is_flag=True, default=True, help='Enable keyboard shortcuts in live edit mode') @click.option('--use-publication-dir', is_flag=True, help='Use publication directory for output') @click.option('--dont-use-publication-dir', is_flag=True, help='Don\'t use publication directory for output') @click.option('--nodogtag', is_flag=True, help='Don\'t add HTML generation dogtag at end of document') @click.option('--ship-assets', is_flag=True, default=None, help='Copy referenced assets to output directory') @click.option('--no-ship-assets', is_flag=True, help='Don\'t copy referenced assets to output directory') @click.pass_context def md_render_command(ctx, input_file, output, theme, css, edit, insert, editor_theme, keyboard_shortcuts, use_publication_dir, dont_use_publication_dir, nodogtag, ship_assets, no_ship_assets): """ Render a markdown file to HTML with basic templates and live preview capabilities. Converts a markdown file to HTML using customizable layered themes and styles. Supports live editing mode with real-time preview and syntax highlighting. Theme Layering: - Single themes: basic, github, dark, academic, light, corporate, startup - Layered themes: dark,academic combines dark UI with academic typography - Later themes override settings from earlier themes INPUT_FILE: Path to the markdown file to render Examples: markitect md-render README.md markitect md-render docs/guide.md --output guide.html --theme github markitect md-render draft.md --edit --editor-theme monokai markitect md-render draft.md --insert --editor-theme monokai markitect md-render doc.md --theme dark --css custom.css markitect md-render doc.md --theme dark,academic markitect md-render doc.md --theme light,github,corporate """ config = ctx.obj or {} try: input_path = Path(input_file) # Validate mode flags if edit and insert: raise click.BadParameter("Cannot use both --edit and --insert flags simultaneously. Choose one mode.") # Validate asset shipping flags if ship_assets and no_ship_assets: raise click.BadParameter("Cannot use both --ship-assets and --no-ship-assets flags simultaneously.") # Determine output path with environment variable support if output: output_path = Path(output) # If output is a directory, use canonical filename within that directory if output_path.is_dir() or (not output_path.suffix and not output_path.exists()): # Ensure the directory exists output_path.mkdir(parents=True, exist_ok=True) # Use canonical filename (input name + .html) in the specified directory canonical_filename = input_path.with_suffix('.html').name output_path = output_path / canonical_filename output_is_directory = True else: output_is_directory = False else: # Check for environment variable import os env_output_dir = os.environ.get('MARKITECT_OUTPUT_DIR') if env_output_dir: output_path = Path(env_output_dir) output_path.mkdir(parents=True, exist_ok=True) canonical_filename = input_path.with_suffix('.html').name output_path = output_path / canonical_filename output_is_directory = True else: output_path = input_path.with_suffix('.html') output_is_directory = False # Use publication directory if specified if use_publication_dir and not dont_use_publication_dir: pub_dir = get_publication_directory() ensure_publication_directory(pub_dir) output_path = pub_dir / get_output_filename(input_path) output_is_directory = True # Publication dir is always a directory output # Determine if we should ship assets should_ship_assets = False if no_ship_assets: should_ship_assets = False elif ship_assets: should_ship_assets = True elif output_is_directory: # Default: ship assets when output is a directory should_ship_assets = True # Discover and ship assets if needed if should_ship_assets: if output_is_directory: # For directory output, ship to the same directory as the HTML file _ship_assets(input_path, output_path.parent, config.get('verbose', False)) # For file output, we don't ship assets (shouldn't reach here anyway) # Initialize clean document manager from markitect.clean_document_manager import CleanDocumentManager doc_manager = CleanDocumentManager(config.get('db_manager')) # Render the file if edit: # Edit mode - generate HTML with editing capabilities result = doc_manager.render_file(input_file, str(output_path), template=theme, css=css, edit_mode=True, editor_theme=editor_theme, keyboard_shortcuts=keyboard_shortcuts, nodogtag=nodogtag) click.echo(f"✓ Rendered with interactive editing capabilities to: {output_path}") if config.get('verbose', False): click.echo(f"Editor theme: {editor_theme}") click.echo(f"Keyboard shortcuts: {'enabled' if keyboard_shortcuts else 'disabled'}") click.echo(f"Theme: {theme or 'default'}") click.echo(f"CSS: {css or 'default'}") elif insert: # Insert mode - generate HTML with insert capabilities and heading protection result = doc_manager.render_file(input_file, str(output_path), template=theme, css=css, insert_mode=True, editor_theme=editor_theme, keyboard_shortcuts=keyboard_shortcuts, nodogtag=nodogtag) click.echo(f"✓ Rendered with interactive insert capabilities to: {output_path}") if config.get('verbose', False): click.echo(f"Editor theme: {editor_theme}") click.echo(f"Keyboard shortcuts: {'enabled' if keyboard_shortcuts else 'disabled'}") click.echo(f"Heading protection: levels 1-3 read-only") click.echo(f"Theme: {theme or 'default'}") click.echo(f"CSS: {css or 'default'}") else: # Static render result = doc_manager.render_file(input_file, str(output_path), template=theme, css=css, edit_mode=False, insert_mode=False, nodogtag=nodogtag) click.echo(f"✓ Rendered to: {output_path}") if config.get('verbose', False): click.echo(f"Theme: {theme or 'default'}") click.echo(f"CSS: {css or 'default'}") except Exception as e: click.echo(f"Error rendering file: {e}", err=True) raise click.Abort() @click.command() @click.option('--format', type=click.Choice(['table', 'list', 'json']), default='table', help='Output format: table (default), list, or json') @click.option('--scope', type=click.Choice(['mode', 'ui', 'document', 'branding', 'all']), default='all', help='Filter themes by scope: mode (light/dark), ui (editor interface), document (typography), branding (colors), or all (default)') def themes_list_command(format, scope): """ List all available themes and their properties. Shows the available themes that can be used with md-render and other commands. Themes can be used individually or combined in layers. Examples: markitect themes list markitect themes list --format json markitect themes list --scope ui markitect themes list --scope document --format list """ from tabulate import tabulate import json # Get theme data layered_themes = [] legacy_mappings = [] # Process layered themes for theme_name, theme_data in LAYERED_THEMES.items(): theme_scope = theme_data['scope'] if scope == 'all' or scope == theme_scope: properties = theme_data['properties'] # Get key properties for display based on scope key_props = [] if theme_scope == 'mode': if 'body_background' in properties: key_props.append(f"bg:{properties['body_background']}") if 'link_color' in properties: key_props.append(f"links:{properties['link_color']}") elif theme_scope == 'ui': if 'editor_panel_bg' in properties: key_props.append(f"panel:{properties['editor_panel_bg']}") if 'editor_text_color' in properties: key_props.append(f"text:{properties['editor_text_color']}") if 'editor_focus_color' in properties: key_props.append(f"focus:{properties['editor_focus_color']}") elif theme_scope == 'document': if 'font_family' in properties: family = properties['font_family'].split(',')[0].strip().strip('"\'') key_props.append(f"font:{family}") if 'link_color' in properties: key_props.append(f"links:{properties['link_color']}") elif theme_scope == 'branding': if 'accent_color' in properties: key_props.append(f"accent:{properties['accent_color']}") layered_themes.append({ 'name': theme_name, 'scope': theme_scope, 'properties': ', '.join(key_props) if key_props else 'default styling' }) # Process legacy mappings for legacy_name, expanded_themes in LEGACY_THEME_MAPPING.items(): legacy_mappings.append({ 'name': legacy_name, 'expands_to': ' + '.join(expanded_themes) }) if format == 'json': # JSON output output_data = { 'layered_themes': layered_themes, 'legacy_mappings': legacy_mappings, 'usage': { 'single': 'markitect md-render file.md --theme dark', 'layered': 'markitect md-render file.md --theme dark,academic', 'legacy': 'markitect md-render file.md --theme github' } } click.echo(json.dumps(output_data, indent=2)) elif format == 'list': # Simple list output click.echo("Available themes:") for theme in layered_themes: click.echo(f" {theme['name']} ({theme['scope']})") if legacy_mappings: click.echo("\nLegacy mappings:") for mapping in legacy_mappings: click.echo(f" {mapping['name']} -> {mapping['expands_to']}") else: # table format (default) # Table output if layered_themes: click.echo("Layered themes (can be combined):") headers = ['Theme', 'Scope', 'Key Properties'] table_data = [[t['name'], t['scope'], t['properties']] for t in layered_themes] click.echo(tabulate(table_data, headers=headers, tablefmt='grid')) if legacy_mappings: click.echo("\nLegacy theme mappings:") headers = ['Legacy Name', 'Expands To'] table_data = [[m['name'], m['expands_to']] for m in legacy_mappings] click.echo(tabulate(table_data, headers=headers, tablefmt='grid')) click.echo("\nUsage examples:") click.echo(" Single theme: markitect md-render file.md --theme dark") click.echo(" Layered themes: markitect md-render file.md --theme dark,academic") click.echo(" Legacy mapping: markitect md-render file.md --theme github") @click.command() @click.argument('directory', type=click.Path(exists=True, file_okay=False, dir_okay=True)) @click.option('--output', '-o', type=click.Path(), help='Output index file (default: /index.html)') @click.option('--theme', type=ThemeType(), help='Theme(s) to apply to index. Single: dark or layered: dark,github. Available: basic, github, dark, academic, light, corporate, startup') @click.option('--recursive', '-r', is_flag=True, help='Include subdirectories recursively') @click.pass_context def md_index_command(ctx, directory, output, theme, recursive): """ Generate an index page for HTML files in a directory. Creates an HTML index page listing all HTML files in the specified directory, with links and extracted titles. DIRECTORY: Path to the directory to index Examples: markitect md-index docs/ markitect md-index . --recursive --output site-index.html """ config = ctx.obj or {} try: dir_path = Path(directory) # Determine output path if output: output_path = Path(output) else: output_path = dir_path / 'index.html' # Find HTML files html_files = find_html_files(dir_path, recursive=recursive) if not html_files: click.echo(f"No HTML files found in: {dir_path}") # Create file info list, excluding the index file itself file_info_list = [] for html_file in html_files: if html_file.name != output_path.name: title = extract_html_title(html_file) # Calculate relative path from output directory try: relative_path = html_file.relative_to(dir_path) except ValueError: # If html_file is not under dir_path, use absolute path relative_path = html_file file_info_list.append({ 'path': html_file, 'title': title, 'relative_path': str(relative_path) }) # Generate index page title index_title = f"Index - {dir_path.name}" # Generate HTML content html_content = generate_index_html(file_info_list, index_title, theme) # Write index file output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(html_content, encoding='utf-8') click.echo(f"✓ Generated index: {output_path}") click.echo(f"📄 Indexed {len(file_info_list)} files") if config.get('verbose', False): click.echo("Files indexed:") for file_info in file_info_list: click.echo(f" {file_info['title']} ({file_info['relative_path']})") except Exception as e: click.echo(f"Error generating index: {e}", err=True) raise click.Abort() # ============================================================================== # Enhanced Explode/Implode Commands with Variant System # ============================================================================== @click.command() @click.argument('input_file', type=click.Path(exists=True)) @click.option('--output-dir', '-o', type=click.Path(), help='Output directory for exploded files (default: .mdd)') @click.option('--variant', type=click.Choice(['flat', 'hierarchical', 'semantic']), default='flat', help='Directory organization variant (default: flat)') @click.option('--max-depth', type=int, default=10, help='Maximum directory nesting depth (default: 10)') @click.option('--create-manifest/--no-manifest', default=True, help='Create manifest.md for reversibility (default: true)') @click.option('--dry-run', is_flag=True, help='Show what would be done without creating files') @click.option('--verbose', '-v', is_flag=True, help='Show detailed output during processing') @click.pass_context def md_explode_command(ctx, input_file, output_dir, variant, max_depth, create_manifest, dry_run, verbose): """ Explode a markdown file into a directory structure. Takes a markdown file with hierarchical headings (# ## ### etc.) and creates a directory structure where each heading becomes a directory or file, with content distributed appropriately. Supports multiple organization variants for different use cases. INPUT_FILE: Path to the markdown file to explode Variants: flat: Creates directories based on h1 headings (traditional) hierarchical: Numbered structure reflecting heading hierarchy semantic: Content-based grouping (parts, chapters, appendices) Examples: # Explode book.md into book.mdd/ directory (flat structure) markitect md-explode book.md # Use hierarchical structure with numbered directories markitect md-explode book.md --variant hierarchical # Explode into custom output directory markitect md-explode book.md --output-dir /path/to/chapters # Preview what would be created markitect md-explode book.md --dry-run --verbose --variant semantic # Explode without creating manifest (legacy mode) markitect md-explode book.md --no-manifest """ config = ctx.obj or {} try: input_path = Path(input_file) # Import variant system from markitect.explode_variants import ExplodeVariant, ExplodeOptions, get_variant_factory # Convert string variant to enum try: variant_enum = ExplodeVariant(variant) except ValueError: click.echo(f"❌ Error: Unknown variant '{variant}'. Available: flat, hierarchical, semantic", err=True) raise click.Abort() # Determine output directory if output_dir: output_path = Path(output_dir) else: suffix = ".mdd" if create_manifest else "_exploded" output_path = input_path.parent / f"{input_path.stem}{suffix}" is_verbose = verbose or config.get('verbose', False) # Create explode options options = ExplodeOptions( variant=variant_enum, output_dir=output_path, max_depth=max_depth, create_manifest=create_manifest, dry_run=dry_run, verbose=is_verbose ) if dry_run: click.echo(f"📋 Would explode using {variant.title()} Structure") click.echo(f"📁 Input file: {input_path}") click.echo(f"📁 Output directory: {output_path}") click.echo(f"📄 Create manifest: {create_manifest}") return # Use the variant system to explode the file factory = get_variant_factory() variant_instance = factory.create_variant(variant_enum) result = variant_instance.explode(input_path, options) if not result.success: click.echo(f"❌ Error exploding markdown file:", err=True) for error in result.errors: click.echo(f" {error}", err=True) if result.warnings: click.echo("⚠️ Warnings:") for warning in result.warnings: click.echo(f" {warning}") raise click.Abort() click.echo(f"✅ Successfully exploded markdown file using {variant_instance.name}!") click.echo(f"📁 Created structure in: {result.output_directory}") if result.manifest_path: click.echo(f"📄 Created manifest: {result.manifest_path.name}") if is_verbose: click.echo(f"📄 Input file: {input_path}") click.echo(f"🔧 Variant used: {result.variant_used.value}") if result.files_created: click.echo(f"📄 Created {len(result.files_created)} files:") for file_path in sorted(result.files_created): try: relative_path = file_path.relative_to(result.output_directory) click.echo(f" {relative_path}") except ValueError: click.echo(f" {file_path}") except Exception as e: click.echo(f"❌ Error exploding markdown file: {e}", err=True) raise click.Abort() @click.command() @click.argument('input_dir', type=click.Path(exists=True, file_okay=False, dir_okay=True)) @click.option('--output', '-o', type=click.Path(), help='Output markdown file (default: _imploded.md)') @click.option('--force-variant', type=click.Choice(['flat', 'hierarchical', 'semantic']), help='Force specific variant instead of auto-detection') @click.option('--dry-run', is_flag=True, help='Preview what would be created without writing files') @click.option('--verbose', '-v', is_flag=True, help='Show detailed processing information') @click.option('--overwrite', is_flag=True, help='Overwrite existing output file') @click.option('--section-spacing', type=int, default=2, help='Number of blank lines between sections (default: 2)') @click.option('--preserve-front-matter/--no-front-matter', default=True, help='Preserve YAML front matter from files (default: preserve)') @click.pass_context def md_implode_command(ctx, input_dir, output, force_variant, dry_run, verbose, overwrite, section_spacing, preserve_front_matter): """ Implode a directory structure back into a single markdown file. Takes a directory structure (like one created by md-explode) and combines all markdown files back into a single document, reconstructing the original hierarchical heading structure. Automatically detects the variant used during explosion for optimal reconstruction. INPUT_DIR: Path to the directory to implode Auto-Detection: The command automatically detects the variant type by analyzing: - manifest.md file (highest priority) - Directory naming patterns - Content organization structure Examples: # Implode exploded directory back to markdown (auto-detect variant) markitect md-implode book.mdd/ # Force specific variant instead of auto-detection markitect md-implode chapters/ --force-variant hierarchical # Specify custom output file markitect md-implode chapters/ --output reconstructed.md # Preview what would be created with detection info markitect md-implode content/ --dry-run --verbose """ config = ctx.obj or {} try: input_path = Path(input_dir) # Determine output file if output: output_path = Path(output) else: output_path = input_path.parent / f"{input_path.name}_imploded.md" # Check if output file exists and overwrite not specified if output_path.exists() and not overwrite: click.echo(f"❌ Error: Output file {output_path} already exists. Use --overwrite to overwrite.", err=True) raise click.Abort() # Create implode options options = ImplodeOptions( output_file=output_path, preserve_front_matter=preserve_front_matter, section_spacing=section_spacing, overwrite=overwrite ) if dry_run: # Collect files that would be processed markdown_files = [] for path in input_path.rglob("*.md"): if path.is_file() and path.name.lower() != "readme.md": markdown_files.append(path) markdown_files.sort() click.echo(f"📋 Would implode directory structure") click.echo(f"📁 Source directory: {input_path}") click.echo(f"📄 Would create file: {output_path}") click.echo(f"📄 Would process {len(markdown_files)} files") if verbose: click.echo(f"\nℹ️ Files to process:") for file_path in markdown_files: try: relative_path = file_path.relative_to(input_path) click.echo(f" {relative_path}") except ValueError: click.echo(f" {file_path}") else: # Actually perform the implode operation result = cli_implode_directory(input_dir=input_path, options=options) if result.success: click.echo(f"✅ Successfully imploded directory") click.echo(f"📁 Source directory: {input_path}") click.echo(f"📄 Created file: {result.output_file}") if verbose: # Count processed files for feedback markdown_files = [] for path in input_path.rglob("*.md"): if path.is_file() and path.name.lower() != "readme.md": markdown_files.append(path) click.echo(f"📄 Processed {len(markdown_files)} files") else: click.echo(f"❌ Failed to implode directory:", err=True) for error in result.errors: click.echo(f" {error}", err=True) raise click.Abort() except Exception as e: click.echo(f"❌ Error during implode: {e}", err=True) if ctx.obj and ctx.obj.get('debug'): import traceback traceback.print_exc() raise click.Abort() # ============================================================================== # Advanced Packaging Commands # ============================================================================== @click.command() @click.argument('action', type=click.Choice(['create', 'extract', 'info'])) @click.argument('input_path', type=click.Path(exists=True)) @click.option('--output', '-o', type=click.Path(), help='Output path for package or extraction') @click.option('--format', '-f', type=click.Choice(['mdz', 'mdt']), default='mdz', help='Package format (mdz for Markdown Zip, mdt for Markdown Transcluded)') @click.option('--compression', '-c', type=click.IntRange(0, 9), default=6, help='Compression level for MDZ packages (0-9)') @click.option('--include-assets', is_flag=True, default=True, help='Include assets when creating packages') @click.option('--variables', type=click.Path(exists=True), help='JSON file with variables for MDT processing') @click.option('--dry-run', is_flag=True, help='Show what would be done without making changes') @click.option('--verbose', '-v', is_flag=True, help='Enable verbose output') @click.pass_context def md_package_command(ctx, action, input_path, output, format, compression, include_assets, variables, dry_run, verbose): """ Advanced package management for markdown documents. Actions: - create: Create MDZ/MDT package from source - extract: Extract package contents - info: Show package information Examples: markitect md-package create document.md --format mdz --output document.mdz markitect md-package extract document.mdz --output extracted/ markitect md-package info document.mdz """ try: input_path = Path(input_path) if action == 'create': # Import packaging modules from markitect.packaging.mdz_variant import MdzVariant from markitect.packaging.transclusion import TransclusionEngine if not output: if format == 'mdz': output = input_path.with_suffix('.mdz') else: output = input_path.with_suffix('.mdt') else: output = Path(output) if verbose: click.echo(f"📦 Creating {format.upper()} package") click.echo(f"📄 Source: {input_path}") click.echo(f"📦 Output: {output}") if dry_run: click.echo("🔍 Dry run - no files would be created") return if format == 'mdz': mdz = MdzVariant() result = mdz.create_package( source_path=input_path, options={ 'output_path': output, 'compression_level': compression } ) click.echo(f"✅ MDZ package created successfully") click.echo(f"📦 Package: {result.get('package_path', output)}") click.echo(f"📊 Assets embedded: {result.get('assets_embedded', 0)}") click.echo(f"💾 Package size: {result.get('package_size', 0):,} bytes") else: # mdt format if not input_path.is_file(): click.echo("❌ MDT format requires a single markdown file", err=True) raise click.Abort() # For MDT, we just copy the file with transclusion processing content = input_path.read_text(encoding='utf-8') # Process with transclusion engine if variables provided if variables: variables_path = Path(variables) if variables_path.exists(): import json var_data = json.loads(variables_path.read_text()) engine = TransclusionEngine( base_path=input_path.parent, variables=var_data ) content = engine.process_content(content) output.write_text(content, encoding='utf-8') click.echo(f"✅ MDT template created successfully") click.echo(f"📄 Template: {output}") elif action == 'extract': from markitect.packaging.mdz_variant import MdzVariant if not output: output = input_path.parent / f"{input_path.stem}_extracted" else: output = Path(output) if verbose: click.echo(f"📂 Extracting package") click.echo(f"📦 Source: {input_path}") click.echo(f"📁 Output: {output}") if dry_run: click.echo("🔍 Dry run - no files would be extracted") return mdz = MdzVariant() result = mdz.extract_package( package_path=input_path, options={'output_dir': output} ) click.echo(f"✅ Package extracted successfully") click.echo(f"📁 Output directory: {result['output_directory']}") click.echo(f"📄 Files extracted: {result['files_extracted']}") elif action == 'info': from markitect.packaging.mdz_variant import MdzVariant if verbose: click.echo(f"ℹ️ Package information for: {input_path}") mdz = MdzVariant() metadata = mdz.get_package_metadata(input_path) click.echo(f"📋 Package Format: {metadata.format}") click.echo(f"🏷️ Format Version: {metadata.version}") click.echo(f"⏰ Created: {metadata.created}") click.echo(f"🛠️ MarkiTect Version: {metadata.markitect_version}") click.echo(f"📊 Assets: {len(metadata.assets) if metadata.assets else 0}") if verbose and metadata.assets: click.echo("\n📁 Assets:") for asset in metadata.assets: click.echo(f" - {asset.path} ({asset.size:,} bytes)") except Exception as e: click.echo(f"❌ Error during package operation: {e}", err=True) if ctx.obj and ctx.obj.get('debug'): import traceback traceback.print_exc() raise click.Abort() @click.command() @click.argument('action', type=click.Choice(['process', 'validate'])) @click.argument('input_file', type=click.Path(exists=True)) @click.option('--output', '-o', type=click.Path(), help='Output file for processed content') @click.option('--variables', type=click.Path(exists=True), help='JSON file containing template variables') @click.option('--base-path', type=click.Path(exists=True), help='Base path for resolving includes (defaults to input file directory)') @click.option('--max-depth', type=int, default=10, help='Maximum inclusion depth to prevent infinite recursion') @click.option('--dry-run', is_flag=True, help='Show what would be processed without creating output') @click.option('--verbose', '-v', is_flag=True, help='Enable verbose output with processing details') @click.pass_context def md_transclude_command(ctx, action, input_file, output, variables, base_path, max_depth, dry_run, verbose): """ Process markdown files with transclusion directives. Actions: - process: Process transclusion directives and generate output - validate: Check template for errors without processing Transclusion directives supported: - {{include "file.md"}} - Include another markdown file - {{variable_name}} - Substitute variables - {{if condition}} content {{endif}} - Conditional content Examples: markitect md-transclude process template.mdt --variables vars.json markitect md-transclude validate template.mdt markitect md-transclude process template.mdt --output result.md """ try: from markitect.packaging.transclusion import TransclusionEngine from markitect.packaging.errors import TransclusionError, CircularReferenceError input_file = Path(input_file) # Load variables if provided var_data = {} if variables: variables_path = Path(variables) if verbose: click.echo(f"📋 Loading variables from: {variables_path}") import json var_data = json.loads(variables_path.read_text()) # Set base path if base_path: base_path = Path(base_path) else: base_path = input_file.parent if verbose: click.echo(f"📄 Processing template: {input_file}") click.echo(f"📁 Base path: {base_path}") click.echo(f"📋 Variables: {len(var_data)} loaded") click.echo(f"🔢 Max depth: {max_depth}") # Create transclusion engine engine = TransclusionEngine( base_path=base_path, variables=var_data, max_depth=max_depth ) if action == 'validate': # Validate template without full processing try: content = input_file.read_text(encoding='utf-8') # Parse directives to check syntax from markitect.packaging.transclusion.directives import DirectiveParser directives = DirectiveParser.parse_directives(content) click.echo(f"✅ Template validation successful") click.echo(f"📊 Found {len(directives)} transclusion directives") if verbose: for directive in directives: click.echo(f" - {directive.type}: {directive.args}") # Check for potential circular references file_includes = DirectiveParser.extract_file_includes(content) if file_includes: click.echo(f"📁 File includes: {len(file_includes)}") if verbose: for include in file_includes: include_path = base_path / include status = "✅" if include_path.exists() else "❌" click.echo(f" {status} {include}") except Exception as e: click.echo(f"❌ Template validation failed: {e}", err=True) raise click.Abort() elif action == 'process': if not output: output = input_file.with_suffix('.processed.md') else: output = Path(output) if verbose: click.echo(f"🔄 Processing transclusion directives") click.echo(f"📤 Output: {output}") if dry_run: click.echo("🔍 Dry run - no output file would be created") try: result = engine.process_file(input_file) click.echo(f"✅ Template processed successfully ({len(result)} characters)") except CircularReferenceError as e: click.echo(f"❌ Circular reference detected: {e}", err=True) raise click.Abort() except TransclusionError as e: click.echo(f"❌ Transclusion error: {e}", err=True) raise click.Abort() return # Process the template try: result = engine.process_file(input_file) # Write output output.write_text(result, encoding='utf-8') click.echo(f"✅ Transclusion processing completed") click.echo(f"📄 Input: {input_file}") click.echo(f"📄 Output: {output}") click.echo(f"📊 Output size: {len(result):,} characters") if verbose: # Count lines for additional stats lines = result.count('\n') + 1 click.echo(f"📊 Output lines: {lines:,}") except CircularReferenceError as e: click.echo(f"❌ Circular reference detected: {e}", err=True) click.echo("💡 Check your include directives for loops", err=True) raise click.Abort() except TransclusionError as e: click.echo(f"❌ Transclusion error: {e}", err=True) raise click.Abort() except Exception as e: click.echo(f"❌ Error during transclusion: {e}", err=True) if ctx.obj and ctx.obj.get('debug'): import traceback traceback.print_exc() raise click.Abort() # ============================================================================== # Utility Functions # ============================================================================== def normalize_filename(title): """ Normalize a title string for use as a filename. Args: title: The title string to normalize Returns: A safe filename string """ # Remove markdown formatting title = re.sub(r'[*_`~]', '', title) # Handle special characters title = unicodedata.normalize('NFKD', title) title = title.encode('ascii', 'ignore').decode('ascii') # Replace spaces and special chars with underscores title = re.sub(r'[^\w\s-]', '', title).strip() title = re.sub(r'[-\s]+', '_', title) # Convert to lowercase and limit length title = title.lower()[:50] return title or 'untitled' def generate_safe_path(base_path, filename): """ Generate a safe file path, avoiding conflicts. Args: base_path: Base directory path filename: Desired filename Returns: Path object for a safe, non-conflicting file """ output_path = Path(base_path) / filename counter = 1 while output_path.exists(): name_part = output_path.stem ext_part = output_path.suffix output_path = output_path.parent / f"{name_part}_{counter}{ext_part}" counter += 1 return output_path # Directory Structure Analysis Functions class DirectoryNode: """Represents a node in a directory structure analysis.""" def __init__(self, path: Path, name: str, depth: int, is_directory: bool): self.path = path self.name = name self.depth = depth self.is_directory = is_directory self.children = [] self.markdown_files = [] self.parent = None def add_child(self, child: 'DirectoryNode'): """Add a child node to this directory node.""" self.children.append(child) child.parent = self def add_markdown_file(self, file_path: Path): """Add a markdown file to this directory node.""" self.markdown_files.append(file_path) def __repr__(self): return f"DirectoryNode(path={self.path}, name='{self.name}', depth={self.depth}, is_directory={self.is_directory})" class DirectoryAnalysis: """Result of directory structure analysis.""" def __init__(self): self.index_file = None self.content_files = [] self.subdirectories = [] def add_content_file(self, file_path: Path): """Add a content file to the analysis.""" self.content_files.append(file_path) def add_subdirectory(self, dir_path: Path): """Add a subdirectory to the analysis.""" self.subdirectories.append(dir_path) class DirectoryStructure: """Complete directory structure analysis result.""" def __init__(self): self.root_nodes = [] self.all_nodes = [] def add_root_node(self, node: DirectoryNode): """Add a root-level node.""" self.root_nodes.append(node) self.all_nodes.append(node) def add_node(self, node: DirectoryNode): """Add any node to the complete list.""" self.all_nodes.append(node) def scan_markdown_files(directory: Path, recursive: bool = False) -> list[Path]: """Scan directory for markdown files. Args: directory: Directory to scan recursive: Whether to scan recursively Returns: List of markdown file paths """ directory = Path(directory) markdown_files = [] if recursive: # Use rglob for recursive search for file_path in directory.rglob("*.md"): if file_path.is_file(): markdown_files.append(file_path) else: # Use glob for non-recursive search for file_path in directory.glob("*.md"): if file_path.is_file(): markdown_files.append(file_path) # Sort for consistent ordering markdown_files.sort() return markdown_files def detect_hierarchy_from_structure(directory: Path) -> list[DirectoryNode]: """Detect hierarchy levels based on directory depth. Args: directory: Root directory to analyze Returns: List of DirectoryNode objects representing the hierarchy """ directory = Path(directory) nodes = [] # Walk through all directories and files for root_path in directory.rglob("*"): if root_path.is_file() and root_path.suffix == ".md": # Calculate depth relative to base directory try: relative_path = root_path.relative_to(directory) depth = len(relative_path.parts) - 1 # File depth (subtract file itself) # Create node for the file node = DirectoryNode( path=root_path, name=root_path.name, depth=depth, is_directory=False ) nodes.append(node) except ValueError: # Skip files outside the directory continue # Also add directory nodes for root_path in directory.rglob("*"): if root_path.is_dir(): try: relative_path = root_path.relative_to(directory) depth = len(relative_path.parts) # Create node for the directory node = DirectoryNode( path=root_path, name=root_path.name, depth=depth, is_directory=True ) nodes.append(node) except ValueError: continue # Sort by depth and name for consistent ordering nodes.sort(key=lambda n: (n.depth, n.name)) return nodes def identify_index_files(directory: Path) -> DirectoryAnalysis: """Identify index.md files vs regular content files. Args: directory: Directory to analyze Returns: DirectoryAnalysis object with index and content files categorized """ directory = Path(directory) analysis = DirectoryAnalysis() # Scan for markdown files in the directory (non-recursive) for file_path in directory.glob("*.md"): if file_path.is_file(): if file_path.name == "index.md": analysis.index_file = file_path else: analysis.add_content_file(file_path) # Also identify subdirectories for dir_path in directory.iterdir(): if dir_path.is_dir(): analysis.add_subdirectory(dir_path) return analysis def analyze_directory_structure(directory: Path) -> DirectoryStructure: """Analyze complete directory structure for hierarchical organization. Args: directory: Root directory to analyze Returns: DirectoryStructure object with complete hierarchy analysis """ directory = Path(directory) structure = DirectoryStructure() node_map = {} # Path -> DirectoryNode mapping # First pass: create all nodes all_paths = [directory] # Add the root directory itself # Add all subdirectories and files (rglob doesn't include the root) for path in directory.rglob("*"): all_paths.append(path) # Create nodes for all paths for path in all_paths: try: if path == directory: relative_path = Path(".") depth = 0 else: relative_path = path.relative_to(directory) # Both files and directories: depth = number of path components depth = len(relative_path.parts) node = DirectoryNode( path=path, name=path.name if path != directory else directory.name, depth=depth, is_directory=path.is_dir() ) node_map[path] = node structure.add_node(node) # Add to root nodes if at depth 1 (direct children of root) if depth == 1: structure.add_root_node(node) except ValueError: # Skip paths outside the directory continue # Special handling for flat directories (only files, no subdirectories) has_subdirectories = any(node.is_directory for node in structure.all_nodes if node.depth > 0) if not has_subdirectories: # This is a flat directory - adjust file depths to 0 and add them to root_nodes structure.root_nodes.clear() for node in structure.all_nodes: if node.depth == 1 and not node.is_directory: node.depth = 0 structure.add_root_node(node) # Second pass: establish parent-child relationships for path, node in node_map.items(): if path != directory: parent_path = path.parent if parent_path in node_map: parent_node = node_map[parent_path] parent_node.add_child(node) # Add markdown files to directory nodes if node.is_directory: for md_file in node.path.glob("*.md"): node.add_markdown_file(md_file) return structure def implode_directory(input_dir: Path, output_file: Path) -> Path: """Implode a directory structure back into a markdown file. Simple wrapper around cli_implode_directory for use in tests and scripts. Args: input_dir: Directory containing markdown files to implode output_file: Output markdown file path Returns: Path to the created output file Raises: Exception: If the implode operation fails """ from pathlib import Path input_dir = Path(input_dir) output_file = Path(output_file) # Use the existing cli_implode_directory function with round-trip compatibility options options = ImplodeOptions( input_dir=input_dir, output_file=output_file, overwrite=True, preserve_heading_levels=True, # Preserve original heading levels for round-trip consistency include_readme_files=True # Include README.md files created by explode process ) result = cli_implode_directory(options=options) if not result.success: error_msg = result.error_message or "Implode operation failed" raise Exception(error_msg) return result.output_file # ============================================================================= # Filename Decoding Functions for Issue #139 # ============================================================================= # These functions convert filesystem-safe names back to readable headings def restore_special_characters(encoded_text: str) -> str: """Restore special characters that were encoded for filesystem safety.""" # First convert underscores to spaces result = encoded_text.replace('_', ' ') # Handle specific patterns for special characters (before title casing) special_patterns = { 'whats': "what's", 'file path issues': 'file/path issues', 'questions and answers': 'questions & answers', 'cafe resume': 'café & résumé', 'colon separated': 'colon: separated', 'parentheses content': 'parentheses (content)', 'brackets and more': 'brackets [and more]' } # Handle version patterns like v2 1 -> v2.1 result = re.sub(r'\bv(\d+)\s+(\d+)', r'v\1.\2', result) for pattern, replacement in special_patterns.items(): result = result.replace(pattern, replacement) # Apply title case to the result return apply_title_case(result) def reconstruct_number_format(encoded_text: str) -> str: """Reconstruct proper number formats from encoded versions.""" # Convert patterns like "section_1_1_1" to "Section 1.1.1" # Pattern for numbered sections with underscores (including letter sections like "appendix_a_1") pattern = r'(section|version|appendix|figure|table)_([a-zA-Z0-9]+)(_[a-zA-Z0-9]+)*' def replace_numbers(match): prefix = match.group(1).title() parts = match.group(0).split('_')[1:] # Get all parts after the prefix # Convert underscores to dots in numeric parts, keep letters as uppercase formatted_parts = [] for part in parts: if part.isdigit(): formatted_parts.append(part) elif len(part) == 1 and part.isalpha(): formatted_parts.append(part.upper()) else: formatted_parts.append(part) number_str = '.'.join(formatted_parts) return f"{prefix} {number_str}" result = re.sub(pattern, replace_numbers, encoded_text, flags=re.IGNORECASE) return result def apply_title_case(text: str) -> str: """Apply appropriate title case to reconstructed headings.""" # Simple title case with some exceptions exceptions = {'and', 'or', 'the', 'a', 'an', 'with', 'of', 'in', 'on', 'at', 'to', 'for'} # Split on spaces and handle special characters within words words = text.split() result = [] for i, word in enumerate(words): # Handle common acronyms first (overrides other rules) if word.lower() in ['api', 'sql', 'http', 'json', 'xml', 'css']: result.append(word.upper()) # Handle words with brackets or parentheses - always capitalize content inside elif '[' in word or ']' in word or '(' in word or ')' in word: result.append(_capitalize_word(word)) # Always capitalize first and last word elif i == 0 or i == len(words) - 1: result.append(_capitalize_word(word)) # Don't capitalize exceptions unless they're the first word (but be more lenient with single letters) elif word.lower() in exceptions and len(word) > 1: result.append(word.lower()) # Single letter words like "a" should generally be capitalized unless they're truly exceptions elif len(word) == 1 and word.lower() in ['a', 'i']: result.append(word.upper()) else: result.append(_capitalize_word(word)) return ' '.join(result) def _capitalize_word(word: str) -> str: """Capitalize a word, handling special characters within the word.""" if not word: return word # Handle words with special characters like "file/path" if '/' in word: parts = word.split('/') return '/'.join(part.capitalize() for part in parts) elif ':' in word: parts = word.split(':') return ':'.join(part.capitalize() for part in parts) elif '(' in word and ')' in word: # Handle parentheses - capitalize content inside before_paren = word[:word.index('(')] inside_parens = word[word.index('(')+1:word.index(')')] after_paren = word[word.index(')')+1:] return before_paren.capitalize() + '(' + inside_parens.capitalize() + ')' + after_paren.capitalize() elif '[' in word and ']' in word: # Handle brackets - capitalize content inside before_bracket = word[:word.index('[')] inside_brackets = word[word.index('[')+1:word.index(']')] after_bracket = word[word.index(']')+1:] return before_bracket.capitalize() + '[' + inside_brackets.capitalize() + ']' + after_bracket.capitalize() elif word.startswith('[') or word.endswith(']'): # Handle partial bracket words like "[and" or "more]" result = "" if word.startswith('['): result += '[' word = word[1:] if word.endswith(']'): end_bracket = ']' word = word[:-1] else: end_bracket = '' result += word.capitalize() + end_bracket return result elif word.startswith('(') or word.endswith(')'): # Handle partial parenthesis words like "(content" or "content)" result = "" if word.startswith('('): result += '(' word = word[1:] if word.endswith(')'): end_paren = ')' word = word[:-1] else: end_paren = '' result += word.capitalize() + end_paren return result else: return word.capitalize() def decode_filename_to_heading(filename: str) -> str: """Decode filesystem-safe filename to readable heading.""" if isinstance(filename, Path): filename = filename.name # Remove .md extension name = filename if name.endswith('.md'): name = name[:-3] # Handle special cases if name.lower() == 'index': return "" if name.lower() == 'readme': return "Readme" # Handle special API/version patterns like "api_v2_1_reference" (put early to avoid conflicts) api_version_pattern = r'(\w+)_v(\d+)_(\d+)_(.+)' api_version_match = re.match(api_version_pattern, name, re.IGNORECASE) if api_version_match: prefix, major, minor, title = api_version_match.groups() formatted_prefix = prefix.upper() if prefix.lower() in ['api', 'sql', 'http', 'json', 'xml', 'css'] else prefix.title() formatted_title = apply_title_case(restore_special_characters(title)) return f"{formatted_prefix} v{major}.{minor}: {formatted_title}" # Handle numbered prefixes numbered_pattern = r'^(\d+)_(.+)$' numbered_match = re.match(numbered_pattern, name) if numbered_match: number, rest = numbered_match.groups() return f"{number}: {apply_title_case(restore_special_characters(rest))}" # Handle private sections (starting with _) if name.startswith('_'): name = name[1:] return apply_title_case(restore_special_characters(name)) # Handle common patterns like "chapter_1_getting_started" or "section_a_getting_started" # First try pattern with multiple numeric parts like "1_2_3" multi_id_pattern = r'(chapter|section|part|appendix)_(\d+(?:_\d+)+)_(.+)' multi_id_match = re.match(multi_id_pattern, name, re.IGNORECASE) if multi_id_match: prefix, numbers, title = multi_id_match.groups() # Convert underscores in numbers to dots formatted_numbers = numbers.replace('_', '.') formatted_title = apply_title_case(restore_special_characters(title)) return f"{prefix.title()} {formatted_numbers}: {formatted_title}" # Then try pattern with single letter/number identifier (but not if it looks like a multi-number pattern) single_id_pattern = r'(chapter|section|part|appendix)_([a-zA-Z]|\d+)_(.+)' single_id_match = re.match(single_id_pattern, name, re.IGNORECASE) if single_id_match: prefix, identifier, title = single_id_match.groups() # Capitalize single letters, keep numbers as-is if identifier.isalpha(): formatted_id = identifier.upper() else: formatted_id = identifier formatted_title = apply_title_case(restore_special_characters(title)) return f"{prefix.title()} {formatted_id}: {formatted_title}" # Handle simple prefix+title patterns like "appendix_troubleshooting" simple_prefix_pattern = r'(chapter|section|part|appendix)_(.+)' simple_prefix_match = re.match(simple_prefix_pattern, name, re.IGNORECASE) if simple_prefix_match: prefix, title = simple_prefix_match.groups() formatted_title = apply_title_case(restore_special_characters(title)) return f"{prefix.title()}: {formatted_title}" # Handle simple numbered patterns like "section_2_3_4_advanced" simple_numbered = r'(\w+)_(\d+(?:_\d+)*)_(.+)' simple_match = re.match(simple_numbered, name, re.IGNORECASE) if simple_match: prefix, numbers, title = simple_match.groups() formatted_numbers = numbers.replace('_', '.') formatted_title = apply_title_case(restore_special_characters(title)) return f"{prefix.title()} {formatted_numbers}: {formatted_title}" # Default case - just apply title case and restore special characters return apply_title_case(restore_special_characters(name)) def decode_directory_name_to_heading(dirname: str) -> str: """Decode directory name to heading.""" # Use the same logic as filename decoding but without .md extension handling return decode_filename_to_heading(dirname) class FilenameDecoder: """Comprehensive filename decoder for batch processing and configuration.""" def __init__(self, preserve_acronyms=True, title_case_enabled=True, number_format_reconstruction=True, context_aware=False, flexible_parsing=False): """Initialize the decoder with configuration options.""" self.preserve_acronyms = preserve_acronyms self.title_case_enabled = title_case_enabled self.number_format_reconstruction = number_format_reconstruction self.context_aware = context_aware self.flexible_parsing = flexible_parsing def decode(self, filename_or_path, parent_context=None): """Decode a single filename or path.""" if isinstance(filename_or_path, Path): filename = filename_or_path.name else: filename = str(filename_or_path) return decode_filename_to_heading(filename) def decode_batch(self, filenames): """Process multiple filenames in batch.""" return [self.decode(filename) for filename in filenames] def _ship_assets(input_path: Path, output_dir: Path, verbose: bool = False): """ Ship (copy) assets referenced in markdown file to output directory. Args: input_path: Path to the markdown file output_dir: Directory where assets should be copied verbose: Whether to print verbose output """ import shutil from markitect.assets.discovery import discover_assets_from_markdown try: # Read the markdown content markdown_content = input_path.read_text(encoding='utf-8') # Discover assets base_path = input_path.parent assets = discover_assets_from_markdown(markdown_content, base_path) shipped_count = 0 skipped_count = 0 missing_count = 0 for asset_ref in assets: # Skip URLs and broken assets if asset_ref.asset_path.startswith(('http:', 'https:', 'mailto:', 'data:')): continue if asset_ref.is_broken or not asset_ref.resolved_path: missing_count += 1 if verbose: click.echo(f" ⚠ Missing asset: {asset_ref.asset_path}", err=True) continue # Determine output path (preserve relative directory structure) clean_path = asset_ref.asset_path.lstrip('./') dest_path = output_dir / clean_path # Create destination directory dest_path.parent.mkdir(parents=True, exist_ok=True) # Check if we need to copy (timestamp-based) should_copy = True if dest_path.exists(): source_mtime = asset_ref.resolved_path.stat().st_mtime dest_mtime = dest_path.stat().st_mtime if source_mtime <= dest_mtime: should_copy = False skipped_count += 1 if should_copy: shutil.copy2(asset_ref.resolved_path, dest_path) shipped_count += 1 if verbose: click.echo(f" ✓ Copied: {asset_ref.asset_path}") elif verbose: click.echo(f" → Skipped (up-to-date): {asset_ref.asset_path}") # Summary if verbose or shipped_count > 0: if shipped_count > 0: click.echo(f"✓ Shipped {shipped_count} assets") if skipped_count > 0: click.echo(f" → Skipped {skipped_count} up-to-date assets") if missing_count > 0: click.echo(f" ⚠ {missing_count} assets not found", err=True) except Exception as e: if verbose: click.echo(f"Error shipping assets: {e}", err=True)