"""
Flat variant implementation for explode-implode operations.

This variant represents the current default behavior where h1 headings
become top-level directories and every other heading becomes a markdown
file in the output root.
"""

import re
from pathlib import Path
from typing import Dict, List, Any, Optional

from .base_variant import (
    BaseVariant,
    ExplodeOptions,
    ImplodeOptions,
    ExplodeResult,
    ImplodeResult
)
from .enums import ExplodeVariant
from .manifest_manager import ManifestManager, StructureEntry
from ..matter_frontmatter.parser import FrontmatterParser


class FlatVariant(BaseVariant):
    """
    Flat variant implementation.

    Every heading starts a new section. An h1 section is written to
    ``<sanitized title>/index.md``; a section of any other level is written
    to ``<sanitized title>.md`` directly in the output directory. When two
    sections would map to the same path, later ones receive a numeric
    suffix (``_2``, ``_3``, ...) so no section overwrites another.

    Structure example:
        book.mdd/
        ├── manifest.md
        ├── _frontmatter.yml
        ├── book_title/
        │   └── index.md
        ├── chapter_1.md
        ├── chapter_2.md
        └── conclusion.md
    """

    # ATX heading: one to six '#' characters, whitespace, then the title.
    _HEADING_PATTERN = re.compile(r'^(#{1,6})\s+(.+)')
    # Fenced code delimiter: up to three spaces of indent, a run of three or
    # more backticks or tildes, then an optional info string.
    _FENCE_PATTERN = re.compile(r'^ {0,3}(`{3,}|~{3,})(.*)$')

    def __init__(self):
        """Initialize the flat variant and its manifest/front matter helpers."""
        super().__init__(ExplodeVariant.FLAT)
        self.manifest_manager = ManifestManager()
        self.frontmatter_parser = FrontmatterParser()

    @property
    def name(self) -> str:
        """Human-readable name of the variant."""
        return "Flat Structure"

    @property
    def description(self) -> str:
        """Description of the variant's behavior."""
        return ("Creates directories based on h1 headings with content organized beneath them. "
                "This is the default structure for backward compatibility.")

    def explode(
        self,
        input_file: Path,
        options: ExplodeOptions
    ) -> ExplodeResult:
        """
        Explode a markdown file using the flat structure variant.

        Args:
            input_file: Path to the markdown file to explode
            options: Options controlling the explode operation

        Returns:
            Result of the explode operation. ``warnings`` reports text that
            precedes the first heading, because that text is not written to
            any output file.
        """
        # validate_input_file / create_output_directory are not defined in
        # this class (presumably inherited); both are used as "list of error
        # strings, empty on success".
        validation_errors = self.validate_input_file(input_file)
        if validation_errors:
            return ExplodeResult(
                success=False,
                output_directory=options.output_dir or Path(),
                files_created=[],
                manifest_path=None,
                warnings=[],
                errors=validation_errors,
                variant_used=self.variant_type
            )

        # Default output location sits next to the input file; the suffix
        # depends on whether a manifest will be written.
        if options.output_dir:
            output_dir = options.output_dir
        else:
            suffix = ".mdd" if options.create_manifest else "_exploded"
            output_dir = input_file.parent / f"{input_file.stem}{suffix}"

        creation_errors = self.create_output_directory(output_dir, overwrite=True)
        if creation_errors:
            return ExplodeResult(
                success=False,
                output_directory=output_dir,
                files_created=[],
                manifest_path=None,
                warnings=[],
                errors=creation_errors,
                variant_used=self.variant_type
            )

        try:
            content = input_file.read_text(encoding='utf-8')

            files_created = self._explode_flat_structure(
                input_file, output_dir, content, options
            )

            # The manifest and the warnings must describe exactly the text
            # that was split into files, i.e. the content after front matter
            # removal. Analysing the raw content would turn '# comment'
            # lines inside YAML front matter into phantom headings.
            _, body = self._split_front_matter(content, options)
            warnings = self._describe_unexported_content(body)

            manifest_path = None
            if options.create_manifest:
                # Keep manifest line numbers relative to the original file
                # when the body can be located inside it; otherwise they are
                # relative to the body.
                body_start = content.find(body) if body else -1
                line_offset = content.count('\n', 0, body_start) if body_start > 0 else 0

                structure = self._analyze_structure(body, output_dir, line_offset)
                manifest_path = self.manifest_manager.create_manifest(
                    output_dir=output_dir,
                    original_file=input_file,
                    variant=self.variant_type,
                    structure=structure,
                    preservation_options={
                        "front_matter": options.preserve_front_matter,
                        "section_order": True,
                        "heading_levels": True
                    }
                )
                files_created.append(manifest_path)

            return ExplodeResult(
                success=True,
                output_directory=output_dir,
                files_created=files_created,
                manifest_path=manifest_path,
                warnings=warnings,
                errors=[],
                variant_used=self.variant_type
            )

        except Exception as e:
            # Top-level boundary: any failure is reported through the result
            # object instead of propagating to the caller.
            return ExplodeResult(
                success=False,
                output_directory=output_dir,
                files_created=[],
                manifest_path=None,
                warnings=[],
                errors=[f"Error during explosion: {e}"],
                variant_used=self.variant_type
            )

    def implode(
        self,
        input_directory: Path,
        options: ImplodeOptions
    ) -> ImplodeResult:
        """
        Implode a directory structure back into a markdown file.

        Args:
            input_directory: Path to the directory to implode
            options: Options controlling the implode operation

        Returns:
            Result of the implode operation
        """
        validation_errors = self.validate_input_directory(input_directory)
        if validation_errors:
            return ImplodeResult(
                success=False,
                output_file=options.output_file or Path(),
                files_processed=[],
                variant_detected=self.variant_type,
                warnings=[],
                errors=validation_errors
            )

        # Without an explicit target, write next to the input directory.
        output_file = options.output_file
        if not output_file:
            output_file = input_directory.parent / f"{input_directory.name}_imploded.md"

        try:
            manifest_data = self.manifest_manager.read_manifest(input_directory)

            content, files_processed = self._implode_flat_structure(
                input_directory, manifest_data, options
            )

            # A dry run assembles the content but leaves the disk untouched.
            if not options.dry_run:
                output_file.write_text(content, encoding='utf-8')

            return ImplodeResult(
                success=True,
                output_file=output_file,
                files_processed=files_processed,
                variant_detected=self.variant_type,
                warnings=[],
                errors=[]
            )

        except Exception as e:
            # Top-level boundary: failures are reported through the result.
            return ImplodeResult(
                success=False,
                output_file=output_file,
                files_processed=[],
                variant_detected=self.variant_type,
                warnings=[],
                errors=[f"Error during implosion: {e}"]
            )

    def can_handle_directory(self, directory: Path) -> bool:
        """
        Check if this variant can handle the given directory structure.

        Args:
            directory: Path to the directory to check

        Returns:
            True if this variant can handle the directory
        """
        if not directory.exists() or not directory.is_dir():
            return False

        # A manifest that declares the flat variant settles the question.
        manifest_data = self.manifest_manager.read_manifest(directory)
        if manifest_data and manifest_data.explosion_type == "flat":
            return True

        subdirs = [entry for entry in directory.iterdir() if entry.is_dir()]
        if not subdirs:
            # With no subdirectories both ratios are zero, which is below
            # the thresholds used further down.
            return True

        semantic_names = ['parts', 'chapters', 'sections', 'appendices']
        numbered_dirs = 0
        semantic_dirs = 0
        for subdir in subdirs:
            if re.match(r'^\d+_', subdir.name):
                numbered_dirs += 1
            lowered = subdir.name.lower()
            if any(name in lowered for name in semantic_names):
                semantic_dirs += 1

        # Flat structure has minimal numbered or semantic directories.
        total = len(subdirs)
        return numbered_dirs / total < 0.3 and semantic_dirs / total < 0.3

    def get_detection_patterns(self) -> Dict[str, Any]:
        """
        Get patterns used for auto-detecting this variant.

        Returns:
            Dictionary of detection patterns and weights
        """
        patterns: Dict[str, Any] = {
            "manifest_type": "flat",
            "numbered_directory_ratio": {"max": 0.3, "weight": 0.6},
            "semantic_directory_ratio": {"max": 0.3, "weight": 0.5},
            "index_file_count": {"min": 0, "weight": 0.3},
            "fallback_score": 0.6  # Default choice
        }
        return patterns

    def _split_front_matter(
        self,
        content: str,
        options: ExplodeOptions
    ) -> tuple[Any, str]:
        """
        Separate front matter from the body when preservation is enabled.

        Args:
            content: Full text of the markdown file
            options: Explode options; only ``preserve_front_matter`` is read

        Returns:
            ``(frontmatter, body)``. ``frontmatter`` is None and ``body`` is
            the untouched ``content`` when preservation is disabled or the
            parser reports no front matter.
        """
        if not options.preserve_front_matter:
            return None, content
        frontmatter, remainder = self.frontmatter_parser.separate_frontmatter_and_content(content)
        if not frontmatter:
            return None, content
        return frontmatter, remainder

    def _find_headings(self, lines: List[str]) -> Dict[int, tuple[int, str]]:
        """
        Locate ATX headings, ignoring lines inside fenced code blocks.

        A line such as ``# install deps`` inside a fenced code block is code,
        not a heading, so it must not start a new section.

        Args:
            lines: The document split into lines

        Returns:
            Mapping of zero-based line index to ``(level, title)``
        """
        headings: Dict[int, tuple[int, str]] = {}
        open_fence = None  # (marker character, marker length) while inside a fence

        for index, line in enumerate(lines):
            fence_match = self._FENCE_PATTERN.match(line)

            if open_fence is not None:
                # Only a bare run of the same character, at least as long as
                # the opening run, closes the fence.
                if fence_match:
                    marker, trailing = fence_match.groups()
                    if (marker[0] == open_fence[0]
                            and len(marker) >= open_fence[1]
                            and not trailing.strip()):
                        open_fence = None
                continue

            if fence_match:
                marker, trailing = fence_match.groups()
                # A backtick fence may not carry backticks in its info
                # string; such a line is inline code, not a fence opener.
                if marker[0] == '~' or '`' not in trailing:
                    open_fence = (marker[0], len(marker))
                    continue

            heading_match = self._HEADING_PATTERN.match(line)
            if heading_match:
                headings[index] = (
                    len(heading_match.group(1)),
                    heading_match.group(2).strip()
                )

        return headings

    @staticmethod
    def _claim_name(claimed: set, base: str, suffix: str) -> str:
        """
        Reserve a name whose ``name + suffix`` path is not yet taken.

        Args:
            claimed: Set of relative paths already reserved; updated in place
            base: Preferred (sanitized) name
            suffix: Path suffix appended to the name, e.g. ``".md"``

        Returns:
            ``base`` if free, otherwise ``base_2``, ``base_3``, ...
        """
        candidate = base
        counter = 1
        while f"{candidate}{suffix}" in claimed:
            counter += 1
            candidate = f"{base}_{counter}"
        claimed.add(f"{candidate}{suffix}")
        return candidate

    def _plan_section_paths(self, sections: List[Dict[str, Any]]) -> List[str]:
        """
        Assign each section a unique path relative to the output directory.

        Explosion and manifest generation both use this plan, so the paths
        recorded in the manifest are the paths of the files written.

        Args:
            sections: Sections as returned by ``_parse_flat_sections``

        Returns:
            One relative path per section, in the same order
        """
        claimed: set = set()
        planned: List[str] = []
        for section in sections:
            suffix = "/index.md" if section['level'] == 1 else ".md"
            unique = self._claim_name(
                claimed, self._sanitize_filename(section['title']), suffix
            )
            planned.append(f"{unique}{suffix}")
        return planned

    def _describe_unexported_content(self, content: str) -> List[str]:
        """
        Report text that explosion drops because it precedes any heading.

        Args:
            content: The text that is split into sections

        Returns:
            A list with one warning when non-blank text precedes the first
            heading (or the text has no heading at all), otherwise empty
        """
        lines = content.split('\n')
        headings = self._find_headings(lines)
        first_heading = min(headings) if headings else len(lines)
        if any(line.strip() for line in lines[:first_heading]):
            return [
                "Content before the first heading is not written to any file "
                "and will be missing after implode."
            ]
        return []

    def _explode_flat_structure(
        self,
        input_file: Path,
        output_dir: Path,
        content: str,
        options: ExplodeOptions
    ) -> List[Path]:
        """
        Implement flat structure explosion directly.

        Writes ``_frontmatter.yml`` when front matter is preserved and
        present, then one file per section: ``<name>/index.md`` for h1
        sections and ``<name>.md`` for all other levels.

        Args:
            input_file: Original markdown file (not read here)
            output_dir: Existing directory that receives the files
            content: Full text of the markdown file
            options: Options controlling the explode operation

        Returns:
            Paths of all files written, in creation order
        """
        files_created: List[Path] = []

        frontmatter, content = self._split_front_matter(content, options)
        if frontmatter:
            import yaml
            fm_file = output_dir / "_frontmatter.yml"
            # sort_keys=False keeps the author's key order and
            # allow_unicode=True keeps non-ASCII text readable; implode
            # copies this file back verbatim.
            fm_content = yaml.dump(
                frontmatter,
                default_flow_style=False,
                sort_keys=False,
                allow_unicode=True
            )
            fm_file.write_text(fm_content, encoding='utf-8')
            files_created.append(fm_file)

        sections = self._parse_flat_sections(content)
        planned_paths = self._plan_section_paths(sections)

        for section, relative_path in zip(sections, planned_paths):
            target = output_dir / relative_path

            if section['level'] != 1:
                # Standalone section: a single file in the output root.
                target.write_text(section['content'], encoding='utf-8')
                files_created.append(target)
                continue

            # h1 section: its own directory with an index.md.
            section_dir = target.parent
            section_dir.mkdir(exist_ok=True)

            main_content, subsections = self._extract_content_and_subsections(
                section['content'], section['level']
            )
            target.write_text(main_content, encoding='utf-8')
            files_created.append(target)

            # Sections produced by _parse_flat_sections end at the next
            # heading, so this list is normally empty; it is still honoured.
            taken_in_dir = {"index.md"}
            for subsection in subsections:
                sub_name = self._claim_name(
                    taken_in_dir, self._sanitize_filename(subsection['title']), ".md"
                )
                sub_file = section_dir / f"{sub_name}.md"
                sub_file.write_text(subsection['content'], encoding='utf-8')
                files_created.append(sub_file)

        return files_created

    def _implode_flat_structure(
        self,
        input_directory: Path,
        manifest_data: Any,
        options: ImplodeOptions
    ) -> tuple[str, List[Path]]:
        """
        Implement flat structure implosion directly.

        Reconstructs markdown content from the directory. With a manifest,
        files are taken in manifest order; otherwise every ``*.md`` file is
        collected recursively and ordered by relative path.

        Args:
            input_directory: Directory produced by an explode operation
            manifest_data: Parsed manifest, or a falsy value when absent
            options: Options controlling the implode operation

        Returns:
            ``(content, files_processed)``
        """
        output_file = options.output_file

        def is_source(candidate: Path) -> bool:
            """Exclude the manifest and the implode target itself."""
            if candidate.name == "manifest.md":
                return False
            return output_file is None or candidate.resolve() != output_file.resolve()

        files_processed: List[Path] = []
        if manifest_data and hasattr(manifest_data, 'structure'):
            # Manifest order reproduces the original document order.
            for entry in sorted(manifest_data.structure, key=lambda item: item.order):
                file_path = input_directory / entry.path
                if file_path.exists() and is_source(file_path):
                    files_processed.append(file_path)
        else:
            # Fallback: collect all markdown files recursively (legacy behavior)
            files_processed = [
                md_file for md_file in input_directory.rglob("*.md") if is_source(md_file)
            ]
            files_processed.sort(key=lambda f: str(f.relative_to(input_directory)))

        content_parts = [path.read_text(encoding='utf-8') for path in files_processed]

        # Check for legacy front matter file (from old explode system)
        legacy_front_matter = None
        fm_file = input_directory / '_frontmatter.yml'
        if fm_file.exists() and options.preserve_front_matter:
            try:
                legacy_front_matter = fm_file.read_text(encoding='utf-8').strip()
            except (OSError, UnicodeDecodeError):
                # Best effort: an unreadable front matter file must not
                # abort the implode; the body is emitted without it.
                legacy_front_matter = None

        # Trim leading/trailing newlines of each part and drop parts that
        # become empty; the internal structure of a part is untouched.
        normalized_parts = []
        for part in content_parts:
            trimmed = part.strip('\r\n')
            if trimmed:
                normalized_parts.append(trimmed)

        spacing = '\n' * (options.section_spacing + 1)
        full_content = spacing.join(normalized_parts)

        if legacy_front_matter and options.preserve_front_matter:
            full_content = f"---\n{legacy_front_matter}\n---\n\n{full_content}"

        return full_content, files_processed

    def _parse_flat_sections(self, content: str) -> List[Dict[str, Any]]:
        """
        Parse content into sections for flat structure.

        A section starts at a heading of any level and runs up to the next
        heading. Headings inside fenced code blocks are ignored, and text
        before the first heading belongs to no section.

        Args:
            content: Markdown text without front matter

        Returns:
            Dicts with ``level``, ``title``, ``order`` (1-based),
            ``start_line`` (1-based) and ``content`` (heading line included)
        """
        lines = content.split('\n')
        headings = self._find_headings(lines)

        sections: List[Dict[str, Any]] = []
        current = None
        buffer: List[str] = []

        for index, line in enumerate(lines):
            if index in headings:
                if current is not None:
                    current['content'] = '\n'.join(buffer)
                    sections.append(current)
                level, title = headings[index]
                current = {
                    'level': level,
                    'title': title,
                    'order': len(sections) + 1,
                    'start_line': index + 1
                }
                buffer = [line]
            elif current is not None:
                buffer.append(line)

        if current is not None:
            current['content'] = '\n'.join(buffer)
            sections.append(current)

        return sections

    def _extract_content_and_subsections(self, content: str, parent_level: int) -> tuple[str, List[Dict[str, Any]]]:
        """
        Extract main content and subsections from a section.

        Args:
            content: Text of one section, starting at its heading
            parent_level: Heading level of the section itself

        Returns:
            ``(main_content, subsections)``; each subsection is a dict with
            ``level``, ``title`` and ``content``. Headings inside fenced
            code blocks are treated as ordinary lines.
        """
        lines = content.split('\n')
        headings = self._find_headings(lines)

        main_lines: List[str] = []
        subsections: List[Dict[str, Any]] = []
        active = None
        active_lines: List[str] = []

        for index, line in enumerate(lines):
            heading = headings.get(index)

            if heading is not None and heading[0] > parent_level:
                # A deeper heading closes the running subsection and opens
                # a new one.
                if active:
                    active['content'] = '\n'.join(active_lines)
                    subsections.append(active)
                active = {'level': heading[0], 'title': heading[1]}
                active_lines = [line]
            elif heading is not None:
                # The section's own heading (or a shallower one) always
                # belongs to the main content.
                main_lines.append(line)
            elif active:
                active_lines.append(line)
            else:
                main_lines.append(line)

        if active:
            active['content'] = '\n'.join(active_lines)
            subsections.append(active)

        return '\n'.join(main_lines), subsections

    def _sanitize_filename(self, title: str) -> str:
        """
        Sanitize a title for use as a filename.

        Args:
            title: Heading text, with or without leading ``#`` markers

        Returns:
            Lowercase name of at most 50 characters made of ASCII letters,
            digits and underscores; ``'untitled'`` when nothing remains
        """
        without_markers = re.sub(r'^#+\s*', '', title)
        # Keep only ASCII letters, digits, whitespace, hyphen and underscore.
        cleaned = re.sub(r'[^a-zA-Z0-9\s\-_]', '', without_markers)
        # Whitespace and hyphen runs collapse to a single underscore.
        collapsed = re.sub(r'[\s\-]+', '_', cleaned)
        safe_title = collapsed.lower().strip('_')

        if len(safe_title) > 50:
            # Truncation may leave a dangling underscore; drop it.
            safe_title = safe_title[:50].rstrip('_')

        return safe_title if safe_title else 'untitled'

    def _basic_explode_implementation(
        self,
        input_file: Path,
        output_dir: Path,
        content: str
    ) -> List[Path]:
        """
        Basic explode implementation for testing purposes.

        Splits on h1 headings only and writes each part to
        ``<name>/index.md`` inside ``output_dir``.

        Args:
            input_file: Original markdown file (not read here)
            output_dir: Existing directory that receives the files
            content: Markdown text to split

        Returns:
            Paths of the index files written
        """
        files_created: List[Path] = []

        for position, section in enumerate(re.split(r'\n# ', content)):
            if not section.strip():
                continue

            # The split consumes the '# ' of every part but the first; the
            # first part only needs one when it does not start with '#'.
            if position > 0 or not section.startswith('#'):
                section = '# ' + section

            title_line = section.split('\n', 1)[0]
            title = re.sub(r'^#\s*', '', title_line).strip()

            safe_title = re.sub(r'[^\w\s-]', '', title).strip()
            safe_title = re.sub(r'[-\s]+', '_', safe_title).lower()

            section_dir = output_dir / safe_title
            section_dir.mkdir(exist_ok=True)

            file_path = section_dir / "index.md"
            file_path.write_text(section, encoding='utf-8')
            files_created.append(file_path)

        return files_created

    def _basic_implode_implementation(self, input_directory: Path) -> tuple[str, List[Path]]:
        """
        Basic implode implementation for testing purposes.

        Args:
            input_directory: Directory to read markdown files from

        Returns:
            ``(content, files_processed)`` where content joins every
            ``*.md`` file except ``manifest.md``, in sorted path order
        """
        files_processed = [
            file_path
            for file_path in sorted(input_directory.glob("**/*.md"))
            if file_path.name != "manifest.md"
        ]
        content_parts = [
            file_path.read_text(encoding='utf-8') for file_path in files_processed
        ]
        return '\n\n\n\n'.join(content_parts), files_processed

    def _analyze_structure(
        self,
        content: str,
        output_dir: Path,
        line_offset: int = 0
    ) -> List[StructureEntry]:
        """
        Analyze the content structure for manifest generation.

        Uses the same section parsing and path planning as explosion, so
        every entry points at the file written for that section.

        Args:
            content: The text that is split into sections
            output_dir: Output directory (not used by this implementation)
            line_offset: Number of lines that precede ``content`` in the
                original file; added to each entry's ``original_line``

        Returns:
            One entry per heading, in document order
        """
        sections = self._parse_flat_sections(content)
        planned_paths = self._plan_section_paths(sections)

        return [
            StructureEntry(
                type=f"h{section['level']}",
                title=section['title'],
                path=relative_path,
                order=section['order'],
                level=section['level'],
                original_line=section['start_line'] + line_offset
            )
            for section, relative_path in zip(sections, planned_paths)
        ]