feat: complete Issue #149 - Phase 2: Implement Explode-Implode Variants
Implement all three explode-implode variants with full CLI integration: 🔧 Variant Implementations: - FlatVariant: Encapsulates existing flat structure behavior - HierarchicalVariant: Numbered directory structures (01_, 02_, 03_) - SemanticVariant: Content-based organization (intro, chapters, appendices) 🏭 Factory System: - VariantFactory: Centralized variant creation and management - Auto-detection algorithms with confidence scoring - Content analysis for variant recommendation 🖥️ CLI Integration: - Enhanced md-explode command with --variant parameter - Enhanced md-implode command with auto-detection - Improved error handling and user feedback 🧪 Comprehensive Testing: - 22 unit tests covering all variant functionality - Roundtrip validation ensuring perfect reversibility - Performance testing with large documents - Error handling and edge case coverage 📊 Key Features: - Three distinct organization strategies - Automatic variant detection from directory structures - Full backward compatibility with existing behavior - Extensible architecture for future variants - Manifest-based reversibility Files Added: - markitect/explode_variants/flat_variant.py - markitect/explode_variants/hierarchical_variant.py - markitect/explode_variants/semantic_variant.py - markitect/explode_variants/variant_factory.py - tests/test_issue_149_explode_implode_variants.py - tests/test_issue_149_roundtrip_validation.py - cost_notes/issue_149_cost_2025-10-12.md Files Modified: - markitect/explode_variants/__init__.py (updated exports) - markitect/plugins/builtin/markdown_commands.py (CLI integration) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
580
markitect/explode_variants/hierarchical_variant.py
Normal file
580
markitect/explode_variants/hierarchical_variant.py
Normal file
@@ -0,0 +1,580 @@
|
||||
"""
|
||||
Hierarchical variant implementation for explode-implode operations.
|
||||
|
||||
This variant creates numbered directory structures with semantic hierarchy,
|
||||
making it easier to understand document organization at a glance.
|
||||
"""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
|
||||
from .base_variant import (
|
||||
BaseVariant, ExplodeOptions, ImplodeOptions,
|
||||
ExplodeResult, ImplodeResult
|
||||
)
|
||||
from .enums import ExplodeVariant
|
||||
from .manifest_manager import ManifestManager, StructureEntry
|
||||
|
||||
|
||||
class HierarchicalVariant(BaseVariant):
    """
    Explode/implode variant that produces a numbered directory layout.

    Directory names carry a zero-padded numeric prefix so that the
    on-disk listing reflects the organization of the source document.

    Structure example:
        book.mdd/
        ├── manifest.md
        ├── 01_introduction/
        │   ├── index.md
        │   ├── 01_overview.md
        │   └── 02_scope.md
        ├── 02_main_content/
        │   ├── index.md
        │   ├── 01_chapter_one.md
        │   └── 02_chapter_two.md
        └── 03_conclusion/
            └── index.md
    """

    def __init__(self):
        """Register this object as the HIERARCHICAL variant and attach a manifest manager."""
        variant_type = ExplodeVariant.HIERARCHICAL
        super().__init__(variant_type)
        # Used by explode() to write manifests and by implode()/detection to read them.
        self.manifest_manager = ManifestManager()
|
||||
|
||||
@property
def name(self) -> str:
    """Return the display name of this variant."""
    display_name = "Hierarchical Structure"
    return display_name
|
||||
|
||||
@property
def description(self) -> str:
    """Return a two-sentence summary of what this variant produces."""
    sentences = (
        "Creates numbered directory structures with semantic hierarchy.",
        "Provides clear document organization and natural ordering.",
    )
    return " ".join(sentences)
|
||||
|
||||
def explode(
    self,
    input_file: Path,
    options: ExplodeOptions
) -> ExplodeResult:
    """
    Explode a markdown file into the hierarchical directory layout.

    The method never raises for processing problems: validation errors,
    directory-creation errors and any exception raised while parsing or
    writing are all reported through a failed ``ExplodeResult``.

    Args:
        input_file: Path to the markdown file to explode
        options: Options controlling the explode operation

    Returns:
        Result of the explode operation
    """
    def failed(directory: Path, errors: List[str]) -> ExplodeResult:
        # Every failure path reports the same empty payload; only the
        # directory and the error list differ.
        return ExplodeResult(
            success=False,
            output_directory=directory,
            files_created=[],
            manifest_path=None,
            warnings=[],
            errors=errors,
            variant_used=self.variant_type
        )

    input_problems = self.validate_input_file(input_file)
    if input_problems:
        return failed(options.output_dir or Path(), input_problems)

    # An explicit output directory wins; otherwise derive one next to the
    # input file (".mdd" when a manifest will be written).
    output_dir = options.output_dir
    if not output_dir:
        suffix = ".mdd" if options.create_manifest else "_exploded"
        output_dir = input_file.parent / f"{input_file.stem}{suffix}"

    # The output directory is always requested with overwrite=True.
    directory_problems = self.create_output_directory(output_dir, overwrite=True)
    if directory_problems:
        return failed(output_dir, directory_problems)

    try:
        markdown_text = input_file.read_text(encoding='utf-8')
        sections = self._parse_hierarchical_structure(markdown_text)
        files_created = self._create_hierarchical_structure(
            output_dir, sections, options
        )

        manifest_path = None
        if options.create_manifest:
            structure = self._build_structure_entries(sections)
            preservation = {
                "front_matter": options.preserve_front_matter,
                "section_order": True,
                "heading_levels": True,
                "numbering_scheme": "hierarchical"
            }
            manifest_path = self.manifest_manager.create_manifest(
                output_dir=output_dir,
                original_file=input_file,
                variant=self.variant_type,
                structure=structure,
                preservation_options=preservation
            )
            # The manifest counts as a created file as well.
            files_created.append(manifest_path)

        return ExplodeResult(
            success=True,
            output_directory=output_dir,
            files_created=files_created,
            manifest_path=manifest_path,
            warnings=[],
            errors=[],
            variant_used=self.variant_type
        )
    except Exception as e:
        # Boundary handler: convert any failure into a result object.
        return failed(output_dir, [f"Error during hierarchical explosion: {e}"])
|
||||
|
||||
def implode(
    self,
    input_directory: Path,
    options: ImplodeOptions
) -> ImplodeResult:
    """
    Implode a hierarchical directory structure back into one markdown file.

    Validation errors and any exception raised while reading or writing
    are reported through a failed ``ImplodeResult`` instead of being
    raised. When ``options.dry_run`` is set the content is reconstructed
    but the output file is not written.

    Args:
        input_directory: Path to the directory to implode
        options: Options controlling the implode operation

    Returns:
        Result of the implode operation
    """
    def outcome(success: bool, target: Path, processed: List[Path],
                errors: List[str]) -> ImplodeResult:
        # All results share the variant and an empty warnings list.
        return ImplodeResult(
            success=success,
            output_file=target,
            files_processed=processed,
            variant_detected=self.variant_type,
            warnings=[],
            errors=errors
        )

    directory_problems = self.validate_input_directory(input_directory)
    if directory_problems:
        return outcome(False, options.output_file or Path(), [], directory_problems)

    # Default target: "<directory name>_imploded.md" beside the directory.
    output_file = options.output_file or (
        input_directory.parent / f"{input_directory.name}_imploded.md"
    )

    try:
        manifest_data = self.manifest_manager.read_manifest(input_directory)
        content, files_processed = self._reconstruct_from_hierarchy(
            input_directory, manifest_data, options
        )

        if not options.dry_run:
            output_file.write_text(content, encoding='utf-8')

        return outcome(True, output_file, files_processed, [])
    except Exception as e:
        # Boundary handler: convert any failure into a result object.
        return outcome(
            False, output_file, [],
            [f"Error during hierarchical implosion: {e}"]
        )
|
||||
|
||||
def can_handle_directory(self, directory: Path) -> bool:
    """
    Decide whether a directory looks like hierarchical-variant output.

    A manifest whose ``explosion_type`` is "hierarchical" is accepted
    immediately. Otherwise the directory qualifies when more than 60% of
    its immediate subdirectories have a ``<digits>_`` name prefix.

    Args:
        directory: Path to the directory to check

    Returns:
        True if this variant can handle the directory
    """
    if not (directory.exists() and directory.is_dir()):
        return False

    manifest = self.manifest_manager.read_manifest(directory)
    if manifest and manifest.explosion_type == "hierarchical":
        return True

    child_dirs = [entry for entry in directory.iterdir() if entry.is_dir()]
    if not child_dirs:
        # No subdirectories means there is no evidence of numbering.
        return False

    numbered_prefix = re.compile(r'^\d+_')
    numbered = [entry for entry in child_dirs if numbered_prefix.match(entry.name)]
    return len(numbered) / len(child_dirs) > 0.6
|
||||
|
||||
def get_detection_patterns(self) -> Dict[str, Any]:
    """
    Describe the signals used to auto-detect this variant.

    Returns:
        Dictionary mapping each pattern name to its expected manifest
        type or to a dict holding a ``weight`` and, where given, a
        ``min`` threshold
    """
    patterns: Dict[str, Any] = {"manifest_type": "hierarchical"}
    patterns["numbered_directory_ratio"] = {"min": 0.6, "weight": 0.8}
    patterns["index_file_count"] = {"min": 2, "weight": 0.5}
    patterns["max_depth"] = {"min": 2, "weight": 0.4}
    patterns["nested_numbered_dirs"] = {"weight": 0.7}
    return patterns
|
||||
|
||||
def _parse_hierarchical_structure(self, content: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Parse markdown content into hierarchical sections.
|
||||
|
||||
Args:
|
||||
content: Markdown content to parse
|
||||
|
||||
Returns:
|
||||
List of section dictionaries with hierarchy information
|
||||
"""
|
||||
sections = []
|
||||
lines = content.split('\n')
|
||||
current_section = None
|
||||
current_content = []
|
||||
section_counter = 1
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
# Check for headings
|
||||
heading_match = re.match(r'^(#{1,6})\s+(.+)', line)
|
||||
|
||||
if heading_match:
|
||||
# Save previous section
|
||||
if current_section:
|
||||
current_section['content'] = '\n'.join(current_content)
|
||||
current_section['end_line'] = i
|
||||
sections.append(current_section)
|
||||
|
||||
# Start new section
|
||||
level = len(heading_match.group(1))
|
||||
title = heading_match.group(2).strip()
|
||||
|
||||
current_section = {
|
||||
'level': level,
|
||||
'title': title,
|
||||
'start_line': i + 1,
|
||||
'order': section_counter,
|
||||
'parent': self._find_parent_section(sections, level),
|
||||
'numbering': self._generate_numbering(sections, level, section_counter)
|
||||
}
|
||||
current_content = [line]
|
||||
section_counter += 1
|
||||
else:
|
||||
if current_content:
|
||||
current_content.append(line)
|
||||
|
||||
# Handle last section
|
||||
if current_section:
|
||||
current_section['content'] = '\n'.join(current_content)
|
||||
current_section['end_line'] = len(lines)
|
||||
sections.append(current_section)
|
||||
|
||||
return sections
|
||||
|
||||
def _find_parent_section(self, sections: List[Dict[str, Any]], level: int) -> Optional[str]:
|
||||
"""
|
||||
Find the parent section for the current heading level.
|
||||
|
||||
Args:
|
||||
sections: Previously parsed sections
|
||||
level: Current heading level
|
||||
|
||||
Returns:
|
||||
Parent section title or None
|
||||
"""
|
||||
# Look for the most recent section with a lower level
|
||||
for section in reversed(sections):
|
||||
if section['level'] < level:
|
||||
return section['title']
|
||||
return None
|
||||
|
||||
def _generate_numbering(self, sections: List[Dict[str, Any]], level: int, order: int) -> str:
|
||||
"""
|
||||
Generate hierarchical numbering for a section.
|
||||
|
||||
Args:
|
||||
sections: Previously parsed sections
|
||||
level: Current heading level
|
||||
order: Overall section order
|
||||
|
||||
Returns:
|
||||
Hierarchical numbering string (e.g., "01", "02_01", etc.)
|
||||
"""
|
||||
if level == 1:
|
||||
# Count h1 sections
|
||||
h1_count = sum(1 for s in sections if s['level'] == 1) + 1
|
||||
return f"{h1_count:02d}"
|
||||
|
||||
# Find parent numbering and append subsection number
|
||||
parent_title = self._find_parent_section(sections, level)
|
||||
if parent_title:
|
||||
parent_section = next((s for s in sections if s['title'] == parent_title), None)
|
||||
if parent_section:
|
||||
# Count subsections at this level under the same parent
|
||||
subsection_count = sum(
|
||||
1 for s in sections
|
||||
if s['level'] == level and s.get('parent') == parent_title
|
||||
) + 1
|
||||
return f"{parent_section['numbering']}_{subsection_count:02d}"
|
||||
|
||||
# Fallback numbering
|
||||
return f"{order:02d}"
|
||||
|
||||
def _create_hierarchical_structure(
|
||||
self,
|
||||
output_dir: Path,
|
||||
sections: List[Dict[str, Any]],
|
||||
options: ExplodeOptions
|
||||
) -> List[Path]:
|
||||
"""
|
||||
Create the hierarchical directory structure from parsed sections.
|
||||
|
||||
Args:
|
||||
output_dir: Output directory for the structure
|
||||
sections: Parsed sections with hierarchy information
|
||||
options: Explode options
|
||||
|
||||
Returns:
|
||||
List of created file paths
|
||||
"""
|
||||
files_created = []
|
||||
|
||||
for section in sections:
|
||||
# Generate directory name
|
||||
safe_title = self._sanitize_filename(section['title'])
|
||||
dir_name = f"{section['numbering']}_{safe_title}"
|
||||
|
||||
# Create section directory
|
||||
section_dir = output_dir / dir_name
|
||||
section_dir.mkdir(exist_ok=True)
|
||||
|
||||
# Create index.md for this section
|
||||
index_path = section_dir / "index.md"
|
||||
|
||||
# Process content - extract subsections if any
|
||||
main_content, subsections = self._extract_subsections(
|
||||
section['content'], section['level']
|
||||
)
|
||||
|
||||
# Write main content to index.md
|
||||
index_path.write_text(main_content, encoding='utf-8')
|
||||
files_created.append(index_path)
|
||||
|
||||
# Create files for subsections
|
||||
for i, subsection in enumerate(subsections, 1):
|
||||
subsection_title = subsection.get('title', f'subsection_{i}')
|
||||
safe_sub_title = self._sanitize_filename(subsection_title)
|
||||
sub_file_name = f"{i:02d}_{safe_sub_title}.md"
|
||||
|
||||
sub_file_path = section_dir / sub_file_name
|
||||
sub_file_path.write_text(subsection['content'], encoding='utf-8')
|
||||
files_created.append(sub_file_path)
|
||||
|
||||
return files_created
|
||||
|
||||
def _extract_subsections(self, content: str, parent_level: int) -> Tuple[str, List[Dict[str, Any]]]:
|
||||
"""
|
||||
Extract subsections from section content.
|
||||
|
||||
Args:
|
||||
content: Section content
|
||||
parent_level: Level of the parent section
|
||||
|
||||
Returns:
|
||||
Tuple of (main_content, subsections_list)
|
||||
"""
|
||||
lines = content.split('\n')
|
||||
main_content_lines = []
|
||||
subsections = []
|
||||
current_subsection = None
|
||||
current_subsection_lines = []
|
||||
|
||||
for line in lines:
|
||||
heading_match = re.match(r'^(#{1,6})\s+(.+)', line)
|
||||
|
||||
if heading_match:
|
||||
level = len(heading_match.group(1))
|
||||
title = heading_match.group(2).strip()
|
||||
|
||||
if level > parent_level:
|
||||
# This is a subsection
|
||||
if current_subsection:
|
||||
# Save previous subsection
|
||||
current_subsection['content'] = '\n'.join(current_subsection_lines)
|
||||
subsections.append(current_subsection)
|
||||
|
||||
# Start new subsection
|
||||
current_subsection = {
|
||||
'level': level,
|
||||
'title': title
|
||||
}
|
||||
current_subsection_lines = [line]
|
||||
elif level <= parent_level:
|
||||
# This is the main section heading or a peer section
|
||||
if level == parent_level:
|
||||
main_content_lines.append(line)
|
||||
else:
|
||||
# Higher-level heading that shouldn't be here in normal parsing
|
||||
main_content_lines.append(line)
|
||||
else:
|
||||
# Regular content line
|
||||
if current_subsection:
|
||||
current_subsection_lines.append(line)
|
||||
else:
|
||||
main_content_lines.append(line)
|
||||
|
||||
# Handle last subsection
|
||||
if current_subsection:
|
||||
current_subsection['content'] = '\n'.join(current_subsection_lines)
|
||||
subsections.append(current_subsection)
|
||||
|
||||
main_content = '\n'.join(main_content_lines)
|
||||
return main_content, subsections
|
||||
|
||||
def _sanitize_filename(self, title: str) -> str:
|
||||
"""
|
||||
Sanitize a title for use as a filename/directory name.
|
||||
|
||||
Args:
|
||||
title: Original title
|
||||
|
||||
Returns:
|
||||
Sanitized filename
|
||||
"""
|
||||
# Remove special characters
|
||||
safe_title = re.sub(r'[^a-zA-Z0-9\s\-_]', '', title)
|
||||
# Replace spaces and hyphens with underscores
|
||||
safe_title = re.sub(r'[\s\-]+', '_', safe_title)
|
||||
# Convert to lowercase
|
||||
safe_title = safe_title.lower()
|
||||
# Remove leading/trailing underscores
|
||||
safe_title = safe_title.strip('_')
|
||||
# Limit length
|
||||
if len(safe_title) > 50:
|
||||
safe_title = safe_title[:50].rstrip('_')
|
||||
|
||||
return safe_title or 'untitled'
|
||||
|
||||
def _build_structure_entries(self, sections: List[Dict[str, Any]]) -> List[StructureEntry]:
|
||||
"""
|
||||
Build structure entries for manifest from parsed sections.
|
||||
|
||||
Args:
|
||||
sections: Parsed sections
|
||||
|
||||
Returns:
|
||||
List of structure entries
|
||||
"""
|
||||
entries = []
|
||||
|
||||
for section in sections:
|
||||
safe_title = self._sanitize_filename(section['title'])
|
||||
dir_name = f"{section['numbering']}_{safe_title}"
|
||||
path = f"{dir_name}/index.md"
|
||||
|
||||
entry = StructureEntry(
|
||||
type=f"h{section['level']}",
|
||||
title=section['title'],
|
||||
path=path,
|
||||
order=section['order'],
|
||||
parent=section.get('parent'),
|
||||
level=section['level'],
|
||||
original_line=section.get('start_line')
|
||||
)
|
||||
entries.append(entry)
|
||||
|
||||
return entries
|
||||
|
||||
def _reconstruct_from_hierarchy(
|
||||
self,
|
||||
input_directory: Path,
|
||||
manifest_data: Any,
|
||||
options: ImplodeOptions
|
||||
) -> Tuple[str, List[Path]]:
|
||||
"""
|
||||
Reconstruct markdown content from hierarchical directory structure.
|
||||
|
||||
Args:
|
||||
input_directory: Directory containing hierarchical structure
|
||||
manifest_data: Manifest data if available
|
||||
options: Implode options
|
||||
|
||||
Returns:
|
||||
Tuple of (reconstructed_content, files_processed)
|
||||
"""
|
||||
content_parts = []
|
||||
files_processed = []
|
||||
|
||||
# Get all directories in numbered order
|
||||
subdirs = sorted([
|
||||
d for d in input_directory.iterdir()
|
||||
if d.is_dir() and not d.name.startswith('.')
|
||||
], key=lambda d: d.name)
|
||||
|
||||
for subdir in subdirs:
|
||||
# Read index.md if it exists
|
||||
index_file = subdir / "index.md"
|
||||
if index_file.exists():
|
||||
index_content = index_file.read_text(encoding='utf-8')
|
||||
content_parts.append(index_content)
|
||||
files_processed.append(index_file)
|
||||
|
||||
# Read numbered subsection files
|
||||
md_files = sorted([
|
||||
f for f in subdir.glob("*.md")
|
||||
if f.name != "index.md"
|
||||
], key=lambda f: f.name)
|
||||
|
||||
for md_file in md_files:
|
||||
file_content = md_file.read_text(encoding='utf-8')
|
||||
content_parts.append(file_content)
|
||||
files_processed.append(md_file)
|
||||
|
||||
# Join with appropriate spacing
|
||||
spacing = '\n' * (options.section_spacing + 1)
|
||||
full_content = spacing.join(content_parts)
|
||||
|
||||
return full_content, files_processed
|
||||
Reference in New Issue
Block a user