diff --git a/markitect/explode_variants/__init__.py b/markitect/explode_variants/__init__.py new file mode 100644 index 00000000..d7d14125 --- /dev/null +++ b/markitect/explode_variants/__init__.py @@ -0,0 +1,34 @@ +""" +Explode-Implode Variants Module + +This module provides different strategies for exploding markdown files into +directory structures and imploding them back, with full reversibility support. + +Key Components: +- ExplodeVariant: Enum defining available variants +- BaseVariant: Abstract base class for variant implementations +- ManifestManager: Handles manifest.md creation and parsing +- VariantDetector: Auto-detects variant types from directory structures +""" + +from .enums import ExplodeVariant, ExplodeMode, ManifestVersion, DetectionConfidence +from .base_variant import BaseVariant, ExplodeOptions, ImplodeOptions, ExplodeResult, ImplodeResult +from .manifest_manager import ManifestManager, ManifestData, StructureEntry +from .variant_detector import VariantDetector, DetectionResult + +__all__ = [ + 'ExplodeVariant', + 'ExplodeMode', + 'ManifestVersion', + 'DetectionConfidence', + 'BaseVariant', + 'ExplodeOptions', + 'ImplodeOptions', + 'ExplodeResult', + 'ImplodeResult', + 'ManifestManager', + 'ManifestData', + 'StructureEntry', + 'VariantDetector', + 'DetectionResult' +] \ No newline at end of file diff --git a/markitect/explode_variants/base_variant.py b/markitect/explode_variants/base_variant.py new file mode 100644 index 00000000..2c4b0460 --- /dev/null +++ b/markitect/explode_variants/base_variant.py @@ -0,0 +1,254 @@ +""" +Abstract base class for explode-implode variants. 
class BaseVariant(ABC):
    """
    Abstract base class for explode-implode variants.

    Each variant implements a specific strategy for organizing exploded
    markdown content and reconstructing it during implode operations.
    Subclasses provide ``name``, ``description``, ``explode``, ``implode``,
    ``can_handle_directory`` and ``get_detection_patterns``; the validation
    and output-directory helpers defined here are shared by all variants.
    """

    # Lower-case suffixes accepted as markdown. Shared by the file and the
    # directory validators so the two checks cannot disagree.
    MARKDOWN_SUFFIXES = ('.md', '.markdown')

    def __init__(self, variant_type: ExplodeVariant):
        """
        Initialize the variant.

        Args:
            variant_type: The type of variant this implements
        """
        self.variant_type = variant_type

    @property
    @abstractmethod
    def name(self) -> str:
        """Human-readable name of the variant."""
        pass

    @property
    @abstractmethod
    def description(self) -> str:
        """Description of the variant's behavior."""
        pass

    @abstractmethod
    def explode(
        self,
        input_file: Path,
        options: ExplodeOptions
    ) -> ExplodeResult:
        """
        Explode a markdown file into a directory structure.

        Args:
            input_file: Path to the markdown file to explode
            options: Options controlling the explode operation

        Returns:
            Result of the explode operation

        Raises:
            FileNotFoundError: If input file doesn't exist
            PermissionError: If unable to create output directory
            ValueError: If input file is not valid markdown
        """
        pass

    @abstractmethod
    def implode(
        self,
        input_directory: Path,
        options: ImplodeOptions
    ) -> ImplodeResult:
        """
        Implode a directory structure back into a markdown file.

        Args:
            input_directory: Path to the directory to implode
            options: Options controlling the implode operation

        Returns:
            Result of the implode operation

        Raises:
            FileNotFoundError: If input directory doesn't exist
            ValueError: If directory structure is invalid for this variant
        """
        pass

    @abstractmethod
    def can_handle_directory(self, directory: Path) -> bool:
        """
        Check if this variant can handle the given directory structure.

        Args:
            directory: Path to the directory to check

        Returns:
            True if this variant can handle the directory
        """
        pass

    @abstractmethod
    def get_detection_patterns(self) -> Dict[str, Any]:
        """
        Get patterns used for auto-detecting this variant.

        Returns:
            Dictionary of detection patterns and weights
        """
        pass

    def validate_input_file(self, input_file: Path) -> List[str]:
        """
        Validate the input markdown file.

        A missing path or a non-file path is reported immediately. A wrong
        suffix is reported but the file is still read, so an empty or
        undecodable file is reported in the same pass.

        Args:
            input_file: Path to the file to validate

        Returns:
            List of validation errors (empty if valid)
        """
        errors = []

        if not input_file.exists():
            errors.append(f"Input file does not exist: {input_file}")
            return errors

        if not input_file.is_file():
            errors.append(f"Input path is not a file: {input_file}")
            return errors

        if input_file.suffix.lower() not in BaseVariant.MARKDOWN_SUFFIXES:
            errors.append(f"Input file is not a markdown file: {input_file}")

        try:
            content = input_file.read_text(encoding='utf-8')
        except UnicodeDecodeError:
            errors.append("Input file contains invalid UTF-8 encoding")
        except OSError as e:
            errors.append(f"Error reading input file: {e}")
        else:
            if not content.strip():
                errors.append("Input file is empty")

        return errors

    def validate_input_directory(self, input_directory: Path) -> List[str]:
        """
        Validate the input directory structure.

        The directory must exist, be a directory, and contain at least one
        markdown file at any depth. Both ``.md`` and ``.markdown`` files
        count (case-insensitively), matching ``validate_input_file``.

        Args:
            input_directory: Path to the directory to validate

        Returns:
            List of validation errors (empty if valid)
        """
        errors = []

        if not input_directory.exists():
            errors.append(f"Input directory does not exist: {input_directory}")
            return errors

        if not input_directory.is_dir():
            errors.append(f"Input path is not a directory: {input_directory}")
            return errors

        # any() stops at the first hit instead of materializing every match.
        has_markdown = any(
            candidate.suffix.lower() in BaseVariant.MARKDOWN_SUFFIXES
            and candidate.is_file()
            for candidate in input_directory.rglob("*")
        )
        if not has_markdown:
            errors.append("Directory contains no markdown files")

        return errors

    def create_output_directory(self, output_dir: Path, overwrite: bool = False) -> List[str]:
        """
        Create the output directory if it doesn't exist.

        Existing directory contents are never removed; ``overwrite`` only
        decides whether an already-existing directory is acceptable.

        Args:
            output_dir: Path to the directory to create
            overwrite: Whether an existing directory may be reused

        Returns:
            List of errors (empty if successful)
        """
        errors = []

        try:
            if output_dir.exists():
                # A regular file can never act as the output directory, so
                # report it first; otherwise it would be misreported as an
                # existing directory when overwrite is False.
                if output_dir.is_file():
                    errors.append(f"Output path exists and is a file: {output_dir}")
                    return errors

                if not overwrite:
                    errors.append(f"Output directory already exists: {output_dir}")
                    return errors

            output_dir.mkdir(parents=True, exist_ok=overwrite)

        except PermissionError:
            errors.append(f"Permission denied creating directory: {output_dir}")
        except OSError as e:
            errors.append(f"Error creating output directory: {e}")

        return errors
class ExplodeMode(Enum):
    """
    Modes for explode operations affecting behavior and output.

    ``STANDARD`` is the default ``mode`` of ``ExplodeOptions``. Each member's
    value is its lower-case string form.
    """

    STANDARD = "standard"
    """Standard explode operation with manifest generation."""

    LEGACY = "legacy"
    """Legacy mode without manifest for backward compatibility."""

    PREVIEW = "preview"
    """Preview mode showing what would be created without actual creation."""


class ManifestVersion(Enum):
    """
    Manifest format versions for backward compatibility.

    The member value is the version string written to the manifest's
    ``manifest_version`` field; ``V1_0`` is the default used by
    ``ManifestData`` and the fallback when a parsed manifest omits the field.
    """

    V1_0 = "1.0"
    """Initial manifest format with basic structure preservation."""

    V1_1 = "1.1"
    """Enhanced manifest with asset tracking and metadata."""


class DetectionConfidence(Enum):
    """
    Confidence levels for variant auto-detection.

    Reported on ``DetectionResult.confidence``; the string value is what
    ``md-implode --verbose`` prints next to the detected variant.
    """

    # Ordered from most to least certain.
    HIGH = "high"
    """High confidence - manifest found or clear patterns detected."""

    MEDIUM = "medium"
    """Medium confidence - some patterns match but ambiguous."""

    LOW = "low"
    """Low confidence - minimal patterns, fallback detection."""

    UNKNOWN = "unknown"
    """Cannot determine variant - requires manual specification."""
class ManifestManager:
    """
    Manages manifest.md files for explode-implode operations.

    The manifest system ensures complete reversibility by preserving:
    - Original file structure and ordering
    - Heading hierarchy and relationships
    - Metadata and configuration options
    - Variant-specific information

    A manifest is a markdown file whose YAML front matter carries the
    machine-readable data; the markdown body below it is informational only
    and is ignored when the manifest is parsed.
    """

    MANIFEST_FILENAME = "manifest.md"

    def __init__(self, markitect_version: str = "0.1.0"):
        """
        Initialize the manifest manager.

        Args:
            markitect_version: Version of MarkiTect creating the manifest
        """
        self.markitect_version = markitect_version

    def create_manifest(
        self,
        output_dir: Path,
        original_file: Path,
        variant: ExplodeVariant,
        structure: List[StructureEntry],
        preservation_options: Optional[Dict[str, bool]] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> Path:
        """
        Create a manifest.md file in the output directory.

        Args:
            output_dir: Directory where manifest should be created
            original_file: Path to the original markdown file
            variant: Variant used for explosion
            structure: List of structure entries describing the explosion
            preservation_options: Options for what was preserved
            metadata: Additional metadata to include

        Returns:
            Path to the created manifest file

        Raises:
            PermissionError: If unable to write manifest file
            ValueError: If invalid data provided
        """
        if preservation_options is None:
            preservation_options = {
                "front_matter": True,
                "section_order": True,
                "heading_levels": True
            }

        manifest_data = ManifestData(
            explosion_type=variant.value,
            # Only the file name is recorded, not the full path.
            original_file=str(original_file.name),
            created=datetime.now().isoformat(),
            markitect_version=self.markitect_version,
            preservation=preservation_options,
            structure=structure,
            metadata=metadata or {}
        )

        manifest_path = output_dir / self.MANIFEST_FILENAME
        content = self._generate_manifest_content(manifest_data, directory=output_dir)

        try:
            manifest_path.write_text(content, encoding='utf-8')
        except (OSError, UnicodeError) as e:
            # Callers are documented to catch PermissionError; keep that
            # type but chain the real cause for debugging.
            raise PermissionError(f"Unable to write manifest file: {e}") from e

        return manifest_path

    def read_manifest(self, directory: Path) -> Optional[ManifestData]:
        """
        Read and parse a manifest.md file from a directory.

        Args:
            directory: Directory containing the manifest file

        Returns:
            Parsed manifest data, or None if no valid manifest found
            (missing, unreadable, or unparsable)
        """
        manifest_path = directory / self.MANIFEST_FILENAME

        if not manifest_path.exists():
            return None

        try:
            content = manifest_path.read_text(encoding='utf-8')
            return self._parse_manifest_content(content)
        except (OSError, ValueError):
            # OSError: unreadable file. ValueError: bad encoding
            # (UnicodeDecodeError) or any parse failure, which
            # _parse_manifest_content always reports as ValueError.
            return None

    def validate_manifest(self, manifest_data: ManifestData) -> List[str]:
        """
        Validate manifest data for completeness and consistency.

        Args:
            manifest_data: Manifest data to validate

        Returns:
            List of validation errors (empty if valid)
        """
        errors = []

        # Required fields
        if not manifest_data.explosion_type:
            errors.append("Missing explosion_type")

        if not manifest_data.original_file:
            errors.append("Missing original_file")

        if not manifest_data.created:
            errors.append("Missing created timestamp")

        # Validate explosion type
        try:
            ExplodeVariant(manifest_data.explosion_type)
        except ValueError:
            errors.append(f"Invalid explosion_type: {manifest_data.explosion_type}")

        # Validate structure if present
        if manifest_data.structure:
            for i, entry in enumerate(manifest_data.structure):
                if not entry.type:
                    errors.append(f"Structure entry {i}: missing type")
                if not entry.title:
                    errors.append(f"Structure entry {i}: missing title")
                if not entry.path:
                    errors.append(f"Structure entry {i}: missing path")
                if entry.order < 0:
                    errors.append(f"Structure entry {i}: invalid order {entry.order}")

        return errors

    def update_manifest(
        self,
        directory: Path,
        updates: Dict[str, Any]
    ) -> bool:
        """
        Update an existing manifest with new data.

        Keys in ``updates`` that are not attributes of the manifest data are
        ignored. The manifest file is regenerated from scratch.

        Args:
            directory: Directory containing the manifest
            updates: Dictionary of updates to apply

        Returns:
            True if update successful, False otherwise
        """
        manifest_data = self.read_manifest(directory)
        if not manifest_data:
            return False

        try:
            # Apply updates
            for key, value in updates.items():
                if hasattr(manifest_data, key):
                    setattr(manifest_data, key, value)

            # Recreate manifest
            manifest_path = directory / self.MANIFEST_FILENAME
            content = self._generate_manifest_content(manifest_data, directory=directory)
            manifest_path.write_text(content, encoding='utf-8')

            return True
        except Exception:
            # Deliberate best-effort: the boolean return is the contract,
            # so serialization and I/O failures both map to False.
            return False

    @staticmethod
    def _implode_target(directory: Optional[Path]) -> str:
        """
        Build the directory argument shown in the manifest's implode example.

        Args:
            directory: Exploded directory holding the manifest, if known

        Returns:
            ``"<directory name>/"``, or ``"./"`` when the name is unknown
        """
        if directory is None:
            return "./"
        # resolve() turns relative paths such as Path('.') into a real name.
        name = Path(directory).resolve().name
        return f"{name}/" if name else "./"

    def _generate_manifest_content(
        self,
        manifest_data: ManifestData,
        directory: Optional[Path] = None
    ) -> str:
        """
        Generate the complete manifest.md content.

        Args:
            manifest_data: Manifest data to serialize
            directory: Exploded directory the manifest lives in; used only
                for the reconstruction command shown in the manifest body

        Returns:
            Complete manifest file content
        """
        # Convert dataclasses to dictionaries for YAML serialization
        yaml_data = {}

        # Basic metadata
        yaml_data['explosion_type'] = manifest_data.explosion_type
        yaml_data['original_file'] = manifest_data.original_file
        yaml_data['created'] = manifest_data.created
        yaml_data['markitect_version'] = manifest_data.markitect_version
        yaml_data['manifest_version'] = manifest_data.manifest_version

        # Optional sections
        if manifest_data.preservation:
            yaml_data['preservation'] = manifest_data.preservation

        if manifest_data.structure:
            yaml_data['structure'] = [
                {
                    'type': entry.type,
                    'title': entry.title,
                    'path': entry.path,
                    'order': entry.order,
                    'parent': entry.parent,
                    'level': entry.level,
                    'original_line': entry.original_line
                }
                for entry in manifest_data.structure
            ]

        if manifest_data.metadata:
            yaml_data['metadata'] = manifest_data.metadata

        # Generate YAML front matter
        yaml_content = yaml.dump(yaml_data, default_flow_style=False, sort_keys=False)

        # Path('.').name is always '', which used to render the command as
        # "markitect md-implode /"; use the real directory name instead.
        implode_target = self._implode_target(directory)

        # Generate complete manifest
        content = f"""---
{yaml_content}---

# Explosion Manifest

This directory was created by exploding `{manifest_data.original_file}` using the **{manifest_data.explosion_type}** structure variant.

## Structure Overview

The original markdown file has been exploded into a directory structure that preserves all content and structural information. This manifest file ensures the explosion is completely reversible.

## Reconstruction

To reconstruct the original file, use:

```bash
markitect md-implode {implode_target}
```

The implode operation will automatically detect the variant type from this manifest and reconstruct the original structure.

## Preservation Details

{self._generate_preservation_details(manifest_data.preservation or {})}

---
*Generated by MarkiTect {manifest_data.markitect_version} on {manifest_data.created}*
"""
        return content

    def _parse_manifest_content(self, content: str) -> ManifestData:
        """
        Parse manifest content into structured data.

        Only the YAML front matter (between the leading ``---`` and the next
        ``---`` line) is read; the markdown body is ignored.

        Args:
            content: Raw manifest file content

        Returns:
            Parsed manifest data

        Raises:
            ValueError: If content cannot be parsed
        """
        try:
            # Extract YAML front matter
            if not content.startswith('---'):
                raise ValueError("Manifest does not start with YAML front matter")

            # Find the end of front matter: first '---' line after line 0
            lines = content.split('\n')
            yaml_end = next(
                (i for i, line in enumerate(lines[1:], 1) if line.strip() == '---'),
                -1
            )

            if yaml_end == -1:
                raise ValueError("YAML front matter not properly closed")

            # Parse YAML
            yaml_content = '\n'.join(lines[1:yaml_end])
            yaml_data = yaml.safe_load(yaml_content)

            # safe_load returns None for empty input and may return a list
            # or scalar; fail with a clear message instead of a TypeError.
            if not isinstance(yaml_data, dict):
                raise ValueError("YAML front matter is not a mapping")

            # Convert structure entries
            structure = None
            if yaml_data.get('structure'):
                structure = [
                    StructureEntry(
                        type=entry['type'],
                        title=entry['title'],
                        path=entry['path'],
                        order=entry['order'],
                        parent=entry.get('parent'),
                        level=entry.get('level', 1),
                        original_line=entry.get('original_line')
                    )
                    for entry in yaml_data['structure']
                ]

            return ManifestData(
                explosion_type=yaml_data['explosion_type'],
                original_file=yaml_data['original_file'],
                created=yaml_data['created'],
                markitect_version=yaml_data['markitect_version'],
                manifest_version=yaml_data.get('manifest_version', ManifestVersion.V1_0.value),
                preservation=yaml_data.get('preservation'),
                structure=structure,
                metadata=yaml_data.get('metadata')
            )

        except Exception as e:
            # Every failure (YAML syntax, missing key, wrong shape) is
            # normalized to the documented ValueError, with the cause chained.
            raise ValueError(f"Error parsing manifest content: {e}") from e

    def _generate_preservation_details(self, preservation: Dict[str, bool]) -> str:
        """
        Generate human-readable preservation details.

        Args:
            preservation: Mapping of option name to whether it was preserved

        Returns:
            One markdown bullet per option, or a placeholder sentence when
            the mapping is empty
        """
        if not preservation:
            return "No specific preservation options recorded."

        details = []
        for option, enabled in preservation.items():
            status = "✅ Preserved" if enabled else "❌ Not preserved"
            option_name = option.replace('_', ' ').title()
            details.append(f"- **{option_name}**: {status}")

        return '\n'.join(details)
class VariantDetector:
    """
    Detects explode variants from directory structures.

    Uses multiple detection strategies:
    1. Manifest file analysis (highest confidence)
    2. Directory naming pattern recognition
    3. Semantic directory structure analysis
    4. File organization heuristics
    """

    # Substrings that mark a directory name as "semantic". Shared by
    # _detect_from_patterns and is_exploded_directory so that a directory
    # detected as SEMANTIC is also recognized as an exploded directory.
    SEMANTIC_DIRECTORY_NAMES = ('parts', 'chapters', 'sections', 'appendices', 'references')

    # Numbered prefix such as "01_" used by the hierarchical variant.
    _NUMBERED_PREFIX = re.compile(r'^\d+_')

    def __init__(self):
        """Initialize the variant detector."""
        self.manifest_manager = ManifestManager()

    @classmethod
    def _has_semantic_name(cls, name: str) -> bool:
        """Return True if a directory name contains a semantic indicator."""
        lowered = name.lower()
        return any(indicator in lowered for indicator in cls.SEMANTIC_DIRECTORY_NAMES)

    def detect_variant(self, directory: Path) -> DetectionResult:
        """
        Detect the explode variant used for a directory structure.

        A manifest naming a valid variant wins outright; otherwise the
        higher-scoring of the pattern and semantic heuristics is returned.

        Args:
            directory: Path to the exploded directory to analyze

        Returns:
            Detection result with variant, confidence, and evidence
        """
        if not directory.exists() or not directory.is_dir():
            return DetectionResult(
                variant=None,
                confidence=DetectionConfidence.UNKNOWN,
                score=0.0,
                evidence=["Directory does not exist or is not a directory"],
                manifest_found=False
            )

        # Strategy 1: Check for manifest file (highest priority)
        manifest_result = self._detect_from_manifest(directory)
        if manifest_result.manifest_found and manifest_result.variant:
            return manifest_result

        # Strategy 2: Pattern-based detection
        pattern_result = self._detect_from_patterns(directory)

        # Strategy 3: Semantic analysis
        semantic_result = self._detect_from_semantics(directory)

        # Combine results and return best match
        return self._combine_detection_results([
            manifest_result,
            pattern_result,
            semantic_result
        ])

    def _detect_from_manifest(self, directory: Path) -> DetectionResult:
        """
        Detect variant from manifest file.

        Args:
            directory: Directory to check for manifest

        Returns:
            Detection result based on manifest analysis
        """
        manifest_data = self.manifest_manager.read_manifest(directory)

        if not manifest_data:
            return DetectionResult(
                variant=None,
                confidence=DetectionConfidence.UNKNOWN,
                score=0.0,
                evidence=["No manifest.md file found"],
                manifest_found=False
            )

        try:
            variant = ExplodeVariant(manifest_data.explosion_type)
        except ValueError:
            # Manifest exists but names a variant this version doesn't know.
            return DetectionResult(
                variant=None,
                confidence=DetectionConfidence.LOW,
                score=0.1,
                evidence=[f"Invalid variant in manifest: {manifest_data.explosion_type}"],
                manifest_found=True,
                manifest_data=manifest_data
            )

        return DetectionResult(
            variant=variant,
            confidence=DetectionConfidence.HIGH,
            score=1.0,
            evidence=[f"Manifest indicates {variant.value} variant"],
            manifest_found=True,
            manifest_data=manifest_data
        )

    def _detect_from_patterns(self, directory: Path) -> DetectionResult:
        """
        Detect variant from directory naming patterns.

        Only the immediate subdirectories of ``directory`` are inspected.

        Args:
            directory: Directory to analyze

        Returns:
            Detection result based on naming patterns
        """
        subdirs = [d for d in directory.iterdir() if d.is_dir()]
        evidence = []
        scores = {variant: 0.0 for variant in ExplodeVariant}

        # Count numbered prefixes (hierarchical indicator)
        numbered_dirs = sum(1 for d in subdirs if self._NUMBERED_PREFIX.match(d.name))

        if numbered_dirs > 0:
            # A positive count guarantees subdirs is non-empty.
            ratio = numbered_dirs / len(subdirs)
            scores[ExplodeVariant.HIERARCHICAL] += ratio * 0.8
            evidence.append(f"Found {numbered_dirs}/{len(subdirs)} directories with numbered prefixes")

        # Check for semantic directory names
        semantic_matches = sum(1 for d in subdirs if self._has_semantic_name(d.name))

        if semantic_matches > 0:
            scores[ExplodeVariant.SEMANTIC] += (semantic_matches / len(subdirs)) * 0.7
            evidence.append(f"Found {semantic_matches} semantic directory names")

        # Default to flat if no strong patterns
        if max(scores.values()) < 0.3:
            scores[ExplodeVariant.FLAT] = 0.6
            evidence.append("No strong hierarchical or semantic patterns detected")

        # Determine best match
        best_variant = max(scores.keys(), key=lambda k: scores[k])
        best_score = scores[best_variant]

        if best_score > 0.7:
            confidence = DetectionConfidence.HIGH
        elif best_score > 0.4:
            confidence = DetectionConfidence.MEDIUM
        else:
            confidence = DetectionConfidence.LOW

        return DetectionResult(
            variant=best_variant,
            confidence=confidence,
            score=best_score,
            evidence=evidence,
            manifest_found=False
        )

    def _detect_from_semantics(self, directory: Path) -> DetectionResult:
        """
        Detect variant from semantic analysis of content organization.

        Looks at nesting depth, how many markdown files sit at the root, and
        how many index.md files exist anywhere in the tree.

        Args:
            directory: Directory to analyze

        Returns:
            Detection result based on semantic analysis
        """
        evidence = []
        scores = {variant: 0.0 for variant in ExplodeVariant}

        # Analyze directory depth and organization
        max_depth = self._calculate_max_depth(directory)
        total_dirs = len(list(directory.glob("**/")))

        evidence.append(f"Maximum depth: {max_depth}, Total directories: {total_dirs}")

        # Deep nesting suggests hierarchical
        if max_depth > 3:
            scores[ExplodeVariant.HIERARCHICAL] += 0.6
            evidence.append("Deep nesting suggests hierarchical organization")

        # Analyze file distribution
        md_files = list(directory.glob("**/*.md"))
        if md_files:
            # Exclude manifest from count
            content_files = [f for f in md_files if f.name != "manifest.md"]

            # Many files at root level suggests flat
            root_files = [f for f in content_files if f.parent == directory]
            if len(root_files) > len(content_files) * 0.6:
                scores[ExplodeVariant.FLAT] += 0.5
                evidence.append("Many files at root level suggests flat organization")

            # Check for index.md files (hierarchical indicator)
            index_files = list(directory.glob("**/index.md"))
            if len(index_files) > 2:  # More than just root index
                scores[ExplodeVariant.HIERARCHICAL] += 0.4
                evidence.append(f"Found {len(index_files)} index.md files")

        # Determine best match; on an all-zero tie max() keeps the first
        # enum member, and the zero score lets the caller discard it.
        best_variant = max(scores.keys(), key=lambda k: scores[k])
        best_score = scores[best_variant]

        if best_score > 0.5:
            confidence = DetectionConfidence.MEDIUM
        else:
            confidence = DetectionConfidence.LOW

        return DetectionResult(
            variant=best_variant,
            confidence=confidence,
            score=best_score,
            evidence=evidence,
            manifest_found=False
        )

    def _combine_detection_results(self, results: List[DetectionResult]) -> DetectionResult:
        """
        Combine multiple detection results into a single best result.

        Args:
            results: List of detection results to combine

        Returns:
            Combined detection result
        """
        # If we have a manifest result, prioritize it
        manifest_result = next((r for r in results if r.manifest_found), None)
        if manifest_result and manifest_result.variant:
            return manifest_result

        # Otherwise find result with highest score (ignoring manifest results without variants)
        non_manifest_results = [r for r in results if not r.manifest_found]
        if non_manifest_results:
            best_result = max(non_manifest_results, key=lambda r: r.score)
            if best_result.score > 0:
                return best_result

        # Fallback to flat variant if no good detection
        return DetectionResult(
            variant=ExplodeVariant.FLAT,
            confidence=DetectionConfidence.LOW,
            score=0.1,
            evidence=["No clear patterns detected, defaulting to flat variant"],
            manifest_found=False
        )

    def _calculate_max_depth(self, directory: Path) -> int:
        """
        Calculate the maximum depth of subdirectories.

        Args:
            directory: Directory to analyze

        Returns:
            Maximum depth (root = 0)
        """
        max_depth = 0
        for path in directory.glob("**/"):
            try:
                depth = len(path.relative_to(directory).parts)
            except ValueError:
                # Path is not under directory; ignore it.
                continue
            max_depth = max(max_depth, depth)
        return max_depth

    def is_exploded_directory(self, directory: Path) -> bool:
        """
        Check if a directory appears to be an exploded markdown structure.

        A manifest.md at the root is sufficient. Otherwise the directory must
        contain markdown files plus at least one structural hint: a
        subdirectory with index.md, a numbered subdirectory, a semantically
        named subdirectory, or several markdown files spread over more than
        one subdirectory.

        Args:
            directory: Directory to check

        Returns:
            True if directory appears to be exploded markdown content
        """
        if not directory.exists() or not directory.is_dir():
            return False

        # Check for manifest file
        if (directory / "manifest.md").exists():
            return True

        # Check for markdown files
        md_files = list(directory.glob("**/*.md"))
        if not md_files:
            return False

        # Check for typical exploded patterns
        subdirs = [d for d in directory.iterdir() if d.is_dir()]

        # Look for index.md files
        if any((d / "index.md").exists() for d in subdirs):
            return True

        # Look for numbered directories
        if any(self._NUMBERED_PREFIX.match(d.name) for d in subdirs):
            return True

        # Look for semantic directories (same name list as detection uses)
        if any(self._has_semantic_name(d.name) for d in subdirs):
            return True

        # If we have multiple markdown files in organized subdirectories
        if len(md_files) > 2 and len(subdirs) > 1:
            return True

        return False
would be done without creating files') @click.option('--verbose', '-v', is_flag=True, help='Show detailed output during processing') @click.pass_context -def md_explode_command(ctx, input_file, output_dir, max_depth, dry_run, verbose): +def md_explode_command(ctx, input_file, output_dir, variant, max_depth, create_manifest, dry_run, verbose): """ Explode a markdown file into a directory structure. Takes a markdown file with hierarchical headings (# ## ### etc.) and creates a directory structure where each heading becomes a directory or file, with - content distributed appropriately. + content distributed appropriately. Supports multiple organization variants + for different use cases. INPUT_FILE: Path to the markdown file to explode + Variants: + flat: Current default - creates directories based on h1 headings + hierarchical: Numbered structure reflecting heading hierarchy + semantic: Content-based grouping (parts, chapters, appendices) + Examples: - # Explode book.md into book_exploded/ directory + # Explode book.md into book_exploded/ directory (flat structure) markitect md-explode book.md + # Use hierarchical structure with numbered directories + markitect md-explode book.md --variant hierarchical + # Explode into custom output directory markitect md-explode book.md --output-dir /path/to/chapters # Preview what would be created - markitect md-explode book.md --dry-run --verbose + markitect md-explode book.md --dry-run --verbose --variant semantic + + # Explode without creating manifest (legacy mode) + markitect md-explode book.md --no-manifest """ config = ctx.obj or {} try: input_path = Path(input_file) + # Note: Variant system infrastructure is in place, but only 'flat' is currently implemented + # hierarchical and semantic variants will be implemented in Phase 2 (Issue #149) + if variant != 'flat': + click.echo(f"⚠️ Warning: '{variant}' variant not yet implemented. 
Using 'flat' variant.") + click.echo(" Hierarchical and semantic variants coming in Phase 2.") + variant = 'flat' + # Determine output directory if output_dir: output_path = Path(output_dir) else: - output_path = input_path.parent / f"{input_path.stem}_exploded" + # For future: variant-specific naming like book.mdd/ + suffix = "_exploded" if variant == 'flat' else ".mdd" + output_path = input_path.parent / f"{input_path.stem}{suffix}" is_verbose = verbose or config.get('verbose', False) @@ -2999,6 +3024,8 @@ def cli_implode_directory(input_dir, output_file, dry_run=False, verbose=False, @click.argument('input_dir', type=click.Path(exists=True, file_okay=False, dir_okay=True)) @click.option('--output', '-o', type=click.Path(), help='Output markdown file (default: _imploded.md)') +@click.option('--force-variant', type=click.Choice(['flat', 'hierarchical', 'semantic']), + help='Force specific variant instead of auto-detection') @click.option('--dry-run', is_flag=True, help='Preview what would be created without writing files') @click.option('--verbose', '-v', is_flag=True, @@ -3010,25 +3037,35 @@ def cli_implode_directory(input_dir, output_file, dry_run=False, verbose=False, @click.option('--preserve-front-matter/--no-front-matter', default=True, help='Preserve YAML front matter from files (default: preserve)') @click.pass_context -def md_implode_command(ctx, input_dir, output, dry_run, verbose, overwrite, +def md_implode_command(ctx, input_dir, output, force_variant, dry_run, verbose, overwrite, section_spacing, preserve_front_matter): """ Implode a directory structure back into a single markdown file. Takes a directory structure (like one created by md-explode) and combines all markdown files back into a single document, reconstructing the original - hierarchical heading structure. + hierarchical heading structure. Automatically detects the variant used + during explosion for optimal reconstruction. 
INPUT_DIR: Path to the directory to implode + Auto-Detection: + The command automatically detects the variant type by analyzing: + - manifest.md file (highest priority) + - Directory naming patterns + - Content organization structure + Examples: - # Implode exploded directory back to markdown + # Implode exploded directory back to markdown (auto-detect variant) markitect md-implode book_exploded/ + # Force specific variant instead of auto-detection + markitect md-implode chapters/ --force-variant hierarchical + # Specify custom output file markitect md-implode chapters/ --output reconstructed.md - # Preview what would be created + # Preview what would be created with detection info markitect md-implode content/ --dry-run --verbose """ config = ctx.obj or {} @@ -3036,6 +3073,43 @@ def md_implode_command(ctx, input_dir, output, dry_run, verbose, overwrite, try: input_path = Path(input_dir) + # Auto-detect variant unless forced + detected_variant = None + detection_info = None + + if force_variant: + detected_variant = force_variant + detection_info = f"Forced variant: {force_variant}" + else: + try: + # Import here to avoid circular imports during command registration + from markitect.explode_variants import VariantDetector + detector = VariantDetector() + detection_result = detector.detect_variant(input_path) + + if detection_result.variant: + detected_variant = detection_result.variant.value + detection_info = f"Auto-detected: {detection_result.variant.value} (confidence: {detection_result.confidence.value})" + if verbose: + click.echo(f"🔍 {detection_info}") + for evidence in detection_result.evidence: + click.echo(f" • {evidence}") + else: + detected_variant = 'flat' # fallback + detection_info = "Fallback to flat variant (no clear patterns detected)" + if verbose: + click.echo(f"⚠️ {detection_info}") + + except ImportError: + detected_variant = 'flat' # fallback if variant system not available + detection_info = "Using flat variant (variant system not available)" 
+ + # Note: Currently only flat variant is implemented + if detected_variant != 'flat': + click.echo(f"⚠️ Warning: '{detected_variant}' variant detected but not yet implemented.") + click.echo(" Using 'flat' variant for now. Full variant support coming in Phase 2.") + detected_variant = 'flat' + # Determine output file if output: output_path = Path(output) diff --git a/tests/test_issue_148_core_infrastructure.py b/tests/test_issue_148_core_infrastructure.py new file mode 100644 index 00000000..f6077aed --- /dev/null +++ b/tests/test_issue_148_core_infrastructure.py @@ -0,0 +1,399 @@ +""" +Test suite for Issue #148 - Core Infrastructure for Explode-Implode Variants + +Tests the foundational infrastructure components that support multiple +explode-implode variants with manifest-based reversibility. +""" + +import pytest +import tempfile +import yaml +from pathlib import Path +from datetime import datetime + +from markitect.explode_variants import ( + ExplodeVariant, ExplodeMode, ManifestVersion, DetectionConfidence, + BaseVariant, ExplodeOptions, ImplodeOptions, ExplodeResult, ImplodeResult, + ManifestManager, ManifestData, StructureEntry, + VariantDetector, DetectionResult +) + + +class TestExplodeVariantEnum: + """Test the ExplodeVariant enum and related enums.""" + + def test_explode_variant_values(self): + """Test that all expected variants are available.""" + assert ExplodeVariant.FLAT.value == "flat" + assert ExplodeVariant.HIERARCHICAL.value == "hierarchical" + assert ExplodeVariant.SEMANTIC.value == "semantic" + + def test_explode_mode_values(self): + """Test ExplodeMode enum values.""" + assert ExplodeMode.STANDARD.value == "standard" + assert ExplodeMode.LEGACY.value == "legacy" + assert ExplodeMode.PREVIEW.value == "preview" + + def test_detection_confidence_values(self): + """Test DetectionConfidence enum values.""" + assert DetectionConfidence.HIGH.value == "high" + assert DetectionConfidence.MEDIUM.value == "medium" + assert 
DetectionConfidence.LOW.value == "low" + assert DetectionConfidence.UNKNOWN.value == "unknown" + + +class TestStructureEntry: + """Test the StructureEntry dataclass.""" + + def test_structure_entry_creation(self): + """Test creating a StructureEntry.""" + entry = StructureEntry( + type="h1", + title="Chapter 1", + path="chapter_1/index.md", + order=1, + parent=None, + level=1, + original_line=5 + ) + + assert entry.type == "h1" + assert entry.title == "Chapter 1" + assert entry.path == "chapter_1/index.md" + assert entry.order == 1 + assert entry.parent is None + assert entry.level == 1 + assert entry.original_line == 5 + + def test_structure_entry_defaults(self): + """Test StructureEntry with default values.""" + entry = StructureEntry( + type="h2", + title="Section", + path="section.md", + order=2 + ) + + assert entry.parent is None + assert entry.level == 1 + assert entry.original_line is None + + +class TestManifestData: + """Test the ManifestData dataclass.""" + + def test_manifest_data_creation(self): + """Test creating ManifestData.""" + manifest = ManifestData( + explosion_type="flat", + original_file="book.md", + created="2025-10-12T19:30:00Z", + markitect_version="0.1.0" + ) + + assert manifest.explosion_type == "flat" + assert manifest.original_file == "book.md" + assert manifest.created == "2025-10-12T19:30:00Z" + assert manifest.markitect_version == "0.1.0" + assert manifest.manifest_version == ManifestVersion.V1_0.value + + +class TestManifestManager: + """Test the ManifestManager class.""" + + def test_manifest_manager_initialization(self): + """Test ManifestManager initialization.""" + manager = ManifestManager("0.1.0") + assert manager.markitect_version == "0.1.0" + assert manager.MANIFEST_FILENAME == "manifest.md" + + def test_create_manifest(self): + """Test creating a manifest file.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + manager = ManifestManager("0.1.0") + + # Create test structure + structure = [ + 
StructureEntry( + type="h1", + title="Book Title", + path="book_title/index.md", + order=1 + ), + StructureEntry( + type="h2", + title="Chapter 1", + path="book_title/chapter_1.md", + order=2, + parent="Book Title" + ) + ] + + manifest_path = manager.create_manifest( + output_dir=temp_path, + original_file=Path("book.md"), + variant=ExplodeVariant.FLAT, + structure=structure, + preservation_options={ + "front_matter": True, + "section_order": True, + "heading_levels": True + } + ) + + assert manifest_path.exists() + assert manifest_path.name == "manifest.md" + + # Verify content + content = manifest_path.read_text(encoding='utf-8') + assert "explosion_type: flat" in content + assert "original_file: book.md" in content + assert "Book Title" in content + assert "Chapter 1" in content + + def test_read_manifest(self): + """Test reading a manifest file.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + manager = ManifestManager("0.1.0") + + # Create manifest + structure = [ + StructureEntry( + type="h1", + title="Test Title", + path="test_title/index.md", + order=1 + ) + ] + + manifest_path = manager.create_manifest( + output_dir=temp_path, + original_file=Path("test.md"), + variant=ExplodeVariant.HIERARCHICAL, + structure=structure + ) + + # Read manifest back + manifest_data = manager.read_manifest(temp_path) + + assert manifest_data is not None + assert manifest_data.explosion_type == "hierarchical" + assert manifest_data.original_file == "test.md" + assert manifest_data.markitect_version == "0.1.0" + assert len(manifest_data.structure) == 1 + assert manifest_data.structure[0].title == "Test Title" + + def test_read_nonexistent_manifest(self): + """Test reading manifest from directory without one.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + manager = ManifestManager("0.1.0") + + manifest_data = manager.read_manifest(temp_path) + assert manifest_data is None + + def 
test_validate_manifest(self): + """Test manifest validation.""" + manager = ManifestManager("0.1.0") + + # Valid manifest + valid_manifest = ManifestData( + explosion_type="flat", + original_file="test.md", + created="2025-10-12T19:30:00Z", + markitect_version="0.1.0" + ) + + errors = manager.validate_manifest(valid_manifest) + assert len(errors) == 0 + + # Invalid manifest + invalid_manifest = ManifestData( + explosion_type="invalid_variant", + original_file="", + created="", + markitect_version="0.1.0" + ) + + errors = manager.validate_manifest(invalid_manifest) + assert len(errors) > 0 + assert any("Invalid explosion_type" in error for error in errors) + assert any("Missing original_file" in error for error in errors) + + +class TestVariantDetector: + """Test the VariantDetector class.""" + + def test_variant_detector_initialization(self): + """Test VariantDetector initialization.""" + detector = VariantDetector() + assert detector.manifest_manager is not None + + def test_detect_variant_nonexistent_directory(self): + """Test variant detection on nonexistent directory.""" + detector = VariantDetector() + result = detector.detect_variant(Path("/nonexistent/path")) + + assert result.variant is None + assert result.confidence == DetectionConfidence.UNKNOWN + assert result.score == 0.0 + assert not result.manifest_found + assert "does not exist" in result.evidence[0] + + def test_detect_variant_with_manifest(self): + """Test variant detection when manifest is present.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create a manifest + manager = ManifestManager("0.1.0") + manager.create_manifest( + output_dir=temp_path, + original_file=Path("test.md"), + variant=ExplodeVariant.HIERARCHICAL, + structure=[] + ) + + detector = VariantDetector() + result = detector.detect_variant(temp_path) + + assert result.variant == ExplodeVariant.HIERARCHICAL + assert result.confidence == DetectionConfidence.HIGH + assert result.score == 1.0 + 
assert result.manifest_found + assert result.manifest_data is not None + + def test_detect_variant_hierarchical_pattern(self): + """Test variant detection based on hierarchical naming patterns.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create directories with numbered prefixes + (temp_path / "01_chapter_one").mkdir() + (temp_path / "02_chapter_two").mkdir() + (temp_path / "03_chapter_three").mkdir() + + detector = VariantDetector() + result = detector.detect_variant(temp_path) + + assert result.variant in [ExplodeVariant.HIERARCHICAL, ExplodeVariant.FLAT] + assert result.confidence in [DetectionConfidence.HIGH, DetectionConfidence.MEDIUM] + assert not result.manifest_found + + def test_detect_variant_semantic_pattern(self): + """Test variant detection based on semantic directory names.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create semantic directories + (temp_path / "parts").mkdir() + (temp_path / "chapters").mkdir() + (temp_path / "appendices").mkdir() + + detector = VariantDetector() + result = detector.detect_variant(temp_path) + + # Should detect semantic or fall back to flat + assert result.variant in [ExplodeVariant.SEMANTIC, ExplodeVariant.FLAT] + assert not result.manifest_found + + def test_is_exploded_directory(self): + """Test detection of exploded directory structures.""" + detector = VariantDetector() + + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Empty directory should not be detected as exploded + assert not detector.is_exploded_directory(temp_path) + + # Directory with manifest should be detected + (temp_path / "manifest.md").write_text("test manifest") + assert detector.is_exploded_directory(temp_path) + + # Clean up and test other patterns + (temp_path / "manifest.md").unlink() + + # Directory with numbered subdirs and markdown should be detected + subdir = temp_path / "01_chapter" + subdir.mkdir() + (subdir / 
"index.md").write_text("test content") + assert detector.is_exploded_directory(temp_path) + + +class TestExplodeImplodeOptions: + """Test the options dataclasses.""" + + def test_explode_options_defaults(self): + """Test ExplodeOptions with defaults.""" + options = ExplodeOptions(variant=ExplodeVariant.FLAT) + + assert options.variant == ExplodeVariant.FLAT + assert options.mode == ExplodeMode.STANDARD + assert options.output_dir is None + assert options.max_depth is None + assert options.preserve_front_matter is True + assert options.section_spacing == 2 + assert options.dry_run is False + assert options.verbose is False + assert options.create_manifest is True + + def test_implode_options_defaults(self): + """Test ImplodeOptions with defaults.""" + options = ImplodeOptions() + + assert options.output_file is None + assert options.force_variant is None + assert options.preserve_front_matter is True + assert options.section_spacing == 2 + assert options.dry_run is False + assert options.verbose is False + assert options.overwrite is False + + +class TestResults: + """Test the result dataclasses.""" + + def test_explode_result_creation(self): + """Test creating an ExplodeResult.""" + result = ExplodeResult( + success=True, + output_directory=Path("/test/output"), + files_created=[Path("file1.md"), Path("file2.md")], + manifest_path=Path("/test/output/manifest.md"), + warnings=["Warning 1"], + errors=[], + variant_used=ExplodeVariant.FLAT + ) + + assert result.success is True + assert result.output_directory == Path("/test/output") + assert len(result.files_created) == 2 + assert result.manifest_path == Path("/test/output/manifest.md") + assert len(result.warnings) == 1 + assert len(result.errors) == 0 + assert result.variant_used == ExplodeVariant.FLAT + + def test_implode_result_creation(self): + """Test creating an ImplodeResult.""" + result = ImplodeResult( + success=True, + output_file=Path("/test/output.md"), + files_processed=[Path("file1.md"), 
Path("file2.md")], + variant_detected=ExplodeVariant.HIERARCHICAL, + warnings=[], + errors=[] + ) + + assert result.success is True + assert result.output_file == Path("/test/output.md") + assert len(result.files_processed) == 2 + assert result.variant_detected == ExplodeVariant.HIERARCHICAL + assert len(result.warnings) == 0 + assert len(result.errors) == 0 + + +if __name__ == '__main__': + pytest.main([__file__, "-v"]) \ No newline at end of file