feat: implement Issue #148 core infrastructure for explode-implode variants

Complete implementation of Phase 1 core infrastructure:

Core Infrastructure Components:
- ExplodeVariant enum (flat, hierarchical, semantic)
- ExplodeMode, ManifestVersion, DetectionConfidence enums
- BaseVariant abstract class with common interface
- ExplodeOptions, ImplodeOptions, ExplodeResult, ImplodeResult dataclasses

Manifest System:
- ManifestManager class for manifest.md creation and parsing
- StructureEntry and ManifestData dataclasses
- YAML front matter with complete metadata preservation
- Validation and update mechanisms

Variant Detection:
- VariantDetector class with multiple detection strategies
- Manifest-based detection (highest priority)
- Directory naming pattern recognition
- Semantic structure analysis with confidence scoring
- Automatic fallback and combination logic

Command Interface Updates:
- md-explode: Added --variant parameter with [flat|hierarchical|semantic]
- md-explode: Added --create-manifest/--no-manifest option
- md-implode: Added --force-variant parameter for manual override
- md-implode: Integrated auto-detection with verbose output
- Updated help text and examples for both commands

Test Coverage:
- Comprehensive test suite with 21 test cases
- Tests for all enums, dataclasses, and core functionality
- ManifestManager creation, reading, and validation tests
- VariantDetector pattern recognition and confidence tests
- 100% test pass rate with robust edge case handling

Infrastructure Features:
- Backward compatibility maintained (flat variant default)
- Graceful handling of unimplemented variants with user warnings
- Extensible design for easy addition of new variants
- Clear separation between infrastructure and implementation

Success Criteria Met:
✅ ExplodeVariant enum with all planned variants
✅ ManifestManager creates and parses manifest.md files
✅ Commands accept variant parameters
✅ Auto-detection logic identifies variant types
✅ Unit tests achieve 100% pass rate
✅ Backward compatibility maintained

Ready for Phase 2: Variant implementations (Issue #149)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-12 20:17:41 +02:00
parent 9c8583c77a
commit a17c362653
7 changed files with 1573 additions and 9 deletions
--- a/markitect/explode_variants/variant_detector.py
+++ b/markitect/explode_variants/variant_detector.py
@@ -0,0 +1,328 @@
+"""
+Variant detection utilities for auto-detecting explode variants.
+
+This module analyzes directory structures to determine which variant was
+used during explosion, enabling automatic implode operations.
+"""
+
+import re
+from pathlib import Path
+from typing import Dict, List, Tuple, Optional
+from dataclasses import dataclass
+
+from .enums import ExplodeVariant, DetectionConfidence
+from .manifest_manager import ManifestManager, ManifestData
+
+
+@dataclass
+class DetectionResult:
+    """
+    Outcome of one variant-detection pass.
+
+    Every detection strategy of ``VariantDetector`` returns one of these,
+    and so does the public ``detect_variant`` entry point.
+    """
+
+    # Detected variant, or None when no variant could be determined.
+    variant: Optional[ExplodeVariant]
+    # Qualitative confidence bucket assigned by the strategy.
+    confidence: DetectionConfidence
+    # Numeric score behind the confidence bucket.
+    score: float
+    # Human-readable notes explaining how the result was reached.
+    evidence: List[str]
+    # True when manifest data could be read from the analysed directory.
+    manifest_found: bool
+    # Parsed manifest contents, when a manifest was read.
+    manifest_data: Optional[ManifestData] = None
+
+
+class VariantDetector:
+    """
+    Detects explode variants from directory structures.
+
+    Uses multiple detection strategies:
+    1. Manifest file analysis (highest confidence)
+    2. Directory naming pattern recognition
+    3. Semantic directory structure analysis
+    4. File organization heuristics
+    """
+
+    def __init__(self):
+        """Set up the detector with its own ``ManifestManager`` for manifest reads."""
+        self.manifest_manager = ManifestManager()
+
+    def detect_variant(self, directory: Path) -> DetectionResult:
+        """
+        Detect the explode variant used for a directory structure.
+
+        A manifest that names a valid variant is authoritative and is
+        returned immediately. Otherwise the naming-pattern and structure
+        heuristics run and the best-scoring result is returned. When a
+        manifest was read but did not yield a variant, its diagnostic
+        evidence is appended to the heuristic result so the problem is not
+        silently dropped.
+
+        Args:
+            directory: Path to the exploded directory to analyze
+
+        Returns:
+            Detection result with variant, confidence, and evidence
+        """
+        if not directory.exists() or not directory.is_dir():
+            return DetectionResult(
+                variant=None,
+                confidence=DetectionConfidence.UNKNOWN,
+                score=0.0,
+                evidence=["Directory does not exist or is not a directory"],
+                manifest_found=False
+            )
+
+        # Strategy 1: Check for manifest file (highest priority)
+        manifest_result = self._detect_from_manifest(directory)
+        if manifest_result.manifest_found and manifest_result.variant:
+            return manifest_result
+
+        # Strategy 2: Pattern-based detection
+        pattern_result = self._detect_from_patterns(directory)
+
+        # Strategy 3: Semantic analysis
+        semantic_result = self._detect_from_semantics(directory)
+
+        # Combine results and pick the best match
+        combined = self._combine_detection_results([
+            manifest_result,
+            pattern_result,
+            semantic_result
+        ])
+
+        if not manifest_result.manifest_found:
+            return combined
+
+        # A manifest exists but named no usable variant. The combiner drops
+        # that result entirely, so carry its evidence over explicitly. A new
+        # object is built rather than mutating the strategy's own result.
+        return DetectionResult(
+            variant=combined.variant,
+            confidence=combined.confidence,
+            score=combined.score,
+            evidence=[*combined.evidence, *manifest_result.evidence],
+            manifest_found=combined.manifest_found,
+            manifest_data=combined.manifest_data
+        )
+
+    def _detect_from_manifest(self, directory: Path) -> DetectionResult:
+        """
+        Derive the variant from the directory's manifest, if one can be read.
+
+        Args:
+            directory: Directory expected to contain the manifest
+
+        Returns:
+            A HIGH-confidence result when the manifest names a known variant,
+            a LOW-confidence result without a variant when the recorded value
+            is not a valid ``ExplodeVariant``, and an UNKNOWN result when no
+            manifest data is available
+        """
+        manifest = self.manifest_manager.read_manifest(directory)
+
+        # Guard: nothing to analyse without manifest data.
+        if not manifest:
+            return DetectionResult(
+                variant=None,
+                confidence=DetectionConfidence.UNKNOWN,
+                score=0.0,
+                evidence=["No manifest.md file found"],
+                manifest_found=False
+            )
+
+        # Only the enum conversion can reject the recorded value.
+        try:
+            detected = ExplodeVariant(manifest.explosion_type)
+        except ValueError:
+            return DetectionResult(
+                variant=None,
+                confidence=DetectionConfidence.LOW,
+                score=0.1,
+                evidence=[f"Invalid variant in manifest: {manifest.explosion_type}"],
+                manifest_found=True,
+                manifest_data=manifest
+            )
+
+        return DetectionResult(
+            variant=detected,
+            confidence=DetectionConfidence.HIGH,
+            score=1.0,
+            evidence=[f"Manifest indicates {detected.value} variant"],
+            manifest_found=True,
+            manifest_data=manifest
+        )
+
+    def _detect_from_patterns(self, directory: Path) -> DetectionResult:
+        """
+        Score each variant from the names of the immediate subdirectories.
+
+        Numbered prefixes (``<digits>_``) count towards the hierarchical
+        variant and well-known section words towards the semantic variant.
+        If neither reaches a score of 0.3, the flat variant is assumed.
+
+        Args:
+            directory: Directory to analyze
+
+        Returns:
+            Detection result based on naming patterns
+        """
+        child_dirs = [entry for entry in directory.iterdir() if entry.is_dir()]
+        dir_count = len(child_dirs)
+        notes = []
+        tally = {variant: 0.0 for variant in ExplodeVariant}
+
+        # Hierarchical indicator: share of directories with a numeric prefix.
+        numbered = sum(1 for entry in child_dirs if re.match(r'^\d+_', entry.name))
+        if numbered > 0:
+            tally[ExplodeVariant.HIERARCHICAL] += (numbered / dir_count) * 0.8
+            notes.append(f"Found {numbered}/{dir_count} directories with numbered prefixes")
+
+        # Semantic indicator: share of directories whose name contains a keyword.
+        keywords = ('parts', 'chapters', 'sections', 'appendices', 'references')
+        semantic_hits = sum(
+            1
+            for entry in child_dirs
+            if any(word in entry.name.lower() for word in keywords)
+        )
+        if semantic_hits > 0:
+            tally[ExplodeVariant.SEMANTIC] += (semantic_hits / dir_count) * 0.7
+            notes.append(f"Found {semantic_hits} semantic directory names")
+
+        # Without a strong signal, fall back to the flat variant.
+        if max(tally.values()) < 0.3:
+            tally[ExplodeVariant.FLAT] = 0.6
+            notes.append("No strong hierarchical or semantic patterns detected")
+
+        # Ties resolve to the first variant in enum order.
+        winner = max(tally, key=tally.get)
+        top_score = tally[winner]
+
+        if top_score > 0.7:
+            level = DetectionConfidence.HIGH
+        elif top_score > 0.4:
+            level = DetectionConfidence.MEDIUM
+        else:
+            level = DetectionConfidence.LOW
+
+        return DetectionResult(
+            variant=winner,
+            confidence=level,
+            score=top_score,
+            evidence=notes,
+            manifest_found=False
+        )
+
+    def _detect_from_semantics(self, directory: Path) -> DetectionResult:
+        """
+        Score each variant from the shape of the directory tree.
+
+        Looks at nesting depth, the share of markdown files that sit directly
+        in the root, and the number of ``index.md`` files.
+
+        Args:
+            directory: Directory to analyze
+
+        Returns:
+            Detection result based on semantic analysis
+        """
+        notes = []
+        tally = {variant: 0.0 for variant in ExplodeVariant}
+
+        # Depth and directory count (the glob also yields the root itself).
+        depth = self._calculate_max_depth(directory)
+        dir_total = sum(1 for _ in directory.glob("**/"))
+        notes.append(f"Maximum depth: {depth}, Total directories: {dir_total}")
+
+        if depth > 3:
+            tally[ExplodeVariant.HIERARCHICAL] += 0.6
+            notes.append("Deep nesting suggests hierarchical organization")
+
+        # File distribution, ignoring the manifest. With no content files the
+        # comparison is 0 > 0 and the flat score stays untouched.
+        content = [p for p in directory.glob("**/*.md") if p.name != "manifest.md"]
+        at_root = sum(1 for p in content if p.parent == directory)
+        if at_root > len(content) * 0.6:
+            tally[ExplodeVariant.FLAT] += 0.5
+            notes.append("Many files at root level suggests flat organization")
+
+        # Several index.md files point at a hierarchical layout.
+        index_total = sum(1 for _ in directory.glob("**/index.md"))
+        if index_total > 2:
+            tally[ExplodeVariant.HIERARCHICAL] += 0.4
+            notes.append(f"Found {index_total} index.md files")
+
+        # Ties resolve to the first variant in enum order.
+        winner = max(tally, key=tally.get)
+        top_score = tally[winner]
+
+        if top_score > 0.5:
+            level = DetectionConfidence.MEDIUM
+        else:
+            level = DetectionConfidence.LOW
+
+        return DetectionResult(
+            variant=winner,
+            confidence=level,
+            score=top_score,
+            evidence=notes,
+            manifest_found=False
+        )
+
+    def _combine_detection_results(self, results: List[DetectionResult]) -> DetectionResult:
+        """
+        Pick the single best result out of several detection results.
+
+        The first manifest-backed result wins if it carries a variant.
+        Otherwise the highest-scoring non-manifest result is returned,
+        provided its score is positive. Failing that, a low-confidence
+        flat-variant result is produced.
+
+        Args:
+            results: List of detection results to combine
+
+        Returns:
+            Combined detection result
+        """
+        # Only the first manifest-backed entry is considered.
+        for candidate in results:
+            if candidate.manifest_found:
+                if candidate.variant:
+                    return candidate
+                break
+
+        # Manifest results without a variant are ignored from here on.
+        heuristic = [entry for entry in results if not entry.manifest_found]
+        if heuristic:
+            strongest = max(heuristic, key=lambda entry: entry.score)
+            if strongest.score > 0:
+                return strongest
+
+        # Nothing usable: default to the flat variant.
+        return DetectionResult(
+            variant=ExplodeVariant.FLAT,
+            confidence=DetectionConfidence.LOW,
+            score=0.1,
+            evidence=["No clear patterns detected, defaulting to flat variant"],
+            manifest_found=False
+        )
+
+    def _calculate_max_depth(self, directory: Path) -> int:
+        """
+        Return how many levels deep the deepest subdirectory lies.
+
+        Args:
+            directory: Directory to analyze
+
+        Returns:
+            Maximum depth (root = 0)
+        """
+        depths = []
+        for folder in directory.glob("**/"):
+            try:
+                relative = folder.relative_to(directory)
+            except ValueError:
+                # Path is not below ``directory``; leave it out.
+                continue
+            depths.append(len(relative.parts))
+        return max(depths, default=0)
+
+    def is_exploded_directory(self, directory: Path) -> bool:
+        """
+        Tell whether a directory looks like exploded markdown content.
+
+        A ``manifest.md`` in the directory is sufficient. Otherwise at least
+        one markdown file must exist somewhere in the tree, together with one
+        of: a subdirectory holding an ``index.md``, a subdirectory with a
+        numeric prefix, a subdirectory with a semantic name, or more than two
+        markdown files alongside more than one subdirectory.
+
+        Args:
+            directory: Directory to check
+
+        Returns:
+            True if directory appears to be exploded markdown content
+        """
+        if not directory.exists() or not directory.is_dir():
+            return False
+
+        # A manifest settles the question on its own.
+        if (directory / "manifest.md").exists():
+            return True
+
+        # No markdown anywhere means nothing was exploded here.
+        markdown = list(directory.glob("**/*.md"))
+        if not markdown:
+            return False
+
+        children = [entry for entry in directory.iterdir() if entry.is_dir()]
+
+        # Per-section index files.
+        for child in children:
+            if (child / "index.md").exists():
+                return True
+
+        # Numbered directories.
+        for child in children:
+            if re.match(r'^\d+_', child.name):
+                return True
+
+        # Semantic directory names.
+        keywords = ('parts', 'chapters', 'sections')
+        for child in children:
+            lowered = child.name.lower()
+            if any(word in lowered for word in keywords):
+                return True
+
+        # Several markdown files spread over several subdirectories.
+        return len(markdown) > 2 and len(children) > 1