""" Variant detection utilities for auto-detecting explode variants. This module analyzes directory structures to determine which variant was used during explosion, enabling automatic implode operations. """ import re from pathlib import Path from typing import Dict, List, Tuple, Optional from dataclasses import dataclass from .enums import ExplodeVariant, DetectionConfidence from .manifest_manager import ManifestManager, ManifestData @dataclass class DetectionResult: """Result of variant detection analysis.""" variant: Optional[ExplodeVariant] confidence: DetectionConfidence score: float evidence: List[str] manifest_found: bool manifest_data: Optional[ManifestData] = None class VariantDetector: """ Detects explode variants from directory structures. Uses multiple detection strategies: 1. Manifest file analysis (highest confidence) 2. Directory naming pattern recognition 3. Semantic directory structure analysis 4. File organization heuristics """ def __init__(self): """Initialize the variant detector.""" self.manifest_manager = ManifestManager() def detect_variant(self, directory: Path) -> DetectionResult: """ Detect the explode variant used for a directory structure. Args: directory: Path to the exploded directory to analyze Returns: Detection result with variant, confidence, and evidence """ if not directory.exists() or not directory.is_dir(): return DetectionResult( variant=None, confidence=DetectionConfidence.UNKNOWN, score=0.0, evidence=["Directory does not exist or is not a directory"], manifest_found=False ) # Strategy 1: Check for manifest file (highest priority) manifest_result = self._detect_from_manifest(directory) if manifest_result.manifest_found and manifest_result.variant: return manifest_result # Strategy 2: Pattern-based detection pattern_result = self._detect_from_patterns(directory) # Strategy 3: Semantic analysis semantic_result = self._detect_from_semantics(directory) # Combine results and return best match return self._combine_detection_results([ manifest_result, pattern_result, semantic_result ]) def _detect_from_manifest(self, directory: Path) -> DetectionResult: """ Detect variant from manifest file. Args: directory: Directory to check for manifest Returns: Detection result based on manifest analysis """ manifest_data = self.manifest_manager.read_manifest(directory) if not manifest_data: return DetectionResult( variant=None, confidence=DetectionConfidence.UNKNOWN, score=0.0, evidence=["No manifest.md file found"], manifest_found=False ) try: variant = ExplodeVariant(manifest_data.explosion_type) return DetectionResult( variant=variant, confidence=DetectionConfidence.HIGH, score=1.0, evidence=[f"Manifest indicates {variant.value} variant"], manifest_found=True, manifest_data=manifest_data ) except ValueError: return DetectionResult( variant=None, confidence=DetectionConfidence.LOW, score=0.1, evidence=[f"Invalid variant in manifest: {manifest_data.explosion_type}"], manifest_found=True, manifest_data=manifest_data ) def _detect_from_patterns(self, directory: Path) -> DetectionResult: """ Detect variant from directory naming patterns. Args: directory: Directory to analyze Returns: Detection result based on naming patterns """ subdirs = [d for d in directory.iterdir() if d.is_dir()] evidence = [] scores = {variant: 0.0 for variant in ExplodeVariant} # Count numbered prefixes (hierarchical indicator) numbered_dirs = 0 for subdir in subdirs: if re.match(r'^\d+_', subdir.name): numbered_dirs += 1 if numbered_dirs > 0: ratio = numbered_dirs / len(subdirs) if subdirs else 0 scores[ExplodeVariant.HIERARCHICAL] += ratio * 0.8 evidence.append(f"Found {numbered_dirs}/{len(subdirs)} directories with numbered prefixes") # Check for semantic directory names semantic_indicators = ['parts', 'chapters', 'sections', 'appendices', 'references'] semantic_matches = 0 for subdir in subdirs: if any(indicator in subdir.name.lower() for indicator in semantic_indicators): semantic_matches += 1 if semantic_matches > 0: scores[ExplodeVariant.SEMANTIC] += (semantic_matches / len(subdirs)) * 0.7 evidence.append(f"Found {semantic_matches} semantic directory names") # Default to flat if no strong patterns if max(scores.values()) < 0.3: scores[ExplodeVariant.FLAT] = 0.6 evidence.append("No strong hierarchical or semantic patterns detected") # Determine best match best_variant = max(scores.keys(), key=lambda k: scores[k]) best_score = scores[best_variant] confidence = DetectionConfidence.HIGH if best_score > 0.7 else \ DetectionConfidence.MEDIUM if best_score > 0.4 else \ DetectionConfidence.LOW return DetectionResult( variant=best_variant, confidence=confidence, score=best_score, evidence=evidence, manifest_found=False ) def _detect_from_semantics(self, directory: Path) -> DetectionResult: """ Detect variant from semantic analysis of content organization. Args: directory: Directory to analyze Returns: Detection result based on semantic analysis """ evidence = [] scores = {variant: 0.0 for variant in ExplodeVariant} # Analyze directory depth and organization max_depth = self._calculate_max_depth(directory) total_dirs = len(list(directory.glob("**/"))) evidence.append(f"Maximum depth: {max_depth}, Total directories: {total_dirs}") # Deep nesting suggests hierarchical if max_depth > 3: scores[ExplodeVariant.HIERARCHICAL] += 0.6 evidence.append("Deep nesting suggests hierarchical organization") # Analyze file distribution md_files = list(directory.glob("**/*.md")) if md_files: # Exclude manifest from count content_files = [f for f in md_files if f.name != "manifest.md"] # Many files at root level suggests flat root_files = [f for f in content_files if f.parent == directory] if len(root_files) > len(content_files) * 0.6: scores[ExplodeVariant.FLAT] += 0.5 evidence.append("Many files at root level suggests flat organization") # Check for index.md files (hierarchical indicator) index_files = list(directory.glob("**/index.md")) if len(index_files) > 2: # More than just root index scores[ExplodeVariant.HIERARCHICAL] += 0.4 evidence.append(f"Found {len(index_files)} index.md files") # Determine best match best_variant = max(scores.keys(), key=lambda k: scores[k]) best_score = scores[best_variant] confidence = DetectionConfidence.MEDIUM if best_score > 0.5 else \ DetectionConfidence.LOW return DetectionResult( variant=best_variant, confidence=confidence, score=best_score, evidence=evidence, manifest_found=False ) def _combine_detection_results(self, results: List[DetectionResult]) -> DetectionResult: """ Combine multiple detection results into a single best result. Args: results: List of detection results to combine Returns: Combined detection result """ # If we have a manifest result, prioritize it manifest_result = next((r for r in results if r.manifest_found), None) if manifest_result and manifest_result.variant: return manifest_result # Otherwise find result with highest score (ignoring manifest results without variants) non_manifest_results = [r for r in results if not r.manifest_found] if non_manifest_results: best_result = max(non_manifest_results, key=lambda r: r.score) if best_result.score > 0: return best_result # Fallback to flat variant if no good detection return DetectionResult( variant=ExplodeVariant.FLAT, confidence=DetectionConfidence.LOW, score=0.1, evidence=["No clear patterns detected, defaulting to flat variant"], manifest_found=False ) def _calculate_max_depth(self, directory: Path) -> int: """ Calculate the maximum depth of subdirectories. Args: directory: Directory to analyze Returns: Maximum depth (root = 0) """ max_depth = 0 for path in directory.glob("**/"): try: depth = len(path.relative_to(directory).parts) max_depth = max(max_depth, depth) except ValueError: continue return max_depth def is_exploded_directory(self, directory: Path) -> bool: """ Check if a directory appears to be an exploded markdown structure. Args: directory: Directory to check Returns: True if directory appears to be exploded markdown content """ if not directory.exists() or not directory.is_dir(): return False # Check for manifest file if (directory / "manifest.md").exists(): return True # Check for markdown files md_files = list(directory.glob("**/*.md")) if not md_files: return False # Check for typical exploded patterns subdirs = [d for d in directory.iterdir() if d.is_dir()] # Look for index.md files if any((d / "index.md").exists() for d in subdirs): return True # Look for numbered directories if any(re.match(r'^\d+_', d.name) for d in subdirs): return True # Look for semantic directories semantic_names = ['parts', 'chapters', 'sections'] if any(any(name in d.name.lower() for name in semantic_names) for d in subdirs): return True # If we have multiple markdown files in organized subdirectories if len(md_files) > 2 and len(subdirs) > 1: return True return False