Complete implementation of Phase 1 core infrastructure: Core Infrastructure Components: - ExplodeVariant enum (flat, hierarchical, semantic) - ExplodeMode, ManifestVersion, DetectionConfidence enums - BaseVariant abstract class with common interface - ExplodeOptions, ImplodeOptions, ExplodeResult, ImplodeResult dataclasses Manifest System: - ManifestManager class for manifest.md creation and parsing - StructureEntry and ManifestData dataclasses - YAML front matter with complete metadata preservation - Validation and update mechanisms Variant Detection: - VariantDetector class with multiple detection strategies - Manifest-based detection (highest priority) - Directory naming pattern recognition - Semantic structure analysis with confidence scoring - Automatic fallback and combination logic Command Interface Updates: - md-explode: Added --variant parameter with [flat|hierarchical|semantic] - md-explode: Added --create-manifest/--no-manifest option - md-implode: Added --force-variant parameter for manual override - md-implode: Integrated auto-detection with verbose output - Updated help text and examples for both commands Test Coverage: - Comprehensive test suite with 21 test cases - Tests for all enums, dataclasses, and core functionality - ManifestManager creation, reading, and validation tests - VariantDetector pattern recognition and confidence tests - 100% test pass rate with robust edge case handling Infrastructure Features: - Backward compatibility maintained (flat variant default) - Graceful handling of unimplemented variants with user warnings - Extensible design for easy addition of new variants - Clear separation between infrastructure and implementation Success Criteria Met: ✅ ExplodeVariant enum with all planned variants ✅ ManifestManager creates and parses manifest.md files ✅ Commands accept variant parameters ✅ Auto-detection logic identifies variant types ✅ Unit tests achieve 100% pass rate ✅ Backward compatibility maintained Ready for Phase 2: Variant 
implementations (Issue #149) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
328 lines
11 KiB
Python
328 lines
11 KiB
Python
"""
Variant detection utilities for auto-detecting explode variants.

This module analyzes directory structures to determine which variant was
used during explosion, enabling automatic implode operations.
"""
|
|
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List, Tuple, Optional
|
|
from dataclasses import dataclass
|
|
|
|
from .enums import ExplodeVariant, DetectionConfidence
|
|
from .manifest_manager import ManifestManager, ManifestData
|
|
|
|
|
|
@dataclass
class DetectionResult:
    """Outcome of a single variant-detection pass over a directory.

    Attributes:
        variant: The explode variant that was identified, or ``None`` when
            no variant could be determined.
        confidence: Confidence level attached to this detection.
        score: Numeric score backing the detection; the detector in this
            module produces values from 0.0 to 1.0.
        evidence: Human-readable notes explaining how the result was reached.
        manifest_found: Whether a manifest was read for the directory.
        manifest_data: The parsed manifest when one was read; defaults to
            ``None``.
    """

    variant: Optional[ExplodeVariant]
    confidence: DetectionConfidence
    score: float
    evidence: List[str]
    manifest_found: bool
    manifest_data: Optional[ManifestData] = None
|
|
|
|
|
|
class VariantDetector:
    """
    Detects explode variants from directory structures.

    Uses multiple detection strategies:
    1. Manifest file analysis (highest confidence)
    2. Directory naming pattern recognition
    3. Semantic directory structure analysis
    4. File organization heuristics
    """

    # Directory names that start with digits followed by an underscore,
    # e.g. "01_intro". Compiled once instead of on every call.
    _NUMBERED_DIR_RE = re.compile(r'^\d+_')

    # Substrings of a directory name that count towards the semantic variant
    # when scoring naming patterns.
    _SEMANTIC_INDICATORS = ('parts', 'chapters', 'sections', 'appendices', 'references')

    # Narrower list used by is_exploded_directory(); kept separate on purpose
    # so that check behaves exactly as before.
    _EXPLODED_SEMANTIC_NAMES = ('parts', 'chapters', 'sections')

    def __init__(self):
        """Initialize the variant detector with its own ManifestManager."""
        self.manifest_manager = ManifestManager()

    def detect_variant(self, directory: Path) -> DetectionResult:
        """
        Detect the explode variant used for a directory structure.

        A manifest that names a valid variant wins outright. Otherwise the
        naming-pattern and semantic analyses are run and the results are
        merged by _combine_detection_results().

        Args:
            directory: Path to the exploded directory to analyze. A plain
                ``str`` is also accepted and converted to a ``Path``.

        Returns:
            Detection result with variant, confidence, and evidence. When
            the path is missing or not a directory, the result has
            ``variant=None`` and ``DetectionConfidence.UNKNOWN``.
        """
        # Callers (e.g. CLI code) may hand over a raw string path.
        if isinstance(directory, str):
            directory = Path(directory)

        if not directory.exists() or not directory.is_dir():
            return DetectionResult(
                variant=None,
                confidence=DetectionConfidence.UNKNOWN,
                score=0.0,
                evidence=["Directory does not exist or is not a directory"],
                manifest_found=False
            )

        # Strategy 1: Check for manifest file (highest priority)
        manifest_result = self._detect_from_manifest(directory)
        if manifest_result.manifest_found and manifest_result.variant:
            return manifest_result

        # Strategy 2: Pattern-based detection
        pattern_result = self._detect_from_patterns(directory)

        # Strategy 3: Semantic analysis
        semantic_result = self._detect_from_semantics(directory)

        # Combine results and return best match
        return self._combine_detection_results([
            manifest_result,
            pattern_result,
            semantic_result,
        ])

    def _detect_from_manifest(self, directory: Path) -> DetectionResult:
        """
        Detect variant from manifest file.

        Args:
            directory: Directory to check for manifest

        Returns:
            Detection result based on manifest analysis: HIGH confidence
            (score 1.0) when the manifest's explosion type is a valid
            ExplodeVariant value, LOW confidence (score 0.1, no variant)
            when it is not, and UNKNOWN with ``manifest_found=False`` when
            no manifest data was read.
        """
        manifest_data = self.manifest_manager.read_manifest(directory)

        if not manifest_data:
            return DetectionResult(
                variant=None,
                confidence=DetectionConfidence.UNKNOWN,
                score=0.0,
                evidence=["No manifest.md file found"],
                manifest_found=False
            )

        # Only the enum conversion is expected to raise ValueError, so keep
        # the try body limited to it.
        try:
            variant = ExplodeVariant(manifest_data.explosion_type)
        except ValueError:
            return DetectionResult(
                variant=None,
                confidence=DetectionConfidence.LOW,
                score=0.1,
                evidence=[f"Invalid variant in manifest: {manifest_data.explosion_type}"],
                manifest_found=True,
                manifest_data=manifest_data
            )

        return DetectionResult(
            variant=variant,
            confidence=DetectionConfidence.HIGH,
            score=1.0,
            evidence=[f"Manifest indicates {variant.value} variant"],
            manifest_found=True,
            manifest_data=manifest_data
        )

    def _detect_from_patterns(self, directory: Path) -> DetectionResult:
        """
        Detect variant from directory naming patterns.

        Only the immediate subdirectories of ``directory`` are inspected.

        Args:
            directory: Directory to analyze

        Returns:
            Detection result based on naming patterns
        """
        subdirs = [d for d in directory.iterdir() if d.is_dir()]
        evidence = []
        scores = {variant: 0.0 for variant in ExplodeVariant}

        # Count numbered prefixes (hierarchical indicator)
        numbered_dirs = sum(
            1 for subdir in subdirs if self._NUMBERED_DIR_RE.match(subdir.name)
        )
        if numbered_dirs > 0:
            # subdirs cannot be empty here because at least one entry matched.
            ratio = numbered_dirs / len(subdirs)
            scores[ExplodeVariant.HIERARCHICAL] += ratio * 0.8
            evidence.append(f"Found {numbered_dirs}/{len(subdirs)} directories with numbered prefixes")

        # Check for semantic directory names (substring match, case-insensitive)
        semantic_matches = sum(
            1 for subdir in subdirs
            if any(indicator in subdir.name.lower() for indicator in self._SEMANTIC_INDICATORS)
        )
        if semantic_matches > 0:
            scores[ExplodeVariant.SEMANTIC] += (semantic_matches / len(subdirs)) * 0.7
            evidence.append(f"Found {semantic_matches} semantic directory names")

        # Default to flat if no strong patterns
        if max(scores.values()) < 0.3:
            scores[ExplodeVariant.FLAT] = 0.6
            evidence.append("No strong hierarchical or semantic patterns detected")

        # Determine best match; on a tie the variant that comes first in
        # ExplodeVariant iteration order is kept.
        best_variant = max(scores, key=scores.get)
        best_score = scores[best_variant]

        if best_score > 0.7:
            confidence = DetectionConfidence.HIGH
        elif best_score > 0.4:
            confidence = DetectionConfidence.MEDIUM
        else:
            confidence = DetectionConfidence.LOW

        return DetectionResult(
            variant=best_variant,
            confidence=confidence,
            score=best_score,
            evidence=evidence,
            manifest_found=False
        )

    def _detect_from_semantics(self, directory: Path) -> DetectionResult:
        """
        Detect variant from semantic analysis of content organization.

        Looks at nesting depth, how many markdown files sit directly in
        ``directory``, and how many ``index.md`` files exist in the tree.

        Args:
            directory: Directory to analyze

        Returns:
            Detection result based on semantic analysis. Confidence is
            never higher than MEDIUM for this strategy.
        """
        evidence = []
        scores = {variant: 0.0 for variant in ExplodeVariant}

        # Analyze directory depth and organization
        max_depth = self._calculate_max_depth(directory)
        # Number of paths matched by the recursive "**/" pattern.
        total_dirs = len(list(directory.glob("**/")))

        evidence.append(f"Maximum depth: {max_depth}, Total directories: {total_dirs}")

        # Deep nesting suggests hierarchical
        if max_depth > 3:
            scores[ExplodeVariant.HIERARCHICAL] += 0.6
            evidence.append("Deep nesting suggests hierarchical organization")

        # Analyze file distribution
        md_files = list(directory.glob("**/*.md"))
        if md_files:
            # Exclude manifest from count
            content_files = [f for f in md_files if f.name != "manifest.md"]

            # Many files at root level suggests flat
            root_files = [f for f in content_files if f.parent == directory]
            if len(root_files) > len(content_files) * 0.6:
                scores[ExplodeVariant.FLAT] += 0.5
                evidence.append("Many files at root level suggests flat organization")

            # Check for index.md files (hierarchical indicator). With no
            # markdown files at all there can be no index.md either, so the
            # lookup is only needed in this branch.
            index_files = list(directory.glob("**/index.md"))
            if len(index_files) > 2:  # More than just root index
                scores[ExplodeVariant.HIERARCHICAL] += 0.4
                evidence.append(f"Found {len(index_files)} index.md files")

        # Determine best match; on a tie the variant that comes first in
        # ExplodeVariant iteration order is kept.
        best_variant = max(scores, key=scores.get)
        best_score = scores[best_variant]

        if best_score > 0.5:
            confidence = DetectionConfidence.MEDIUM
        else:
            confidence = DetectionConfidence.LOW

        return DetectionResult(
            variant=best_variant,
            confidence=confidence,
            score=best_score,
            evidence=evidence,
            manifest_found=False
        )

    def _combine_detection_results(self, results: List[DetectionResult]) -> DetectionResult:
        """
        Combine multiple detection results into a single best result.

        Selection order:
        1. The first result that came from a manifest, if it names a variant.
        2. The highest-scoring non-manifest result with a score above 0
           (the earliest one wins a tie).
        3. A LOW-confidence flat fallback.

        Args:
            results: List of detection results to combine

        Returns:
            Combined detection result
        """
        # If we have a manifest result, prioritize it
        manifest_result = next((r for r in results if r.manifest_found), None)
        if manifest_result and manifest_result.variant:
            return manifest_result

        # Otherwise find result with highest score (ignoring manifest results without variants)
        non_manifest_results = [r for r in results if not r.manifest_found]
        if non_manifest_results:
            best_result = max(non_manifest_results, key=lambda r: r.score)
            if best_result.score > 0:
                return best_result

        # Fallback to flat variant if no good detection
        return DetectionResult(
            variant=ExplodeVariant.FLAT,
            confidence=DetectionConfidence.LOW,
            score=0.1,
            evidence=["No clear patterns detected, defaulting to flat variant"],
            manifest_found=False
        )

    def _calculate_max_depth(self, directory: Path) -> int:
        """
        Calculate the maximum depth of subdirectories.

        Args:
            directory: Directory to analyze

        Returns:
            Maximum depth (root = 0)
        """
        max_depth = 0
        for path in directory.glob("**/"):
            try:
                depth = len(path.relative_to(directory).parts)
            except ValueError:
                # Path could not be expressed relative to the root; skip it.
                continue
            max_depth = max(max_depth, depth)
        return max_depth

    def is_exploded_directory(self, directory: Path) -> bool:
        """
        Check if a directory appears to be an exploded markdown structure.

        Args:
            directory: Directory to check. A plain ``str`` is also accepted
                and converted to a ``Path``.

        Returns:
            True if directory appears to be exploded markdown content
        """
        # Callers (e.g. CLI code) may hand over a raw string path.
        if isinstance(directory, str):
            directory = Path(directory)

        if not directory.exists() or not directory.is_dir():
            return False

        # Check for manifest file
        if (directory / "manifest.md").exists():
            return True

        # Check for markdown files anywhere in the tree
        md_files = list(directory.glob("**/*.md"))
        if not md_files:
            return False

        # Check for typical exploded patterns among immediate subdirectories
        subdirs = [d for d in directory.iterdir() if d.is_dir()]

        # Look for index.md files
        if any((d / "index.md").exists() for d in subdirs):
            return True

        # Look for numbered directories
        if any(self._NUMBERED_DIR_RE.match(d.name) for d in subdirs):
            return True

        # Look for semantic directories (substring match, case-insensitive)
        if any(
            any(name in d.name.lower() for name in self._EXPLODED_SEMANTIC_NAMES)
            for d in subdirs
        ):
            return True

        # If we have multiple markdown files in organized subdirectories
        return len(md_files) > 2 and len(subdirs) > 1