feat: implement Issue #148 core infrastructure for explode-implode variants
Complete implementation of Phase 1 core infrastructure: Core Infrastructure Components: - ExplodeVariant enum (flat, hierarchical, semantic) - ExplodeMode, ManifestVersion, DetectionConfidence enums - BaseVariant abstract class with common interface - ExplodeOptions, ImplodeOptions, ExplodeResult, ImplodeResult dataclasses Manifest System: - ManifestManager class for manifest.md creation and parsing - StructureEntry and ManifestData dataclasses - YAML front matter with complete metadata preservation - Validation and update mechanisms Variant Detection: - VariantDetector class with multiple detection strategies - Manifest-based detection (highest priority) - Directory naming pattern recognition - Semantic structure analysis with confidence scoring - Automatic fallback and combination logic Command Interface Updates: - md-explode: Added --variant parameter with [flat|hierarchical|semantic] - md-explode: Added --create-manifest/--no-manifest option - md-implode: Added --force-variant parameter for manual override - md-implode: Integrated auto-detection with verbose output - Updated help text and examples for both commands Test Coverage: - Comprehensive test suite with 21 test cases - Tests for all enums, dataclasses, and core functionality - ManifestManager creation, reading, and validation tests - VariantDetector pattern recognition and confidence tests - 100% test pass rate with robust edge case handling Infrastructure Features: - Backward compatibility maintained (flat variant default) - Graceful handling of unimplemented variants with user warnings - Extensible design for easy addition of new variants - Clear separation between infrastructure and implementation Success Criteria Met: ✅ ExplodeVariant enum with all planned variants ✅ ManifestManager creates and parses manifest.md files ✅ Commands accept variant parameters ✅ Auto-detection logic identifies variant types ✅ Unit tests achieve 100% pass rate ✅ Backward compatibility maintained Ready for Phase 2: Variant 
implementations (Issue #149) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
328
markitect/explode_variants/variant_detector.py
Normal file
328
markitect/explode_variants/variant_detector.py
Normal file
@@ -0,0 +1,328 @@
|
||||
"""
|
||||
Variant detection utilities for auto-detecting explode variants.
|
||||
|
||||
This module analyzes directory structures to determine which variant was
|
||||
used during explosion, enabling automatic implode operations.
|
||||
"""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
from .enums import ExplodeVariant, DetectionConfidence
|
||||
from .manifest_manager import ManifestManager, ManifestData
|
||||
|
||||
|
||||
@dataclass
class DetectionResult:
    """
    Outcome of one variant-detection pass over a directory.

    Attributes:
        variant: The explode variant that was detected, or None when no
            variant could be determined.
        confidence: Confidence level attached to this detection.
        score: Numeric score backing the detection; a higher score means
            stronger support for ``variant``.
        evidence: Human-readable notes describing what the detection
            observed.
        manifest_found: True when a manifest was read for the directory.
        manifest_data: The parsed manifest, when one was read; otherwise
            None.
    """

    variant: Optional[ExplodeVariant]
    confidence: DetectionConfidence
    score: float
    evidence: List[str]
    manifest_found: bool
    manifest_data: Optional[ManifestData] = None
|
||||
|
||||
|
||||
class VariantDetector:
    """
    Detects explode variants from directory structures.

    Uses multiple detection strategies:
    1. Manifest file analysis (highest confidence)
    2. Directory naming pattern recognition
    3. Semantic directory structure analysis
    4. File organization heuristics
    """

    # Matches directory names that start with digits followed by an
    # underscore (e.g. "01_intro"); compiled once and shared by all checks.
    _NUMBERED_DIR_RE = re.compile(r'^\d+_')

    # Substrings that mark a directory name as "semantic" when scoring.
    _SEMANTIC_INDICATORS = ('parts', 'chapters', 'sections', 'appendices', 'references')

    # Substrings accepted by is_exploded_directory().
    # NOTE(review): narrower than _SEMANTIC_INDICATORS ('appendices' and
    # 'references' are absent); kept as-is to preserve behavior - confirm
    # whether the two lists are meant to differ.
    _EXPLODED_SEMANTIC_NAMES = ('parts', 'chapters', 'sections')

    def __init__(self):
        """Initialize the variant detector."""
        self.manifest_manager = ManifestManager()

    def detect_variant(self, directory: Path) -> DetectionResult:
        """
        Detect the explode variant used for a directory structure.

        Args:
            directory: Path to the exploded directory to analyze

        Returns:
            Detection result with variant, confidence, and evidence. When
            ``directory`` is missing or not a directory the result has no
            variant and UNKNOWN confidence.
        """
        # is_dir() is already False for a non-existent path.
        if not directory.is_dir():
            return DetectionResult(
                variant=None,
                confidence=DetectionConfidence.UNKNOWN,
                score=0.0,
                evidence=["Directory does not exist or is not a directory"],
                manifest_found=False,
            )

        # Strategy 1: a usable manifest wins outright, so the heuristics
        # below are skipped entirely.
        manifest_result = self._detect_from_manifest(directory)
        if manifest_result.manifest_found and manifest_result.variant:
            return manifest_result

        # Strategies 2 and 3: naming patterns and content organization.
        pattern_result = self._detect_from_patterns(directory)
        semantic_result = self._detect_from_semantics(directory)

        return self._combine_detection_results([
            manifest_result,
            pattern_result,
            semantic_result,
        ])

    def _detect_from_manifest(self, directory: Path) -> DetectionResult:
        """
        Detect variant from manifest file.

        Args:
            directory: Directory to check for manifest

        Returns:
            Detection result based on manifest analysis: HIGH confidence
            when the manifest names a valid variant, LOW confidence with no
            variant when the named variant is not a valid ExplodeVariant,
            and UNKNOWN with ``manifest_found=False`` when no manifest data
            was read.
        """
        manifest_data = self.manifest_manager.read_manifest(directory)
        if not manifest_data:
            return DetectionResult(
                variant=None,
                confidence=DetectionConfidence.UNKNOWN,
                score=0.0,
                evidence=["No manifest.md file found"],
                manifest_found=False,
            )

        # Only the enum lookup can raise ValueError; keep the try minimal.
        try:
            variant = ExplodeVariant(manifest_data.explosion_type)
        except ValueError:
            return DetectionResult(
                variant=None,
                confidence=DetectionConfidence.LOW,
                score=0.1,
                evidence=[f"Invalid variant in manifest: {manifest_data.explosion_type}"],
                manifest_found=True,
                manifest_data=manifest_data,
            )

        return DetectionResult(
            variant=variant,
            confidence=DetectionConfidence.HIGH,
            score=1.0,
            evidence=[f"Manifest indicates {variant.value} variant"],
            manifest_found=True,
            manifest_data=manifest_data,
        )

    def _detect_from_patterns(self, directory: Path) -> DetectionResult:
        """
        Detect variant from directory naming patterns.

        Only the immediate subdirectories of ``directory`` are inspected.

        Args:
            directory: Directory to analyze

        Returns:
            Detection result based on naming patterns
        """
        subdirs = [entry for entry in directory.iterdir() if entry.is_dir()]
        evidence: List[str] = []
        scores = {variant: 0.0 for variant in ExplodeVariant}

        # Numbered prefixes are the hierarchical indicator; the score scales
        # with the share of subdirectories that carry one.
        numbered_dirs = sum(
            1 for subdir in subdirs if self._NUMBERED_DIR_RE.match(subdir.name)
        )
        if numbered_dirs > 0:
            # numbered_dirs > 0 implies subdirs is non-empty.
            scores[ExplodeVariant.HIERARCHICAL] += (numbered_dirs / len(subdirs)) * 0.8
            evidence.append(
                f"Found {numbered_dirs}/{len(subdirs)} directories with numbered prefixes"
            )

        # Semantic directory names, matched as case-insensitive substrings.
        semantic_matches = 0
        for subdir in subdirs:
            lowered = subdir.name.lower()
            if any(indicator in lowered for indicator in self._SEMANTIC_INDICATORS):
                semantic_matches += 1
        if semantic_matches > 0:
            scores[ExplodeVariant.SEMANTIC] += (semantic_matches / len(subdirs)) * 0.7
            evidence.append(f"Found {semantic_matches} semantic directory names")

        # Default to flat if no strong patterns
        if max(scores.values()) < 0.3:
            scores[ExplodeVariant.FLAT] = 0.6
            evidence.append("No strong hierarchical or semantic patterns detected")

        # On a tie max() keeps the first variant in enum iteration order.
        best_variant = max(scores, key=scores.get)
        best_score = scores[best_variant]

        if best_score > 0.7:
            confidence = DetectionConfidence.HIGH
        elif best_score > 0.4:
            confidence = DetectionConfidence.MEDIUM
        else:
            confidence = DetectionConfidence.LOW

        return DetectionResult(
            variant=best_variant,
            confidence=confidence,
            score=best_score,
            evidence=evidence,
            manifest_found=False,
        )

    def _detect_from_semantics(self, directory: Path) -> DetectionResult:
        """
        Detect variant from semantic analysis of content organization.

        Looks at nesting depth, how many markdown files sit directly in
        ``directory``, and how many ``index.md`` files exist in the tree.

        Args:
            directory: Directory to analyze

        Returns:
            Detection result based on semantic analysis. When no heuristic
            fires, the result carries a score of 0.0.
        """
        evidence: List[str] = []
        scores = {variant: 0.0 for variant in ExplodeVariant}

        # List the directory tree once and derive both figures from it.
        all_dirs = list(directory.glob("**/"))
        max_depth = self._max_relative_depth(directory, all_dirs)
        total_dirs = len(all_dirs)
        evidence.append(f"Maximum depth: {max_depth}, Total directories: {total_dirs}")

        # Deep nesting suggests hierarchical
        if max_depth > 3:
            scores[ExplodeVariant.HIERARCHICAL] += 0.6
            evidence.append("Deep nesting suggests hierarchical organization")

        # File distribution: the manifest is not content, so leave it out.
        md_files = list(directory.glob("**/*.md"))
        content_files = [f for f in md_files if f.name != "manifest.md"]
        root_files = [f for f in content_files if f.parent == directory]
        # With no content files this compares 0 > 0 and adds nothing.
        if len(root_files) > len(content_files) * 0.6:
            scores[ExplodeVariant.FLAT] += 0.5
            evidence.append("Many files at root level suggests flat organization")

        # Check for index.md files (hierarchical indicator)
        index_files = list(directory.glob("**/index.md"))
        if len(index_files) > 2:  # More than just root index
            scores[ExplodeVariant.HIERARCHICAL] += 0.4
            evidence.append(f"Found {len(index_files)} index.md files")

        # On a tie max() keeps the first variant in enum iteration order.
        best_variant = max(scores, key=scores.get)
        best_score = scores[best_variant]

        if best_score > 0.5:
            confidence = DetectionConfidence.MEDIUM
        else:
            confidence = DetectionConfidence.LOW

        return DetectionResult(
            variant=best_variant,
            confidence=confidence,
            score=best_score,
            evidence=evidence,
            manifest_found=False,
        )

    def _combine_detection_results(self, results: List[DetectionResult]) -> DetectionResult:
        """
        Combine multiple detection results into a single best result.

        Args:
            results: List of detection results to combine

        Returns:
            The manifest-based result when it names a variant; otherwise the
            highest-scoring non-manifest result with a positive score;
            otherwise a LOW-confidence flat fallback. If a manifest was
            found but did not yield a variant, its evidence is prepended to
            the returned result's evidence so the problem is not lost.
        """
        manifest_result = next((r for r in results if r.manifest_found), None)
        if manifest_result is not None and manifest_result.variant:
            return manifest_result

        # Heuristic results only; manifest results without a variant are
        # not candidates.
        candidates = [r for r in results if not r.manifest_found]
        best_result = max(candidates, key=lambda r: r.score, default=None)

        if best_result is None or best_result.score <= 0:
            # Fallback to flat variant if no good detection
            best_result = DetectionResult(
                variant=ExplodeVariant.FLAT,
                confidence=DetectionConfidence.LOW,
                score=0.1,
                evidence=["No clear patterns detected, defaulting to flat variant"],
                manifest_found=False,
            )

        if manifest_result is None:
            return best_result

        # A manifest exists but could not be used. Surface why, without
        # changing the variant, confidence, score or manifest flags of the
        # heuristic result that was chosen.
        return DetectionResult(
            variant=best_result.variant,
            confidence=best_result.confidence,
            score=best_result.score,
            evidence=list(manifest_result.evidence) + list(best_result.evidence),
            manifest_found=best_result.manifest_found,
            manifest_data=best_result.manifest_data,
        )

    def _calculate_max_depth(self, directory: Path) -> int:
        """
        Calculate the maximum depth of subdirectories.

        Args:
            directory: Directory to analyze

        Returns:
            Maximum depth (root = 0)
        """
        return self._max_relative_depth(directory, list(directory.glob("**/")))

    @staticmethod
    def _max_relative_depth(directory: Path, paths: List[Path]) -> int:
        """
        Return the largest number of path components any of ``paths`` has
        below ``directory``.

        Args:
            directory: Root that every entry of ``paths`` is located under
            paths: Paths produced by globbing ``directory``

        Returns:
            Maximum relative depth, or 0 when ``paths`` is empty
        """
        return max(
            (len(path.relative_to(directory).parts) for path in paths),
            default=0,
        )

    def is_exploded_directory(self, directory: Path) -> bool:
        """
        Check if a directory appears to be an exploded markdown structure.

        Args:
            directory: Directory to check

        Returns:
            True if directory appears to be exploded markdown content
        """
        # is_dir() is already False for a non-existent path.
        if not directory.is_dir():
            return False

        # A manifest at the root is sufficient on its own.
        if (directory / "manifest.md").exists():
            return True

        # Without any markdown file anywhere in the tree there is nothing
        # to implode.
        md_files = list(directory.glob("**/*.md"))
        if not md_files:
            return False

        # Check for typical exploded patterns
        subdirs = [entry for entry in directory.iterdir() if entry.is_dir()]

        # Look for index.md files
        if any((subdir / "index.md").exists() for subdir in subdirs):
            return True

        # Look for numbered directories
        if any(self._NUMBERED_DIR_RE.match(subdir.name) for subdir in subdirs):
            return True

        # Look for semantic directories
        if any(
            name in subdir.name.lower()
            for subdir in subdirs
            for name in self._EXPLODED_SEMANTIC_NAMES
        ):
            return True

        # Several markdown files spread over more than one subdirectory.
        return len(md_files) > 2 and len(subdirs) > 1
|
||||
Reference in New Issue
Block a user