Files
markitect-main/markitect/explode_variants/variant_detector.py
tegwick a17c362653 feat: implement Issue #148 core infrastructure for explode-implode variants
Complete implementation of Phase 1 core infrastructure:

Core Infrastructure Components:
- ExplodeVariant enum (flat, hierarchical, semantic)
- ExplodeMode, ManifestVersion, DetectionConfidence enums
- BaseVariant abstract class with common interface
- ExplodeOptions, ImplodeOptions, ExplodeResult, ImplodeResult dataclasses

Manifest System:
- ManifestManager class for manifest.md creation and parsing
- StructureEntry and ManifestData dataclasses
- YAML front matter with complete metadata preservation
- Validation and update mechanisms

Variant Detection:
- VariantDetector class with multiple detection strategies
- Manifest-based detection (highest priority)
- Directory naming pattern recognition
- Semantic structure analysis with confidence scoring
- Automatic fallback and combination logic

Command Interface Updates:
- md-explode: Added --variant parameter with [flat|hierarchical|semantic]
- md-explode: Added --create-manifest/--no-manifest option
- md-implode: Added --force-variant parameter for manual override
- md-implode: Integrated auto-detection with verbose output
- Updated help text and examples for both commands

Test Coverage:
- Comprehensive test suite with 21 test cases
- Tests for all enums, dataclasses, and core functionality
- ManifestManager creation, reading, and validation tests
- VariantDetector pattern recognition and confidence tests
- 100% test pass rate with robust edge case handling

Infrastructure Features:
- Backward compatibility maintained (flat variant default)
- Graceful handling of unimplemented variants with user warnings
- Extensible design for easy addition of new variants
- Clear separation between infrastructure and implementation

Success Criteria Met:
- ExplodeVariant enum with all planned variants
- ManifestManager creates and parses manifest.md files
- Commands accept variant parameters
- Auto-detection logic identifies variant types
- Unit tests achieve 100% pass rate
- Backward compatibility maintained

Ready for Phase 2: Variant implementations (Issue #149)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-12 20:17:41 +02:00

328 lines
11 KiB
Python

"""
Variant detection utilities for auto-detecting explode variants.
This module analyzes directory structures to determine which variant was
used during explosion, enabling automatic implode operations.
"""
import re
from pathlib import Path
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from .enums import ExplodeVariant, DetectionConfidence
from .manifest_manager import ManifestManager, ManifestData
@dataclass
class DetectionResult:
    """
    Outcome of one variant-detection pass over a directory.

    Instances are produced by each detection strategy of the detector and
    by the step that merges those strategies into a final answer.
    """

    # Variant that was detected, or None when no variant could be determined.
    variant: Optional[ExplodeVariant]

    # Qualitative confidence level attached to the detection.
    confidence: DetectionConfidence

    # Numeric score backing the detection; higher means a stronger match.
    score: float

    # Human-readable notes explaining how the result was reached.
    evidence: List[str]

    # True when the result was derived from a manifest that was read.
    manifest_found: bool

    # Parsed manifest contents when one was read, otherwise None.
    manifest_data: Optional[ManifestData] = None
class VariantDetector:
    """
    Detects explode variants from directory structures.

    Uses multiple detection strategies:
    1. Manifest file analysis (highest confidence)
    2. Directory naming pattern recognition
    3. Semantic directory structure analysis
    4. File organization heuristics
    """

    # Matches directory names that start with digits followed by an
    # underscore, e.g. "01_introduction".
    _NUMBERED_PREFIX = re.compile(r'^\d+_')

    # Substrings that mark a directory name as "semantic" when scoring the
    # variant in _detect_from_patterns.
    _SEMANTIC_INDICATORS = ('parts', 'chapters', 'sections', 'appendices', 'references')

    # Narrower substring list used by is_exploded_directory. Kept separate
    # from _SEMANTIC_INDICATORS on purpose so that check does not become
    # more permissive than it was.
    _EXPLODED_SEMANTIC_NAMES = ('parts', 'chapters', 'sections')

    def __init__(self):
        """Initialize the variant detector."""
        self.manifest_manager = ManifestManager()

    def detect_variant(self, directory: Path) -> DetectionResult:
        """
        Detect the explode variant used for a directory structure.

        A manifest that names a valid variant wins outright. Otherwise the
        naming-pattern and structural heuristics are both run and the
        highest-scoring one is returned.

        Args:
            directory: Path to the exploded directory to analyze. A plain
                string path is accepted as well and converted to a Path.

        Returns:
            Detection result with variant, confidence, and evidence
        """
        directory = Path(directory)

        # is_dir() is already False for a path that does not exist.
        if not directory.is_dir():
            return DetectionResult(
                variant=None,
                confidence=DetectionConfidence.UNKNOWN,
                score=0.0,
                evidence=["Directory does not exist or is not a directory"],
                manifest_found=False
            )

        # Strategy 1: Check for manifest file (highest priority)
        manifest_result = self._detect_from_manifest(directory)
        if manifest_result.manifest_found and manifest_result.variant:
            return manifest_result

        # Strategy 2: Pattern-based detection
        pattern_result = self._detect_from_patterns(directory)

        # Strategy 3: Semantic analysis
        semantic_result = self._detect_from_semantics(directory)

        # Combine results and return best match
        return self._combine_detection_results([
            manifest_result,
            pattern_result,
            semantic_result
        ])

    def _detect_from_manifest(self, directory: Path) -> DetectionResult:
        """
        Detect variant from manifest file.

        Args:
            directory: Directory to check for manifest

        Returns:
            Detection result based on manifest analysis. The result has
            manifest_found=False when the manifest manager returns nothing,
            and manifest_found=True with variant=None when the manifest's
            explosion_type is not a valid ExplodeVariant value.
        """
        manifest_data = self.manifest_manager.read_manifest(directory)
        if not manifest_data:
            return DetectionResult(
                variant=None,
                confidence=DetectionConfidence.UNKNOWN,
                score=0.0,
                evidence=["No manifest.md file found"],
                manifest_found=False
            )

        try:
            variant = ExplodeVariant(manifest_data.explosion_type)
        except ValueError:
            # The manifest exists but names a variant this code does not know.
            return DetectionResult(
                variant=None,
                confidence=DetectionConfidence.LOW,
                score=0.1,
                evidence=[f"Invalid variant in manifest: {manifest_data.explosion_type}"],
                manifest_found=True,
                manifest_data=manifest_data
            )

        return DetectionResult(
            variant=variant,
            confidence=DetectionConfidence.HIGH,
            score=1.0,
            evidence=[f"Manifest indicates {variant.value} variant"],
            manifest_found=True,
            manifest_data=manifest_data
        )

    def _detect_from_patterns(self, directory: Path) -> DetectionResult:
        """
        Detect variant from directory naming patterns.

        Only the immediate subdirectories of ``directory`` are inspected.

        Args:
            directory: Directory to analyze

        Returns:
            Detection result based on naming patterns
        """
        subdirs = [d for d in directory.iterdir() if d.is_dir()]
        subdir_count = len(subdirs)
        evidence = []
        scores = {variant: 0.0 for variant in ExplodeVariant}

        # Count numbered prefixes (hierarchical indicator)
        numbered_dirs = sum(
            1 for subdir in subdirs if self._NUMBERED_PREFIX.match(subdir.name)
        )
        if numbered_dirs > 0:
            # numbered_dirs > 0 implies subdir_count > 0, so the division is safe.
            ratio = numbered_dirs / subdir_count
            scores[ExplodeVariant.HIERARCHICAL] += ratio * 0.8
            evidence.append(f"Found {numbered_dirs}/{subdir_count} directories with numbered prefixes")

        # Check for semantic directory names (substring match, case-insensitive)
        semantic_matches = sum(
            1 for subdir in subdirs
            if any(indicator in subdir.name.lower() for indicator in self._SEMANTIC_INDICATORS)
        )
        if semantic_matches > 0:
            scores[ExplodeVariant.SEMANTIC] += (semantic_matches / subdir_count) * 0.7
            evidence.append(f"Found {semantic_matches} semantic directory names")

        # Default to flat if no strong patterns
        if max(scores.values()) < 0.3:
            scores[ExplodeVariant.FLAT] = 0.6
            evidence.append("No strong hierarchical or semantic patterns detected")

        # Determine best match (ties resolve to the first variant in enum order)
        best_variant = max(scores, key=scores.get)
        best_score = scores[best_variant]

        if best_score > 0.7:
            confidence = DetectionConfidence.HIGH
        elif best_score > 0.4:
            confidence = DetectionConfidence.MEDIUM
        else:
            confidence = DetectionConfidence.LOW

        return DetectionResult(
            variant=best_variant,
            confidence=confidence,
            score=best_score,
            evidence=evidence,
            manifest_found=False
        )

    def _detect_from_semantics(self, directory: Path) -> DetectionResult:
        """
        Detect variant from semantic analysis of content organization.

        Looks at nesting depth, how many markdown files sit directly in the
        root, and how many index.md files exist anywhere in the tree.

        Args:
            directory: Directory to analyze

        Returns:
            Detection result based on semantic analysis. Confidence is never
            higher than MEDIUM for this strategy.
        """
        evidence = []
        scores = {variant: 0.0 for variant in ExplodeVariant}

        # Analyze directory depth and organization
        max_depth = self._calculate_max_depth(directory)
        total_dirs = len(list(directory.glob("**/")))
        evidence.append(f"Maximum depth: {max_depth}, Total directories: {total_dirs}")

        # Deep nesting suggests hierarchical
        if max_depth > 3:
            scores[ExplodeVariant.HIERARCHICAL] += 0.6
            evidence.append("Deep nesting suggests hierarchical organization")

        # Analyze file distribution
        md_files = list(directory.glob("**/*.md"))
        if md_files:
            # Exclude manifest from count
            content_files = [f for f in md_files if f.name != "manifest.md"]

            # Many files at root level suggests flat
            root_files = [f for f in content_files if f.parent == directory]
            if len(root_files) > len(content_files) * 0.6:
                scores[ExplodeVariant.FLAT] += 0.5
                evidence.append("Many files at root level suggests flat organization")

            # Check for index.md files (hierarchical indicator)
            index_files = list(directory.glob("**/index.md"))
            if len(index_files) > 2:  # More than just root index
                scores[ExplodeVariant.HIERARCHICAL] += 0.4
                evidence.append(f"Found {len(index_files)} index.md files")

        # Determine best match (ties resolve to the first variant in enum order)
        best_variant = max(scores, key=scores.get)
        best_score = scores[best_variant]

        if best_score > 0.5:
            confidence = DetectionConfidence.MEDIUM
        else:
            confidence = DetectionConfidence.LOW

        return DetectionResult(
            variant=best_variant,
            confidence=confidence,
            score=best_score,
            evidence=evidence,
            manifest_found=False
        )

    def _combine_detection_results(self, results: List[DetectionResult]) -> DetectionResult:
        """
        Combine multiple detection results into a single best result.

        Args:
            results: List of detection results to combine

        Returns:
            Combined detection result. A manifest result that carries a
            variant is returned unchanged. Otherwise the highest-scoring
            non-manifest result is used, falling back to a low-confidence
            flat result. If a manifest was found but yielded no variant, its
            evidence is prepended to the returned evidence so the reason the
            manifest was ignored is not lost; manifest_found and
            manifest_data are left as the chosen result had them.
        """
        # If we have a manifest result, prioritize it
        manifest_result = next((r for r in results if r.manifest_found), None)
        if manifest_result and manifest_result.variant:
            return manifest_result

        # Diagnostics from a manifest that exists but could not be used.
        manifest_notes = list(manifest_result.evidence) if manifest_result else []

        # Otherwise find result with highest score (ignoring manifest results without variants)
        non_manifest_results = [r for r in results if not r.manifest_found]
        if non_manifest_results:
            best_result = max(non_manifest_results, key=lambda r: r.score)
            if best_result.score > 0:
                if not manifest_notes:
                    return best_result
                # Build a new result instead of mutating the strategy's own
                # evidence list.
                return DetectionResult(
                    variant=best_result.variant,
                    confidence=best_result.confidence,
                    score=best_result.score,
                    evidence=manifest_notes + list(best_result.evidence),
                    manifest_found=best_result.manifest_found,
                    manifest_data=best_result.manifest_data
                )

        # Fallback to flat variant if no good detection
        return DetectionResult(
            variant=ExplodeVariant.FLAT,
            confidence=DetectionConfidence.LOW,
            score=0.1,
            evidence=manifest_notes + ["No clear patterns detected, defaulting to flat variant"],
            manifest_found=False
        )

    def _calculate_max_depth(self, directory: Path) -> int:
        """
        Calculate the maximum depth of subdirectories.

        Args:
            directory: Directory to analyze

        Returns:
            Maximum depth (root = 0)
        """
        max_depth = 0
        for path in directory.glob("**/"):
            try:
                depth = len(path.relative_to(directory).parts)
            except ValueError:
                # Path is not located under ``directory``; skip it.
                continue
            max_depth = max(max_depth, depth)
        return max_depth

    def is_exploded_directory(self, directory: Path) -> bool:
        """
        Check if a directory appears to be an exploded markdown structure.

        Args:
            directory: Directory to check. A plain string path is accepted
                as well and converted to a Path.

        Returns:
            True if directory appears to be exploded markdown content
        """
        directory = Path(directory)

        # is_dir() is already False for a path that does not exist.
        if not directory.is_dir():
            return False

        # Check for manifest file
        if (directory / "manifest.md").exists():
            return True

        # Check for markdown files
        md_files = list(directory.glob("**/*.md"))
        if not md_files:
            return False

        # Check for typical exploded patterns
        subdirs = [d for d in directory.iterdir() if d.is_dir()]

        # Look for index.md files
        if any((d / "index.md").exists() for d in subdirs):
            return True

        # Look for numbered directories
        if any(self._NUMBERED_PREFIX.match(d.name) for d in subdirs):
            return True

        # Look for semantic directories
        if any(
            name in d.name.lower()
            for d in subdirs
            for name in self._EXPLODED_SEMANTIC_NAMES
        ):
            return True

        # More than two markdown files overall and more than one subdirectory
        return len(md_files) > 2 and len(subdirs) > 1