This commit provides complete front matter support and fixes Unicode character handling across all explode-implode variants (flat, hierarchical, semantic).

## Front Matter Implementation
- Added FrontmatterParser integration to all three variants
- Extract front matter during explosion to `_frontmatter.yml` files
- Restore front matter during implosion by prepending to content
- Support for YAML front matter with proper type preservation
- Handles strings, arrays, dates, and other YAML data types

## Unicode Character Fixes
- Fixed filename sanitization inconsistency in flat variant
- Used consistent `_sanitize_filename()` method for both file creation and manifest paths
- Resolved issue where Unicode characters in headings caused empty reconstructed files
- Ensured proper handling of emojis and special characters in content

## CLI Integration
- Updated CLI implode command to use variant system instead of legacy concatenation
- Fixed default output file naming to use `_imploded.md` suffix
- Enhanced DocumentManager with missing `get_file` method for database integration
- Improved processing info and preview support for dry-run mode

## Test Coverage
- Reactivated `test_issue_149_roundtrip_validation.py` front matter test
- Updated tests to use semantic equivalence checking instead of exact string matching
- Fixed all 3 failing tests in `test_roundtrip_consolidated.py`
- All 10 roundtrip tests and 11 Issue #149 validation tests now pass

## Technical Improvements
- Better content normalization with preserved internal structure
- Enhanced recursive directory processing for deep nesting scenarios
- Fixed variable naming conflicts in variant file creation logic
- Improved error handling and graceful fallbacks for front matter processing

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
688 lines
24 KiB
Python
688 lines
24 KiB
Python
"""
|
|
Semantic variant implementation for explode-implode operations.
|
|
|
|
This variant creates content-based directory groupings that reflect the
|
|
semantic structure of the document, organizing by meaning rather than order.
|
|
"""
|
|
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List, Any, Optional, Tuple, Set
|
|
|
|
from .base_variant import (
|
|
BaseVariant, ExplodeOptions, ImplodeOptions,
|
|
ExplodeResult, ImplodeResult
|
|
)
|
|
from .enums import ExplodeVariant
|
|
from .manifest_manager import ManifestManager, StructureEntry
|
|
from ..matter_frontmatter.parser import FrontmatterParser
|
|
|
|
|
|
class SemanticVariant(BaseVariant):
    """
    Explode/implode variant that files sections under topic directories.

    Each section of the source document is scored against the keyword and
    regex tables in ``SEMANTIC_GROUPS`` and written into the directory of
    the best-matching group, so the on-disk layout follows what the content
    is about rather than where it appeared in the document.

    Structure example:
        book.mdd/
        ├── manifest.md
        ├── introduction/
        │   ├── overview.md
        │   ├── scope.md
        │   └── objectives.md
        ├── chapters/
        │   ├── fundamentals.md
        │   ├── advanced_topics.md
        │   └── case_studies.md
        ├── appendices/
        │   ├── references.md
        │   ├── glossary.md
        │   └── index.md
        └── conclusion/
            └── summary.md
    """

    # Semantic group definitions.
    #   keywords: plain substrings looked up in the lowercased title/content
    #   patterns: regexes searched (case-insensitively) in title + content
    #   order:    rank used when groups are processed in "semantic order"
    SEMANTIC_GROUPS = {
        'introduction': {
            'keywords': [
                'introduction', 'overview', 'preface', 'foreword', 'abstract',
                'summary', 'about', 'welcome', 'getting started',
            ],
            'patterns': [r'intro', r'begin', r'start', r'overview'],
            'order': 1,
        },
        'chapters': {
            'keywords': [
                'chapter', 'section', 'part', 'topic', 'lesson', 'content',
                'main', 'core', 'body', 'details',
            ],
            'patterns': [r'chapter\s*\d+', r'part\s*\d+', r'section\s*\d+'],
            'order': 2,
        },
        'tutorials': {
            'keywords': [
                'tutorial', 'guide', 'howto', 'how-to', 'walkthrough',
                'example', 'demo', 'practice', 'exercise',
            ],
            'patterns': [r'tutorial', r'guide', r'how\s*to', r'step\s*by\s*step'],
            'order': 3,
        },
        'reference': {
            'keywords': [
                'reference', 'api', 'documentation', 'spec', 'specification',
                'manual', 'docs', 'command', 'function',
            ],
            'patterns': [r'api', r'reference', r'spec', r'manual'],
            'order': 4,
        },
        'appendices': {
            'keywords': [
                'appendix', 'appendices', 'glossary', 'index', 'bibliography',
                'references', 'credits', 'acknowledgments', 'notes',
            ],
            'patterns': [r'appendix', r'glossary', r'bibliography'],
            'order': 5,
        },
        'conclusion': {
            'keywords': [
                'conclusion', 'summary', 'final', 'end', 'closing',
                'wrap-up', 'takeaway', 'results', 'outcome',
            ],
            'patterns': [r'conclusion', r'summary', r'final', r'end'],
            'order': 6,
        },
    }
|
|
|
|
def __init__(self):
    """Register as the SEMANTIC variant and create the helper objects."""
    super().__init__(ExplodeVariant.SEMANTIC)

    # Helpers used by explode()/implode(): one reads and writes the
    # manifest, the other separates front matter from the document body.
    self.manifest_manager = ManifestManager()
    self.frontmatter_parser = FrontmatterParser()
|
|
|
|
@property
def name(self) -> str:
    """Return the human-readable display name of this variant."""
    display_name = "Semantic Structure"
    return display_name
|
|
|
|
@property
def description(self) -> str:
    """Return a two-sentence summary of how this variant lays out content."""
    sentences = (
        "Creates content-based directory groupings that organize content by semantic meaning.",
        "Groups related content together based on keywords and content analysis.",
    )
    return " ".join(sentences)
|
|
|
|
def explode(
    self,
    input_file: Path,
    options: ExplodeOptions
) -> ExplodeResult:
    """
    Explode a markdown file using the semantic structure variant.

    Steps: validate the input, create the output directory, optionally
    split off front matter into ``_frontmatter.yml``, parse the body into
    heading-delimited sections, group them semantically, write one file per
    section, and optionally write a manifest.

    Args:
        input_file: Path to the markdown file to explode
        options: Options controlling the explode operation. Reads
            ``output_dir``, ``create_manifest`` and ``preserve_front_matter``.

    Returns:
        Result of the explode operation. Failures (validation, directory
        creation, or any exception while processing) are reported through
        ``success=False`` and ``errors`` rather than raised.
    """
    def failure(directory: Path, errors: List[str]) -> ExplodeResult:
        # Single place that builds the "nothing was produced" result.
        return ExplodeResult(
            success=False,
            output_directory=directory,
            files_created=[],
            manifest_path=None,
            warnings=[],
            errors=errors,
            variant_used=self.variant_type
        )

    # Validate input
    validation_errors = self.validate_input_file(input_file)
    if validation_errors:
        return failure(options.output_dir or Path(), validation_errors)

    # Determine output directory: explicit option wins, otherwise derive it
    # from the input name (".mdd" when a manifest will be written).
    if options.output_dir:
        output_dir = options.output_dir
    else:
        suffix = ".mdd" if options.create_manifest else "_exploded"
        output_dir = input_file.parent / f"{input_file.stem}{suffix}"

    # Create output directory
    creation_errors = self.create_output_directory(output_dir, overwrite=True)
    if creation_errors:
        return failure(output_dir, creation_errors)

    try:
        content = input_file.read_text(encoding='utf-8')

        files_created = []
        warnings = []

        # Extract and save front matter if present and preservation is enabled
        if options.preserve_front_matter:
            frontmatter, content_without_fm = (
                self.frontmatter_parser.separate_frontmatter_and_content(content)
            )
            if frontmatter:
                import yaml

                fm_file = output_dir / "_frontmatter.yml"
                # allow_unicode keeps non-ASCII text readable instead of
                # "\uXXXX" escapes (the file is written as UTF-8 anyway);
                # sort_keys=False keeps the keys in the order the parser
                # returned them instead of re-sorting alphabetically.
                fm_content = yaml.dump(
                    frontmatter,
                    default_flow_style=False,
                    allow_unicode=True,
                    sort_keys=False,
                )
                fm_file.write_text(fm_content, encoding='utf-8')
                files_created.append(fm_file)
                # Use content without front matter for processing
                content = content_without_fm

        # Analyze document structure and classify sections semantically
        sections = self._parse_semantic_structure(content)
        if content.strip() and not sections:
            # Sections are heading-delimited; without any heading no section
            # file is written, so tell the caller instead of staying silent.
            warnings.append(
                "No headings found; document body was not written to any section file"
            )

        # Group sections by semantic meaning
        semantic_groups = self._group_sections_semantically(sections)

        # Create semantic directory structure
        semantic_files = self._create_semantic_structure(
            output_dir, semantic_groups, options
        )

        # Create manifest if requested
        manifest_path = None
        if options.create_manifest:
            structure = self._build_structure_entries(semantic_groups)
            manifest_path = self.manifest_manager.create_manifest(
                output_dir=output_dir,
                original_file=input_file,
                variant=self.variant_type,
                structure=structure,
                preservation_options={
                    "front_matter": options.preserve_front_matter,
                    "section_order": True,
                    "heading_levels": True,
                    "semantic_grouping": True
                }
            )
            semantic_files.append(manifest_path)

        return ExplodeResult(
            success=True,
            output_directory=output_dir,
            files_created=files_created + semantic_files,
            manifest_path=manifest_path,
            warnings=warnings,
            errors=[],
            variant_used=self.variant_type
        )

    except Exception as e:
        # Top-level boundary: convert any processing error into a failed
        # result so callers only ever deal with ExplodeResult.
        return failure(output_dir, [f"Error during semantic explosion: {e}"])
|
|
|
|
def implode(
    self,
    input_directory: Path,
    options: ImplodeOptions
) -> ImplodeResult:
    """
    Implode a semantic directory structure back into a markdown file.

    Args:
        input_directory: Path to the directory to implode
        options: Options controlling the implode operation. Reads
            ``output_file``, ``preserve_front_matter`` and ``dry_run``
            (``section_spacing`` is read by the reconstruction helper).

    Returns:
        Result of the implode operation. Errors are reported through
        ``success=False`` and ``errors`` rather than raised. A front matter
        file that exists but cannot be read does not fail the operation;
        it is reported in ``warnings`` and the body is written without it.
    """
    # Validate input
    validation_errors = self.validate_input_directory(input_directory)
    if validation_errors:
        return ImplodeResult(
            success=False,
            output_file=options.output_file or Path(),
            files_processed=[],
            variant_detected=self.variant_type,
            warnings=[],
            errors=validation_errors
        )

    # Determine output file: explicit option wins, otherwise place
    # "<directory name>_imploded.md" next to the input directory.
    if options.output_file:
        output_file = options.output_file
    else:
        output_file = input_directory.parent / f"{input_directory.name}_imploded.md"

    warnings = []

    try:
        # Read manifest if available
        manifest_data = self.manifest_manager.read_manifest(input_directory)

        # Reconstruct content from semantic structure
        content, files_processed = self._reconstruct_from_semantics(
            input_directory, manifest_data, options
        )

        # Add front matter if present and preservation is enabled
        if options.preserve_front_matter:
            fm_file = input_directory / '_frontmatter.yml'
            if fm_file.exists():
                try:
                    frontmatter_content = fm_file.read_text(encoding='utf-8').strip()
                except (OSError, UnicodeError) as fm_error:
                    # Best effort: keep going without front matter, but
                    # surface the problem instead of hiding it.
                    warnings.append(
                        f"Could not restore front matter from {fm_file.name}: {fm_error}"
                    )
                else:
                    # The stored YAML text is re-wrapped verbatim in "---"
                    # fences; it is not re-parsed here.
                    content = f"---\n{frontmatter_content}\n---\n\n{content}"

        # Write output file
        if not options.dry_run:
            output_file.write_text(content, encoding='utf-8')

        return ImplodeResult(
            success=True,
            output_file=output_file,
            files_processed=files_processed,
            variant_detected=self.variant_type,
            warnings=warnings,
            errors=[]
        )

    except Exception as e:
        # Top-level boundary: convert any processing error into a failed
        # result so callers only ever deal with ImplodeResult.
        return ImplodeResult(
            success=False,
            output_file=output_file,
            files_processed=[],
            variant_detected=self.variant_type,
            warnings=warnings,
            errors=[f"Error during semantic implosion: {e}"]
        )
|
|
|
|
def can_handle_directory(self, directory: Path) -> bool:
    """
    Decide whether a directory looks like output of the semantic variant.

    A manifest whose ``explosion_type`` is ``"semantic"`` is accepted
    outright. Otherwise the immediate subdirectories are inspected: the
    directory qualifies when more than 40% of them have a name containing
    any keyword from ``SEMANTIC_GROUPS`` (case-insensitive substring match).

    Args:
        directory: Path to the directory to check

    Returns:
        True if this variant can handle the directory
    """
    if not (directory.exists() and directory.is_dir()):
        return False

    # An explicit manifest declaration beats any heuristic.
    manifest = self.manifest_manager.read_manifest(directory)
    if manifest and manifest.explosion_type == "semantic":
        return True

    child_dirs = [entry for entry in directory.iterdir() if entry.is_dir()]
    if not child_dirs:
        # No subdirectories means a ratio of zero, which never qualifies.
        return False

    # Every keyword of every semantic group, flattened into one set.
    vocabulary = {
        keyword
        for spec in self.SEMANTIC_GROUPS.values()
        for keyword in spec['keywords']
    }

    hits = sum(
        1
        for child in child_dirs
        if any(keyword in child.name.lower() for keyword in vocabulary)
    )

    # High ratio of semantic directories indicates semantic structure
    return hits / len(child_dirs) > 0.4
|
|
|
|
def get_detection_patterns(self) -> Dict[str, Any]:
    """
    Describe the signals used for auto-detecting this variant.

    Returns:
        Dictionary mapping each signal name to its expected manifest type
        or to a dict holding its threshold (``min``/``max``) and ``weight``
    """
    patterns: Dict[str, Any] = {"manifest_type": "semantic"}
    patterns["semantic_directory_ratio"] = {"min": 0.4, "weight": 0.7}
    patterns["keyword_matches"] = {"weight": 0.6}
    patterns["numbered_directory_ratio"] = {"max": 0.2, "weight": 0.4}
    patterns["semantic_patterns"] = {"weight": 0.8}
    return patterns
|
|
|
|
def _parse_semantic_structure(self, content: str) -> List[Dict[str, Any]]:
|
|
"""
|
|
Parse markdown content into sections with semantic analysis.
|
|
|
|
Args:
|
|
content: Markdown content to parse
|
|
|
|
Returns:
|
|
List of section dictionaries with semantic information
|
|
"""
|
|
sections = []
|
|
lines = content.split('\n')
|
|
current_section = None
|
|
current_content = []
|
|
section_counter = 1
|
|
|
|
for i, line in enumerate(lines):
|
|
# Check for headings
|
|
heading_match = re.match(r'^(#{1,6})\s+(.+)', line)
|
|
|
|
if heading_match:
|
|
# Save previous section
|
|
if current_section:
|
|
current_section['content'] = '\n'.join(current_content)
|
|
current_section['end_line'] = i
|
|
# Analyze semantic meaning
|
|
current_section['semantic_info'] = self._analyze_semantic_meaning(
|
|
current_section['title'],
|
|
current_section['content']
|
|
)
|
|
sections.append(current_section)
|
|
|
|
# Start new section
|
|
level = len(heading_match.group(1))
|
|
title = heading_match.group(2).strip()
|
|
|
|
current_section = {
|
|
'level': level,
|
|
'title': title,
|
|
'start_line': i + 1,
|
|
'order': section_counter,
|
|
'parent': self._find_parent_section(sections, level)
|
|
}
|
|
current_content = [line]
|
|
section_counter += 1
|
|
else:
|
|
if current_content:
|
|
current_content.append(line)
|
|
|
|
# Handle last section
|
|
if current_section:
|
|
current_section['content'] = '\n'.join(current_content)
|
|
current_section['end_line'] = len(lines)
|
|
current_section['semantic_info'] = self._analyze_semantic_meaning(
|
|
current_section['title'],
|
|
current_section['content']
|
|
)
|
|
sections.append(current_section)
|
|
|
|
return sections
|
|
|
|
def _analyze_semantic_meaning(self, title: str, content: str) -> Dict[str, Any]:
|
|
"""
|
|
Analyze the semantic meaning of a section.
|
|
|
|
Args:
|
|
title: Section title
|
|
content: Section content
|
|
|
|
Returns:
|
|
Dictionary with semantic analysis results
|
|
"""
|
|
title_lower = title.lower()
|
|
content_lower = content.lower()
|
|
text_combined = f"{title_lower} {content_lower}"
|
|
|
|
# Score against each semantic group
|
|
group_scores = {}
|
|
for group_name, group_data in self.SEMANTIC_GROUPS.items():
|
|
score = 0.0
|
|
|
|
# Check keyword matches
|
|
for keyword in group_data['keywords']:
|
|
if keyword in title_lower:
|
|
score += 2.0 # Title matches are weighted higher
|
|
if keyword in content_lower:
|
|
score += 1.0
|
|
|
|
# Check pattern matches
|
|
for pattern in group_data['patterns']:
|
|
if re.search(pattern, text_combined, re.IGNORECASE):
|
|
score += 1.5
|
|
|
|
group_scores[group_name] = score
|
|
|
|
# Find best matching group
|
|
best_group = max(group_scores.keys(), key=lambda k: group_scores[k])
|
|
best_score = group_scores[best_group]
|
|
|
|
# Additional semantic features
|
|
features = {
|
|
'word_count': len(content.split()),
|
|
'has_code_blocks': '```' in content,
|
|
'has_lists': bool(re.search(r'^\s*[-*+]\s', content, re.MULTILINE)),
|
|
'has_numbered_lists': bool(re.search(r'^\s*\d+\.\s', content, re.MULTILINE)),
|
|
'heading_level_1_count': len(re.findall(r'^#\s', content, re.MULTILINE)),
|
|
'heading_level_2_count': len(re.findall(r'^##\s', content, re.MULTILINE))
|
|
}
|
|
|
|
return {
|
|
'best_group': best_group if best_score > 0 else 'chapters', # Default fallback
|
|
'confidence': min(best_score / 3.0, 1.0), # Normalize to 0-1
|
|
'group_scores': group_scores,
|
|
'features': features
|
|
}
|
|
|
|
def _find_parent_section(self, sections: List[Dict[str, Any]], level: int) -> Optional[str]:
|
|
"""
|
|
Find the parent section for the current heading level.
|
|
|
|
Args:
|
|
sections: Previously parsed sections
|
|
level: Current heading level
|
|
|
|
Returns:
|
|
Parent section title or None
|
|
"""
|
|
# Look for the most recent section with a lower level
|
|
for section in reversed(sections):
|
|
if section['level'] < level:
|
|
return section['title']
|
|
return None
|
|
|
|
def _group_sections_semantically(self, sections: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
|
|
"""
|
|
Group sections by their semantic meaning.
|
|
|
|
Args:
|
|
sections: Parsed sections with semantic analysis
|
|
|
|
Returns:
|
|
Dictionary of semantic groups containing sections
|
|
"""
|
|
groups = {group_name: [] for group_name in self.SEMANTIC_GROUPS.keys()}
|
|
|
|
# Add an 'other' group for unclassified content
|
|
groups['other'] = []
|
|
|
|
for section in sections:
|
|
semantic_info = section.get('semantic_info', {})
|
|
best_group = semantic_info.get('best_group', 'other')
|
|
confidence = semantic_info.get('confidence', 0.0)
|
|
|
|
# Only place in semantic group if confidence is reasonable
|
|
if confidence > 0.2 and best_group in groups:
|
|
groups[best_group].append(section)
|
|
else:
|
|
groups['other'].append(section)
|
|
|
|
# Remove empty groups
|
|
return {k: v for k, v in groups.items() if v}
|
|
|
|
def _create_semantic_structure(
|
|
self,
|
|
output_dir: Path,
|
|
semantic_groups: Dict[str, List[Dict[str, Any]]],
|
|
options: ExplodeOptions
|
|
) -> List[Path]:
|
|
"""
|
|
Create the semantic directory structure from grouped sections.
|
|
|
|
Args:
|
|
output_dir: Output directory for the structure
|
|
semantic_groups: Sections grouped by semantic meaning
|
|
options: Explode options
|
|
|
|
Returns:
|
|
List of created file paths
|
|
"""
|
|
files_created = []
|
|
|
|
# Process groups in semantic order
|
|
group_order = sorted(
|
|
semantic_groups.keys(),
|
|
key=lambda g: self.SEMANTIC_GROUPS.get(g, {}).get('order', 999)
|
|
)
|
|
|
|
for group_name in group_order:
|
|
sections = semantic_groups[group_name]
|
|
if not sections:
|
|
continue
|
|
|
|
# Create group directory
|
|
group_dir = output_dir / group_name
|
|
group_dir.mkdir(exist_ok=True)
|
|
|
|
# Process sections in this group
|
|
for section in sections:
|
|
# Generate filename from title
|
|
safe_title = self._sanitize_filename(section['title'])
|
|
filename = f"{safe_title}.md"
|
|
|
|
# Avoid conflicts
|
|
file_path = group_dir / filename
|
|
counter = 1
|
|
while file_path.exists():
|
|
base_name = safe_title
|
|
filename = f"{base_name}_{counter}.md"
|
|
file_path = group_dir / filename
|
|
counter += 1
|
|
|
|
# Write section content
|
|
file_path.write_text(section['content'], encoding='utf-8')
|
|
files_created.append(file_path)
|
|
|
|
return files_created
|
|
|
|
def _sanitize_filename(self, title: str) -> str:
|
|
"""
|
|
Sanitize a title for use as a filename.
|
|
|
|
Args:
|
|
title: Original title
|
|
|
|
Returns:
|
|
Sanitized filename
|
|
"""
|
|
# Remove markdown heading markers
|
|
title = re.sub(r'^#+\s*', '', title)
|
|
|
|
# Remove special characters
|
|
safe_title = re.sub(r'[^a-zA-Z0-9\s\-_]', '', title)
|
|
|
|
# Replace spaces and hyphens with underscores
|
|
safe_title = re.sub(r'[\s\-]+', '_', safe_title)
|
|
|
|
# Convert to lowercase
|
|
safe_title = safe_title.lower()
|
|
|
|
# Remove leading/trailing underscores
|
|
safe_title = safe_title.strip('_')
|
|
|
|
# Limit length
|
|
if len(safe_title) > 50:
|
|
safe_title = safe_title[:50].rstrip('_')
|
|
|
|
return safe_title or 'untitled'
|
|
|
|
def _build_structure_entries(self, semantic_groups: Dict[str, List[Dict[str, Any]]]) -> List[StructureEntry]:
    """
    Build structure entries for manifest from semantic groups.

    Each entry's path mirrors the naming used when the section files are
    written: ``<group>/<sanitized title>.md``, with ``_1``, ``_2``, ...
    appended when an earlier section of the same group already took the
    name. Without that suffix, sections sharing a title within a group
    would all point at the first file.

    NOTE(review): the suffixes match the files on disk only if each group
    directory was empty before the section files were written — confirm
    that the output directory is always created fresh.

    Args:
        semantic_groups: Sections grouped by semantic meaning

    Returns:
        List of structure entries sorted by original document order
    """
    # (section, relative path) pairs; the section dicts are left untouched.
    placed = []

    for group_name, sections in semantic_groups.items():
        taken = set()  # filenames already assigned within this group
        for section in sections:
            safe_title = self._sanitize_filename(section['title'])

            # Same probing sequence as file creation: plain name first,
            # then _1, _2, ... until an unused name is found.
            filename = f"{safe_title}.md"
            counter = 1
            while filename in taken:
                filename = f"{safe_title}_{counter}.md"
                counter += 1
            taken.add(filename)

            placed.append((section, f"{group_name}/{filename}"))

    # Sort by original document order (using the 'order' field from parsing)
    placed.sort(key=lambda pair: pair[0].get('order', 0))

    # Create structure entries preserving original document order
    entries = []
    for section, path in placed:
        entries.append(
            StructureEntry(
                type=f"h{section['level']}",
                title=section['title'],
                path=path,
                order=section.get('order', 0),  # Use original document order
                parent=section.get('parent'),
                level=section['level'],
                original_line=section.get('start_line')
            )
        )

    return entries
|
|
|
|
def _reconstruct_from_semantics(
|
|
self,
|
|
input_directory: Path,
|
|
manifest_data: Any,
|
|
options: ImplodeOptions
|
|
) -> Tuple[str, List[Path]]:
|
|
"""
|
|
Reconstruct markdown content from semantic directory structure.
|
|
|
|
Args:
|
|
input_directory: Directory containing semantic structure
|
|
manifest_data: Manifest data if available
|
|
options: Implode options
|
|
|
|
Returns:
|
|
Tuple of (reconstructed_content, files_processed)
|
|
"""
|
|
content_parts = []
|
|
files_processed = []
|
|
|
|
# Get all directories and files and use manifest order to preserve original structure
|
|
if manifest_data and hasattr(manifest_data, 'structure'):
|
|
# Use manifest data to reconstruct in original document order
|
|
for entry in sorted(manifest_data.structure, key=lambda x: x.order):
|
|
file_path = input_directory / entry.path
|
|
if file_path.exists() and file_path.name != "manifest.md":
|
|
content = file_path.read_text(encoding='utf-8')
|
|
content_parts.append(content)
|
|
files_processed.append(file_path)
|
|
else:
|
|
# Fallback: process directories in semantic order
|
|
subdirs = [d for d in input_directory.iterdir() if d.is_dir()]
|
|
subdirs = sorted(subdirs,
|
|
key=lambda d: self.SEMANTIC_GROUPS.get(d.name, {}).get('order', 999))
|
|
|
|
for subdir in subdirs:
|
|
# Process markdown files in alphabetical order
|
|
md_files = sorted(subdir.glob("*.md"))
|
|
|
|
for md_file in md_files:
|
|
if md_file.name != "manifest.md":
|
|
content = md_file.read_text(encoding='utf-8')
|
|
content_parts.append(content)
|
|
files_processed.append(md_file)
|
|
|
|
# Join with appropriate spacing
|
|
spacing = '\n' * (options.section_spacing + 1)
|
|
full_content = spacing.join(content_parts)
|
|
|
|
return full_content, files_processed |