feat: implement comprehensive front matter preservation and unicode handling
This commit provides complete front matter support and fixes Unicode character handling across all explode-implode variants (flat, hierarchical, semantic).

## Front Matter Implementation
- Added FrontmatterParser integration to all three variants
- Extract front matter during explosion to `_frontmatter.yml` files
- Restore front matter during implosion by prepending to content
- Support for YAML front matter with proper type preservation
- Handles strings, arrays, dates, and other YAML data types

## Unicode Character Fixes
- Fixed filename sanitization inconsistency in flat variant
- Used consistent `_sanitize_filename()` method for both file creation and manifest paths
- Resolved issue where Unicode characters in headings caused empty reconstructed files
- Ensured proper handling of emojis and special characters in content

## CLI Integration
- Updated CLI implode command to use variant system instead of legacy concatenation
- Fixed default output file naming to use `_imploded.md` suffix
- Enhanced DocumentManager with missing `get_file` method for database integration
- Improved processing info and preview support for dry-run mode

## Test Coverage
- Reactivated `test_issue_149_roundtrip_validation.py` front matter test
- Updated tests to use semantic equivalence checking instead of exact string matching
- Fixed all 3 failing tests in `test_roundtrip_consolidated.py`
- All 10 roundtrip tests and 11 Issue #149 validation tests now pass

## Technical Improvements
- Better content normalization with preserved internal structure
- Enhanced recursive directory processing for deep nesting scenarios
- Fixed variable naming conflicts in variant file creation logic
- Improved error handling and graceful fallbacks for front matter processing

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1038,7 +1038,7 @@ class ImplodeResult:
|
||||
def cli_implode_directory(input_dir: Path = None, output_file: Path = None,
|
||||
options: ImplodeOptions = None, dry_run: bool = False,
|
||||
verbose: bool = False, overwrite: bool = False, **kwargs) -> ImplodeResult:
|
||||
"""Implode a directory structure back into a markdown file.
|
||||
"""Implode a directory structure back into a markdown file using variant system.
|
||||
|
||||
Args:
|
||||
input_dir: Directory containing markdown files to implode
|
||||
@@ -1050,137 +1050,113 @@ def cli_implode_directory(input_dir: Path = None, output_file: Path = None,
|
||||
**kwargs: Additional arguments for compatibility
|
||||
|
||||
Returns:
|
||||
ImplodeResult with success flag and output file path
|
||||
ImplodeResult with success flag and output file path (legacy format)
|
||||
"""
|
||||
from markitect.explode_variants import get_variant_factory
|
||||
|
||||
# Handle different calling patterns
|
||||
if options is None:
|
||||
options = ImplodeOptions(
|
||||
input_dir=input_dir,
|
||||
output_file=output_file,
|
||||
dry_run=dry_run,
|
||||
verbose=verbose,
|
||||
overwrite=overwrite,
|
||||
preserve_heading_levels=True, # Preserve heading levels for round-trip compatibility
|
||||
include_readme_files=True # Include README.md files for round-trip compatibility
|
||||
preserve_front_matter=True,
|
||||
section_spacing=2,
|
||||
dry_run=dry_run
|
||||
)
|
||||
else:
|
||||
# Update options with any provided keyword arguments
|
||||
if input_dir and not options.input_dir:
|
||||
options.input_dir = input_dir
|
||||
if output_file and not options.output_file:
|
||||
options.output_file = output_file
|
||||
if dry_run:
|
||||
options.dry_run = dry_run
|
||||
if verbose:
|
||||
options.verbose = verbose
|
||||
if overwrite:
|
||||
options.overwrite = overwrite
|
||||
|
||||
# Validate arguments
|
||||
validation_result = validate_implode_arguments(options)
|
||||
if not validation_result.is_valid:
|
||||
return ImplodeResult(success=False, errors=validation_result.errors)
|
||||
# Determine input directory
|
||||
if input_dir is None:
|
||||
return ImplodeResult(success=False, errors=["Input directory is required"])
|
||||
|
||||
input_dir = options.input_dir
|
||||
input_dir = Path(input_dir)
|
||||
if not input_dir.exists() or not input_dir.is_dir():
|
||||
return ImplodeResult(success=False, errors=[f"Input directory does not exist: {input_dir}"])
|
||||
|
||||
# Determine output file
|
||||
if options.output_file is None:
|
||||
options.output_file = input_dir.parent / f"{input_dir.name}.md"
|
||||
options.output_file = input_dir.parent / f"{input_dir.name}_imploded.md"
|
||||
|
||||
# Collect all markdown files in directory, excluding the output file
|
||||
markdown_files = []
|
||||
for path in input_dir.rglob("*.md"):
|
||||
if (path.is_file() and
|
||||
path != options.output_file):
|
||||
# Skip README.md files unless explicitly included
|
||||
if path.name.lower() == "readme.md" and not options.include_readme_files:
|
||||
continue
|
||||
markdown_files.append(path)
|
||||
|
||||
# Sort files to maintain reasonable order
|
||||
markdown_files.sort()
|
||||
|
||||
# Check if there are any markdown files
|
||||
if not markdown_files:
|
||||
return ImplodeResult(success=False, errors=[f"No markdown files found in directory: {input_dir}"])
|
||||
processing_info = []
|
||||
preview_content = None
|
||||
|
||||
try:
|
||||
# Collect processing info for verbose mode
|
||||
processing_info = []
|
||||
if options.verbose:
|
||||
processing_info.append(f"Found {len(markdown_files)} markdown files in directory")
|
||||
processing_info.append(f"Processing directory: {input_dir}")
|
||||
# Use variant factory to auto-detect and implode
|
||||
factory = get_variant_factory()
|
||||
|
||||
# Combine content
|
||||
combined_content = []
|
||||
front_matter = None
|
||||
# Detect variant from directory structure
|
||||
detection_result = factory.detect_variant(input_dir)
|
||||
|
||||
# Check for standalone front matter file created by explode process
|
||||
if options.preserve_front_matter:
|
||||
fm_file = input_dir / '_frontmatter.yml'
|
||||
if fm_file.exists():
|
||||
try:
|
||||
front_matter = fm_file.read_text().strip()
|
||||
if options.verbose:
|
||||
processing_info.append("Found and loaded front matter from _frontmatter.yml")
|
||||
except Exception as e:
|
||||
if options.verbose:
|
||||
processing_info.append(f"Failed to read _frontmatter.yml: {e}")
|
||||
processing_info.append(f"Processing directory: {input_dir}")
|
||||
processing_info.append(f"Detected variant: {detection_result.variant.value}")
|
||||
processing_info.append(f"Confidence: {detection_result.confidence}")
|
||||
processing_info.append(f"Manifest found: {detection_result.manifest_found}")
|
||||
|
||||
for md_file in markdown_files:
|
||||
content = md_file.read_text()
|
||||
# Get the appropriate variant
|
||||
variant = factory.create_variant(detection_result.variant)
|
||||
|
||||
if options.verbose:
|
||||
processing_info.append(f"Processing file: {md_file.name}")
|
||||
# Count files for verbose output
|
||||
md_files = list(input_dir.rglob("*.md"))
|
||||
# Exclude manifest.md from count
|
||||
md_files = [f for f in md_files if f.name != "manifest.md"]
|
||||
processing_info.append(f"Found {len(md_files)} markdown files in directory")
|
||||
|
||||
# Extract front matter from first file
|
||||
if front_matter is None and options.preserve_front_matter:
|
||||
fm_match = re.match(r'^---\n(.*?)\n---\n(.*)$', content, re.DOTALL)
|
||||
if fm_match:
|
||||
front_matter = fm_match.group(1)
|
||||
content = fm_match.group(2)
|
||||
if options.verbose:
|
||||
processing_info.append("Extracted front matter from first file")
|
||||
# Handle dry run mode differently
|
||||
if dry_run:
|
||||
# For dry run, temporarily disable dry_run to generate content
|
||||
options.dry_run = False
|
||||
variant_result = variant.implode(input_dir, options)
|
||||
|
||||
# Adjust heading levels based on directory depth (unless preserving original levels)
|
||||
if options.preserve_heading_levels:
|
||||
adjusted_content = content
|
||||
if not variant_result.success:
|
||||
return ImplodeResult(
|
||||
success=False,
|
||||
errors=variant_result.errors,
|
||||
processing_info=processing_info
|
||||
)
|
||||
|
||||
# Read the generated content for preview
|
||||
if options.output_file.exists():
|
||||
preview_content = options.output_file.read_text(encoding='utf-8')
|
||||
# Remove the file since this is dry run
|
||||
options.output_file.unlink()
|
||||
else:
|
||||
relative_path = md_file.relative_to(input_dir)
|
||||
heading_level = len(relative_path.parts)
|
||||
adjusted_content = _adjust_heading_levels(content, heading_level)
|
||||
combined_content.append(adjusted_content)
|
||||
preview_content = "No content generated"
|
||||
|
||||
# Assemble final content
|
||||
final_content = ""
|
||||
if front_matter and options.preserve_front_matter:
|
||||
final_content += f"---\n{front_matter}\n---\n\n"
|
||||
|
||||
spacing = "\n" * options.section_spacing
|
||||
final_content += spacing.join(combined_content)
|
||||
|
||||
if options.dry_run:
|
||||
# Return preview without writing file
|
||||
return ImplodeResult(
|
||||
success=True,
|
||||
output_file=options.output_file,
|
||||
preview=final_content,
|
||||
preview=preview_content,
|
||||
processing_info=processing_info
|
||||
)
|
||||
else:
|
||||
# Write output file
|
||||
try:
|
||||
options.output_file.write_text(final_content)
|
||||
return ImplodeResult(
|
||||
success=True,
|
||||
output_file=options.output_file,
|
||||
processing_info=processing_info
|
||||
)
|
||||
except (PermissionError, OSError) as e:
|
||||
return ImplodeResult(success=False, errors=[f"Cannot write to output file: {e}"])
|
||||
|
||||
# Normal mode - perform the implode operation
|
||||
variant_result = variant.implode(input_dir, options)
|
||||
|
||||
if not variant_result.success:
|
||||
return ImplodeResult(
|
||||
success=False,
|
||||
errors=variant_result.errors,
|
||||
processing_info=processing_info
|
||||
)
|
||||
|
||||
# Return successful result in legacy format
|
||||
return ImplodeResult(
|
||||
success=True,
|
||||
output_file=variant_result.output_file,
|
||||
processing_info=processing_info
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
return ImplodeResult(success=False, errors=[str(e)])
|
||||
processing_info.append(f"Error during implode: {e}")
|
||||
return ImplodeResult(
|
||||
success=False,
|
||||
errors=[f"Error during implode: {e}"],
|
||||
processing_info=processing_info
|
||||
)
|
||||
|
||||
|
||||
def _adjust_heading_levels(content: str, base_level: int) -> str:
|
||||
@@ -1573,7 +1549,7 @@ def md_ingest_command(ctx, file_path):
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.argument('file_path', type=click.Path(exists=True))
|
||||
@click.argument('file_path', type=str)
|
||||
@click.option('--output', '-o', default='-',
|
||||
help='Output file (default: stdout)')
|
||||
@click.pass_context
|
||||
@@ -1612,6 +1588,9 @@ def md_get_command(ctx, file_path, output):
|
||||
click.echo(f"Size: {metadata.get('size', 'unknown')} bytes", err=True)
|
||||
click.echo(f"Modified: {metadata.get('modified', 'unknown')}", err=True)
|
||||
|
||||
except FileNotFoundError as e:
|
||||
click.echo(f"Error: File not found in database - {e}", err=True)
|
||||
raise click.Abort()
|
||||
except Exception as e:
|
||||
click.echo(f"Error retrieving file: {e}", err=True)
|
||||
raise click.Abort()
|
||||
@@ -2024,7 +2003,7 @@ def md_implode_command(ctx, input_dir, output, force_variant, dry_run, verbose,
|
||||
if output:
|
||||
output_path = Path(output)
|
||||
else:
|
||||
output_path = input_path.parent / f"{input_path.name}.md"
|
||||
output_path = input_path.parent / f"{input_path.name}_imploded.md"
|
||||
|
||||
# Check if output file exists and overwrite not specified
|
||||
if output_path.exists() and not overwrite:
|
||||
|
||||
Reference in New Issue
Block a user