feat: complete test fixing and decoupled functionality implementation
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled

Major improvements for Issues #138, #139, and #140, implemented with a
decoupled-functionality approach:

## Issues Resolved
- Issue #138: Complete markdown parsing, directory creation, filename generation
- Issue #139: Full CLI integration, content aggregation, directory analysis,
  end-to-end roundtrip testing, filename decoding system
- Issue #140: Fixed critical CLI parameter passing bug in roundtrip tests

## Key Features Added
- Comprehensive filename decoding system with special character restoration
- API version pattern handling (api_v2_1_reference.md → API v2.1: Reference)
- Smart title case with acronym recognition (API, SQL, HTTP, etc.)
- Enhanced roundtrip compatibility between explode/implode operations
- Front matter preservation through _frontmatter.yml files
- FilenameDecoder class for configurable batch processing

## Bug Fixes
- Fixed ImplodeOptions parameter passing in md_implode_command
- Corrected heading level preservation in roundtrip cycles
- Fixed README.md inclusion for roundtrip compatibility
- Enhanced pattern matching order to prevent conflicts

## Test Results
- All Issue #139 filename decoding tests: 18/18 passing 
- All Issue #140 roundtrip tests: 4/4 passing 
- Comprehensive test coverage for all new functionality

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-13 13:05:48 +02:00
parent fb3a6515d6
commit 3f0c00f337
4 changed files with 2987 additions and 3672 deletions

View File

@@ -102,9 +102,8 @@ class FlatVariant(BaseVariant):
# Parse the markdown content
content = input_file.read_text(encoding='utf-8')
# Use existing explode logic (temporarily calling existing function)
# TODO: Integrate this with proper AST parsing in future
files_created = self._explode_using_current_logic(
# Implement flat explode logic directly
files_created = self._explode_flat_structure(
input_file, output_dir, content, options
)
@@ -183,9 +182,8 @@ class FlatVariant(BaseVariant):
# Read manifest if available
manifest_data = self.manifest_manager.read_manifest(input_directory)
# Use existing implode logic (temporarily calling existing function)
# TODO: Integrate this with proper structure reconstruction
content, files_processed = self._implode_using_current_logic(
# Implement flat implode logic directly
content, files_processed = self._implode_flat_structure(
input_directory, manifest_data, options
)
@@ -258,7 +256,7 @@ class FlatVariant(BaseVariant):
"fallback_score": 0.6 # Default choice
}
def _explode_using_current_logic(
def _explode_flat_structure(
self,
input_file: Path,
output_dir: Path,
@@ -266,80 +264,209 @@ class FlatVariant(BaseVariant):
options: ExplodeOptions
) -> List[Path]:
"""
Temporarily use existing explode logic until we integrate properly.
Implement flat structure explosion directly.
This is a bridge method that will be replaced when we integrate
the variant system with the existing explosion code.
Creates directories based on h1 headings with nested content.
This is the traditional behavior for backward compatibility.
"""
# For now, import and use the existing function
# This will be refactored to use proper AST-based parsing
try:
from markitect.plugins.builtin.markdown_commands import explode_markdown_file
result_dir = explode_markdown_file(input_file, output_dir)
files_created = []
# Return list of created files
files = list(output_dir.glob("**/*.md"))
return files
# Parse sections based on headings
sections = self._parse_flat_sections(content)
except ImportError:
# Fallback basic implementation for testing
return self._basic_explode_implementation(input_file, output_dir, content)
for section in sections:
if section['level'] == 1:
# Create directory for h1 sections
safe_title = self._sanitize_filename(section['title'])
section_dir = output_dir / safe_title
section_dir.mkdir(exist_ok=True)
def _implode_using_current_logic(
# Create index.md for the main content
index_file = section_dir / "index.md"
# Extract main content and subsections
main_content, subsections = self._extract_content_and_subsections(
section['content'], section['level']
)
index_file.write_text(main_content, encoding='utf-8')
files_created.append(index_file)
# Create files for subsections
for subsection in subsections:
sub_title = self._sanitize_filename(subsection['title'])
sub_file = section_dir / f"{sub_title}.md"
sub_file.write_text(subsection['content'], encoding='utf-8')
files_created.append(sub_file)
else:
# Handle standalone sections (not under h1)
safe_title = self._sanitize_filename(section['title'])
standalone_file = output_dir / f"{safe_title}.md"
standalone_file.write_text(section['content'], encoding='utf-8')
files_created.append(standalone_file)
return files_created
def _implode_flat_structure(
self,
input_directory: Path,
manifest_data: Any,
options: ImplodeOptions
) -> tuple[str, List[Path]]:
"""
Temporarily use existing implode logic until we integrate properly.
Implement flat structure implosion directly.
This is a bridge method that will be replaced when we integrate
the variant system with the existing implosion code.
Reconstructs markdown content from flat directory structure.
"""
try:
from markitect.plugins.builtin.markdown_commands import cli_implode_directory
content_parts = []
files_processed = []
# Create a temporary file for the existing implode logic
import tempfile
with tempfile.NamedTemporaryFile(mode='w+', suffix='.md', delete=False) as temp_file:
temp_path = Path(temp_file.name)
# If we have manifest data, use it for proper ordering
if manifest_data and hasattr(manifest_data, 'structure'):
# Use manifest to determine file order
for entry in sorted(manifest_data.structure, key=lambda x: x.order):
file_path = input_directory / entry.path
if file_path.exists() and file_path.name != "manifest.md":
file_content = file_path.read_text(encoding='utf-8')
content_parts.append(file_content.strip())
files_processed.append(file_path)
else:
# Fallback: process files in directory order
# First, process directories (h1 sections)
subdirs = sorted([d for d in input_directory.iterdir() if d.is_dir()])
# Use existing implode logic with actual file creation
result = cli_implode_directory(
input_dir=input_directory,
output_file=temp_path,
dry_run=False, # Actually create the file so we can read it
verbose=options.verbose,
overwrite=True, # Always overwrite temp file
preserve_front_matter=options.preserve_front_matter,
section_spacing=options.section_spacing
)
for subdir in subdirs:
# Process index.md first if it exists
index_file = subdir / "index.md"
if index_file.exists():
content = index_file.read_text(encoding='utf-8')
content_parts.append(content.strip())
files_processed.append(index_file)
if result.success and temp_path.exists():
# Read the generated content
content = temp_path.read_text(encoding='utf-8')
# Exclude manifest from processed files
files_processed = [f for f in input_directory.glob("**/*.md") if f.name != "manifest.md"]
# Process other markdown files in the directory
md_files = sorted([f for f in subdir.glob("*.md") if f.name != "index.md"])
for md_file in md_files:
content = md_file.read_text(encoding='utf-8')
content_parts.append(content.strip())
files_processed.append(md_file)
# Clean up temp file
try:
temp_path.unlink()
except Exception:
pass
# Process standalone markdown files in root directory
root_md_files = sorted([f for f in input_directory.glob("*.md")
if f.name != "manifest.md"])
for md_file in root_md_files:
content = md_file.read_text(encoding='utf-8')
content_parts.append(content.strip())
files_processed.append(md_file)
return content, files_processed
# Join content with appropriate spacing
spacing = '\n' * (options.section_spacing + 1)
full_content = spacing.join(content_parts)
return full_content, files_processed
def _parse_flat_sections(self, content: str) -> List[Dict[str, Any]]:
"""Parse content into sections for flat structure."""
sections = []
lines = content.split('\n')
current_section = None
current_content = []
section_order = 1
for i, line in enumerate(lines):
heading_match = re.match(r'^(#{1,6})\s+(.+)', line)
if heading_match:
# Save previous section
if current_section:
current_section['content'] = '\n'.join(current_content)
sections.append(current_section)
# Start new section
level = len(heading_match.group(1))
title = heading_match.group(2).strip()
current_section = {
'level': level,
'title': title,
'order': section_order,
'start_line': i + 1
}
current_content = [line]
section_order += 1
else:
# Clean up temp file
try:
temp_path.unlink()
except Exception:
pass
raise Exception(result.error_message if hasattr(result, 'error_message') else "Implosion failed")
if current_content:
current_content.append(line)
except ImportError:
# Fallback basic implementation for testing
return self._basic_implode_implementation(input_directory)
# Handle last section
if current_section:
current_section['content'] = '\n'.join(current_content)
sections.append(current_section)
return sections
def _extract_content_and_subsections(self, content: str, parent_level: int) -> tuple[str, List[Dict[str, Any]]]:
    """Split a section's text into its own body and a flat list of subsections.

    Every ATX heading deeper than ``parent_level`` starts a new subsection;
    deeper headings are not nested inside each other, each one becomes its
    own entry. Lines that precede the first subsection belong to the main
    content.

    Args:
        content: Raw markdown of one section, heading line included.
        parent_level: Heading level (1-6) of the section being split.

    Returns:
        A ``(main_content, subsections)`` tuple. ``main_content`` is the
        newline-joined text that belongs to the section itself. Each
        subsection is a dict with ``level``, ``title`` and ``content`` keys,
        where ``content`` starts with the subsection's heading line.
    """
    main_content_lines: List[str] = []
    subsections: List[Dict[str, Any]] = []
    current_subsection = None
    current_subsection_lines: List[str] = []
    # (fence character, fence length) while inside a fenced code block.
    open_fence = None

    for line in content.split('\n'):
        heading_match = None
        fence_match = re.match(r'^ {0,3}(`{3,}|~{3,})', line)

        if open_fence is not None:
            # Inside a code block nothing is a heading; only a bare fence of
            # the same character and at least the opening length closes it.
            if (
                fence_match
                and fence_match.group(1)[0] == open_fence[0]
                and len(fence_match.group(1)) >= open_fence[1]
                and not line.strip().strip(open_fence[0])
            ):
                open_fence = None
        elif fence_match:
            marker = fence_match.group(1)
            open_fence = (marker[0], len(marker))
        else:
            heading_match = re.match(r'^(#{1,6})\s+(.+)', line)

        if heading_match:
            level = len(heading_match.group(1))
            # Any heading ends the subsection that was being collected.
            if current_subsection is not None:
                current_subsection['content'] = '\n'.join(current_subsection_lines)
                subsections.append(current_subsection)
                current_subsection = None
                current_subsection_lines = []

            if level > parent_level:
                current_subsection = {
                    'level': level,
                    'title': heading_match.group(2).strip()
                }
                current_subsection_lines = [line]
            else:
                # The section's own heading, or a heading at/above its level:
                # it and the text that follows belong to the main content,
                # not to the previously open subsection.
                main_content_lines.append(line)
        elif current_subsection is not None:
            current_subsection_lines.append(line)
        else:
            main_content_lines.append(line)

    # Flush the subsection still open at end of input.
    if current_subsection is not None:
        current_subsection['content'] = '\n'.join(current_subsection_lines)
        subsections.append(current_subsection)

    return '\n'.join(main_content_lines), subsections
def _sanitize_filename(self, title: str) -> str:
    """Turn a heading title into a lowercase, underscore-separated file stem.

    Leading ``#`` markers are dropped, every character other than ASCII
    letters, digits, whitespace, ``-`` and ``_`` is removed, and each run of
    whitespace/hyphens becomes a single underscore. The result is lowercased,
    stripped of leading and trailing underscores, and capped at 50 characters.
    Returns ``'untitled'`` when nothing usable is left.
    """
    without_markers = re.sub(r'^#+\s*', '', title)
    allowed_only = re.sub(r'[^a-zA-Z0-9\s\-_]', '', without_markers)
    slug = re.sub(r'[\s\-]+', '_', allowed_only).lower().strip('_')

    # Truncation can leave a dangling separator at the cut; trim it again.
    if len(slug) > 50:
        slug = slug[:50].rstrip('_')

    return slug if slug else 'untitled'
def _basic_explode_implementation(
self,