Complete implementation of md-explode command for transforming single markdown files into organized directory structures:

Core Implementation:
- MarkdownSection class for hierarchical document modeling
- extract_headings() - Parse markdown headings with levels
- parse_markdown_structure() - Build section hierarchy from content
- generate_safe_filename() - Convert headings to filesystem-safe names
- explode_markdown_file() - Main explosion functionality
- DirectoryStructureBuilder - Create organized file/directory structures

CLI Integration:
- md-explode command with comprehensive options
- --dry-run for previewing structure
- --verbose for detailed output
- --max-depth for limiting nesting
- --output-dir for custom output location

Key Features:
- Hierarchical structure preservation (# → ## → ###)
- Smart filename generation with Unicode support
- Front matter handling and preservation
- Content integrity maintenance
- Cross-platform filesystem compatibility
- Comprehensive error handling and validation

Refactoring Applied:
- Eliminated code duplication between filename functions
- Extracted front matter processing into dedicated function
- Modularized CLI command with helper functions
- Improved error handling and user feedback

Documentation:
- Complete API documentation with docstrings
- Comprehensive user documentation (docs/md-explode-command.md)
- Usage examples and troubleshooting guide
- Integration instructions with other MarkiTect commands

Testing: 47 comprehensive tests covering all functionality
Status: Production-ready, full TDD8 cycle completed
Performance: Efficient for documents with thousands of sections

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
257 lines
7.7 KiB
Python
257 lines
7.7 KiB
Python
"""
|
|
Test markdown parsing functionality for Issue #138: Explode Markdown file to markdown directory.
|
|
|
|
This test module covers the core markdown structure parsing functionality,
|
|
including heading extraction, content identification, and hierarchical structure analysis.
|
|
"""
|
|
|
|
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch

import pytest

# The implementation may not exist yet (RED phase of the TDD cycle). When the
# import fails, every imported name is bound to None so that this module still
# imports and each test fails individually at the point where it uses the
# missing name, instead of the whole module erroring out during collection.
try:
    from markitect.plugins.builtin.markdown_commands import (
        parse_markdown_structure,
        extract_headings,
        extract_section_content,
        MarkdownSection,
    )
except ImportError:
    # Expected during RED phase - tests should fail initially
    parse_markdown_structure = None
    extract_headings = None
    extract_section_content = None
    MarkdownSection = None
|
|
|
|
|
|
class TestMarkdownStructureParsing:
    """Test markdown file parsing and structure extraction."""

    @staticmethod
    def _write_markdown(directory, content):
        """Write *content* to ``document.md`` in *directory* and return the path.

        Args:
            directory: Existing directory (a ``pathlib.Path``) to write into;
                the tests pass pytest's per-test ``tmp_path``.
            content: Markdown text to store in the file.

        Returns:
            Path of the written markdown file.
        """
        markdown_file = directory / "document.md"
        # Explicit encoding keeps the fixture independent of the platform's
        # default text encoding.
        markdown_file.write_text(content, encoding="utf-8")
        return markdown_file

    def test_parse_simple_markdown_structure(self, tmp_path):
        """Test parsing a markdown file with basic heading structure."""
        markdown_content = """# Part 1: Introduction
This is the introduction content.

## Chapter 1: Getting Started
Content for chapter 1.

## Chapter 2: Advanced Topics
Content for chapter 2.

### Section 2.1: Details
Detailed content here.
"""
        markdown_file = self._write_markdown(tmp_path, markdown_content)

        # This should fail initially (RED phase)
        structure = parse_markdown_structure(markdown_file)

        # Verify structure
        assert len(structure) == 1  # One part
        assert structure[0].level == 1
        assert structure[0].title == "Part 1: Introduction"
        assert len(structure[0].children) == 2  # Two chapters

        # Check chapters
        assert structure[0].children[0].level == 2
        assert structure[0].children[0].title == "Chapter 1: Getting Started"

        assert structure[0].children[1].level == 2
        assert structure[0].children[1].title == "Chapter 2: Advanced Topics"
        assert len(structure[0].children[1].children) == 1  # One section

        # Check section
        section = structure[0].children[1].children[0]
        assert section.level == 3
        assert section.title == "Section 2.1: Details"

    def test_extract_headings_from_content(self):
        """Test extracting headings with their levels from markdown content."""
        markdown_content = """# Main Title
Some intro content.

## Chapter 1
Chapter content.

### Subsection
Sub content.

## Chapter 2
More content.
"""

        # This should fail initially (RED phase)
        headings = extract_headings(markdown_content)

        # 'line' is the zero-based index of the heading line within the content.
        expected = [
            {'level': 1, 'title': 'Main Title', 'line': 0},
            {'level': 2, 'title': 'Chapter 1', 'line': 3},
            {'level': 3, 'title': 'Subsection', 'line': 6},
            {'level': 2, 'title': 'Chapter 2', 'line': 9},
        ]

        assert headings == expected

    def test_extract_section_content_between_headings(self):
        """Test extracting content that belongs to specific sections."""
        markdown_content = """# Main Title
Intro paragraph.
Another intro line.

## Chapter 1
Chapter 1 content.
More chapter 1 content.

### Subsection
Subsection content.

## Chapter 2
Chapter 2 content.
"""

        # This should fail initially (RED phase)
        headings = extract_headings(markdown_content)

        # Extract content for "Chapter 1"
        content = extract_section_content(markdown_content, headings, 1)  # Index 1 = "Chapter 1"

        # The expected text includes the nested "### Subsection" and stops
        # before the next level-2 heading ("## Chapter 2").
        expected_content = """## Chapter 1
Chapter 1 content.
More chapter 1 content.

### Subsection
Subsection content."""

        # Surrounding whitespace is ignored by the comparison.
        assert content.strip() == expected_content.strip()

    def test_parse_markdown_with_front_matter(self, tmp_path):
        """Test parsing markdown file with YAML front matter."""
        markdown_content = """---
title: "My Document"
author: "Test Author"
date: 2025-10-07
---

# Chapter 1
Content for chapter 1.

## Section 1.1
Section content.
"""
        markdown_file = self._write_markdown(tmp_path, markdown_content)

        # This should fail initially (RED phase)
        structure = parse_markdown_structure(markdown_file)

        # Front matter should be handled appropriately: it must not show up
        # as a section of its own.
        assert len(structure) == 1
        assert structure[0].title == "Chapter 1"
        assert structure[0].level == 1

    def test_parse_markdown_with_no_headings(self, tmp_path):
        """Test parsing markdown file with no headings."""
        markdown_content = """This is just plain content.
No headings here.

Some more content.
"""
        markdown_file = self._write_markdown(tmp_path, markdown_content)

        # This should fail initially (RED phase)
        structure = parse_markdown_structure(markdown_file)

        # Should return empty structure or handle gracefully
        assert structure == [] or structure is None

    def test_parse_markdown_with_inconsistent_levels(self, tmp_path):
        """Test parsing markdown with inconsistent heading levels (e.g., jump from # to ###)."""
        markdown_content = """# Main Title
Main content.

### Deep Section
This jumps from level 1 to level 3.

## Normal Chapter
Back to level 2.
"""
        markdown_file = self._write_markdown(tmp_path, markdown_content)

        # This should fail initially (RED phase)
        structure = parse_markdown_structure(markdown_file)

        # Should handle inconsistent levels gracefully
        # NOTE(review): only the presence of children is asserted; where the
        # level-3 section ends up is left unspecified. The section-model test
        # expects add_child() to reject a level-1 -> level-3 jump, so the
        # parser presumably needs its own handling for this case - confirm.
        assert len(structure) == 1  # Main title
        assert structure[0].level == 1
        assert len(structure[0].children) >= 1  # Should have children
|
|
|
|
|
|
class TestMarkdownSectionModel:
    """Test the MarkdownSection data model."""

    def test_markdown_section_creation(self):
        """Test creating MarkdownSection objects."""
        # This should fail initially (RED phase)
        section = MarkdownSection(
            level=1,
            title="Test Section",
            content="Test content",
            line_start=0,
            line_end=10,
        )

        assert section.level == 1
        assert section.title == "Test Section"
        assert section.content == "Test content"
        # A freshly created section starts without children.
        assert section.children == []

    def test_markdown_section_add_child(self):
        """Test adding child sections to parent sections."""
        # This should fail initially (RED phase)
        parent = MarkdownSection(level=1, title="Parent", content="Parent content")
        child = MarkdownSection(level=2, title="Child", content="Child content")

        parent.add_child(child)

        # The link must be visible from both sides: parent -> child and
        # child -> parent.
        assert len(parent.children) == 1
        assert parent.children[0] == child
        assert child.parent == parent

    def test_markdown_section_hierarchy_validation(self):
        """Test that section hierarchy is validated correctly."""
        # This should fail initially (RED phase)
        parent = MarkdownSection(level=1, title="Parent", content="Parent content")
        invalid_child = MarkdownSection(level=3, title="Invalid", content="Skip level 2")

        # Should raise exception for invalid hierarchy (skipping level 2)
        with pytest.raises(ValueError, match="Invalid heading hierarchy"):
            parent.add_child(invalid_child)