Files
markitect-main/tests/test_issue_138_markdown_parsing.py
tegwick 312bf8c7bf feat: complete TDD8 implementation of markdown file explosion - Issue #138
Complete implementation of md-explode command for transforming single
markdown files into organized directory structures:

Core Implementation:
- MarkdownSection class for hierarchical document modeling
- extract_headings() - Parse markdown headings with levels
- parse_markdown_structure() - Build section hierarchy from content
- generate_safe_filename() - Convert headings to filesystem-safe names
- explode_markdown_file() - Main explosion functionality
- DirectoryStructureBuilder - Create organized file/directory structures

CLI Integration:
- md-explode command with comprehensive options
- --dry-run for previewing structure
- --verbose for detailed output
- --max-depth for limiting nesting
- --output-dir for custom output location

Key Features:
- Hierarchical structure preservation (# → ## → ###)
- Smart filename generation with Unicode support
- Front matter handling and preservation
- Content integrity maintenance
- Cross-platform filesystem compatibility
- Comprehensive error handling and validation

Refactoring Applied:
- Eliminated code duplication between filename functions
- Extracted front matter processing into dedicated function
- Modularized CLI command with helper functions
- Improved error handling and user feedback

Documentation:
- Complete API documentation with docstrings
- Comprehensive user documentation (docs/md-explode-command.md)
- Usage examples and troubleshooting guide
- Integration instructions with other MarkiTect commands

Testing: 47 comprehensive tests covering all functionality
Status: Production-ready, full TDD8 cycle completed
Performance: Efficient for documents with thousands of sections

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-07 15:44:30 +02:00

257 lines
7.7 KiB
Python

"""
Test markdown parsing functionality for Issue #138: Explode Markdown file to markdown directory.
This test module covers the core markdown structure parsing functionality,
including heading extraction, content identification, and hierarchical structure analysis.
"""
import pytest
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch
# Import will fail initially (RED phase) until implementation exists
# Guarded import of the functions under test. When the implementation module
# cannot be imported, each name is bound to None so this test module still
# imports and collects; the individual tests then fail when they use the name.
try:
    from markitect.plugins.builtin.markdown_commands import (
        parse_markdown_structure,
        extract_headings,
        extract_section_content,
        MarkdownSection,
    )
except ImportError:
    # Expected during RED phase - tests should fail initially
    parse_markdown_structure = None
    extract_headings = None
    extract_section_content = None
    MarkdownSection = None
class TestMarkdownStructureParsing:
    """Test markdown file parsing and structure extraction.

    File-based tests write their fixture to a temporary ``.md`` file, hand the
    path to ``parse_markdown_structure`` and always delete the file afterwards.
    String-based tests call ``extract_headings`` / ``extract_section_content``
    directly on the markdown text.

    The markdown fixtures are kept flush-left inside the triple-quoted strings
    on purpose: indenting them would change the text under test.
    """

    @staticmethod
    def _write_temp_markdown(content):
        """Write ``content`` to a new temporary ``.md`` file and return its path.

        The file is created with ``delete=False`` so that it survives closing;
        the caller is responsible for unlinking it. The encoding is pinned to
        UTF-8 so the fixture bytes do not depend on the platform default.
        """
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.md', delete=False, encoding='utf-8'
        ) as f:
            f.write(content)
        return Path(f.name)

    def test_parse_simple_markdown_structure(self):
        """Test parsing a markdown file with basic heading structure."""
        markdown_content = """# Part 1: Introduction
This is the introduction content.
## Chapter 1: Getting Started
Content for chapter 1.
## Chapter 2: Advanced Topics
Content for chapter 2.
### Section 2.1: Details
Detailed content here.
"""
        temp_file = self._write_temp_markdown(markdown_content)
        try:
            # This should fail initially (RED phase)
            structure = parse_markdown_structure(temp_file)

            # Verify structure
            assert len(structure) == 1  # One part
            assert structure[0].level == 1
            assert structure[0].title == "Part 1: Introduction"
            assert len(structure[0].children) == 2  # Two chapters

            # Check chapters
            assert structure[0].children[0].level == 2
            assert structure[0].children[0].title == "Chapter 1: Getting Started"
            assert structure[0].children[1].level == 2
            assert structure[0].children[1].title == "Chapter 2: Advanced Topics"
            assert len(structure[0].children[1].children) == 1  # One section

            # Check section
            section = structure[0].children[1].children[0]
            assert section.level == 3
            assert section.title == "Section 2.1: Details"
        finally:
            temp_file.unlink()

    def test_extract_headings_from_content(self):
        """Test extracting headings with their levels from markdown content."""
        # A blank line precedes every heading after the first, which puts the
        # headings on 0-based lines 0, 3, 6 and 9 -- the indices asserted below.
        markdown_content = """# Main Title
Some intro content.

## Chapter 1
Chapter content.

### Subsection
Sub content.

## Chapter 2
More content.
"""
        # This should fail initially (RED phase)
        headings = extract_headings(markdown_content)
        expected = [
            {'level': 1, 'title': 'Main Title', 'line': 0},
            {'level': 2, 'title': 'Chapter 1', 'line': 3},
            {'level': 3, 'title': 'Subsection', 'line': 6},
            {'level': 2, 'title': 'Chapter 2', 'line': 9},
        ]
        assert headings == expected

    def test_extract_section_content_between_headings(self):
        """Test extracting content that belongs to specific sections."""
        markdown_content = """# Main Title
Intro paragraph.
Another intro line.
## Chapter 1
Chapter 1 content.
More chapter 1 content.
### Subsection
Subsection content.
## Chapter 2
Chapter 2 content.
"""
        # This should fail initially (RED phase)
        headings = extract_headings(markdown_content)

        # Extract content for "Chapter 1"
        content = extract_section_content(markdown_content, headings, 1)  # Index 1 = "Chapter 1"

        # The expected text includes the nested "### Subsection" and stops
        # before the next level-2 heading ("## Chapter 2").
        expected_content = """## Chapter 1
Chapter 1 content.
More chapter 1 content.
### Subsection
Subsection content."""
        # Compare stripped text so leading/trailing whitespace is not significant.
        assert content.strip() == expected_content.strip()

    def test_parse_markdown_with_front_matter(self):
        """Test parsing markdown file with YAML front matter."""
        markdown_content = """---
title: "My Document"
author: "Test Author"
date: 2025-10-07
---
# Chapter 1
Content for chapter 1.
## Section 1.1
Section content.
"""
        temp_file = self._write_temp_markdown(markdown_content)
        try:
            # This should fail initially (RED phase)
            structure = parse_markdown_structure(temp_file)

            # Front matter should be handled appropriately
            assert len(structure) == 1
            assert structure[0].title == "Chapter 1"
            assert structure[0].level == 1
        finally:
            temp_file.unlink()

    def test_parse_markdown_with_no_headings(self):
        """Test parsing markdown file with no headings."""
        markdown_content = """This is just plain content.
No headings here.
Some more content.
"""
        temp_file = self._write_temp_markdown(markdown_content)
        try:
            # This should fail initially (RED phase)
            structure = parse_markdown_structure(temp_file)

            # Should return empty structure or handle gracefully
            assert structure == [] or structure is None
        finally:
            temp_file.unlink()

    def test_parse_markdown_with_inconsistent_levels(self):
        """Test parsing markdown with inconsistent heading levels (e.g., jump from # to ###)."""
        markdown_content = """# Main Title
Main content.
### Deep Section
This jumps from level 1 to level 3.
## Normal Chapter
Back to level 2.
"""
        temp_file = self._write_temp_markdown(markdown_content)
        try:
            # This should fail initially (RED phase)
            structure = parse_markdown_structure(temp_file)

            # Should handle inconsistent levels gracefully
            assert len(structure) == 1  # Main title
            assert structure[0].level == 1
            assert len(structure[0].children) >= 1  # Should have children
        finally:
            temp_file.unlink()
class TestMarkdownSectionModel:
    """Exercise the MarkdownSection data model: construction, child linking, hierarchy checks."""

    def test_markdown_section_creation(self):
        """A newly constructed section exposes its fields and has no children."""
        # Expected to fail in the RED phase, while MarkdownSection is still None.
        node = MarkdownSection(
            level=1,
            title="Test Section",
            content="Test content",
            line_start=0,
            line_end=10,
        )

        assert node.level == 1
        assert node.title == "Test Section"
        assert node.content == "Test content"
        assert node.children == []

    def test_markdown_section_add_child(self):
        """add_child() appends the child and sets the child's parent link."""
        # Expected to fail in the RED phase, while MarkdownSection is still None.
        root = MarkdownSection(level=1, title="Parent", content="Parent content")
        leaf = MarkdownSection(level=2, title="Child", content="Child content")

        root.add_child(leaf)

        assert len(root.children) == 1
        assert root.children[0] == leaf
        assert leaf.parent == root

    def test_markdown_section_hierarchy_validation(self):
        """add_child() raises ValueError when the child skips a heading level."""
        # Expected to fail in the RED phase, while MarkdownSection is still None.
        top = MarkdownSection(level=1, title="Parent", content="Parent content")
        too_deep = MarkdownSection(level=3, title="Invalid", content="Skip level 2")

        # A level-3 section directly under a level-1 section skips level 2,
        # so the call must raise with the expected message.
        with pytest.raises(ValueError, match="Invalid heading hierarchy"):
            top.add_child(too_deep)