Files
markitect-main/tests/test_issue_138_markdown_parsing.py
tegwick 81d3da5fe7
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat: comprehensive asset management system and testing improvements
Asset Management System (Issue #142):
- Add complete asset management framework with deduplication
- Implement AssetManager, AssetRegistry, and AssetDeduplicator classes
- Add AssetPackager for markdown document packaging
- Create comprehensive test suite for all asset management components
- Add asset constants and custom exceptions for robust error handling

Markdown Processing Enhancements:
- Update markdown_commands.py with improved functionality
- Enhanced parsing and content aggregation capabilities
- Improved filename encoding/decoding for special characters

Test Suite Improvements:
- Add comprehensive tests for Issue #138 markdown parsing
- Enhance Issue #139 content aggregation and end-to-end testing
- Complete test coverage for new asset management features

Examples and Documentation:
- Update BildungsKanonJon.md example with enhanced content
- Generate corresponding HTML output for documentation
- Add asset registry configuration

Development Tools:
- Add install script for simplified setup

This commit represents a major enhancement to MarkiTect's asset handling
capabilities with full test coverage and improved markdown processing.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-12 19:57:31 +02:00

262 lines
8.0 KiB
Python

"""
Test markdown parsing functionality for Issue #138: Explode Markdown file to markdown directory.
This test module covers the core markdown structure parsing functionality,
including heading extraction, content identification, and hierarchical structure analysis.
"""
import pytest
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch
# The implementation may not exist yet (TDD RED phase). Fall back to None
# placeholders so this module still imports and every test fails on its own.
try:
    from markitect.plugins.builtin.markdown_commands import (
        parse_markdown_structure,
        extract_headings,
        extract_section_content,
        MarkdownSection,
    )
except ImportError:
    # Expected during the RED phase: using a None placeholder makes the
    # individual test fail rather than aborting import of the whole module.
    parse_markdown_structure = extract_headings = None
    extract_section_content = MarkdownSection = None
class TestMarkdownStructureParsing:
    """Test markdown file parsing and structure extraction."""

    @staticmethod
    def _write_temp_markdown(content):
        """Write *content* to a new temporary ``.md`` file and return its path.

        The file is created with ``delete=False`` so it still exists after the
        handle is closed; the caller is responsible for unlinking it.

        Args:
            content: Markdown text to write.

        Returns:
            ``Path`` of the temporary file.
        """
        # Explicit encoding keeps the fixture independent of the platform's
        # default locale encoding.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.md', delete=False, encoding='utf-8'
        ) as f:
            f.write(content)
        return Path(f.name)

    def test_parse_simple_markdown_structure(self):
        """Test parsing a markdown file with basic heading structure."""
        markdown_content = """# Part 1: Introduction
This is the introduction content.
## Chapter 1: Getting Started
Content for chapter 1.
## Chapter 2: Advanced Topics
Content for chapter 2.
### Section 2.1: Details
Detailed content here.
"""
        temp_file = self._write_temp_markdown(markdown_content)
        try:
            # This should fail initially (RED phase)
            structure, front_matter = parse_markdown_structure(temp_file)

            # Verify structure
            assert len(structure) == 1  # One part
            assert structure[0].level == 1
            assert structure[0].title == "Part 1: Introduction"
            assert len(structure[0].children) == 2  # Two chapters
            assert front_matter is None  # No front matter in this test

            # Check chapters
            assert structure[0].children[0].level == 2
            assert structure[0].children[0].title == "Chapter 1: Getting Started"
            assert structure[0].children[1].level == 2
            assert structure[0].children[1].title == "Chapter 2: Advanced Topics"
            assert len(structure[0].children[1].children) == 1  # One section

            # Check section
            section = structure[0].children[1].children[0]
            assert section.level == 3
            assert section.title == "Section 2.1: Details"
        finally:
            temp_file.unlink()

    def test_extract_headings_from_content(self):
        """Test extracting headings with their levels from markdown content."""
        # The blank line before each later heading is significant: the
        # expected 'line' values below (0, 3, 6, 9) are zero-based line
        # positions within this literal.
        markdown_content = """# Main Title
Some intro content.

## Chapter 1
Chapter content.

### Subsection
Sub content.

## Chapter 2
More content.
"""
        # This should fail initially (RED phase)
        headings = extract_headings(markdown_content)
        expected = [
            {'level': 1, 'title': 'Main Title', 'line': 0},
            {'level': 2, 'title': 'Chapter 1', 'line': 3},
            {'level': 3, 'title': 'Subsection', 'line': 6},
            {'level': 2, 'title': 'Chapter 2', 'line': 9},
        ]
        assert headings == expected

    def test_extract_section_content_between_headings(self):
        """Test extracting content that belongs to specific sections."""
        markdown_content = """# Main Title
Intro paragraph.
Another intro line.
## Chapter 1
Chapter 1 content.
More chapter 1 content.
### Subsection
Subsection content.
## Chapter 2
Chapter 2 content.
"""
        # This should fail initially (RED phase)
        headings = extract_headings(markdown_content)

        # Extract content for "Chapter 1" (index 1 in the headings list).
        # The expectation includes the nested "### Subsection" and stops
        # before the next level-2 heading.
        content = extract_section_content(markdown_content, headings, 1)
        expected_content = """## Chapter 1
Chapter 1 content.
More chapter 1 content.
### Subsection
Subsection content."""
        # Compare stripped text so leading/trailing whitespace is ignored.
        assert content.strip() == expected_content.strip()

    def test_parse_markdown_with_front_matter(self):
        """Test parsing markdown file with YAML front matter."""
        markdown_content = """---
title: "My Document"
author: "Test Author"
date: 2025-10-07
---
# Chapter 1
Content for chapter 1.
## Section 1.1
Section content.
"""
        temp_file = self._write_temp_markdown(markdown_content)
        try:
            # This should fail initially (RED phase)
            structure, front_matter = parse_markdown_structure(temp_file)

            # Front matter should be extracted and structure parsed
            assert len(structure) == 1
            assert structure[0].title == "Chapter 1"
            assert structure[0].level == 1
            assert front_matter is not None
            assert 'title: "My Document"' in front_matter
        finally:
            temp_file.unlink()

    def test_parse_markdown_with_no_headings(self):
        """Test parsing markdown file with no headings."""
        markdown_content = """This is just plain content.
No headings here.
Some more content.
"""
        temp_file = self._write_temp_markdown(markdown_content)
        try:
            # This should fail initially (RED phase)
            structure, front_matter = parse_markdown_structure(temp_file)

            # A heading-less document yields an empty structure
            assert structure == []
            assert front_matter is None
        finally:
            temp_file.unlink()

    def test_parse_markdown_with_inconsistent_levels(self):
        """Test parsing markdown with inconsistent heading levels (e.g., jump from # to ###)."""
        markdown_content = """# Main Title
Main content.
### Deep Section
This jumps from level 1 to level 3.
## Normal Chapter
Back to level 2.
"""
        temp_file = self._write_temp_markdown(markdown_content)
        try:
            # This should fail initially (RED phase)
            structure, front_matter = parse_markdown_structure(temp_file)

            # Should handle inconsistent levels gracefully
            assert len(structure) == 1  # Main title
            assert front_matter is None
            assert structure[0].level == 1
            assert len(structure[0].children) >= 1  # Should have children
        finally:
            temp_file.unlink()
class TestMarkdownSectionModel:
    """Exercise the MarkdownSection data model."""

    def test_markdown_section_creation(self):
        """A newly constructed section exposes its fields and has no children."""
        # Expected to fail during the RED phase, before MarkdownSection exists.
        init_kwargs = {
            'level': 1,
            'title': "Test Section",
            'content': "Test content",
            'line_start': 0,
            'line_end': 10,
        }
        node = MarkdownSection(**init_kwargs)

        assert node.level == 1
        assert node.title == "Test Section"
        assert node.content == "Test content"
        assert node.children == []

    def test_markdown_section_add_child(self):
        """add_child links the child into the parent and sets the back-reference."""
        # Expected to fail during the RED phase, before MarkdownSection exists.
        root = MarkdownSection(level=1, title="Parent", content="Parent content")
        leaf = MarkdownSection(level=2, title="Child", content="Child content")

        root.add_child(leaf)

        assert len(root.children) == 1
        assert root.children[0] == leaf
        assert leaf.parent == root

    def test_markdown_section_hierarchy_validation(self):
        """add_child rejects a child that skips a heading level."""
        # Expected to fail during the RED phase, before MarkdownSection exists.
        top = MarkdownSection(level=1, title="Parent", content="Parent content")
        too_deep = MarkdownSection(level=3, title="Invalid", content="Skip level 2")

        # Attaching a level-3 section directly under level 1 skips level 2
        # and must be reported as an invalid hierarchy.
        with pytest.raises(ValueError, match="Invalid heading hierarchy"):
            top.add_child(too_deep)