markitect-main/tests/test_issue_138_markdown_parsing.py

"""
Test markdown parsing functionality for Issue #138: Explode Markdown file to markdown directory.

This test module covers the core markdown structure parsing functionality,
including heading extraction, content identification, and hierarchical structure analysis.
"""

import pytest
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch

# Import will fail initially (RED phase) until implementation exists
try:
    from markitect.plugins.builtin.markdown_commands import (
        parse_markdown_structure,
        extract_headings,
        extract_section_content,
        MarkdownSection
    )
except ImportError:
    # Expected during RED phase - tests should fail initially
    parse_markdown_structure = None
    extract_headings = None
    extract_section_content = None
    MarkdownSection = None


class TestMarkdownStructureParsing:
    """Test markdown file parsing and structure extraction.

    Tests that need an on-disk document request pytest's built-in
    ``tmp_path`` fixture and write the document through
    ``_write_markdown``. pytest owns the temporary directory, so the tests
    need no manual create/unlink boilerplate.

    During the RED phase the parsing functions are bound to ``None`` by the
    module's import fallback, so every call to them fails until the
    implementation exists.
    """

    @staticmethod
    def _write_markdown(directory, markdown_content, filename="document.md"):
        """Write ``markdown_content`` to ``directory / filename`` and return the path.

        Args:
            directory: Existing directory to create the file in (the tests
                pass pytest's ``tmp_path``).
            markdown_content: Text to store in the file.
            filename: Name of the file to create; keeps the ``.md`` suffix
                by default.

        Returns:
            ``Path`` of the file that was written.
        """
        target = Path(directory) / filename
        # Explicit encoding keeps the fixture independent of the platform's
        # default locale encoding.
        target.write_text(markdown_content, encoding="utf-8")
        return target

    def test_parse_simple_markdown_structure(self, tmp_path):
        """Test parsing a markdown file with basic heading structure."""
        markdown_content = """# Part 1: Introduction
This is the introduction content.

## Chapter 1: Getting Started
Content for chapter 1.

## Chapter 2: Advanced Topics
Content for chapter 2.

### Section 2.1: Details
Detailed content here.
"""

        markdown_file = self._write_markdown(tmp_path, markdown_content)

        # This should fail initially (RED phase)
        structure, front_matter = parse_markdown_structure(markdown_file)

        # Verify structure
        assert len(structure) == 1  # One part
        assert structure[0].level == 1
        assert structure[0].title == "Part 1: Introduction"
        assert len(structure[0].children) == 2  # Two chapters
        assert front_matter is None  # No front matter in this test

        # Check chapters
        first_chapter, second_chapter = structure[0].children
        assert first_chapter.level == 2
        assert first_chapter.title == "Chapter 1: Getting Started"

        assert second_chapter.level == 2
        assert second_chapter.title == "Chapter 2: Advanced Topics"
        assert len(second_chapter.children) == 1  # One section

        # Check section
        section = second_chapter.children[0]
        assert section.level == 3
        assert section.title == "Section 2.1: Details"

    def test_extract_headings_from_content(self):
        """Test extracting headings with their levels from markdown content."""
        markdown_content = """# Main Title
Some intro content.

## Chapter 1
Chapter content.

### Subsection
Sub content.

## Chapter 2
More content.
"""

        # This should fail initially (RED phase)
        headings = extract_headings(markdown_content)

        # 'line' is the zero-based index of the heading line within the text.
        expected = [
            {'level': 1, 'title': 'Main Title', 'line': 0},
            {'level': 2, 'title': 'Chapter 1', 'line': 3},
            {'level': 3, 'title': 'Subsection', 'line': 6},
            {'level': 2, 'title': 'Chapter 2', 'line': 9}
        ]

        assert headings == expected

    def test_extract_section_content_between_headings(self):
        """Test extracting content that belongs to specific sections."""
        markdown_content = """# Main Title
Intro paragraph.
Another intro line.

## Chapter 1
Chapter 1 content.
More chapter 1 content.

### Subsection
Subsection content.

## Chapter 2
Chapter 2 content.
"""

        # This should fail initially (RED phase)
        headings = extract_headings(markdown_content)

        # Extract content for "Chapter 1"
        content = extract_section_content(markdown_content, headings, 1)  # Index 1 = "Chapter 1"

        # The expected section runs up to the next heading of the same level
        # ("## Chapter 2") and therefore includes the nested "### Subsection".
        expected_content = """## Chapter 1
Chapter 1 content.
More chapter 1 content.

### Subsection
Subsection content."""

        # Surrounding whitespace is not part of the contract being checked.
        assert content.strip() == expected_content.strip()

    def test_parse_markdown_with_front_matter(self, tmp_path):
        """Test parsing markdown file with YAML front matter."""
        markdown_content = """---
title: "My Document"
author: "Test Author"
date: 2025-10-07
---

# Chapter 1
Content for chapter 1.

## Section 1.1
Section content.
"""

        markdown_file = self._write_markdown(tmp_path, markdown_content)

        # This should fail initially (RED phase)
        structure, front_matter = parse_markdown_structure(markdown_file)

        # Front matter should be extracted and structure parsed
        assert len(structure) == 1
        assert structure[0].title == "Chapter 1"
        assert structure[0].level == 1
        assert front_matter is not None
        assert 'title: "My Document"' in front_matter

    def test_parse_markdown_with_no_headings(self, tmp_path):
        """Test parsing markdown file with no headings."""
        markdown_content = """This is just plain content.
No headings here.

Some more content.
"""

        markdown_file = self._write_markdown(tmp_path, markdown_content)

        # This should fail initially (RED phase)
        structure, front_matter = parse_markdown_structure(markdown_file)

        # Should return empty structure or handle gracefully
        assert structure == []
        assert front_matter is None

    def test_parse_markdown_with_inconsistent_levels(self, tmp_path):
        """Test parsing markdown with inconsistent heading levels (e.g., jump from # to ###)."""
        markdown_content = """# Main Title
Main content.

### Deep Section
This jumps from level 1 to level 3.

## Normal Chapter
Back to level 2.
"""

        markdown_file = self._write_markdown(tmp_path, markdown_content)

        # This should fail initially (RED phase)
        structure, front_matter = parse_markdown_structure(markdown_file)

        # Should handle inconsistent levels gracefully
        assert len(structure) == 1  # Main title
        assert front_matter is None
        assert structure[0].level == 1
        assert len(structure[0].children) >= 1  # Should have children


class TestMarkdownSectionModel:
    """Exercise the MarkdownSection data model.

    Covers construction, parent/child linking via ``add_child`` and the
    rejection of a child whose level skips a heading level.
    """

    def test_markdown_section_creation(self):
        """A section built from explicit fields exposes them and has no children."""
        init_kwargs = {
            "level": 1,
            "title": "Test Section",
            "content": "Test content",
            "line_start": 0,
            "line_end": 10,
        }

        # Expected to fail in the RED phase, while the import fallback
        # leaves MarkdownSection bound to None.
        node = MarkdownSection(**init_kwargs)

        assert node.level == 1
        assert node.title == "Test Section"
        assert node.content == "Test content"
        assert node.children == []

    def test_markdown_section_add_child(self):
        """``add_child`` registers the child on the parent and back-links it."""
        # Expected to fail in the RED phase (MarkdownSection not yet importable).
        outer = MarkdownSection(level=1, title="Parent", content="Parent content")
        inner = MarkdownSection(level=2, title="Child", content="Child content")

        outer.add_child(inner)

        attached = outer.children
        assert len(attached) == 1
        assert attached[0] == inner
        assert inner.parent == outer

    def test_markdown_section_hierarchy_validation(self):
        """Attaching a level-3 section directly under level 1 raises ValueError."""
        # Expected to fail in the RED phase (MarkdownSection not yet importable).
        top_level = MarkdownSection(level=1, title="Parent", content="Parent content")
        level_skipping = MarkdownSection(level=3, title="Invalid", content="Skip level 2")

        # Level 2 is skipped, so the model is expected to reject the link.
        expected_error = pytest.raises(ValueError, match="Invalid heading hierarchy")
        with expected_error:
            top_level.add_child(level_skipping)