# markitect-main/tests/test_issue_139_filename_decoding.py

"""
Test filename decoding functionality for Issue #139: Implode directory to a markdown file.

This test module covers the conversion of filesystem-safe names back to readable
headings, which is the reverse operation of the filename encoding in md-explode.
"""

import pytest
from pathlib import Path
from unittest.mock import Mock, patch

# Import will fail initially (RED phase) until implementation exists
try:
    from markitect.plugins.builtin.markdown_commands import (
        decode_filename_to_heading,
        restore_special_characters,
        reconstruct_number_format,
        apply_title_case,
        decode_directory_name_to_heading,
        FilenameDecoder
    )
except ImportError:
    # Expected during RED phase - tests should fail initially
    decode_filename_to_heading = None
    restore_special_characters = None
    reconstruct_number_format = None
    apply_title_case = None
    decode_directory_name_to_heading = None
    FilenameDecoder = None


class TestFilenameDecoding:
    """Test decoding filesystem-safe filenames back to readable headings.

    These tests were written ahead of the implementation (TDD RED phase).
    When the module-level import fails, the decoding helpers are bound to
    ``None`` and every test here fails as soon as it calls one of them.
    """

    @staticmethod
    def _assert_decodes(decode, cases):
        """Check *decode* against every ``(encoded, expected)`` pair.

        All rows are evaluated before asserting, so one failing run lists
        every mismatching input instead of stopping at the first bad row.

        Args:
            decode: Callable under test; invoked with the encoded string.
            cases: Iterable of ``(encoded, expected)`` tuples.

        Raises:
            AssertionError: If any row decodes to something other than its
                expected heading. The message names each offending input.
        """
        mismatches = []
        for encoded, expected in cases:
            actual = decode(encoded)
            if actual != expected:
                mismatches.append(
                    "{!r}: expected {!r}, got {!r}".format(encoded, expected, actual)
                )
        assert not mismatches, "decoding mismatches:\n" + "\n".join(mismatches)

    def test_decode_simple_filename(self):
        """Test decoding simple filesystem-safe filename to heading."""
        filename = "chapter_1_getting_started.md"
        decoded = decode_filename_to_heading(filename)

        assert decoded == "Chapter 1: Getting Started"

    def test_decode_numbered_sections(self):
        """Test decoding numbered section filenames."""
        self._assert_decodes(decode_filename_to_heading, [
            ("section_1_1_installation.md", "Section 1.1: Installation"),
            ("section_2_3_4_advanced.md", "Section 2.3.4: Advanced"),
            ("part_1_introduction.md", "Part 1: Introduction"),
            ("chapter_10_conclusion.md", "Chapter 10: Conclusion"),
        ])

    def test_restore_special_characters(self):
        """Test restoring special characters that were encoded for filesystem safety."""
        self._assert_decodes(restore_special_characters, [
            ("whats_new", "What's New"),
            ("file_path_issues", "File/Path Issues"),
            ("questions_and_answers", "Questions & Answers"),
            ("cafe_resume", "Café & Résumé"),
            ("colon_separated_title", "Colon: Separated Title"),
            ("parentheses_content", "Parentheses (Content)"),
            ("brackets_and_more", "Brackets [And More]"),
        ])

    def test_reconstruct_number_format(self):
        """Test reconstructing proper number formats from encoded versions."""
        self._assert_decodes(reconstruct_number_format, [
            ("section_1_1_1", "Section 1.1.1"),
            ("version_2_0_3", "Version 2.0.3"),
            ("appendix_a_1", "Appendix A.1"),
            ("figure_3_2_1", "Figure 3.2.1"),
            ("table_1_4", "Table 1.4"),
        ])

    def test_apply_title_case(self):
        """Test applying appropriate title case to reconstructed headings."""
        self._assert_decodes(apply_title_case, [
            ("chapter one introduction", "Chapter One Introduction"),
            ("advanced topics and techniques", "Advanced Topics and Techniques"),
            ("api reference guide", "API Reference Guide"),
            ("getting started with the system", "Getting Started with the System"),
            ("frequently asked questions", "Frequently Asked Questions"),
        ])

    def test_decode_directory_names(self):
        """Test decoding directory names to headings."""
        self._assert_decodes(decode_directory_name_to_heading, [
            ("part_1_introduction", "Part 1: Introduction"),
            ("chapter_2_advanced_topics", "Chapter 2: Advanced Topics"),
            ("section_a_getting_started", "Section A: Getting Started"),
            ("appendix_troubleshooting", "Appendix: Troubleshooting"),
        ])

    def test_handle_very_long_filenames(self):
        """Test handling filenames that may have been truncated during encoding."""
        # Simulate a long filename that was truncated during encoding
        long_filename = "this_is_a_very_long_chapter_title_that_exceeds_normal_length_limits_and_may_have_been_truncated.md"

        decoded = decode_filename_to_heading(long_filename)

        # Should handle gracefully and produce readable result
        assert decoded is not None
        assert len(decoded) > 0
        assert decoded.startswith("This Is A Very Long")

    def test_handle_edge_case_filenames(self):
        """Test handling edge case filenames."""
        self._assert_decodes(decode_filename_to_heading, [
            ("index.md", ""),  # Index files should not produce headings
            ("readme.md", "Readme"),
            ("_private_section.md", "Private Section"),
            ("01_first_chapter.md", "01: First Chapter"),
            ("999_last_section.md", "999: Last Section"),
        ])

    def test_preserve_acronyms_and_abbreviations(self):
        """Test preserving common acronyms and abbreviations."""
        self._assert_decodes(decode_filename_to_heading, [
            ("api_documentation.md", "API Documentation"),
            ("sql_reference.md", "SQL Reference"),
            ("http_protocol.md", "HTTP Protocol"),
            ("json_format.md", "JSON Format"),
            ("xml_parsing.md", "XML Parsing"),
            ("css_styling.md", "CSS Styling"),
        ])


class TestFilenameDecoder:
    """Exercise the FilenameDecoder class: construction, options, and decoding.

    Every test is expected to fail until FilenameDecoder is implemented
    (TDD RED phase).
    """

    def test_filename_decoder_initialization(self):
        """A default-constructed decoder exists and exposes its options."""
        instance = FilenameDecoder()

        assert instance is not None
        # Configuration is expected to be visible as attributes.
        for option in ('preserve_acronyms', 'title_case_enabled'):
            assert hasattr(instance, option)

    def test_decoder_with_custom_options(self):
        """Explicitly supplied options are accepted and drive the decoding."""
        options = {
            "preserve_acronyms": True,
            "title_case_enabled": True,
            "number_format_reconstruction": True,
        }

        heading = FilenameDecoder(**options).decode("api_v2_1_reference.md")

        assert heading == "API v2.1: Reference"

    def test_decoder_batch_processing(self):
        """decode_batch returns one heading for each filename supplied."""
        headings = FilenameDecoder().decode_batch([
            "chapter_1_introduction.md",
            "section_2_1_setup.md",
            "appendix_a_reference.md",
        ])

        assert len(headings) == 3
        for wanted in (
            "Chapter 1: Introduction",
            "Section 2.1: Setup",
            "Appendix A: Reference",
        ):
            assert wanted in headings

    def test_decoder_handles_path_objects(self):
        """decode accepts a Path as well as a plain string."""
        target = Path("advanced_topics/section_3_2_algorithms.md")

        assert FilenameDecoder().decode(target) == "Section 3.2: Algorithms"

    def test_decoder_context_awareness(self):
        """A context-aware decoder can be given the parent directory name."""
        context_decoder = FilenameDecoder(context_aware=True)

        # Inside a "chapters" directory the numbering might be treated differently.
        heading = context_decoder.decode(
            Path("chapters/01_introduction.md"),
            parent_context="chapters",
        )

        # Either the chapter role or the title itself must come through.
        assert any(word in heading for word in ("Chapter", "Introduction"))

    def test_decoder_reversibility_validation(self):
        """Decoded headings keep enough structure to be re-encodable in principle."""
        decoder = FilenameDecoder()

        for name in (
            "chapter_1_getting_started.md",
            "section_2_3_advanced.md",
            "appendix_troubleshooting.md",
        ):
            heading = decoder.decode(name)

            # The heading must be present, non-empty and not just whitespace.
            assert heading is not None
            assert len(heading) > 0
            assert not heading.isspace()

            # Structural keywords in the filename must survive decoding.
            if "chapter" in name:
                assert "Chapter" in heading
            # For sections, a surviving digit is accepted in place of the word.
            if "section" in name and "Section" not in heading:
                assert any(ch.isdigit() for ch in heading)


class TestFilenameDecodingIntegration:
    """Check filename decoding when combined with directory structure.

    Every test is expected to fail until the decoding implementation exists
    (TDD RED phase).
    """

    def test_decode_filenames_in_directory_context(self):
        """Directories and the files inside them all decode to usable headings."""
        # Directory name -> files it contains, standing in for a real tree.
        layout = {
            "part_1_introduction": [
                "index.md",
                "chapter_1_overview.md",
                "chapter_2_setup.md",
            ],
            "part_2_advanced": [
                "chapter_3_algorithms.md",
                "section_3_1_sorting.md",
            ],
        }

        decoder = FilenameDecoder()

        for directory, members in layout.items():
            assert decode_directory_name_to_heading(directory) is not None

            for member in members:
                # Index files are skipped here.
                if member == "index.md":
                    continue
                heading = decoder.decode(member, parent_context=directory)
                assert heading is not None
                assert len(heading) > 0

    def test_maintain_heading_hierarchy_through_decoding(self):
        """Each level of a part/chapter/section hierarchy decodes as expected."""
        decoder = FilenameDecoder()

        # (encoded name, nominal heading level, expected heading text).
        # The level is carried for reference only; it is not asserted on.
        expectations = [
            ("part_1_introduction", 1, "Part 1: Introduction"),
            ("chapter_1_overview.md", 2, "Chapter 1: Overview"),
            ("section_1_1_basics.md", 3, "Section 1.1: Basics"),
            ("section_1_2_advanced.md", 3, "Section 1.2: Advanced"),
        ]

        for name, _level, heading in expectations:
            # Markdown files go through the decoder; bare names are directories.
            decode = (
                decoder.decode
                if name.endswith('.md')
                else decode_directory_name_to_heading
            )
            assert decode(name) == heading

    def test_handle_inconsistent_naming_conventions(self):
        """Filenames in mixed naming styles still decode to non-empty headings."""
        decoder = FilenameDecoder(flexible_parsing=True)

        # A blend of conventions that might coexist in a real directory.
        for name in (
            "01-Introduction.md",
            "chapter_2_setup.md",
            "Part Three - Advanced Topics.md",
            "section4.1-deployment.md",
            "AppendixA_Reference.md",
        ):
            heading = decoder.decode(name)

            # Only graceful handling is required: some heading must result.
            assert heading is not None
            assert len(heading) > 0