# markitect-main/tests/test_issue_136_index_generation.py

#!/usr/bin/env python3
"""
Test suite for Issue #136: Index page for notes in a directory

This test suite validates the index page generation functionality for HTML files,
including directory scanning, HTML generation, and CLI integration.

TDD8 Workflow: ISSUE→TEST→RED→GREEN→REFACTOR→DOCUMENT→REFINE→PUBLISH
State: RED (Tests should fail initially)
"""

import pytest
import tempfile
import os
import shutil
from pathlib import Path
from unittest.mock import patch, MagicMock
import subprocess
import re
from html.parser import HTMLParser


class SimpleHTMLParser(HTMLParser):
    """Minimal HTML parser that records the page title and every anchor.

    After ``feed()``:

    * ``title`` is the stripped text of the last text chunk seen inside a
      ``<title>`` element, or ``None`` if no such text was seen.
    * ``links`` is a list of ``{'href': ..., 'text': ...}`` dicts, one per
      ``<a>`` start tag, in document order.

    Link text is collected only while the parser is between ``<a>`` and
    ``</a>``. Text that follows an empty anchor is therefore never
    attributed to it, and text split across nested inline tags
    (``<a>Hello <b>World</b></a>``) is joined into one string.
    """

    def __init__(self):
        super().__init__()
        self.title = None
        self.links = []
        self.in_title = False
        # True between an <a> start tag and the next </a> end tag.
        self.in_link = False
        # Raw (unstripped) text fragments of the anchor being parsed.
        self._link_parts = []

    def handle_starttag(self, tag, attrs):
        """Track entry into ``<title>`` and register each ``<a>`` as a new link."""
        if tag == 'title':
            self.in_title = True
        elif tag == 'a':
            # A valueless ``href`` attribute is reported as None; store ''.
            href = dict(attrs).get('href') or ''
            self.links.append({'href': href, 'text': ''})
            self.in_link = True
            self._link_parts = []

    def handle_endtag(self, tag):
        """Track exit from ``<title>`` and ``<a>`` elements."""
        if tag == 'title':
            self.in_title = False
        elif tag == 'a':
            self.in_link = False

    def handle_data(self, data):
        """Store text as the title or as part of the current link's text."""
        if self.in_title:
            self.title = data.strip()
        elif self.in_link:
            # Join fragments before stripping so inner spacing is preserved.
            self._link_parts.append(data)
            self.links[-1]['text'] = ''.join(self._link_parts).strip()


class TestHTMLFileDiscovery:
    """Checks for locating HTML files on disk and deriving their titles."""

    def setup_method(self):
        """Create a scratch tree: three pages, one nested page, two non-HTML files."""
        self.temp_dir = tempfile.mkdtemp()
        self.test_dir = Path(self.temp_dir) / "test_notes"
        self.test_dir.mkdir()

        # Top-level pages; dict order is the order they are written to disk.
        top_level_pages = {
            "index.html": """<!DOCTYPE html>
<html>
<head><title>Index Page</title></head>
<body><h1>Index Page</h1><p>Main index</p></body>
</html>""",
            "document1.html": """<!DOCTYPE html>
<html>
<head><title>Document One</title></head>
<body><h1>Document One</h1><p>Content here</p></body>
</html>""",
            "notes.html": """<!DOCTYPE html>
<html>
<head><title>My Notes</title></head>
<body><h1>My Notes</h1><p>Note content</p></body>
</html>""",
        }
        for page_name, markup in top_level_pages.items():
            (self.test_dir / page_name).write_text(markup)

        # One page nested a level down, used by the recursive lookup test.
        nested_dir = self.test_dir / "subdir"
        nested_dir.mkdir()
        (nested_dir / "subdoc.html").write_text("""<!DOCTYPE html>
<html>
<head><title>Sub Document</title></head>
<body><h1>Sub Document</h1><p>Sub content</p></body>
</html>""")

        # Decoy files that are not HTML and must not be reported.
        (self.test_dir / "readme.txt").write_text("Not HTML")
        (self.test_dir / "image.png").write_bytes(b"fake image data")

    def teardown_method(self):
        """Remove the scratch tree."""
        shutil.rmtree(self.temp_dir)

    def test_find_html_files_in_directory(self):
        """A non-recursive lookup reports only the three top-level pages."""
        from markitect.plugins.builtin.markdown_commands import find_html_files

        found = find_html_files(self.test_dir)

        assert len(found) == 3
        for page_name in ("index.html", "document1.html", "notes.html"):
            assert self.test_dir / page_name in found

    def test_find_html_files_recursively(self):
        """A recursive lookup also reports the page inside the subdirectory."""
        from markitect.plugins.builtin.markdown_commands import find_html_files

        found = find_html_files(self.test_dir, recursive=True)

        wanted = [
            self.test_dir.joinpath(*parts)
            for parts in (
                ("index.html",),
                ("document1.html",),
                ("notes.html",),
                ("subdir", "subdoc.html"),
            )
        ]

        assert len(found) == 4
        for wanted_path in wanted:
            assert wanted_path in found

    def test_extract_title_from_html_file(self):
        """The text of the <title> element is returned as the title."""
        from markitect.plugins.builtin.markdown_commands import extract_html_title

        cases = (
            ("document1.html", "Document One"),
            ("notes.html", "My Notes"),
        )
        for page_name, wanted_title in cases:
            assert extract_html_title(self.test_dir / page_name) == wanted_title

    def test_extract_title_from_h1_if_no_title_tag(self):
        """Without a <title> element the first-level heading is used."""
        from markitect.plugins.builtin.markdown_commands import extract_html_title

        # Page whose <head> is empty, so only the <h1> can supply a title.
        headless_page = self.test_dir / "no_title.html"
        headless_page.write_text("""<!DOCTYPE html>
<html>
<head></head>
<body><h1>Header Title</h1><p>Content</p></body>
</html>""")

        assert extract_html_title(headless_page) == "Header Title"

    def test_extract_title_fallback_to_filename(self):
        """With neither <title> nor <h1> the file stem is used."""
        from markitect.plugins.builtin.markdown_commands import extract_html_title

        # Page carrying only a paragraph.
        bare_page = self.test_dir / "plain_file.html"
        bare_page.write_text("""<!DOCTYPE html>
<html>
<head></head>
<body><p>Just content</p></body>
</html>""")

        assert extract_html_title(bare_page) == "plain_file"


class TestIndexPageGeneration:
    """Checks for the HTML markup produced by the index generator."""

    def setup_method(self):
        """Create an empty scratch directory used to build entry paths."""
        self.temp_dir = tempfile.mkdtemp()
        self.test_dir = Path(self.temp_dir) / "test_notes"
        self.test_dir.mkdir()

    def teardown_method(self):
        """Remove the scratch directory."""
        shutil.rmtree(self.temp_dir)

    @staticmethod
    def _parse(markup):
        """Feed *markup* to a fresh SimpleHTMLParser and return the parser."""
        reader = SimpleHTMLParser()
        reader.feed(markup)
        return reader

    def test_generate_index_html_structure(self):
        """The index has the requested title and one list link per entry."""
        from markitect.plugins.builtin.markdown_commands import generate_index_html

        documents = (("doc1.html", "Document One"), ("doc2.html", "Document Two"))
        entries = [
            {"path": self.test_dir / name, "title": title, "relative_path": name}
            for name, title in documents
        ]

        markup = generate_index_html(entries, "Test Directory Index")
        parsed = self._parse(markup)

        assert parsed.title == "Test Directory Index"

        # The links are expected to live in an unordered list.
        assert "<ul>" in markup
        assert "<li>" in markup

        # One anchor per entry, in input order.
        assert len(parsed.links) == 2
        for link, (name, title) in zip(parsed.links, documents):
            assert link['href'] == name
            assert link['text'] == title

    def test_generate_index_html_with_subdirectories(self):
        """Entries below the root are linked by their relative path."""
        from markitect.plugins.builtin.markdown_commands import generate_index_html

        top_entry = {
            "path": self.test_dir / "doc1.html",
            "title": "Document One",
            "relative_path": "doc1.html",
        }
        nested_entry = {
            "path": self.test_dir / "subdir" / "subdoc.html",
            "title": "Sub Document",
            "relative_path": "subdir/subdoc.html",
        }

        markup = generate_index_html([top_entry, nested_entry], "Test Directory Index")
        parsed = self._parse(markup)

        assert len(parsed.links) == 2
        first_link, second_link = parsed.links
        assert first_link['href'] == 'doc1.html'
        assert second_link['href'] == 'subdir/subdoc.html'

    def test_generate_index_html_with_custom_template(self):
        """Passing a theme still yields the title plus a <style> block."""
        from markitect.plugins.builtin.markdown_commands import generate_index_html

        only_entry = {
            "path": self.test_dir / "doc1.html",
            "title": "Document One",
            "relative_path": "doc1.html",
        }

        markup = generate_index_html([only_entry], "Test Index", theme="github")
        parsed = self._parse(markup)

        # Title is carried through regardless of theme.
        assert parsed.title == "Test Index"

        # Themed output is expected to embed its styling inline.
        assert "<style>" in markup

    def test_generate_index_html_empty_directory(self):
        """An empty entry list yields a titled page with a 'no files' notice."""
        from markitect.plugins.builtin.markdown_commands import generate_index_html

        markup = generate_index_html([], "Empty Directory Index")

        parsed = self._parse(markup)
        assert parsed.title == "Empty Directory Index"

        # Either wording of the notice is accepted.
        assert "No HTML files found" in markup or "No files to display" in markup


class TestDirectoryProcessing:
    """Checks for end-to-end index creation inside a directory."""

    def setup_method(self):
        """Create a scratch 'notes' directory holding three titled pages."""
        self.temp_dir = tempfile.mkdtemp()
        self.test_dir = Path(self.temp_dir) / "notes"
        self.test_dir.mkdir()

        # (filename, title) pairs for the pages written below.
        self.html_files = [
            ("doc1.html", "Document One"),
            ("doc2.html", "Document Two"),
            ("notes.html", "My Notes")
        ]

        for filename, title in self.html_files:
            page = self.test_dir / filename
            page.write_text(f"""<!DOCTYPE html>
<html><head><title>{title}</title></head>
<body><h1>{title}</h1></body></html>""")

    def teardown_method(self):
        """Remove the scratch directory."""
        shutil.rmtree(self.temp_dir)

    def test_process_directory_creates_index_file(self):
        """Processing returns the path of index.html and the file exists."""
        from markitect.plugins.builtin.markdown_commands import process_directory_for_index

        produced = process_directory_for_index(self.test_dir)

        # The returned path must be <dir>/index.html ...
        default_index = self.test_dir / "index.html"
        assert produced == default_index

        # ... and it must have been written to disk.
        assert default_index.exists()

    def test_process_directory_index_content(self):
        """The generated index links to every page by href and by title."""
        from markitect.plugins.builtin.markdown_commands import process_directory_for_index

        index_file = process_directory_for_index(self.test_dir)

        reader = SimpleHTMLParser()
        reader.feed(index_file.read_text())

        hrefs = []
        texts = []
        for link in reader.links:
            hrefs.append(link['href'])
            texts.append(link['text'])

        for wanted_href in ('doc1.html', 'doc2.html', 'notes.html'):
            assert wanted_href in hrefs

        for wanted_text in ('Document One', 'Document Two', 'My Notes'):
            assert wanted_text in texts

    def test_process_directory_excludes_existing_index(self):
        """A pre-existing index.html is not listed among the links."""
        from markitect.plugins.builtin.markdown_commands import process_directory_for_index

        # Seed the directory with an older index page.
        stale_index = self.test_dir / "index.html"
        stale_index.write_text("""<!DOCTYPE html>
<html><head><title>Old Index</title></head>
<body><h1>Old Index</h1></body></html>""")

        index_file = process_directory_for_index(self.test_dir)
        reader = SimpleHTMLParser()
        reader.feed(index_file.read_text())

        # The index must not link to itself.
        hrefs = [link['href'] for link in reader.links]

        assert 'index.html' not in hrefs

    def test_process_directory_with_custom_index_name(self):
        """The index_filename argument selects the output file name."""
        from markitect.plugins.builtin.markdown_commands import process_directory_for_index

        chosen_name = "contents.html"
        produced = process_directory_for_index(self.test_dir, index_filename=chosen_name)

        target = self.test_dir / chosen_name
        assert produced == target
        assert target.exists()


class TestCLIIntegration:
    """Test CLI integration for index generation.

    Every test runs the ``markitect`` executable with the ``md-index``
    sub-command in a child process and inspects its exit status, output
    and the files it leaves behind.
    """

    # Upper bound in seconds for a single CLI invocation, so a hung process
    # fails the test (subprocess.TimeoutExpired) instead of stalling the run.
    CLI_TIMEOUT = 30

    def setup_method(self):
        """Set up a scratch directory containing a single HTML page."""
        self.temp_dir = tempfile.mkdtemp()
        self.test_dir = Path(self.temp_dir) / "test_notes"
        self.test_dir.mkdir()

        # Create test HTML file
        (self.test_dir / "test.html").write_text("""<!DOCTYPE html>
<html><head><title>Test Document</title></head>
<body><h1>Test</h1></body></html>""")

    def teardown_method(self):
        """Clean up test environment."""
        shutil.rmtree(self.temp_dir)

    def _run_md_index(self, *args):
        """Run ``markitect md-index`` with *args* and return the CompletedProcess.

        stdout and stderr are captured as text. The call is bounded by
        ``CLI_TIMEOUT`` for every test, including the ``--help`` ones.
        """
        return subprocess.run(
            ["markitect", "md-index", *args],
            capture_output=True,
            text=True,
            timeout=self.CLI_TIMEOUT,
        )

    def test_md_index_command_exists(self):
        """Test that md-index command exists."""
        result = self._run_md_index("--help")

        # Should not error (command exists)
        assert result.returncode == 0
        assert "md-index" in result.stdout.lower() or "index" in result.stdout.lower()

    def test_md_index_command_processes_directory(self):
        """Test that md-index command processes a directory."""
        result = self._run_md_index(str(self.test_dir))

        # Should succeed
        assert result.returncode == 0

        # Should create index file
        index_file = self.test_dir / "index.html"
        assert index_file.exists()

    def test_md_index_command_with_custom_output(self):
        """Test md-index command with custom output filename."""
        custom_output = self.test_dir / "contents.html"

        result = self._run_md_index(str(self.test_dir), "--output", str(custom_output))

        # Should succeed
        assert result.returncode == 0
        assert custom_output.exists()

    def test_md_index_command_with_theme_option(self):
        """Test md-index command with theme option."""
        result = self._run_md_index(str(self.test_dir), "--theme", "github")

        # Should succeed
        assert result.returncode == 0

        # Generated file should exist
        index_file = self.test_dir / "index.html"
        assert index_file.exists()

    def test_md_index_command_help_text(self):
        """Test that md-index command has proper help text."""
        result = self._run_md_index("--help")

        # Fail on the exit status first, so a broken command is not reported
        # as a missing word in empty output.
        assert result.returncode == 0

        help_text = result.stdout.lower()
        assert "index" in help_text
        assert "directory" in help_text
        assert "html" in help_text


class TestEdgeCases:
    """Checks for boundary inputs and failure paths."""

    def setup_method(self):
        """Create the scratch root that each test populates itself."""
        self.temp_dir = tempfile.mkdtemp()

    def teardown_method(self):
        """Remove the scratch root."""
        shutil.rmtree(self.temp_dir)

    def test_empty_directory_processing(self):
        """An empty directory still gets an index carrying a 'no files' notice."""
        from markitect.plugins.builtin.markdown_commands import process_directory_for_index

        target_dir = Path(self.temp_dir) / "empty"
        target_dir.mkdir()

        produced = process_directory_for_index(target_dir)

        # The index is written even though there is nothing to list.
        index_file = target_dir / "index.html"
        assert produced == index_file
        assert index_file.exists()

        # Either wording of the notice is accepted, case-insensitively.
        lowered = index_file.read_text().lower()
        assert "no html files" in lowered or "no files found" in lowered

    def test_directory_with_no_html_files(self):
        """A directory holding only non-HTML files gets the 'no files' notice."""
        from markitect.plugins.builtin.markdown_commands import process_directory_for_index

        target_dir = Path(self.temp_dir) / "no_html"
        target_dir.mkdir()

        # Populate with files that are not HTML.
        (target_dir / "readme.txt").write_text("Not HTML")
        (target_dir / "image.png").write_bytes(b"fake image")

        produced = process_directory_for_index(target_dir)

        # The index exists and reports that nothing was found.
        assert produced.exists()
        lowered = produced.read_text().lower()
        assert "no html files" in lowered or "no files found" in lowered

    def test_malformed_html_file_handling(self):
        """A truncated document does not crash title extraction."""
        from markitect.plugins.builtin.markdown_commands import extract_html_title

        target_dir = Path(self.temp_dir) / "malformed"
        target_dir.mkdir()

        # Document cut off inside its <title> element.
        broken_page = target_dir / "malformed.html"
        broken_page.write_text("<html><head><title>Incomplete")

        # Expected result is the file stem rather than an exception.
        assert extract_html_title(broken_page) == "malformed"

    def test_nonexistent_directory_error(self):
        """Processing a missing directory raises FileNotFoundError."""
        from markitect.plugins.builtin.markdown_commands import process_directory_for_index

        missing_dir = Path(self.temp_dir) / "nonexistent"

        with pytest.raises(FileNotFoundError):
            process_directory_for_index(missing_dir)

    def test_file_with_special_characters_in_name(self):
        """Files with spaces, dashes, underscores and ampersands are all found."""
        from markitect.plugins.builtin.markdown_commands import find_html_files

        target_dir = Path(self.temp_dir) / "special"
        target_dir.mkdir()

        # File names covering the punctuation of interest.
        awkward_names = (
            "file with spaces.html",
            "file-with-dashes.html",
            "file_with_underscores.html",
            "file&with&ampersands.html",
        )

        for filename in awkward_names:
            page = target_dir / filename
            page.write_text(f"""<!DOCTYPE html>
<html><head><title>{filename}</title></head>
<body><h1>Content</h1></body></html>""")

        found = find_html_files(target_dir)
        assert len(found) == len(awkward_names)


if __name__ == '__main__':
    # pytest.main() returns the run's exit code; pass it on so that running
    # this file directly exits non-zero when any test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))