markitect-main/tests/test_content_commands.py

"""
TDD8 Cycle 1: Content Commands Tests (RED Phase)
Issue #38 - MarkdownMatters CLI Implementation

This test file implements the RED-phase tests for the content command family:
- markitect content-get [path] - Extract content without frontmatter/tailmatter
- markitect content-stats [path] - Content statistics

Following TDD8 methodology, these tests MUST FAIL initially.
"""

import pytest
import tempfile
import os
from pathlib import Path
from click.testing import CliRunner

from markitect.content.parser import ContentParser
from markitect.content.stats import ContentStats
from markitect.content.commands import content_get, content_stats


class TestContentExtraction:
    """Tests for extracting document content with the matter zones stripped.

    Fixture documents are read from ``fixtures/content_test_files`` next to
    this module. They are decoded explicitly as UTF-8 (assumed to be the
    fixtures' encoding) so results do not depend on the platform's default
    locale encoding.
    """

    @pytest.fixture
    def test_files_dir(self):
        """Path to test fixture files."""
        return Path(__file__).parent / "fixtures" / "content_test_files"

    @pytest.fixture
    def content_parser(self):
        """Content parser instance."""
        return ContentParser()

    @staticmethod
    def _extract_from(parser, file_path):
        """Read *file_path* as UTF-8 and return ``parser.extract_content`` of it.

        Args:
            parser: Object exposing ``extract_content(text)``.
            file_path: Location of the markdown document to read.

        Raises:
            FileNotFoundError: If *file_path* does not exist.
        """
        text = Path(file_path).read_text(encoding="utf-8")
        return parser.extract_content(text)

    def test_content_get_extracts_content_without_frontmatter(self, content_parser, test_files_dir):
        """Test that content extraction removes frontmatter."""
        content = self._extract_from(content_parser, test_files_dir / "frontmatter_only.md")

        # Content should not contain frontmatter delimiters or YAML
        assert "---" not in content
        assert "title:" not in content
        assert "author:" not in content
        assert "date:" not in content

        # Content should contain the actual document content
        assert "# Frontmatter Only Document" in content
        assert "This document only has frontmatter" in content

    def test_content_get_extracts_content_without_tailmatter(self, content_parser, test_files_dir):
        """Test that content extraction removes tailmatter."""
        content = self._extract_from(content_parser, test_files_dir / "tailmatter_only.md")

        # Content should not contain tailmatter blocks
        assert "```yaml tailmatter" not in content
        assert "qa_checklist:" not in content
        assert "editorial:" not in content

        # Content should contain the actual document content
        assert "# Tailmatter Only Document" in content
        assert "This document only has tailmatter" in content

    def test_content_get_extracts_content_without_both_matters(self, content_parser, test_files_dir):
        """Test that content extraction removes both frontmatter and tailmatter."""
        content = self._extract_from(content_parser, test_files_dir / "complete_document.md")

        # Content should not contain any matter zones. At most one "---" is
        # tolerated so a single section divider in the body does not fail
        # the test (a count of zero trivially satisfies this as well).
        assert content.count("---") <= 1
        assert "title:" not in content
        assert "```yaml tailmatter" not in content
        assert "qa_checklist:" not in content

        # Content should contain the main document content
        assert "# Complete Test Document" in content
        assert "This is the main content" in content
        assert "## Section 1" in content

    def test_content_get_preserves_contentmatter_inline_metadata(self, content_parser, test_files_dir):
        """Test that contentmatter (MMD key-value pairs) are preserved in content."""
        content = self._extract_from(content_parser, test_files_dir / "contentmatter_inline.md")

        # Contentmatter should be preserved as it's part of the content
        assert "Author: Jane Smith" in content
        assert "Project: Content Testing" in content
        assert "Keywords: markdown, contentmatter, testing" in content
        assert "Reference: https://example.com/docs" in content

    def test_content_get_handles_file_not_found(self, content_parser, tmp_path):
        """Test that reading a non-existent file raises FileNotFoundError."""
        # tmp_path is a fresh, empty per-test directory, so the file is
        # guaranteed to be missing regardless of the current working directory.
        missing_file = tmp_path / "non_existent_file.md"

        with pytest.raises(FileNotFoundError):
            self._extract_from(content_parser, missing_file)


class TestContentStatistics:
    """Tests for statistics computed over extracted document content.

    Each test reads a fixture document, strips the matter zones with
    ``ContentParser.extract_content`` and feeds the result to
    ``ContentParser.calculate_stats``. Fixtures are decoded explicitly as
    UTF-8 (assumed to be the fixtures' encoding) so the counts do not depend
    on the platform's default locale encoding.
    """

    @pytest.fixture
    def test_files_dir(self):
        """Path to test fixture files."""
        return Path(__file__).parent / "fixtures" / "content_test_files"

    @pytest.fixture
    def content_parser(self):
        """Content parser instance."""
        return ContentParser()

    @staticmethod
    def _content_and_stats(parser, file_path):
        """Return ``(content, stats)`` for the document at *file_path*.

        Args:
            parser: Object exposing ``extract_content(text)`` and
                ``calculate_stats(content)``.
            file_path: Location of the markdown document to read.

        Returns:
            A tuple of the extracted content and the statistics object
            computed from that content.
        """
        text = Path(file_path).read_text(encoding="utf-8")
        content = parser.extract_content(text)
        return content, parser.calculate_stats(content)

    def test_content_stats_counts_words_correctly(self, content_parser, test_files_dir):
        """Test accurate word counting in content."""
        _, stats = self._content_and_stats(content_parser, test_files_dir / "plain_markdown.md")

        # Should count words in content (exact count depends on test file)
        assert stats.word_count > 0
        assert isinstance(stats.word_count, int)

    def test_content_stats_counts_paragraphs_correctly(self, content_parser, test_files_dir):
        """Test accurate paragraph counting."""
        _, stats = self._content_and_stats(content_parser, test_files_dir / "plain_markdown.md")

        # Should count paragraphs (non-empty text blocks)
        assert stats.paragraph_count > 0
        assert isinstance(stats.paragraph_count, int)

    def test_content_stats_counts_lines_correctly(self, content_parser, test_files_dir):
        """Test accurate line counting."""
        _, stats = self._content_and_stats(content_parser, test_files_dir / "plain_markdown.md")

        # Should count lines in content
        assert stats.line_count > 0
        assert isinstance(stats.line_count, int)

    def test_content_stats_excludes_frontmatter_from_counts(self, content_parser, test_files_dir):
        """Test that frontmatter is excluded from statistics."""
        content, stats = self._content_and_stats(
            content_parser, test_files_dir / "frontmatter_only.md"
        )

        # Word count should not include frontmatter words
        # This requires manual calculation based on test file content
        assert "title:" not in content
        assert stats.word_count > 0  # Should still have content words

    def test_content_stats_excludes_tailmatter_from_counts(self, content_parser, test_files_dir):
        """Test that tailmatter is excluded from statistics."""
        content, stats = self._content_and_stats(
            content_parser, test_files_dir / "tailmatter_only.md"
        )

        # Word count should not include tailmatter words
        assert "qa_checklist:" not in content
        assert stats.word_count > 0  # Should still have content words

    def test_content_stats_includes_contentmatter_in_counts(self, content_parser, test_files_dir):
        """Test that contentmatter (MMD) is included in statistics."""
        content, stats = self._content_and_stats(
            content_parser, test_files_dir / "contentmatter_inline.md"
        )

        # Should include contentmatter key-value pairs in word count
        assert "Author: Jane Smith" in content
        assert stats.word_count > 10  # Should include contentmatter words


class TestCLIIntegration:
    """Exercise the ``content_get`` and ``content_stats`` commands via CliRunner."""

    @pytest.fixture
    def runner(self):
        """Provide a fresh CliRunner for invoking commands in-process."""
        cli_runner = CliRunner()
        return cli_runner

    @pytest.fixture
    def test_files_dir(self):
        """Directory holding the markdown fixture documents."""
        tests_root = Path(__file__).parent
        return tests_root / "fixtures" / "content_test_files"

    def test_content_get_cli_command_works(self, runner, test_files_dir):
        """content_get on the plain markdown fixture exits 0 and prints its content."""
        markdown_file = test_files_dir / "plain_markdown.md"
        cli_args = ['--file', str(markdown_file)]

        outcome = runner.invoke(content_get, cli_args)

        assert outcome.exit_code == 0
        printed = outcome.output
        assert "Plain Markdown Document" in printed
        # No frontmatter/tailmatter markers: at most one "---" may appear
        # (zero occurrences satisfies this too).
        assert printed.count("---") <= 1

    def test_content_stats_cli_command_works(self, runner, test_files_dir):
        """content_stats on the plain markdown fixture exits 0 and names each counter."""
        markdown_file = test_files_dir / "plain_markdown.md"
        cli_args = ['--file', str(markdown_file)]

        outcome = runner.invoke(content_stats, cli_args)

        assert outcome.exit_code == 0
        for counter_name in ("word_count", "line_count", "paragraph_count"):
            assert counter_name in outcome.output

    def test_content_commands_help_text_available(self, runner):
        """Both commands answer ``--help`` with exit code 0 and their summary line."""
        # Checked in the same order as before: content-get first, then content-stats.
        expected_help = (
            (content_get, "Extract content without frontmatter and tailmatter"),
            (content_stats, "Calculate content statistics"),
        )
        for command, summary in expected_help:
            outcome = runner.invoke(command, ['--help'])
            assert outcome.exit_code == 0
            assert summary in outcome.output


class TestContentStats:
    """Checks on the ContentStats value object: construction and ``to_dict``."""

    def test_content_stats_creation(self):
        """Keyword arguments passed at construction are readable as attributes."""
        field_values = {
            "word_count": 100,
            "line_count": 20,
            "paragraph_count": 5,
            "character_count": 500,
        }

        stats = ContentStats(**field_values)

        assert stats.word_count == 100
        assert stats.line_count == 20
        assert stats.paragraph_count == 5
        assert stats.character_count == 500

    def test_content_stats_to_dict(self):
        """``to_dict`` returns exactly the four counters keyed by field name."""
        expected = {
            "word_count": 100,
            "line_count": 20,
            "paragraph_count": 5,
            "character_count": 500,
        }
        stats = ContentStats(**expected)

        as_mapping = stats.to_dict()

        assert as_mapping == expected