"""
TDD8 Cycle 1: Content Commands Tests (RED Phase)
Issue #38 - MarkdownMatters CLI Implementation

This test file implements the RED phase tests for the content command family:
- markitect content-get [path] - Extract content without frontmatter/tailmatter
- markitect content-stats [path] - Content statistics

Following TDD8 methodology, these tests MUST FAIL initially.
"""

import os
import tempfile
from pathlib import Path

import pytest
from click.testing import CliRunner

from markitect_content.parser import ContentParser
from markitect_content.stats import ContentStats
from markitect_content.commands import content_get, content_stats

# Directory holding the markdown fixtures used by the test classes below.
FIXTURES_DIR = Path(__file__).parent / "fixtures" / "content_test_files"

# Fixtures are decoded explicitly so results do not depend on the locale's
# default encoding.
FIXTURE_ENCODING = "utf-8"


def _read_fixture(path: Path) -> str:
    """Return the full text of ``path`` decoded as UTF-8.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    return Path(path).read_text(encoding=FIXTURE_ENCODING)


def _extract(parser: ContentParser, path: Path) -> str:
    """Read ``path`` and return ``parser.extract_content`` of its text."""
    return parser.extract_content(_read_fixture(path))


def _extract_with_stats(parser: ContentParser, path: Path):
    """Return ``(content, stats)`` for ``path``.

    ``content`` is the extracted content of the file and ``stats`` is the
    result of ``parser.calculate_stats(content)``.
    """
    content = _extract(parser, path)
    return content, parser.calculate_stats(content)


class TestContentExtraction:
    """Test content extraction without matter zones."""

    @pytest.fixture
    def test_files_dir(self):
        """Path to test fixture files."""
        return FIXTURES_DIR

    @pytest.fixture
    def content_parser(self):
        """Content parser instance."""
        return ContentParser()

    def test_content_get_extracts_content_without_frontmatter(
        self, content_parser, test_files_dir
    ):
        """Test that content extraction removes frontmatter."""
        content = _extract(content_parser, test_files_dir / "frontmatter_only.md")

        # Content should not contain frontmatter delimiters or YAML
        assert "---" not in content
        assert "title:" not in content
        assert "author:" not in content
        assert "date:" not in content

        # Content should contain the actual document content
        assert "# Frontmatter Only Document" in content
        assert "This document only has frontmatter" in content

    def test_content_get_extracts_content_without_tailmatter(
        self, content_parser, test_files_dir
    ):
        """Test that content extraction removes tailmatter."""
        content = _extract(content_parser, test_files_dir / "tailmatter_only.md")

        # Content should not contain tailmatter blocks
        assert "```yaml tailmatter" not in content
        assert "qa_checklist:" not in content
        assert "editorial:" not in content

        # Content should contain the actual document content
        assert "# Tailmatter Only Document" in content
        assert "This document only has tailmatter" in content

    def test_content_get_extracts_content_without_both_matters(
        self, content_parser, test_files_dir
    ):
        """Test that content extraction removes both frontmatter and tailmatter."""
        content = _extract(content_parser, test_files_dir / "complete_document.md")

        # Content should not contain any matter zones.
        # At most one "---" may remain (allow a section divider).
        assert content.count("---") <= 1
        assert "title:" not in content
        assert "```yaml tailmatter" not in content
        assert "qa_checklist:" not in content

        # Content should contain the main document content
        assert "# Complete Test Document" in content
        assert "This is the main content" in content
        assert "## Section 1" in content

    def test_content_get_preserves_contentmatter_inline_metadata(
        self, content_parser, test_files_dir
    ):
        """Test that contentmatter (MMD key-value pairs) are preserved in content."""
        content = _extract(content_parser, test_files_dir / "contentmatter_inline.md")

        # Contentmatter should be preserved as it's part of the content
        assert "Author: Jane Smith" in content
        assert "Project: Content Testing" in content
        assert "Keywords: markdown, contentmatter, testing" in content
        assert "Reference: https://example.com/docs" in content

    def test_content_get_handles_file_not_found(self, content_parser, tmp_path):
        """Test proper error handling for non-existent files."""
        # tmp_path is a fresh, empty directory, so this file cannot exist.
        missing = tmp_path / "non_existent_file.md"

        with pytest.raises(FileNotFoundError):
            # Reading the file raises before extract_content is ever called.
            content_parser.extract_content(_read_fixture(missing))


class TestContentStatistics:
    """Test content statistics calculation."""

    @pytest.fixture
    def test_files_dir(self):
        """Path to test fixture files."""
        return FIXTURES_DIR

    @pytest.fixture
    def content_parser(self):
        """Content parser instance."""
        return ContentParser()

    def test_content_stats_counts_words_correctly(
        self, content_parser, test_files_dir
    ):
        """Test accurate word counting in content."""
        _, stats = _extract_with_stats(
            content_parser, test_files_dir / "plain_markdown.md"
        )

        # Should count words in content (exact count depends on test file)
        assert stats.word_count > 0
        assert isinstance(stats.word_count, int)

    def test_content_stats_counts_paragraphs_correctly(
        self, content_parser, test_files_dir
    ):
        """Test accurate paragraph counting."""
        _, stats = _extract_with_stats(
            content_parser, test_files_dir / "plain_markdown.md"
        )

        # Should count paragraphs (non-empty text blocks)
        assert stats.paragraph_count > 0
        assert isinstance(stats.paragraph_count, int)

    def test_content_stats_counts_lines_correctly(
        self, content_parser, test_files_dir
    ):
        """Test accurate line counting."""
        _, stats = _extract_with_stats(
            content_parser, test_files_dir / "plain_markdown.md"
        )

        # Should count lines in content
        assert stats.line_count > 0
        assert isinstance(stats.line_count, int)

    def test_content_stats_excludes_frontmatter_from_counts(
        self, content_parser, test_files_dir
    ):
        """Test that frontmatter is excluded from statistics."""
        content, stats = _extract_with_stats(
            content_parser, test_files_dir / "frontmatter_only.md"
        )

        # Word count should not include frontmatter words
        # This requires manual calculation based on test file content
        assert "title:" not in content
        assert stats.word_count > 0  # Should still have content words

    def test_content_stats_excludes_tailmatter_from_counts(
        self, content_parser, test_files_dir
    ):
        """Test that tailmatter is excluded from statistics."""
        content, stats = _extract_with_stats(
            content_parser, test_files_dir / "tailmatter_only.md"
        )

        # Word count should not include tailmatter words
        assert "qa_checklist:" not in content
        assert stats.word_count > 0  # Should still have content words

    def test_content_stats_includes_contentmatter_in_counts(
        self, content_parser, test_files_dir
    ):
        """Test that contentmatter (MMD) is included in statistics."""
        content, stats = _extract_with_stats(
            content_parser, test_files_dir / "contentmatter_inline.md"
        )

        # Should include contentmatter key-value pairs in word count
        assert "Author: Jane Smith" in content
        assert stats.word_count > 10  # Should include contentmatter words


class TestCLIIntegration:
    """Test CLI command integration."""

    @pytest.fixture
    def runner(self):
        """CLI test runner."""
        return CliRunner()

    @pytest.fixture
    def test_files_dir(self):
        """Path to test fixture files."""
        return FIXTURES_DIR

    def test_content_get_cli_command_works(self, runner, test_files_dir):
        """Test that content-get CLI command executes successfully."""
        file_path = test_files_dir / "plain_markdown.md"

        result = runner.invoke(content_get, ['--file', str(file_path)])

        assert result.exit_code == 0
        assert "Plain Markdown Document" in result.output
        # Should not contain frontmatter/tailmatter markers
        # (at most one "---" is tolerated in the output).
        assert result.output.count("---") <= 1

    def test_content_stats_cli_command_works(self, runner, test_files_dir):
        """Test that content-stats CLI command executes successfully."""
        file_path = test_files_dir / "plain_markdown.md"

        result = runner.invoke(content_stats, ['--file', str(file_path)])

        assert result.exit_code == 0
        assert "word_count" in result.output
        assert "line_count" in result.output
        assert "paragraph_count" in result.output

    def test_content_commands_help_text_available(self, runner):
        """Test that help text is available for content commands."""
        # Test content-get help
        result = runner.invoke(content_get, ['--help'])
        assert result.exit_code == 0
        assert "Extract content without frontmatter and tailmatter" in result.output

        # Test content-stats help
        result = runner.invoke(content_stats, ['--help'])
        assert result.exit_code == 0
        assert "Calculate content statistics" in result.output


class TestContentStats:
    """Test ContentStats data class."""

    def test_content_stats_creation(self):
        """Test ContentStats object creation."""
        stats = ContentStats(
            word_count=100,
            line_count=20,
            paragraph_count=5,
            character_count=500,
        )

        assert stats.word_count == 100
        assert stats.line_count == 20
        assert stats.paragraph_count == 5
        assert stats.character_count == 500

    def test_content_stats_to_dict(self):
        """Test ContentStats conversion to dictionary."""
        stats = ContentStats(
            word_count=100,
            line_count=20,
            paragraph_count=5,
            character_count=500,
        )

        stats_dict = stats.to_dict()

        assert stats_dict == {
            "word_count": 100,
            "line_count": 20,
            "paragraph_count": 5,
            "character_count": 500,
        }