""" TDD8 Cycle 1: Content Commands Tests (RED Phase) Issue #38 - MarkdownMatters CLI Implementation This test file implements the RED phase tests for content command family: - markitect content-get [path] - Extract content without frontmatter/tailmatter - markitect content-stats [path] - Content statistics Following TDD8 methodology, these tests MUST FAIL initially. """ import pytest import tempfile import os from pathlib import Path from click.testing import CliRunner from markitect.content.parser import ContentParser from markitect.content.stats import ContentStats from markitect.content.commands import content_get, content_stats class TestContentExtraction: """Test content extraction without matter zones.""" @pytest.fixture def test_files_dir(self): """Path to test fixture files.""" return Path(__file__).parent / "fixtures" / "content_test_files" @pytest.fixture def content_parser(self): """Content parser instance.""" return ContentParser() def test_content_get_extracts_content_without_frontmatter(self, content_parser, test_files_dir): """Test that content extraction removes frontmatter.""" file_path = test_files_dir / "frontmatter_only.md" with open(file_path, 'r') as f: text = f.read() content = content_parser.extract_content(text) # Content should not contain frontmatter delimiters or YAML assert "---" not in content assert "title:" not in content assert "author:" not in content assert "date:" not in content # Content should contain the actual document content assert "# Frontmatter Only Document" in content assert "This document only has frontmatter" in content def test_content_get_extracts_content_without_tailmatter(self, content_parser, test_files_dir): """Test that content extraction removes tailmatter.""" file_path = test_files_dir / "tailmatter_only.md" with open(file_path, 'r') as f: text = f.read() content = content_parser.extract_content(text) # Content should not contain tailmatter blocks assert "```yaml tailmatter" not in content assert "qa_checklist:" not in content assert "editorial:" not in content # Content should contain the actual document content assert "# Tailmatter Only Document" in content assert "This document only has tailmatter" in content def test_content_get_extracts_content_without_both_matters(self, content_parser, test_files_dir): """Test that content extraction removes both frontmatter and tailmatter.""" file_path = test_files_dir / "complete_document.md" with open(file_path, 'r') as f: text = f.read() content = content_parser.extract_content(text) # Content should not contain any matter zones assert "---" not in content or content.count("---") <= 1 # Allow section dividers assert "title:" not in content assert "```yaml tailmatter" not in content assert "qa_checklist:" not in content # Content should contain the main document content assert "# Complete Test Document" in content assert "This is the main content" in content assert "## Section 1" in content def test_content_get_preserves_contentmatter_inline_metadata(self, content_parser, test_files_dir): """Test that contentmatter (MMD key-value pairs) are preserved in content.""" file_path = test_files_dir / "contentmatter_inline.md" with open(file_path, 'r') as f: text = f.read() content = content_parser.extract_content(text) # Contentmatter should be preserved as it's part of the content assert "Author: Jane Smith" in content assert "Project: Content Testing" in content assert "Keywords: markdown, contentmatter, testing" in content assert "Reference: https://example.com/docs" in content def test_content_get_handles_file_not_found(self, content_parser): """Test proper error handling for non-existent files.""" with pytest.raises(FileNotFoundError): with open("non_existent_file.md", 'r') as f: text = f.read() content_parser.extract_content(text) class TestContentStatistics: """Test content statistics calculation.""" @pytest.fixture def test_files_dir(self): """Path to test fixture files.""" return Path(__file__).parent / "fixtures" / "content_test_files" @pytest.fixture def content_parser(self): """Content parser instance.""" return ContentParser() def test_content_stats_counts_words_correctly(self, content_parser, test_files_dir): """Test accurate word counting in content.""" file_path = test_files_dir / "plain_markdown.md" with open(file_path, 'r') as f: text = f.read() content = content_parser.extract_content(text) stats = content_parser.calculate_stats(content) # Should count words in content (exact count depends on test file) assert stats.word_count > 0 assert isinstance(stats.word_count, int) def test_content_stats_counts_paragraphs_correctly(self, content_parser, test_files_dir): """Test accurate paragraph counting.""" file_path = test_files_dir / "plain_markdown.md" with open(file_path, 'r') as f: text = f.read() content = content_parser.extract_content(text) stats = content_parser.calculate_stats(content) # Should count paragraphs (non-empty text blocks) assert stats.paragraph_count > 0 assert isinstance(stats.paragraph_count, int) def test_content_stats_counts_lines_correctly(self, content_parser, test_files_dir): """Test accurate line counting.""" file_path = test_files_dir / "plain_markdown.md" with open(file_path, 'r') as f: text = f.read() content = content_parser.extract_content(text) stats = content_parser.calculate_stats(content) # Should count lines in content assert stats.line_count > 0 assert isinstance(stats.line_count, int) def test_content_stats_excludes_frontmatter_from_counts(self, content_parser, test_files_dir): """Test that frontmatter is excluded from statistics.""" file_path = test_files_dir / "frontmatter_only.md" with open(file_path, 'r') as f: text = f.read() content = content_parser.extract_content(text) stats = content_parser.calculate_stats(content) # Word count should not include frontmatter words # This requires manual calculation based on test file content assert "title:" not in content assert stats.word_count > 0 # Should still have content words def test_content_stats_excludes_tailmatter_from_counts(self, content_parser, test_files_dir): """Test that tailmatter is excluded from statistics.""" file_path = test_files_dir / "tailmatter_only.md" with open(file_path, 'r') as f: text = f.read() content = content_parser.extract_content(text) stats = content_parser.calculate_stats(content) # Word count should not include tailmatter words assert "qa_checklist:" not in content assert stats.word_count > 0 # Should still have content words def test_content_stats_includes_contentmatter_in_counts(self, content_parser, test_files_dir): """Test that contentmatter (MMD) is included in statistics.""" file_path = test_files_dir / "contentmatter_inline.md" with open(file_path, 'r') as f: text = f.read() content = content_parser.extract_content(text) stats = content_parser.calculate_stats(content) # Should include contentmatter key-value pairs in word count assert "Author: Jane Smith" in content assert stats.word_count > 10 # Should include contentmatter words class TestCLIIntegration: """Test CLI command integration.""" @pytest.fixture def runner(self): """CLI test runner.""" return CliRunner() @pytest.fixture def test_files_dir(self): """Path to test fixture files.""" return Path(__file__).parent / "fixtures" / "content_test_files" def test_content_get_cli_command_works(self, runner, test_files_dir): """Test that content-get CLI command executes successfully.""" file_path = test_files_dir / "plain_markdown.md" result = runner.invoke(content_get, ['--file', str(file_path)]) assert result.exit_code == 0 assert "Plain Markdown Document" in result.output # Should not contain frontmatter/tailmatter markers assert "---" not in result.output or result.output.count("---") <= 1 def test_content_stats_cli_command_works(self, runner, test_files_dir): """Test that content-stats CLI command executes successfully.""" file_path = test_files_dir / "plain_markdown.md" result = runner.invoke(content_stats, ['--file', str(file_path)]) assert result.exit_code == 0 assert "word_count" in result.output assert "line_count" in result.output assert "paragraph_count" in result.output def test_content_commands_help_text_available(self, runner): """Test that help text is available for content commands.""" # Test content-get help result = runner.invoke(content_get, ['--help']) assert result.exit_code == 0 assert "Extract content without frontmatter and tailmatter" in result.output # Test content-stats help result = runner.invoke(content_stats, ['--help']) assert result.exit_code == 0 assert "Calculate content statistics" in result.output class TestContentStats: """Test ContentStats data class.""" def test_content_stats_creation(self): """Test ContentStats object creation.""" stats = ContentStats( word_count=100, line_count=20, paragraph_count=5, character_count=500 ) assert stats.word_count == 100 assert stats.line_count == 20 assert stats.paragraph_count == 5 assert stats.character_count == 500 def test_content_stats_to_dict(self): """Test ContentStats conversion to dictionary.""" stats = ContentStats( word_count=100, line_count=20, paragraph_count=5, character_count=500 ) stats_dict = stats.to_dict() assert stats_dict == { "word_count": 100, "line_count": 20, "paragraph_count": 5, "character_count": 500 }