""" Tests for Issue #15: AST Query and Analysis CLI. TDD approach: These tests define the exact requirements for AST introspection commands. All tests should initially FAIL (RED) and drive the implementation (GREEN). Commands to implement: - `markitect ast-show ` - Display AST structure for file - `markitect ast-query ` - Query AST using JSONPath - `markitect ast-stats ` - Show AST statistics (headings, links, etc.) Core USP: "Zero-Parsing Content Access" - Leverage cached ASTs for performance """ import json import tempfile from pathlib import Path from unittest.mock import patch, MagicMock import pytest from click.testing import CliRunner from markitect.cli import cli from markitect.ast_cache import ASTCache class TestASTCommands: """TDD test suite defining AST introspection command requirements.""" def setup_method(self): """Set up test environment.""" self.runner = CliRunner() self.temp_dir = tempfile.mkdtemp() self.cache_dir = Path(self.temp_dir) / ".ast_cache" # Create test markdown file with rich content for AST analysis self.test_file = Path(self.temp_dir) / "test.md" self.test_file.write_text("""--- title: Test Document author: Test Author tags: [test, markdown] --- # Main Heading This is a paragraph with **bold** and *italic* text. ## Second Heading - List item 1 - List item 2 with [link](https://example.com) ### Third Heading 1. Numbered item 1 2. Numbered item 2 [Another link](https://test.com) > This is a blockquote `inline code` and code block: ```python print("hello world") ``` """) def teardown_method(self): """Clean up after each test.""" import shutil if Path(self.temp_dir).exists(): shutil.rmtree(self.temp_dir) # ===== ast-show command tests ===== def test_ast_show_command_exists(self): """RED: ast-show command should exist and be callable.""" result = self.runner.invoke(cli, ['ast-show', str(self.test_file)]) # Should NOT be "No such command" - command must exist assert "No such command" not in result.output # Command exists and runs (may fail for other reasons initially) assert result.exit_code in [0, 1, 2] def test_ast_show_requires_file_argument(self): """RED: ast-show should require a file argument.""" result = self.runner.invoke(cli, ['ast-show']) assert result.exit_code != 0 assert any(phrase in result.output for phrase in ["Missing argument", "Usage:", "FILE"]) def test_ast_show_displays_ast_structure(self): """ast-show should display the AST structure of the markdown file.""" result = self.runner.invoke(cli, ['ast-show', str(self.test_file)]) assert result.exit_code == 0 # Should show AST structure with tokens assert any(token_type in result.output for token_type in [ "heading_open", "paragraph_open", "strong_open", "list_item_open" ]) # Should show hierarchical structure (indentation or level info) assert (" [" in result.output or "nesting" in result.output or "level" in result.output) def test_ast_show_handles_nonexistent_file(self): """RED: ast-show should handle non-existent files gracefully.""" nonexistent_file = Path(self.temp_dir) / "nonexistent.md" result = self.runner.invoke(cli, ['ast-show', str(nonexistent_file)]) # Should handle gracefully with clear error message assert result.exit_code != 0 assert any(phrase in result.output.lower() for phrase in [ "not found", "does not exist", "file not found" ]) def test_ast_show_command_uses_cached_data_for_improved_performance(self): """ast-show should leverage existing AST cache for performance.""" # Pre-populate cache cache = ASTCache(self.cache_dir) cache.cache_file(self.test_file) # Mock cache loading to verify it's used with patch.object(ASTCache, 'load_cached_ast') as mock_get: mock_get.return_value = [{"type": "heading_open", "tag": "h1"}] result = self.runner.invoke(cli, ['ast-show', str(self.test_file)]) assert result.exit_code == 0 # Should have called cache instead of parsing mock_get.assert_called_once() def test_ast_show_provides_readable_output_format(self): """ast-show should provide human-readable AST display.""" result = self.runner.invoke(cli, ['ast-show', str(self.test_file)]) assert result.exit_code == 0 # Should be formatted for readability, not raw JSON dump assert len(result.output.strip().split('\n')) > 5 # Multi-line output # Should contain structural information assert any(content in result.output for content in [ "Main Heading", "bold", "italic", "List item" ]) # ===== ast-query command tests ===== def test_ast_query_command_exists(self): """RED: ast-query command should exist and require arguments.""" result = self.runner.invoke(cli, ['ast-query']) assert "No such command" not in result.output # Should fail due to missing arguments, not unknown command if result.exit_code != 0: assert any(phrase in result.output for phrase in ["Missing argument", "Usage:"]) def test_ast_query_requires_file_and_jsonpath_arguments(self): """RED: ast-query should require both file and jsonpath arguments.""" # Test missing both arguments result = self.runner.invoke(cli, ['ast-query']) assert result.exit_code != 0 # Test missing jsonpath argument result = self.runner.invoke(cli, ['ast-query', str(self.test_file)]) assert result.exit_code != 0 def test_ast_query_executes_jsonpath_queries(self): """ast-query should execute JSONPath queries on AST structure.""" # Query for first 3 tokens result = self.runner.invoke(cli, [ 'ast-query', str(self.test_file), '$[:3]' ]) assert result.exit_code == 0 # Should return matching AST nodes assert len(result.output.strip()) > 0 # Should show query results, not full AST assert "type" in result.output # Should show query results, not full AST assert len(result.output.strip()) > 0 def test_ast_query_handles_invalid_jsonpath(self): """RED: ast-query should handle invalid JSONPath expressions gracefully.""" result = self.runner.invoke(cli, [ 'ast-query', str(self.test_file), '$.[invalid_syntax' # Missing closing bracket ]) # Should handle gracefully with helpful error message assert result.exit_code != 0 assert any(phrase in result.output.lower() for phrase in [ "invalid", "syntax", "jsonpath", "error" ]) def test_ast_query_returns_empty_for_no_matches(self): """ast-query should handle queries with no matches gracefully.""" result = self.runner.invoke(cli, [ 'ast-query', str(self.test_file), '$..nonexistent_field' ]) assert result.exit_code == 0 # Should indicate no matches found assert any(phrase in result.output.lower() for phrase in [ "no matches", "empty", "not found", "[]" ]) def test_ast_query_leverages_cached_ast(self): """ast-query should use cached AST for performance.""" # Pre-populate cache cache = ASTCache(self.cache_dir) cache.cache_file(self.test_file) with patch.object(ASTCache, 'load_cached_ast') as mock_get: mock_get.return_value = [{"type": "heading_open", "tag": "h1"}] result = self.runner.invoke(cli, [ 'ast-query', str(self.test_file), '$.*.type' ]) assert result.exit_code == 0 # Should have used cache mock_get.assert_called_once() # ===== ast-stats command tests ===== def test_ast_stats_command_exists(self): """RED: ast-stats command should exist and be callable.""" result = self.runner.invoke(cli, ['ast-stats', str(self.test_file)]) assert "No such command" not in result.output assert result.exit_code in [0, 1, 2] def test_ast_stats_shows_heading_statistics(self): """ast-stats should show statistics about headings.""" result = self.runner.invoke(cli, ['ast-stats', str(self.test_file)]) assert result.exit_code == 0 # Should show heading counts assert any(word in result.output.lower() for word in ["headings", "h1", "h2", "h3"]) # Should show actual counts (our test file has 3 headings) assert "3" in result.output or "three" in result.output.lower() def test_ast_stats_shows_link_statistics(self): """ast-stats should show statistics about links.""" result = self.runner.invoke(cli, ['ast-stats', str(self.test_file)]) assert result.exit_code == 0 # Should show link counts assert "links" in result.output.lower() or "link" in result.output.lower() # Our test file has 2 links assert "2" in result.output def test_ast_stats_shows_text_statistics(self): """ast-stats should show general text and structure statistics.""" result = self.runner.invoke(cli, ['ast-stats', str(self.test_file)]) assert result.exit_code == 0 # Should show various statistics statistics_keywords = [ "paragraphs", "words", "characters", "lists", "code", "blockquotes" ] assert any(keyword in result.output.lower() for keyword in statistics_keywords) def test_ast_stats_handles_empty_file(self): """RED: ast-stats should handle empty files gracefully.""" empty_file = Path(self.temp_dir) / "empty.md" empty_file.write_text("") result = self.runner.invoke(cli, ['ast-stats', str(empty_file)]) assert result.exit_code == 0 # Should show zero statistics assert "0" in result.output assert any(phrase in result.output.lower() for phrase in [ "empty", "no content", "zero" ]) def test_ast_stats_leverages_cached_ast(self): """ast-stats should use cached AST for performance.""" cache = ASTCache(self.cache_dir) cache.cache_file(self.test_file) with patch.object(ASTCache, 'load_cached_ast') as mock_get: mock_get.return_value = [ {"type": "heading_open", "tag": "h1"}, {"type": "link_open", "href": "test"} ] result = self.runner.invoke(cli, ['ast-stats', str(self.test_file)]) assert result.exit_code == 0 mock_get.assert_called_once() def test_ast_stats_provides_comprehensive_analysis(self): """ast-stats should provide comprehensive document analysis.""" result = self.runner.invoke(cli, ['ast-stats', str(self.test_file)]) assert result.exit_code == 0 # Should provide multiple types of analysis output_lower = result.output.lower() analysis_types = [ "headings", "links", "lists", "paragraphs", "code" ] # Should have at least 3 different types of analysis matching_types = [t for t in analysis_types if t in output_lower] assert len(matching_types) >= 3 # ===== Performance and Integration Tests ===== def test_ast_commands_integration_with_cache_system(self): """All AST commands should integrate seamlessly with existing cache system.""" # Test that all commands can handle cached vs non-cached scenarios commands_and_args = [ ['ast-show', str(self.test_file)], ['ast-query', str(self.test_file), '$.[0]'], ['ast-stats', str(self.test_file)] ] for cmd_args in commands_and_args: # First run (should create cache) result1 = self.runner.invoke(cli, cmd_args) assert result1.exit_code == 0 # Second run (should use cache) result2 = self.runner.invoke(cli, cmd_args) assert result2.exit_code == 0 # Results should be consistent assert len(result2.output.strip()) > 0 def test_ast_commands_error_handling_consistency(self): """All AST commands should have consistent error handling.""" nonexistent_file = Path(self.temp_dir) / "nonexistent.md" commands = [ ['ast-show', str(nonexistent_file)], ['ast-query', str(nonexistent_file), '$.test'], ['ast-stats', str(nonexistent_file)] ] for cmd_args in commands: result = self.runner.invoke(cli, cmd_args) # All should fail gracefully assert result.exit_code != 0 # All should provide meaningful error messages assert len(result.output.strip()) > 0 assert "error" in result.output.lower() or "not found" in result.output.lower()