# markitect-main/tests/test_l4_service_ast_analysis.py

"""
Tests for Issue #15: AST Query and Analysis CLI.

TDD approach: These tests define the exact requirements for AST introspection commands.
All tests should initially FAIL (RED) and drive the implementation (GREEN).

Commands to implement:
- `markitect ast-show <file>` - Display AST structure for file
- `markitect ast-query <file> <jsonpath>` - Query AST using JSONPath
- `markitect ast-stats <file>` - Show AST statistics (headings, links, etc.)

Core USP: "Zero-Parsing Content Access" - Leverage cached ASTs for performance
"""

import json
import tempfile
from pathlib import Path
from unittest.mock import patch, MagicMock

import pytest
from click.testing import CliRunner

from markitect.cli import cli
from markitect.ast_cache import ASTCache


class TestASTCommands:
    """TDD test suite defining AST introspection command requirements."""

    def setup_method(self):
        """Prepare a CLI runner and a scratch directory with a sample markdown file.

        Sets ``self.runner``, ``self.temp_dir`` (a string path), ``self.cache_dir``
        and ``self.test_file``. The sample document contains front matter, three
        headings, two links, bullet and numbered lists, a blockquote, inline code
        and a fenced code block, so the AST commands have varied input.
        """
        self.runner = CliRunner()
        self.temp_dir = tempfile.mkdtemp()

        scratch = Path(self.temp_dir)
        self.cache_dir = scratch / ".ast_cache"
        self.test_file = scratch / "test.md"

        # Fixture document; the stats tests rely on its heading and link counts.
        sample_markdown = """---
title: Test Document
author: Test Author
tags: [test, markdown]
---

# Main Heading

This is a paragraph with **bold** and *italic* text.

## Second Heading

- List item 1
- List item 2 with [link](https://example.com)

### Third Heading

1. Numbered item 1
2. Numbered item 2

[Another link](https://test.com)

> This is a blockquote

`inline code` and code block:

```python
print("hello world")
```
"""
        self.test_file.write_text(sample_markdown)

    def teardown_method(self):
        """Delete the per-test scratch directory if it still exists."""
        import shutil

        scratch = Path(self.temp_dir)
        if not scratch.exists():
            # Nothing left to remove (e.g. a test already cleaned up).
            return
        shutil.rmtree(self.temp_dir)

    # ===== ast-show command tests =====

    def test_ast_show_command_exists(self):
        """RED: the CLI must expose an ``ast-show`` command that can be invoked."""
        outcome = self.runner.invoke(cli, ['ast-show', str(self.test_file)])

        # An unregistered command would produce click's "No such command" error.
        assert "No such command" not in outcome.output

        # Any of these exit codes means the command was dispatched, even if it
        # then failed for a different reason.
        tolerated_exit_codes = [0, 1, 2]
        assert outcome.exit_code in tolerated_exit_codes

    def test_ast_show_requires_file_argument(self):
        """RED: invoking ``ast-show`` without a file must fail with a usage hint."""
        outcome = self.runner.invoke(cli, ['ast-show'])

        assert outcome.exit_code != 0

        # At least one of these markers must appear in the error output.
        usage_hints = ["Missing argument", "Usage:", "FILE"]
        hint_present = any(hint in outcome.output for hint in usage_hints)
        assert hint_present

    def test_ast_show_displays_ast_structure(self):
        """``ast-show`` must print token types and some indication of nesting."""
        outcome = self.runner.invoke(cli, ['ast-show', str(self.test_file)])
        assert outcome.exit_code == 0

        printed = outcome.output

        # At least one known token type from the sample document must be listed.
        expected_token_types = [
            "heading_open", "paragraph_open", "strong_open", "list_item_open"
        ]
        assert any(token_type in printed for token_type in expected_token_types)

        # Hierarchy may be conveyed by indentation or by explicit level info.
        hierarchy_markers = ["  [", "nesting", "level"]
        assert any(marker in printed for marker in hierarchy_markers)

    def test_ast_show_handles_nonexistent_file(self):
        """RED: ``ast-show`` on a missing path must fail with a clear message."""
        missing_path = Path(self.temp_dir) / "nonexistent.md"
        outcome = self.runner.invoke(cli, ['ast-show', str(missing_path)])

        # Non-zero exit plus a recognisable "missing file" phrase is required.
        assert outcome.exit_code != 0

        lowered = outcome.output.lower()
        missing_file_phrases = ["not found", "does not exist", "file not found"]
        assert any(phrase in lowered for phrase in missing_file_phrases)

    def test_ast_show_command_uses_cached_data_for_improved_performance(self):
        """``ast-show`` must obtain its AST through ``ASTCache.load_cached_ast``."""
        # Warm the cache for the sample file before running the command.
        ASTCache(self.cache_dir).cache_file(self.test_file)

        stub_tokens = [{"type": "heading_open", "tag": "h1"}]

        # Replace the loader with a stub so we can observe that it is called.
        with patch.object(ASTCache, 'load_cached_ast', return_value=stub_tokens) as cache_loader:
            outcome = self.runner.invoke(cli, ['ast-show', str(self.test_file)])

            assert outcome.exit_code == 0
            # Exactly one call: the command went through the cache loader.
            cache_loader.assert_called_once()

    def test_ast_show_provides_readable_output_format(self):
        """``ast-show`` output must be multi-line and include document text."""
        outcome = self.runner.invoke(cli, ['ast-show', str(self.test_file)])
        assert outcome.exit_code == 0

        # More than five lines rules out a single-line raw dump.
        output_lines = outcome.output.strip().split('\n')
        assert len(output_lines) > 5

        # Some text from the sample document must be visible in the display.
        document_snippets = ["Main Heading", "bold", "italic", "List item"]
        assert any(snippet in outcome.output for snippet in document_snippets)

    # ===== ast-query command tests =====

    def test_ast_query_command_exists(self):
        """RED: ``ast-query`` must be registered and must reject a bare invocation.

        Running the command with no arguments has to fail because the required
        arguments are missing, not because the command is unknown.
        """
        result = self.runner.invoke(cli, ['ast-query'])

        # An unregistered command would produce click's "No such command" error.
        assert "No such command" not in result.output

        # Assert the failure unconditionally: guarding the usage check behind
        # ``if exit_code != 0`` would let the test pass without verifying
        # anything whenever the command exits successfully.
        assert result.exit_code != 0
        assert any(phrase in result.output for phrase in ["Missing argument", "Usage:"])

    def test_ast_query_requires_file_and_jsonpath_arguments(self):
        """RED: ``ast-query`` must fail when either required argument is missing."""
        incomplete_invocations = [
            ['ast-query'],                       # neither file nor JSONPath
            ['ast-query', str(self.test_file)],  # file only, JSONPath missing
        ]

        for argv in incomplete_invocations:
            outcome = self.runner.invoke(cli, argv)
            assert outcome.exit_code != 0

    def test_ast_query_executes_jsonpath_queries(self):
        """``ast-query`` must run a JSONPath expression and print the matches.

        Uses the slice ``$[:3]`` (the first three tokens) and checks that the
        command succeeds, prints something, and that the printed nodes expose
        a ``type`` field.
        """
        result = self.runner.invoke(cli, [
            'ast-query', str(self.test_file), '$[:3]'
        ])

        assert result.exit_code == 0
        # The query must return matching AST nodes, i.e. non-empty output.
        assert len(result.output.strip()) > 0
        # Matched nodes are expected to carry a "type" field.
        assert "type" in result.output

    def test_ast_query_handles_invalid_jsonpath(self):
        """RED: a malformed JSONPath must yield a non-zero exit and an error hint."""
        malformed_query = '$.[invalid_syntax'  # closing bracket deliberately omitted
        outcome = self.runner.invoke(cli, [
            'ast-query', str(self.test_file), malformed_query
        ])

        assert outcome.exit_code != 0

        # The message should mention the problem in some recognisable way.
        lowered = outcome.output.lower()
        error_keywords = ["invalid", "syntax", "jsonpath", "error"]
        assert any(keyword in lowered for keyword in error_keywords)

    def test_ast_query_returns_empty_for_no_matches(self):
        """A query that matches nothing must succeed and say that it found nothing."""
        outcome = self.runner.invoke(cli, [
            'ast-query', str(self.test_file), '$..nonexistent_field'
        ])

        # No matches is not an error condition.
        assert outcome.exit_code == 0

        lowered = outcome.output.lower()
        empty_result_markers = ["no matches", "empty", "not found", "[]"]
        assert any(marker in lowered for marker in empty_result_markers)

    def test_ast_query_leverages_cached_ast(self):
        """``ast-query`` must obtain its AST through ``ASTCache.load_cached_ast``."""
        # Warm the cache for the sample file before running the command.
        ASTCache(self.cache_dir).cache_file(self.test_file)

        stub_tokens = [{"type": "heading_open", "tag": "h1"}]

        # Stub the loader so the call can be observed.
        with patch.object(ASTCache, 'load_cached_ast', return_value=stub_tokens) as cache_loader:
            argv = ['ast-query', str(self.test_file), '$.*.type']
            outcome = self.runner.invoke(cli, argv)

            assert outcome.exit_code == 0
            # Exactly one call: the command went through the cache loader.
            cache_loader.assert_called_once()

    # ===== ast-stats command tests =====

    def test_ast_stats_command_exists(self):
        """RED: the CLI must expose an ``ast-stats`` command that can be invoked."""
        outcome = self.runner.invoke(cli, ['ast-stats', str(self.test_file)])

        # An unregistered command would produce click's "No such command" error.
        assert "No such command" not in outcome.output

        # The command was dispatched; it may still fail for other reasons.
        tolerated_exit_codes = [0, 1, 2]
        assert outcome.exit_code in tolerated_exit_codes


    def test_ast_stats_shows_heading_statistics(self):
        """``ast-stats`` must report headings and the correct heading count.

        The sample document created in ``setup_method`` has three headings
        (one each of levels 1-3), so the output must contain the count 3.
        """
        import re

        result = self.runner.invoke(cli, ['ast-stats', str(self.test_file)])

        assert result.exit_code == 0

        output_lower = result.output.lower()
        # Headings must be mentioned, either in aggregate or per level.
        assert any(word in output_lower for word in ["headings", "h1", "h2", "h3"])

        # Require a standalone "3": a plain substring test would be satisfied
        # by the label "h3" (or a digit in a temp path) even if no count of
        # three was ever printed.
        assert re.search(r"\b3\b", result.output) or "three" in output_lower

    def test_ast_stats_shows_link_statistics(self):
        """``ast-stats`` must report links and the correct link count.

        The sample document created in ``setup_method`` contains two links,
        so the output must contain the count 2.
        """
        import re

        result = self.runner.invoke(cli, ['ast-stats', str(self.test_file)])

        assert result.exit_code == 0

        # "link" also covers the plural "links", so one check is enough.
        assert "link" in result.output.lower()

        # Require a standalone "2": a plain substring test would be satisfied
        # by an "h2" label even if the link count was wrong or absent.
        assert re.search(r"\b2\b", result.output)

    def test_ast_stats_shows_text_statistics(self):
        """``ast-stats`` must mention at least one general structure/text metric."""
        outcome = self.runner.invoke(cli, ['ast-stats', str(self.test_file)])
        assert outcome.exit_code == 0

        lowered = outcome.output.lower()
        # Any one of these metric names is sufficient for this test.
        metric_names = [
            "paragraphs", "words", "characters", "lists", "code", "blockquotes"
        ]
        metric_reported = any(name in lowered for name in metric_names)
        assert metric_reported

    def test_ast_stats_handles_empty_file(self):
        """RED: ``ast-stats`` on an empty file must succeed and report emptiness."""
        blank_document = Path(self.temp_dir) / "empty.md"
        blank_document.write_text("")

        outcome = self.runner.invoke(cli, ['ast-stats', str(blank_document)])

        assert outcome.exit_code == 0

        # Zero counts are expected somewhere in the report...
        assert "0" in outcome.output

        # ...together with wording that signals there was no content.
        lowered = outcome.output.lower()
        emptiness_phrases = ["empty", "no content", "zero"]
        assert any(phrase in lowered for phrase in emptiness_phrases)

    def test_ast_stats_leverages_cached_ast(self):
        """``ast-stats`` must obtain its AST through ``ASTCache.load_cached_ast``."""
        # Warm the cache for the sample file before running the command.
        ASTCache(self.cache_dir).cache_file(self.test_file)

        stub_tokens = [
            {"type": "heading_open", "tag": "h1"},
            {"type": "link_open", "href": "test"}
        ]

        # Stub the loader so the call can be observed.
        with patch.object(ASTCache, 'load_cached_ast', return_value=stub_tokens) as cache_loader:
            outcome = self.runner.invoke(cli, ['ast-stats', str(self.test_file)])

            assert outcome.exit_code == 0
            cache_loader.assert_called_once()

    def test_ast_stats_provides_comprehensive_analysis(self):
        """``ast-stats`` must cover at least three distinct analysis categories."""
        outcome = self.runner.invoke(cli, ['ast-stats', str(self.test_file)])
        assert outcome.exit_code == 0

        lowered = outcome.output.lower()
        categories = ["headings", "links", "lists", "paragraphs", "code"]

        # Count how many category names appear in the report.
        categories_covered = sum(1 for category in categories if category in lowered)
        assert categories_covered >= 3

    # ===== Performance and Integration Tests =====

    def test_ast_commands_integration_with_cache_system(self):
        """Every AST command must succeed on both a first and a repeated run.

        The first invocation is expected to run without a warm cache and the
        second with one; this test only verifies that both exit cleanly and
        that the second run still produces output.
        """
        sample_path = str(self.test_file)
        invocations = [
            ['ast-show', sample_path],
            ['ast-query', sample_path, '$.[0]'],
            ['ast-stats', sample_path]
        ]

        for argv in invocations:
            # Initial run (presumably populates the cache).
            first_run = self.runner.invoke(cli, argv)
            assert first_run.exit_code == 0

            # Repeat run (presumably served from the cache).
            second_run = self.runner.invoke(cli, argv)
            assert second_run.exit_code == 0
            # The repeated run must still print something.
            assert second_run.output.strip() != ""

    def test_ast_commands_error_handling_consistency(self):
        """Every AST command must fail the same way when given a missing file."""
        missing_path = str(Path(self.temp_dir) / "nonexistent.md")

        invocations = [
            ['ast-show', missing_path],
            ['ast-query', missing_path, '$.test'],
            ['ast-stats', missing_path]
        ]

        for argv in invocations:
            outcome = self.runner.invoke(cli, argv)

            # Graceful failure: non-zero exit code...
            assert outcome.exit_code != 0
            # ...with some output...
            assert outcome.output.strip() != ""
            # ...that identifies the problem as an error or a missing file.
            lowered = outcome.output.lower()
            assert "error" in lowered or "not found" in lowered