""" Test suite for Issue #2: Fast Document Loading & CLI Manipulation Focus: Subtask 2a - File Ingestion & AST Caching This test suite covers the core file ingestion and AST caching functionality that forms the foundation of the performance-optimized document system. """ import json import os import tempfile import time from pathlib import Path from unittest.mock import patch, MagicMock import pytest from markitect.database import DatabaseManager from markitect.parser import parse_markdown_to_ast class TestFileIngestion: """Test file ingestion and basic AST processing.""" def setup_method(self): """Set up test database and temporary files.""" self.temp_dir = tempfile.mkdtemp() self.db_path = Path(self.temp_dir) / "test.db" self.db_manager = DatabaseManager(self.db_path) self.db_manager.initialize_database() # Initialize the database # Create test markdown file self.test_md_content = """--- title: Test Document author: Test User date: "2025-09-24" --- # Test Document This is a test document with **bold** and *italic* text. ## Section 1 - Item 1 - Item 2 - Item 3 ## Section 2 Some more content here. """ self.test_file = Path(self.temp_dir) / "test.md" self.test_file.write_text(self.test_md_content) def teardown_method(self): """Clean up test files.""" import shutil shutil.rmtree(self.temp_dir) def test_parse_markdown_file_to_ast(self): """Test parsing markdown file to AST representation.""" # This test should fail initially - we need to implement document ingestion from markitect.document_manager import DocumentManager # This will fail initially doc_manager = DocumentManager(self.db_manager) result = doc_manager.ingest_file(self.test_file) # Verify the result contains parsed AST assert result is not None assert 'ast' in result assert 'metadata' in result assert result['metadata']['filename'] == 'test.md' assert result['metadata']['title'] == 'Test Document' def test_ast_contains_expected_structure(self): """Test that parsed AST contains expected document structure.""" # Parse the test file ast = parse_markdown_to_ast(self.test_md_content) # Verify AST structure contains expected elements assert isinstance(ast, list) assert len(ast) > 0 # Should contain front matter, headings, paragraphs, lists token_types = [token['type'] for token in ast] assert 'heading_open' in token_types assert 'paragraph_open' in token_types assert 'bullet_list_open' in token_types class TestASTCaching: """Test AST caching system for performance optimization.""" def setup_method(self): """Set up test environment with caching.""" self.temp_dir = tempfile.mkdtemp() self.db_path = Path(self.temp_dir) / "test.db" self.cache_dir = Path(self.temp_dir) / "ast_cache" self.cache_dir.mkdir() self.test_file = Path(self.temp_dir) / "performance_test.md" # Create a larger test file for performance testing large_content = """--- title: Large Test Document --- # Large Document """ + "\n\n".join([f"## Section {i}\n\nContent for section {i} with multiple paragraphs." for i in range(50)]) self.test_file.write_text(large_content) def teardown_method(self): """Clean up test files.""" import shutil shutil.rmtree(self.temp_dir) def test_create_ast_cache_file(self): """Test creating AST cache file from markdown.""" # This will fail initially - need to implement AST cache system from markitect.ast_cache import ASTCache # This will fail initially cache = ASTCache(self.cache_dir) cache_info = cache.cache_file(self.test_file) # Verify cache file was created assert cache_info['cache_file'].exists() assert cache_info['cache_file'].suffix == '.json' # Verify cache contains valid AST with open(cache_info['cache_file']) as f: cached_ast = json.load(f) assert isinstance(cached_ast, list) assert len(cached_ast) > 0 def test_cache_faster_than_parsing(self): """Test that cache loading is faster than re-parsing markdown.""" # This test validates the core performance requirement from markitect.ast_cache import ASTCache cache = ASTCache(self.cache_dir) # Time the initial parse and cache creation start_time = time.time() cache_info = cache.cache_file(self.test_file) initial_parse_time = time.time() - start_time # Time loading from cache start_time = time.time() cached_ast = cache.load_cached_ast(self.test_file) cache_load_time = time.time() - start_time # Cache loading should be significantly faster assert cache_load_time < (initial_parse_time * 0.5) # Less than 50% as per requirements assert cached_ast is not None def test_cache_invalidation_on_file_change(self): """Test that cache is invalidated when source file changes.""" from markitect.ast_cache import ASTCache cache = ASTCache(self.cache_dir) original_cache = cache.cache_file(self.test_file) original_mtime = original_cache['cache_file'].stat().st_mtime # Modify the source file time.sleep(0.1) # Ensure different timestamp modified_content = self.test_file.read_text() + "\n\n## New Section\n\nAdded content." self.test_file.write_text(modified_content) # Cache should detect the change and regenerate new_cache = cache.cache_file(self.test_file) new_mtime = new_cache['cache_file'].stat().st_mtime assert new_mtime > original_mtime class TestDatabaseIntegration: """Test integration with existing database system from Issue #1.""" def setup_method(self): """Set up test database.""" self.temp_dir = tempfile.mkdtemp() self.db_path = Path(self.temp_dir) / "test.db" self.db_manager = DatabaseManager(self.db_path) self.db_manager.initialize_database() # Initialize the database self.test_file = Path(self.temp_dir) / "integration_test.md" self.test_content = """--- title: Integration Test category: testing --- # Integration Test Testing database integration. """ self.test_file.write_text(self.test_content) def teardown_method(self): """Clean up test files.""" import shutil shutil.rmtree(self.temp_dir) def test_store_document_metadata_in_database(self): """Test storing document metadata in existing database structure.""" # This should build on Issue #1's database functionality from markitect.document_manager import DocumentManager doc_manager = DocumentManager(self.db_manager) result = doc_manager.ingest_file(self.test_file) # Verify metadata is stored in database stored_files = self.db_manager.list_markdown_files() assert len(stored_files) == 1 stored_file = stored_files[0] assert stored_file['filename'] == 'integration_test.md' assert stored_file['front_matter']['title'] == 'Integration Test' assert stored_file['front_matter']['category'] == 'testing' def test_store_ast_cache_reference_in_database(self): """Test storing AST cache file reference in database.""" from markitect.document_manager import DocumentManager doc_manager = DocumentManager(self.db_manager) result = doc_manager.ingest_file(self.test_file) # Verify AST cache reference is stored assert 'ast_cache_path' in result assert result['ast_cache_path'].exists() # Verify database contains cache reference stored_files = self.db_manager.list_markdown_files() stored_file = stored_files[0] # For now, cache reference is tracked in the result object assert result['ast_cache_path'].exists() def test_performance_metadata_tracking(self): """Test tracking performance metrics for cache validation.""" from markitect.document_manager import DocumentManager doc_manager = DocumentManager(self.db_manager) result = doc_manager.ingest_file(self.test_file) # Verify performance metrics are tracked assert 'parse_time' in result assert 'cache_time' in result assert result['parse_time'] > 0 assert result['cache_time'] >= 0 class TestErrorHandling: """Test error handling for file ingestion and caching.""" def setup_method(self): """Set up test environment.""" self.temp_dir = tempfile.mkdtemp() self.db_path = Path(self.temp_dir) / "test.db" def teardown_method(self): """Clean up test files.""" import shutil shutil.rmtree(self.temp_dir) def test_handle_nonexistent_file(self): """Test handling of nonexistent file.""" from markitect.document_manager import DocumentManager db_manager = DatabaseManager(self.db_path) doc_manager = DocumentManager(db_manager) nonexistent_file = Path(self.temp_dir) / "nonexistent.md" with pytest.raises(FileNotFoundError): doc_manager.ingest_file(nonexistent_file) def test_handle_invalid_markdown(self): """Test handling of invalid or malformed markdown.""" from markitect.document_manager import DocumentManager # Create file with malformed front matter invalid_file = Path(self.temp_dir) / "invalid.md" invalid_content = """--- title: Test invalid_yaml: [unclosed bracket --- # Content """ invalid_file.write_text(invalid_content) db_manager = DatabaseManager(self.db_path) doc_manager = DocumentManager(db_manager) # Should handle gracefully, not crash result = doc_manager.ingest_file(invalid_file) assert result is not None # Front matter parsing should fail gracefully def test_handle_cache_directory_permissions(self): """Test handling of cache directory permission issues.""" from markitect.ast_cache import ASTCache # Create read-only directory to simulate permission issues readonly_dir = Path(self.temp_dir) / "readonly" readonly_dir.mkdir() readonly_dir.chmod(0o444) # Read-only test_file = Path(self.temp_dir) / "test.md" test_file.write_text("# Test") cache = ASTCache(readonly_dir) with pytest.raises(PermissionError): cache.cache_file(test_file)