"""
Tests for Issue #83: Full text search functionality.

Tests the FTS5-based full text search plugin including indexing,
querying, and CLI integration.
"""

import pytest
import tempfile
import sqlite3
import json
import os
from pathlib import Path
from unittest.mock import patch, MagicMock

from markitect.plugins.builtin.search import FTSSearchPlugin, SearchIndexer, QueryParser
from markitect.database import DatabaseManager


def _create_test_database():
    """Create a temporary ``.db`` file and initialize it via DatabaseManager.

    Returns:
        A ``(db_path, db_manager)`` tuple. The caller owns the file and is
        responsible for removing it with ``os.unlink(db_path)``.

    Raises:
        Exception: Whatever ``DatabaseManager`` raises during construction or
            initialization; the temporary file is removed before re-raising.
    """
    # delete=False: only the path is needed here; the file must outlive the
    # ``with`` block so DatabaseManager can open it by name.
    with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as f:
        db_path = f.name

    try:
        db_manager = DatabaseManager(db_path)
        db_manager.initialize_database()
    except Exception:
        # Do not leave an orphaned temp file behind when setup fails.
        os.unlink(db_path)
        raise

    return db_path, db_manager


class TestSearchIndexer:
    """Test the search indexing functionality."""

    @pytest.fixture
    def temp_db_path(self):
        """Create a temporary database for testing.

        Yields the database path; the file is removed on teardown, and also
        when seeding the test data fails.
        """
        db_path, db_manager = _create_test_database()
        try:
            # Add test markdown files
            db_manager.store_markdown_file(
                "test1.md",
                "# Test Document\n\nThis is a test document about API development.",
            )
            db_manager.store_markdown_file(
                "test2.md",
                "# Another Document\n\nGraphQL interface documentation.",
            )
            db_manager.store_markdown_file(
                "test3.md",
                "---\ntitle: Blog Post\n---\n# My Blog\n\nContent about technology.",
            )

            # Add test schemas
            schema1 = {"type": "object", "title": "User Schema", "description": "Schema for user objects"}
            schema2 = {"type": "object", "title": "Product Schema", "description": "E-commerce product definition"}

            db_manager.store_schema_file("user.json", json.dumps(schema1))
            db_manager.store_schema_file("product.json", json.dumps(schema2))

            yield db_path
        finally:
            # Cleanup
            os.unlink(db_path)

    def test_check_fts_availability(self, temp_db_path):
        """Test checking FTS5 availability."""
        indexer = SearchIndexer()
        available = indexer.check_fts_availability(temp_db_path)

        # FTS5 should be available in most modern SQLite installations
        assert isinstance(available, bool)

    def test_initialize_fts_tables(self, temp_db_path):
        """Test FTS5 table initialization."""
        indexer = SearchIndexer()
        indexer.initialize_fts_tables(temp_db_path)

        # Check that FTS tables were created
        conn = sqlite3.connect(temp_db_path)
        try:
            cursor = conn.cursor()

            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'fts_%'")
            fts_tables = [row[0] for row in cursor.fetchall()]

            if indexer.check_fts_availability(temp_db_path):
                assert 'fts_files' in fts_tables
                assert 'fts_schemas' in fts_tables
            else:
                # If FTS5 not available, should have status table
                cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='fts_status'")
                assert cursor.fetchone() is not None
        finally:
            # Close even when an assertion fails so the connection is not
            # left open on the temporary database file.
            conn.close()

    def test_rebuild_index(self, temp_db_path):
        """Test rebuilding search indexes."""
        indexer = SearchIndexer()
        indexer.initialize_fts_tables(temp_db_path)

        stats = indexer.rebuild_index(temp_db_path)

        assert 'files_indexed' in stats
        assert 'schemas_indexed' in stats

        # If FTS5 is available, should index successfully. If it is not
        # available, only the presence of the stats keys is checked above.
        if indexer.check_fts_availability(temp_db_path):
            assert stats['files_indexed'] >= 0
            assert stats['schemas_indexed'] >= 0

    def test_get_index_info(self, temp_db_path):
        """Test getting index information."""
        indexer = SearchIndexer()
        indexer.initialize_fts_tables(temp_db_path)
        indexer.rebuild_index(temp_db_path)

        info = indexer.get_index_info(temp_db_path)

        assert 'fts_enabled' in info
        if info['fts_enabled']:
            assert 'fts_tables' in info
            assert 'fts_files_count' in info
            assert 'fts_schemas_count' in info


class TestQueryParser:
    """Test query parsing functionality."""

    def test_parse_simple_query(self):
        """Test parsing simple queries."""
        parser = QueryParser()

        # Simple word
        result = parser.parse_query("test")
        assert "test*" in result

        # Multiple words
        result = parser.parse_query("test document")
        assert "test*" in result
        assert "document*" in result
        assert "AND" in result

    def test_parse_phrase_query(self):
        """Test parsing phrase queries."""
        parser = QueryParser()

        result = parser.parse_query('"exact phrase"')
        assert '"exact phrase"' in result

    def test_parse_boolean_operators(self):
        """Test parsing boolean operators."""
        parser = QueryParser()

        # AND operator - if already FTS5, should be preserved
        result = parser.parse_query("test AND document")
        assert "test" in result
        assert "AND" in result
        assert "document" in result

        # OR operator - if already FTS5, should be preserved
        result = parser.parse_query("test OR document")
        assert "test" in result
        assert "OR" in result
        assert "document" in result

        # NOT operator - if already FTS5, should be preserved
        result = parser.parse_query("test NOT document")
        assert "test" in result
        assert "NOT" in result

    def test_validate_query(self):
        """Test query validation."""
        parser = QueryParser()

        # Valid queries
        valid, error = parser.validate_query("test")
        assert valid
        assert error is None

        valid, error = parser.validate_query('"exact phrase"')
        assert valid
        assert error is None

        # Invalid queries
        valid, error = parser.validate_query('unmatched "quote')
        assert not valid
        assert "quotes" in error

        valid, error = parser.validate_query("test (unmatched")
        assert not valid
        assert "parentheses" in error

    def test_get_query_terms(self):
        """Test extracting terms from queries."""
        parser = QueryParser()

        terms = parser.get_query_terms("test document AND api")
        assert "test" in terms
        assert "document" in terms
        assert "api" in terms
        assert "AND" not in terms  # Operators should be excluded

    def test_build_column_query(self):
        """Test building column-specific queries."""
        parser = QueryParser()

        result = parser.build_column_query("test", ["title", "content"])
        assert "title:" in result
        assert "content:" in result
        assert "OR" in result


class TestFTSSearchPlugin:
    """Test the main FTS search plugin."""

    @pytest.fixture
    def temp_db_path(self):
        """Create a temporary database with test data.

        Yields the database path; the file is removed on teardown, and also
        when seeding the test data fails.
        """
        db_path, db_manager = _create_test_database()
        try:
            # Add test markdown files
            db_manager.store_markdown_file(
                "api-guide.md",
                "# API Guide\n\nComprehensive API development guide with examples.",
            )
            db_manager.store_markdown_file(
                "tutorial.md",
                "# GraphQL Tutorial\n\nLearn GraphQL basics and advanced concepts.",
            )
            db_manager.store_markdown_file(
                "readme.md",
                "---\ntitle: Project README\ntags: [documentation, guide]\n---\n# Project\n\nProject documentation and setup guide.",
            )

            # Add test schemas
            schema1 = {"type": "object", "title": "API Schema", "description": "REST API response schema", "properties": {"data": {"type": "object"}}}
            schema2 = {"type": "object", "title": "User Schema", "description": "User profile schema", "properties": {"name": {"type": "string"}}}

            db_manager.store_schema_file("api-schema.json", json.dumps(schema1))
            db_manager.store_schema_file("user-schema.json", json.dumps(schema2))

            yield db_path
        finally:
            # Cleanup
            os.unlink(db_path)

    def test_plugin_metadata(self):
        """Test plugin metadata."""
        plugin = FTSSearchPlugin()
        metadata = plugin.metadata

        assert metadata.name == "fts_search"
        assert metadata.version == "1.0.0"
        assert "full text search" in metadata.description.lower()

    def test_initialize_plugin(self, temp_db_path):
        """Test plugin initialization."""
        plugin = FTSSearchPlugin()
        plugin.initialize(temp_db_path)

        # Check that FTS tables exist (if FTS5 is available)
        stats = plugin.get_search_stats(temp_db_path)
        assert 'fts_enabled' in stats

    def test_search_files_only(self, temp_db_path):
        """Test searching only in files."""
        plugin = FTSSearchPlugin()
        plugin.initialize(temp_db_path)
        plugin.rebuild_index(temp_db_path)

        results = plugin.search(temp_db_path, "API", content_type="files", limit=10)

        # Should find files containing "API"
        assert isinstance(results, list)
        for result in results:
            assert result['type'] == 'file'
            assert 'file' in result
            assert 'score' in result

    def test_search_schemas_only(self, temp_db_path):
        """Test searching only in schemas."""
        plugin = FTSSearchPlugin()
        plugin.initialize(temp_db_path)
        plugin.rebuild_index(temp_db_path)

        results = plugin.search(temp_db_path, "schema", content_type="schemas", limit=10)

        # Should find schemas
        assert isinstance(results, list)
        for result in results:
            assert result['type'] == 'schema'
            assert 'schema' in result
            assert 'score' in result

    def test_search_all_content(self, temp_db_path):
        """Test searching all content types."""
        plugin = FTSSearchPlugin()
        plugin.initialize(temp_db_path)
        plugin.rebuild_index(temp_db_path)

        results = plugin.search(temp_db_path, "guide", content_type="all", limit=10)

        # Should find both files and schemas (or empty list if FTS5 unavailable)
        assert isinstance(results, list)

        # If results found, should be properly formatted and sorted
        if results:
            # Results should be sorted by score
            scores = [result.get('score', 0) for result in results]
            assert scores == sorted(scores, reverse=True)

            # Check result structure
            for result in results:
                assert 'type' in result
                assert 'score' in result

    def test_search_with_pagination(self, temp_db_path):
        """Test search with pagination."""
        plugin = FTSSearchPlugin()
        plugin.initialize(temp_db_path)
        plugin.rebuild_index(temp_db_path)

        # Get first page
        results1 = plugin.search(temp_db_path, "guide", limit=1, offset=0)

        # Get second page
        results2 = plugin.search(temp_db_path, "guide", limit=1, offset=1)

        # Results should be different (if there are enough results)
        if results1 and results2:
            assert results1[0] != results2[0]

    def test_fallback_search(self, temp_db_path):
        """Test fallback search when FTS5 fails."""
        plugin = FTSSearchPlugin()
        plugin.initialize(temp_db_path)

        # Force fallback by making both FTS-backed search helpers raise
        with patch.object(plugin, '_search_files', side_effect=Exception("FTS5 error")), \
                patch.object(plugin, '_search_schemas', side_effect=Exception("FTS5 error")):
            results = plugin.search(temp_db_path, "API", content_type="all", limit=10)

        # Should still return results via fallback
        assert isinstance(results, list)

    def test_get_search_stats(self, temp_db_path):
        """Test getting search statistics."""
        plugin = FTSSearchPlugin()
        plugin.initialize(temp_db_path)

        stats = plugin.get_search_stats(temp_db_path)

        assert 'fts_enabled' in stats
        assert 'fts_tables' in stats


class TestSearchCLI:
    """Test search CLI commands."""

    @pytest.fixture
    def temp_db_path(self):
        """Create a temporary database with test data.

        Yields the database path; the file is removed on teardown, and also
        when seeding the test data fails.
        """
        db_path, db_manager = _create_test_database()
        try:
            # Add test data
            db_manager.store_markdown_file("test.md", "# Test\n\nThis is a test document.")

            yield db_path
        finally:
            # Cleanup
            os.unlink(db_path)

    def test_search_init_command(self, temp_db_path):
        """Test the search init CLI command."""
        from click.testing import CliRunner
        from markitect.cli import cli

        runner = CliRunner()

        with patch('markitect.cli.get_database_path', return_value=temp_db_path):
            result = runner.invoke(cli, ['search', 'init'])

        assert result.exit_code == 0
        assert "Search indexes initialized" in result.output or "Search plugin not available" in result.output

    def test_search_query_command(self, temp_db_path):
        """Test the search query CLI command."""
        from click.testing import CliRunner
        from markitect.cli import cli

        runner = CliRunner()

        with patch('markitect.cli.get_database_path', return_value=temp_db_path):
            # Initialize search first
            runner.invoke(cli, ['search', 'init'])

            # Perform search
            result = runner.invoke(cli, ['search', 'query', 'test'])

        assert result.exit_code == 0
        # Should either show results or indicate no search plugin
        assert "results" in result.output or "Search plugin not available" in result.output

    def test_search_status_command(self, temp_db_path):
        """Test the search status CLI command."""
        from click.testing import CliRunner
        from markitect.cli import cli

        runner = CliRunner()

        with patch('markitect.cli.get_database_path', return_value=temp_db_path):
            result = runner.invoke(cli, ['search', 'status'])

        assert result.exit_code == 0
        assert "Search Index Status" in result.output or "Search plugin not available" in result.output

    def test_search_rebuild_command(self, temp_db_path):
        """Test the search rebuild CLI command."""
        from click.testing import CliRunner
        from markitect.cli import cli

        runner = CliRunner()

        with patch('markitect.cli.get_database_path', return_value=temp_db_path):
            # Initialize search first
            runner.invoke(cli, ['search', 'init'])

            # Rebuild indexes
            result = runner.invoke(cli, ['search', 'rebuild'])

        # Surface diagnostics in the captured output when the command fails
        if result.exit_code != 0:
            print(f"Command output: {result.output}")
            print(f"Exception: {result.exception}")

        # Should succeed or fail gracefully with plugin unavailable message or database error
        acceptable_errors = [
            "Search plugin not available",
            "database disk image is malformed",  # Can happen with concurrent access
            "database is locked"
        ]

        if result.exit_code == 0:
            assert "Rebuilding search indexes" in result.output
        else:
            # Check if it's an acceptable error
            assert any(error in result.output for error in acceptable_errors)


class TestSearchIntegration:
    """Integration tests for search functionality."""

    @pytest.fixture
    def populated_db_path(self):
        """Create a database with realistic test data.

        Yields the database path; the file is removed on teardown, and also
        when seeding the test data fails.
        """
        db_path, db_manager = _create_test_database()
        try:
            # Add realistic markdown files
            # NOTE(review): the line breaks inside these Markdown documents were
            # reconstructed from their heading, list and front-matter markers;
            # confirm the exact blank-line placement against the original file.
            files = [
                ("api-documentation.md", """# API Documentation

## Authentication
The API uses Bearer token authentication. Include your token in the Authorization header.

## Endpoints
- GET /users - List all users
- POST /users - Create a new user
- GET /users/{id} - Get specific user

## Error Handling
All errors return JSON with error message and status code.
"""),
                ("graphql-guide.md", """---
title: GraphQL Complete Guide
tags: [graphql, api, tutorial]
author: Development Team
---

# GraphQL Complete Guide

GraphQL is a query language for APIs and a runtime for executing those queries.

## Benefits
- Single endpoint
- Type safety
- Efficient data fetching
- Strong introspection

## Schema Definition
Define your GraphQL schema using SDL (Schema Definition Language).
"""),
                ("project-readme.md", """# MarkiTect Project

MarkiTect is a comprehensive markdown content management and analysis system.

## Features
- Document indexing and storage
- Full text search capabilities
- GraphQL API interface
- Plugin system for extensibility

## Installation
1. Clone the repository
2. Install dependencies: pip install -r requirements.txt
3. Initialize database: markitect init

## Usage Examples
Search for content: markitect search query "API documentation"
""")
            ]

            for filename, content in files:
                db_manager.store_markdown_file(filename, content)

            # Add realistic schemas
            schemas = [
                ("user-schema.json", {
                    "type": "object",
                    "title": "User Schema",
                    "description": "Schema for user profile data in the API",
                    "properties": {
                        "id": {"type": "integer"},
                        "name": {"type": "string"},
                        "email": {"type": "string", "format": "email"},
                        "created_at": {"type": "string", "format": "date-time"}
                    },
                    "required": ["id", "name", "email"]
                }),
                ("api-response-schema.json", {
                    "type": "object",
                    "title": "API Response Schema",
                    "description": "Standard API response format for all endpoints",
                    "properties": {
                        "data": {"type": "object"},
                        "success": {"type": "boolean"},
                        "message": {"type": "string"},
                        "errors": {"type": "array", "items": {"type": "string"}}
                    },
                    "required": ["success"]
                })
            ]

            for filename, schema in schemas:
                db_manager.store_schema_file(filename, json.dumps(schema))

            yield db_path
        finally:
            # Cleanup
            os.unlink(db_path)

    def test_end_to_end_search_workflow(self, populated_db_path):
        """Test complete search workflow from initialization to querying."""
        plugin = FTSSearchPlugin()

        # Initialize search
        plugin.initialize(populated_db_path)

        # Rebuild indexes
        stats = plugin.rebuild_index(populated_db_path)

        # If FTS5 is available, should index files. If it is not available,
        # the counts are not checked at all.
        if plugin.indexer.check_fts_availability(populated_db_path):
            assert stats['files_indexed'] >= 0
            assert stats['schemas_indexed'] >= 0

        # Search for API-related content
        results = plugin.search(populated_db_path, "API", content_type="all", limit=10)

        # Results should be a list (may be empty if FTS5 not available)
        assert isinstance(results, list)

        # If we have results, verify they're properly formatted
        if results:
            # Should find both files and schemas
            result_types = {result['type'] for result in results}
            assert len(result_types) > 0  # At least one type found

            # Verify results have required fields
            for result in results:
                assert 'type' in result
                assert 'score' in result
                assert result['score'] > 0

                if result['type'] == 'file':
                    assert 'file' in result
                    assert 'filename' in result['file']
                elif result['type'] == 'schema':
                    assert 'schema' in result
                    assert 'filename' in result['schema']

    def test_search_ranking_quality(self, populated_db_path):
        """Test that search ranking produces sensible results."""
        plugin = FTSSearchPlugin()
        plugin.initialize(populated_db_path)
        plugin.rebuild_index(populated_db_path)

        # Search for "GraphQL"
        results = plugin.search(populated_db_path, "GraphQL", content_type="files", limit=10)

        if results:
            # The GraphQL guide should rank highest
            top_result = results[0]
            assert 'graphql' in top_result['file']['filename'].lower()

        # Search for exact phrase
        results = plugin.search(populated_db_path, '"API documentation"', content_type="files", limit=10)

        if results:
            # Should find exact phrase matches
            for result in results:
                content = result['file'].get('content', '').lower()
                # Either in content or highlighted
                assert 'api documentation' in content or 'api documentation' in result.get('highlight', '').lower()

    def test_search_error_handling(self, populated_db_path):
        """Test search error handling and edge cases."""
        plugin = FTSSearchPlugin()
        plugin.initialize(populated_db_path)

        # Empty query
        results = plugin.search(populated_db_path, "", content_type="all", limit=10)
        assert isinstance(results, list)

        # Very long query
        long_query = "word " * 100
        results = plugin.search(populated_db_path, long_query, content_type="all", limit=10)
        assert isinstance(results, list)

        # Special characters
        results = plugin.search(populated_db_path, "query with @#$%", content_type="all", limit=10)
        assert isinstance(results, list)

        # Zero limit
        results = plugin.search(populated_db_path, "API", content_type="all", limit=0)
        assert len(results) == 0