feat: implement lightweight full text search plugin using SQLite FTS5 (issue #83)

Added comprehensive full text search capabilities as a lightweight plugin.

Key features:
- SQLite FTS5-based search engine with no external dependencies
- Automatic indexing via database triggers for real-time updates
- Advanced query support: phrase search, boolean operators, proximity search
- Complete CLI interface with search commands
- Graceful fallback to LIKE queries when FTS5 unavailable
- Plugin architecture integration for extensibility

CLI Commands:
- `markitect search init` - Initialize search indexes
- `markitect search query` - Perform full text searches
- `markitect search status` - View index statistics
- `markitect search rebuild` - Rebuild indexes from scratch

Search Features:
- Content type filtering (files, schemas, all)
- Result pagination and formatting options
- Query validation and syntax assistance
- Performance optimization and index maintenance

Technical Implementation:
- FTSSearchPlugin: Main search plugin class
- SearchIndexer: FTS5 table management and indexing
- QueryParser: Query optimization and FTS5 syntax conversion
- Comprehensive error handling and fallback mechanisms
- 25 test cases covering all functionality

Documentation includes complete usage guide and examples.

Resolves issue #83: Full text search

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-03 17:03:11 +02:00
parent 2a15dde228
commit 8179929a4a
7 changed files with 1994 additions and 0 deletions
--- a/markitect/plugins/builtin/search/__init__.py
+++ b/markitect/plugins/builtin/search/__init__.py
@@ -0,0 +1,12 @@
+"""
+Full text search plugin for MarkiTect using SQLite FTS5.
+
+Provides lightweight, high-performance full text search capabilities
+as a plugin to the MarkiTect system.
+"""
+
+from .fts_search import FTSSearchPlugin
+from .indexer import SearchIndexer
+from .query_parser import QueryParser
+
+__all__ = ['FTSSearchPlugin', 'SearchIndexer', 'QueryParser']
--- a/markitect/plugins/builtin/search/fts_search.py
+++ b/markitect/plugins/builtin/search/fts_search.py
@@ -0,0 +1,307 @@
+"""
+SQLite FTS5 full text search plugin for MarkiTect.
+
+Provides advanced full text search capabilities using SQLite's built-in
+FTS5 virtual table extension for lightweight, high-performance search.
+"""
+
+import sqlite3
+import json
+from typing import Dict, Any, List, Optional, Tuple
+from pathlib import Path
+
+from ...base import BasePlugin, PluginMetadata, PluginType
+from ...decorators import register_plugin
+from .indexer import SearchIndexer
+from .query_parser import QueryParser
+
+
+@register_plugin("fts_search")
+class FTSSearchPlugin(BasePlugin):
+    """Full Text Search plugin using SQLite FTS5."""
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.indexer = SearchIndexer()  # creates, rebuilds and inspects the FTS5 tables
+        self.query_parser = QueryParser()  # turns raw user input into an FTS5 MATCH expression
+
+    @property
+    def metadata(self) -> PluginMetadata:
+        return PluginMetadata(  # every field below is a hard-coded constant
+            name="fts_search",
+            version="1.0.0",
+            description="Full text search using SQLite FTS5",
+            author="MarkiTect Team",
+            plugin_type=PluginType.EXTENSION
+        )
+
+    def initialize(self, db_path: str) -> None:
+        """Remember *db_path* on the instance and have the indexer set up its FTS tables there."""
+        self.db_path = db_path
+        self.indexer.initialize_fts_tables(db_path)
+
+    def rebuild_index(self, db_path: str) -> Dict[str, int]:
+        """Delegate to the indexer's rebuild_index for *db_path* and return its stats dict."""
+        return self.indexer.rebuild_index(db_path)
+
+    def search(self,
+               db_path: str,
+               query: str,
+               content_type: str = "all",
+               limit: int = 20,
+               offset: int = 0) -> List[Dict[str, Any]]:
+        """
+        Run a full text search, falling back to a LIKE scan if the FTS5 path raises.
+
+        Args:
+            db_path: Path to SQLite database
+            query: Raw user query; it is passed through QueryParser.parse_query before MATCH
+            content_type: "all", "files" or "schemas"; any other value runs no query at all
+            limit: Maximum rows per content type; for "all" the merged list is trimmed to it again
+            offset: Row offset handed to each per-type query
+
+        Returns:
+            List of result dicts; on the FTS5 path they are sorted by descending 'score'
+        """
+        conn = sqlite3.connect(db_path)
+        conn.row_factory = sqlite3.Row  # the helpers below read columns by name
+        cursor = conn.cursor()
+
+        results = []
+
+        try:
+            # Convert the raw input to FTS5 syntax (parse_query itself does no validation)
+            parsed_query = self.query_parser.parse_query(query)
+
+            if content_type in ["all", "files"]:
+                results.extend(self._search_files(cursor, parsed_query, limit, offset))
+
+            if content_type in ["all", "schemas"]:
+                results.extend(self._search_schemas(cursor, parsed_query, limit, offset))
+
+            # Merge the per-type hits by descending score; only "all" is re-trimmed to limit
+            results.sort(key=lambda x: x.get('score', 0), reverse=True)
+
+            if content_type == "all":
+                results = results[:limit]
+
+        except Exception as e:
+            # Any exception (not only sqlite3 errors) switches to the LIKE fallback on the raw query
+            results = self._fallback_search(cursor, query, content_type, limit, offset)
+
+        finally:
+            conn.close()
+
+        return results
+
+    def _search_files(self, cursor: sqlite3.Cursor, query: str, limit: int, offset: int) -> List[Dict[str, Any]]:
+        """Return markdown files matching the FTS5 expression *query*, best bm25 match first."""
+        cursor.execute("""
+            SELECT
+                mf.id, mf.filename, mf.content, mf.front_matter, mf.created_at,
+                fts.rank, bm25(fts_files) as score,
+                snippet(fts_files, 1, '<mark>', '</mark>', '...', 32) as highlight
+            FROM fts_files fts
+            JOIN markdown_files mf ON mf.id = fts.rowid
+            WHERE fts_files MATCH ?
+            ORDER BY score ASC  -- bm25() is lower (more negative) for better matches
+            LIMIT ? OFFSET ?
+        """, (query, limit, offset))
+
+        results = []
+        for row in cursor.fetchall():
+            # front_matter holds JSON text; empty or malformed values are reported as {}
+            front_matter_raw = {}
+            if row['front_matter']:
+                try:
+                    front_matter_raw = json.loads(row['front_matter'])
+                except json.JSONDecodeError:
+                    pass
+
+            results.append({
+                'type': 'file',
+                'score': abs(row['score']) if row['score'] else 1.0,  # flip sign: larger = better
+                'file': {
+                    'id': row['id'],
+                    'filename': row['filename'],
+                    'content': row['content'],
+                    'front_matter_raw': front_matter_raw,
+                    'created_at': row['created_at']
+                },
+                'highlight': row['highlight']
+            })
+
+        return results
+
+    def _search_schemas(self, cursor: sqlite3.Cursor, query: str, limit: int, offset: int) -> List[Dict[str, Any]]:
+        """Return schemas matching the FTS5 expression *query*, best bm25 match first."""
+        cursor.execute("""
+            SELECT
+                s.id, s.filename, s.title, s.description, s.schema_content,
+                s.created_at, s.updated_at,
+                fts.rank, bm25(fts_schemas) as score,
+                snippet(fts_schemas, 1, '<mark>', '</mark>', '...', 32) as highlight
+            FROM fts_schemas fts
+            JOIN schemas s ON s.id = fts.rowid
+            WHERE fts_schemas MATCH ?
+            ORDER BY score ASC  -- bm25() is lower (more negative) for better matches
+            LIMIT ? OFFSET ?
+        """, (query, limit, offset))
+
+        results = []
+        for row in cursor.fetchall():
+            # schema_content holds JSON text; empty or malformed values are reported as {}
+            schema_content = {}
+            if row['schema_content']:
+                try:
+                    schema_content = json.loads(row['schema_content'])
+                except json.JSONDecodeError:
+                    pass
+
+            results.append({
+                'type': 'schema',
+                'score': abs(row['score']) if row['score'] else 1.0,  # flip sign: larger = better
+                'schema': {
+                    'id': row['id'],
+                    'filename': row['filename'],
+                    'title': row['title'],
+                    'description': row['description'],
+                    'schema_content': schema_content,
+                    'created_at': row['created_at'],
+                    'updated_at': row['updated_at']
+                },
+                'highlight': row['highlight']
+            })
+
+        return results
+
+    def _fallback_search(self, cursor: sqlite3.Cursor, query: str, content_type: str, limit: int, offset: int) -> List[Dict[str, Any]]:
+        """Substring (LIKE) search used when the FTS5 path fails; every hit gets score 1.0."""
+        results = []
+
+        if content_type in ["all", "files"]:
+            cursor.execute("""
+                SELECT id, filename, content, front_matter, created_at
+                FROM markdown_files
+                WHERE filename LIKE ? OR content LIKE ?
+                ORDER BY
+                    CASE WHEN filename LIKE ? THEN 1 ELSE 2 END,
+                    created_at DESC
+                LIMIT ? OFFSET ?
+            """, (f"%{query}%", f"%{query}%", f"%{query}%", limit, offset))
+
+            for row in cursor.fetchall():
+                front_matter_raw = {}
+                if row['front_matter']:
+                    try:
+                        front_matter_raw = json.loads(row['front_matter'])
+                    except json.JSONDecodeError:
+                        pass
+
+                results.append({
+                    'type': 'file',
+                    'score': 1.0,
+                    'file': {
+                        'id': row['id'],
+                        'filename': row['filename'],
+                        'content': row['content'],
+                        'front_matter_raw': front_matter_raw,
+                        'created_at': row['created_at']
+                    },
+                    'highlight': self._extract_highlight(row['content'] or '', query)
+                })
+
+        if content_type in ["all", "schemas"]:
+            cursor.execute("""
+                SELECT id, filename, title, description, schema_content, created_at, updated_at
+                FROM schemas
+                WHERE filename LIKE ? OR title LIKE ? OR description LIKE ?
+                ORDER BY created_at DESC
+                LIMIT ? OFFSET ?
+            """, (f"%{query}%", f"%{query}%", f"%{query}%", limit, offset))
+
+            for row in cursor.fetchall():
+                schema_content = {}
+                if row['schema_content']:
+                    try:
+                        schema_content = json.loads(row['schema_content'])
+                    except json.JSONDecodeError:
+                        pass
+
+                results.append({
+                    'type': 'schema',
+                    'score': 1.0,
+                    'schema': {
+                        'id': row['id'],
+                        'filename': row['filename'],
+                        'title': row['title'],
+                        'description': row['description'],
+                        'schema_content': schema_content,
+                        'created_at': row['created_at'],
+                        'updated_at': row['updated_at']
+                    },
+                    'highlight': self._extract_highlight(row['description'] or '', query)
+                })
+
+        return results[:limit] if content_type == "all" and limit >= 0 else results  # "all" ran two LIMITed queries
+
+    def _extract_highlight(self, text: str, query: str, max_length: int = 100) -> str:
+        """Cut a short excerpt of *text* around the first case-insensitive hit of *query*."""
+        if not (text and query):
+            return ""
+
+        hit = text.lower().find(query.lower())
+
+        if hit < 0:
+            # No literal hit: fall back to the head of the text.
+            if len(text) > max_length:
+                return text[:max_length] + "..."
+            return text
+
+        # Keep a quarter of max_length before the hit and half of it after.
+        lead = max_length // 4
+        tail = max_length // 2
+        begin = max(0, hit - lead)
+        end = min(len(text), hit + len(query) + tail)
+
+        # Ellipses mark whichever sides were cut off.
+        prefix = "..." if begin > 0 else ""
+        suffix = "..." if end < len(text) else ""
+        excerpt = text[begin:end]
+
+        highlighted = prefix + excerpt + suffix
+        return highlighted
+
+    def get_search_stats(self, db_path: str) -> Dict[str, Any]:
+        """Report which FTS5 virtual tables named fts_* exist in *db_path* and each one's row count."""
+        conn = sqlite3.connect(db_path)
+        cursor = conn.cursor()
+
+        stats = {}
+
+        try:
+            # Count only real FTS5 virtual tables, not their shadow tables or the fts_status marker
+            cursor.execute("""
+                SELECT name FROM sqlite_master
+                WHERE type='table' AND name LIKE 'fts_%' AND sql LIKE 'CREATE VIRTUAL TABLE%USING fts5%'
+            """)
+            fts_tables = [row[0] for row in cursor.fetchall()]
+
+            stats['fts_enabled'] = len(fts_tables) > 0
+            stats['fts_tables'] = fts_tables
+
+            if stats['fts_enabled']:
+                # One '<table>_documents' entry per FTS table
+                for table in fts_tables:
+                    cursor.execute(f'SELECT COUNT(*) FROM "{table}"')  # quoted: name comes from the schema
+                    count = cursor.fetchone()[0]
+                    stats[f'{table}_documents'] = count
+
+        except sqlite3.Error:
+            stats['fts_enabled'] = False
+            stats['error'] = "FTS tables not available"
+
+        finally:
+            conn.close()
+
+        return stats
--- a/markitect/plugins/builtin/search/indexer.py
+++ b/markitect/plugins/builtin/search/indexer.py
@@ -0,0 +1,225 @@
+"""
+Search indexing functionality using SQLite FTS5.
+
+Handles creating and maintaining full text search indexes for MarkiTect content.
+"""
+
+import sqlite3
+import json
+from typing import Dict, Any, Optional
+from pathlib import Path
+
+
+class SearchIndexer:
+    """Manages full text search indexes using SQLite FTS5."""
+
+    def initialize_fts_tables(self, db_path: str) -> None:
+        """Create the FTS5 tables and sync triggers in *db_path*; on failure, log the error to fts_status."""
+        conn = sqlite3.connect(db_path)
+        cursor = conn.cursor()
+
+        try:
+            # External-content FTS5 table: indexes rows of markdown_files, keyed by its id
+            cursor.execute("""
+                CREATE VIRTUAL TABLE IF NOT EXISTS fts_files USING fts5(
+                    filename,
+                    content,
+                    front_matter,
+                    content='markdown_files',
+                    content_rowid='id'
+                )
+            """)
+
+            # External-content FTS5 table: indexes rows of schemas, keyed by its id
+            cursor.execute("""
+                CREATE VIRTUAL TABLE IF NOT EXISTS fts_schemas USING fts5(
+                    filename,
+                    title,
+                    description,
+                    content='schemas',
+                    content_rowid='id'
+                )
+            """)
+
+            # Triggers on the two base tables mirror every insert/update/delete into the indexes
+            self._create_fts_triggers(cursor)
+
+            conn.commit()
+
+        except sqlite3.Error as e:
+            # Any sqlite3 error raised above lands here, not only a missing FTS5 module
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS fts_status (
+                    fts_enabled INTEGER DEFAULT 0,
+                    error_message TEXT
+                )
+            """)
+            cursor.execute("""
+                INSERT OR REPLACE INTO fts_status (fts_enabled, error_message)
+                VALUES (0, ?)
+            """, (str(e),))
+            conn.commit()
+
+        finally:
+            conn.close()
+
+    def _create_fts_triggers(self, cursor: sqlite3.Cursor) -> None:
+        """Create insert/delete/update triggers on both base tables that mirror changes into FTS5."""
+
+        # markdown_files -> fts_files; the 'delete' insert passes the old values to be un-indexed
+        cursor.execute("""
+            CREATE TRIGGER IF NOT EXISTS fts_files_insert AFTER INSERT ON markdown_files BEGIN
+                INSERT INTO fts_files(rowid, filename, content, front_matter)
+                VALUES (new.id, new.filename, new.content, new.front_matter);
+            END
+        """)
+
+        cursor.execute("""
+            CREATE TRIGGER IF NOT EXISTS fts_files_delete AFTER DELETE ON markdown_files BEGIN
+                INSERT INTO fts_files(fts_files, rowid, filename, content, front_matter)
+                VALUES('delete', old.id, old.filename, old.content, old.front_matter);
+            END
+        """)
+
+        cursor.execute("""
+            CREATE TRIGGER IF NOT EXISTS fts_files_update AFTER UPDATE ON markdown_files BEGIN
+                INSERT INTO fts_files(fts_files, rowid, filename, content, front_matter)
+                VALUES('delete', old.id, old.filename, old.content, old.front_matter);
+                INSERT INTO fts_files(rowid, filename, content, front_matter)
+                VALUES (new.id, new.filename, new.content, new.front_matter);
+            END
+        """)
+
+        # schemas -> fts_schemas; same three triggers (schema_content is not an indexed column)
+        cursor.execute("""
+            CREATE TRIGGER IF NOT EXISTS fts_schemas_insert AFTER INSERT ON schemas BEGIN
+                INSERT INTO fts_schemas(rowid, filename, title, description)
+                VALUES (new.id, new.filename, new.title, new.description);
+            END
+        """)
+
+        cursor.execute("""
+            CREATE TRIGGER IF NOT EXISTS fts_schemas_delete AFTER DELETE ON schemas BEGIN
+                INSERT INTO fts_schemas(fts_schemas, rowid, filename, title, description)
+                VALUES('delete', old.id, old.filename, old.title, old.description);
+            END
+        """)
+
+        cursor.execute("""
+            CREATE TRIGGER IF NOT EXISTS fts_schemas_update AFTER UPDATE ON schemas BEGIN
+                INSERT INTO fts_schemas(fts_schemas, rowid, filename, title, description)
+                VALUES('delete', old.id, old.filename, old.title, old.description);
+                INSERT INTO fts_schemas(rowid, filename, title, description)
+                VALUES (new.id, new.filename, new.title, new.description);
+            END
+        """)
+
+    def rebuild_index(self, db_path: str) -> Dict[str, int]:
+        """Rebuild both FTS5 indexes from their content tables; returns row counts, plus 'error' on failure."""
+        conn = sqlite3.connect(db_path)
+        cursor = conn.cursor()
+
+        stats = {'files_indexed': 0, 'schemas_indexed': 0}
+
+        try:
+            # 'rebuild' discards each index and re-reads its content table (safe even if the index is stale)
+            cursor.execute("INSERT INTO fts_files(fts_files) VALUES('rebuild')")
+            cursor.execute("INSERT INTO fts_schemas(fts_schemas) VALUES('rebuild')")
+
+            # Report how many source rows the fresh files index covers
+            cursor.execute(
+                "SELECT COUNT(*) FROM markdown_files"
+            )
+            files_total = cursor.fetchone()[0]
+            stats['files_indexed'] = files_total
+
+            # Report how many source rows the fresh schemas index covers
+            cursor.execute(
+                "SELECT COUNT(*) FROM schemas"
+            )
+            schemas_total = cursor.fetchone()[0]
+            stats['schemas_indexed'] = schemas_total
+
+            # Optimize the FTS5 indexes
+            cursor.execute("INSERT INTO fts_files(fts_files) VALUES('optimize')")
+            cursor.execute("INSERT INTO fts_schemas(fts_schemas) VALUES('optimize')")
+
+            conn.commit()
+
+        except sqlite3.Error as e:
+            stats['error'] = str(e)
+            conn.rollback()
+
+        finally:
+            conn.close()
+
+        return stats
+
+    def optimize_index(self, db_path: str) -> None:
+        """Run the FTS5 'optimize' command on both indexes; any sqlite3 error is silently ignored."""
+        conn = sqlite3.connect(db_path)
+        cursor = conn.cursor()
+
+        try:
+            cursor.execute("INSERT INTO fts_files(fts_files) VALUES('optimize')")
+            cursor.execute("INSERT INTO fts_schemas(fts_schemas) VALUES('optimize')")
+            conn.commit()
+        except sqlite3.Error:
+            pass  # best-effort: nothing is reported, e.g. when the FTS tables do not exist
+        finally:
+            conn.close()
+
+    def get_index_info(self, db_path: str) -> Dict[str, Any]:
+        """List the FTS5 virtual tables in *db_path* with row counts and an integrity-check verdict."""
+        conn = sqlite3.connect(db_path)
+        cursor = conn.cursor()
+
+        info = {}
+
+        try:
+            # Count only real FTS5 virtual tables, not their shadow tables or the fts_status marker
+            cursor.execute("""
+                SELECT name FROM sqlite_master
+                WHERE type='table' AND name LIKE 'fts_%' AND sql LIKE 'CREATE VIRTUAL TABLE%USING fts5%'
+            """)
+            fts_tables = [row[0] for row in cursor.fetchall()]
+            info['fts_tables'] = fts_tables
+            info['fts_enabled'] = len(fts_tables) > 0
+
+            if info['fts_enabled']:
+                # Row counts for whichever of the two known indexes exist
+                for table in ['fts_files', 'fts_schemas']:
+                    if table in fts_tables:
+                        cursor.execute(f"SELECT COUNT(*) FROM {table}")
+                        info[f'{table}_count'] = cursor.fetchone()[0]
+
+                # 'integrity-check' raises if an index disagrees with its content (or the table is missing)
+                try:
+                    cursor.execute("INSERT INTO fts_files(fts_files) VALUES('integrity-check')")
+                    cursor.execute("INSERT INTO fts_schemas(fts_schemas) VALUES('integrity-check')")
+                    info['integrity_check'] = 'passed'
+                except sqlite3.Error as e:
+                    info['integrity_check'] = f'failed: {str(e)}'
+
+        except sqlite3.Error as e:
+            info['error'] = str(e)
+            info['fts_enabled'] = False
+
+        finally:
+            conn.close()
+
+        return info
+
+    def check_fts_availability(self, db_path: str) -> bool:
+        """Return True if FTS5 works here; the probe table lives in the temp schema, so *db_path* is untouched."""
+        conn = sqlite3.connect(db_path)
+        cursor = conn.cursor()
+
+        try:
+            cursor.execute("CREATE VIRTUAL TABLE IF NOT EXISTS temp.fts_test USING fts5(content)")
+            cursor.execute("DROP TABLE temp.fts_test")  # never a user's own main.fts_test
+            return True
+        except sqlite3.Error:
+            return False
+        finally:
+            conn.close()
--- a/markitect/plugins/builtin/search/query_parser.py
+++ b/markitect/plugins/builtin/search/query_parser.py
@@ -0,0 +1,273 @@
+"""
+Query parsing and processing for FTS5 full text search.
+
+Handles converting user queries into FTS5-compatible syntax and provides
+query validation and enhancement features.
+"""
+
+import re
+from typing import List, Dict, Any, Optional, Tuple
+
+
+class QueryParser:
+    """Parses and processes search queries for FTS5."""
+
+    def __init__(self) -> None:
+        # Keywords treated as operators, and FTS5 punctuation (the latter is not read by any method here)
+        self.fts_operators = ['AND', 'OR', 'NOT', 'NEAR']
+        self.fts_special_chars = ['"', '*', '^', '(', ')']
+
+    def parse_query(self, query: str) -> str:
+        """
+        Normalise a raw user query into an FTS5 MATCH expression.
+
+        Args:
+            query: Text exactly as the user typed it
+
+        Returns:
+            "" for blank input, the trimmed query unchanged when it already
+            looks like FTS5 syntax, otherwise the converted expression
+        """
+        trimmed = query.strip() if query else ""
+        if not trimmed:
+            return ""
+
+        # Hand-written FTS5 passes through untouched; plain text is converted.
+        already_fts5 = self._is_fts5_query(trimmed)
+        if already_fts5:
+            return trimmed
+
+        converted = self._convert_to_fts5(trimmed)
+        return converted
+
+    def _is_fts5_query(self, query: str) -> bool:
+        """Report whether *query* already looks like hand-written FTS5 syntax.
+
+        True when the text contains a space-delimited operator keyword
+        (compared case-insensitively), a double quote, an asterisk or a colon.
+        """
+        upper_query = query.upper()
+        has_operator = any(
+            f' {keyword} ' in upper_query
+            for keyword in self.fts_operators
+        )
+        if has_operator:
+            return True
+
+        # Phrase quotes, prefix wildcards and column filters respectively.
+        syntax_markers = ('"', '*', ':')
+        return any(
+            marker in query
+            for marker in syntax_markers
+        )
+
+    def _convert_to_fts5(self, query: str) -> str:
+        """Convert plain text to FTS5: terms get a prefix '*', are AND-joined; "not x" / "- x" become NOT x."""
+        # Handle quoted phrases - preserve them
+        phrases = []
+        phrase_pattern = r'"([^"]*)"'
+
+        def preserve_phrase(match):
+            phrases.append(match.group(0))
+            return f"__PHRASE_{len(phrases) - 1}__"
+
+        query = re.sub(phrase_pattern, preserve_phrase, query)
+
+        # Split into words, preserving operators
+        words = self._tokenize_query(query)
+
+        # Process each word
+        processed_words = []
+        i = 0
+        while i < len(words):
+            word = words[i].strip()
+
+            if not word:
+                i += 1
+                continue
+
+            # Restore preserved phrases
+            if word.startswith("__PHRASE_"):
+                phrase_index = int(word.replace("__PHRASE_", "").replace("__", ""))
+                processed_words.append(phrases[phrase_index])
+                i += 1
+                continue
+
+            # Negation: "not"/"-" plus the next word becomes one "NOT word" group
+            if word.lower() in ['not', '-']:
+                if i + 1 < len(words):
+                    next_word = words[i + 1].strip()
+                    if next_word and not next_word.upper() in self.fts_operators:
+                        processed_words.append(f'NOT {self._escape_term(next_word)}')
+                        i += 2
+                        continue
+
+            # Handle AND/OR operators
+            if word.upper() in self.fts_operators:
+                processed_words.append(word.upper())
+                i += 1
+                continue
+
+            # Handle prefix matching (add * for partial matches)
+            if len(word) >= 3 and word.isalnum():
+                processed_words.append(f'{self._escape_term(word)}*')
+            else:
+                processed_words.append(self._escape_term(word))
+
+            i += 1
+
+        # AND goes between adjacent terms, but never before a "NOT x" group: FTS5's NOT is binary (a NOT b)
+        result_parts = []
+        for i, part in enumerate(processed_words):
+            if i > 0 and part.upper() not in self.fts_operators:
+                prev_part = processed_words[i - 1]
+                if prev_part.upper() not in self.fts_operators and not part.startswith('NOT '):
+                    result_parts.append('AND')
+
+            result_parts.append(part)
+
+        return ' '.join(result_parts)
+
+    def _tokenize_query(self, query: str) -> List[str]:
+        """Split *query* on whitespace, keeping double-quoted spans (quotes included) intact."""
+        pieces: List[str] = []
+        buffer: List[str] = []
+        quoted = False
+
+        def flush() -> None:
+            # Emit the pending token, if any, and start a new one.
+            if buffer:
+                pieces.append("".join(buffer))
+                buffer.clear()
+
+        for ch in query:
+            if ch.isspace() and not quoted:
+                flush()
+                continue
+            if ch == '"':
+                quoted = not quoted
+            buffer.append(ch)
+
+        flush()
+        return pieces
+
+    def _escape_term(self, term: str) -> str:
+        """Return *term* as a valid FTS5 token: barewords unchanged, anything else as a quoted string."""
+        # FTS5 barewords may hold only alphanumerics, '_' and non-ASCII characters
+        if all(ch.isalnum() or ch == '_' or ord(ch) > 127 for ch in term):
+            return term
+        # Backslash is not an FTS5 escape: quote the term and double any embedded quotes
+        return '"' + term.replace('"', '""') + '"'
+
+    def build_column_query(self, query: str, columns: List[str]) -> str:
+        """Restrict the parsed *query* to *columns* ('col:(expr)' each, OR-joined); no columns returns *query* as given."""
+        if not columns:
+            return query
+
+        # Parse the main query
+        parsed_query = self.parse_query(query)
+
+        # Parenthesise so each column filter covers the whole expression, not just its first term
+        column_queries = [
+            f'{column}:({parsed_query})' for column in columns
+        ]
+
+        return ' OR '.join(column_queries)
+
+    def build_phrase_query(self, phrase: str) -> str:
+        """Build an FTS5 exact-phrase query, doubling embedded double quotes as FTS5 strings require."""
+        return '"' + phrase.replace('"', '""') + '"'
+
+    def build_proximity_query(self, terms: List[str], distance: int = 10) -> str:
+        """Combine *terms* into a NEAR(...) group; fewer than two terms are just space-joined."""
+        if len(terms) >= 2:
+            inner = " ".join(map(self._escape_term, terms))
+            return f'NEAR({inner}, {distance})'
+
+        return ' '.join(terms)
+
+    def validate_query(self, query: str) -> Tuple[bool, Optional[str]]:
+        """
+        Run cheap sanity checks on a query: non-empty, balanced quotes/parentheses, no dangling operator.
+
+        Returns:
+            Tuple of (is_valid, error_message); error_message is None when valid
+        """
+        if not query or not query.strip():
+            return False, "Query cannot be empty"
+
+        # Check for balanced quotes
+        quote_count = query.count('"')
+        if quote_count % 2 != 0:
+            return False, "Unmatched quotes in query"
+
+        # Check for balanced parentheses
+        open_parens = query.count('(')
+        close_parens = query.count(')')
+        if open_parens != close_parens:
+            return False, "Unmatched parentheses in query"
+
+        # An operator needs operands on both sides; compared case-insensitively, as the parser upper-cases them
+        upper_query = query.strip().upper()
+        for operator in self.fts_operators:
+            # A lone keyword, or one that leads or trails the query, is missing an operand
+            if upper_query == operator or upper_query.startswith(f'{operator} ') or upper_query.endswith(f' {operator}'):
+                return False, f"Operator {operator} cannot be at start or end of query"
+
+        return True, None
+
+    def get_query_terms(self, query: str) -> List[str]:
+        """Return the distinct lower-cased search terms of *query*, in first-seen order."""
+        # Parse query and extract terms
+        parsed = self.parse_query(query)
+
+        # Remove operators and special syntax
+        terms = []
+        tokens = self._tokenize_query(parsed)
+
+        for token in tokens:
+            token = token.strip()
+            if not token:
+                continue
+
+            # Skip operators
+            if token.upper() in self.fts_operators:
+                continue
+
+            # Remove NOT prefix
+            if token.upper().startswith('NOT '):
+                token = token[4:]
+
+            # Remove quotes
+            token = token.strip('"')
+
+            # Remove prefix wildcard
+            token = token.rstrip('*')
+
+            # Remove column specification
+            if ':' in token:
+                token = token.split(':', 1)[1]
+
+            if token and len(token) > 1:
+                terms.append(token.lower())
+
+        return list(dict.fromkeys(terms))  # de-duplicate; unlike set(), order is stable across runs
+
+    def suggest_corrections(self, query: str, available_terms: List[str]) -> List[str]:
+        """Suggest up to five entries of *available_terms* that contain a term of *query* (at most three per term)."""
+        suggestions = []
+        query_terms = self.get_query_terms(query)
+
+        for term in query_terms:
+            # Find similar terms using simple case-insensitive substring matching
+            matches = []
+            for available in available_terms:
+                if available.lower().startswith(term.lower()):
+                    matches.append(available)
+                elif term.lower() in available.lower():
+                    matches.append(available)
+
+            if matches:
+                suggestions.extend(matches[:3])  # Limit suggestions
+
+        return list(dict.fromkeys(suggestions))[:5]  # first-seen order, so the five kept are stable across runs