"""
Search indexing functionality using SQLite FTS5.

Handles creating and maintaining full text search indexes for MarkiTect content.
"""

import json
import logging
import sqlite3
from pathlib import Path
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)


class SearchIndexer:
    """Manages full text search indexes using SQLite FTS5.

    Two external-content FTS5 tables are maintained: ``fts_files`` (backed by
    the ``markdown_files`` table) and ``fts_schemas`` (backed by ``schemas``).
    Every method opens its own connection to ``db_path`` and closes it before
    returning.
    """

    # The FTS5 virtual tables this class creates. Only these count as
    # "FTS is enabled"; FTS5 shadow tables and the ``fts_status`` fallback
    # table share the ``fts_`` prefix but do not.
    _CORE_FTS_TABLES = ('fts_files', 'fts_schemas')

    def initialize_fts_tables(self, db_path: str) -> None:
        """Initialize FTS5 virtual tables and their synchronization triggers.

        If any step fails with ``sqlite3.Error`` (for example because FTS5 is
        not compiled in, or a content table is missing), the failure is
        recorded as a single row in a plain ``fts_status`` table with
        ``fts_enabled = 0`` and the error message.

        Args:
            db_path: Path of the SQLite database file.

        Raises:
            sqlite3.Error: If recording the fallback status itself fails.
        """
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        try:
            # Create FTS5 table for markdown files
            cursor.execute("""
                CREATE VIRTUAL TABLE IF NOT EXISTS fts_files USING fts5(
                    filename,
                    content,
                    front_matter,
                    content='markdown_files',
                    content_rowid='id'
                )
            """)

            # Create FTS5 table for schemas
            cursor.execute("""
                CREATE VIRTUAL TABLE IF NOT EXISTS fts_schemas USING fts5(
                    filename,
                    title,
                    description,
                    content='schemas',
                    content_rowid='id'
                )
            """)

            # Create triggers to keep FTS5 indexes synchronized
            self._create_fts_triggers(cursor)

            conn.commit()

        except sqlite3.Error as e:
            # If FTS5 is not available, create a fallback indicator
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS fts_status (
                    fts_enabled INTEGER DEFAULT 0,
                    error_message TEXT
                )
            """)
            # fts_status has no key, so INSERT OR REPLACE would append a new
            # row on every failed attempt. Clear it first to keep one row
            # holding the most recent error.
            cursor.execute("DELETE FROM fts_status")
            cursor.execute("""
                INSERT INTO fts_status (fts_enabled, error_message)
                VALUES (0, ?)
            """, (str(e),))
            conn.commit()
        finally:
            conn.close()

    def _create_fts_triggers(self, cursor: sqlite3.Cursor) -> None:
        """Create triggers that mirror content-table changes into FTS5.

        For each content table an AFTER INSERT, AFTER DELETE and AFTER UPDATE
        trigger is created. Deletions use the FTS5 ``'delete'`` command with
        the old column values, which is how entries are removed from an
        external-content table.

        Args:
            cursor: Cursor on the target database; the caller commits.
        """
        # Triggers for markdown_files table
        cursor.execute("""
            CREATE TRIGGER IF NOT EXISTS fts_files_insert
            AFTER INSERT ON markdown_files
            BEGIN
                INSERT INTO fts_files(rowid, filename, content, front_matter)
                VALUES (new.id, new.filename, new.content, new.front_matter);
            END
        """)

        cursor.execute("""
            CREATE TRIGGER IF NOT EXISTS fts_files_delete
            AFTER DELETE ON markdown_files
            BEGIN
                INSERT INTO fts_files(fts_files, rowid, filename, content, front_matter)
                VALUES('delete', old.id, old.filename, old.content, old.front_matter);
            END
        """)

        cursor.execute("""
            CREATE TRIGGER IF NOT EXISTS fts_files_update
            AFTER UPDATE ON markdown_files
            BEGIN
                INSERT INTO fts_files(fts_files, rowid, filename, content, front_matter)
                VALUES('delete', old.id, old.filename, old.content, old.front_matter);
                INSERT INTO fts_files(rowid, filename, content, front_matter)
                VALUES (new.id, new.filename, new.content, new.front_matter);
            END
        """)

        # Triggers for schemas table
        cursor.execute("""
            CREATE TRIGGER IF NOT EXISTS fts_schemas_insert
            AFTER INSERT ON schemas
            BEGIN
                INSERT INTO fts_schemas(rowid, filename, title, description)
                VALUES (new.id, new.filename, new.title, new.description);
            END
        """)

        cursor.execute("""
            CREATE TRIGGER IF NOT EXISTS fts_schemas_delete
            AFTER DELETE ON schemas
            BEGIN
                INSERT INTO fts_schemas(fts_schemas, rowid, filename, title, description)
                VALUES('delete', old.id, old.filename, old.title, old.description);
            END
        """)

        cursor.execute("""
            CREATE TRIGGER IF NOT EXISTS fts_schemas_update
            AFTER UPDATE ON schemas
            BEGIN
                INSERT INTO fts_schemas(fts_schemas, rowid, filename, title, description)
                VALUES('delete', old.id, old.filename, old.title, old.description);
                INSERT INTO fts_schemas(rowid, filename, title, description)
                VALUES (new.id, new.filename, new.title, new.description);
            END
        """)

    def rebuild_index(self, db_path: str) -> Dict[str, Any]:
        """Rebuild the full text search index from scratch.

        Both indexes are emptied, repopulated from their content tables and
        optimized inside one transaction; on failure the transaction is
        rolled back.

        Args:
            db_path: Path of the SQLite database file.

        Returns:
            A dict with ``files_indexed`` and ``schemas_indexed`` row counts.
            If a ``sqlite3.Error`` occurred, an ``error`` key holds its
            message.
        """
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        stats: Dict[str, Any] = {'files_indexed': 0, 'schemas_indexed': 0}

        try:
            # Clear existing FTS5 data. These are external-content tables, so
            # a plain DELETE would derive the entries to remove from the
            # *current* content rows and leave stale tokens behind when the
            # index is out of sync. 'delete-all' drops the whole index
            # regardless of what the content tables contain now.
            cursor.execute("INSERT INTO fts_files(fts_files) VALUES('delete-all')")
            cursor.execute("INSERT INTO fts_schemas(fts_schemas) VALUES('delete-all')")

            # Rebuild files index
            cursor.execute("""
                INSERT INTO fts_files(rowid, filename, content, front_matter)
                SELECT id, filename, content, front_matter FROM markdown_files
            """)
            stats['files_indexed'] = cursor.rowcount

            # Rebuild schemas index
            cursor.execute("""
                INSERT INTO fts_schemas(rowid, filename, title, description)
                SELECT id, filename, title, description FROM schemas
            """)
            stats['schemas_indexed'] = cursor.rowcount

            # Optimize the FTS5 indexes
            cursor.execute("INSERT INTO fts_files(fts_files) VALUES('optimize')")
            cursor.execute("INSERT INTO fts_schemas(fts_schemas) VALUES('optimize')")

            conn.commit()

        except sqlite3.Error as e:
            stats['error'] = str(e)
            conn.rollback()
        finally:
            conn.close()

        return stats

    def optimize_index(self, db_path: str) -> None:
        """Optimize FTS5 indexes for better performance.

        This is best-effort: a ``sqlite3.Error`` is logged as a warning and
        the pending work is rolled back; nothing is raised to the caller.

        Args:
            db_path: Path of the SQLite database file.
        """
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        try:
            cursor.execute("INSERT INTO fts_files(fts_files) VALUES('optimize')")
            cursor.execute("INSERT INTO fts_schemas(fts_schemas) VALUES('optimize')")
            conn.commit()
        except sqlite3.Error as e:
            # Still non-fatal, but leave a trace instead of failing silently.
            conn.rollback()
            logger.warning("FTS5 optimize failed for %s: %s", db_path, e)
        finally:
            conn.close()

    def get_index_info(self, db_path: str) -> Dict[str, Any]:
        """Get information about the current search indexes.

        Args:
            db_path: Path of the SQLite database file.

        Returns:
            A dict containing:

            * ``fts_tables``: names of all tables matching ``fts_%``
              (this includes FTS5 shadow tables and ``fts_status``).
            * ``fts_enabled``: ``True`` only if ``fts_files`` or
              ``fts_schemas`` exists.
            * ``<table>_count``: row count for each existing index table.
            * ``integrity_check``: ``'passed'`` or ``'failed: <message>'``
              (present only when ``fts_enabled`` is true).
            * ``error``: message of a ``sqlite3.Error`` raised while
              collecting the above; ``fts_enabled`` is then ``False``.
        """
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        info: Dict[str, Any] = {}

        try:
            # Check if FTS tables exist
            cursor.execute("""
                SELECT name FROM sqlite_master
                WHERE type='table' AND name LIKE 'fts_%'
            """)
            fts_tables = [row[0] for row in cursor.fetchall()]
            info['fts_tables'] = fts_tables

            # The fallback table fts_status also matches the pattern above and
            # exists precisely when FTS is NOT usable, so only the real index
            # tables decide whether FTS is enabled.
            present = [t for t in self._CORE_FTS_TABLES if t in fts_tables]
            info['fts_enabled'] = bool(present)

            if present:
                # Get document counts
                for table in present:
                    cursor.execute(f"SELECT COUNT(*) FROM {table}")
                    info[f'{table}_count'] = cursor.fetchone()[0]

                # Get FTS5 integrity check (only for tables that exist, so a
                # missing sibling table is not reported as corruption)
                try:
                    for table in present:
                        cursor.execute(
                            f"INSERT INTO {table}({table}) VALUES('integrity-check')"
                        )
                    info['integrity_check'] = 'passed'
                except sqlite3.Error as e:
                    info['integrity_check'] = f'failed: {str(e)}'

        except sqlite3.Error as e:
            info['error'] = str(e)
            info['fts_enabled'] = False
        finally:
            conn.close()

        return info

    def check_fts_availability(self, db_path: str) -> bool:
        """Check if FTS5 is available in SQLite.

        Creates and immediately drops a throwaway ``fts_test`` FTS5 table in
        the database at ``db_path``.

        Args:
            db_path: Path of the SQLite database file used for the probe.

        Returns:
            ``True`` if the probe table could be created and dropped,
            ``False`` if SQLite raised an error.
        """
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        try:
            cursor.execute("CREATE VIRTUAL TABLE IF NOT EXISTS fts_test USING fts5(content)")
            cursor.execute("DROP TABLE fts_test")
            return True
        except sqlite3.Error:
            return False
        finally:
            conn.close()