Added comprehensive full-text search capabilities as a lightweight plugin.

Key features:
- SQLite FTS5-based search engine with no external dependencies
- Automatic indexing via database triggers for real-time updates
- Advanced query support: phrase search, boolean operators, proximity search
- Complete CLI interface with search commands
- Graceful fallback to LIKE queries when FTS5 is unavailable
- Plugin architecture integration for extensibility

CLI commands:
- `markitect search init` - Initialize search indexes
- `markitect search query` - Perform full-text searches
- `markitect search status` - View index statistics
- `markitect search rebuild` - Rebuild indexes from scratch

Search features:
- Content type filtering (files, schemas, all)
- Result pagination and formatting options
- Query validation and syntax assistance
- Performance optimization and index maintenance

Technical implementation:
- FTSSearchPlugin: Main search plugin class
- SearchIndexer: FTS5 table management and indexing
- QueryParser: Query optimization and FTS5 syntax conversion
- Comprehensive error handling and fallback mechanisms
- 25 test cases covering all functionality

Documentation includes a complete usage guide and examples.

Resolves issue #83: Full text search

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
225 lines
8.0 KiB
Python
225 lines
8.0 KiB
Python
"""
|
|
Search indexing functionality using SQLite FTS5.
|
|
|
|
Handles creating and maintaining full text search indexes for MarkiTect content.
|
|
"""
|
|
|
|
import json
import logging
import sqlite3
from pathlib import Path
from typing import Any, Dict, Optional
|
|
|
|
|
|
class SearchIndexer:
    """Manage the SQLite FTS5 full text search indexes.

    Two external-content FTS5 tables are maintained: ``fts_files`` (backed by
    the ``markdown_files`` table) and ``fts_schemas`` (backed by ``schemas``).
    Both use the content table's ``id`` column as their rowid.  Every public
    method opens its own connection to ``db_path`` and closes it before
    returning.
    """

    # (FTS5 table, external content table, indexed columns, rebuild stats key).
    # All SQL identifiers interpolated into statements below come from this
    # constant, never from caller input.
    _INDEXES = (
        ("fts_files", "markdown_files",
         ("filename", "content", "front_matter"), "files_indexed"),
        ("fts_schemas", "schemas",
         ("filename", "title", "description"), "schemas_indexed"),
    )

    def initialize_fts_tables(self, db_path: str) -> None:
        """Create the FTS5 virtual tables and their synchronisation triggers.

        Any ``sqlite3.Error`` raised during setup (the FTS5 module being
        unavailable, but also e.g. a missing content table) is not re-raised.
        Instead a single row ``(fts_enabled=0, error_message=<error text>)``
        is stored in the ``fts_status`` table.  After a successful setup a
        marker left behind by an earlier failed attempt is removed, so the
        database looks the same as after a first-time success.

        Args:
            db_path: Path of the SQLite database file.

        Raises:
            sqlite3.Error: If the database cannot be opened or the failure
                marker itself cannot be written.
        """
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            try:
                for fts_table, content_table, columns, _ in self._INDEXES:
                    column_list = ", ".join(columns)
                    cursor.execute(
                        f"CREATE VIRTUAL TABLE IF NOT EXISTS {fts_table} "
                        f"USING fts5({column_list}, "
                        f"content='{content_table}', content_rowid='id')"
                    )

                # Create triggers to keep FTS5 indexes synchronized
                self._create_fts_triggers(cursor)

                # Setup succeeded: drop a stale failure marker, if any.
                cursor.execute("DROP TABLE IF EXISTS fts_status")
                conn.commit()
            except sqlite3.Error as e:
                self._record_fts_failure(conn, str(e))
        finally:
            conn.close()

    def _record_fts_failure(self, conn: sqlite3.Connection, message: str) -> None:
        """Store ``message`` as the only row of the ``fts_status`` marker table."""
        # Discard anything the failed setup may have left pending.
        conn.rollback()
        cursor = conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS fts_status (
                fts_enabled INTEGER DEFAULT 0,
                error_message TEXT
            )
        """)
        # The table has no key, so "INSERT OR REPLACE" would only ever append;
        # clear it explicitly to keep exactly one status row.
        cursor.execute("DELETE FROM fts_status")
        cursor.execute(
            "INSERT INTO fts_status (fts_enabled, error_message) VALUES (0, ?)",
            (message,),
        )
        conn.commit()

    def _create_fts_triggers(self, cursor: sqlite3.Cursor) -> None:
        """Create triggers that mirror content-table changes into the indexes.

        For every index three ``AFTER`` triggers are created on its content
        table, named ``<fts_table>_insert``, ``<fts_table>_delete`` and
        ``<fts_table>_update``.  Removal uses the FTS5 ``'delete'`` command
        with the old column values; an update is a removal of the old row
        followed by an insertion of the new one.

        Args:
            cursor: Cursor on the database in which to create the triggers.

        Raises:
            sqlite3.Error: If a trigger cannot be created.
        """
        for fts_table, content_table, columns, _ in self._INDEXES:
            column_list = ", ".join(columns)
            new_values = ", ".join(f"new.{column}" for column in columns)
            old_values = ", ".join(f"old.{column}" for column in columns)

            add_entry = (
                f"INSERT INTO {fts_table}(rowid, {column_list}) "
                f"VALUES (new.id, {new_values});"
            )
            remove_entry = (
                f"INSERT INTO {fts_table}({fts_table}, rowid, {column_list}) "
                f"VALUES('delete', old.id, {old_values});"
            )

            trigger_bodies = (
                ("insert", add_entry),
                ("delete", remove_entry),
                ("update", f"{remove_entry} {add_entry}"),
            )
            for event, body in trigger_bodies:
                cursor.execute(
                    f"CREATE TRIGGER IF NOT EXISTS {fts_table}_{event} "
                    f"AFTER {event.upper()} ON {content_table} "
                    f"BEGIN {body} END"
                )

    def rebuild_index(self, db_path: str) -> Dict[str, Any]:
        """Rebuild both full text indexes from their content tables.

        The FTS5 ``'rebuild'`` command is used because the indexes are
        external-content tables: it discards the whole index and re-reads the
        content table, which also removes entries whose content rows no
        longer exist.  Both indexes are optimized afterwards.

        Args:
            db_path: Path of the SQLite database file.

        Returns:
            A dict with ``files_indexed`` and ``schemas_indexed`` (row counts
            of the content tables that were indexed).  If a
            ``sqlite3.Error`` occurs the transaction is rolled back, both
            counts stay ``0`` and the error text is added under ``error``.
        """
        stats: Dict[str, Any] = {'files_indexed': 0, 'schemas_indexed': 0}

        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            try:
                counts = {}
                for fts_table, content_table, _, stats_key in self._INDEXES:
                    cursor.execute(
                        f"INSERT INTO {fts_table}({fts_table}) VALUES('rebuild')"
                    )
                    cursor.execute(f"SELECT COUNT(*) FROM {content_table}")
                    counts[stats_key] = cursor.fetchone()[0]

                # Optimize the FTS5 indexes
                for fts_table, _, _, _ in self._INDEXES:
                    cursor.execute(
                        f"INSERT INTO {fts_table}({fts_table}) VALUES('optimize')"
                    )

                conn.commit()
                # Publish the counts only once the rebuild is committed.
                stats.update(counts)
            except sqlite3.Error as e:
                conn.rollback()
                stats['error'] = str(e)
        finally:
            conn.close()

        return stats

    def optimize_index(self, db_path: str) -> None:
        """Run the FTS5 ``'optimize'`` command on both indexes (best effort).

        Failures never propagate to the caller; they are logged as warnings.
        Each index is optimized independently, so a problem with one (for
        example a missing table) does not prevent optimizing the other.

        Args:
            db_path: Path of the SQLite database file.
        """
        log = logging.getLogger(__name__)
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            for fts_table, _, _, _ in self._INDEXES:
                try:
                    cursor.execute(
                        f"INSERT INTO {fts_table}({fts_table}) VALUES('optimize')"
                    )
                except sqlite3.Error as e:
                    log.warning("Could not optimize %s: %s", fts_table, e)
            try:
                conn.commit()
            except sqlite3.Error as e:
                log.warning("Could not commit index optimization: %s", e)
        finally:
            conn.close()

    def get_index_info(self, db_path: str) -> Dict[str, Any]:
        """Collect status information about the search indexes.

        Args:
            db_path: Path of the SQLite database file.

        Returns:
            A dict with:

            * ``fts_tables``: names of all tables matching ``fts_%`` (this
              can include FTS5 shadow tables and the ``fts_status`` marker).
            * ``fts_enabled``: ``True`` only if ``fts_files`` and/or
              ``fts_schemas`` actually exists.
            * ``<table>_count``: row count for each existing index table.
            * ``integrity_check``: ``'passed'`` or ``'failed: <error>'``,
              present only when at least one index table exists.
            * ``error``: error text if a ``sqlite3.Error`` interrupted the
              inspection (``fts_enabled`` is then ``False``).
        """
        info: Dict[str, Any] = {}

        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            cursor.execute("""
                SELECT name FROM sqlite_master
                WHERE type='table' AND name LIKE 'fts_%'
            """)
            fts_tables = [row[0] for row in cursor.fetchall()]
            # Only the real index tables count; the fts_status failure marker
            # also matches the pattern above and must not enable FTS.
            present = [
                fts_table
                for fts_table, _, _, _ in self._INDEXES
                if fts_table in fts_tables
            ]

            info['fts_tables'] = fts_tables
            info['fts_enabled'] = bool(present)

            if present:
                # Get document counts
                for table in present:
                    cursor.execute(f"SELECT COUNT(*) FROM {table}")
                    info[f'{table}_count'] = cursor.fetchone()[0]

                # Get FTS5 integrity check (only for tables that exist)
                try:
                    for table in present:
                        cursor.execute(
                            f"INSERT INTO {table}({table}) VALUES('integrity-check')"
                        )
                    info['integrity_check'] = 'passed'
                except sqlite3.Error as e:
                    info['integrity_check'] = f'failed: {str(e)}'

        except sqlite3.Error as e:
            info['error'] = str(e)
            info['fts_enabled'] = False

        finally:
            conn.close()

        return info

    def check_fts_availability(self, db_path: str) -> bool:
        """Check whether the FTS5 module can be used on a connection to ``db_path``.

        The check creates and drops a throw-away FTS5 table in the
        connection's ``temp`` schema, so no table in the database itself is
        created, replaced or dropped.

        Args:
            db_path: Path of the SQLite database file.

        Returns:
            ``True`` if the probe table could be created and dropped,
            ``False`` if SQLite reported an error while doing so.

        Raises:
            sqlite3.Error: If the database cannot be opened.
        """
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            cursor.execute(
                "CREATE VIRTUAL TABLE temp.markitect_fts5_probe USING fts5(content)"
            )
            cursor.execute("DROP TABLE temp.markitect_fts5_probe")
            return True
        except sqlite3.Error:
            return False
        finally:
            conn.close()