Files
markitect-main/markitect/plugins/builtin/search/indexer.py
tegwick 8179929a4a feat: implement lightweight full text search plugin using SQLite FTS5 (issue #83)
Added comprehensive full text search capabilities as a lightweight plugin.

Key features:
- SQLite FTS5-based search engine with no external dependencies
- Automatic indexing via database triggers for real-time updates
- Advanced query support: phrase search, boolean operators, proximity search
- Complete CLI interface with search commands
- Graceful fallback to LIKE queries when FTS5 unavailable
- Plugin architecture integration for extensibility

CLI Commands:
- `markitect search init` - Initialize search indexes
- `markitect search query` - Perform full text searches
- `markitect search status` - View index statistics
- `markitect search rebuild` - Rebuild indexes from scratch

Search Features:
- Content type filtering (files, schemas, all)
- Result pagination and formatting options
- Query validation and syntax assistance
- Performance optimization and index maintenance

Technical Implementation:
- FTSSearchPlugin: Main search plugin class
- SearchIndexer: FTS5 table management and indexing
- QueryParser: Query optimization and FTS5 syntax conversion
- Comprehensive error handling and fallback mechanisms
- 25 test cases covering all functionality

Documentation includes complete usage guide and examples.

Resolves issue #83: Full text search

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-03 17:03:11 +02:00

225 lines
8.0 KiB
Python

"""
Search indexing functionality using SQLite FTS5.
Handles creating and maintaining full text search indexes for MarkiTect content.
"""
import json
import logging
import sqlite3
from pathlib import Path
from typing import Any, Dict, Optional
class SearchIndexer:
    """Manage the SQLite FTS5 full text search indexes.

    Two external-content FTS5 tables are maintained: ``fts_files`` (backed by
    the ``markdown_files`` table) and ``fts_schemas`` (backed by ``schemas``).
    Both content tables are addressed through their ``id`` column. Every
    public method opens its own connection to ``db_path`` and closes it before
    returning.
    """

    # One entry per index:
    # (FTS5 table, content table, indexed columns, key used in rebuild stats).
    _INDEX_SPECS = (
        ("fts_files", "markdown_files",
         ("filename", "content", "front_matter"), "files_indexed"),
        ("fts_schemas", "schemas",
         ("filename", "title", "description"), "schemas_indexed"),
    )

    def initialize_fts_tables(self, db_path: str) -> None:
        """Create the FTS5 virtual tables and their synchronisation triggers.

        Rows that already exist in the content tables are NOT indexed by this
        call; only later inserts/updates/deletes are picked up by the
        triggers. Call :meth:`rebuild_index` to index pre-existing rows.

        If any SQLite error occurs (for example FTS5 is not compiled in, or a
        content table is missing), the error is not raised. Instead a single
        row ``(fts_enabled=0, error_message=<error text>)`` is stored in the
        ``fts_status`` table as a fallback indicator.

        Args:
            db_path: Path of the SQLite database file.
        """
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()
        try:
            for fts_table, source_table, columns, _ in self._INDEX_SPECS:
                cursor.execute(
                    f"CREATE VIRTUAL TABLE IF NOT EXISTS {fts_table} USING fts5("
                    f"{', '.join(columns)}, "
                    f"content='{source_table}', content_rowid='id')"
                )
            # Create triggers to keep FTS5 indexes synchronized
            self._create_fts_triggers(cursor)
            conn.commit()
        except sqlite3.Error as e:
            # Discard anything still pending before recording the failure.
            conn.rollback()
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS fts_status (
                    fts_enabled INTEGER DEFAULT 0,
                    error_message TEXT
                )
            """)
            # fts_status has no key, so INSERT OR REPLACE would append a new
            # row on every failed call; keep exactly one status row instead.
            cursor.execute("DELETE FROM fts_status")
            cursor.execute(
                "INSERT INTO fts_status (fts_enabled, error_message) VALUES (0, ?)",
                (str(e),),
            )
            conn.commit()
        finally:
            conn.close()

    def _create_fts_triggers(self, cursor: sqlite3.Cursor) -> None:
        """Create the insert/delete/update triggers that maintain each index.

        For every index three triggers named ``<fts>_insert``, ``<fts>_delete``
        and ``<fts>_update`` are created on the content table. Removal of an
        old row uses the FTS5 ``'delete'`` command with the old column values;
        an update is a removal followed by an insert.

        Args:
            cursor: Cursor on the target database; the caller commits.
        """
        for fts_table, source_table, columns, _ in self._INDEX_SPECS:
            column_list = ", ".join(columns)
            new_values = ", ".join(f"new.{name}" for name in columns)
            old_values = ", ".join(f"old.{name}" for name in columns)
            add_row = (
                f"INSERT INTO {fts_table}(rowid, {column_list}) "
                f"VALUES (new.id, {new_values});"
            )
            remove_row = (
                f"INSERT INTO {fts_table}({fts_table}, rowid, {column_list}) "
                f"VALUES('delete', old.id, {old_values});"
            )
            trigger_bodies = (
                ("insert", "INSERT", add_row),
                ("delete", "DELETE", remove_row),
                ("update", "UPDATE", f"{remove_row} {add_row}"),
            )
            for suffix, event, body in trigger_bodies:
                cursor.execute(
                    f"CREATE TRIGGER IF NOT EXISTS {fts_table}_{suffix} "
                    f"AFTER {event} ON {source_table} BEGIN {body} END"
                )

    def rebuild_index(self, db_path: str) -> Dict[str, Any]:
        """Rebuild both full text indexes from their content tables.

        Uses the FTS5 ``'rebuild'`` command, which discards the existing index
        and re-reads the content table. This works even when the index and
        the content table have drifted apart, which a row-by-row ``DELETE``
        on an external-content table does not guarantee.

        Args:
            db_path: Path of the SQLite database file.

        Returns:
            ``{'files_indexed': int, 'schemas_indexed': int}`` holding the
            number of rows in each content table after a successful rebuild.
            On an SQLite error the transaction is rolled back, both counts
            stay ``0`` and an ``'error'`` key carries the error text.
        """
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()
        stats: Dict[str, Any] = {'files_indexed': 0, 'schemas_indexed': 0}
        try:
            counts = {}
            for fts_table, source_table, _, stats_key in self._INDEX_SPECS:
                cursor.execute(
                    f"INSERT INTO {fts_table}({fts_table}) VALUES('rebuild')"
                )
                cursor.execute(f"SELECT COUNT(*) FROM {source_table}")
                counts[stats_key] = cursor.fetchone()[0]
            # Optimize the FTS5 indexes
            for fts_table, _, _, _ in self._INDEX_SPECS:
                cursor.execute(
                    f"INSERT INTO {fts_table}({fts_table}) VALUES('optimize')"
                )
            conn.commit()
            # Publish counts only once the rebuild is actually persisted.
            stats.update(counts)
        except sqlite3.Error as e:
            stats['error'] = str(e)
            conn.rollback()
        finally:
            conn.close()
        return stats

    def optimize_index(self, db_path: str) -> None:
        """Run the FTS5 ``'optimize'`` command on both indexes (best effort).

        SQLite errors (for example when the FTS tables do not exist) are not
        raised; the pending work is rolled back and the error is logged at
        debug level.

        Args:
            db_path: Path of the SQLite database file.
        """
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()
        try:
            for fts_table, _, _, _ in self._INDEX_SPECS:
                cursor.execute(
                    f"INSERT INTO {fts_table}({fts_table}) VALUES('optimize')"
                )
            conn.commit()
        except sqlite3.Error as e:
            conn.rollback()
            logging.getLogger(__name__).debug(
                "FTS5 optimize skipped for %s: %s", db_path, e
            )
        finally:
            conn.close()

    def get_index_info(self, db_path: str) -> Dict[str, Any]:
        """Report which search indexes exist and whether they are consistent.

        Args:
            db_path: Path of the SQLite database file.

        Returns:
            A dict with:

            * ``fts_tables``: the index tables (``fts_files`` /
              ``fts_schemas``) present in the database. FTS5 shadow tables
              and the ``fts_status`` fallback table are not listed.
            * ``fts_enabled``: ``True`` when at least one index table exists.
            * ``<table>_count``: row count per existing index table.
            * ``integrity_check``: ``'passed'`` or ``'failed: <error>'``
              (only when ``fts_enabled``).
            * ``error``: error text, set together with ``fts_enabled=False``
              when inspecting the database itself fails.
        """
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()
        info: Dict[str, Any] = {}
        try:
            index_names = [spec[0] for spec in self._INDEX_SPECS]
            placeholders = ", ".join("?" for _ in index_names)
            # Exact-name lookup: a LIKE 'fts_%' pattern would also match the
            # shadow tables and the fts_status fallback table.
            cursor.execute(
                "SELECT name FROM sqlite_master "
                f"WHERE type='table' AND name IN ({placeholders})",
                index_names,
            )
            present = {row[0] for row in cursor.fetchall()}
            fts_tables = [name for name in index_names if name in present]
            info['fts_tables'] = fts_tables
            info['fts_enabled'] = bool(fts_tables)
            if fts_tables:
                # Get document counts (names come from _INDEX_SPECS only).
                for table in fts_tables:
                    cursor.execute(f"SELECT COUNT(*) FROM {table}")
                    info[f'{table}_count'] = cursor.fetchone()[0]
                # Get FTS5 integrity check, only for tables that exist.
                try:
                    for table in fts_tables:
                        cursor.execute(
                            f"INSERT INTO {table}({table}) VALUES('integrity-check')"
                        )
                    info['integrity_check'] = 'passed'
                except sqlite3.Error as e:
                    info['integrity_check'] = f'failed: {str(e)}'
        except sqlite3.Error as e:
            info['error'] = str(e)
            info['fts_enabled'] = False
        finally:
            # Nothing is committed; closing discards the implicit transaction
            # opened by the integrity-check statements.
            conn.close()
        return info

    def check_fts_availability(self, db_path: str) -> bool:
        """Check whether the linked SQLite library supports FTS5.

        The probe runs against a throwaway in-memory database, so the
        database at ``db_path`` is never created, written to, or stripped of
        a table that happens to share the probe table's name.

        Args:
            db_path: Accepted for interface compatibility; not opened.

        Returns:
            ``True`` if an FTS5 virtual table can be created, else ``False``.
        """
        conn = sqlite3.connect(":memory:")
        try:
            conn.execute("CREATE VIRTUAL TABLE fts_probe USING fts5(content)")
            return True
        except sqlite3.Error:
            return False
        finally:
            conn.close()