feat: implement lightweight full text search plugin using SQLite FTS5 (issue #83)

Added comprehensive full text search capabilities as a lightweight plugin.

Key features:
- SQLite FTS5-based search engine with no external dependencies
- Automatic indexing via database triggers for real-time updates
- Advanced query support: phrase search, boolean operators, proximity search
- Complete CLI interface with search commands
- Graceful fallback to LIKE queries when FTS5 is unavailable
- Plugin architecture integration for extensibility

CLI Commands:
- `markitect search init` - Initialize search indexes
- `markitect search query` - Perform full text searches
- `markitect search status` - View index statistics
- `markitect search rebuild` - Rebuild indexes from scratch

Search Features:
- Content type filtering (files, schemas, all)
- Result pagination and formatting options
- Query validation and syntax assistance
- Performance optimization and index maintenance

Technical Implementation:
- FTSSearchPlugin: Main search plugin class
- SearchIndexer: FTS5 table management and indexing
- QueryParser: Query optimization and FTS5 syntax conversion
- Comprehensive error handling and fallback mechanisms
- 25 test cases covering all functionality

Documentation includes complete usage guide and examples.

Resolves issue #83: Full text search

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-03 17:03:11 +02:00
parent 2a15dde228
commit 8179929a4a
7 changed files with 1994 additions and 0 deletions

View File

@@ -31,6 +31,12 @@ from .__version__ import get_version_info, get_release_info
from .batch_processor import BatchProcessor, ProcessingMode, ErrorHandling, create_file_processor
from .config_manager import ConfigurationManager
def get_database_path(config):
    """Return the database file location to use for this invocation.

    The ``database_path`` entry of *config* is looked up through its
    ``get`` method; when the entry is absent, the per-user default
    ``~/.markitect/markitect.db`` (with ``~`` expanded) is returned.
    """
    fallback_path = os.path.expanduser('~/.markitect/markitect.db')
    return config.get('database_path', fallback_path)
# Import legacy system components for advanced management
try:
from .legacy import (
@@ -5795,6 +5801,243 @@ def graphql_mutate(config, mutation, variables, endpoint, local, output_format):
sys.exit(1)
# =============================================================================
# Full Text Search Commands (Issue #83)
# =============================================================================
@cli.group('search')
@pass_config
def search_group(config):
    """Full text search operations using FTS5."""
    # Pure container for the `search` subcommands (init, query, status,
    # rebuild): it performs no work of its own. The docstring above is kept
    # verbatim because click shows it as the group's help text.
@search_group.command('init')
@click.option('--rebuild', is_flag=True, help='Rebuild existing indexes')
@pass_config
def search_init(config, rebuild):
    """Initialize full text search indexes."""
    # Maintainer notes live in comments: click renders the docstring above as
    # the command's help text, so it is left untouched.
    #
    # Flow: initialize the plugin against the configured database, optionally
    # rebuild the indexes, then report whether FTS5 is active. Any failure is
    # reported on stderr and terminates the process with exit status 1.
    db_path = get_database_path(config)

    try:
        # Imported lazily so a missing plugin only breaks the search commands.
        from .plugins.builtin.search import FTSSearchPlugin

        plugin = FTSSearchPlugin()
        plugin.initialize(db_path)

        if not rebuild:
            click.echo("✅ Search indexes initialized")
        else:
            click.echo("🔄 Rebuilding search indexes...")
            rebuild_stats = plugin.rebuild_index(db_path)
            files_count = rebuild_stats.get('files_indexed', 0)
            schemas_count = rebuild_stats.get('schemas_indexed', 0)
            click.echo(f"✅ Indexed {files_count} files and {schemas_count} schemas")
            # A rebuild error is surfaced as a warning; the command carries on.
            if 'error' in rebuild_stats:
                click.echo(f"⚠️ Warning: {rebuild_stats['error']}", err=True)

        # Report the resulting index state for both the init and rebuild paths.
        index_status = plugin.get_search_stats(db_path)
        if not index_status.get('fts_enabled'):
            click.echo("⚠️ FTS5 not available, will fall back to simple search")
        else:
            table_count = len(index_status.get('fts_tables', []))
            click.echo(f"📊 FTS5 enabled with {table_count} tables")

    except ImportError as exc:
        click.echo(f"❌ Search plugin not available: {exc}", err=True)
        sys.exit(1)
    except Exception as exc:
        click.echo(f"❌ Failed to initialize search: {exc}", err=True)
        # Full traceback only when the user asked for verbose output.
        if config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
def _truncate_preview(text, width=80):
    """Return *text* limited to *width* characters, adding '...' when cut.

    ``None`` (or any other falsy value) is treated as an empty string so a
    result with a missing body never raises.
    """
    text = text or ''
    return text[:width] + '...' if len(text) > width else text


def _search_result_row(result, no_highlight):
    """Build one ``[score, type, name, preview]`` table row for a search hit.

    Args:
        result: Result dict as returned by the search plugin. The keys read
            here are ``score``, ``type``, ``highlight`` and the payload stored
            under ``file`` or ``schema``.
        no_highlight: When true, ignore ``highlight`` and preview the raw
            ``content`` (files) or ``description`` (schemas) instead.

    Returns:
        A four-item list suitable for ``tabulate``.
    """
    # `or 0` guards against an explicit None score, which ':.2f' cannot format.
    score = f"{(result.get('score') or 0):.2f}"
    result_type = result.get('type', 'unknown')

    # Payload key -> field holding the text used for the plain preview.
    body_fields = {'file': 'content', 'schema': 'description'}
    if result_type not in body_fields:
        return [score, result_type, 'Unknown', '']

    payload = result.get(result_type) or {}
    name = payload.get('filename', 'Unknown')

    highlight = None if no_highlight else result.get('highlight')
    if highlight:
        preview = highlight[:80]
    else:
        # No highlight available (disabled, missing or None): show the plain
        # body text rather than an empty cell. Presumably this also covers
        # results from the non-FTS fallback search - confirm against the plugin.
        preview = _truncate_preview(payload.get(body_fields[result_type]))

    return [score, result_type, name, preview]


@search_group.command('query')
@click.argument('query')
@click.option('--type', 'content_type', default='all',
              type=click.Choice(['all', 'files', 'schemas']),
              help='Content type to search')
@click.option('--limit', default=20, help='Maximum number of results')
@click.option('--offset', default=0, help='Result offset for pagination')
@click.option('--format', 'output_format', default='table',
              type=click.Choice(['json', 'yaml', 'table']),
              help='Output format')
@click.option('--no-highlight', is_flag=True, help='Disable result highlighting')
@pass_config
def search_query(config, query, content_type, limit, offset, output_format, no_highlight):
    """Perform full text search query."""
    # Maintainer notes live in comments: click renders the docstring above as
    # the command's help text, so it is left untouched.
    #
    # Runs the query through FTSSearchPlugin.search and prints the results as
    # JSON, YAML or a grid table. Failures are reported on stderr and end the
    # process with exit status 1.
    db_path = get_database_path(config)

    try:
        # Imported lazily so a missing plugin only breaks the search commands.
        from .plugins.builtin.search import FTSSearchPlugin

        search_plugin = FTSSearchPlugin()
        results = search_plugin.search(db_path, query, content_type, limit, offset)

        if output_format == 'json':
            click.echo(json.dumps(results, indent=2, default=str))
        elif output_format == 'yaml':
            click.echo(yaml.dump(results, default_flow_style=False))
        else:
            # Table format
            if not results:
                click.echo(f"No results found for '{query}'")
                return

            headers = ['Score', 'Type', 'File/Schema', 'Preview']
            table_data = [_search_result_row(result, no_highlight) for result in results]

            click.echo(f"\n🔍 Found {len(results)} results for '{query}':\n")
            click.echo(tabulate(table_data, headers=headers, tablefmt='grid'))

            # A full page suggests more matches may exist beyond this one.
            if len(results) == limit:
                click.echo(f"\n💡 Showing first {limit} results. Use --limit and --offset for more.")

    except ImportError as e:
        click.echo(f"❌ Search plugin not available: {e}", err=True)
        sys.exit(1)
    except Exception as e:
        click.echo(f"❌ Search failed: {e}", err=True)
        # Full traceback only when the user asked for verbose output.
        if config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
@search_group.command('status')
@click.option('--format', 'output_format', default='table',
              type=click.Choice(['json', 'yaml', 'table']),
              help='Output format')
@pass_config
def search_status(config, output_format):
    """Show search index status and statistics."""
    # Maintainer notes live in comments: click renders the docstring above as
    # the command's help text, so it is left untouched.
    #
    # Prints the statistics returned by FTSSearchPlugin.get_search_stats as
    # JSON, YAML or a human-readable report. Failures are reported on stderr
    # and end the process with exit status 1.
    db_path = get_database_path(config)

    try:
        # Imported lazily so a missing plugin only breaks the search commands.
        from .plugins.builtin.search import FTSSearchPlugin

        search_plugin = FTSSearchPlugin()
        stats = search_plugin.get_search_stats(db_path)

        if output_format == 'json':
            click.echo(json.dumps(stats, indent=2))
        elif output_format == 'yaml':
            click.echo(yaml.dump(stats, default_flow_style=False))
        else:
            # Table format
            click.echo("📊 Search Index Status\n")

            if stats.get('fts_enabled'):
                click.echo("✅ FTS5 Full Text Search: Enabled")

                # Show table information
                if stats.get('fts_tables'):
                    click.echo(f"📋 FTS Tables: {', '.join(stats['fts_tables'])}")

                # Show document counts: every '<table>_documents' entry is a
                # per-table count. Only the trailing suffix is stripped, so a
                # table name that itself contains '_documents' stays intact.
                suffix = '_documents'
                for key, value in stats.items():
                    if key.endswith(suffix):
                        table_name = key[:-len(suffix)]
                        click.echo(f"📄 {table_name}: {value} documents")
            else:
                click.echo("❌ FTS5 Full Text Search: Disabled")
                if 'error' in stats:
                    click.echo(f"   Error: {stats['error']}")
                click.echo("   Falling back to simple LIKE-based search")

            # Additional index info. Kept inside the table branch so the
            # json/yaml outputs stay machine-parseable.
            from .plugins.builtin.search import SearchIndexer

            indexer = SearchIndexer()
            index_info = indexer.get_index_info(db_path)

            integrity = index_info.get('integrity_check')
            if integrity:
                # Both markers were empty strings before, which made the
                # conditional meaningless and left a stray leading space.
                status = "✅" if integrity == 'passed' else "❌"
                click.echo(f"{status} Index Integrity: {integrity}")

    except ImportError as e:
        click.echo(f"❌ Search plugin not available: {e}", err=True)
        sys.exit(1)
    except Exception as e:
        click.echo(f"❌ Failed to get search status: {e}", err=True)
        # Full traceback only when the user asked for verbose output.
        if config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
@search_group.command('rebuild')
@click.option('--optimize', is_flag=True, help='Optimize indexes after rebuild')
@pass_config
def search_rebuild(config, optimize):
    """Rebuild search indexes from scratch."""
    # Maintainer notes live in comments: click renders the docstring above as
    # the command's help text, so it is left untouched.
    #
    # Rebuilds the indexes through FTSSearchPlugin.rebuild_index, prints the
    # indexed counts and, with --optimize, runs SearchIndexer.optimize_index
    # afterwards. Any failure is reported on stderr with exit status 1.
    db_path = get_database_path(config)

    try:
        # Imported lazily so a missing plugin only breaks the search commands.
        from .plugins.builtin.search import FTSSearchPlugin, SearchIndexer

        click.echo("🔄 Rebuilding search indexes...")
        rebuild_stats = FTSSearchPlugin().rebuild_index(db_path)

        # Unlike `search init --rebuild`, an error here is fatal.
        # SystemExit is not an Exception subclass, so the handlers below
        # do not intercept this exit.
        if 'error' in rebuild_stats:
            click.echo(f"❌ Rebuild failed: {rebuild_stats['error']}", err=True)
            sys.exit(1)

        click.echo("✅ Rebuilt indexes successfully")
        click.echo(f"📄 Files indexed: {rebuild_stats.get('files_indexed', 0)}")
        click.echo(f"📋 Schemas indexed: {rebuild_stats.get('schemas_indexed', 0)}")

        if not optimize:
            return

        click.echo("🔧 Optimizing indexes...")
        SearchIndexer().optimize_index(db_path)
        click.echo("✅ Indexes optimized")

    except ImportError as exc:
        click.echo(f"❌ Search plugin not available: {exc}", err=True)
        sys.exit(1)
    except Exception as exc:
        click.echo(f"❌ Rebuild failed: {exc}", err=True)
        # Full traceback only when the user asked for verbose output.
        if config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
# Register search commands
# NOTE(review): search_group is declared with @cli.group('search'), which in
# click already attaches the group to `cli`; this explicit call re-adds the
# same command under the same name and looks redundant - confirm before
# removing.
cli.add_command(search_group)
# Register issue management commands
# (issues_group is defined outside the portion of the file shown here.)
cli.add_command(issues_group)