feat: implement lightweight full text search plugin using SQLite FTS5 (issue #83)
Added comprehensive full text search capabilities as a lightweight plugin. Key features: - SQLite FTS5-based search engine with no external dependencies - Automatic indexing via database triggers for real-time updates - Advanced query support: phrase search, boolean operators, proximity search - Complete CLI interface with search commands - Graceful fallback to LIKE queries when FTS5 unavailable - Plugin architecture integration for extensibility CLI Commands: - `markitect search init` - Initialize search indexes - `markitect search query` - Perform full text searches - `markitect search status` - View index statistics - `markitect search rebuild` - Rebuild indexes from scratch Search Features: - Content type filtering (files, schemas, all) - Result pagination and formatting options - Query validation and syntax assistance - Performance optimization and index maintenance Technical Implementation: - FTSSearchPlugin: Main search plugin class - SearchIndexer: FTS5 table management and indexing - QueryParser: Query optimization and FTS5 syntax conversion - Comprehensive error handling and fallback mechanisms - 25 test cases covering all functionality Documentation includes complete usage guide and examples. Resolves issue #83: Full text search 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
243
markitect/cli.py
243
markitect/cli.py
@@ -31,6 +31,12 @@ from .__version__ import get_version_info, get_release_info
|
||||
from .batch_processor import BatchProcessor, ProcessingMode, ErrorHandling, create_file_processor
|
||||
from .config_manager import ConfigurationManager
|
||||
|
||||
|
||||
def get_database_path(config):
    """Get database path from config.

    Args:
        config: Object exposing ``get(key, default)`` (a dict works).

    Returns:
        The value stored under ``'database_path'``. When the key is missing
        or holds an empty value (``None`` or ``''``), the default
        ``~/.markitect/markitect.db`` with ``~`` expanded is returned instead.
    """
    # Use `or` instead of relying only on the .get() default: an entry that
    # exists but is empty would otherwise be handed to callers as the path.
    configured_path = config.get('database_path', None)
    return configured_path or os.path.expanduser('~/.markitect/markitect.db')
|
||||
|
||||
|
||||
# Import legacy system components for advanced management
|
||||
try:
|
||||
from .legacy import (
|
||||
@@ -5795,6 +5801,243 @@ def graphql_mutate(config, mutation, variables, endpoint, local, output_format):
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Full Text Search Commands (Issue #83)
|
||||
# =============================================================================
|
||||
|
||||
@cli.group('search')
@pass_config
def search_group(config):
    """Full text search operations using FTS5."""
    # Container group only: the init/query/status/rebuild subcommands attach
    # themselves through @search_group.command, so no work happens here.
    # The docstring above doubles as the group's help text.
|
||||
|
||||
|
||||
@search_group.command('init')
@click.option('--rebuild', is_flag=True, help='Rebuild existing indexes')
@pass_config
def search_init(config, rebuild):
    """Initialize full text search indexes."""
    # Resolve the database location once; every plugin call below uses it.
    database = get_database_path(config)

    try:
        # Imported inside the try so an ImportError is reported by the
        # dedicated handler below.
        from .plugins.builtin.search import FTSSearchPlugin

        plugin = FTSSearchPlugin()
        plugin.initialize(database)

        if not rebuild:
            click.echo("✅ Search indexes initialized")
        else:
            click.echo("🔄 Rebuilding search indexes...")
            rebuild_stats = plugin.rebuild_index(database)
            files_count = rebuild_stats.get('files_indexed', 0)
            schemas_count = rebuild_stats.get('schemas_indexed', 0)
            click.echo(f"✅ Indexed {files_count} files and {schemas_count} schemas")

            # An 'error' entry is only reported as a warning on stderr;
            # the command keeps going.
            if 'error' in rebuild_stats:
                click.echo(f"⚠️ Warning: {rebuild_stats['error']}", err=True)

        # Report what the plugin says about FTS5 availability.
        index_stats = plugin.get_search_stats(database)
        if index_stats.get('fts_enabled'):
            table_count = len(index_stats.get('fts_tables', []))
            click.echo(f"📊 FTS5 enabled with {table_count} tables")
        else:
            click.echo("⚠️ FTS5 not available, will fall back to simple search")

    except ImportError as exc:
        click.echo(f"❌ Search plugin not available: {exc}", err=True)
        sys.exit(1)
    except Exception as exc:
        click.echo(f"❌ Failed to initialize search: {exc}", err=True)
        # The full traceback is printed only when the config asks for verbosity.
        if config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
|
||||
|
||||
|
||||
def _search_preview(text, max_length=80):
    """Return *text* cut to *max_length* characters, adding '...' when cut.

    ``None`` (or any other falsy value) is treated as an empty string.
    """
    text = text or ''
    if len(text) > max_length:
        return text[:max_length] + '...'
    return text


def _search_result_row(result, no_highlight):
    """Build one ``[score, type, name, preview]`` table row for a search hit.

    Args:
        result: Mapping for a single hit; read via ``get`` for the keys
            'score', 'type', 'highlight' and 'file' / 'schema'.
        no_highlight: When true, the preview is always taken from the stored
            content (files) or description (schemas).

    Returns:
        A four-element list matching the table headers used by
        ``search_query``.
    """
    # A missing or None score is rendered as 0.00 rather than raising.
    score = f"{result.get('score') or 0:.2f}"
    result_type = result.get('type', 'unknown')

    if result_type == 'file':
        details = result.get('file') or {}
        fallback_text = details.get('content')
    elif result_type == 'schema':
        details = result.get('schema') or {}
        fallback_text = details.get('description')
    else:
        # Unrecognised result types get a placeholder name and no preview.
        return [score, result_type, 'Unknown', '']

    name = details.get('filename', 'Unknown')

    highlight = None if no_highlight else result.get('highlight')
    if highlight:
        preview = highlight[:80]
    else:
        # No usable highlight on this hit: show the start of the stored text
        # instead of leaving the Preview column blank.
        preview = _search_preview(fallback_text)

    return [score, result_type, name, preview]


@search_group.command('query')
@click.argument('query')
@click.option('--type', 'content_type', default='all',
              type=click.Choice(['all', 'files', 'schemas']),
              help='Content type to search')
@click.option('--limit', default=20, help='Maximum number of results')
@click.option('--offset', default=0, help='Result offset for pagination')
@click.option('--format', 'output_format', default='table',
              type=click.Choice(['json', 'yaml', 'table']),
              help='Output format')
@click.option('--no-highlight', is_flag=True, help='Disable result highlighting')
@pass_config
def search_query(config, query, content_type, limit, offset, output_format, no_highlight):
    """Perform full text search query."""
    db_path = get_database_path(config)

    try:
        # Imported inside the try so an ImportError is reported by the
        # dedicated handler below.
        from .plugins.builtin.search import FTSSearchPlugin

        search_plugin = FTSSearchPlugin()
        results = search_plugin.search(db_path, query, content_type, limit, offset)

        if output_format == 'json':
            click.echo(json.dumps(results, indent=2, default=str))
        elif output_format == 'yaml':
            click.echo(yaml.dump(results, default_flow_style=False))
        elif not results:
            # Table format with nothing to show.
            click.echo(f"No results found for '{query}'")
        else:
            # Table format
            headers = ['Score', 'Type', 'File/Schema', 'Preview']
            table_data = [_search_result_row(result, no_highlight) for result in results]

            click.echo(f"\n🔍 Found {len(results)} results for '{query}':\n")
            click.echo(tabulate(table_data, headers=headers, tablefmt='grid'))

            # A full page suggests more hits may exist beyond this one.
            # NOTE(review): hint assumed to belong to the table branch only,
            # so JSON/YAML output stays machine-readable — confirm.
            if len(results) == limit:
                click.echo(f"\n💡 Showing first {limit} results. Use --limit and --offset for more.")

    except ImportError as e:
        click.echo(f"❌ Search plugin not available: {e}", err=True)
        sys.exit(1)
    except Exception as e:
        click.echo(f"❌ Search failed: {e}", err=True)
        # The full traceback is printed only when the config asks for verbosity.
        if config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
|
||||
|
||||
|
||||
@search_group.command('status')
@click.option('--format', 'output_format', default='table',
              type=click.Choice(['json', 'yaml', 'table']),
              help='Output format')
@pass_config
def search_status(config, output_format):
    """Show search index status and statistics."""
    db_path = get_database_path(config)

    try:
        # Imported inside the try so an ImportError is reported by the
        # dedicated handler below.
        from .plugins.builtin.search import FTSSearchPlugin

        search_plugin = FTSSearchPlugin()
        stats = search_plugin.get_search_stats(db_path)

        if output_format == 'json':
            # default=str matches the JSON output of `search query`, so a
            # value json cannot encode natively is stringified instead of
            # aborting the command.
            click.echo(json.dumps(stats, indent=2, default=str))
        elif output_format == 'yaml':
            click.echo(yaml.dump(stats, default_flow_style=False))
        else:
            # Table format
            click.echo("📊 Search Index Status\n")

            if stats.get('fts_enabled'):
                click.echo("✅ FTS5 Full Text Search: Enabled")

                # Show table information
                if stats.get('fts_tables'):
                    click.echo(f"📋 FTS Tables: {', '.join(stats['fts_tables'])}")

                # Show document counts
                suffix = '_documents'
                for key, value in stats.items():
                    if key.endswith(suffix):
                        # Strip only the trailing suffix; str.replace would
                        # also remove the same text elsewhere in the key.
                        table_name = key[:-len(suffix)]
                        click.echo(f"📄 {table_name}: {value} documents")
            else:
                click.echo("❌ FTS5 Full Text Search: Disabled")
                if 'error' in stats:
                    click.echo(f" Error: {stats['error']}")
                click.echo(" Falling back to simple LIKE-based search")

            # Additional index info
            # NOTE(review): assumed to belong to the table branch only, so
            # JSON/YAML output stays machine-readable — confirm.
            from .plugins.builtin.search import SearchIndexer
            indexer = SearchIndexer()
            index_info = indexer.get_index_info(db_path)

            integrity = index_info.get('integrity_check')
            if integrity:
                status = "✅" if integrity == 'passed' else "❌"
                click.echo(f"{status} Index Integrity: {integrity}")

    except ImportError as e:
        click.echo(f"❌ Search plugin not available: {e}", err=True)
        sys.exit(1)
    except Exception as e:
        click.echo(f"❌ Failed to get search status: {e}", err=True)
        # The full traceback is printed only when the config asks for verbosity.
        if config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
|
||||
|
||||
|
||||
@search_group.command('rebuild')
@click.option('--optimize', is_flag=True, help='Optimize indexes after rebuild')
@pass_config
def search_rebuild(config, optimize):
    """Rebuild search indexes from scratch."""
    database = get_database_path(config)

    try:
        # Imported inside the try so an ImportError is reported by the
        # dedicated handler below.
        from .plugins.builtin.search import FTSSearchPlugin, SearchIndexer

        click.echo("🔄 Rebuilding search indexes...")
        outcome = FTSSearchPlugin().rebuild_index(database)

        # Unlike `search init --rebuild`, an 'error' entry is fatal here.
        # sys.exit raises SystemExit, which the `except Exception` handler
        # below does not intercept.
        if 'error' in outcome:
            click.echo(f"❌ Rebuild failed: {outcome['error']}", err=True)
            sys.exit(1)

        click.echo("✅ Rebuilt indexes successfully")
        for label, key in (("📄 Files indexed", 'files_indexed'),
                           ("📋 Schemas indexed", 'schemas_indexed')):
            click.echo(f"{label}: {outcome.get(key, 0)}")

        if optimize:
            click.echo("🔧 Optimizing indexes...")
            SearchIndexer().optimize_index(database)
            click.echo("✅ Indexes optimized")

    except ImportError as exc:
        click.echo(f"❌ Search plugin not available: {exc}", err=True)
        sys.exit(1)
    except Exception as exc:
        click.echo(f"❌ Rebuild failed: {exc}", err=True)
        # The full traceback is printed only when the config asks for verbosity.
        if config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
|
||||
|
||||
|
||||
# Register search commands
# NOTE(review): search_group is declared with @cli.group('search'), which
# presumably already attaches it to `cli`; this call looks redundant but
# harmless — confirm before removing.
cli.add_command(search_group)


# Register issue management commands
cli.add_command(issues_group)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user