""" Full Text Search Paradigm - FTS5-powered content search. """ import time from typing import Dict, Any, List, Optional from ..base import BaseQueryParadigm, QueryResult class FullTextSearchParadigm(BaseQueryParadigm): """Full text search paradigm using FTS5 for content discovery.""" @property def name(self) -> str: return "FTS" @property def description(self) -> str: return "Full text search across markdown content using SQLite FTS5 for semantic discovery" @property def category(self) -> str: return "textual" @property def complexity(self) -> str: return "beginner" def execute(self, query: str, config: Dict[str, Any] = None) -> QueryResult: """Execute full text search query.""" start_time = time.time() try: from ...plugins.builtin.search.fts_search import FTSSearchPlugin # Get database path from config db_path = config.get('db_path') if config else 'markitect.db' # Create FTS search plugin fts_plugin = FTSSearchPlugin() # Execute search content_type = config.get('content_type', 'all') if config else 'all' limit = config.get('limit', 20) if config else 20 offset = config.get('offset', 0) if config else 0 search_results = fts_plugin.search( db_path=db_path, query=query, content_type=content_type, limit=limit, offset=offset ) execution_time = (time.time() - start_time) * 1000 # Convert FTS results to standard format results = [] for result in search_results: if result['type'] == 'file': results.append({ 'type': 'file', 'score': result['score'], 'filename': result['file']['filename'], 'content_preview': result.get('highlight', ''), 'file_id': result['file']['id'], 'created_at': result['file']['created_at'] }) elif result['type'] == 'schema': results.append({ 'type': 'schema', 'score': result['score'], 'filename': result['schema']['filename'], 'title': result['schema']['title'], 'description': result['schema']['description'], 'schema_id': result['schema']['id'], 'highlight': result.get('highlight', '') }) return QueryResult( paradigm="FTS", query=query, execution_time_ms=execution_time, result_count=len(results), results=results, metadata={ "content_type": content_type, "fts_enabled": True, "query_type": self._detect_query_type(query) }, success=True ) except Exception as e: execution_time = (time.time() - start_time) * 1000 return QueryResult( paradigm="FTS", query=query, execution_time_ms=execution_time, result_count=0, results=[], metadata={"fts_enabled": False}, success=False, error_message=str(e) ) def get_examples(self) -> List[Dict[str, str]]: """Get example FTS queries.""" return [ { "name": "Simple search", "description": "Find documents containing specific words", "query": "documentation" }, { "name": "Multiple terms", "description": "Search for documents with multiple terms", "query": "API documentation" }, { "name": "Exact phrase", "description": "Search for exact phrases", "query": '"getting started"' }, { "name": "Boolean search", "description": "Use AND/OR operators", "query": "API AND documentation NOT deprecated" }, { "name": "Wildcard search", "description": "Prefix matching with wildcards", "query": "config*" }, { "name": "Proximity search", "description": "Find terms near each other", "query": "NEAR(database query, 5)" } ] def validate_query(self, query: str) -> tuple[bool, Optional[str]]: """Validate FTS query syntax.""" if not query or not query.strip(): return False, "Query cannot be empty" # Use the FTS query parser for validation try: from ...plugins.builtin.search.query_parser import QueryParser parser = QueryParser() return parser.validate_query(query) except ImportError: # Fallback validation return self._basic_validation(query) def _basic_validation(self, query: str) -> tuple[bool, Optional[str]]: """Basic FTS query validation.""" # Check for balanced quotes quote_count = query.count('"') if quote_count % 2 != 0: return False, "Unmatched quotes in query" # Check for balanced parentheses open_parens = query.count('(') close_parens = query.count(')') if open_parens != close_parens: return False, "Unmatched parentheses in query" return True, None def get_syntax_help(self) -> str: """Get FTS syntax help.""" return """Full Text Search Syntax: Basic Search: word - Find documents containing 'word' multiple words - Find documents with all words (implicit AND) Phrase Search: "exact phrase" - Find exact phrase Boolean Operators: word1 AND word2 - Both words must be present word1 OR word2 - Either word can be present word1 NOT word2 - First word present, second word absent Wildcards: prefix* - Prefix matching (config* matches configuration) Proximity Search: NEAR(word1 word2, 5) - Words within 5 words of each other Column-Specific: filename:readme - Search only in filename field content:tutorial - Search only in content field Examples: documentation "getting started" API AND documentation config* OR setting* NEAR(database query, 10) """ def _detect_query_type(self, query: str) -> str: """Detect FTS query type.""" query_upper = query.upper() if '"' in query: return "phrase_search" elif 'NEAR(' in query_upper: return "proximity_search" elif any(op in query_upper for op in [' AND ', ' OR ', ' NOT ']): return "boolean_search" elif '*' in query: return "wildcard_search" elif ':' in query: return "column_search" else: return "simple_search" def can_translate_from(self, other_paradigm: str) -> bool: """Check if we can translate from another paradigm.""" return other_paradigm.lower() in ["natural_language", "sql"] def translate_query(self, query: str, from_paradigm: str) -> Optional[str]: """Translate from another paradigm to FTS.""" if from_paradigm.lower() == "natural_language": return self._translate_natural_language_to_fts(query) elif from_paradigm.lower() == "sql": return self._translate_sql_to_fts(query) return None def _translate_natural_language_to_fts(self, query: str) -> Optional[str]: """Translate natural language to FTS query.""" query_lower = query.lower() # Extract key terms and convert to FTS syntax if "search for" in query_lower: # Extract what comes after "search for" parts = query_lower.split("search for", 1) if len(parts) > 1: search_term = parts[1].strip() return search_term.replace(" and ", " AND ").replace(" or ", " OR ") if "find" in query_lower and "contain" in query_lower: # Extract terms between "find" and "contain" import re match = re.search(r'find.*?contain.*?["\'](.+?)["\']', query_lower) if match: return f'"{match.group(1)}"' # Simple keyword extraction keywords = [word for word in query.split() if len(word) > 3 and word.lower() not in ['find', 'search', 'for', 'documents', 'files']] if keywords: return " AND ".join(keywords) return None def _translate_sql_to_fts(self, query: str) -> Optional[str]: """Translate simple SQL LIKE queries to FTS.""" if 'LIKE' in query.upper(): import re # Extract LIKE patterns like_matches = re.findall(r"LIKE\s+'%(.+?)%'", query, re.IGNORECASE) if like_matches: return " AND ".join(like_matches) return None