""" Natural Language Query Paradigm - Human-friendly query interface. """ import time import re from typing import Dict, Any, List, Optional from ..base import BaseQueryParadigm, QueryResult class NaturalLanguageQueryParadigm(BaseQueryParadigm): """Natural language query paradigm for intuitive, human-friendly queries.""" @property def name(self) -> str: return "Natural Language" @property def description(self) -> str: return "Human-friendly queries that translate to appropriate technical paradigms" @property def category(self) -> str: return "semantic" @property def complexity(self) -> str: return "beginner" def execute(self, query: str, config: Dict[str, Any] = None) -> QueryResult: """Execute natural language query by translating to appropriate paradigm.""" start_time = time.time() try: # Analyze the query and determine the best paradigm best_paradigm, translated_query = self._analyze_and_translate(query) if not best_paradigm or not translated_query: raise ValueError(f"Could not understand query: '{query}'") # Import the appropriate paradigm from ..registry import registry paradigm_instance = registry.get(best_paradigm) if not paradigm_instance: raise ValueError(f"Paradigm '{best_paradigm}' not available") # Execute using the target paradigm result = paradigm_instance.execute(translated_query, config) # Update result to show it came from natural language result.paradigm = "Natural Language" result.metadata.update({ "original_query": query, "translated_to": best_paradigm, "translated_query": translated_query, "query_intent": self._detect_intent(query) }) execution_time = (time.time() - start_time) * 1000 result.execution_time_ms = execution_time return result except Exception as e: execution_time = (time.time() - start_time) * 1000 return QueryResult( paradigm="Natural Language", query=query, execution_time_ms=execution_time, result_count=0, results=[], metadata={"query_intent": self._detect_intent(query)}, success=False, error_message=str(e) ) def get_examples(self) -> List[Dict[str, str]]: """Get example natural language queries.""" return [ { "name": "Find files", "description": "List and discover files in the system", "query": "Show me all the files" }, { "name": "Search content", "description": "Search for specific content", "query": "Find documents about API documentation" }, { "name": "Recent activity", "description": "Find recently modified content", "query": "What files were created recently?" }, { "name": "File statistics", "description": "Get information about file sizes and counts", "query": "How many files do I have?" }, { "name": "Content analysis", "description": "Analyze document structure", "query": "Show me all the headings in the documentation" }, { "name": "Schema exploration", "description": "Discover schemas and their properties", "query": "What schemas are available?" }, { "name": "Large files", "description": "Find files by size criteria", "query": "Which files are the largest?" }, { "name": "Front matter search", "description": "Find files with metadata", "query": "Show files that have front matter" } ] def validate_query(self, query: str) -> tuple[bool, Optional[str]]: """Validate natural language query.""" if not query or not query.strip(): return False, "Query cannot be empty" # Natural language queries are generally always valid # Just check for reasonable length if len(query.strip()) < 3: return False, "Query too short - please be more specific" if len(query) > 500: return False, "Query too long - please be more concise" return True, None def get_syntax_help(self) -> str: """Get natural language syntax help.""" return """Natural Language Query Help: You can ask questions in plain English! The system will automatically translate your query to the most appropriate technical format. Common Patterns: File Discovery: "Show me all files" "List the markdown files" "What files do I have?" Content Search: "Find documents about X" "Search for API documentation" "Show files containing 'tutorial'" File Analysis: "Which files are the largest?" "Show recent files" "Find files with front matter" Structure Analysis: "Show me all headings" "Find all code blocks" "What links are in the files?" Statistics: "How many files do I have?" "What's the total size?" "Show database statistics" Schema Queries: "What schemas are available?" "Show schema information" Tips: - Be specific about what you want to find - Use natural questions like "What..." or "Show me..." - Mention specific content types (files, schemas, headings, etc.) - Use time references like "recent" or "latest" The system supports various query types and will choose the best method to answer your question automatically. """ def _analyze_and_translate(self, query: str) -> tuple[Optional[str], Optional[str]]: """Analyze natural language query and translate to appropriate paradigm.""" query_lower = query.lower().strip() # Intent detection with paradigm mapping intent_patterns = [ # Full text search patterns (r'find.*about|search.*for|documents.*contain|content.*with', 'fts', self._translate_to_fts), # File listing patterns (r'show.*files|list.*files|all.*files|files.*have', 'sql', self._translate_to_sql_files), # Statistics patterns (r'how many|count|total|statistics|stats', 'sql', self._translate_to_sql_stats), # Size/analysis patterns (r'largest|biggest|smallest|size|length', 'sql', self._translate_to_sql_size), # Recent/time patterns (r'recent|latest|new|created.*ago|modified', 'sql', self._translate_to_sql_recent), # Schema patterns (r'schema|schemas|json.*schema', 'graphql', self._translate_to_graphql_schemas), # Structure patterns (headings, links, etc.) (r'heading|headings|links|code.*block|structure', 'jsonpath', self._translate_to_jsonpath), # Front matter patterns (r'front.*matter|metadata|yaml.*header', 'sql', self._translate_to_sql_frontmatter), # General GraphQL patterns (r'show.*detailed|complete.*information|comprehensive', 'graphql', self._translate_to_graphql_detailed) ] # Try to match patterns for pattern, paradigm, translator in intent_patterns: if re.search(pattern, query_lower): translated = translator(query) if translated: return paradigm, translated # Fallback: try FTS for any remaining search-like queries if any(word in query_lower for word in ['find', 'search', 'show', 'get', 'contains']): translated = self._translate_to_fts(query) if translated: return 'fts', translated return None, None def _detect_intent(self, query: str) -> str: """Detect the intent of the natural language query.""" query_lower = query.lower() if any(word in query_lower for word in ['find', 'search', 'about', 'contain']): return "content_search" elif any(word in query_lower for word in ['list', 'show', 'all', 'files']): return "file_listing" elif any(word in query_lower for word in ['count', 'how many', 'statistics']): return "statistics" elif any(word in query_lower for word in ['recent', 'latest', 'new']): return "temporal_query" elif any(word in query_lower for word in ['large', 'big', 'small', 'size']): return "size_analysis" elif any(word in query_lower for word in ['schema', 'schemas']): return "schema_query" elif any(word in query_lower for word in ['heading', 'structure', 'link']): return "structure_analysis" else: return "general_query" def _translate_to_fts(self, query: str) -> Optional[str]: """Translate to full text search query.""" query_lower = query.lower() # Extract search terms search_terms = [] # Look for "about X" or "containing X" about_match = re.search(r'about\s+(.+?)(?:\s+in|\s+from|$)', query_lower) if about_match: search_terms.append(about_match.group(1)) contain_match = re.search(r'contain(?:ing)?\s+["\']?(.+?)["\']?(?:\s+|$)', query_lower) if contain_match: search_terms.append(contain_match.group(1)) for_match = re.search(r'(?:search\s+)?for\s+(.+?)(?:\s+in|\s+from|$)', query_lower) if for_match: search_terms.append(for_match.group(1)) # Clean up search terms if search_terms: term = search_terms[0].strip(' "\'') # Remove common stop words stop_words = ['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'] words = [w for w in term.split() if w.lower() not in stop_words and len(w) > 2] if words: return ' AND '.join(words) # Fallback: extract meaningful words from the entire query meaningful_words = [] for word in query.split(): word_clean = re.sub(r'[^\w]', '', word).lower() if (len(word_clean) > 3 and word_clean not in ['find', 'search', 'show', 'documents', 'files', 'about', 'containing']): meaningful_words.append(word_clean) if meaningful_words: return ' AND '.join(meaningful_words[:3]) # Limit to 3 terms return None def _translate_to_sql_files(self, query: str) -> Optional[str]: """Translate to SQL file listing query.""" query_lower = query.lower() if any(phrase in query_lower for phrase in ['all files', 'show files', 'list files']): return "SELECT id, filename, created_at FROM markdown_files ORDER BY created_at DESC LIMIT 20" return "SELECT filename FROM markdown_files ORDER BY filename" def _translate_to_sql_stats(self, query: str) -> Optional[str]: """Translate to SQL statistics query.""" query_lower = query.lower() if 'files' in query_lower: return "SELECT COUNT(*) as file_count FROM markdown_files" elif 'schema' in query_lower: return "SELECT COUNT(*) as schema_count FROM schemas" else: return "SELECT (SELECT COUNT(*) FROM markdown_files) as files, (SELECT COUNT(*) FROM schemas) as schemas" def _translate_to_sql_size(self, query: str) -> Optional[str]: """Translate to SQL size/length query.""" query_lower = query.lower() if any(word in query_lower for word in ['largest', 'biggest']): return "SELECT filename, LENGTH(content) as size FROM markdown_files WHERE content IS NOT NULL ORDER BY size DESC LIMIT 10" elif any(word in query_lower for word in ['smallest', 'small']): return "SELECT filename, LENGTH(content) as size FROM markdown_files WHERE content IS NOT NULL ORDER BY size ASC LIMIT 10" else: return "SELECT filename, LENGTH(content) as size FROM markdown_files WHERE content IS NOT NULL ORDER BY size DESC LIMIT 10" def _translate_to_sql_recent(self, query: str) -> Optional[str]: """Translate to SQL recent files query.""" return "SELECT filename, created_at FROM markdown_files WHERE created_at > datetime('now', '-7 days') ORDER BY created_at DESC" def _translate_to_sql_frontmatter(self, query: str) -> Optional[str]: """Translate to SQL front matter query.""" return "SELECT filename, front_matter FROM markdown_files WHERE front_matter IS NOT NULL AND front_matter != '{}'" def _translate_to_graphql_schemas(self, query: str) -> Optional[str]: """Translate to GraphQL schema query.""" return """query { schemas { filename title description schemaVersion propertyCount } }""" def _translate_to_graphql_detailed(self, query: str) -> Optional[str]: """Translate to detailed GraphQL query.""" query_lower = query.lower() if 'file' in query_lower: return """query { markdownFiles(limit: 10) { id filename wordCount lineCount frontMatter { key value } createdAt } }""" else: return """query { databaseStats { totalFiles totalSchemas totalSizeBytes lastUpdated } }""" def _translate_to_jsonpath(self, query: str) -> Optional[str]: """Translate to JSONPath query.""" query_lower = query.lower() if 'heading' in query_lower: return "$..heading" elif 'link' in query_lower: return "$..link" elif 'code' in query_lower: return "$..code_block" elif 'image' in query_lower: return "$..image" else: return "$..heading" # Default to headings def can_translate_from(self, other_paradigm: str) -> bool: """Natural language doesn't translate from other paradigms.""" return False def translate_query(self, query: str, from_paradigm: str) -> Optional[str]: """Natural language doesn't translate from other paradigms.""" return None