- Created extensible BaseQueryParadigm interface with standardized QueryResult format - Implemented QueryParadigmRegistry for paradigm discovery and management - Added 5 working paradigms: SQL, FTS, GraphQL, JSONPath, Natural Language - Documented 9 additional paradigms: QBE, Batch Manipulation, Visual Query Builder, REST API, NoSQL, UNIX Pipeline, XPath/XQuery, RAG, Data Transformation - Integrated full CLI interface: list, search, show, exec, categories commands - Added comprehensive test suite with 23 test cases covering all components - Auto-registration system enables easy addition of new paradigms - Organized paradigms by category (structural, textual, semantic, visual, procedural, network) and complexity (beginner, intermediate, advanced) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
400 lines
14 KiB
Python
400 lines
14 KiB
Python
"""
|
|
Natural Language Query Paradigm - Human-friendly query interface.
|
|
"""
|
|
|
|
import time
|
|
import re
|
|
from typing import Dict, Any, List, Optional
|
|
|
|
from ..base import BaseQueryParadigm, QueryResult
|
|
|
|
|
|
class NaturalLanguageQueryParadigm(BaseQueryParadigm):
    """Natural language query paradigm for intuitive, human-friendly queries.

    A plain-English query is matched against an ordered table of intent
    rules. The first rule whose translator produces a query decides which
    registered paradigm (``fts``, ``sql``, ``graphql`` or ``jsonpath``)
    executes it; the delegated result is then relabelled and annotated with
    the original query, the translation and the detected intent.
    """

    # Bounds applied by validate_query() to the stripped query text.
    _MIN_QUERY_LENGTH = 3
    _MAX_QUERY_LENGTH = 500

    # Ordered routing table:
    #   (compiled pattern, intent label, paradigm key, translator method name)
    # Patterns are searched against the lower-cased query. Specific intents
    # come first and the generic file listing comes last; otherwise
    # "How many files do I have?" or "Show files that have front matter"
    # would be swallowed by the listing rule. Translators are referenced by
    # name because bound methods do not exist yet at class-creation time.
    _INTENT_RULES = tuple(
        (re.compile(pattern), intent, paradigm, translator)
        for pattern, intent, paradigm, translator in (
            # Full text search
            (r"find.*about|search.*for|\bcontain|content.*with",
             "content_search", "fts", "_translate_to_fts"),
            # Statistics
            (r"\bhow many\b|\bcount|\btotal\b|\bstatistics\b|\bstats\b",
             "statistics", "sql", "_translate_to_sql_stats"),
            # Size / length analysis
            (r"largest|biggest|smallest|\bsize|\blength",
             "size_analysis", "sql", "_translate_to_sql_size"),
            # Recent / time based
            (r"\brecent|\blatest\b|\bnew(?:est)?\b|created.*ago|modified",
             "temporal_query", "sql", "_translate_to_sql_recent"),
            # Schemas
            (r"schema",
             "schema_query", "graphql", "_translate_to_graphql_schemas"),
            # Document structure (headings, links, code blocks, images)
            (r"heading|\b(?:hyper)?links?\b|code.*block|\bimages?\b|structure",
             "structure_analysis", "jsonpath", "_translate_to_jsonpath"),
            # Front matter: reported as a file listing so the set of intent
            # labels stays the one callers already know.
            (r"front.*matter|metadata|yaml.*header",
             "file_listing", "sql", "_translate_to_sql_frontmatter"),
            # Detailed GraphQL overview
            (r"show.*detailed|complete.*information|comprehensive",
             "general_query", "graphql", "_translate_to_graphql_detailed"),
            # Generic file listing: deliberately last (see note above).
            (r"show.*files|list.*files|all.*files|files.*have",
             "file_listing", "sql", "_translate_to_sql_files"),
        )
    )

    # Verbs that trigger the full-text fallback when no rule produced a query.
    # Anchored at a word start so "together" or "forget" do not count as "get".
    _FALLBACK_VERBS = re.compile(r"\b(?:find|search|show|get)")

    # Search-term extractors, tried in order: "about X", "contain(s|ing) X",
    # "for X". The trailing "in"/"from" must be whole words; without the
    # boundary "about api integration" was cut down to "api".
    _FTS_TERM_PATTERNS = tuple(
        re.compile(rf"\b{lead}\s+(.+?)(?:\s+in\b|\s+from\b|$)")
        for lead in (r"about", r"contain(?:s|ing)?", r"for")
    )

    # Leading/trailing non-word characters stripped from each search word so
    # that "documentation?" does not end up inside the FTS expression.
    _EDGE_PUNCTUATION = re.compile(r"^\W+|\W+$")

    _STOP_WORDS = frozenset({
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
        'of', 'with', 'by',
    })

    # Words ignored when falling back to "meaningful words of the whole query".
    _FILLER_WORDS = frozenset({
        'find', 'search', 'show', 'documents', 'files', 'about', 'containing',
    })

    @property
    def name(self) -> str:
        """Display name of this paradigm."""
        return "Natural Language"

    @property
    def description(self) -> str:
        """One-line description of this paradigm."""
        return "Human-friendly queries that translate to appropriate technical paradigms"

    @property
    def category(self) -> str:
        """Category label of this paradigm."""
        return "semantic"

    @property
    def complexity(self) -> str:
        """Complexity label of this paradigm."""
        return "beginner"

    def execute(self, query: str, config: Optional[Dict[str, Any]] = None) -> QueryResult:
        """Execute a natural language query by delegating to another paradigm.

        Args:
            query: The plain-English query.
            config: Passed through unchanged to the target paradigm's
                ``execute``.

        Returns:
            The target paradigm's result with ``paradigm`` set to
            "Natural Language", ``execution_time_ms`` replaced by the total
            elapsed time, and ``metadata`` extended with ``original_query``,
            ``translated_to``, ``translated_query`` and ``query_intent``.
            On any error a failed ``QueryResult`` is returned instead of
            raising; its ``error_message`` carries the exception text.
        """
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # if the wall clock is adjusted mid-query.
        started = time.perf_counter()
        intent = "general_query"

        try:
            resolved = self._resolve_intent(query)
            if resolved is None:
                raise ValueError(f"Could not understand query: '{query}'")
            intent, best_paradigm, translated_query = resolved

            # Imported here rather than at module level, presumably to avoid
            # a circular import with the registry (verify before moving it).
            from ..registry import registry
            paradigm_instance = registry.get(best_paradigm)

            if not paradigm_instance:
                raise ValueError(f"Paradigm '{best_paradigm}' not available")

            # Execute using the target paradigm
            result = paradigm_instance.execute(translated_query, config)

            # Update result to show it came from natural language
            result.paradigm = "Natural Language"
            if result.metadata is None:
                # Do not let a missing metadata dict turn a successful
                # delegated query into a failure.
                result.metadata = {}
            result.metadata.update({
                "original_query": query,
                "translated_to": best_paradigm,
                "translated_query": translated_query,
                "query_intent": intent,
            })
            result.execution_time_ms = (time.perf_counter() - started) * 1000
            return result

        except Exception as exc:
            # Boundary handler: every failure (unknown query, missing
            # paradigm, error inside the delegate) becomes a failed result.
            return QueryResult(
                paradigm="Natural Language",
                query=query,
                execution_time_ms=(time.perf_counter() - started) * 1000,
                result_count=0,
                results=[],
                metadata={"query_intent": intent},
                success=False,
                error_message=str(exc),
            )

    def get_examples(self) -> List[Dict[str, str]]:
        """Get example natural language queries.

        Returns:
            A list of dicts with ``name``, ``description`` and ``query`` keys.
        """
        return [
            {
                "name": "Find files",
                "description": "List and discover files in the system",
                "query": "Show me all the files"
            },
            {
                "name": "Search content",
                "description": "Search for specific content",
                "query": "Find documents about API documentation"
            },
            {
                "name": "Recent activity",
                "description": "Find recently modified content",
                "query": "What files were created recently?"
            },
            {
                "name": "File statistics",
                "description": "Get information about file sizes and counts",
                "query": "How many files do I have?"
            },
            {
                "name": "Content analysis",
                "description": "Analyze document structure",
                "query": "Show me all the headings in the documentation"
            },
            {
                "name": "Schema exploration",
                "description": "Discover schemas and their properties",
                "query": "What schemas are available?"
            },
            {
                "name": "Large files",
                "description": "Find files by size criteria",
                "query": "Which files are the largest?"
            },
            {
                "name": "Front matter search",
                "description": "Find files with metadata",
                "query": "Show files that have front matter"
            }
        ]

    def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
        """Validate a natural language query.

        Only emptiness and length are checked; both length limits apply to
        the query with surrounding whitespace removed.

        Returns:
            ``(True, None)`` when valid, otherwise ``(False, reason)``.
        """
        text = query.strip() if isinstance(query, str) else ""
        if not text:
            return False, "Query cannot be empty"

        # Natural language queries are generally always valid;
        # just check for reasonable length.
        if len(text) < self._MIN_QUERY_LENGTH:
            return False, "Query too short - please be more specific"

        if len(text) > self._MAX_QUERY_LENGTH:
            return False, "Query too long - please be more concise"

        return True, None

    def get_syntax_help(self) -> str:
        """Get natural language syntax help."""
        return """Natural Language Query Help:

You can ask questions in plain English! The system will automatically
translate your query to the most appropriate technical format.

Common Patterns:

File Discovery:
"Show me all files"
"List the markdown files"
"What files do I have?"

Content Search:
"Find documents about X"
"Search for API documentation"
"Show files containing 'tutorial'"

File Analysis:
"Which files are the largest?"
"Show recent files"
"Find files with front matter"

Structure Analysis:
"Show me all headings"
"Find all code blocks"
"What links are in the files?"

Statistics:
"How many files do I have?"
"What's the total size?"
"Show database statistics"

Schema Queries:
"What schemas are available?"
"Show schema information"

Tips:
- Be specific about what you want to find
- Use natural questions like "What..." or "Show me..."
- Mention specific content types (files, schemas, headings, etc.)
- Use time references like "recent" or "latest"

The system supports various query types and will choose the best
method to answer your question automatically.
"""

    def _resolve_intent(self, query: str) -> Optional[tuple[str, str, str]]:
        """Find the first routing rule that yields a translation for *query*.

        Returns:
            ``(intent, paradigm_key, translated_query)``, or ``None`` when
            *query* is not a string or nothing could be translated.
        """
        if not isinstance(query, str):
            return None
        query_lower = query.lower().strip()

        for pattern, intent, paradigm, translator_name in self._INTENT_RULES:
            if pattern.search(query_lower):
                translated = getattr(self, translator_name)(query)
                # A translator may decline (return None); try the next rule.
                if translated:
                    return intent, paradigm, translated

        # Fallback: try FTS for any remaining search-like queries
        if self._FALLBACK_VERBS.search(query_lower):
            translated = self._translate_to_fts(query)
            if translated:
                return "content_search", "fts", translated

        return None

    def _analyze_and_translate(self, query: str) -> tuple[Optional[str], Optional[str]]:
        """Analyze natural language query and translate to appropriate paradigm.

        Returns:
            ``(paradigm_key, translated_query)``, or ``(None, None)`` when
            the query could not be understood.
        """
        resolved = self._resolve_intent(query)
        if resolved is None:
            return None, None
        _, paradigm, translated = resolved
        return paradigm, translated

    def _detect_intent(self, query: str) -> str:
        """Detect the intent of the natural language query.

        The label comes from the same rule that routes the query, so it
        always agrees with the paradigm the query is translated to.

        Returns:
            One of "content_search", "file_listing", "statistics",
            "temporal_query", "size_analysis", "schema_query",
            "structure_analysis" or "general_query" (also used when the
            query cannot be routed at all).
        """
        resolved = self._resolve_intent(query)
        return resolved[0] if resolved else "general_query"

    def _translate_to_fts(self, query: str) -> Optional[str]:
        """Translate to a full text search query.

        Looks for an explicit search phrase ("about X", "containing X",
        "for X"); if none yields usable words, falls back to the first three
        meaningful words of the whole query.

        Returns:
            Lower-cased terms joined with " AND ", or ``None`` when no
            usable term was found.
        """
        query_lower = query.lower()

        # First extractor that matches wins (about > contain > for).
        found = next(
            (m for m in (p.search(query_lower) for p in self._FTS_TERM_PATTERNS) if m),
            None,
        )
        if found:
            phrase = found.group(1).strip(' "\'')
            cleaned = (self._EDGE_PUNCTUATION.sub('', raw) for raw in phrase.split())
            # Drop stop words and very short tokens.
            words = [w for w in cleaned if len(w) > 2 and w not in self._STOP_WORDS]
            if words:
                return ' AND '.join(words)

        # Fallback: extract meaningful words from the entire query
        meaningful_words = []
        for word in query.split():
            word_clean = re.sub(r'[^\w]', '', word).lower()
            if len(word_clean) > 3 and word_clean not in self._FILLER_WORDS:
                meaningful_words.append(word_clean)

        if meaningful_words:
            return ' AND '.join(meaningful_words[:3])  # Limit to 3 terms

        return None

    def _translate_to_sql_files(self, query: str) -> Optional[str]:
        """Translate to SQL file listing query."""
        query_lower = query.lower()

        if any(phrase in query_lower for phrase in ('all files', 'show files', 'list files')):
            return "SELECT id, filename, created_at FROM markdown_files ORDER BY created_at DESC LIMIT 20"

        return "SELECT filename FROM markdown_files ORDER BY filename"

    def _translate_to_sql_stats(self, query: str) -> Optional[str]:
        """Translate to SQL statistics query (file count, schema count or both)."""
        query_lower = query.lower()

        if 'files' in query_lower:
            return "SELECT COUNT(*) as file_count FROM markdown_files"
        if 'schema' in query_lower:
            return "SELECT COUNT(*) as schema_count FROM schemas"
        return "SELECT (SELECT COUNT(*) FROM markdown_files) as files, (SELECT COUNT(*) FROM schemas) as schemas"

    def _translate_to_sql_size(self, query: str) -> Optional[str]:
        """Translate to SQL size/length query.

        Orders ascending only when the query asks for small files and does
        not also mention "largest"/"biggest"; otherwise largest first.
        """
        query_lower = query.lower()

        wants_largest = any(word in query_lower for word in ('largest', 'biggest'))
        direction = "ASC" if 'small' in query_lower and not wants_largest else "DESC"
        return (
            "SELECT filename, LENGTH(content) as size FROM markdown_files "
            f"WHERE content IS NOT NULL ORDER BY size {direction} LIMIT 10"
        )

    def _translate_to_sql_recent(self, query: str) -> Optional[str]:
        """Translate to SQL recent files query (created within the last 7 days)."""
        return "SELECT filename, created_at FROM markdown_files WHERE created_at > datetime('now', '-7 days') ORDER BY created_at DESC"

    def _translate_to_sql_frontmatter(self, query: str) -> Optional[str]:
        """Translate to SQL front matter query."""
        return "SELECT filename, front_matter FROM markdown_files WHERE front_matter IS NOT NULL AND front_matter != '{}'"

    def _translate_to_graphql_schemas(self, query: str) -> Optional[str]:
        """Translate to GraphQL schema query."""
        return """query {
            schemas {
                filename
                title
                description
                schemaVersion
                propertyCount
            }
        }"""

    def _translate_to_graphql_detailed(self, query: str) -> Optional[str]:
        """Translate to detailed GraphQL query.

        Queries mentioning "file" get per-file details; everything else
        gets the database statistics.
        """
        if 'file' in query.lower():
            return """query {
                markdownFiles(limit: 10) {
                    id
                    filename
                    wordCount
                    lineCount
                    frontMatter {
                        key
                        value
                    }
                    createdAt
                }
            }"""

        return """query {
            databaseStats {
                totalFiles
                totalSchemas
                totalSizeBytes
                lastUpdated
            }
        }"""

    def _translate_to_jsonpath(self, query: str) -> Optional[str]:
        """Translate to JSONPath query for a structural element."""
        query_lower = query.lower()

        # Checked in priority order; first keyword found wins.
        for keyword, path in (
            ('heading', "$..heading"),
            ('link', "$..link"),
            ('code', "$..code_block"),
            ('image', "$..image"),
        ):
            if keyword in query_lower:
                return path

        return "$..heading"  # Default to headings

    def can_translate_from(self, other_paradigm: str) -> bool:
        """Natural language doesn't translate from other paradigms."""
        return False

    def translate_query(self, query: str, from_paradigm: str) -> Optional[str]:
        """Natural language doesn't translate from other paradigms."""
        return None