feat: implement comprehensive query paradigm zoo system (issue #62)
- Created extensible BaseQueryParadigm interface with standardized QueryResult format
- Implemented QueryParadigmRegistry for paradigm discovery and management
- Added 5 working paradigms: SQL, FTS, GraphQL, JSONPath, Natural Language
- Documented 9 additional paradigms: QBE, Batch Manipulation, Visual Query Builder, REST API, NoSQL, UNIX Pipeline, XPath/XQuery, RAG, Data Transformation
- Integrated full CLI interface: list, search, show, exec, categories commands
- Added comprehensive test suite with 23 test cases covering all components
- Auto-registration system enables easy addition of new paradigms
- Organized paradigms by category (structural, textual, semantic, visual, procedural, network) and complexity (beginner, intermediate, advanced)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
400
markitect/query_paradigms/paradigms/natural_language_paradigm.py
Normal file
400
markitect/query_paradigms/paradigms/natural_language_paradigm.py
Normal file
@@ -0,0 +1,400 @@
|
||||
"""
|
||||
Natural Language Query Paradigm - Human-friendly query interface.
|
||||
"""
|
||||
|
||||
import time
|
||||
import re
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
from ..base import BaseQueryParadigm, QueryResult
|
||||
|
||||
|
||||
class NaturalLanguageQueryParadigm(BaseQueryParadigm):
|
||||
"""Natural language query paradigm for intuitive, human-friendly queries."""
|
||||
|
||||
@property
def name(self) -> str:
    """Return this paradigm's name, ``"Natural Language"``."""
    paradigm_name = "Natural Language"
    return paradigm_name
|
||||
|
||||
@property
def description(self) -> str:
    """Return the one-line summary text for this paradigm."""
    summary = "Human-friendly queries that translate to appropriate technical paradigms"
    return summary
|
||||
|
||||
@property
def category(self) -> str:
    """Return the category label of this paradigm, ``"semantic"``."""
    category_label = "semantic"
    return category_label
|
||||
|
||||
@property
def complexity(self) -> str:
    """Return the complexity label of this paradigm, ``"beginner"``."""
    complexity_label = "beginner"
    return complexity_label
|
||||
|
||||
def execute(self, query: str, config: Optional[Dict[str, Any]] = None) -> QueryResult:
    """Translate a natural language query and run it on the matching paradigm.

    The query is mapped to a target paradigm key and a translated query by
    ``_analyze_and_translate``; the target paradigm is looked up in the
    registry and executed with the same ``config``. The returned result is
    relabelled as "Natural Language" and its metadata is extended with the
    original query, the target paradigm, the translated query and the
    detected intent.

    Args:
        query: The natural language query text.
        config: Optional configuration passed through unchanged to the
            target paradigm's ``execute``.

    Returns:
        The target paradigm's ``QueryResult`` (mutated in place) on success,
        or a new ``QueryResult`` with ``success=False`` and the exception
        text in ``error_message`` when anything in the pipeline raises.
    """
    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump when the system clock is adjusted (time.time() can).
    start_time = time.perf_counter()

    try:
        # Analyze the query and determine the best paradigm
        best_paradigm, translated_query = self._analyze_and_translate(query)

        if not best_paradigm or not translated_query:
            raise ValueError(f"Could not understand query: '{query}'")

        # Imported at call time rather than at module top.
        # NOTE(review): presumably to avoid a circular import - confirm.
        from ..registry import registry
        paradigm_instance = registry.get(best_paradigm)

        if not paradigm_instance:
            raise ValueError(f"Paradigm '{best_paradigm}' not available")

        # Execute using the target paradigm
        result = paradigm_instance.execute(translated_query, config)

        # Update result to show it came from natural language
        result.paradigm = "Natural Language"
        result.metadata.update({
            "original_query": query,
            "translated_to": best_paradigm,
            "translated_query": translated_query,
            "query_intent": self._detect_intent(query)
        })

        # Report the end-to-end time, including translation and lookup.
        result.execution_time_ms = (time.perf_counter() - start_time) * 1000

        return result

    except Exception as e:
        # Boundary handler: every failure is reported as an error result
        # instead of propagating to the caller.
        execution_time = (time.perf_counter() - start_time) * 1000

        # _detect_intent calls query.lower(); guard it so a non-string query
        # (e.g. None) still yields an error result rather than raising a
        # second exception from inside this handler.
        if isinstance(query, str):
            intent = self._detect_intent(query)
        else:
            intent = "general_query"

        return QueryResult(
            paradigm="Natural Language",
            query=query,
            execution_time_ms=execution_time,
            result_count=0,
            results=[],
            metadata={"query_intent": intent},
            success=False,
            error_message=str(e)
        )
|
||||
|
||||
def get_examples(self) -> List[Dict[str, str]]:
    """Return the built-in sample natural language queries.

    Each entry is a dict with the keys ``name``, ``description`` and
    ``query``, in that order.
    """
    # (name, description, query) triples, expanded into dicts below.
    samples = (
        ("Find files", "List and discover files in the system", "Show me all the files"),
        ("Search content", "Search for specific content", "Find documents about API documentation"),
        ("Recent activity", "Find recently modified content", "What files were created recently?"),
        ("File statistics", "Get information about file sizes and counts", "How many files do I have?"),
        ("Content analysis", "Analyze document structure", "Show me all the headings in the documentation"),
        ("Schema exploration", "Discover schemas and their properties", "What schemas are available?"),
        ("Large files", "Find files by size criteria", "Which files are the largest?"),
        ("Front matter search", "Find files with metadata", "Show files that have front matter"),
    )
    return [
        {"name": title, "description": summary, "query": text}
        for title, summary, text in samples
    ]
|
||||
|
||||
def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
    """Check that a natural language query is non-empty and of sane length.

    Both length limits are applied to the whitespace-stripped text, so
    leading/trailing padding never makes a query "too long" or lets a
    too-short one through.

    Args:
        query: The natural language query text.

    Returns:
        ``(True, None)`` when the query is acceptable, otherwise
        ``(False, message)`` with a human-readable reason.
    """
    min_length = 3
    max_length = 500

    # Normalize once so both bounds measure the same text.
    stripped = query.strip() if query else ""
    if not stripped:
        return False, "Query cannot be empty"

    # Natural language queries are generally always valid;
    # only the length is checked.
    if len(stripped) < min_length:
        return False, "Query too short - please be more specific"

    if len(stripped) > max_length:
        return False, "Query too long - please be more concise"

    return True, None
|
||||
|
||||
def get_syntax_help(self) -> str:
    """Return the plain-text help describing supported query phrasings."""
    help_text = """Natural Language Query Help:

You can ask questions in plain English! The system will automatically
translate your query to the most appropriate technical format.

Common Patterns:

File Discovery:
"Show me all files"
"List the markdown files"
"What files do I have?"

Content Search:
"Find documents about X"
"Search for API documentation"
"Show files containing 'tutorial'"

File Analysis:
"Which files are the largest?"
"Show recent files"
"Find files with front matter"

Structure Analysis:
"Show me all headings"
"Find all code blocks"
"What links are in the files?"

Statistics:
"How many files do I have?"
"What's the total size?"
"Show database statistics"

Schema Queries:
"What schemas are available?"
"Show schema information"

Tips:
- Be specific about what you want to find
- Use natural questions like "What..." or "Show me..."
- Mention specific content types (files, schemas, headings, etc.)
- Use time references like "recent" or "latest"

The system supports various query types and will choose the best
method to answer your question automatically.
"""
    return help_text
|
||||
|
||||
def _analyze_and_translate(self, query: str) -> tuple[Optional[str], Optional[str]]:
    """Pick a target paradigm for a natural language query and translate it.

    The lowercased query is tested against an ordered list of regex
    patterns. The first pattern that matches AND whose translator returns a
    non-empty query wins. Specific intents (content search, statistics,
    size, recency, schema, structure, front matter, detailed info) are
    tried before the generic file-listing pattern; otherwise any query that
    merely mentions "files" (e.g. "How many files do I have?" or
    "Show recent files") would be answered with a plain listing.

    Args:
        query: The natural language query text.

    Returns:
        ``(paradigm_key, translated_query)``, or ``(None, None)`` when no
        pattern or fallback produced a translation.
    """
    query_lower = query.lower().strip()

    # Intent detection with paradigm mapping: (pattern, paradigm key, translator).
    # Order matters - earlier entries take precedence.
    intent_patterns = [
        # Full text search patterns ("files containing X" is a content
        # search, not a file listing)
        (r'find.*about|search.*for|(?:documents|files).*contain|content.*with',
         'fts', self._translate_to_fts),

        # Statistics patterns
        (r'how many|count|total|statistics|stats', 'sql', self._translate_to_sql_stats),

        # Size/analysis patterns
        (r'largest|biggest|smallest|size|length', 'sql', self._translate_to_sql_size),

        # Recent/time patterns
        (r'recent|latest|new|created.*ago|modified', 'sql', self._translate_to_sql_recent),

        # Schema patterns
        (r'schema|schemas|json.*schema', 'graphql', self._translate_to_graphql_schemas),

        # Structure patterns (headings, links, etc.)
        (r'heading|headings|links|code.*block|structure', 'jsonpath', self._translate_to_jsonpath),

        # Front matter patterns
        (r'front.*matter|metadata|yaml.*header', 'sql', self._translate_to_sql_frontmatter),

        # General GraphQL patterns
        (r'show.*detailed|complete.*information|comprehensive',
         'graphql', self._translate_to_graphql_detailed),

        # File listing patterns - deliberately last because they are the
        # broadest and match almost any sentence mentioning "files".
        (r'show.*files|list.*files|all.*files|files.*have', 'sql', self._translate_to_sql_files),
    ]

    # Try to match patterns; a translator returning a falsy value lets the
    # search continue with the next pattern.
    for pattern, paradigm, translator in intent_patterns:
        if re.search(pattern, query_lower):
            translated = translator(query)
            if translated:
                return paradigm, translated

    # Fallback: try FTS for any remaining search-like queries
    if any(word in query_lower for word in ['find', 'search', 'show', 'get', 'contains']):
        translated = self._translate_to_fts(query)
        if translated:
            return 'fts', translated

    return None, None
|
||||
|
||||
def _detect_intent(self, query: str) -> str:
    """Classify a natural language query into a coarse intent label.

    Keywords are matched as substrings of the lowercased query. Intents are
    checked from most specific to most generic: the ``file_listing``
    keywords ('show', 'all', 'files', ...) appear in almost every query, so
    they are tested last to keep the statistics, temporal, size, schema and
    structure intents reachable.

    Args:
        query: The natural language query text.

    Returns:
        One of ``"content_search"``, ``"statistics"``, ``"temporal_query"``,
        ``"size_analysis"``, ``"schema_query"``, ``"structure_analysis"``,
        ``"file_listing"`` or ``"general_query"`` when nothing matches.
    """
    query_lower = query.lower()

    # (intent label, trigger keywords) in precedence order.
    intent_keywords = (
        ("content_search", ('find', 'search', 'about', 'contain')),
        ("statistics", ('count', 'how many', 'statistics')),
        ("temporal_query", ('recent', 'latest', 'new')),
        ("size_analysis", ('large', 'big', 'small', 'size')),
        ("schema_query", ('schema', 'schemas')),
        ("structure_analysis", ('heading', 'structure', 'link')),
        # Broadest bucket; 'all' would otherwise also catch "small(est)".
        ("file_listing", ('list', 'show', 'all', 'files')),
    )

    for intent, keywords in intent_keywords:
        if any(keyword in query_lower for keyword in keywords):
            return intent

    return "general_query"
|
||||
|
||||
def _translate_to_fts(self, query: str) -> Optional[str]:
    """Build an ``AND``-joined full text search expression from a query.

    Search terms are taken, in priority order, from an "about X" clause, a
    "contain/contains/containing X" clause (X may be a quoted phrase), or a
    "for X" clause; only the first clause found is used. "about"/"for"
    clauses end at a standalone "in" or "from" word. Stop words and words
    of two characters or fewer are dropped, and punctuation around each
    word is stripped. If no clause yields any word, up to three words
    longer than three characters are taken from the whole query instead.

    Args:
        query: The natural language query text.

    Returns:
        Lowercased terms joined with ``" AND "``, or ``None`` when no usable
        term could be extracted.
    """
    query_lower = query.lower()

    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at',
                  'to', 'for', 'of', 'with', 'by'}
    noise_words = {'find', 'search', 'show', 'documents', 'files', 'about', 'containing'}

    # Extract candidate search phrases, highest priority first.
    search_terms = []

    # "about X": the trailing \b keeps " in"/" from" from matching the start
    # of a longer word ("about user interface" must not stop at " in").
    about_match = re.search(r'about\s+(.+?)(?:\s+in\b|\s+from\b|$)', query_lower)
    if about_match:
        search_terms.append(about_match.group(1))

    # "contain(s|ing) X": a quoted phrase is captured whole; an unquoted
    # term is the next whitespace-delimited word.
    contain_match = re.search(
        r'contain(?:ing|s)?\s+(?:"([^"]+)"|\'([^\']+)\'|(\S+))', query_lower)
    if contain_match:
        search_terms.append(next(g for g in contain_match.groups() if g))

    for_match = re.search(r'(?:search\s+)?for\s+(.+?)(?:\s+in\b|\s+from\b|$)', query_lower)
    if for_match:
        search_terms.append(for_match.group(1))

    # Clean up the highest-priority phrase.
    if search_terms:
        words = []
        for raw_word in search_terms[0].strip(' "\'').split():
            # Trim punctuation such as a trailing "?" so it does not end up
            # inside the search expression.
            word = re.sub(r'^\W+|\W+$', '', raw_word)
            if len(word) > 2 and word not in stop_words:
                words.append(word)
        if words:
            return ' AND '.join(words)

    # Fallback: extract meaningful words from the entire query
    meaningful_words = []
    for word in query.split():
        word_clean = re.sub(r'[^\w]', '', word).lower()
        if len(word_clean) > 3 and word_clean not in noise_words:
            meaningful_words.append(word_clean)

    if meaningful_words:
        return ' AND '.join(meaningful_words[:3])  # Limit to 3 terms

    return None
|
||||
|
||||
def _translate_to_sql_files(self, query: str) -> Optional[str]:
    """Return a SQL statement listing rows of ``markdown_files``.

    When the lowercased query contains 'all files', 'show files' or
    'list files', the statement selects id, filename and created_at for the
    20 newest rows; otherwise it selects every filename in name order.
    """
    lowered = query.lower()
    listing_phrases = ('all files', 'show files', 'list files')

    for phrase in listing_phrases:
        if phrase in lowered:
            return "SELECT id, filename, created_at FROM markdown_files ORDER BY created_at DESC LIMIT 20"

    return "SELECT filename FROM markdown_files ORDER BY filename"
|
||||
|
||||
def _translate_to_sql_stats(self, query: str) -> Optional[str]:
    """Return a SQL ``COUNT`` statement chosen by what the query mentions.

    'files' selects the ``markdown_files`` count, 'schema' the ``schemas``
    count (checked only if 'files' is absent); otherwise both counts are
    returned in a single row.
    """
    lowered = query.lower()

    if 'files' in lowered:
        return "SELECT COUNT(*) as file_count FROM markdown_files"
    if 'schema' in lowered:
        return "SELECT COUNT(*) as schema_count FROM schemas"
    return "SELECT (SELECT COUNT(*) FROM markdown_files) as files, (SELECT COUNT(*) FROM schemas) as schemas"
|
||||
|
||||
def _translate_to_sql_size(self, query: str) -> Optional[str]:
    """Return a SQL statement ranking files by ``LENGTH(content)``.

    The ten largest files are returned unless the lowercased query mentions
    'small' without also mentioning 'largest' or 'biggest', in which case
    the ten smallest are returned.
    """
    largest_first = "SELECT filename, LENGTH(content) as size FROM markdown_files WHERE content IS NOT NULL ORDER BY size DESC LIMIT 10"
    smallest_first = "SELECT filename, LENGTH(content) as size FROM markdown_files WHERE content IS NOT NULL ORDER BY size ASC LIMIT 10"

    lowered = query.lower()
    wants_largest = 'largest' in lowered or 'biggest' in lowered
    # 'small' also covers 'smallest', which the original listed separately.
    wants_smallest = 'small' in lowered

    if wants_smallest and not wants_largest:
        return smallest_first
    return largest_first
|
||||
|
||||
def _translate_to_sql_recent(self, query: str) -> Optional[str]:
    """Return a fixed SQL statement for files created in the last 7 days.

    The ``query`` text is not inspected; the same statement is returned
    for every input.
    """
    recent_files_sql = "SELECT filename, created_at FROM markdown_files WHERE created_at > datetime('now', '-7 days') ORDER BY created_at DESC"
    return recent_files_sql
|
||||
|
||||
def _translate_to_sql_frontmatter(self, query: str) -> Optional[str]:
    """Return a fixed SQL statement for files with non-empty front matter.

    The ``query`` text is not inspected; rows whose ``front_matter`` is
    NULL or the literal ``'{}'`` are excluded by the statement.
    """
    front_matter_sql = "SELECT filename, front_matter FROM markdown_files WHERE front_matter IS NOT NULL AND front_matter != '{}'"
    return front_matter_sql
|
||||
|
||||
def _translate_to_graphql_schemas(self, query: str) -> Optional[str]:
    """Return a fixed GraphQL document selecting fields of ``schemas``.

    The ``query`` text is not inspected; the same document is returned
    for every input.
    """
    schemas_document = """query {
schemas {
filename
title
description
schemaVersion
propertyCount
}
}"""
    return schemas_document
|
||||
|
||||
def _translate_to_graphql_detailed(self, query: str) -> Optional[str]:
    """Return a detailed GraphQL document for files or database statistics.

    When the lowercased query contains 'file', the document selects the
    first 10 ``markdownFiles`` with their details; otherwise it selects
    ``databaseStats``.
    """
    files_document = """query {
markdownFiles(limit: 10) {
id
filename
wordCount
lineCount
frontMatter {
key
value
}
createdAt
}
}"""
    stats_document = """query {
databaseStats {
totalFiles
totalSchemas
totalSizeBytes
lastUpdated
}
}"""

    mentions_file = 'file' in query.lower()
    return files_document if mentions_file else stats_document
|
||||
|
||||
def _translate_to_jsonpath(self, query: str) -> Optional[str]:
    """Return a recursive-descent JSONPath for the element the query names.

    Keywords are matched as substrings of the lowercased query in the order
    heading, link, code, image; the first hit decides the path. Queries
    naming none of them get the headings path.
    """
    # (keyword, JSONPath) pairs in precedence order.
    keyword_paths = (
        ('heading', "$..heading"),
        ('link', "$..link"),
        ('code', "$..code_block"),
        ('image', "$..image"),
    )

    lowered = query.lower()
    for keyword, path in keyword_paths:
        if keyword in lowered:
            return path

    return "$..heading"  # Default to headings
|
||||
|
||||
def can_translate_from(self, other_paradigm: str) -> bool:
    """Report that no source paradigm can be translated into natural language.

    Always returns ``False``, whatever ``other_paradigm`` is.
    """
    supported = False
    return supported
|
||||
|
||||
def translate_query(self, query: str, from_paradigm: str) -> Optional[str]:
    """Decline to translate a query from another paradigm.

    Always returns ``None``; neither argument is inspected.
    """
    translation = None
    return translation
|
||||
Reference in New Issue
Block a user