feat: implement comprehensive query paradigm zoo system (issue #62)
- Created extensible BaseQueryParadigm interface with standardized QueryResult format
- Implemented QueryParadigmRegistry for paradigm discovery and management
- Added 5 working paradigms: SQL, FTS, GraphQL, JSONPath, Natural Language
- Documented 9 additional paradigms: QBE, Batch Manipulation, Visual Query Builder, REST API, NoSQL, UNIX Pipeline, XPath/XQuery, RAG, Data Transformation
- Integrated full CLI interface: list, search, show, exec, categories commands
- Added comprehensive test suite with 23 test cases covering all components
- Auto-registration system enables easy addition of new paradigms
- Organized paradigms by category (structural, textual, semantic, visual, procedural, network) and complexity (beginner, intermediate, advanced)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
271
markitect/query_paradigms/paradigms/fts_paradigm.py
Normal file
271
markitect/query_paradigms/paradigms/fts_paradigm.py
Normal file
@@ -0,0 +1,271 @@
|
||||
"""
|
||||
Full Text Search Paradigm - FTS5-powered content search.
|
||||
"""
|
||||
|
||||
import time
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
from ..base import BaseQueryParadigm, QueryResult
|
||||
|
||||
|
||||
class FullTextSearchParadigm(BaseQueryParadigm):
    """Full text search paradigm using FTS5 for content discovery.

    Queries are forwarded to ``FTSSearchPlugin`` and the hits are converted
    into the standard ``QueryResult`` shape.  The paradigm can also translate
    simple natural-language requests and SQL ``LIKE`` filters into FTS syntax.
    """

    # Database file used when the caller does not supply ``db_path``.
    _DEFAULT_DB_PATH = 'markitect.db'

    # Filler words dropped by the natural-language keyword fallback.
    _NL_STOPWORDS = frozenset({'find', 'search', 'for', 'documents', 'files'})

    @property
    def name(self) -> str:
        """Short identifier of this paradigm."""
        return "FTS"

    @property
    def description(self) -> str:
        """One-line human readable description of this paradigm."""
        return "Full text search across markdown content using SQLite FTS5 for semantic discovery"

    @property
    def category(self) -> str:
        """Category this paradigm is grouped under."""
        return "textual"

    @property
    def complexity(self) -> str:
        """Difficulty level advertised for this paradigm."""
        return "beginner"

    def execute(self, query: str, config: Optional[Dict[str, Any]] = None) -> QueryResult:
        """Execute a full text search query.

        Args:
            query: FTS query string; passed to the search plugin unchanged.
            config: Optional settings. Recognised keys are ``db_path``
                (default ``'markitect.db'``), ``content_type`` (default
                ``'all'``), ``limit`` (default 20) and ``offset`` (default 0).

        Returns:
            A ``QueryResult``. Any exception raised while importing the
            plugin, searching or converting hits is caught and reported as a
            result with ``success=False`` and ``error_message`` set.
        """
        start_time = time.perf_counter()
        settings = config or {}

        try:
            from ...plugins.builtin.search.fts_search import FTSSearchPlugin

            content_type = settings.get('content_type', 'all')

            search_results = FTSSearchPlugin().search(
                # Fall back to the default database even when a config dict
                # is supplied without a ``db_path`` entry.
                db_path=settings.get('db_path') or self._DEFAULT_DB_PATH,
                query=query,
                content_type=content_type,
                limit=settings.get('limit', 20),
                offset=settings.get('offset', 0),
            )

            # Timing covers the search itself, not the result conversion.
            execution_time = (time.perf_counter() - start_time) * 1000

            # Convert FTS results to standard format; unknown hit types are
            # skipped.
            results = [
                converted
                for converted in map(self._convert_result, search_results)
                if converted is not None
            ]

            return QueryResult(
                paradigm=self.name,
                query=query,
                execution_time_ms=execution_time,
                result_count=len(results),
                results=results,
                metadata={
                    "content_type": content_type,
                    "fts_enabled": True,
                    "query_type": self._detect_query_type(query),
                },
                success=True,
            )

        except Exception as e:
            # Deliberate catch-all: callers always receive a QueryResult.
            execution_time = (time.perf_counter() - start_time) * 1000

            return QueryResult(
                paradigm=self.name,
                query=query,
                execution_time_ms=execution_time,
                result_count=0,
                results=[],
                metadata={"fts_enabled": False},
                success=False,
                error_message=str(e),
            )

    @staticmethod
    def _convert_result(result: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Convert one plugin hit into the flat result dict, or None if the
        hit type is neither ``'file'`` nor ``'schema'``."""
        hit_type = result['type']

        if hit_type == 'file':
            file_info = result['file']
            return {
                'type': 'file',
                'score': result['score'],
                'filename': file_info['filename'],
                'content_preview': result.get('highlight', ''),
                'file_id': file_info['id'],
                'created_at': file_info['created_at'],
            }

        if hit_type == 'schema':
            schema_info = result['schema']
            return {
                'type': 'schema',
                'score': result['score'],
                'filename': schema_info['filename'],
                'title': schema_info['title'],
                'description': schema_info['description'],
                'schema_id': schema_info['id'],
                'highlight': result.get('highlight', ''),
            }

        return None

    def get_examples(self) -> List[Dict[str, str]]:
        """Get example FTS queries.

        Returns:
            A list of dicts with ``name``, ``description`` and ``query`` keys.
        """
        return [
            {
                "name": "Simple search",
                "description": "Find documents containing specific words",
                "query": "documentation"
            },
            {
                "name": "Multiple terms",
                "description": "Search for documents with multiple terms",
                "query": "API documentation"
            },
            {
                "name": "Exact phrase",
                "description": "Search for exact phrases",
                "query": '"getting started"'
            },
            {
                "name": "Boolean search",
                "description": "Use AND/OR operators",
                "query": "API AND documentation NOT deprecated"
            },
            {
                "name": "Wildcard search",
                "description": "Prefix matching with wildcards",
                "query": "config*"
            },
            {
                "name": "Proximity search",
                "description": "Find terms near each other",
                "query": "NEAR(database query, 5)"
            }
        ]

    def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
        """Validate FTS query syntax.

        Args:
            query: The query string to check.

        Returns:
            ``(True, None)`` when the query is accepted, otherwise
            ``(False, message)``.  Empty or whitespace-only queries are always
            rejected.  Validation is delegated to the project's
            ``QueryParser``; if that module cannot be imported, a basic
            quote/parenthesis balance check is used instead.
        """
        if not query or not query.strip():
            return False, "Query cannot be empty"

        # Use the FTS query parser for validation
        try:
            from ...plugins.builtin.search.query_parser import QueryParser
            parser = QueryParser()
            return parser.validate_query(query)
        except ImportError:
            # Fallback validation
            return self._basic_validation(query)

    def _basic_validation(self, query: str) -> tuple[bool, Optional[str]]:
        """Basic FTS query validation.

        Checks that double quotes are paired and that parentheses outside
        quoted phrases are balanced and correctly ordered.

        Returns:
            ``(True, None)`` if both checks pass, otherwise ``(False, message)``.
        """
        # Check for balanced quotes
        if query.count('"') % 2 != 0:
            return False, "Unmatched quotes in query"

        # Check for balanced parentheses.  Depth is tracked rather than
        # comparing counts so that ")(" is rejected, and parentheses inside a
        # quoted phrase are ignored because they are literal text there.
        depth = 0
        in_phrase = False
        for char in query:
            if char == '"':
                in_phrase = not in_phrase
            elif in_phrase:
                continue
            elif char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth < 0:
                    return False, "Unmatched parentheses in query"

        if depth != 0:
            return False, "Unmatched parentheses in query"

        return True, None

    def get_syntax_help(self) -> str:
        """Get FTS syntax help as a multi-line plain-text string."""
        return """Full Text Search Syntax:

Basic Search:
word - Find documents containing 'word'
multiple words - Find documents with all words (implicit AND)

Phrase Search:
"exact phrase" - Find exact phrase

Boolean Operators:
word1 AND word2 - Both words must be present
word1 OR word2 - Either word can be present
word1 NOT word2 - First word present, second word absent

Wildcards:
prefix* - Prefix matching (config* matches configuration)

Proximity Search:
NEAR(word1 word2, 5) - Words within 5 words of each other

Column-Specific:
filename:readme - Search only in filename field
content:tutorial - Search only in content field

Examples:
documentation
"getting started"
API AND documentation
config* OR setting*
NEAR(database query, 10)
"""

    def _detect_query_type(self, query: str) -> str:
        """Detect FTS query type.

        The first matching rule wins, in this order: phrase (contains a
        double quote), proximity (``NEAR(``), boolean (`` AND ``/`` OR ``/
        `` NOT `` in any letter case), wildcard (``*``), column (``:``),
        otherwise simple.
        """
        query_upper = query.upper()

        if '"' in query:
            return "phrase_search"
        if 'NEAR(' in query_upper:
            return "proximity_search"
        if any(op in query_upper for op in (' AND ', ' OR ', ' NOT ')):
            return "boolean_search"
        if '*' in query:
            return "wildcard_search"
        if ':' in query:
            return "column_search"
        return "simple_search"

    def can_translate_from(self, other_paradigm: str) -> bool:
        """Check if we can translate from another paradigm.

        Only ``natural_language`` and ``sql`` (case-insensitive) are supported.
        """
        return other_paradigm.lower() in ("natural_language", "sql")

    def translate_query(self, query: str, from_paradigm: str) -> Optional[str]:
        """Translate from another paradigm to FTS.

        Args:
            query: Query expressed in the source paradigm.
            from_paradigm: Source paradigm name (case-insensitive).

        Returns:
            The FTS query, or None if the paradigm is unsupported or no
            translation could be derived.
        """
        source = from_paradigm.lower()
        if source == "natural_language":
            return self._translate_natural_language_to_fts(query)
        if source == "sql":
            return self._translate_sql_to_fts(query)
        return None

    def _translate_natural_language_to_fts(self, query: str) -> Optional[str]:
        """Translate natural language to FTS query.

        Strategies, tried in order:
          1. ``search for <terms>``: the lower-cased text after the marker,
             with `` and ``/`` or `` upper-cased into FTS operators.
          2. ``find ... contain ... '<text>'``: the quoted text as a phrase.
          3. Fallback: every word longer than three characters that is not a
             filler word, joined with ``AND``.

        Returns:
            The FTS query, or None when nothing usable could be extracted.
        """
        query_lower = query.lower()

        # Extract key terms and convert to FTS syntax
        if "search for" in query_lower:
            search_term = query_lower.split("search for", 1)[1].strip()
            # A bare "search for" carries no term; fall through instead of
            # returning an empty (invalid) query.
            if search_term:
                return search_term.replace(" and ", " AND ").replace(" or ", " OR ")

        if "find" in query_lower and "contain" in query_lower:
            import re
            match = re.search(r'find.*?contain.*?["\'](.+?)["\']', query_lower)
            if match:
                return f'"{match.group(1)}"'

        # Simple keyword extraction.  Sentence punctuation is stripped first
        # so a trailing "?" or "," does not end up inside the FTS query.
        keywords = []
        for word in query.split():
            token = word.strip('.,;:!?')
            if len(token) > 3 and token.lower() not in self._NL_STOPWORDS:
                keywords.append(token)

        if keywords:
            return " AND ".join(keywords)

        return None

    def _translate_sql_to_fts(self, query: str) -> Optional[str]:
        """Translate simple SQL LIKE queries to FTS.

        Every ``LIKE '%text%'`` pattern becomes one FTS term and the terms are
        joined with ``AND``.  Patterns that are a single word are emitted
        as-is; anything else (spaces, punctuation) is emitted as a quoted
        phrase so it stays a valid FTS query and is matched contiguously.

        Returns:
            The FTS query, or None if no ``LIKE '%...%'`` pattern is present.
        """
        import re

        # Extract LIKE patterns (the regex is case-insensitive on its own).
        like_matches = re.findall(r"LIKE\s+'%(.+?)%'", query, re.IGNORECASE)
        if not like_matches:
            return None

        terms = []
        for pattern in like_matches:
            if re.fullmatch(r"\w+", pattern):
                terms.append(pattern)
            else:
                # Embedded double quotes are escaped by doubling them.
                terms.append('"' + pattern.replace('"', '""') + '"')

        return " AND ".join(terms)
|
||||
Reference in New Issue
Block a user