- Created extensible BaseQueryParadigm interface with standardized QueryResult format
- Implemented QueryParadigmRegistry for paradigm discovery and management
- Added 5 working paradigms: SQL, FTS, GraphQL, JSONPath, Natural Language
- Documented 9 additional paradigms: QBE, Batch Manipulation, Visual Query Builder, REST API, NoSQL, UNIX Pipeline, XPath/XQuery, RAG, Data Transformation
- Integrated full CLI interface: list, search, show, exec, categories commands
- Added comprehensive test suite with 23 test cases covering all components
- Auto-registration system enables easy addition of new paradigms
- Organized paradigms by category (structural, textual, semantic, visual, procedural, network) and complexity (beginner, intermediate, advanced)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
271 lines
9.2 KiB
Python
271 lines
9.2 KiB
Python
"""
|
|
Full Text Search Paradigm - FTS5-powered content search.
|
|
"""
|
|
|
|
import time
|
|
from typing import Dict, Any, List, Optional
|
|
|
|
from ..base import BaseQueryParadigm, QueryResult
|
|
|
|
|
|
class FullTextSearchParadigm(BaseQueryParadigm):
|
|
"""Full text search paradigm using FTS5 for content discovery."""
|
|
|
|
@property
def name(self) -> str:
    """Short identifier of this paradigm."""
    paradigm_name = "FTS"
    return paradigm_name
|
|
|
|
@property
def description(self) -> str:
    """One-sentence, human-readable summary of this paradigm."""
    return (
        "Full text search across markdown content "
        "using SQLite FTS5 for semantic discovery"
    )
|
|
|
|
@property
def category(self) -> str:
    """Category label this paradigm is grouped under."""
    category_label = "textual"
    return category_label
|
|
|
|
@property
def complexity(self) -> str:
    """Complexity level assigned to this paradigm."""
    level = "beginner"
    return level
|
|
|
|
def execute(self, query: str, config: Optional[Dict[str, Any]] = None) -> QueryResult:
    """Run *query* through the FTS search plugin and wrap the hits in a QueryResult.

    Args:
        query: FTS query string, forwarded unchanged to the search plugin.
        config: Optional settings. Recognised keys are ``db_path``
            (default ``'markitect.db'``), ``content_type`` (default
            ``'all'``), ``limit`` (default 20) and ``offset`` (default 0).

    Returns:
        On success, a QueryResult with ``success=True`` whose ``results``
        hold one dict per ``'file'`` or ``'schema'`` hit (hits of any other
        type are skipped). If anything raises, a QueryResult with
        ``success=False``, no results and ``error_message`` set to the
        exception text. ``execution_time_ms`` is wall-clock milliseconds.
    """
    start_time = time.time()
    settings = config or {}

    try:
        # Imported lazily so a missing plugin surfaces as a failed
        # QueryResult instead of breaking import of this module.
        from ...plugins.builtin.search.fts_search import FTSSearchPlugin

        # Fall back to the default database when 'db_path' is missing or
        # empty; previously a config dict without the key passed None on.
        db_path = settings.get('db_path') or 'markitect.db'
        content_type = settings.get('content_type', 'all')
        limit = settings.get('limit', 20)
        offset = settings.get('offset', 0)

        fts_plugin = FTSSearchPlugin()
        search_results = fts_plugin.search(
            db_path=db_path,
            query=query,
            content_type=content_type,
            limit=limit,
            offset=offset,
        )

        # Timing covers the search call only, not the conversion below.
        execution_time = (time.time() - start_time) * 1000

        # Convert plugin hits to the flat dictionaries callers expect.
        results = []
        for result in search_results:
            if result['type'] == 'file':
                file_info = result['file']
                results.append({
                    'type': 'file',
                    'score': result['score'],
                    'filename': file_info['filename'],
                    'content_preview': result.get('highlight', ''),
                    'file_id': file_info['id'],
                    'created_at': file_info['created_at'],
                })
            elif result['type'] == 'schema':
                schema_info = result['schema']
                results.append({
                    'type': 'schema',
                    'score': result['score'],
                    'filename': schema_info['filename'],
                    'title': schema_info['title'],
                    'description': schema_info['description'],
                    'schema_id': schema_info['id'],
                    'highlight': result.get('highlight', ''),
                })

        return QueryResult(
            paradigm="FTS",
            query=query,
            execution_time_ms=execution_time,
            result_count=len(results),
            results=results,
            metadata={
                "content_type": content_type,
                "fts_enabled": True,
                "query_type": self._detect_query_type(query),
            },
            success=True,
        )

    except Exception as e:
        # Deliberate boundary: any failure (missing plugin, bad query,
        # malformed hit) is reported through the result object.
        execution_time = (time.time() - start_time) * 1000

        return QueryResult(
            paradigm="FTS",
            query=query,
            execution_time_ms=execution_time,
            result_count=0,
            results=[],
            metadata={"fts_enabled": False},
            success=False,
            error_message=str(e),
        )
|
|
|
|
def get_examples(self) -> List[Dict[str, str]]:
    """Return sample FTS queries as dicts with name, description and query."""
    catalogue = (
        ("Simple search", "Find documents containing specific words", "documentation"),
        ("Multiple terms", "Search for documents with multiple terms", "API documentation"),
        ("Exact phrase", "Search for exact phrases", '"getting started"'),
        ("Boolean search", "Use AND/OR operators", "API AND documentation NOT deprecated"),
        ("Wildcard search", "Prefix matching with wildcards", "config*"),
        ("Proximity search", "Find terms near each other", "NEAR(database query, 5)"),
    )
    return [
        {"name": title, "description": summary, "query": sample}
        for title, summary, sample in catalogue
    ]
|
|
|
|
def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
    """Check FTS query syntax, returning ``(is_valid, error_message)``.

    Blank queries are rejected outright. Otherwise the plugin's QueryParser
    decides; if an ImportError is raised while using it, the built-in
    structural check is used instead.
    """
    is_blank = not (query and query.strip())
    if is_blank:
        return False, "Query cannot be empty"

    try:
        from ...plugins.builtin.search.query_parser import QueryParser
        verdict = QueryParser().validate_query(query)
    except ImportError:
        # Parser unavailable: fall back to the quote/parenthesis check.
        verdict = self._basic_validation(query)
    return verdict
|
|
|
|
def _basic_validation(self, query: str) -> tuple[bool, Optional[str]]:
|
|
"""Basic FTS query validation."""
|
|
# Check for balanced quotes
|
|
quote_count = query.count('"')
|
|
if quote_count % 2 != 0:
|
|
return False, "Unmatched quotes in query"
|
|
|
|
# Check for balanced parentheses
|
|
open_parens = query.count('(')
|
|
close_parens = query.count(')')
|
|
if open_parens != close_parens:
|
|
return False, "Unmatched parentheses in query"
|
|
|
|
return True, None
|
|
|
|
def get_syntax_help(self) -> str:
    """Return the multi-line FTS syntax cheat sheet shown to users."""
    # NOTE(review): any column alignment inside the original help text is
    # not visible in this view of the file; confirm spacing against upstream.
    help_lines = [
        "Full Text Search Syntax:",
        "",
        "Basic Search:",
        "word - Find documents containing 'word'",
        "multiple words - Find documents with all words (implicit AND)",
        "",
        "Phrase Search:",
        '"exact phrase" - Find exact phrase',
        "",
        "Boolean Operators:",
        "word1 AND word2 - Both words must be present",
        "word1 OR word2 - Either word can be present",
        "word1 NOT word2 - First word present, second word absent",
        "",
        "Wildcards:",
        "prefix* - Prefix matching (config* matches configuration)",
        "",
        "Proximity Search:",
        "NEAR(word1 word2, 5) - Words within 5 words of each other",
        "",
        "Column-Specific:",
        "filename:readme - Search only in filename field",
        "content:tutorial - Search only in content field",
        "",
        "Examples:",
        "documentation",
        '"getting started"',
        "API AND documentation",
        "config* OR setting*",
        "NEAR(database query, 10)",
    ]
    # The text ends with a newline after the last example.
    return "\n".join(help_lines) + "\n"
|
|
|
|
def _detect_query_type(self, query: str) -> str:
|
|
"""Detect FTS query type."""
|
|
query_upper = query.upper()
|
|
|
|
if '"' in query:
|
|
return "phrase_search"
|
|
elif 'NEAR(' in query_upper:
|
|
return "proximity_search"
|
|
elif any(op in query_upper for op in [' AND ', ' OR ', ' NOT ']):
|
|
return "boolean_search"
|
|
elif '*' in query:
|
|
return "wildcard_search"
|
|
elif ':' in query:
|
|
return "column_search"
|
|
else:
|
|
return "simple_search"
|
|
|
|
def can_translate_from(self, other_paradigm: str) -> bool:
    """Tell whether queries of *other_paradigm* can be translated to FTS.

    The paradigm name is compared case-insensitively.
    """
    source = other_paradigm.lower()
    return source == "natural_language" or source == "sql"
|
|
|
|
def translate_query(self, query: str, from_paradigm: str) -> Optional[str]:
    """Convert *query* written in *from_paradigm* into an FTS query.

    Returns None when the source paradigm is not supported or when the
    chosen translator produces nothing.
    """
    translator_names = {
        "natural_language": "_translate_natural_language_to_fts",
        "sql": "_translate_sql_to_fts",
    }
    translator_name = translator_names.get(from_paradigm.lower())
    if translator_name is None:
        return None
    # Resolved by name only once a translator is known to be needed.
    return getattr(self, translator_name)(query)
|
|
|
|
def _translate_natural_language_to_fts(self, query: str) -> Optional[str]:
|
|
"""Translate natural language to FTS query."""
|
|
query_lower = query.lower()
|
|
|
|
# Extract key terms and convert to FTS syntax
|
|
if "search for" in query_lower:
|
|
# Extract what comes after "search for"
|
|
parts = query_lower.split("search for", 1)
|
|
if len(parts) > 1:
|
|
search_term = parts[1].strip()
|
|
return search_term.replace(" and ", " AND ").replace(" or ", " OR ")
|
|
|
|
if "find" in query_lower and "contain" in query_lower:
|
|
# Extract terms between "find" and "contain"
|
|
import re
|
|
match = re.search(r'find.*?contain.*?["\'](.+?)["\']', query_lower)
|
|
if match:
|
|
return f'"{match.group(1)}"'
|
|
|
|
# Simple keyword extraction
|
|
keywords = [word for word in query.split() if len(word) > 3 and word.lower() not in ['find', 'search', 'for', 'documents', 'files']]
|
|
if keywords:
|
|
return " AND ".join(keywords)
|
|
|
|
return None
|
|
|
|
def _translate_sql_to_fts(self, query: str) -> Optional[str]:
|
|
"""Translate simple SQL LIKE queries to FTS."""
|
|
if 'LIKE' in query.upper():
|
|
import re
|
|
# Extract LIKE patterns
|
|
like_matches = re.findall(r"LIKE\s+'%(.+?)%'", query, re.IGNORECASE)
|
|
if like_matches:
|
|
return " AND ".join(like_matches)
|
|
|
|
return None |