Files
markitect-main/markitect/query_paradigms/paradigms/fts_paradigm.py
tegwick 5143864a86 feat: implement comprehensive query paradigm zoo system (issue #62)
- Created extensible BaseQueryParadigm interface with standardized QueryResult format
- Implemented QueryParadigmRegistry for paradigm discovery and management
- Added 5 working paradigms: SQL, FTS, GraphQL, JSONPath, Natural Language
- Documented 9 additional paradigms: QBE, Batch Manipulation, Visual Query Builder, REST API, NoSQL, UNIX Pipeline, XPath/XQuery, RAG, Data Transformation
- Integrated full CLI interface: list, search, show, exec, categories commands
- Added comprehensive test suite with 23 test cases covering all components
- Auto-registration system enables easy addition of new paradigms
- Organized paradigms by category (structural, textual, semantic, visual, procedural, network) and complexity (beginner, intermediate, advanced)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-03 23:06:57 +02:00

271 lines
9.2 KiB
Python

"""
Full Text Search Paradigm - FTS5-powered content search.
"""
import time
from typing import Dict, Any, List, Optional
from ..base import BaseQueryParadigm, QueryResult
class FullTextSearchParadigm(BaseQueryParadigm):
    """Full text search paradigm using FTS5 for content discovery.

    Searches are delegated to ``FTSSearchPlugin`` and every hit is flattened
    into a plain dictionary carried by ``QueryResult``. The paradigm can also
    derive FTS queries from natural-language requests and from simple SQL
    ``LIKE`` filters.
    """

    # Database file used when the caller's config does not name one.
    _DEFAULT_DB_PATH = 'markitect.db'

    # Words ignored by the natural-language keyword fallback.
    _NL_STOP_WORDS = ('find', 'search', 'for', 'documents', 'files')

    # Sentence punctuation removed from keywords before they are joined,
    # so a request such as "... about databases?" does not leak "?" into
    # the generated query. "*" and quotes are deliberately left untouched.
    _SENTENCE_PUNCTUATION = ".,;:!?"

    @property
    def name(self) -> str:
        """Short identifier of this paradigm."""
        return "FTS"

    @property
    def description(self) -> str:
        """One-line human readable summary of the paradigm."""
        return "Full text search across markdown content using SQLite FTS5 for semantic discovery"

    @property
    def category(self) -> str:
        """Category label of this paradigm."""
        return "textual"

    @property
    def complexity(self) -> str:
        """Complexity label of this paradigm."""
        return "beginner"

    def execute(self, query: str, config: Optional[Dict[str, Any]] = None) -> QueryResult:
        """Execute full text search query.

        Args:
            query: FTS query string; passed to the search plugin unchanged.
            config: Optional settings. Recognized keys are ``db_path``
                (default ``'markitect.db'``), ``content_type`` (default
                ``'all'``), ``limit`` (default 20) and ``offset``
                (default 0). Each key falls back to its default on its own.

        Returns:
            A ``QueryResult``. Any exception raised while searching or
            converting hits is caught and reported through
            ``success=False`` and ``error_message`` instead of propagating.
        """
        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # wall-clock adjustments.
        start_time = time.perf_counter()
        try:
            # Imported inside the try block so that an import failure is
            # reported as a failed QueryResult like any other error.
            from ...plugins.builtin.search.fts_search import FTSSearchPlugin

            settings = config or {}
            db_path = settings.get('db_path', self._DEFAULT_DB_PATH)
            content_type = settings.get('content_type', 'all')
            limit = settings.get('limit', 20)
            offset = settings.get('offset', 0)

            fts_plugin = FTSSearchPlugin()
            search_results = fts_plugin.search(
                db_path=db_path,
                query=query,
                content_type=content_type,
                limit=limit,
                offset=offset,
            )
            execution_time = (time.perf_counter() - start_time) * 1000

            # Convert FTS results to standard format; hits of any other type
            # are left out of the result list.
            results = []
            for hit in search_results:
                entry = self._convert_hit(hit)
                if entry is not None:
                    results.append(entry)

            return QueryResult(
                paradigm="FTS",
                query=query,
                execution_time_ms=execution_time,
                result_count=len(results),
                results=results,
                metadata={
                    "content_type": content_type,
                    "fts_enabled": True,
                    "query_type": self._detect_query_type(query),
                },
                success=True,
            )
        except Exception as e:
            # Boundary of the paradigm API: failures are returned as data.
            execution_time = (time.perf_counter() - start_time) * 1000
            return QueryResult(
                paradigm="FTS",
                query=query,
                execution_time_ms=execution_time,
                result_count=0,
                results=[],
                metadata={"fts_enabled": False},
                success=False,
                error_message=str(e),
            )

    @staticmethod
    def _convert_hit(hit: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Flatten one plugin hit; return None unless its type is 'file' or 'schema'."""
        if hit['type'] == 'file':
            return {
                'type': 'file',
                'score': hit['score'],
                'filename': hit['file']['filename'],
                'content_preview': hit.get('highlight', ''),
                'file_id': hit['file']['id'],
                'created_at': hit['file']['created_at'],
            }
        if hit['type'] == 'schema':
            return {
                'type': 'schema',
                'score': hit['score'],
                'filename': hit['schema']['filename'],
                'title': hit['schema']['title'],
                'description': hit['schema']['description'],
                'schema_id': hit['schema']['id'],
                'highlight': hit.get('highlight', ''),
            }
        return None

    def get_examples(self) -> List[Dict[str, str]]:
        """Get example FTS queries.

        Returns:
            A list of dictionaries with ``name``, ``description`` and
            ``query`` keys.
        """
        return [
            {
                "name": "Simple search",
                "description": "Find documents containing specific words",
                "query": "documentation",
            },
            {
                "name": "Multiple terms",
                "description": "Search for documents with multiple terms",
                "query": "API documentation",
            },
            {
                "name": "Exact phrase",
                "description": "Search for exact phrases",
                "query": '"getting started"',
            },
            {
                "name": "Boolean search",
                "description": "Use AND/OR operators",
                "query": "API AND documentation NOT deprecated",
            },
            {
                "name": "Wildcard search",
                "description": "Prefix matching with wildcards",
                "query": "config*",
            },
            {
                "name": "Proximity search",
                "description": "Find terms near each other",
                "query": "NEAR(database query, 5)",
            },
        ]

    def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
        """Validate FTS query syntax.

        Empty or whitespace-only queries are rejected here. Everything else
        is handed to ``QueryParser.validate_query``; when that parser cannot
        be imported, ``_basic_validation`` is used instead.

        Returns:
            ``(True, None)`` for an accepted query, otherwise ``(False,
            message)``.
        """
        if not query or not query.strip():
            return False, "Query cannot be empty"

        try:
            from ...plugins.builtin.search.query_parser import QueryParser
        except ImportError:
            # Fallback validation
            return self._basic_validation(query)
        return QueryParser().validate_query(query)

    def _basic_validation(self, query: str) -> tuple[bool, Optional[str]]:
        """Basic FTS query validation.

        Checks that double quotes are paired and that parentheses outside
        quoted phrases are balanced and correctly ordered.

        Returns:
            ``(True, None)`` when both checks pass, otherwise ``(False,
            message)``.
        """
        if query.count('"') % 2 != 0:
            return False, "Unmatched quotes in query"

        depth = 0
        in_phrase = False
        for char in query:
            if char == '"':
                in_phrase = not in_phrase
            elif in_phrase:
                # Parentheses inside a quoted phrase are literal text.
                continue
            elif char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth < 0:
                    # A ")" appeared before its "(": equal counts are not enough.
                    return False, "Unmatched parentheses in query"
        if depth != 0:
            return False, "Unmatched parentheses in query"
        return True, None

    def get_syntax_help(self) -> str:
        """Get FTS syntax help."""
        return """Full Text Search Syntax:
Basic Search:
word - Find documents containing 'word'
multiple words - Find documents with all words (implicit AND)
Phrase Search:
"exact phrase" - Find exact phrase
Boolean Operators:
word1 AND word2 - Both words must be present
word1 OR word2 - Either word can be present
word1 NOT word2 - First word present, second word absent
Wildcards:
prefix* - Prefix matching (config* matches configuration)
Proximity Search:
NEAR(word1 word2, 5) - Words within 5 words of each other
Column-Specific:
filename:readme - Search only in filename field
content:tutorial - Search only in content field
Examples:
documentation
"getting started"
API AND documentation
config* OR setting*
NEAR(database query, 10)
"""

    def _detect_query_type(self, query: str) -> str:
        """Detect FTS query type.

        The checks run in a fixed order and the first match wins: phrase,
        proximity, boolean, wildcard, column, then simple. ``NEAR(`` and the
        boolean operators are matched case-insensitively.
        """
        query_upper = query.upper()
        if '"' in query:
            return "phrase_search"
        if 'NEAR(' in query_upper:
            return "proximity_search"
        if any(op in query_upper for op in (' AND ', ' OR ', ' NOT ')):
            return "boolean_search"
        if '*' in query:
            return "wildcard_search"
        if ':' in query:
            return "column_search"
        return "simple_search"

    def can_translate_from(self, other_paradigm: str) -> bool:
        """Check if we can translate from another paradigm."""
        return other_paradigm.lower() in ["natural_language", "sql"]

    def translate_query(self, query: str, from_paradigm: str) -> Optional[str]:
        """Translate from another paradigm to FTS.

        Args:
            query: Query written for ``from_paradigm``.
            from_paradigm: Source paradigm name, compared case-insensitively.

        Returns:
            The FTS query, or None when the paradigm is unsupported or no
            translation could be derived.
        """
        source = from_paradigm.lower()
        if source == "natural_language":
            return self._translate_natural_language_to_fts(query)
        if source == "sql":
            return self._translate_sql_to_fts(query)
        return None

    def _translate_natural_language_to_fts(self, query: str) -> Optional[str]:
        """Translate natural language to FTS query.

        Three strategies are tried in order: the text following
        "search for", a quoted phrase in a "find ... contain ..." request,
        and finally the remaining keywords joined with ``AND``.

        Returns:
            The FTS query, or None when nothing usable was found.
        """
        query_lower = query.lower()

        if "search for" in query_lower:
            search_term = query_lower.split("search for", 1)[1].strip()
            # An empty remainder is not a query; let the later strategies
            # (and ultimately None) handle it instead of returning "".
            if search_term:
                return search_term.replace(" and ", " AND ").replace(" or ", " OR ")

        if "find" in query_lower and "contain" in query_lower:
            import re
            match = re.search(r'find.*?contain.*?["\'](.+?)["\']', query_lower)
            if match:
                return f'"{match.group(1)}"'

        # Simple keyword extraction: words longer than three characters
        # that are not stop words, with sentence punctuation removed.
        stripped = (word.strip(self._SENTENCE_PUNCTUATION) for word in query.split())
        keywords = [
            token for token in stripped
            if len(token) > 3 and token.lower() not in self._NL_STOP_WORDS
        ]
        if keywords:
            return " AND ".join(keywords)
        return None

    def _translate_sql_to_fts(self, query: str) -> Optional[str]:
        """Translate simple SQL LIKE queries to FTS.

        Only patterns of the form ``LIKE '%text%'`` are recognized; the
        captured texts are joined with ``AND``.

        Returns:
            The FTS query, or None when no such pattern is present.
        """
        if 'LIKE' not in query.upper():
            return None

        import re
        like_matches = re.findall(r"LIKE\s+'%(.+?)%'", query, re.IGNORECASE)
        if like_matches:
            return " AND ".join(like_matches)
        return None