Files
markitect-main/markitect/query_paradigms/paradigms/sql_paradigm.py
tegwick 5143864a86 feat: implement comprehensive query paradigm zoo system (issue #62)
- Created extensible BaseQueryParadigm interface with standardized QueryResult format
- Implemented QueryParadigmRegistry for paradigm discovery and management
- Added 5 working paradigms: SQL, FTS, GraphQL, JSONPath, Natural Language
- Documented 9 additional paradigms: QBE, Batch Manipulation, Visual Query Builder, REST API, NoSQL, UNIX Pipeline, XPath/XQuery, RAG, Data Transformation
- Integrated full CLI interface: list, search, show, exec, categories commands
- Added comprehensive test suite with 23 test cases covering all components
- Auto-registration system enables easy addition of new paradigms
- Organized paradigms by category (structural, textual, semantic, visual, procedural, network) and complexity (beginner, intermediate, advanced)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-03 23:06:57 +02:00

197 lines
7.2 KiB
Python

"""
SQL Query Paradigm - Direct database queries using SQL.
"""
import re
import sqlite3
import time
from typing import Dict, Any, List, Optional

from ..base import BaseQueryParadigm, QueryResult
class SQLQueryParadigm(BaseQueryParadigm):
    """SQL database query paradigm for direct data access.

    Executes SQL text against a SQLite database file and reports the outcome
    (rows or error) as a ``QueryResult``. Also offers example queries, a
    SELECT-only safety check, syntax help, and a small keyword-based
    translation from natural-language requests to SQL.
    """

    # Database file used when the caller's config does not name one.
    _DEFAULT_DB_PATH = 'markitect.db'

    # Statement keywords rejected by validate_query(); reported in this order.
    _DANGEROUS_KEYWORDS = ('DROP', 'DELETE', 'UPDATE', 'INSERT', 'ALTER', 'CREATE')

    # Upper-cased function prefixes that mark a SELECT as an aggregation.
    _AGGREGATE_MARKERS = ('COUNT(', 'SUM(', 'AVG(')

    @property
    def name(self) -> str:
        """Short display name of this paradigm."""
        return "SQL"

    @property
    def description(self) -> str:
        """One-line human-readable summary of this paradigm."""
        return "Direct SQL queries against the MarkiTect database for precise data extraction"

    @property
    def category(self) -> str:
        """Category label under which this paradigm is grouped."""
        return "structural"

    @property
    def complexity(self) -> str:
        """Complexity label of this paradigm."""
        return "intermediate"

    def execute(self, query: str, config: Optional[Dict[str, Any]] = None) -> QueryResult:
        """Execute SQL query against the database.

        Args:
            query: SQL text passed to SQLite as-is. This method does not call
                ``validate_query``; callers that need the SELECT-only
                guarantee must validate first.
            config: Optional settings. ``config['db_path']`` selects the
                SQLite file; when ``config`` is empty or the key is missing
                or ``None``, ``'markitect.db'`` is used.

        Returns:
            A ``QueryResult``. On success, ``results`` holds one dict per row
            (column name -> value) and ``metadata`` records the database path
            and the detected query type. On any exception, ``success`` is
            ``False`` and ``error_message`` carries the exception text; the
            exception itself is not propagated.
        """
        start_time = time.perf_counter()
        try:
            # Fall back to the default path whenever no usable path was given,
            # not only when the whole config is missing.
            db_path = config.get('db_path') if config else None
            if db_path is None:
                db_path = self._DEFAULT_DB_PATH

            conn = sqlite3.connect(db_path)
            try:
                # sqlite3.Row lets each row be converted to a dict by column name.
                conn.row_factory = sqlite3.Row
                cursor = conn.cursor()
                cursor.execute(query)
                rows = cursor.fetchall()
                results = [dict(row) for row in rows]
            finally:
                # Always release the connection, including when the query fails.
                conn.close()

            execution_time = (time.perf_counter() - start_time) * 1000
            return QueryResult(
                paradigm="SQL",
                query=query,
                execution_time_ms=execution_time,
                result_count=len(results),
                results=results,
                metadata={
                    "database_path": db_path,
                    "query_type": self._detect_query_type(query)
                },
                success=True
            )
        except Exception as e:
            # Boundary handler: every failure is reported through QueryResult
            # rather than raised to the caller.
            execution_time = (time.perf_counter() - start_time) * 1000
            return QueryResult(
                paradigm="SQL",
                query=query,
                execution_time_ms=execution_time,
                result_count=0,
                results=[],
                metadata={},
                success=False,
                error_message=str(e)
            )

    def get_examples(self) -> List[Dict[str, str]]:
        """Get example SQL queries.

        Returns:
            A list of dicts, each with ``name``, ``description`` and ``query``
            keys.
        """
        return [
            {
                "name": "List all files",
                "description": "Get all markdown files with basic info",
                "query": "SELECT id, filename, created_at FROM markdown_files ORDER BY created_at DESC LIMIT 10"
            },
            {
                "name": "Files with front matter",
                "description": "Find files that have front matter metadata",
                "query": "SELECT filename, front_matter FROM markdown_files WHERE front_matter IS NOT NULL AND front_matter != '{}'"
            },
            {
                "name": "Large files",
                "description": "Find files with more than 1000 characters",
                "query": "SELECT filename, LENGTH(content) as size FROM markdown_files WHERE LENGTH(content) > 1000 ORDER BY size DESC"
            },
            {
                "name": "Schema statistics",
                "description": "Get schema counts and information",
                "query": "SELECT COUNT(*) as total_schemas, AVG(LENGTH(schema_content)) as avg_size FROM schemas"
            },
            {
                "name": "Recent activity",
                "description": "Show recent file activity",
                "query": "SELECT filename, created_at FROM markdown_files WHERE created_at > datetime('now', '-7 days') ORDER BY created_at DESC"
            }
        ]

    def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
        """Check that a query is a non-empty, read-only SELECT.

        This is a safety filter, not a SQL parser: it does not verify that
        the query is syntactically valid.

        Args:
            query: SQL text to check.

        Returns:
            ``(True, None)`` when the query is accepted, otherwise
            ``(False, reason)``.
        """
        if not query or not query.strip():
            return False, "Query cannot be empty"

        query_upper = query.upper().strip()

        # Only allow SELECT queries for safety
        if not query_upper.startswith('SELECT'):
            return False, "Only SELECT queries are allowed for safety"

        # Compare whole word tokens, not substrings, so identifiers such as
        # ``created_at`` or ``updated_at`` are not mistaken for CREATE/UPDATE.
        # A keyword that appears as a standalone word is still rejected, even
        # inside a string literal (deliberately conservative).
        tokens = set(re.findall(r"\w+", query_upper))
        for keyword in self._DANGEROUS_KEYWORDS:
            if keyword in tokens:
                return False, f"Keyword '{keyword}' is not allowed for safety"

        return True, None

    def get_syntax_help(self) -> str:
        """Get SQL syntax help."""
        return """SQL Query Syntax:
Basic Structure:
SELECT columns FROM table WHERE condition ORDER BY column
Available Tables:
- markdown_files (id, filename, content, front_matter, created_at)
- schemas (id, filename, title, description, schema_content, created_at, updated_at)
Common Functions:
- LENGTH(column) - Get text length
- datetime('now') - Current timestamp
- datetime('now', '-7 days') - Date arithmetic
Examples:
SELECT * FROM markdown_files LIMIT 5
SELECT filename FROM markdown_files WHERE content LIKE '%TODO%'
SELECT COUNT(*) FROM schemas WHERE title IS NOT NULL
Safety Notes:
- Only SELECT queries are allowed
- No data modification operations (INSERT, UPDATE, DELETE)
- No schema changes (CREATE, ALTER, DROP)
"""

    def _detect_query_type(self, query: str) -> str:
        """Classify a query with simple case-insensitive text heuristics.

        Returns:
            For queries starting with SELECT: ``"aggregation"`` if COUNT(,
            SUM( or AVG( appears, else ``"join"`` if JOIN appears, else
            ``"filtered_select"`` if WHERE appears, else ``"simple_select"``.
            Any other query yields ``"unknown"``.
        """
        query_upper = query.upper().strip()
        if not query_upper.startswith('SELECT'):
            return "unknown"

        # Checks are ordered by precedence: aggregation wins over join,
        # which wins over a plain WHERE filter.
        if any(marker in query_upper for marker in self._AGGREGATE_MARKERS):
            return "aggregation"
        if 'JOIN' in query_upper:
            return "join"
        if 'WHERE' in query_upper:
            return "filtered_select"
        return "simple_select"

    def can_translate_from(self, other_paradigm: str) -> bool:
        """Check if we can translate from another paradigm.

        Only ``"natural_language"`` (compared case-insensitively) is
        supported.
        """
        # Could potentially translate simple natural language to SQL
        return other_paradigm.lower() in ["natural_language"]

    def translate_query(self, query: str, from_paradigm: str) -> Optional[str]:
        """Translate from another paradigm to SQL.

        Returns:
            The SQL text, or ``None`` when the source paradigm is unsupported
            or no known pattern matches the query.
        """
        if from_paradigm.lower() == "natural_language":
            return self._translate_natural_language_to_sql(query)
        return None

    def _translate_natural_language_to_sql(self, query: str) -> Optional[str]:
        """Simple natural language to SQL translation.

        Matches fixed phrases case-insensitively; the first matching rule
        wins. Returns ``None`` when no phrase is recognised.
        """
        query_lower = query.lower()

        # Simple pattern matching for common requests
        if "all files" in query_lower or "list files" in query_lower:
            return "SELECT id, filename, created_at FROM markdown_files ORDER BY created_at DESC"
        if "recent files" in query_lower:
            return "SELECT filename, created_at FROM markdown_files WHERE created_at > datetime('now', '-7 days') ORDER BY created_at DESC"
        if "large files" in query_lower or "big files" in query_lower:
            return "SELECT filename, LENGTH(content) as size FROM markdown_files WHERE LENGTH(content) > 1000 ORDER BY size DESC"
        if "schemas" in query_lower and "count" in query_lower:
            return "SELECT COUNT(*) as total_schemas FROM schemas"
        if "front matter" in query_lower:
            return "SELECT filename, front_matter FROM markdown_files WHERE front_matter IS NOT NULL AND front_matter != '{}'"
        return None