Files
markitect-main/markitect/query_paradigms/paradigms/rag_paradigm.py
tegwick 5143864a86 feat: implement comprehensive query paradigm zoo system (issue #62)
- Created extensible BaseQueryParadigm interface with standardized QueryResult format
- Implemented QueryParadigmRegistry for paradigm discovery and management
- Added 5 working paradigms: SQL, FTS, GraphQL, JSONPath, Natural Language
- Documented 9 additional paradigms: QBE, Batch Manipulation, Visual Query Builder, REST API, NoSQL, UNIX Pipeline, XPath/XQuery, RAG, Data Transformation
- Integrated full CLI interface: list, search, show, exec, categories commands
- Added comprehensive test suite with 23 test cases covering all components
- Auto-registration system enables easy addition of new paradigms
- Organized paradigms by category (structural, textual, semantic, visual, procedural, network) and complexity (beginner, intermediate, advanced)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-03 23:06:57 +02:00

110 lines
3.7 KiB
Python

"""
Retrieval-Augmented Generation (RAG) Paradigm - LLM + Vector Database.
"""
import time
from typing import Dict, Any, List, Optional
from ..base import BaseQueryParadigm, QueryResult
class RAGParadigm(BaseQueryParadigm):
    """Placeholder paradigm for Retrieval-Augmented Generation (RAG) queries.

    The paradigm describes itself (name, description, category, complexity),
    offers example queries, syntax help and basic query validation, but
    ``execute`` does not run anything yet: it always returns an unsuccessful
    ``QueryResult`` whose metadata carries ``"status": "not_implemented"``.
    """

    # Shortest stripped query accepted by ``validate_query``; anything shorter
    # is rejected as not being a descriptive question or request.
    _MIN_QUERY_LENGTH = 10

    @property
    def name(self) -> str:
        """Return the display name of this paradigm."""
        return "RAG (Retrieval-Augmented Generation)"

    @property
    def description(self) -> str:
        """Return a one-sentence description of this paradigm."""
        return "Large Language Model retrieves relevant facts from vector database for enhanced responses"

    @property
    def category(self) -> str:
        """Return the category label of this paradigm (``"semantic"``)."""
        return "semantic"

    @property
    def complexity(self) -> str:
        """Return the complexity label of this paradigm (``"advanced"``)."""
        return "advanced"

    def execute(self, query: str, config: Optional[Dict[str, Any]] = None) -> QueryResult:
        """Execute RAG query (not yet implemented).

        Args:
            query: The natural-language query; echoed back in the result.
            config: Optional configuration mapping. Currently unused.

        Returns:
            A ``QueryResult`` with ``success=False``, no results, and metadata
            marking the paradigm as not implemented.
        """
        # perf_counter is monotonic, so the elapsed value can never come out
        # negative the way a wall-clock difference can after a clock change.
        started = time.perf_counter()
        elapsed_ms = (time.perf_counter() - started) * 1000

        return QueryResult(
            paradigm=self.name,
            query=query,
            execution_time_ms=elapsed_ms,
            result_count=0,
            results=[],
            metadata={
                "status": "not_implemented",
                "implementation_issue": "TBD - to be created",
                "description": "RAG combines semantic search with LLM generation for intelligent responses",
            },
            success=False,
            error_message="RAG paradigm not yet implemented.",
        )

    def get_examples(self) -> List[Dict[str, str]]:
        """Return example RAG queries as ``name``/``description``/``query`` dicts."""
        return [
            {
                "name": "Semantic question",
                "description": "Ask questions about content semantically",
                "query": "What are the main configuration options for the API?",
            },
            {
                "name": "Summarization request",
                "description": "Generate summaries of related content",
                "query": "Summarize all documentation about authentication methods",
            },
            {
                "name": "Code explanation",
                "description": "Explain code patterns found in files",
                "query": "Explain the error handling patterns used in the codebase",
            },
            {
                "name": "Comparative analysis",
                "description": "Compare different approaches in documentation",
                "query": "Compare the database migration strategies mentioned in the docs",
            },
        ]

    def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
        """Validate a RAG query.

        Args:
            query: The candidate query text.

        Returns:
            ``(True, None)`` when the query is acceptable, otherwise
            ``(False, reason)`` where ``reason`` explains the rejection.
        """
        # Report non-string input (e.g. None) as a validation failure instead
        # of letting ``.strip()`` raise AttributeError.
        if not isinstance(query, str):
            return False, "RAG query must be a string"

        stripped = query.strip()
        if not stripped:
            return False, "RAG query cannot be empty"
        if len(stripped) < self._MIN_QUERY_LENGTH:
            return False, "RAG query should be a descriptive question or request"
        return True, None

    def get_syntax_help(self) -> str:
        """Return the multi-line syntax help text for RAG queries."""
        # Built from explicit line literals so the emitted text does not pick
        # up the indentation of this method body.
        return (
            "RAG (Retrieval-Augmented Generation) Syntax:\n"
            "RAG queries are natural language questions or requests that combine:\n"
            "1. Semantic retrieval from vector database\n"
            "2. LLM generation for comprehensive answers\n"
            "Query Types:\n"
            '- Questions: "What is...?", "How does...?", "Why...?"\n'
            '- Summaries: "Summarize...", "Overview of..."\n'
            '- Comparisons: "Compare...", "Differences between..."\n'
            '- Analysis: "Analyze...", "Explain the pattern..."\n'
            "Examples:\n"
            '"What are the main API endpoints and their purposes?"\n'
            '"Summarize the security best practices mentioned in the documentation"\n'
            '"How do I configure the database connection?"\n'
            '"Compare SQL vs NoSQL approaches discussed in the docs"\n'
            "The system will:\n"
            "1. Convert your query to vector embeddings\n"
            "2. Retrieve relevant document chunks\n"
            "3. Generate a comprehensive response using LLM\n"
            "4. Provide source citations\n"
        )