# markitect-main/markitect/query_paradigms/paradigms/rag_paradigm.py

"""
Retrieval-Augmented Generation (RAG) Paradigm - LLM + Vector Database.
"""

import time
from typing import Dict, Any, List, Optional

from ..base import BaseQueryParadigm, QueryResult


class RAGParadigm(BaseQueryParadigm):
    """RAG paradigm for LLM-powered semantic search and generation.

    This class is currently a placeholder: ``execute`` performs no retrieval
    or generation and always returns an unsuccessful ``QueryResult`` whose
    metadata carries ``"status": "not_implemented"``. The descriptive
    properties, examples, validation and syntax help are usable as-is.
    """

    # Shortest query (after stripping whitespace) accepted by validate_query.
    _MIN_QUERY_LENGTH = 10

    @property
    def name(self) -> str:
        """Human-readable paradigm name."""
        return "RAG (Retrieval-Augmented Generation)"

    @property
    def description(self) -> str:
        """One-sentence summary of what the paradigm does."""
        return "Large Language Model retrieves relevant facts from vector database for enhanced responses"

    @property
    def category(self) -> str:
        """Category label for this paradigm."""
        return "semantic"

    @property
    def complexity(self) -> str:
        """Complexity label for this paradigm."""
        return "advanced"

    def execute(self, query: str, config: Optional[Dict[str, Any]] = None) -> QueryResult:
        """Return a placeholder result; RAG execution is not yet implemented.

        Args:
            query: Query text; echoed back unchanged in the result.
            config: Optional settings. Currently ignored.

        Returns:
            A ``QueryResult`` with ``success=False``, an empty result list,
            and metadata marking the paradigm as not implemented.
        """
        # perf_counter is monotonic, so the elapsed time can never come out
        # negative if the wall clock is adjusted between the two readings.
        start_time = time.perf_counter()
        execution_time = (time.perf_counter() - start_time) * 1000

        return QueryResult(
            paradigm=self.name,
            query=query,
            execution_time_ms=execution_time,
            result_count=0,
            results=[],
            metadata={
                "status": "not_implemented",
                "implementation_issue": "TBD - to be created",
                "description": "RAG combines semantic search with LLM generation for intelligent responses"
            },
            success=False,
            error_message="RAG paradigm not yet implemented."
        )

    def get_examples(self) -> List[Dict[str, str]]:
        """Get example RAG queries.

        Returns:
            A list of dicts, each with ``name``, ``description`` and
            ``query`` keys.
        """
        return [
            {
                "name": "Semantic question",
                "description": "Ask questions about content semantically",
                "query": "What are the main configuration options for the API?"
            },
            {
                "name": "Summarization request",
                "description": "Generate summaries of related content",
                "query": "Summarize all documentation about authentication methods"
            },
            {
                "name": "Code explanation",
                "description": "Explain code patterns found in files",
                "query": "Explain the error handling patterns used in the codebase"
            },
            {
                "name": "Comparative analysis",
                "description": "Compare different approaches in documentation",
                "query": "Compare the database migration strategies mentioned in the docs"
            }
        ]

    def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
        """Validate a RAG query.

        Args:
            query: Query text to check. ``None`` is treated as empty.

        Returns:
            ``(True, None)`` when the query is acceptable, otherwise
            ``(False, reason)`` where ``reason`` explains the rejection.
        """
        # Strip once and reuse; a missing query is reported as empty rather
        # than crashing on ``None.strip()``.
        trimmed = query.strip() if query is not None else ""

        if not trimmed:
            return False, "RAG query cannot be empty"

        if len(trimmed) < self._MIN_QUERY_LENGTH:
            return False, "RAG query should be a descriptive question or request"

        return True, None

    def get_syntax_help(self) -> str:
        """Get syntax help text for RAG queries."""
        return """RAG (Retrieval-Augmented Generation) Syntax:

RAG queries are natural language questions or requests that combine:
1. Semantic retrieval from vector database
2. LLM generation for comprehensive answers

Query Types:
- Questions: "What is...?", "How does...?", "Why...?"
- Summaries: "Summarize...", "Overview of..."
- Comparisons: "Compare...", "Differences between..."
- Analysis: "Analyze...", "Explain the pattern..."

Examples:
"What are the main API endpoints and their purposes?"
"Summarize the security best practices mentioned in the documentation"
"How do I configure the database connection?"
"Compare SQL vs NoSQL approaches discussed in the docs"

The system will:
1. Convert your query to vector embeddings
2. Retrieve relevant document chunks
3. Generate a comprehensive response using LLM
4. Provide source citations
"""