feat: implement comprehensive query paradigm zoo system (issue #62)

- Created extensible BaseQueryParadigm interface with standardized QueryResult format
- Implemented QueryParadigmRegistry for paradigm discovery and management
- Added 5 working paradigms: SQL, FTS, GraphQL, JSONPath, Natural Language
- Documented 9 additional paradigms: QBE, Batch Manipulation, Visual Query Builder, REST API, NoSQL, UNIX Pipeline, XPath/XQuery, RAG, Data Transformation
- Integrated full CLI interface: list, search, show, exec, categories commands
- Added comprehensive test suite with 23 test cases covering all components
- Auto-registration system enables easy addition of new paradigms
- Organized paradigms by category (structural, textual, semantic, visual, procedural, network) and complexity (beginner, intermediate, advanced)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-03 23:06:57 +02:00
parent 1d13cbb355
commit 5143864a86
21 changed files with 3659 additions and 0 deletions
--- a/markitect/query_paradigms/paradigms/rag_paradigm.py
+++ b/markitect/query_paradigms/paradigms/rag_paradigm.py
@@ -0,0 +1,110 @@
+"""
+Retrieval-Augmented Generation (RAG) Paradigm - LLM + Vector Database.
+"""
+
+import time
+from typing import Dict, Any, List, Optional
+
+from ..base import BaseQueryParadigm, QueryResult
+
+
+class RAGParadigm(BaseQueryParadigm):
+    """RAG paradigm for LLM-powered semantic search and generation."""
+
+    @property
+    def name(self) -> str:
+        return "RAG (Retrieval-Augmented Generation)"
+
+    @property
+    def description(self) -> str:
+        return "Large Language Model retrieves relevant facts from vector database for enhanced responses"
+
+    @property
+    def category(self) -> str:
+        return "semantic"
+
+    @property
+    def complexity(self) -> str:
+        return "advanced"
+
+    def execute(self, query: str, config: Dict[str, Any] = None) -> QueryResult:
+        """Execute RAG query (not yet implemented)."""
+        start_time = time.time()
+        execution_time = (time.time() - start_time) * 1000
+
+        return QueryResult(
+            paradigm=self.name,
+            query=query,
+            execution_time_ms=execution_time,
+            result_count=0,
+            results=[],
+            metadata={
+                "status": "not_implemented",
+                "implementation_issue": "TBD - to be created",
+                "description": "RAG combines semantic search with LLM generation for intelligent responses"
+            },
+            success=False,
+            error_message="RAG paradigm not yet implemented."
+        )
+
+    def get_examples(self) -> List[Dict[str, str]]:
+        """Get example RAG queries."""
+        return [
+            {
+                "name": "Semantic question",
+                "description": "Ask questions about content semantically",
+                "query": "What are the main configuration options for the API?"
+            },
+            {
+                "name": "Summarization request",
+                "description": "Generate summaries of related content",
+                "query": "Summarize all documentation about authentication methods"
+            },
+            {
+                "name": "Code explanation",
+                "description": "Explain code patterns found in files",
+                "query": "Explain the error handling patterns used in the codebase"
+            },
+            {
+                "name": "Comparative analysis",
+                "description": "Compare different approaches in documentation",
+                "query": "Compare the database migration strategies mentioned in the docs"
+            }
+        ]
+
+    def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
+        """Validate RAG query."""
+        if not query.strip():
+            return False, "RAG query cannot be empty"
+
+        if len(query.strip()) < 10:
+            return False, "RAG query should be a descriptive question or request"
+
+        return True, None
+
+    def get_syntax_help(self) -> str:
+        """Get syntax help for RAG queries."""
+        return """RAG (Retrieval-Augmented Generation) Syntax:
+
+RAG queries are natural language questions or requests that combine:
+1. Semantic retrieval from vector database
+2. LLM generation for comprehensive answers
+
+Query Types:
+- Questions: "What is...?", "How does...?", "Why...?"
+- Summaries: "Summarize...", "Overview of..."
+- Comparisons: "Compare...", "Differences between..."
+- Analysis: "Analyze...", "Explain the pattern..."
+
+Examples:
+"What are the main API endpoints and their purposes?"
+"Summarize the security best practices mentioned in the documentation"
+"How do I configure the database connection?"
+"Compare SQL vs NoSQL approaches discussed in the docs"
+
+The system will:
+1. Convert your query to vector embeddings
+2. Retrieve relevant document chunks
+3. Generate a comprehensive response using LLM
+4. Provide source citations
+"""