feat: implement comprehensive query paradigm zoo system (issue #62)
- Created extensible BaseQueryParadigm interface with standardized QueryResult format
- Implemented QueryParadigmRegistry for paradigm discovery and management
- Added 5 working paradigms: SQL, FTS, GraphQL, JSONPath, Natural Language
- Documented 9 additional paradigms: QBE, Batch Manipulation, Visual Query Builder, REST API, NoSQL, UNIX Pipeline, XPath/XQuery, RAG, Data Transformation
- Integrated full CLI interface: list, search, show, exec, categories commands
- Added comprehensive test suite with 23 test cases covering all components
- Auto-registration system enables easy addition of new paradigms
- Organized paradigms by category (structural, textual, semantic, visual, procedural, network) and complexity (beginner, intermediate, advanced)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
107
markitect/query_paradigms/paradigms/unix_pipeline_paradigm.py
Normal file
107
markitect/query_paradigms/paradigms/unix_pipeline_paradigm.py
Normal file
@@ -0,0 +1,107 @@
|
||||
"""
|
||||
UNIX Pipeline Paradigm - Stream processing with awk, sed, perl.
|
||||
"""
|
||||
|
||||
import re
import time
from typing import Dict, Any, List, Optional

from ..base import BaseQueryParadigm, QueryResult
|
||||
|
||||
|
||||
class UNIXPipelineParadigm(BaseQueryParadigm):
    """UNIX pipeline paradigm for stream processing.

    This paradigm is a documented placeholder: ``execute`` does not run any
    command and always reports ``not_implemented``. The metadata properties,
    examples, validation and syntax help are functional.
    """

    # Tool names that mark a query as a UNIX command when it contains no pipe.
    # Kept in sync with the "Common Tools" list in ``get_syntax_help``.
    _KNOWN_TOOLS = frozenset({"grep", "awk", "sed", "sort", "uniq", "cut", "wc"})

    # Splits a query into identifier-like words so tool names are matched as
    # whole words instead of substrings ("used" must not count as "sed").
    _WORD_PATTERN = re.compile(r"[A-Za-z0-9_]+")

    @property
    def name(self) -> str:
        """Human-readable paradigm name."""
        return "UNIX Pipeline"

    @property
    def description(self) -> str:
        """One-line summary of what the paradigm offers."""
        return "Stream processing with UNIX tools like awk, sed, grep, sort for line-by-line data manipulation"

    @property
    def category(self) -> str:
        """Category label under which this paradigm is grouped."""
        return "procedural"

    @property
    def complexity(self) -> str:
        """Complexity label for this paradigm."""
        return "advanced"

    def execute(self, query: str, config: Optional[Dict[str, Any]] = None) -> QueryResult:
        """Execute UNIX pipeline (not yet implemented).

        Args:
            query: The pipeline command text; echoed back in the result.
            config: Optional settings; currently unused by this stub.

        Returns:
            A ``QueryResult`` with ``success=False``, no results, and
            metadata whose ``status`` is ``"not_implemented"``.
        """
        # perf_counter is monotonic, so the elapsed value can never be
        # negative the way a wall-clock difference can after a clock change.
        start_time = time.perf_counter()
        execution_time = (time.perf_counter() - start_time) * 1000

        return QueryResult(
            paradigm=self.name,
            query=query,
            execution_time_ms=execution_time,
            result_count=0,
            results=[],
            metadata={
                "status": "not_implemented",
                "implementation_issue": "TBD - to be created",
                "description": "UNIX pipelines enable powerful stream processing of MarkiTect data",
            },
            success=False,
            error_message="UNIX Pipeline paradigm not yet implemented.",
        )

    def get_examples(self) -> List[Dict[str, str]]:
        """Get example UNIX pipeline commands.

        Returns:
            A list of dicts, each with ``name``, ``description`` and
            ``query`` keys.
        """
        return [
            {
                "name": "Filter and count",
                "description": "Find files by author and count",
                "query": "markitect export --format=csv | grep 'Alice' | wc -l",
            },
            {
                "name": "Extract and sort",
                "description": "Extract unique authors and sort",
                "query": "markitect export --format=csv | cut -d',' -f3 | sort | uniq -c | sort -rn",
            },
            {
                "name": "Complex awk processing",
                "description": "Process file metadata with awk",
                "query": "markitect export --format=csv | awk -F',' '{if($4>1000) print $1,$2}' | sort",
            },
            {
                "name": "Sed text transformation",
                "description": "Transform file paths using sed",
                "query": "markitect list-files | sed 's|/old/path|/new/path|g' | sort",
            },
        ]

    def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
        """Validate UNIX pipeline command.

        A query is accepted when it is non-blank and either contains a pipe
        character or mentions at least one known tool as a whole word.

        Args:
            query: The pipeline command text to check.

        Returns:
            ``(True, None)`` when the query looks like a pipeline, otherwise
            ``(False, reason)``.
        """
        if not query.strip():
            return False, "UNIX pipeline cannot be empty"

        # Any pipe is enough to treat the text as a pipeline.
        if "|" in query:
            return True, None

        # Without a pipe, require a recognized tool as a standalone word.
        words = set(self._WORD_PATTERN.findall(query))
        if words.isdisjoint(self._KNOWN_TOOLS):
            return False, "Query should contain UNIX pipeline commands"

        return True, None

    def get_syntax_help(self) -> str:
        """Get syntax help for UNIX pipelines."""
        return """UNIX Pipeline Syntax:

Basic Structure:
markitect <export_command> | <unix_tools> | <more_tools>

Common Tools:
- grep: Filter lines matching pattern
- awk: Process fields and records
- sed: Stream editor for text transformation
- sort: Sort lines
- uniq: Remove duplicate lines
- cut: Extract fields
- wc: Count lines/words/characters

Examples:
markitect export --format=csv | grep 'documentation' | cut -d',' -f1,2
markitect list-files | awk '{print $1}' | sort | uniq
markitect export --format=csv | sed 's/old/new/g' | grep -v '^#'

The pipeline starts with MarkiTect data export and processes it through UNIX tools.
"""
|
||||
Reference in New Issue
Block a user