""" UNIX Pipeline Paradigm - Stream processing with awk, sed, perl. """ import time from typing import Dict, Any, List, Optional from ..base import BaseQueryParadigm, QueryResult class UNIXPipelineParadigm(BaseQueryParadigm): """UNIX pipeline paradigm for stream processing.""" @property def name(self) -> str: return "UNIX Pipeline" @property def description(self) -> str: return "Stream processing with UNIX tools like awk, sed, grep, sort for line-by-line data manipulation" @property def category(self) -> str: return "procedural" @property def complexity(self) -> str: return "advanced" def execute(self, query: str, config: Dict[str, Any] = None) -> QueryResult: """Execute UNIX pipeline (not yet implemented).""" start_time = time.time() execution_time = (time.time() - start_time) * 1000 return QueryResult( paradigm=self.name, query=query, execution_time_ms=execution_time, result_count=0, results=[], metadata={ "status": "not_implemented", "implementation_issue": "TBD - to be created", "description": "UNIX pipelines enable powerful stream processing of MarkiTect data" }, success=False, error_message="UNIX Pipeline paradigm not yet implemented." ) def get_examples(self) -> List[Dict[str, str]]: """Get example UNIX pipeline commands.""" return [ { "name": "Filter and count", "description": "Find files by author and count", "query": "markitect export --format=csv | grep 'Alice' | wc -l" }, { "name": "Extract and sort", "description": "Extract unique authors and sort", "query": "markitect export --format=csv | cut -d',' -f3 | sort | uniq -c | sort -rn" }, { "name": "Complex awk processing", "description": "Process file metadata with awk", "query": "markitect export --format=csv | awk -F',' '{if($4>1000) print $1,$2}' | sort" }, { "name": "Sed text transformation", "description": "Transform file paths using sed", "query": "markitect list-files | sed 's|/old/path|/new/path|g' | sort" } ] def validate_query(self, query: str) -> tuple[bool, Optional[str]]: """Validate UNIX pipeline command.""" if not query.strip(): return False, "UNIX pipeline cannot be empty" if '|' not in query and not any(cmd in query for cmd in ['grep', 'awk', 'sed', 'sort', 'cut', 'wc']): return False, "Query should contain UNIX pipeline commands" return True, None def get_syntax_help(self) -> str: """Get syntax help for UNIX pipelines.""" return """UNIX Pipeline Syntax: Basic Structure: markitect | | Common Tools: - grep: Filter lines matching pattern - awk: Process fields and records - sed: Stream editor for text transformation - sort: Sort lines - uniq: Remove duplicate lines - cut: Extract fields - wc: Count lines/words/characters Examples: markitect export --format=csv | grep 'documentation' | cut -d',' -f1,2 markitect list-files | awk '{print $1}' | sort | uniq markitect export --format=csv | sed 's/old/new/g' | grep -v '^#' The pipeline starts with MarkiTect data export and processes it through UNIX tools. """