""" JSONPath Query Paradigm - Path-based navigation through AST structures. """ import time import json from typing import Dict, Any, List, Optional from ..base import BaseQueryParadigm, QueryResult class JSONPathQueryParadigm(BaseQueryParadigm): """JSONPath query paradigm for navigating AST structures.""" @property def name(self) -> str: return "JSONPath" @property def description(self) -> str: return "XPath-like navigation through AST trees for precise structural queries" @property def category(self) -> str: return "structural" @property def complexity(self) -> str: return "advanced" def execute(self, query: str, config: Dict[str, Any] = None) -> QueryResult: """Execute JSONPath query against AST data.""" start_time = time.time() try: from ...ast_service import ASTService # Get database path and file info from config db_path = config.get('db_path') if config else 'markitect.db' file_id = config.get('file_id') if config else None filename = config.get('filename') if config else None if not file_id and not filename: raise ValueError("Either file_id or filename must be provided for JSONPath queries") # Get AST service ast_service = ASTService(db_path) # Get AST for the specified file if file_id: ast_data = ast_service.get_ast_by_file_id(file_id) else: ast_data = ast_service.get_ast_by_filename(filename) if not ast_data: raise ValueError(f"No AST found for {'file_id=' + str(file_id) if file_id else 'filename=' + filename}") # Execute JSONPath query try: import jsonpath_ng parser = jsonpath_ng.parse(query) matches = parser.find(ast_data) results = [] for match in matches: result_item = { "path": str(match.full_path), "value": match.value, "context": self._get_context(match, ast_data) } results.append(result_item) except ImportError: # Fallback: simple dot-notation parsing results = self._simple_path_query(query, ast_data) execution_time = (time.time() - start_time) * 1000 return QueryResult( paradigm="JSONPath", query=query, execution_time_ms=execution_time, result_count=len(results), results=results, metadata={ "file_id": file_id, "filename": filename, "ast_available": True, "query_type": self._detect_query_type(query) }, success=True ) except Exception as e: execution_time = (time.time() - start_time) * 1000 return QueryResult( paradigm="JSONPath", query=query, execution_time_ms=execution_time, result_count=0, results=[], metadata={"ast_available": False}, success=False, error_message=str(e) ) def get_examples(self) -> List[Dict[str, str]]: """Get example JSONPath queries.""" return [ { "name": "All headings", "description": "Find all heading nodes in the AST", "query": "$..heading" }, { "name": "Top-level headings", "description": "Find only H1 headings", "query": "$..heading[?(@.level == 1)]" }, { "name": "Code blocks", "description": "Find all code block nodes", "query": "$..code_block" }, { "name": "Links with URLs", "description": "Find all link nodes with their URLs", "query": "$..link[?(@.url)]" }, { "name": "Image sources", "description": "Extract all image source URLs", "query": "$..image.src" }, { "name": "List items", "description": "Find all list item contents", "query": "$..list_item.children[*].text" }, { "name": "Nested structures", "description": "Find deeply nested elements", "query": "$..children[*].children[*].type" }, { "name": "Content with attributes", "description": "Find nodes with specific attributes", "query": "$..node[?(@.attrs.class)]" } ] def validate_query(self, query: str) -> tuple[bool, Optional[str]]: """Validate JSONPath query syntax.""" if not query or not query.strip(): return False, "Query cannot be empty" # Basic JSONPath validation if not query.startswith('$'): return False, "JSONPath queries must start with '$'" # Check for balanced brackets open_brackets = query.count('[') close_brackets = query.count(']') if open_brackets != close_brackets: return False, "Unmatched brackets in JSONPath query" # Check for balanced parentheses in filter expressions open_parens = query.count('(') close_parens = query.count(')') if open_parens != close_parens: return False, "Unmatched parentheses in JSONPath query" return True, None def get_syntax_help(self) -> str: """Get JSONPath syntax help.""" return """JSONPath Query Syntax: Basic Navigation: $ - Root node .child - Direct child access ..child - Recursive descent (any level) [*] - All array elements [0] - First array element [-1] - Last array element Array Operations: [start:end] - Array slice [0,1,2] - Multiple specific indices Filter Expressions: [?(@.field)] - Filter by field existence [?(@.field == 'value')] - Filter by field value [?(@.level > 1)] - Numeric comparison [?(@.type == 'heading')] - String comparison Common AST Patterns: $..heading - All headings at any level $..heading[?(@.level==1)] - Only H1 headings $..code_block.language - Programming languages used $..link.url - All link URLs $..image.src - All image sources $..list_item.text - List item contents Advanced Examples: $.children[*].type - Types of top-level elements $..children[?(@.type=='text')].content - All text content $..node[?(@.attrs.class)] - Nodes with CSS classes Operators: ==, != - Equality <, <=, >, >= - Comparison =~ - Regular expression match in - Membership test Note: JSONPath queries operate on the parsed AST structure of markdown files. Use 'markitect ast-show ' to see the AST structure first. """ def _detect_query_type(self, query: str) -> str: """Detect JSONPath query type.""" if '..' in query: return "recursive_descent" elif '[?' in query: return "filtered_query" elif '[*]' in query: return "array_wildcard" elif any(op in query for op in ['[0]', '[1]', '[-1]']): return "indexed_access" elif ':' in query and '[' in query: return "array_slice" else: return "direct_access" def _get_context(self, match, ast_data: Dict) -> Dict[str, Any]: """Get context information for a JSONPath match.""" context = { "parent_path": None, "sibling_count": 0, "depth": len(str(match.full_path).split('.')) } # Try to get parent context path_parts = str(match.full_path).split('.') if len(path_parts) > 1: context["parent_path"] = '.'.join(path_parts[:-1]) return context def _simple_path_query(self, query: str, data: Any) -> List[Dict[str, Any]]: """Simple fallback JSONPath implementation using dot notation.""" results = [] try: # Very basic implementation for simple paths if query == '$': results.append({ "path": "$", "value": data, "context": {"depth": 0} }) elif query.startswith('$.'): # Simple dot notation path_parts = query[2:].split('.') current = data current_path = "$" for part in path_parts: current_path += f".{part}" if isinstance(current, dict) and part in current: current = current[part] elif isinstance(current, list) and part == '*': # Handle wildcard for arrays for i, item in enumerate(current): results.append({ "path": f"{current_path}[{i}]", "value": item, "context": {"depth": len(path_parts)} }) return results else: break if current is not None: results.append({ "path": current_path, "value": current, "context": {"depth": len(path_parts)} }) except Exception: pass return results def can_translate_from(self, other_paradigm: str) -> bool: """Check if we can translate from another paradigm.""" return other_paradigm.lower() in ["natural_language"] def translate_query(self, query: str, from_paradigm: str) -> Optional[str]: """Translate from another paradigm to JSONPath.""" if from_paradigm.lower() == "natural_language": return self._translate_natural_language_to_jsonpath(query) return None def _translate_natural_language_to_jsonpath(self, query: str) -> Optional[str]: """Translate natural language to JSONPath.""" query_lower = query.lower() # Common patterns if "all headings" in query_lower or "find headings" in query_lower: return "$..heading" elif "first heading" in query_lower or "main heading" in query_lower: return "$..heading[0]" elif "code blocks" in query_lower: return "$..code_block" elif "links" in query_lower: return "$..link" elif "images" in query_lower: return "$..image" elif "list items" in query_lower: return "$..list_item" elif "all text" in query_lower: return "$..text" # Level-specific patterns if "h1" in query_lower or "level 1" in query_lower: return "$..heading[?(@.level == 1)]" elif "h2" in query_lower or "level 2" in query_lower: return "$..heading[?(@.level == 2)]" return None