feat: implement comprehensive query paradigm zoo system (issue #62)
- Created extensible BaseQueryParadigm interface with standardized QueryResult format - Implemented QueryParadigmRegistry for paradigm discovery and management - Added 5 working paradigms: SQL, FTS, GraphQL, JSONPath, Natural Language - Documented 9 additional paradigms: QBE, Batch Manipulation, Visual Query Builder, REST API, NoSQL, UNIX Pipeline, XPath/XQuery, RAG, Data Transformation - Integrated full CLI interface: list, search, show, exec, categories commands - Added comprehensive test suite with 23 test cases covering all components - Auto-registration system enables easy addition of new paradigms - Organized paradigms by category (structural, textual, semantic, visual, procedural, network) and complexity (beginner, intermediate, advanced) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
333
markitect/query_paradigms/paradigms/jsonpath_paradigm.py
Normal file
333
markitect/query_paradigms/paradigms/jsonpath_paradigm.py
Normal file
@@ -0,0 +1,333 @@
|
||||
"""
|
||||
JSONPath Query Paradigm - Path-based navigation through AST structures.
|
||||
"""
|
||||
|
||||
import json
import re
import time
from typing import Dict, Any, List, Optional

from ..base import BaseQueryParadigm, QueryResult
|
||||
|
||||
|
||||
class JSONPathQueryParadigm(BaseQueryParadigm):
    """JSONPath query paradigm for navigating AST structures.

    A query is evaluated against the AST that ``ASTService`` returns for one
    file, selected through the ``file_id`` or ``filename`` entry of the
    ``config`` mapping given to :meth:`execute`. Evaluation uses the
    ``jsonpath_ng`` package when it can be imported; otherwise a small
    built-in evaluator handles plain member, index and wildcard steps.
    """

    # Database used when the caller's config does not name one.
    _DEFAULT_DB_PATH = 'markitect.db'

    # Paradigm identifiers (after normalisation) that translate_query accepts.
    _TRANSLATABLE_FROM = ("natural_language",)

    _UNMATCHED_BRACKETS = "Unmatched brackets in JSONPath query"
    _UNMATCHED_PARENS = "Unmatched parentheses in JSONPath query"

    # One step of the fallback evaluator: ".name", ".*", "[3]", "[-1]" or "[*]".
    _SIMPLE_STEP = re.compile(r"\.(\*|[^.\[\]*]+)|\[\s*(\*|-?\d+)\s*\]")
    # "[0]", "[-1]", "[0,1,2]"
    _INDEX_SUBSCRIPT = re.compile(r"\[\s*-?\d+(?:\s*,\s*-?\d+)*\s*\]")
    # "[1:3]", "[:2]", "[::2]"
    _SLICE_SUBSCRIPT = re.compile(r"\[\s*-?\d*\s*:\s*-?\d*\s*(?::\s*-?\d*\s*)?\]")
    # "h1".."h6" or "level 1".."level 6" as whole words
    _HEADING_LEVEL = re.compile(r"\bh([1-6])\b|\blevel\s*([1-6])\b")

    # Natural-language phrases and the JSONPath they map to, in match priority.
    _PHRASE_PATHS = (
        (("first heading", "main heading"), "$..heading[0]"),
        (("code blocks",), "$..code_block"),
        (("links",), "$..link"),
        (("images",), "$..image"),
        (("list items",), "$..list_item"),
        (("all text",), "$..text"),
    )

    @property
    def name(self) -> str:
        """Display name of this paradigm."""
        return "JSONPath"

    @property
    def description(self) -> str:
        """One-line summary of what the paradigm offers."""
        return "XPath-like navigation through AST trees for precise structural queries"

    @property
    def category(self) -> str:
        """Category label under which the paradigm is grouped."""
        return "structural"

    @property
    def complexity(self) -> str:
        """Complexity label of the paradigm."""
        return "advanced"

    def execute(self, query: str, config: Optional[Dict[str, Any]] = None) -> QueryResult:
        """Execute a JSONPath query against the stored AST of one file.

        Args:
            query: JSONPath expression to evaluate.
            config: Optional settings. Recognised keys are ``db_path``
                (defaults to ``markitect.db``), ``file_id`` and ``filename``;
                at least one of the last two must be set.

        Returns:
            A ``QueryResult``. On success ``results`` holds one dict per match
            with ``path``, ``value`` and ``context`` keys. Every failure
            (no file reference, no AST for the file, a query the parser
            rejects, ...) is reported through ``success=False`` and
            ``error_message`` instead of being raised.
        """
        started = time.perf_counter()
        ast_loaded = False

        try:
            # Imported inside the try so that a failing import is reported
            # through the QueryResult like any other error.
            from ...ast_service import ASTService

            settings = config or {}
            # A config that omits db_path (or sets it to None) still gets the default.
            db_path = settings.get('db_path') or self._DEFAULT_DB_PATH
            file_id = settings.get('file_id')
            filename = settings.get('filename')

            if not file_id and not filename:
                raise ValueError("Either file_id or filename must be provided for JSONPath queries")

            ast_service = ASTService(db_path)
            if file_id:
                ast_data = ast_service.get_ast_by_file_id(file_id)
            else:
                ast_data = ast_service.get_ast_by_filename(filename)

            if not ast_data:
                target = f"file_id={file_id}" if file_id else f"filename={filename}"
                raise ValueError(f"No AST found for {target}")
            ast_loaded = True

            results = self._run_query(query, ast_data)

            return QueryResult(
                paradigm=self.name,
                query=query,
                execution_time_ms=(time.perf_counter() - started) * 1000,
                result_count=len(results),
                results=results,
                metadata={
                    "file_id": file_id,
                    "filename": filename,
                    "ast_available": True,
                    "query_type": self._detect_query_type(query),
                },
                success=True,
            )

        except Exception as e:
            # Boundary handler: callers receive a failed QueryResult, never an exception.
            return QueryResult(
                paradigm=self.name,
                query=query,
                execution_time_ms=(time.perf_counter() - started) * 1000,
                result_count=0,
                results=[],
                # True when the AST was loaded and only the query itself failed.
                metadata={"ast_available": ast_loaded},
                success=False,
                error_message=str(e),
            )

    def _run_query(self, query: str, ast_data: Any) -> List[Dict[str, Any]]:
        """Evaluate ``query`` with jsonpath_ng, or with the fallback if it is missing."""
        try:
            # The extended parser is the one that understands filter
            # expressions such as [?(@.level == 1)].
            from jsonpath_ng.ext import parse as parse_jsonpath
        except ImportError:
            return self._simple_path_query(query, ast_data)

        return [
            {
                "path": str(match.full_path),
                "value": match.value,
                "context": self._get_context(match, ast_data),
            }
            for match in parse_jsonpath(query).find(ast_data)
        ]

    def get_examples(self) -> List[Dict[str, str]]:
        """Return sample queries, each with ``name``, ``description`` and ``query``."""
        return [
            {
                "name": "All headings",
                "description": "Find all heading nodes in the AST",
                "query": "$..heading",
            },
            {
                "name": "Top-level headings",
                "description": "Find only H1 headings",
                "query": "$..heading[?(@.level == 1)]",
            },
            {
                "name": "Code blocks",
                "description": "Find all code block nodes",
                "query": "$..code_block",
            },
            {
                "name": "Links with URLs",
                "description": "Find all link nodes with their URLs",
                "query": "$..link[?(@.url)]",
            },
            {
                "name": "Image sources",
                "description": "Extract all image source URLs",
                "query": "$..image.src",
            },
            {
                "name": "List items",
                "description": "Find all list item contents",
                "query": "$..list_item.children[*].text",
            },
            {
                "name": "Nested structures",
                "description": "Find deeply nested elements",
                "query": "$..children[*].children[*].type",
            },
            {
                "name": "Content with attributes",
                "description": "Find nodes with specific attributes",
                "query": "$..node[?(@.attrs.class)]",
            },
        ]

    def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
        """Check the basic shape of a JSONPath query without evaluating it.

        The query must be non-empty, start with ``$`` (surrounding whitespace
        is ignored) and have properly nested ``[]`` / ``()`` pairs. Delimiters
        inside quoted string literals are not counted.

        Returns:
            ``(True, None)`` when the query passes, otherwise ``(False, reason)``.
        """
        if not query or not query.strip():
            return False, "Query cannot be empty"

        expression = query.strip()
        if not expression.startswith('$'):
            return False, "JSONPath queries must start with '$'"

        problem = self._check_delimiters(expression)
        if problem is not None:
            return False, problem

        return True, None

    @classmethod
    def _check_delimiters(cls, expression: str) -> Optional[str]:
        """Return an error message for unbalanced delimiters, or None if balanced."""
        opener_for = {']': '[', ')': '('}
        pending: List[str] = []
        quote = None
        escaped = False

        for char in expression:
            if quote is not None:
                # Inside a string literal: only look for its (unescaped) end.
                if escaped:
                    escaped = False
                elif char == '\\':
                    escaped = True
                elif char == quote:
                    quote = None
                continue

            if char in ('"', "'"):
                quote = char
            elif char in ('[', '('):
                pending.append(char)
            elif char in opener_for:
                if not pending or pending.pop() != opener_for[char]:
                    return cls._UNMATCHED_BRACKETS if char == ']' else cls._UNMATCHED_PARENS

        if quote is not None:
            return "Unterminated string literal in JSONPath query"
        if '[' in pending:
            return cls._UNMATCHED_BRACKETS
        if pending:
            return cls._UNMATCHED_PARENS
        return None

    def get_syntax_help(self) -> str:
        """Return the multi-line syntax reference shown to users."""
        return "\n".join((
            "JSONPath Query Syntax:",
            "",
            "Basic Navigation:",
            "$ - Root node",
            ".child - Direct child access",
            "..child - Recursive descent (any level)",
            "[*] - All array elements",
            "[0] - First array element",
            "[-1] - Last array element",
            "",
            "Array Operations:",
            "[start:end] - Array slice",
            "[0,1,2] - Multiple specific indices",
            "",
            "Filter Expressions:",
            "[?(@.field)] - Filter by field existence",
            "[?(@.field == 'value')] - Filter by field value",
            "[?(@.level > 1)] - Numeric comparison",
            "[?(@.type == 'heading')] - String comparison",
            "",
            "Common AST Patterns:",
            "$..heading - All headings at any level",
            "$..heading[?(@.level==1)] - Only H1 headings",
            "$..code_block.language - Programming languages used",
            "$..link.url - All link URLs",
            "$..image.src - All image sources",
            "$..list_item.text - List item contents",
            "",
            "Advanced Examples:",
            "$.children[*].type - Types of top-level elements",
            "$..children[?(@.type=='text')].content - All text content",
            "$..node[?(@.attrs.class)] - Nodes with CSS classes",
            "",
            "Operators:",
            "==, != - Equality",
            "<, <=, >, >= - Comparison",
            "=~ - Regular expression match",
            "in - Membership test",
            "",
            "Note: JSONPath queries operate on the parsed AST structure of markdown files.",
            "Use 'markitect ast-show <file>' to see the AST structure first.",
            "",
        ))

    def _detect_query_type(self, query: str) -> str:
        """Classify a query by the most prominent feature it uses.

        Checks run in a fixed priority order, so a query combining several
        features is labelled by the first that applies: recursive descent,
        filter, ``[*]`` wildcard, numeric index (any integer, or a comma
        list of integers), slice, and finally plain direct access.
        """
        if '..' in query:
            return "recursive_descent"
        if '[?' in query:
            return "filtered_query"
        if '[*]' in query:
            return "array_wildcard"
        if self._INDEX_SUBSCRIPT.search(query):
            return "indexed_access"
        if self._SLICE_SUBSCRIPT.search(query):
            return "array_slice"
        return "direct_access"

    def _get_context(self, match, ast_data: Dict) -> Dict[str, Any]:
        """Describe where a jsonpath_ng match sits in the document.

        Args:
            match: A match object exposing ``full_path`` and, when available,
                ``context`` (the parent match, whose ``value`` is the
                container the matched value was taken from).
            ast_data: The document that was queried. Currently unused; kept
                so the signature stays stable for callers.

        Returns:
            A dict with ``parent_path`` (the dotted path minus its last
            segment, or None for a single-segment path), ``sibling_count``
            (other entries in the parent list/dict, 0 when unknown) and
            ``depth`` (number of dot-separated segments in the path).
        """
        path_parts = str(match.full_path).split('.')

        container = getattr(getattr(match, 'context', None), 'value', None)
        if isinstance(container, (dict, list)):
            sibling_count = max(len(container) - 1, 0)
        else:
            sibling_count = 0

        return {
            "parent_path": '.'.join(path_parts[:-1]) if len(path_parts) > 1 else None,
            "sibling_count": sibling_count,
            "depth": len(path_parts),
        }

    @classmethod
    def _parse_simple_path(cls, query: str) -> Optional[List[tuple]]:
        """Split a ``$``-rooted path into steps for the fallback evaluator.

        Returns a list of ``(kind, argument)`` tuples where kind is ``"key"``,
        ``"index"`` or ``"wildcard"``, or None when the query uses syntax the
        fallback does not understand (recursive descent, filters, slices...).
        """
        expression = query.strip()
        if not expression.startswith('$'):
            return None

        steps: List[tuple] = []
        position = 1  # skip the leading '$'
        while position < len(expression):
            found = cls._SIMPLE_STEP.match(expression, position)
            if found is None:
                return None
            member, subscript = found.groups()
            if member == '*' or subscript == '*':
                steps.append(("wildcard", None))
            elif member is not None:
                steps.append(("key", member))
            else:
                steps.append(("index", int(subscript)))
            position = found.end()
        return steps

    def _simple_path_query(self, query: str, data: Any) -> List[Dict[str, Any]]:
        """Evaluate a simple path without jsonpath_ng.

        Supports ``$`` followed by any sequence of ``.name``, ``[n]``, ``.*``
        and ``[*]`` steps. A path that does not resolve, or that uses any
        other syntax, yields an empty list rather than a partial match.

        Returns:
            One dict per match with ``path``, ``value`` and a ``context``
            holding the number of steps as ``depth``.
        """
        steps = self._parse_simple_path(query)
        if steps is None:
            return []

        # Each entry is (path so far, node reached); a wildcard fans this out.
        frontier = [("$", data)]
        for kind, argument in steps:
            advanced = []
            for path, node in frontier:
                if kind == "key":
                    if isinstance(node, dict) and argument in node:
                        advanced.append((f"{path}.{argument}", node[argument]))
                elif kind == "index":
                    if isinstance(node, list) and -len(node) <= argument < len(node):
                        advanced.append((f"{path}[{argument}]", node[argument]))
                elif isinstance(node, list):
                    advanced.extend(
                        (f"{path}[{position}]", item) for position, item in enumerate(node)
                    )
                elif isinstance(node, dict):
                    advanced.extend((f"{path}.{key}", value) for key, value in node.items())
            frontier = advanced
            if not frontier:
                break

        depth = len(steps)
        return [
            {"path": path, "value": value, "context": {"depth": depth}}
            for path, value in frontier
        ]

    @staticmethod
    def _normalise_paradigm_name(paradigm: str) -> str:
        """Lower-case a paradigm name and join its words with underscores."""
        return re.sub(r"[\s\-]+", "_", paradigm.strip().lower())

    def can_translate_from(self, other_paradigm: str) -> bool:
        """Tell whether queries of ``other_paradigm`` can be turned into JSONPath.

        The name is compared case-insensitively, and spaces or hyphens are
        treated like underscores, so "Natural Language" and
        "natural_language" are the same paradigm here.
        """
        return self._normalise_paradigm_name(other_paradigm) in self._TRANSLATABLE_FROM

    def translate_query(self, query: str, from_paradigm: str) -> Optional[str]:
        """Translate a query from another paradigm into JSONPath.

        Returns:
            The JSONPath expression, or None when the source paradigm is not
            supported or the query matches no known pattern.
        """
        if self._normalise_paradigm_name(from_paradigm) == "natural_language":
            return self._translate_natural_language_to_jsonpath(query)
        return None

    def _translate_natural_language_to_jsonpath(self, query: str) -> Optional[str]:
        """Map a natural-language request onto a JSONPath expression.

        Matching is by fixed phrases, case-insensitively. A heading level
        ("h1".."h6" or "level 1".."level 6") narrows a generic heading
        request to that level, and is also used on its own when no other
        phrase matches. Returns None when nothing is recognised.
        """
        query_lower = query.lower()

        level_query = None
        level_match = self._HEADING_LEVEL.search(query_lower)
        if level_match:
            level = level_match.group(1) or level_match.group(2)
            level_query = f"$..heading[?(@.level == {level})]"

        if "all headings" in query_lower or "find headings" in query_lower:
            # "find headings with level 2" must not collapse to every heading.
            return level_query or "$..heading"

        for phrases, path in self._PHRASE_PATHS:
            if any(phrase in query_lower for phrase in phrases):
                return path

        return level_query
|
||||
Reference in New Issue
Block a user