- Created extensible BaseQueryParadigm interface with standardized QueryResult format - Implemented QueryParadigmRegistry for paradigm discovery and management - Added 5 working paradigms: SQL, FTS, GraphQL, JSONPath, Natural Language - Documented 9 additional paradigms: QBE, Batch Manipulation, Visual Query Builder, REST API, NoSQL, UNIX Pipeline, XPath/XQuery, RAG, Data Transformation - Integrated full CLI interface: list, search, show, exec, categories commands - Added comprehensive test suite with 23 test cases covering all components - Auto-registration system enables easy addition of new paradigms - Organized paradigms by category (structural, textual, semantic, visual, procedural, network) and complexity (beginner, intermediate, advanced) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
333 lines
11 KiB
Python
333 lines
11 KiB
Python
"""
|
|
JSONPath Query Paradigm - Path-based navigation through AST structures.
|
|
"""
|
|
|
|
import time
|
|
import json
|
|
from typing import Dict, Any, List, Optional
|
|
|
|
from ..base import BaseQueryParadigm, QueryResult
|
|
|
|
|
|
class JSONPathQueryParadigm(BaseQueryParadigm):
    """JSONPath query paradigm for navigating AST structures.

    Queries are evaluated with the ``jsonpath_ng`` package when it is
    installed (preferring its ``ext`` parser, which understands the
    ``[?(...)]`` filter syntax used by the bundled examples).  Without the
    package a small built-in dot-notation evaluator is used instead.
    """

    # Database file used when the caller's config does not name one.
    _DEFAULT_DB_PATH = 'markitect.db'

    # Paradigm identifiers (normalised) that translate_query() accepts.
    _TRANSLATABLE_PARADIGMS = frozenset({"natural_language"})

    # Ordered (trigger phrases, JSONPath) pairs; the first hit wins.
    _NL_PATTERNS = (
        (("all headings", "find headings"), "$..heading"),
        (("first heading", "main heading"), "$..heading[0]"),
        (("code blocks",), "$..code_block"),
        (("links",), "$..link"),
        (("images",), "$..image"),
        (("list items",), "$..list_item"),
        (("all text",), "$..text"),
    )

    @property
    def name(self) -> str:
        """Display name of this paradigm."""
        return "JSONPath"

    @property
    def description(self) -> str:
        """One-line summary of what this paradigm offers."""
        return "XPath-like navigation through AST trees for precise structural queries"

    @property
    def category(self) -> str:
        """Category label this paradigm is grouped under."""
        return "structural"

    @property
    def complexity(self) -> str:
        """Difficulty label for this paradigm."""
        return "advanced"

    def execute(self, query: str, config: Optional[Dict[str, Any]] = None) -> QueryResult:
        """Execute a JSONPath query against the AST of one file.

        Args:
            query: JSONPath expression to evaluate.
            config: Optional settings.  Recognised keys are ``db_path``
                (defaults to ``'markitect.db'``), ``file_id`` and
                ``filename``; at least one of the last two is required.

        Returns:
            A ``QueryResult``.  On success ``results`` holds one dict per
            match with ``path``, ``value`` and ``context`` keys.  Any
            failure (missing file reference, no AST, invalid query, ...)
            is reported through ``success=False`` and ``error_message``
            rather than raised.
        """
        started = time.perf_counter()

        try:
            settings = config or {}
            # Fall back to the default even when the key is absent or empty;
            # previously a config without 'db_path' produced db_path=None.
            db_path = settings.get('db_path') or self._DEFAULT_DB_PATH
            file_id = settings.get('file_id')
            filename = settings.get('filename')

            if not file_id and not filename:
                raise ValueError("Either file_id or filename must be provided for JSONPath queries")

            ast_data = self._load_ast(db_path, file_id, filename)
            results = self._run_query(query, ast_data)

            return QueryResult(
                paradigm=self.name,
                query=query,
                execution_time_ms=(time.perf_counter() - started) * 1000,
                result_count=len(results),
                results=results,
                metadata={
                    "file_id": file_id,
                    "filename": filename,
                    "ast_available": True,
                    "query_type": self._detect_query_type(query),
                },
                success=True,
            )

        except Exception as e:
            # Deliberate catch-all: this method is the paradigm's boundary and
            # reports every failure as an unsuccessful QueryResult.
            return QueryResult(
                paradigm=self.name,
                query=query,
                execution_time_ms=(time.perf_counter() - started) * 1000,
                result_count=0,
                results=[],
                metadata={"ast_available": False},
                success=False,
                error_message=str(e),
            )

    def _load_ast(self, db_path: str, file_id: Any, filename: Optional[str]) -> Any:
        """Fetch the AST for the file identified by ``file_id`` or ``filename``.

        ``file_id`` takes precedence when both are given.

        Raises:
            ValueError: If the AST service returns nothing for the file.
        """
        # Imported at call time, as the original code did.
        # NOTE(review): presumably this avoids an import cycle - confirm
        # before hoisting it to module level.
        from ...ast_service import ASTService

        ast_service = ASTService(db_path)

        if file_id:
            ast_data = ast_service.get_ast_by_file_id(file_id)
            looked_up = f"file_id={file_id}"
        else:
            ast_data = ast_service.get_ast_by_filename(filename)
            looked_up = f"filename={filename}"

        if not ast_data:
            raise ValueError(f"No AST found for {looked_up}")
        return ast_data

    @staticmethod
    def _resolve_jsonpath_parser():
        """Return the best available ``jsonpath_ng`` parse function, or None.

        The ``ext`` parser is preferred because the base parser does not
        accept filter expressions such as ``[?(@.level == 1)]``.
        """
        try:
            from jsonpath_ng.ext import parse
        except ImportError:
            try:
                from jsonpath_ng import parse
            except ImportError:
                return None
        return parse

    def _run_query(self, query: str, ast_data: Any) -> List[Dict[str, Any]]:
        """Evaluate ``query`` against ``ast_data`` and build result dicts."""
        parse_jsonpath = self._resolve_jsonpath_parser()
        if parse_jsonpath is None:
            # Library not installed: use the simple dot-notation evaluator.
            return self._simple_path_query(query, ast_data)

        return [
            {
                "path": str(match.full_path),
                "value": match.value,
                "context": self._get_context(match, ast_data),
            }
            for match in parse_jsonpath(query).find(ast_data)
        ]

    def get_examples(self) -> List[Dict[str, str]]:
        """Return example queries as dicts with name, description and query."""
        return [
            {
                "name": "All headings",
                "description": "Find all heading nodes in the AST",
                "query": "$..heading",
            },
            {
                "name": "Top-level headings",
                "description": "Find only H1 headings",
                "query": "$..heading[?(@.level == 1)]",
            },
            {
                "name": "Code blocks",
                "description": "Find all code block nodes",
                "query": "$..code_block",
            },
            {
                "name": "Links with URLs",
                "description": "Find all link nodes with their URLs",
                "query": "$..link[?(@.url)]",
            },
            {
                "name": "Image sources",
                "description": "Extract all image source URLs",
                "query": "$..image.src",
            },
            {
                "name": "List items",
                "description": "Find all list item contents",
                "query": "$..list_item.children[*].text",
            },
            {
                "name": "Nested structures",
                "description": "Find deeply nested elements",
                "query": "$..children[*].children[*].type",
            },
            {
                "name": "Content with attributes",
                "description": "Find nodes with specific attributes",
                "query": "$..node[?(@.attrs.class)]",
            },
        ]

    @staticmethod
    def _delimiters_balanced(text: str, opener: str, closer: str) -> bool:
        """Check that ``opener``/``closer`` pairs nest correctly in ``text``.

        Delimiters inside single- or double-quoted string literals are
        ignored, and a closer appearing before its opener counts as
        unbalanced.
        """
        depth = 0
        active_quote = None
        escaped = False

        for char in text:
            if active_quote is not None:
                # Inside a string literal: only look for its terminator.
                if escaped:
                    escaped = False
                elif char == '\\':
                    escaped = True
                elif char == active_quote:
                    active_quote = None
                continue

            if char in ('"', "'"):
                active_quote = char
            elif char == opener:
                depth += 1
            elif char == closer:
                depth -= 1
                if depth < 0:
                    return False

        return depth == 0

    def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
        """Run lightweight syntax checks on a JSONPath query.

        Returns:
            ``(True, None)`` when the query passes, otherwise
            ``(False, reason)``.  This is a sanity check only; it does not
            guarantee the query will parse.
        """
        if not query or not query.strip():
            return False, "Query cannot be empty"

        if not query.startswith('$'):
            return False, "JSONPath queries must start with '$'"

        if not self._delimiters_balanced(query, '[', ']'):
            return False, "Unmatched brackets in JSONPath query"

        # Parentheses appear in filter expressions such as [?(@.level > 1)].
        if not self._delimiters_balanced(query, '(', ')'):
            return False, "Unmatched parentheses in JSONPath query"

        return True, None

    def get_syntax_help(self) -> str:
        """Return a plain-text JSONPath syntax reference."""
        return """JSONPath Query Syntax:

Basic Navigation:
  $                    - Root node
  .child               - Direct child access
  ..child              - Recursive descent (any level)
  [*]                  - All array elements
  [0]                  - First array element
  [-1]                 - Last array element

Array Operations:
  [start:end]          - Array slice
  [0,1,2]              - Multiple specific indices

Filter Expressions:
  [?(@.field)]                 - Filter by field existence
  [?(@.field == 'value')]      - Filter by field value
  [?(@.level > 1)]             - Numeric comparison
  [?(@.type == 'heading')]     - String comparison

Common AST Patterns:
  $..heading                   - All headings at any level
  $..heading[?(@.level==1)]    - Only H1 headings
  $..code_block.language       - Programming languages used
  $..link.url                  - All link URLs
  $..image.src                 - All image sources
  $..list_item.text            - List item contents

Advanced Examples:
  $.children[*].type                       - Types of top-level elements
  $..children[?(@.type=='text')].content   - All text content
  $..node[?(@.attrs.class)]                - Nodes with CSS classes

Operators:
  ==, !=               - Equality
  <, <=, >, >=         - Comparison
  =~                   - Regular expression match
  in                   - Membership test

Note: JSONPath queries operate on the parsed AST structure of markdown files.
Use 'markitect ast-show <file>' to see the AST structure first.
"""

    def _detect_query_type(self, query: str) -> str:
        """Classify a query by the first JSONPath feature it uses.

        Checks run in a fixed priority order: recursive descent, filter,
        array wildcard, numeric index (single or comma-separated), slice,
        and finally plain direct access.
        """
        import re

        if '..' in query:
            return "recursive_descent"
        if '[?' in query:
            return "filtered_query"
        if '[*]' in query:
            return "array_wildcard"
        # Any integer index or index list, e.g. [0], [-1], [7], [0,1,2].
        if re.search(r"\[\s*-?\d+(?:\s*,\s*-?\d+)*\s*\]", query):
            return "indexed_access"
        # A real slice such as [1:3], [:2] or [::2]; a colon inside a quoted
        # key like ['a:b'] does not count.
        if re.search(r"\[\s*-?\d*\s*:\s*-?\d*\s*(?::\s*-?\d*\s*)?\]", query):
            return "array_slice"
        return "direct_access"

    def _get_context(self, match, ast_data: Dict) -> Dict[str, Any]:
        """Build context information for one JSONPath match.

        Args:
            match: A match object exposing ``full_path``.
            ast_data: The queried AST.  Currently unused; kept so existing
                callers keep working.

        Returns:
            A dict with ``parent_path`` (the dotted path minus its last
            segment, or None for a single-segment path), ``depth`` (number
            of dotted segments) and ``sibling_count``.
        """
        path_parts = str(match.full_path).split('.')
        parent_path = '.'.join(path_parts[:-1]) if len(path_parts) > 1 else None

        return {
            "parent_path": parent_path,
            # NOTE: siblings are not computed yet; this is always 0.
            "sibling_count": 0,
            "depth": len(path_parts),
        }

    def _simple_path_query(self, query: str, data: Any) -> List[Dict[str, Any]]:
        """Evaluate a simple dot-notation path without ``jsonpath_ng``.

        Supports ``$`` (the root) and ``$.a.b.c`` style paths, where a
        ``*`` segment expands every element of a list and later segments
        are applied to each expanded element.  Anything else yields no
        results.

        A path with a segment that cannot be resolved produces an empty
        list (previously the last resolved ancestor was returned under the
        requested path).  Wildcard matches are reported with concrete
        paths such as ``$.children[0]``.

        Returns:
            A list of dicts with ``path``, ``value`` and ``context`` keys.
        """
        if query == '$':
            return [{"path": "$", "value": data, "context": {"depth": 0}}]

        if not query.startswith('$.'):
            return []

        path_parts = query[2:].split('.')
        # Every (path, value) pair reached so far; wildcards fan this out.
        frontier = [("$", data)]

        for part in path_parts:
            expanded = []
            for path, value in frontier:
                if isinstance(value, dict) and part in value:
                    expanded.append((f"{path}.{part}", value[part]))
                elif isinstance(value, list) and part == '*':
                    expanded.extend(
                        (f"{path}[{index}]", item) for index, item in enumerate(value)
                    )
            if not expanded:
                return []
            frontier = expanded

        depth = len(path_parts)
        return [
            {"path": path, "value": value, "context": {"depth": depth}}
            for path, value in frontier
        ]

    @staticmethod
    def _normalize_paradigm_name(paradigm: str) -> str:
        """Lower-case a paradigm name and unify separators to underscores."""
        return paradigm.strip().lower().replace(' ', '_').replace('-', '_')

    def can_translate_from(self, other_paradigm: str) -> bool:
        """Return True if queries from ``other_paradigm`` can be translated.

        The name is matched case-insensitively, and spaces or hyphens are
        treated like underscores (``"Natural Language"`` is accepted).
        """
        return self._normalize_paradigm_name(other_paradigm) in self._TRANSLATABLE_PARADIGMS

    def translate_query(self, query: str, from_paradigm: str) -> Optional[str]:
        """Translate ``query`` from another paradigm into JSONPath.

        Returns:
            The JSONPath expression, or None when the paradigm is not
            supported or the query is not recognised.
        """
        if self._normalize_paradigm_name(from_paradigm) == "natural_language":
            return self._translate_natural_language_to_jsonpath(query)
        return None

    @staticmethod
    def _heading_level(query_lower: str) -> Optional[int]:
        """Return the heading level (1-6) named in the text, if any.

        Recognises ``h<N>`` and ``level <N>`` as plain substrings.
        """
        for level in range(1, 7):
            if f"h{level}" in query_lower or f"level {level}" in query_lower:
                return level
        return None

    def _translate_natural_language_to_jsonpath(self, query: str) -> Optional[str]:
        """Translate a natural-language request into a JSONPath expression.

        Matching is by substring on the lower-cased query.  A heading level
        combined with the word "heading" is handled first so that, for
        example, "find headings level 1" yields the level filter instead of
        the generic ``$..heading``.  A level mentioned on its own is used
        only when no generic phrase matches.

        Returns:
            The JSONPath expression, or None if nothing matches.
        """
        query_lower = query.lower()
        level = self._heading_level(query_lower)

        if level is not None and "heading" in query_lower:
            return f"$..heading[?(@.level == {level})]"

        for phrases, jsonpath in self._NL_PATTERNS:
            if any(phrase in query_lower for phrase in phrases):
                return jsonpath

        if level is not None:
            return f"$..heading[?(@.level == {level})]"

        return None