Files
markitect-main/markitect/query_paradigms/paradigms/jsonpath_paradigm.py
tegwick 5143864a86 feat: implement comprehensive query paradigm zoo system (issue #62)
- Created extensible BaseQueryParadigm interface with standardized QueryResult format
- Implemented QueryParadigmRegistry for paradigm discovery and management
- Added 5 working paradigms: SQL, FTS, GraphQL, JSONPath, Natural Language
- Documented 9 additional paradigms: QBE, Batch Manipulation, Visual Query Builder, REST API, NoSQL, UNIX Pipeline, XPath/XQuery, RAG, Data Transformation
- Integrated full CLI interface: list, search, show, exec, categories commands
- Added comprehensive test suite with 23 test cases covering all components
- Auto-registration system enables easy addition of new paradigms
- Organized paradigms by category (structural, textual, semantic, visual, procedural, network) and complexity (beginner, intermediate, advanced)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-03 23:06:57 +02:00

333 lines
11 KiB
Python

"""
JSONPath Query Paradigm - Path-based navigation through AST structures.
"""
import json
import re
import time
from typing import Dict, Any, List, Optional

from ..base import BaseQueryParadigm, QueryResult
class JSONPathQueryParadigm(BaseQueryParadigm):
    """JSONPath query paradigm for navigating AST structures.

    Queries are evaluated with the ``jsonpath_ng`` package when it can be
    imported; otherwise a small built-in evaluator handles plain dotted
    paths with ``*`` / ``[*]`` wildcards and ``[n]`` indices.
    """

    # Database file used when the caller's config does not name one.
    _DEFAULT_DB_PATH = 'markitect.db'

    # One or more explicit integer indices: ``[0]``, ``[-1]``, ``[0,1,2]``.
    _INDEX_PATTERN = re.compile(r"\[\s*-?\d+(?:\s*,\s*-?\d+)*\s*\]")
    # Slices: ``[1:3]``, ``[:2]``, ``[::2]``.
    _SLICE_PATTERN = re.compile(r"\[\s*-?\d*\s*:\s*-?\d*\s*(?::\s*-?\d*\s*)?\]")
    # A fallback path segment: optional name followed by ``[*]`` / ``[n]``.
    _SEGMENT_PATTERN = re.compile(r"([^\[\]]*)((?:\[(?:\*|-?\d+)\])*)")
    _SUBSCRIPT_PATTERN = re.compile(r"\[(\*|-?\d+)\]")

    # Heading levels recognised in natural-language queries (h1..h6).
    _HEADING_LEVELS = range(1, 7)

    @property
    def name(self) -> str:
        """Display name of the paradigm."""
        return "JSONPath"

    @property
    def description(self) -> str:
        """One-line summary of what the paradigm offers."""
        return "XPath-like navigation through AST trees for precise structural queries"

    @property
    def category(self) -> str:
        """Category label of the paradigm."""
        return "structural"

    @property
    def complexity(self) -> str:
        """Complexity label of the paradigm."""
        return "advanced"

    def execute(self, query: str, config: Optional[Dict[str, Any]] = None) -> QueryResult:
        """Execute a JSONPath query against the AST of one file.

        Args:
            query: JSONPath expression to evaluate.
            config: Optional settings. ``file_id`` or ``filename`` selects the
                file (one of them is required); ``db_path`` names the database
                and defaults to ``markitect.db`` when absent or empty.

        Returns:
            A ``QueryResult``. Failures never raise: any exception is turned
            into a result with ``success=False`` and ``error_message`` set.
        """
        started = time.perf_counter()
        file_id = None
        filename = None
        # Distinguishes "no AST could be loaded" from "AST loaded, query failed".
        ast_available = False
        try:
            from ...ast_service import ASTService

            settings = config or {}
            # A config without 'db_path' must still get the default database.
            db_path = settings.get('db_path') or self._DEFAULT_DB_PATH
            file_id = settings.get('file_id')
            filename = settings.get('filename')
            if not file_id and not filename:
                raise ValueError("Either file_id or filename must be provided for JSONPath queries")

            ast_service = ASTService(db_path)
            if file_id:
                ast_data = ast_service.get_ast_by_file_id(file_id)
            else:
                ast_data = ast_service.get_ast_by_filename(filename)
            if not ast_data:
                target = f"file_id={file_id}" if file_id else f"filename={filename}"
                raise ValueError(f"No AST found for {target}")
            ast_available = True

            results = self._run_jsonpath(query, ast_data)
            return QueryResult(
                paradigm="JSONPath",
                query=query,
                execution_time_ms=(time.perf_counter() - started) * 1000,
                result_count=len(results),
                results=results,
                metadata={
                    "file_id": file_id,
                    "filename": filename,
                    "ast_available": True,
                    "query_type": self._detect_query_type(query)
                },
                success=True
            )
        except Exception as e:
            # Boundary handler: callers receive a failed QueryResult instead
            # of an exception.
            return QueryResult(
                paradigm="JSONPath",
                query=query,
                execution_time_ms=(time.perf_counter() - started) * 1000,
                result_count=0,
                results=[],
                metadata={
                    "file_id": file_id,
                    "filename": filename,
                    "ast_available": ast_available
                },
                success=False,
                error_message=str(e)
            )

    def _run_jsonpath(self, query: str, ast_data: Any) -> List[Dict[str, Any]]:
        """Evaluate ``query`` with jsonpath_ng, or the built-in fallback."""
        try:
            try:
                # The extended parser is the one that understands filter
                # expressions such as ``[?(@.level == 1)]``.
                from jsonpath_ng.ext import parse as parse_jsonpath
            except ImportError:
                from jsonpath_ng import parse as parse_jsonpath
        except ImportError:
            # Fallback: simple dot-notation parsing
            return self._simple_path_query(query, ast_data)

        return [
            {
                "path": str(match.full_path),
                "value": match.value,
                "context": self._get_context(match, ast_data)
            }
            for match in parse_jsonpath(query).find(ast_data)
        ]

    def get_examples(self) -> List[Dict[str, str]]:
        """Return example JSONPath queries (name, description, query)."""
        return [
            {
                "name": "All headings",
                "description": "Find all heading nodes in the AST",
                "query": "$..heading"
            },
            {
                "name": "Top-level headings",
                "description": "Find only H1 headings",
                "query": "$..heading[?(@.level == 1)]"
            },
            {
                "name": "Code blocks",
                "description": "Find all code block nodes",
                "query": "$..code_block"
            },
            {
                "name": "Links with URLs",
                "description": "Find all link nodes with their URLs",
                "query": "$..link[?(@.url)]"
            },
            {
                "name": "Image sources",
                "description": "Extract all image source URLs",
                "query": "$..image.src"
            },
            {
                "name": "List items",
                "description": "Find all list item contents",
                "query": "$..list_item.children[*].text"
            },
            {
                "name": "Nested structures",
                "description": "Find deeply nested elements",
                "query": "$..children[*].children[*].type"
            },
            {
                "name": "Content with attributes",
                "description": "Find nodes with specific attributes",
                "query": "$..node[?(@.attrs.class)]"
            }
        ]

    def validate_query(self, query: str) -> tuple[bool, Optional[str]]:
        """Check basic JSONPath syntax.

        The query must be non-empty, start with ``$`` and have correctly
        nested ``[]`` / ``()`` pairs. Characters inside quoted strings are
        ignored, so ``[?(@.text == 'a]')]`` is not reported as unbalanced.

        Returns:
            ``(True, None)`` when the query looks valid, otherwise
            ``(False, message)``.
        """
        if not query or not query.strip():
            return False, "Query cannot be empty"
        # Basic JSONPath validation
        if not query.startswith('$'):
            return False, "JSONPath queries must start with '$'"

        opener_for = {']': '[', ')': '('}
        message_for = {
            '[': "Unmatched brackets in JSONPath query",
            '(': "Unmatched parentheses in JSONPath query",
        }
        pending = []          # openers still waiting for their closer
        active_quote = None   # quote character of the string being scanned
        escaped = False
        for ch in query:
            if active_quote is not None:
                if escaped:
                    escaped = False
                elif ch == '\\':
                    escaped = True
                elif ch == active_quote:
                    active_quote = None
                continue
            if ch in ('"', "'"):
                active_quote = ch
            elif ch in message_for:
                pending.append(ch)
            elif ch in opener_for:
                expected = opener_for[ch]
                # A closer must match the most recent opener; counting alone
                # would accept "$]a[" or "$[(])".
                if not pending or pending[-1] != expected:
                    return False, message_for[expected]
                pending.pop()

        if active_quote is not None:
            return False, "Unterminated string literal in JSONPath query"
        if '[' in pending:
            return False, message_for['[']
        if pending:
            return False, message_for['(']
        return True, None

    def get_syntax_help(self) -> str:
        """Return the JSONPath syntax help text."""
        return """JSONPath Query Syntax:
Basic Navigation:
$ - Root node
.child - Direct child access
..child - Recursive descent (any level)
[*] - All array elements
[0] - First array element
[-1] - Last array element
Array Operations:
[start:end] - Array slice
[0,1,2] - Multiple specific indices
Filter Expressions:
[?(@.field)] - Filter by field existence
[?(@.field == 'value')] - Filter by field value
[?(@.level > 1)] - Numeric comparison
[?(@.type == 'heading')] - String comparison
Common AST Patterns:
$..heading - All headings at any level
$..heading[?(@.level==1)] - Only H1 headings
$..code_block.language - Programming languages used
$..link.url - All link URLs
$..image.src - All image sources
$..list_item.text - List item contents
Advanced Examples:
$.children[*].type - Types of top-level elements
$..children[?(@.type=='text')].content - All text content
$..node[?(@.attrs.class)] - Nodes with CSS classes
Operators:
==, != - Equality
<, <=, >, >= - Comparison
=~ - Regular expression match
in - Membership test
Note: JSONPath queries operate on the parsed AST structure of markdown files.
Use 'markitect ast-show <file>' to see the AST structure first.
"""

    def _detect_query_type(self, query: str) -> str:
        """Classify a query by the first matching feature.

        Checked in priority order: recursive descent, filter, ``[*]``
        wildcard, explicit integer index/indices, slice; anything else is
        ``"direct_access"``.
        """
        if '..' in query:
            return "recursive_descent"
        if '[?' in query:
            return "filtered_query"
        if '[*]' in query:
            return "array_wildcard"
        # Any integer index counts, not only [0], [1] and [-1].
        if self._INDEX_PATTERN.search(query):
            return "indexed_access"
        # Require the colon inside brackets so a stray ':' elsewhere in the
        # query is not mistaken for a slice.
        if self._SLICE_PATTERN.search(query):
            return "array_slice"
        return "direct_access"

    def _get_context(self, match, ast_data: Dict) -> Dict[str, Any]:
        """Build context information for a jsonpath_ng match.

        ``depth`` is the number of dot-separated parts of the match's full
        path and ``parent_path`` is that path without its last part (``None``
        for a single-part path). ``sibling_count`` is a placeholder that is
        always 0; ``ast_data`` is accepted but not used.
        """
        path_parts = str(match.full_path).split('.')
        context = {
            "parent_path": None,
            "sibling_count": 0,
            "depth": len(path_parts)
        }
        if len(path_parts) > 1:
            context["parent_path"] = '.'.join(path_parts[:-1])
        return context

    def _simple_path_query(self, query: str, data: Any) -> List[Dict[str, Any]]:
        """Evaluate a simple dotted path without jsonpath_ng.

        Supported: ``$``, ``$.a.b``, ``*`` / ``[*]`` wildcards and ``[n]``
        list indices (for example ``$.children[*].type``). Recursive descent,
        filters and slices are not supported and produce no matches.

        Returns:
            One ``{"path", "value", "context"}`` dict per matched node, or an
            empty list when nothing matches. A key that exists with a
            ``None`` value is reported as a match.
        """
        if query == '$':
            return [{"path": "$", "value": data, "context": {"depth": 0}}]
        if not query.startswith('$.'):
            return []

        segments = query[2:].split('.')
        # Every node reached so far, as (path, value) pairs.
        frontier = [("$", data)]
        for segment in segments:
            frontier = self._advance_frontier(frontier, segment)
            if not frontier:
                # A missing key means no match; never report the partially
                # traversed value under the requested path.
                return []

        depth = len(segments)
        return [
            {"path": path, "value": value, "context": {"depth": depth}}
            for path, value in frontier
        ]

    def _advance_frontier(self, frontier: List[tuple], segment: str) -> List[tuple]:
        """Apply one dotted path segment to every node in ``frontier``."""
        parsed = self._SEGMENT_PATTERN.fullmatch(segment)
        if parsed is None:
            # Unsupported syntax (filter, slice, quoted name, ...).
            return []
        name, subscripts = parsed.groups()
        if not name and not subscripts:
            # Empty segment, e.g. produced by ".." recursive descent.
            return []
        if name:
            frontier = [
                child
                for node in frontier
                for child in self._children_by_name(node, name)
            ]
        for token in self._SUBSCRIPT_PATTERN.findall(subscripts):
            frontier = [
                child
                for node in frontier
                for child in self._children_by_subscript(node, token)
            ]
        return frontier

    def _children_by_name(self, node: tuple, name: str) -> List[tuple]:
        """Resolve a ``.name`` (or ``.*``) step from one node."""
        path, value = node
        # A literal key wins over the wildcard meaning of "*".
        if isinstance(value, dict) and name in value:
            return [(f"{path}.{name}", value[name])]
        if name == '*':
            return self._all_children(path, value)
        return []

    def _children_by_subscript(self, node: tuple, token: str) -> List[tuple]:
        """Resolve a ``[*]`` or ``[n]`` step from one node."""
        path, value = node
        if token == '*':
            return self._all_children(path, value)
        if not isinstance(value, list):
            return []
        index = int(token)
        if -len(value) <= index < len(value):
            return [(f"{path}[{index}]", value[index])]
        return []

    @staticmethod
    def _all_children(path: str, value: Any) -> List[tuple]:
        """Return every direct child of a list or dict as (path, value)."""
        if isinstance(value, list):
            return [(f"{path}[{i}]", item) for i, item in enumerate(value)]
        if isinstance(value, dict):
            return [(f"{path}.{key}", child) for key, child in value.items()]
        return []

    def can_translate_from(self, other_paradigm: str) -> bool:
        """Return True when queries of ``other_paradigm`` can be translated."""
        return other_paradigm.lower() in ["natural_language"]

    def translate_query(self, query: str, from_paradigm: str) -> Optional[str]:
        """Translate ``query`` from another paradigm to JSONPath.

        Returns the JSONPath string, or ``None`` when the source paradigm is
        unsupported or no pattern matches.
        """
        if from_paradigm.lower() == "natural_language":
            return self._translate_natural_language_to_jsonpath(query)
        return None

    def _heading_level_filter(self, text: str) -> Optional[str]:
        """Return a heading-level filter when ``text`` mentions h1..h6."""
        for level in self._HEADING_LEVELS:
            if f"h{level}" in text or f"level {level}" in text:
                return f"$..heading[?(@.level == {level})]"
        return None

    def _translate_natural_language_to_jsonpath(self, query: str) -> Optional[str]:
        """Translate a natural-language request to JSONPath by keyword.

        Matching is case-insensitive substring matching. A heading level
        ("h2", "level 2") narrows "all headings" / "find headings" requests
        and is otherwise used only when no other phrase matches.

        Returns:
            The JSONPath string, or ``None`` when nothing matches.
        """
        text = query.lower()
        level_filter = self._heading_level_filter(text)

        if "all headings" in text or "find headings" in text:
            # "all headings level 2" should honour the level, not drop it.
            return level_filter or "$..heading"
        if "first heading" in text or "main heading" in text:
            return "$..heading[0]"

        # Checked in order; the first phrase found wins.
        phrase_paths = (
            ("code blocks", "$..code_block"),
            ("links", "$..link"),
            ("images", "$..image"),
            ("list items", "$..list_item"),
            ("all text", "$..text"),
        )
        for phrase, path in phrase_paths:
            if phrase in text:
                return path

        # Level-specific patterns
        return level_filter