generated from coulomb/repo-seed
Optional JSONPath query/extract support, FTS5 section/block search, mkt cache query and search. The local SQLite backend now supports parsed snapshot persistence, incremental refresh, cached querying, and ranked full-text search.
This commit is contained in:
@@ -4,12 +4,16 @@ from markitect_tool.query.engine import (
|
||||
InvalidQueryError,
|
||||
QueryMatch,
|
||||
extract_document,
|
||||
extract_document_jsonpath,
|
||||
query_document,
|
||||
query_document_jsonpath,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"InvalidQueryError",
|
||||
"QueryMatch",
|
||||
"extract_document",
|
||||
"extract_document_jsonpath",
|
||||
"query_document",
|
||||
"query_document_jsonpath",
|
||||
]
|
||||
|
||||
@@ -60,6 +60,42 @@ def query_document(document: Document, selector: str) -> list[QueryMatch]:
|
||||
raise InvalidQueryError(f"Unsupported selector target `{parsed.target}`")
|
||||
|
||||
|
||||
def query_document_jsonpath(document: Document, expression: str) -> list[QueryMatch]:
    """Evaluate a JSONPath ``expression`` against ``document.to_dict()``.

    The JSONPath backend (``jsonpath-ng``) is imported lazily inside this
    function so that importing the module does not require it.

    Args:
        document: Parsed document; only its ``to_dict()`` output is queried.
        expression: JSONPath expression text.

    Returns:
        One ``QueryMatch`` per JSONPath hit, in the order the backend
        reports them.

    Raises:
        InvalidQueryError: If ``jsonpath-ng`` cannot be imported, or if the
            expression fails to compile.
    """
    try:
        from jsonpath_ng.ext import parse as parse_jsonpath
    except ImportError as exc:  # pragma: no cover - branch depends on env deps
        raise InvalidQueryError(
            "JSONPath queries require the optional `jsonpath-ng` dependency. "
            "Install `markitect-tool[query]`."
        ) from exc

    # Any parse failure, whatever its concrete type, is reported as an
    # invalid query with the original exception chained for debugging.
    try:
        compiled = parse_jsonpath(expression)
    except Exception as exc:  # jsonpath-ng raises parser-specific exceptions
        raise InvalidQueryError(f"Invalid JSONPath expression `{expression}`: {exc}") from exc

    # NOTE(review): the "$" is concatenated directly onto the rendered path;
    # jsonpath-ng appears to render paths without a leading separator, so
    # results presumably look like "$headings.[0]" - confirm with callers.
    located = (
        ("$" + str(hit.full_path), hit.value)
        for hit in compiled.find(document.to_dict())
    )
    return [
        QueryMatch(
            kind=_jsonpath_kind(where, payload),
            path=where,
            value=payload,
            text=_text_value(payload),
            line=_jsonpath_line(payload),
        )
        for where, payload in located
    ]
|
||||
|
||||
|
||||
def extract_document(document: Document, selector: str) -> list[str]:
|
||||
"""Extract text content from query matches."""
|
||||
|
||||
@@ -74,6 +110,16 @@ def extract_document(document: Document, selector: str) -> list[str]:
|
||||
return extracted
|
||||
|
||||
|
||||
def extract_document_jsonpath(document: Document, expression: str) -> list[str]:
    """Return the text of every JSONPath match that has a textual form.

    Runs ``query_document_jsonpath`` and keeps only the matches whose
    ``text`` is not ``None``, preserving match order.
    """
    return [
        hit.text
        for hit in query_document_jsonpath(document, expression)
        if hit.text is not None
    ]
|
||||
|
||||
|
||||
def _parse_selector(selector: str) -> _Selector:
|
||||
raw = selector.strip()
|
||||
if not raw:
|
||||
@@ -240,3 +286,25 @@ def _text_value(value: Any) -> str | None:
|
||||
if isinstance(value, int | float | bool):
|
||||
return str(value)
|
||||
return None
|
||||
|
||||
|
||||
def _jsonpath_kind(path: str, value: Any) -> str:
|
||||
if ".frontmatter" in path:
|
||||
return "frontmatter"
|
||||
if ".headings" in path:
|
||||
return "heading" if isinstance(value, dict) else "heading_value"
|
||||
if ".sections" in path:
|
||||
return "section" if isinstance(value, dict) else "section_value"
|
||||
if ".blocks" in path:
|
||||
return "block" if isinstance(value, dict) else "block_value"
|
||||
if ".tokens" in path:
|
||||
return "token" if isinstance(value, dict) else "token_value"
|
||||
return "jsonpath"
|
||||
|
||||
|
||||
def _jsonpath_line(value: Any) -> int | None:
|
||||
if isinstance(value, dict):
|
||||
raw_line = value.get("line") or value.get("line_start")
|
||||
if isinstance(raw_line, int):
|
||||
return raw_line
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user