Add optional JSONPath query/extract support, FTS5 section/block search, and mkt cache query and search. The local SQLite backend now supports parsed snapshot persistence, incremental refresh, cached querying, and ranked full-text search.

This commit is contained in:
2026-05-04 10:32:06 +02:00
parent 36ff4cedab
commit 0015c8a385
11 changed files with 540 additions and 22 deletions

View File

@@ -4,12 +4,16 @@ from markitect_tool.query.engine import (
InvalidQueryError,
QueryMatch,
extract_document,
extract_document_jsonpath,
query_document,
query_document_jsonpath,
)
# Public names of this package; each one is imported above from
# ``markitect_tool.query.engine``.
__all__ = [
"InvalidQueryError",
"QueryMatch",
"extract_document",
"extract_document_jsonpath",
"query_document",
"query_document_jsonpath",
]

View File

@@ -60,6 +60,42 @@ def query_document(document: Document, selector: str) -> list[QueryMatch]:
raise InvalidQueryError(f"Unsupported selector target `{parsed.target}`")
def query_document_jsonpath(document: Document, expression: str) -> list[QueryMatch]:
    """Query a parsed document with JSONPath over ``Document.to_dict()``.

    JSONPath support is intentionally optional so the core selector engine
    remains dependency-light. Install ``markitect-tool[query]`` to enable it.

    Args:
        document: Parsed document; the expression is evaluated against
            ``document.to_dict()``.
        expression: JSONPath expression, parsed with ``jsonpath_ng.ext``.

    Returns:
        One ``QueryMatch`` per JSONPath match, in the order jsonpath-ng
        yields them. ``path`` is the match location rendered as a
        root-anchored path (e.g. ``$.headings.[0].text``).

    Raises:
        InvalidQueryError: If ``jsonpath-ng`` is not installed or the
            expression cannot be parsed.
    """
    try:
        from jsonpath_ng.ext import parse as parse_jsonpath
    except ImportError as exc:  # pragma: no cover - branch depends on env deps
        raise InvalidQueryError(
            "JSONPath queries require the optional `jsonpath-ng` dependency. "
            "Install `markitect-tool[query]`."
        ) from exc

    try:
        compiled = parse_jsonpath(expression)
    except Exception as exc:  # jsonpath-ng raises parser-specific exceptions
        raise InvalidQueryError(f"Invalid JSONPath expression `{expression}`: {exc}") from exc

    matches: list[QueryMatch] = []
    for match in compiled.find(document.to_dict()):
        path = _root_anchored_jsonpath(str(match.full_path))
        value = match.value
        matches.append(
            QueryMatch(
                kind=_jsonpath_kind(path, value),
                path=path,
                value=value,
                text=_text_value(value),
                line=_jsonpath_line(value),
            )
        )
    return matches


def _root_anchored_jsonpath(rendered: str) -> str:
    """Prefix a rendered jsonpath-ng ``full_path`` with the ``$`` root.

    jsonpath-ng renders ``full_path`` relative to the root (``headings.[0]``),
    so blindly prepending ``"$"`` yields ``$headings.[0]`` and, for a match on
    the root itself, ``$$``. Insert the separator only when it is missing.
    """
    if rendered.startswith("$"):
        # Already anchored (a match on the root renders as "$").
        return rendered
    if rendered.startswith((".", "[")):
        return "$" + rendered
    return "$." + rendered
def extract_document(document: Document, selector: str) -> list[str]:
"""Extract text content from query matches."""
@@ -74,6 +110,16 @@ def extract_document(document: Document, selector: str) -> list[str]:
return extracted
def extract_document_jsonpath(document: Document, expression: str) -> list[str]:
    """Return the text of every JSONPath match that has a textual form.

    Runs ``query_document_jsonpath`` and keeps each match's ``text`` in match
    order, skipping matches whose ``text`` is ``None``.
    """
    return [
        hit.text
        for hit in query_document_jsonpath(document, expression)
        if hit.text is not None
    ]
def _parse_selector(selector: str) -> _Selector:
raw = selector.strip()
if not raw:
@@ -240,3 +286,25 @@ def _text_value(value: Any) -> str | None:
if isinstance(value, int | float | bool):
return str(value)
return None
def _jsonpath_kind(path: str, value: Any) -> str:
if ".frontmatter" in path:
return "frontmatter"
if ".headings" in path:
return "heading" if isinstance(value, dict) else "heading_value"
if ".sections" in path:
return "section" if isinstance(value, dict) else "section_value"
if ".blocks" in path:
return "block" if isinstance(value, dict) else "block_value"
if ".tokens" in path:
return "token" if isinstance(value, dict) else "token_value"
return "jsonpath"
def _jsonpath_line(value: Any) -> int | None:
if isinstance(value, dict):
raw_line = value.get("line") or value.get("line_start")
if isinstance(raw_line, int):
return raw_line
return None