Add optional JSONPath query/extract support, FTS5 section/block search, and the `mkt cache query` and `mkt search` commands. The local SQLite backend now supports parsed snapshot persistence, incremental refresh, cached querying, and ranked full-text search.

This commit is contained in:
2026-05-04 10:32:06 +02:00
parent 36ff4cedab
commit 0015c8a385
11 changed files with 540 additions and 22 deletions

View File

@@ -29,7 +29,7 @@ from markitect_tool.content_class import (
ContentClassResolutionError,
load_content_class_file,
)
from markitect_tool.core import parse_markdown_file
from markitect_tool.core import Document, parse_markdown_file
from markitect_tool.contract import (
ContractLoaderError,
check_markdown_file,
@@ -52,7 +52,13 @@ from markitect_tool.generation import (
from markitect_tool.literate import tangle_markdown, weave_markdown, write_tangle_files
from markitect_tool.ops import IncludeError, compose_files, resolve_includes, transform_markdown
from markitect_tool.processor import ProcessorContext, run_fenced_processors
from markitect_tool.query import InvalidQueryError, extract_document, query_document
from markitect_tool.query import (
InvalidQueryError,
extract_document,
extract_document_jsonpath,
query_document,
query_document_jsonpath,
)
from markitect_tool.reference import (
ReferenceContext,
ReferenceResolutionError,
@@ -162,6 +168,13 @@ def metrics(file: Path, output_format: str) -> None:
@main.command()
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.argument("selector")
@click.option(
"--engine",
type=click.Choice(["selector", "jsonpath"], case_sensitive=False),
default="selector",
show_default=True,
help="Query engine to use.",
)
@click.option(
"--format",
"output_format",
@@ -169,16 +182,21 @@ def metrics(file: Path, output_format: str) -> None:
default="json",
show_default=True,
)
def query(file: Path, selector: str, output_format: str) -> None:
def query(file: Path, selector: str, engine: str, output_format: str) -> None:
"""Query structured Markdown content with a small selector."""
document = parse_markdown_file(file)
try:
matches = query_document(document, selector)
matches = (
query_document_jsonpath(document, selector)
if engine == "jsonpath"
else query_document(document, selector)
)
except InvalidQueryError as exc:
raise click.ClickException(str(exc)) from exc
data = {
"selector": selector,
"engine": engine,
"document_path": str(file),
"count": len(matches),
"matches": [match.to_dict() for match in matches],
@@ -189,6 +207,13 @@ def query(file: Path, selector: str, output_format: str) -> None:
@main.command()
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.argument("selector")
@click.option(
"--engine",
type=click.Choice(["selector", "jsonpath"], case_sensitive=False),
default="selector",
show_default=True,
help="Query engine to use.",
)
@click.option(
"--format",
"output_format",
@@ -196,16 +221,21 @@ def query(file: Path, selector: str, output_format: str) -> None:
default="text",
show_default=True,
)
def extract(file: Path, selector: str, output_format: str) -> None:
def extract(file: Path, selector: str, engine: str, output_format: str) -> None:
"""Extract text or Markdown content from structured Markdown."""
document = parse_markdown_file(file)
try:
items = extract_document(document, selector)
items = (
extract_document_jsonpath(document, selector)
if engine == "jsonpath"
else extract_document(document, selector)
)
except InvalidQueryError as exc:
raise click.ClickException(str(exc)) from exc
data = {
"selector": selector,
"engine": engine,
"document_path": str(file),
"count": len(items),
"items": items,
@@ -976,6 +1006,124 @@ def cache_index(
_emit_local_index_data(result.to_dict(), output_format)
# `cache query` subcommand: evaluates one selector (or JSONPath expression)
# against document snapshots loaded from the local index and emits the
# combined matches, each tagged with the indexed path it came from.
@cache.command("query")
@click.argument("selector")
@click.option(
    "--root",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path("."),
    show_default=True,
    help="Root used for the default local index path.",
)
@click.option(
    "--index-path",
    type=click.Path(dir_okay=False, path_type=Path),
    help="SQLite index path. Defaults to .markitect/cache/index.sqlite3 under root.",
)
@click.option(
    "--path",
    "paths",
    multiple=True,
    help="Restrict query to one or more indexed relative paths.",
)
@click.option(
    "--engine",
    type=click.Choice(["selector", "jsonpath"], case_sensitive=False),
    default="selector",
    show_default=True,
    help="Query engine to use.",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="json",
    show_default=True,
)
def cache_query(
    selector: str,
    root: Path,
    index_path: Path | None,
    paths: tuple[str, ...],
    engine: str,
    output_format: str,
) -> None:
    """Run a selector or JSONPath query over indexed document snapshots."""
    # NOTE: the docstring above is what Click prints as the command help, so
    # implementation notes are kept in comments.

    # Resolve the index location once so the path reported in the output is
    # exactly the one the store was opened with.
    resolved_index_path = local_index_path_for(root, index_path)
    store = LocalSnapshotStore(resolved_index_path)

    if paths:
        # Repeating --path with the same value must not query that document
        # twice (it would duplicate matches and inflate "count").
        indexed_paths = sorted(set(paths))
    else:
        # No restriction given: query every path recorded in the index state.
        indexed_paths = sorted([state.path for state in store.load_state()])

    # The engine is fixed for the whole run; pick the query function once
    # instead of re-deciding for every document.
    run_query = query_document_jsonpath if engine == "jsonpath" else query_document

    all_matches = []
    try:
        for indexed_path in indexed_paths:
            document = Document.from_dict(store.get_document(indexed_path))
            for match in run_query(document, selector):
                item = match.to_dict()
                # Record which indexed document produced this match.
                item["source_path"] = indexed_path
                all_matches.append(item)
    except KeyError as exc:
        # str(KeyError("x")) is "'x'" (the repr of the key); use the raw
        # argument so the CLI error is not wrapped in stray quotes.
        message = str(exc.args[0]) if len(exc.args) == 1 else str(exc)
        raise click.ClickException(message) from exc
    except InvalidQueryError as exc:
        raise click.ClickException(str(exc)) from exc

    data = {
        "selector": selector,
        "engine": engine,
        "index_path": str(resolved_index_path),
        "count": len(all_matches),
        "matches": all_matches,
    }
    _emit_query(data, output_format)
# Top-level `search` command: forwards the query text and result limit to the
# local snapshot store and prints whatever it returns in the chosen format.
@main.command()
@click.argument("text")
@click.option(
    "--root",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path("."),
    show_default=True,
    help="Root used for the default local index path.",
)
@click.option(
    "--index-path",
    type=click.Path(dir_okay=False, path_type=Path),
    help="SQLite index path. Defaults to .markitect/cache/index.sqlite3 under root.",
)
@click.option("--limit", type=int, default=20, show_default=True)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="text",
    show_default=True,
)
def search(
    text: str,
    root: Path,
    index_path: Path | None,
    limit: int,
    output_format: str,
) -> None:
    """Search the local SQLite index with FTS5."""
    # The docstring is shown verbatim as Click help text; keep notes here.
    try:
        db_path = local_index_path_for(root, index_path)
        snapshot_store = LocalSnapshotStore(db_path)
        hits = snapshot_store.search(text, limit=limit)
    except ValueError as exc:
        # Surface a ValueError from the lookup as a regular CLI error
        # instead of a traceback.
        raise click.ClickException(str(exc)) from exc

    # Envelope handed to the emitter: the query, where it ran, and the hits
    # converted to plain dictionaries.
    payload = {
        "query": text,
        "index_path": str(local_index_path_for(root, index_path)),
        "count": len(hits),
        "matches": [hit.to_dict() for hit in hits],
    }
    _emit_search_results(payload, output_format)
@main.group()
def template() -> None:
"""Render and inspect deterministic Markdown templates."""
@@ -1392,6 +1540,26 @@ def _emit_local_index_data(data: dict, output_format: str) -> None:
click.echo(f"- {value}")
def _emit_search_results(data: dict, output_format: str) -> None:
    """Print a search result payload as JSON, YAML, or plain text.

    Args:
        data: Payload with a ``count`` entry and a ``matches`` list. In text
            mode every match must provide ``path``, ``unit_kind`` and
            ``unit_index``; ``line_start``, ``heading`` and ``text`` are
            optional.
        output_format: ``"json"`` or ``"yaml"`` for structured output; any
            other value selects the plain-text listing.
    """
    if output_format == "json":
        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
        return

    if output_format == "yaml":
        # allow_unicode keeps non-ASCII text readable, matching the JSON
        # branch (ensure_ascii=False) instead of emitting escape sequences.
        click.echo(yaml.safe_dump(data, sort_keys=False, allow_unicode=True))
        return

    click.echo(f"{data['count']} match(es)")
    for match in data["matches"]:
        # Append ":<line>" only when a truthy start line is present.
        span = f":{match['line_start']}" if match.get("line_start") else ""
        heading = f" [{match['heading']}]" if match.get("heading") else ""
        click.echo(
            f"- {match['path']}{span} {match['unit_kind']}#{match['unit_index']}{heading}"
        )
        # Collapse all whitespace runs so the preview stays on one line,
        # then cap it at 160 characters.
        preview = " ".join(str(match.get("text", "")).split())
        if preview:
            click.echo(f"  {preview[:160]}")
def _emit_reference_result(data: dict, output_format: str) -> None:
if output_format == "json":
click.echo(json.dumps(data, indent=2, ensure_ascii=False))