generated from coulomb/repo-seed
Add optional JSONPath query/extract support, FTS5 section/block search, and the `mkt cache query` and `search` commands. The local SQLite backend now supports parsed snapshot persistence, incremental refresh, cached querying, and ranked full-text search.
This commit is contained in:
@@ -29,7 +29,7 @@ from markitect_tool.content_class import (
|
||||
ContentClassResolutionError,
|
||||
load_content_class_file,
|
||||
)
|
||||
from markitect_tool.core import parse_markdown_file
|
||||
from markitect_tool.core import Document, parse_markdown_file
|
||||
from markitect_tool.contract import (
|
||||
ContractLoaderError,
|
||||
check_markdown_file,
|
||||
@@ -52,7 +52,13 @@ from markitect_tool.generation import (
|
||||
from markitect_tool.literate import tangle_markdown, weave_markdown, write_tangle_files
|
||||
from markitect_tool.ops import IncludeError, compose_files, resolve_includes, transform_markdown
|
||||
from markitect_tool.processor import ProcessorContext, run_fenced_processors
|
||||
from markitect_tool.query import InvalidQueryError, extract_document, query_document
|
||||
from markitect_tool.query import (
|
||||
InvalidQueryError,
|
||||
extract_document,
|
||||
extract_document_jsonpath,
|
||||
query_document,
|
||||
query_document_jsonpath,
|
||||
)
|
||||
from markitect_tool.reference import (
|
||||
ReferenceContext,
|
||||
ReferenceResolutionError,
|
||||
@@ -162,6 +168,13 @@ def metrics(file: Path, output_format: str) -> None:
|
||||
@main.command()
|
||||
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
|
||||
@click.argument("selector")
|
||||
@click.option(
|
||||
"--engine",
|
||||
type=click.Choice(["selector", "jsonpath"], case_sensitive=False),
|
||||
default="selector",
|
||||
show_default=True,
|
||||
help="Query engine to use.",
|
||||
)
|
||||
@click.option(
|
||||
"--format",
|
||||
"output_format",
|
||||
@@ -169,16 +182,21 @@ def metrics(file: Path, output_format: str) -> None:
|
||||
default="json",
|
||||
show_default=True,
|
||||
)
|
||||
def query(file: Path, selector: str, output_format: str) -> None:
|
||||
def query(file: Path, selector: str, engine: str, output_format: str) -> None:
|
||||
"""Query structured Markdown content with a small selector."""
|
||||
|
||||
document = parse_markdown_file(file)
|
||||
try:
|
||||
matches = query_document(document, selector)
|
||||
matches = (
|
||||
query_document_jsonpath(document, selector)
|
||||
if engine == "jsonpath"
|
||||
else query_document(document, selector)
|
||||
)
|
||||
except InvalidQueryError as exc:
|
||||
raise click.ClickException(str(exc)) from exc
|
||||
data = {
|
||||
"selector": selector,
|
||||
"engine": engine,
|
||||
"document_path": str(file),
|
||||
"count": len(matches),
|
||||
"matches": [match.to_dict() for match in matches],
|
||||
@@ -189,6 +207,13 @@ def query(file: Path, selector: str, output_format: str) -> None:
|
||||
@main.command()
|
||||
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
|
||||
@click.argument("selector")
|
||||
@click.option(
|
||||
"--engine",
|
||||
type=click.Choice(["selector", "jsonpath"], case_sensitive=False),
|
||||
default="selector",
|
||||
show_default=True,
|
||||
help="Query engine to use.",
|
||||
)
|
||||
@click.option(
|
||||
"--format",
|
||||
"output_format",
|
||||
@@ -196,16 +221,21 @@ def query(file: Path, selector: str, output_format: str) -> None:
|
||||
default="text",
|
||||
show_default=True,
|
||||
)
|
||||
def extract(file: Path, selector: str, output_format: str) -> None:
|
||||
def extract(file: Path, selector: str, engine: str, output_format: str) -> None:
|
||||
"""Extract text or Markdown content from structured Markdown."""
|
||||
|
||||
document = parse_markdown_file(file)
|
||||
try:
|
||||
items = extract_document(document, selector)
|
||||
items = (
|
||||
extract_document_jsonpath(document, selector)
|
||||
if engine == "jsonpath"
|
||||
else extract_document(document, selector)
|
||||
)
|
||||
except InvalidQueryError as exc:
|
||||
raise click.ClickException(str(exc)) from exc
|
||||
data = {
|
||||
"selector": selector,
|
||||
"engine": engine,
|
||||
"document_path": str(file),
|
||||
"count": len(items),
|
||||
"items": items,
|
||||
@@ -976,6 +1006,124 @@ def cache_index(
|
||||
_emit_local_index_data(result.to_dict(), output_format)
|
||||
|
||||
|
||||
@cache.command("query")
@click.argument("selector")
@click.option(
    "--root",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path("."),
    show_default=True,
    help="Root used for the default local index path.",
)
@click.option(
    "--index-path",
    type=click.Path(dir_okay=False, path_type=Path),
    help="SQLite index path. Defaults to .markitect/cache/index.sqlite3 under root.",
)
@click.option(
    "--path",
    "paths",
    multiple=True,
    help="Restrict query to one or more indexed relative paths.",
)
@click.option(
    "--engine",
    type=click.Choice(["selector", "jsonpath"], case_sensitive=False),
    default="selector",
    show_default=True,
    help="Query engine to use.",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="json",
    show_default=True,
)
def cache_query(
    selector: str,
    root: Path,
    index_path: Path | None,
    paths: tuple[str, ...],
    engine: str,
    output_format: str,
) -> None:
    """Run a selector or JSONPath query over indexed document snapshots."""
    # NOTE: the docstring above doubles as the Click help text; keep it short.

    store = LocalSnapshotStore(local_index_path_for(root, index_path))

    # Explicit --path values win; otherwise every path recorded in the index
    # state is queried. Either way the paths are visited in sorted order.
    if paths:
        targets = sorted(paths)
    else:
        targets = sorted(state.path for state in store.load_state())

    # Pick the query function once instead of re-deciding per document.
    run_query = query_document_jsonpath if engine == "jsonpath" else query_document

    collected = []
    try:
        for target in targets:
            snapshot = Document.from_dict(store.get_document(target))
            for hit in run_query(snapshot, selector):
                record = hit.to_dict()
                # Tag each match with the indexed path it came from.
                record["source_path"] = target
                collected.append(record)
    except (KeyError, InvalidQueryError) as exc:
        # Report lookup failures (presumably an unindexed path -- confirm
        # against LocalSnapshotStore.get_document) and malformed queries as
        # regular CLI errors rather than tracebacks.
        raise click.ClickException(str(exc)) from exc

    _emit_query(
        {
            "selector": selector,
            "engine": engine,
            "index_path": str(local_index_path_for(root, index_path)),
            "count": len(collected),
            "matches": collected,
        },
        output_format,
    )
|
||||
|
||||
|
||||
@main.command()
@click.argument("text")
@click.option(
    "--root",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path("."),
    show_default=True,
    help="Root used for the default local index path.",
)
@click.option(
    "--index-path",
    type=click.Path(dir_okay=False, path_type=Path),
    help="SQLite index path. Defaults to .markitect/cache/index.sqlite3 under root.",
)
@click.option("--limit", type=int, default=20, show_default=True)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="text",
    show_default=True,
)
def search(
    text: str,
    root: Path,
    index_path: Path | None,
    limit: int,
    output_format: str,
) -> None:
    """Search the local SQLite index with FTS5."""
    # NOTE: the docstring above doubles as the Click help text; keep it short.

    try:
        # Both opening the store and running the search sit inside the try,
        # so a ValueError from either becomes a regular CLI error.
        hits = LocalSnapshotStore(
            local_index_path_for(root, index_path)
        ).search(text, limit=limit)
    except ValueError as exc:
        raise click.ClickException(str(exc)) from exc

    _emit_search_results(
        {
            "query": text,
            "index_path": str(local_index_path_for(root, index_path)),
            "count": len(hits),
            "matches": [hit.to_dict() for hit in hits],
        },
        output_format,
    )
|
||||
|
||||
|
||||
@main.group()
|
||||
def template() -> None:
|
||||
"""Render and inspect deterministic Markdown templates."""
|
||||
@@ -1392,6 +1540,26 @@ def _emit_local_index_data(data: dict, output_format: str) -> None:
|
||||
click.echo(f"- {value}")
|
||||
|
||||
|
||||
def _emit_search_results(data: dict, output_format: str) -> None:
    """Write a search result payload to stdout in the requested format.

    ``data`` is read for a ``count`` value and a ``matches`` list of dicts.
    ``"json"`` and ``"yaml"`` dump the payload as-is; any other format value
    prints a count line followed by one entry per match.
    """
    if output_format == "json":
        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
        return
    if output_format == "yaml":
        click.echo(yaml.safe_dump(data, sort_keys=False))
        return

    # Text output: a summary line, then one bullet per match.
    click.echo(f"{data['count']} match(es)")
    for hit in data["matches"]:
        # The line suffix and heading tag are optional; a missing or falsy
        # value leaves them out of the bullet entirely.
        first_line = hit.get("line_start")
        span = f":{first_line}" if first_line else ""
        title = hit.get("heading")
        heading = f" [{title}]" if title else ""
        click.echo(
            f"- {hit['path']}{span} {hit['unit_kind']}#{hit['unit_index']}{heading}"
        )

        # Collapse all whitespace runs so the preview stays on one line,
        # then cap it at 160 characters.
        preview = " ".join(str(hit.get("text", "")).split())
        if preview:
            click.echo(f" {preview[:160]}")
|
||||
|
||||
|
||||
def _emit_reference_result(data: dict, output_format: str) -> None:
|
||||
if output_format == "json":
|
||||
click.echo(json.dumps(data, indent=2, ensure_ascii=False))
|
||||
|
||||
Reference in New Issue
Block a user