diff --git a/docs/local-index-backend.md b/docs/local-index-backend.md index a9d9a54..4960b24 100644 --- a/docs/local-index-backend.md +++ b/docs/local-index-backend.md @@ -31,6 +31,19 @@ Build or refresh the local index: mkt cache index docs workplans --root . ``` +Query indexed snapshots: + +```text +mkt cache query 'sections[heading=Decision]' --root . +mkt cache query '$.headings[*].text' --engine jsonpath --root . +``` + +Search indexed section/block text: + +```text +mkt search SQLite --root . +``` + Inspect a parsed AST without using the cache: ```text @@ -71,6 +84,7 @@ The first schema stores: - `blocks`: block type, text, source span, and heading level - `dependencies`: reserved dependency edge table for references, transclusion, literate chunks, and future invalidation graphs +- `search_units`: FTS5 virtual table over sections and blocks This is enough to recover the useful markitect-main idea of keeping parsed structure available for faster and richer query backends, while keeping the @@ -78,10 +92,6 @@ normal CLI usable without a cache. ## Future Work -`MKTT-WP-0007` still needs: - -- JSONPath query adapter over stored or live document JSON -- FTS5 search over section/block rows -- cache-backed query commands -- richer dependency extraction from references, transclusion, and literate - chunks +Follow-on backend work can now focus on richer dependency extraction from +references, transclusion, and literate chunks; access-controlled query gateways; +and larger-scale memory/context packages. diff --git a/docs/workplan-planning-map.md b/docs/workplan-planning-map.md index 4816c10..b6d25f5 100644 --- a/docs/workplan-planning-map.md +++ b/docs/workplan-planning-map.md @@ -33,7 +33,7 @@ and descriptions mirror the operational view. | `MKTT-WP-0003` | complete | done | `MKTT-WP-0001`, `MKTT-WP-0002`, `MKTT-WP-0004` | Core toolkit implementation is complete. | | `MKTT-WP-0006` | complete | done | `MKTT-WP-0004`; task-level trigger: `MKTT-WP-0003-T005` | Optional backend fabric is complete: manifests, capabilities, snapshot identity, interfaces, registry, provenance, and read-only CLI scaffolding. | | `MKTT-WP-0010` | complete | done | `MKTT-WP-0004`; task-level trigger: `MKTT-WP-0003-T006` | Content references, processors, explode/implode, weave/tangle, content classes, and migration examples are complete as the first WP-0010 extension layer. | -| `MKTT-WP-0007` | P2 | todo | `MKTT-WP-0006` | First practical cache backend use case: AST/JSONPath/SQLite/FTS. SQLite snapshots, AST inspection, metadata indexing, and incremental refresh are in place; JSONPath, FTS, and cache-backed query remain. | +| `MKTT-WP-0007` | complete | done | `MKTT-WP-0006` | Advanced query and local index backend is complete: AST inspection, optional JSONPath, SQLite snapshots/metadata, FTS5 search, incremental refresh, and local index CLI. | | `MKTT-WP-0005` | P2 | todo | `MKTT-WP-0003`, `MKTT-WP-0004` | Pick up when generation/form/context or semantic assessment pressure appears. | | `MKTT-WP-0011` | P2 | todo | `MKTT-WP-0003`; task-level triggers: `MKTT-WP-0010-T001`, `MKTT-WP-0010-T005` | Declarative Markdown dataflow workflows: source extraction, deterministic/assisted processing, and multi-output generation. | | `MKTT-WP-0009` | P2 | todo | `MKTT-WP-0006` | Establish access-control gateway before security-sensitive cache/context use. | diff --git a/src/markitect_tool/backend/__init__.py b/src/markitect_tool/backend/__init__.py index 1f49995..77685ba 100644 --- a/src/markitect_tool/backend/__init__.py +++ b/src/markitect_tool/backend/__init__.py @@ -36,6 +36,7 @@ from markitect_tool.backend.local_store import ( DEFAULT_LOCAL_INDEX_PATH, LOCAL_INDEX_SCHEMA_VERSION, LocalIndexBuildResult, + LocalSearchResult, LocalSnapshotStore, local_index_path_for, ) @@ -70,6 +71,7 @@ __all__ = [ "DEFAULT_LOCAL_INDEX_PATH", "LOCAL_INDEX_SCHEMA_VERSION", "LocalIndexBuildResult", + "LocalSearchResult", "LocalSnapshotStore", "local_index_path_for", ] diff --git a/src/markitect_tool/backend/local_store.py b/src/markitect_tool/backend/local_store.py index c13c240..99ee724 100644 --- a/src/markitect_tool/backend/local_store.py +++ b/src/markitect_tool/backend/local_store.py @@ -56,6 +56,24 @@ class LocalIndexBuildResult: return data +@dataclass(frozen=True) +class LocalSearchResult: + """One FTS search match from the local index.""" + + path: str + snapshot_id: str + unit_kind: str + unit_index: int + heading: str | None + text: str + rank: float + line_start: int | None = None + line_end: int | None = None + + def to_dict(self) -> dict[str, Any]: + return {key: value for key, value in asdict(self).items() if value is not None} + + class LocalSnapshotStore: """SQLite-backed local snapshot store for parsed Markdown documents.""" @@ -217,6 +235,7 @@ class LocalSnapshotStore: return with self._connect() as conn: _create_schema(conn) + conn.execute("delete from search_units where path = ?", (path,)) conn.execute("delete from blocks where path = ?", (path,)) conn.execute("delete from sections where path = ?", (path,)) conn.execute("delete from headings where path = ?", (path,)) @@ -236,6 +255,45 @@ class LocalSnapshotStore: raise KeyError(f"No indexed document `{path}`") return json.loads(row["document_json"]) + def search(self, query: str, *, limit: int = 20) -> list[LocalSearchResult]: + """Search indexed section and block text with SQLite FTS5.""" + + if not query.strip(): + raise ValueError("Search query cannot be empty") + if not self.path.exists(): + return [] + with self._connect() as conn: + _create_schema(conn) + try: + rows = conn.execute( + """ + select s.path, s.snapshot_id, s.unit_kind, s.unit_index, + s.heading, s.text, s.line_start, s.line_end, + bm25(search_units) as rank + from search_units s + where search_units match ? + order by rank + limit ? + """, + (query, limit), + ).fetchall() + except sqlite3.OperationalError as exc: + raise ValueError(f"Invalid FTS query `{query}`: {exc}") from exc + return [ + LocalSearchResult( + path=row["path"], + snapshot_id=row["snapshot_id"], + unit_kind=row["unit_kind"], + unit_index=row["unit_index"], + heading=row["heading"], + text=row["text"], + line_start=row["line_start"], + line_end=row["line_end"], + rank=float(row["rank"]), + ) + for row in rows + ] + def build( self, paths: list[str | Path], @@ -382,6 +440,16 @@ def _create_schema(conn: sqlite3.Connection) -> None: target_snapshot_id text, metadata_json text not null default '{}' ); + create virtual table if not exists search_units using fts5( + path unindexed, + snapshot_id unindexed, + unit_kind unindexed, + unit_index unindexed, + heading, + text, + line_start unindexed, + line_end unindexed + ); create index if not exists idx_sources_content_hash on sources(content_hash); create index if not exists idx_sources_snapshot_id on sources(snapshot_id); create index if not exists idx_sources_parser on sources(parser, parser_version); @@ -402,6 +470,7 @@ def _replace_document_units( conn.execute("delete from blocks where path = ?", (path,)) conn.execute("delete from sections where path = ?", (path,)) conn.execute("delete from headings where path = ?", (path,)) + conn.execute("delete from search_units where path = ?", (path,)) for idx, heading in enumerate(document.get("headings", [])): conn.execute( """ @@ -441,6 +510,22 @@ def _replace_document_units( line_end, ), ) + conn.execute( + """ + insert into search_units( + path, snapshot_id, unit_kind, unit_index, heading, text, line_start, line_end + ) values (?, ?, 'section', ?, ?, ?, ?, ?) + """, + ( + path, + snapshot_id, + idx, + str(heading["text"]), + text, + line_start, + line_end, + ), + ) for idx, block in enumerate(document.get("blocks", [])): conn.execute( """ @@ -459,6 +544,22 @@ def _replace_document_units( block.get("heading_level"), ), ) + conn.execute( + """ + insert into search_units( + path, snapshot_id, unit_kind, unit_index, heading, text, line_start, line_end + ) values (?, ?, 'block', ?, ?, ?, ?, ?) + """, + ( + path, + snapshot_id, + idx, + None, + str(block.get("text", "")), + block.get("line_start"), + block.get("line_end"), + ), + ) def _load_dependencies(conn: sqlite3.Connection) -> dict[str, list[DependencyEdge]]: diff --git a/src/markitect_tool/cli/main.py b/src/markitect_tool/cli/main.py index 734a435..2277b87 100644 --- a/src/markitect_tool/cli/main.py +++ b/src/markitect_tool/cli/main.py @@ -29,7 +29,7 @@ from markitect_tool.content_class import ( ContentClassResolutionError, load_content_class_file, ) -from markitect_tool.core import parse_markdown_file +from markitect_tool.core import Document, parse_markdown_file from markitect_tool.contract import ( ContractLoaderError, check_markdown_file, @@ -52,7 +52,13 @@ from markitect_tool.generation import ( from markitect_tool.literate import tangle_markdown, weave_markdown, write_tangle_files from markitect_tool.ops import IncludeError, compose_files, resolve_includes, transform_markdown from markitect_tool.processor import ProcessorContext, run_fenced_processors -from markitect_tool.query import InvalidQueryError, extract_document, query_document +from markitect_tool.query import ( + InvalidQueryError, + extract_document, + extract_document_jsonpath, + query_document, + query_document_jsonpath, +) from markitect_tool.reference import ( ReferenceContext, ReferenceResolutionError, @@ -162,6 +168,13 @@ def metrics(file: Path, output_format: str) -> None: @main.command() @click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path)) @click.argument("selector") +@click.option( + "--engine", + type=click.Choice(["selector", "jsonpath"], case_sensitive=False), + default="selector", + show_default=True, + help="Query engine to use.", +) @click.option( "--format", "output_format", @@ -169,16 +182,21 @@ def metrics(file: Path, output_format: str) -> None: default="json", show_default=True, ) -def query(file: Path, selector: str, output_format: str) -> None: +def query(file: Path, selector: str, engine: str, output_format: str) -> None: """Query structured Markdown content with a small selector.""" document = parse_markdown_file(file) try: - matches = query_document(document, selector) + matches = ( + query_document_jsonpath(document, selector) + if engine == "jsonpath" + else query_document(document, selector) + ) except InvalidQueryError as exc: raise click.ClickException(str(exc)) from exc data = { "selector": selector, + "engine": engine, "document_path": str(file), "count": len(matches), "matches": [match.to_dict() for match in matches], @@ -189,6 +207,13 @@ def query(file: Path, selector: str, output_format: str) -> None: @main.command() @click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path)) @click.argument("selector") +@click.option( + "--engine", + type=click.Choice(["selector", "jsonpath"], case_sensitive=False), + default="selector", + show_default=True, + help="Query engine to use.", +) @click.option( "--format", "output_format", @@ -196,16 +221,21 @@ def query(file: Path, selector: str, output_format: str) -> None: default="text", show_default=True, ) -def extract(file: Path, selector: str, output_format: str) -> None: +def extract(file: Path, selector: str, engine: str, output_format: str) -> None: """Extract text or Markdown content from structured Markdown.""" document = parse_markdown_file(file) try: - items = extract_document(document, selector) + items = ( + extract_document_jsonpath(document, selector) + if engine == "jsonpath" + else extract_document(document, selector) + ) except InvalidQueryError as exc: raise click.ClickException(str(exc)) from exc data = { "selector": selector, + "engine": engine, "document_path": str(file), "count": len(items), "items": items, @@ -976,6 +1006,124 @@ def cache_index( _emit_local_index_data(result.to_dict(), output_format) +@cache.command("query") +@click.argument("selector") +@click.option( + "--root", + type=click.Path(exists=True, file_okay=False, path_type=Path), + default=Path("."), + show_default=True, + help="Root used for the default local index path.", +) +@click.option( + "--index-path", + type=click.Path(dir_okay=False, path_type=Path), + help="SQLite index path. Defaults to .markitect/cache/index.sqlite3 under root.", +) +@click.option( + "--path", + "paths", + multiple=True, + help="Restrict query to one or more indexed relative paths.", +) +@click.option( + "--engine", + type=click.Choice(["selector", "jsonpath"], case_sensitive=False), + default="selector", + show_default=True, + help="Query engine to use.", +) +@click.option( + "--format", + "output_format", + type=click.Choice(["json", "yaml", "text"], case_sensitive=False), + default="json", + show_default=True, +) +def cache_query( + selector: str, + root: Path, + index_path: Path | None, + paths: tuple[str, ...], + engine: str, + output_format: str, +) -> None: + """Run a selector or JSONPath query over indexed document snapshots.""" + + store = LocalSnapshotStore(local_index_path_for(root, index_path)) + indexed_paths = sorted(paths or [state.path for state in store.load_state()]) + all_matches = [] + try: + for indexed_path in indexed_paths: + document = Document.from_dict(store.get_document(indexed_path)) + matches = ( + query_document_jsonpath(document, selector) + if engine == "jsonpath" + else query_document(document, selector) + ) + for match in matches: + item = match.to_dict() + item["source_path"] = indexed_path + all_matches.append(item) + except KeyError as exc: + raise click.ClickException(str(exc)) from exc + except InvalidQueryError as exc: + raise click.ClickException(str(exc)) from exc + data = { + "selector": selector, + "engine": engine, + "index_path": str(local_index_path_for(root, index_path)), + "count": len(all_matches), + "matches": all_matches, + } + _emit_query(data, output_format) + + +@main.command() +@click.argument("text") +@click.option( + "--root", + type=click.Path(exists=True, file_okay=False, path_type=Path), + default=Path("."), + show_default=True, + help="Root used for the default local index path.", +) +@click.option( + "--index-path", + type=click.Path(dir_okay=False, path_type=Path), + help="SQLite index path. Defaults to .markitect/cache/index.sqlite3 under root.", +) +@click.option("--limit", type=int, default=20, show_default=True) +@click.option( + "--format", + "output_format", + type=click.Choice(["json", "yaml", "text"], case_sensitive=False), + default="text", + show_default=True, +) +def search( + text: str, + root: Path, + index_path: Path | None, + limit: int, + output_format: str, +) -> None: + """Search the local SQLite index with FTS5.""" + + try: + store = LocalSnapshotStore(local_index_path_for(root, index_path)) + results = store.search(text, limit=limit) + except ValueError as exc: + raise click.ClickException(str(exc)) from exc + data = { + "query": text, + "index_path": str(local_index_path_for(root, index_path)), + "count": len(results), + "matches": [result.to_dict() for result in results], + } + _emit_search_results(data, output_format) + + @main.group() def template() -> None: """Render and inspect deterministic Markdown templates.""" @@ -1392,6 +1540,26 @@ def _emit_local_index_data(data: dict, output_format: str) -> None: click.echo(f"- {value}") +def _emit_search_results(data: dict, output_format: str) -> None: + if output_format == "json": + click.echo(json.dumps(data, indent=2, ensure_ascii=False)) + elif output_format == "yaml": + click.echo(yaml.safe_dump(data, sort_keys=False)) + else: + click.echo(f"{data['count']} match(es)") + for match in data["matches"]: + span = "" + if match.get("line_start"): + span = f":{match['line_start']}" + heading = f" [{match['heading']}]" if match.get("heading") else "" + click.echo( + f"- {match['path']}{span} {match['unit_kind']}#{match['unit_index']}{heading}" + ) + preview = " ".join(str(match.get("text", "")).split()) + if preview: + click.echo(f" {preview[:160]}") + + def _emit_reference_result(data: dict, output_format: str) -> None: if output_format == "json": click.echo(json.dumps(data, indent=2, ensure_ascii=False)) diff --git a/src/markitect_tool/core/document.py b/src/markitect_tool/core/document.py index e749e56..f91c13c 100644 --- a/src/markitect_tool/core/document.py +++ b/src/markitect_tool/core/document.py @@ -17,6 +17,10 @@ class Heading: def to_dict(self) -> dict[str, Any]: return asdict(self) + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "Heading": + return cls(level=int(data["level"]), text=str(data["text"]), line=int(data["line"])) + @dataclass(frozen=True) class ContentBlock: @@ -32,6 +36,16 @@ class ContentBlock: data = asdict(self) return {key: value for key, value in data.items() if value is not None} + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "ContentBlock": + return cls( + type=str(data["type"]), + text=str(data.get("text", "")), + line_start=int(data["line_start"]) if data.get("line_start") is not None else None, + line_end=int(data["line_end"]) if data.get("line_end") is not None else None, + heading_level=int(data["heading_level"]) if data.get("heading_level") is not None else None, + ) + @dataclass(frozen=True) class Section: @@ -46,6 +60,13 @@ class Section: "blocks": [block.to_dict() for block in self.blocks], } + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "Section": + return cls( + heading=Heading.from_dict(data["heading"]), + blocks=[ContentBlock.from_dict(block) for block in data.get("blocks", [])], + ) + @dataclass(frozen=True) class Document: @@ -70,3 +91,15 @@ class Document: "tokens": self.tokens, } return {key: value for key, value in data.items() if value is not None} + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "Document": + return cls( + source_path=str(data["source_path"]) if data.get("source_path") is not None else None, + frontmatter=dict(data.get("frontmatter", {})), + body=str(data.get("body", "")), + blocks=[ContentBlock.from_dict(block) for block in data.get("blocks", [])], + headings=[Heading.from_dict(heading) for heading in data.get("headings", [])], + sections=[Section.from_dict(section) for section in data.get("sections", [])], + tokens=list(data.get("tokens", [])), + ) diff --git a/src/markitect_tool/query/__init__.py b/src/markitect_tool/query/__init__.py index d545b4f..c16c9fa 100644 --- a/src/markitect_tool/query/__init__.py +++ b/src/markitect_tool/query/__init__.py @@ -4,12 +4,16 @@ from markitect_tool.query.engine import ( InvalidQueryError, QueryMatch, extract_document, + extract_document_jsonpath, query_document, + query_document_jsonpath, ) __all__ = [ "InvalidQueryError", "QueryMatch", "extract_document", + "extract_document_jsonpath", "query_document", + "query_document_jsonpath", ] diff --git a/src/markitect_tool/query/engine.py b/src/markitect_tool/query/engine.py index e267774..f5dc2f3 100644 --- a/src/markitect_tool/query/engine.py +++ b/src/markitect_tool/query/engine.py @@ -60,6 +60,42 @@ def query_document(document: Document, selector: str) -> list[QueryMatch]: raise InvalidQueryError(f"Unsupported selector target `{parsed.target}`") +def query_document_jsonpath(document: Document, expression: str) -> list[QueryMatch]: + """Query a parsed document with JSONPath over ``Document.to_dict()``. + + JSONPath support is intentionally optional so the core selector engine + remains dependency-light. Install ``markitect-tool[query]`` to enable it. + """ + + try: + from jsonpath_ng.ext import parse as parse_jsonpath + except ImportError as exc: # pragma: no cover - branch depends on env deps + raise InvalidQueryError( + "JSONPath queries require the optional `jsonpath-ng` dependency. " + "Install `markitect-tool[query]`." + ) from exc + + try: + compiled = parse_jsonpath(expression) + except Exception as exc: # jsonpath-ng raises parser-specific exceptions + raise InvalidQueryError(f"Invalid JSONPath expression `{expression}`: {exc}") from exc + + matches: list[QueryMatch] = [] + for match in compiled.find(document.to_dict()): + path = "$" + str(match.full_path) + value = match.value + matches.append( + QueryMatch( + kind=_jsonpath_kind(path, value), + path=path, + value=value, + text=_text_value(value), + line=_jsonpath_line(value), + ) + ) + return matches + + def extract_document(document: Document, selector: str) -> list[str]: """Extract text content from query matches.""" @@ -74,6 +110,16 @@ def extract_document(document: Document, selector: str) -> list[str]: return extracted +def extract_document_jsonpath(document: Document, expression: str) -> list[str]: + """Extract textual JSONPath matches from a parsed document.""" + + extracted: list[str] = [] + for match in query_document_jsonpath(document, expression): + if match.text is not None: + extracted.append(match.text) + return extracted + + def _parse_selector(selector: str) -> _Selector: raw = selector.strip() if not raw: @@ -240,3 +286,25 @@ def _text_value(value: Any) -> str | None: if isinstance(value, int | float | bool): return str(value) return None + + +def _jsonpath_kind(path: str, value: Any) -> str: + if ".frontmatter" in path: + return "frontmatter" + if ".headings" in path: + return "heading" if isinstance(value, dict) else "heading_value" + if ".sections" in path: + return "section" if isinstance(value, dict) else "section_value" + if ".blocks" in path: + return "block" if isinstance(value, dict) else "block_value" + if ".tokens" in path: + return "token" if isinstance(value, dict) else "token_value" + return "jsonpath" + + +def _jsonpath_line(value: Any) -> int | None: + if isinstance(value, dict): + raw_line = value.get("line") or value.get("line_start") + if isinstance(raw_line, int): + return raw_line + return None diff --git a/tests/test_local_snapshot_store.py b/tests/test_local_snapshot_store.py index 4c3b659..dd25c82 100644 --- a/tests/test_local_snapshot_store.py +++ b/tests/test_local_snapshot_store.py @@ -57,6 +57,23 @@ def test_local_snapshot_store_deletes_removed_files(tmp_path: Path): assert store.load_state() == [] +def test_local_snapshot_store_searches_sections_and_blocks(tmp_path: Path): + source = tmp_path / "doc.md" + source.write_text( + "# Doc\n\n## Decision\n\nWe choose a local SQLite index for repeated queries.\n", + encoding="utf-8", + ) + store = LocalSnapshotStore(local_index_path_for(tmp_path)) + store.build([tmp_path], root=tmp_path) + + results = store.search("SQLite") + + assert results + assert results[0].path == "doc.md" + assert {result.unit_kind for result in results} <= {"section", "block"} + assert any("SQLite index" in result.text for result in results) + + def test_mkt_ast_show_and_stats(tmp_path: Path): source = tmp_path / "doc.md" source.write_text("# Doc\n\nBody.\n", encoding="utf-8") @@ -87,3 +104,44 @@ def test_mkt_cache_init_and_index(tmp_path: Path): assert "parsed: 1" in indexed.output assert clean.exit_code == 0 assert "clean" in clean.output + + +def test_mkt_search_uses_local_index(tmp_path: Path): + source = tmp_path / "doc.md" + source.write_text("# Doc\n\nSearchable local index content.\n", encoding="utf-8") + runner = CliRunner() + indexed = runner.invoke(main, ["cache", "index", str(tmp_path), "--root", str(tmp_path)]) + + result = runner.invoke(main, ["search", "Searchable", "--root", str(tmp_path)]) + + assert indexed.exit_code == 0 + assert result.exit_code == 0 + assert "match(es)" in result.output + assert "doc.md" in result.output + + +def test_mkt_cache_query_uses_indexed_snapshots(tmp_path: Path): + one = tmp_path / "one.md" + two = tmp_path / "two.md" + one.write_text("# One\n\n## Decision\n\nUse SQLite.\n", encoding="utf-8") + two.write_text("# Two\n\n## Context\n\nOther material.\n", encoding="utf-8") + runner = CliRunner() + indexed = runner.invoke(main, ["cache", "index", str(tmp_path), "--root", str(tmp_path)]) + + result = runner.invoke( + main, + [ + "cache", + "query", + "sections[heading=Decision]", + "--root", + str(tmp_path), + "--format", + "json", + ], + ) + + assert indexed.exit_code == 0 + assert result.exit_code == 0 + assert '"count": 1' in result.output + assert '"source_path": "one.md"' in result.output diff --git a/tests/test_query_extraction.py b/tests/test_query_extraction.py index 7499eff..a6c3af2 100644 --- a/tests/test_query_extraction.py +++ b/tests/test_query_extraction.py @@ -1,11 +1,17 @@ from pathlib import Path +import importlib.util import pytest from click.testing import CliRunner from markitect_tool.cli import main from markitect_tool.core import parse_markdown -from markitect_tool.query import InvalidQueryError, extract_document, query_document +from markitect_tool.query import ( + InvalidQueryError, + extract_document, + query_document, + query_document_jsonpath, +) QUERY_DOC = """--- @@ -110,6 +116,41 @@ def test_invalid_query_reports_error(): query_document(document, "sections[heading") +@pytest.mark.skipif( + importlib.util.find_spec("jsonpath_ng") is None, + reason="jsonpath-ng optional dependency is not installed", +) +def test_query_document_jsonpath_returns_shared_match_envelope(): + document = parse_markdown(QUERY_DOC) + + matches = query_document_jsonpath(document, "$.headings[?(@.level == 2)].text") + + assert [match.value for match in matches] == [ + "Context", + "Decision", + "Consequences", + ] + assert all(match.kind == "heading_value" for match in matches) + + +def test_query_document_jsonpath_reports_missing_optional_dependency(monkeypatch): + document = parse_markdown(QUERY_DOC) + + import builtins + + real_import = builtins.__import__ + + def fake_import(name, *args, **kwargs): + if name.startswith("jsonpath_ng"): + raise ImportError("blocked") + return real_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", fake_import) + + with pytest.raises(InvalidQueryError, match="optional `jsonpath-ng`"): + query_document_jsonpath(document, "$.headings[*].text") + + def test_mkt_query_outputs_json(tmp_path: Path): source = tmp_path / "doc.md" source.write_text(QUERY_DOC, encoding="utf-8") @@ -136,6 +177,24 @@ def test_mkt_query_outputs_text(tmp_path: Path): assert "## Context" in result.output +@pytest.mark.skipif( + importlib.util.find_spec("jsonpath_ng") is None, + reason="jsonpath-ng optional dependency is not installed", +) +def test_mkt_query_jsonpath_outputs_json(tmp_path: Path): + source = tmp_path / "doc.md" + source.write_text(QUERY_DOC, encoding="utf-8") + + result = CliRunner().invoke( + main, + ["query", str(source), "$.frontmatter.status", "--engine", "jsonpath"], + ) + + assert result.exit_code == 0 + assert '"engine": "jsonpath"' in result.output + assert '"value": "accepted"' in result.output + + def test_mkt_extract_outputs_text(tmp_path: Path): source = tmp_path / "doc.md" source.write_text(QUERY_DOC, encoding="utf-8") diff --git a/workplans/MKTT-WP-0007-advanced-query-and-local-index-backend.md b/workplans/MKTT-WP-0007-advanced-query-and-local-index-backend.md index 1d644e8..926a1a3 100644 --- a/workplans/MKTT-WP-0007-advanced-query-and-local-index-backend.md +++ b/workplans/MKTT-WP-0007-advanced-query-and-local-index-backend.md @@ -3,7 +3,7 @@ id: MKTT-WP-0007 type: workplan title: "Advanced Query and Local Index Backend" domain: markitect -status: todo +status: done owner: markitect-tool topic_slug: markitect planning_priority: P2 @@ -97,7 +97,7 @@ tree/text output modes. ```task id: MKTT-WP-0007-T003 -status: todo +status: done priority: high state_hub_task_id: "a7b46b32-f322-4fe0-a6fb-60b0b823593c" ``` @@ -105,6 +105,11 @@ state_hub_task_id: "a7b46b32-f322-4fe0-a6fb-60b0b823593c" Support JSONPath over `Document.to_dict()` behind an optional dependency and shared query result envelope. +Implemented: `query_document_jsonpath()` and `extract_document_jsonpath()` use +the optional `jsonpath-ng` dependency and return the same `QueryMatch` envelope +as the compact selector engine. CLI `mkt query` and `mkt extract` accept +`--engine jsonpath`. + ## P7.4 - Build SQLite metadata and JSON index ```task @@ -137,7 +142,7 @@ target. ```task id: MKTT-WP-0007-T005 -status: todo +status: done priority: medium state_hub_task_id: "0f03e9be-b6f0-4e4b-8220-3bbf638a892b" ``` @@ -145,6 +150,10 @@ state_hub_task_id: "0f03e9be-b6f0-4e4b-8220-3bbf638a892b" Add full-text search over section and block text with source spans and relevance ranking. +Implemented: local SQLite index creates an FTS5 `search_units` virtual table +for sections and blocks, including path, snapshot id, unit kind/index, heading, +text, source spans, and BM25 rank. CLI `mkt search ` queries it. + ## P7.6 - Add incremental refresh ```task @@ -177,7 +186,7 @@ deletes removed files. ```task id: MKTT-WP-0007-T007 -status: todo +status: done priority: high state_hub_task_id: "35cc63ff-3723-43d5-aaf6-f9312efa0f4b" ``` @@ -191,9 +200,15 @@ mkt cache query mkt search ``` -Partial implementation: `mkt cache init` initializes the local SQLite store and -`mkt cache index ` builds or refreshes it. Cache-backed query and FTS -search remain part of this task. +Implemented: + +- `mkt cache init` +- `mkt cache index ` +- `mkt cache query ` +- `mkt search ` + +The older lightweight manifest commands remain available as `mkt cache build`, +`mkt cache status`, and `mkt cache fingerprint`. ## Exit Criteria