SQLite-backed local snapshot store

This commit is contained in:
2026-05-04 08:56:41 +02:00
parent 0d1ad21a9f
commit 36ff4cedab
7 changed files with 926 additions and 5 deletions

View File

@@ -18,8 +18,10 @@ from markitect_tool.cache import (
)
from markitect_tool.backend import (
BackendRegistryError,
LocalSnapshotStore,
load_backend_registry,
load_snapshot_state_file,
local_index_path_for,
plan_snapshot_refresh,
snapshot_identity_for_file,
)
@@ -95,6 +97,51 @@ def parse(file: Path, output_format: str) -> None:
click.echo(json.dumps(data, indent=2, ensure_ascii=False))
@main.group()
def ast() -> None:
    """Inspect parsed Markdown ASTs and parser summaries."""
    # Container group only: the callback does no work of its own, and
    # subcommands attach to it through ``@ast.command(...)``.
@ast.command("show")
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "tree"], case_sensitive=False),
    default="json",
    show_default=True,
)
def ast_show(file: Path, output_format: str) -> None:
    """Show a parsed Markdown AST without requiring a cache."""
    # The file is parsed and serialized up front for every format, so a
    # serialization problem surfaces even when only the tree is printed.
    parsed = parse_markdown_file(file)
    payload = parsed.to_dict()

    if output_format == "tree":
        # One line per heading, prefixed with as many '#' as its level.
        for heading in parsed.headings:
            click.echo(f"{'#' * heading.level} {heading.text}")
        return

    if output_format == "yaml":
        click.echo(yaml.safe_dump(payload, sort_keys=False))
        return

    # Anything else is the JSON default.
    click.echo(json.dumps(payload, indent=2, ensure_ascii=False))
@ast.command("stats")
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="text",
    show_default=True,
)
def ast_stats(file: Path, output_format: str) -> None:
    """Summarize parsed Markdown AST shape and token distribution."""
    # Parse, reduce the serialized document to summary counts, then print
    # the summary in the requested format.
    serialized = parse_markdown_file(file).to_dict()
    summary = _ast_stats(serialized, str(file))
    _emit_ast_stats(summary, output_format)
@main.command()
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option(
@@ -726,6 +773,40 @@ def cache() -> None:
"""Fingerprint Markdown files and detect changed inputs."""
@cache.command("init")
@click.option(
    "--root",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path("."),
    show_default=True,
    help="Root used for the default local index path.",
)
@click.option(
    "--index-path",
    type=click.Path(dir_okay=False, path_type=Path),
    help="SQLite index path. Defaults to .markitect/cache/index.sqlite3 under root.",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="text",
    show_default=True,
)
def cache_init(root: Path, index_path: Path | None, output_format: str) -> None:
    """Initialize the local SQLite snapshot/index store."""
    index_file = local_index_path_for(root, index_path)

    snapshot_store = LocalSnapshotStore(index_file)
    snapshot_store.initialize()

    # The source count is read back from the store after initialization.
    source_count = len(snapshot_store.load_state())

    _emit_local_index_data(
        {
            "index_path": str(index_file),
            "schema_version": "1",
            "sources": source_count,
        },
        output_format,
    )
@cache.command("fingerprint")
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option(
@@ -833,6 +914,68 @@ def cache_status(
raise click.exceptions.Exit(1 if status.dirty else 0)
@cache.command("index")
@click.argument("paths", nargs=-1, required=True, type=click.Path(exists=True, path_type=Path))
@click.option(
    "--root",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path("."),
    show_default=True,
    help="Root used for relative index paths.",
)
@click.option(
    "--index-path",
    type=click.Path(dir_okay=False, path_type=Path),
    help="SQLite index path. Defaults to .markitect/cache/index.sqlite3 under root.",
)
@click.option("--no-recursive", is_flag=True, help="Do not recurse into directories.")
@click.option(
    "--no-verify-hashes",
    is_flag=True,
    help="Do not hash metadata-changed files before parsing.",
)
@click.option(
    "--parse-option",
    "parse_options",
    multiple=True,
    metavar="KEY=VALUE",
    help="Parse option included in the snapshot identity hash.",
)
@click.option("--contract-hash", help="Optional contract hash included in snapshot identity.")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="text",
    show_default=True,
)
def cache_index(
    paths: tuple[Path, ...],
    root: Path,
    index_path: Path | None,
    no_recursive: bool,
    no_verify_hashes: bool,
    parse_options: tuple[str, ...],
    contract_hash: str | None,
    output_format: str,
) -> None:
    """Build or refresh the local SQLite snapshot/index store."""
    # The CLI exposes negative flags; the store API takes the positive form.
    try:
        snapshot_store = LocalSnapshotStore(local_index_path_for(root, index_path))
        build_result = snapshot_store.build(
            list(paths),
            root=root,
            recursive=not no_recursive,
            parse_options=_parse_key_value_options(parse_options),
            contract_hash=contract_hash,
            verify_hashes=not no_verify_hashes,
        )
    except ValueError as exc:
        # Surface a ValueError from store setup, option parsing or the build
        # as a regular click error message.
        raise click.ClickException(str(exc)) from exc
    else:
        # Emitting stays outside the guarded region, so a ValueError raised
        # while printing is not rewrapped.
        _emit_local_index_data(build_result.to_dict(), output_format)
@main.group()
def template() -> None:
"""Render and inspect deterministic Markdown templates."""
@@ -1213,6 +1356,42 @@ def _emit_cache_data(data: dict, output_format: str) -> None:
click.echo(f"written: {data['written']}")
def _emit_ast_stats(data: dict, output_format: str) -> None:
    """Print an AST statistics mapping as JSON, YAML, or plain text.

    ``data`` must carry ``document_path``, ``counts``, ``max_heading_depth``
    and ``token_types``; any format other than ``json``/``yaml`` falls
    through to the line-oriented text layout.
    """
    if output_format == "json":
        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
        return

    if output_format == "yaml":
        click.echo(yaml.safe_dump(data, sort_keys=False))
        return

    # Text layout: path, one line per count, heading depth, then the token
    # histogram (omitted entirely when it is empty).
    click.echo(f"document_path: {data['document_path']}")
    for key, value in data["counts"].items():
        click.echo(f"{key}: {value}")
    click.echo(f"max_heading_depth: {data['max_heading_depth']}")

    if not data["token_types"]:
        return
    click.echo("token_types:")
    for token_type, count in data["token_types"].items():
        click.echo(f"- {token_type}: {count}")
def _emit_local_index_data(data: dict, output_format: str) -> None:
    """Print local index/snapshot store data as JSON, YAML, or plain text.

    In text mode ``index_path`` is required; ``schema_version``, ``sources``
    and ``dirty`` are printed only when present, while the four path lists
    are always printed (a missing list counts as empty).
    """
    if output_format == "json":
        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
        return

    if output_format == "yaml":
        click.echo(yaml.safe_dump(data, sort_keys=False))
        return

    click.echo(f"index_path: {data['index_path']}")

    # A falsy schema version is skipped; a zero source count is still shown.
    if data.get("schema_version"):
        click.echo(f"schema_version: {data['schema_version']}")
    if data.get("sources") is not None:
        click.echo(f"sources: {data['sources']}")
    if data.get("dirty") is not None:
        click.echo("dirty" if data["dirty"] else "clean")

    # Each category prints its size followed by one bullet per entry.
    for key in ("parsed", "indexed", "metadata_updated", "deleted"):
        values = data.get(key, [])
        click.echo(f"{key}: {len(values)}")
        for value in values:
            click.echo(f"- {value}")
def _emit_reference_result(data: dict, output_format: str) -> None:
if output_format == "json":
click.echo(json.dumps(data, indent=2, ensure_ascii=False))
@@ -1404,6 +1583,29 @@ def _set_path(mapping: dict[str, object], path: list[str], value: object) -> Non
current[path[-1]] = value
def _ast_stats(document: dict, document_path: str) -> dict:
token_types: dict[str, int] = {}
for token in document.get("tokens", []):
token_type = str(token.get("type", "unknown"))
token_types[token_type] = token_types.get(token_type, 0) + 1
headings = document.get("headings", [])
return {
"document_path": document_path,
"source_path": document.get("source_path"),
"counts": {
"frontmatter_keys": len(document.get("frontmatter", {})),
"headings": len(headings),
"sections": len(document.get("sections", [])),
"blocks": len(document.get("blocks", [])),
"tokens": len(document.get("tokens", [])),
},
"max_heading_depth": max(
[int(heading.get("level", 0)) for heading in headings] or [0]
),
"token_types": dict(sorted(token_types.items())),
}
def _load_template_data(data_file: Path | None) -> dict[str, object]:
if data_file is None:
return {}