generated from coulomb/repo-seed
SQLite-backed local snapshot store
This commit is contained in:
@@ -18,8 +18,10 @@ from markitect_tool.cache import (
|
||||
)
|
||||
from markitect_tool.backend import (
|
||||
BackendRegistryError,
|
||||
LocalSnapshotStore,
|
||||
load_backend_registry,
|
||||
load_snapshot_state_file,
|
||||
local_index_path_for,
|
||||
plan_snapshot_refresh,
|
||||
snapshot_identity_for_file,
|
||||
)
|
||||
@@ -95,6 +97,51 @@ def parse(file: Path, output_format: str) -> None:
|
||||
click.echo(json.dumps(data, indent=2, ensure_ascii=False))
|
||||
|
||||
|
||||
@main.group()
def ast() -> None:
    """Inspect parsed Markdown ASTs and parser summaries."""
    # Pure container group: the docstring above doubles as the CLI help text,
    # and the actual work lives in the subcommands registered on this group.
|
||||
|
||||
|
||||
@ast.command("show")
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "tree"], case_sensitive=False),
    default="json",
    show_default=True,
)
def ast_show(file: Path, output_format: str) -> None:
    """Show a parsed Markdown AST without requiring a cache."""
    # The docstring is surfaced by click as the command help, so it is kept
    # verbatim; implementation notes live in comments instead.
    parsed = parse_markdown_file(file)
    # Serialize up front, before choosing a renderer, so every format goes
    # through the same to_dict() call.
    payload = parsed.to_dict()

    if output_format == "tree":
        # Outline view: one line per heading, prefixed with `level` hashes.
        for node in parsed.headings:
            marker = "#" * node.level
            click.echo(f"{marker} {node.text}")
        return

    if output_format == "yaml":
        rendered = yaml.safe_dump(payload, sort_keys=False)
    else:
        # "json" is the only remaining choice.
        rendered = json.dumps(payload, indent=2, ensure_ascii=False)
    click.echo(rendered)
|
||||
|
||||
|
||||
@ast.command("stats")
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="text",
    show_default=True,
)
def ast_stats(file: Path, output_format: str) -> None:
    """Summarize parsed Markdown AST shape and token distribution."""
    # Three steps: parse + serialize, reduce to a summary dict, then print it
    # in the requested format.
    serialized = parse_markdown_file(file).to_dict()
    summary = _ast_stats(serialized, str(file))
    _emit_ast_stats(summary, output_format)
|
||||
|
||||
|
||||
@main.command()
|
||||
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
|
||||
@click.option(
|
||||
@@ -726,6 +773,40 @@ def cache() -> None:
|
||||
"""Fingerprint Markdown files and detect changed inputs."""
|
||||
|
||||
|
||||
@cache.command("init")
@click.option(
    "--root",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path("."),
    show_default=True,
    help="Root used for the default local index path.",
)
@click.option(
    "--index-path",
    type=click.Path(dir_okay=False, path_type=Path),
    help="SQLite index path. Defaults to .markitect/cache/index.sqlite3 under root.",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="text",
    show_default=True,
)
def cache_init(root: Path, index_path: Path | None, output_format: str) -> None:
    """Initialize the local SQLite snapshot/index store."""
    # Resolve where the index lives (explicit --index-path or the default
    # derived from --root), then create/prepare the store at that location.
    target = local_index_path_for(root, index_path)
    snapshot_store = LocalSnapshotStore(target)
    snapshot_store.initialize()

    # Report what was set up, including how many entries the store's state
    # currently holds.
    tracked = snapshot_store.load_state()
    summary = dict(
        index_path=str(target),
        schema_version="1",
        sources=len(tracked),
    )
    _emit_local_index_data(summary, output_format)
|
||||
|
||||
|
||||
@cache.command("fingerprint")
|
||||
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
|
||||
@click.option(
|
||||
@@ -833,6 +914,68 @@ def cache_status(
|
||||
raise click.exceptions.Exit(1 if status.dirty else 0)
|
||||
|
||||
|
||||
@cache.command("index")
@click.argument("paths", nargs=-1, required=True, type=click.Path(exists=True, path_type=Path))
@click.option(
    "--root",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path("."),
    show_default=True,
    help="Root used for relative index paths.",
)
@click.option(
    "--index-path",
    type=click.Path(dir_okay=False, path_type=Path),
    help="SQLite index path. Defaults to .markitect/cache/index.sqlite3 under root.",
)
@click.option("--no-recursive", is_flag=True, help="Do not recurse into directories.")
@click.option(
    "--no-verify-hashes",
    is_flag=True,
    help="Do not hash metadata-changed files before parsing.",
)
@click.option(
    "--parse-option",
    "parse_options",
    multiple=True,
    metavar="KEY=VALUE",
    help="Parse option included in the snapshot identity hash.",
)
@click.option("--contract-hash", help="Optional contract hash included in snapshot identity.")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="text",
    show_default=True,
)
def cache_index(
    paths: tuple[Path, ...],
    root: Path,
    index_path: Path | None,
    no_recursive: bool,
    no_verify_hashes: bool,
    parse_options: tuple[str, ...],
    contract_hash: str | None,
    output_format: str,
) -> None:
    """Build or refresh the local SQLite snapshot/index store."""
    # Everything that may raise ValueError (path resolution, option parsing,
    # the build itself) stays inside one try so the user sees a clean click
    # error message instead of a traceback.
    try:
        index_file = local_index_path_for(root, index_path)
        snapshot_store = LocalSnapshotStore(index_file)
        # The CLI exposes negative flags; the store API takes positive ones.
        build_kwargs = {
            "root": root,
            "recursive": not no_recursive,
            "parse_options": _parse_key_value_options(parse_options),
            "contract_hash": contract_hash,
            "verify_hashes": not no_verify_hashes,
        }
        outcome = snapshot_store.build(list(paths), **build_kwargs)
    except ValueError as error:
        raise click.ClickException(str(error)) from error

    _emit_local_index_data(outcome.to_dict(), output_format)
|
||||
|
||||
|
||||
@main.group()
|
||||
def template() -> None:
|
||||
"""Render and inspect deterministic Markdown templates."""
|
||||
@@ -1213,6 +1356,42 @@ def _emit_cache_data(data: dict, output_format: str) -> None:
|
||||
click.echo(f"written: {data['written']}")
|
||||
|
||||
|
||||
def _emit_ast_stats(data: dict, output_format: str) -> None:
    """Print an AST statistics mapping as JSON, YAML, or plain text.

    ``data`` must provide ``document_path``, ``counts``,
    ``max_heading_depth`` and ``token_types``; any ``output_format`` other
    than ``"json"`` or ``"yaml"`` selects the plain-text layout.
    """
    if output_format == "json":
        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
        return
    if output_format == "yaml":
        click.echo(yaml.safe_dump(data, sort_keys=False))
        return

    # Plain text: one "name: value" line per field, in a fixed order.
    click.echo(f"document_path: {data['document_path']}")
    for label, amount in data["counts"].items():
        click.echo(f"{label}: {amount}")
    click.echo(f"max_heading_depth: {data['max_heading_depth']}")

    histogram = data["token_types"]
    if not histogram:
        # No tokens recorded: omit the section header entirely.
        return
    click.echo("token_types:")
    for name, occurrences in histogram.items():
        click.echo(f"- {name}: {occurrences}")
|
||||
|
||||
|
||||
def _emit_local_index_data(data: dict, output_format: str) -> None:
    """Print local index data as JSON, YAML, or plain text.

    ``data`` must contain ``index_path``. ``schema_version``, ``sources``
    and ``dirty`` are printed only when present; the four path lists
    (``parsed``, ``indexed``, ``metadata_updated``, ``deleted``) are always
    printed in text mode and count as empty when missing.
    """
    if output_format == "json":
        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
        return
    if output_format == "yaml":
        click.echo(yaml.safe_dump(data, sort_keys=False))
        return

    click.echo(f"index_path: {data['index_path']}")

    schema_version = data.get("schema_version")
    if schema_version:
        click.echo(f"schema_version: {schema_version}")

    source_count = data.get("sources")
    if source_count is not None:
        # Compared against None so that a count of 0 is still reported.
        click.echo(f"sources: {source_count}")

    dirty_flag = data.get("dirty")
    if dirty_flag is not None:
        click.echo("dirty" if dirty_flag else "clean")

    for section in ("parsed", "indexed", "metadata_updated", "deleted"):
        entries = data.get(section, [])
        click.echo(f"{section}: {len(entries)}")
        for entry in entries:
            click.echo(f"- {entry}")
|
||||
|
||||
|
||||
def _emit_reference_result(data: dict, output_format: str) -> None:
|
||||
if output_format == "json":
|
||||
click.echo(json.dumps(data, indent=2, ensure_ascii=False))
|
||||
@@ -1404,6 +1583,29 @@ def _set_path(mapping: dict[str, object], path: list[str], value: object) -> Non
|
||||
current[path[-1]] = value
|
||||
|
||||
|
||||
def _ast_stats(document: dict, document_path: str) -> dict:
    """Summarize a serialized Markdown document into counts and a token histogram.

    Args:
        document: Mapping form of a parsed document. The keys read are
            ``source_path``, ``frontmatter``, ``headings``, ``sections``,
            ``blocks`` and ``tokens``; each may be missing or ``None``.
        document_path: Path label copied into the result unchanged.

    Returns:
        A dict with ``document_path``, ``source_path``, ``counts`` (sizes of
        the five collections), ``max_heading_depth`` (largest heading
        ``level``, 0 when there are no headings) and ``token_types`` (token
        ``type`` -> occurrence count, sorted by type name).
    """
    # `dict.get(key, default)` only covers a *missing* key. A key that is
    # present with a None value would otherwise reach len()/iteration and
    # raise TypeError, so every collection is normalised with `or`.
    tokens = document.get("tokens") or []
    headings = document.get("headings") or []

    token_types: dict[str, int] = {}
    for token in tokens:
        token_type = str(token.get("type", "unknown"))
        token_types[token_type] = token_types.get(token_type, 0) + 1

    return {
        "document_path": document_path,
        "source_path": document.get("source_path"),
        "counts": {
            "frontmatter_keys": len(document.get("frontmatter") or {}),
            "headings": len(headings),
            "sections": len(document.get("sections") or []),
            "blocks": len(document.get("blocks") or []),
            "tokens": len(tokens),
        },
        # A heading without a usable level counts as depth 0; `default=0`
        # covers the no-headings case without building a fallback list.
        "max_heading_depth": max(
            (int(heading.get("level") or 0) for heading in headings),
            default=0,
        ),
        "token_types": dict(sorted(token_types.items())),
    }
|
||||
|
||||
|
||||
def _load_template_data(data_file: Path | None) -> dict[str, object]:
|
||||
if data_file is None:
|
||||
return {}
|
||||
|
||||
Reference in New Issue
Block a user