Lightweight caching and incremental processing

This commit is contained in:
2026-05-04 01:35:32 +02:00
parent 8260a66528
commit 8203f50fd5
8 changed files with 612 additions and 3 deletions

View File

@@ -8,6 +8,14 @@ from pathlib import Path
import click
import yaml
from markitect_tool.cache import (
build_cache,
cache_path_for,
detect_changes,
fingerprint_file,
load_cache,
save_cache,
)
from markitect_tool.core import parse_markdown_file
from markitect_tool.contract import (
ContractLoaderError,
@@ -288,6 +296,118 @@ def include(
_emit_markdown_result(result.to_dict(), output_format, output)
@main.group()
def cache() -> None:
    """Fingerprint Markdown files and detect changed inputs."""
    # Click group registered on `main`; it only serves as the parent for the
    # `fingerprint`, `build`, and `status` subcommands declared with
    # `@cache.command(...)`. The docstring above is what Click displays as the
    # group's help text, so treat it as user-facing output.
@cache.command("fingerprint")
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option(
    "--root",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path("."),
    show_default=True,
    help="Root used for relative cache paths.",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="json",
    show_default=True,
)
def cache_fingerprint(file: Path, root: Path, output_format: str) -> None:
    """Fingerprint one Markdown file."""
    # The docstring above doubles as the CLI help text, so it stays terse.
    # Compute the fingerprint entry for `file` (with `root` passed through
    # to `fingerprint_file`) and hand its dict form straight to the shared
    # cache emitter.
    _emit_cache_data(fingerprint_file(file, root=root).to_dict(), output_format)
@cache.command("build")
@click.argument("paths", nargs=-1, required=True, type=click.Path(exists=True, path_type=Path))
@click.option(
    "--root",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path("."),
    show_default=True,
    help="Root used for relative cache paths.",
)
@click.option(
    "--cache-path",
    type=click.Path(dir_okay=False, path_type=Path),
    help="Cache manifest path. Defaults to .markitect/cache/manifest.json under root.",
)
@click.option("--no-recursive", is_flag=True, help="Do not recurse into directories.")
@click.option("--dry-run", is_flag=True, help="Report manifest without writing it.")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="text",
    show_default=True,
)
def cache_build(
    paths: tuple[Path, ...],
    root: Path,
    cache_path: Path | None,
    no_recursive: bool,
    dry_run: bool,
    output_format: str,
) -> None:
    """Build or refresh a lightweight Markdown cache manifest."""
    # The docstring above doubles as the CLI help text, so it stays terse.
    # Build the manifest first, then resolve where it should live.
    manifest = build_cache(list(paths), root=root, recursive=not no_recursive)
    target = cache_path_for(root, cache_path)

    # `--dry-run` reports the manifest without touching the filesystem.
    should_write = not dry_run
    if should_write:
        save_cache(manifest, target)

    # Extend the manifest dict with bookkeeping fields for the report; the
    # added keys take precedence over same-named keys from `to_dict()`.
    summary = {
        **manifest.to_dict(),
        "cache_path": str(target),
        "written": should_write,
        "count": len(manifest.entries),
    }
    _emit_cache_data(summary, output_format)
@cache.command("status")
@click.argument("paths", nargs=-1, required=True, type=click.Path(exists=True, path_type=Path))
@click.option(
    "--root",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path("."),
    show_default=True,
    help="Root used for relative cache paths.",
)
@click.option(
    "--cache-path",
    type=click.Path(dir_okay=False, path_type=Path),
    help="Cache manifest path. Defaults to .markitect/cache/manifest.json under root.",
)
@click.option("--no-recursive", is_flag=True, help="Do not recurse into directories.")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="text",
    show_default=True,
)
def cache_status(
    paths: tuple[Path, ...],
    root: Path,
    cache_path: Path | None,
    no_recursive: bool,
    output_format: str,
) -> None:
    """Report changed, new, unchanged, and deleted Markdown files."""
    # The docstring above doubles as the CLI help text, so it stays terse.
    # Resolve and load the stored manifest, then compare it with the inputs.
    target = cache_path_for(root, cache_path)
    stored = load_cache(target)
    status = detect_changes(
        stored,
        list(paths),
        root=root,
        recursive=not no_recursive,
    )

    report = {**status.to_dict(), "cache_path": str(target)}
    _emit_cache_data(report, output_format)

    # Exit status mirrors the result so scripts can branch on it:
    # 1 when the status is dirty, 0 otherwise.
    if status.dirty:
        exit_code = 1
    else:
        exit_code = 0
    raise click.exceptions.Exit(exit_code)
@main.group()
def template() -> None:
    """Render and inspect deterministic Markdown templates."""
    # Click group registered on `main`. Its subcommands are not visible in
    # this part of the file. The docstring above is what Click displays as
    # the group's help text, so treat it as user-facing output.
@@ -647,6 +767,27 @@ def _emit_markdown_result(data: dict, output_format: str, output: Path | None) -
click.echo(markdown, nl=False)
def _emit_cache_data(data: dict, output_format: str) -> None:
    """Write the result of a cache command to stdout.

    Args:
        data: Plain dictionary produced by one of the cache commands.
        output_format: ``"json"`` or ``"yaml"`` for a structured dump; any
            other value selects the human-readable text rendering.

    The text rendering picks a layout from the keys present in ``data``:

    * a ``"dirty"`` key marks a status report: a ``dirty``/``clean`` line
      followed by the non-empty ``new``/``changed``/``deleted``/``unchanged``
      lists;
    * ``"cache_path"``, ``"count"`` or ``"entries"`` mark a manifest summary:
      cache path, entry count and, when present, the ``written`` flag;
    * any other non-empty dictionary (for example the single entry emitted
      by ``cache fingerprint``) is printed as ``key: value`` lines. This case
      used to fall into the manifest layout and printed only
      ``cache_path: <none>`` and ``count: 0``, hiding the actual data.
    """
    if output_format == "json":
        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
        return
    if output_format == "yaml":
        click.echo(yaml.safe_dump(data, sort_keys=False))
        return

    # --- text rendering -------------------------------------------------
    if "dirty" in data:
        click.echo("dirty" if data["dirty"] else "clean")
        for key in ("new", "changed", "deleted", "unchanged"):
            values = data.get(key, [])
            if values:
                click.echo(f"{key}: {len(values)}")
                for value in values:
                    click.echo(f"- {value}")
        return

    manifest_keys = ("cache_path", "count", "entries")
    if data and not any(key in data for key in manifest_keys):
        # Neither a status report nor a manifest summary: show the fields
        # themselves instead of a meaningless empty summary.
        for key, value in data.items():
            click.echo(f"{key}: {value}")
        return

    click.echo(f"cache_path: {data.get('cache_path', '<none>')}")
    click.echo(f"count: {data.get('count', len(data.get('entries', [])))}")
    if data.get("written") is not None:
        click.echo(f"written: {data['written']}")
def _emit_jsonish(data: dict, output_format: str) -> None:
if output_format == "yaml":
click.echo(yaml.safe_dump(data, sort_keys=False))