Introduces a new `markitect/proxy/` module with pluggable extractors that convert non-markdown sources (PDF, HTML) into tracked markdown proxy files. Proxy files preserve origin metadata (path, checksum, timestamp) so they can be kept in sync when the original changes. CLI commands: `proxy create`, `proxy update`, `proxy status`, `proxy extractors`. Built-in extractors: PDF (pymupdf4llm), HTML (markdownify), Markdown (built-in). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
186 lines
6.0 KiB
Python
186 lines
6.0 KiB
Python
"""
|
|
Click CLI commands for the proxy file system.
|
|
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import click
|
|
|
|
from markitect.proxy.exceptions import ProxyError
|
|
|
|
|
|
@click.group("proxy")
def proxy_group():
    """Proxy file operations — create, update, and manage markdown proxies."""
    # Container group only: the subcommands below attach themselves through
    # ``@proxy_group.command(...)``. The docstring doubles as the group's
    # ``--help`` text, so it is user-facing output.
|
|
|
|
|
|
@proxy_group.command("create")
@click.argument("source", type=click.Path(exists=True))
@click.option(
    "--output-dir",
    "-o",
    type=click.Path(),
    default=".",
    help="Directory to write the proxy file (default: current dir).",
)
@click.option("--force", "-f", is_flag=True, help="Overwrite existing proxy file.")
def proxy_create(source, output_dir, force):
    """Create a markdown proxy for SOURCE."""
    # Imports are deferred to call time so that merely registering this
    # command does not require the proxy dependencies to be importable.
    from markitect.proxy.registry import registry
    import markitect.proxy.extractors  # noqa: F401 — registers built-ins
    from markitect.proxy.generator import ProxyGenerator

    source_path = Path(source)
    destination = Path(output_dir)
    generator = ProxyGenerator(registry)

    try:
        created = generator.create(source_path, destination, force=force)
    except ProxyError as exc:
        # Report on stderr and exit with a non-zero status.
        click.echo(f"Error: {exc}", err=True)
        raise SystemExit(1)

    click.echo(f"Created proxy: {created}")
|
|
|
|
|
|
@proxy_group.command("update")
@click.argument("target", type=click.Path(exists=True))
@click.option("--dry-run", is_flag=True, help="Show what would change without writing.")
def proxy_update(target, dry_run):
    """Re-extract a single proxy file or all proxies in a directory."""
    # Deferred imports, matching the other subcommands in this module.
    from markitect.proxy.registry import registry
    import markitect.proxy.extractors  # noqa: F401
    from markitect.proxy.generator import ProxyGenerator

    generator = ProxyGenerator(registry)
    resolved = Path(target).resolve()

    try:
        # Normalise both modes (single file / directory) to one work list.
        if resolved.is_file():
            pending = [resolved]
        elif resolved.is_dir():
            pending = _find_proxy_files(generator, resolved)
            if not pending:
                click.echo("No proxy files found.")
                return
        else:
            # Exists (Click validated that) but is neither file nor directory.
            click.echo(f"Error: {target} is not a file or directory.", err=True)
            raise SystemExit(1)

        for proxy_file in pending:
            _update_one(generator, proxy_file, dry_run)
    except ProxyError as exc:
        # A ProxyError from any step aborts the whole run with exit status 1.
        click.echo(f"Error: {exc}", err=True)
        raise SystemExit(1)
|
|
|
|
|
|
@proxy_group.command("status")
@click.argument("directory", type=click.Path(exists=True), default=".")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["table", "json"]),
    default="table",
    help="Output format.",
)
def proxy_status(directory, output_format):
    """Show freshness of all proxy files in DIRECTORY (default: current dir)."""
    # Deferred imports, matching the other subcommands in this module.
    from markitect.proxy.registry import registry
    import markitect.proxy.extractors  # noqa: F401
    from markitect.proxy.generator import ProxyGenerator

    gen = ProxyGenerator(registry)

    try:
        results = gen.bulk_status(Path(directory))
    except ProxyError as exc:
        # Report on stderr and exit with a non-zero status.
        click.echo(f"Error: {exc}", err=True)
        raise SystemExit(1)

    if output_format == "json":
        # JSON mode must always emit parseable JSON on stdout, including when
        # nothing was found; a human-readable notice here would break any
        # consumer piping the output into a JSON parser.
        # NOTE(review): assumes bulk_status returns a list-like of dicts —
        # an empty/falsy result is rendered as an empty JSON array.
        click.echo(json.dumps(results or [], indent=2))
        return

    if not results:
        click.echo("No proxy files found.")
        return

    _print_status_table(results)
|
|
|
|
|
|
@proxy_group.command("extractors")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["table", "json"]),
    default="table",
    help="Output format.",
)
def proxy_extractors(output_format):
    """List registered extractors and their dependency status."""
    # Deferred imports, matching the other subcommands in this module.
    from markitect.proxy.registry import registry
    import markitect.proxy.extractors  # noqa: F401

    available = registry.list_extractors()

    if output_format != "json":
        _print_extractor_table(available)
        return

    # One JSON object per extractor; key order is part of the emitted output.
    payload = []
    for extractor in available:
        payload.append(
            {
                "name": extractor.name,
                "version": extractor.version,
                "extensions": list(extractor.extensions),
                "installed": extractor.check_dependencies(),
                "hint": extractor.dependency_hint(),
            }
        )
    click.echo(json.dumps(payload, indent=2))
|
|
|
|
|
|
# ------------------------------------------------------------------
|
|
# helpers
|
|
# ------------------------------------------------------------------
|
|
|
|
def _update_one(gen, proxy_path, dry_run):
    """Report on one proxy file and refresh it unless *dry_run* is set.

    The status reported by ``gen.status`` decides the outcome: proxies that
    are ``"current"`` or whose status is ``"missing-source"`` are only
    reported. Any other status is treated as stale, and the proxy is either
    announced (dry run) or re-generated through ``gen.update``.
    """
    state = gen.status(proxy_path)["status"]

    if state == "current":
        label = "current"
    elif state == "missing-source":
        label = "source missing"
    elif dry_run:
        label = "stale (would update)"
    else:
        # gen.update's truthiness tells us whether anything was rewritten.
        label = "updated" if gen.update(proxy_path) else "current"

    click.echo(f" {proxy_path.name}: {label}")
|
|
|
|
|
|
def _find_proxy_files(gen, directory):
    """Collect the proxy files found anywhere below *directory*.

    Every ``*.md`` file is visited recursively in sorted order and kept only
    when ``gen._try_read_proxy`` yields metadata whose ``"proxy"`` entry is
    exactly ``True``.
    """

    def _is_proxy(candidate):
        metadata, _body = gen._try_read_proxy(candidate)
        return metadata is not None and metadata.get("proxy") is True

    return [md_file for md_file in sorted(directory.rglob("*.md")) if _is_proxy(md_file)]
|
|
|
|
|
|
def _print_status_table(results):
    """Echo *results* as a fixed-width, left-aligned text table.

    Each entry is read through its ``"proxy"``, ``"source"``, ``"status"``
    and ``"extractor"`` keys; only the file name of the proxy path is shown.
    """
    # Header and data rows share one template so the columns stay aligned.
    row_template = "{:<40} {:<30} {:<15} {:<10}"

    click.echo(row_template.format("Proxy", "Source", "Status", "Extractor"))
    click.echo("-" * 95)
    for entry in results:
        click.echo(
            row_template.format(
                Path(entry["proxy"]).name,
                entry["source"],
                entry["status"],
                entry["extractor"],
            )
        )
|
|
|
|
|
|
def _print_extractor_table(extractors):
    """Echo *extractors* as a fixed-width, left-aligned text table.

    For each extractor the name, version, comma-joined extensions and an
    ``installed``/``missing`` flag (from ``check_dependencies()``) are shown.
    """
    # Header and data rows share one template so the columns stay aligned.
    row_template = "{:<15} {:<10} {:<25} {:<10}"

    click.echo(row_template.format("Name", "Version", "Extensions", "Status"))
    click.echo("-" * 60)
    for extractor in extractors:
        joined_exts = ", ".join(extractor.extensions)
        availability = "installed" if extractor.check_dependencies() else "missing"
        click.echo(
            row_template.format(
                extractor.name, extractor.version, joined_exts, availability
            )
        )
|