Implement REUSE-WP-0013 registry establish, update, and stats
Some checks failed
ci / validate-registry (push) Has been cancelled

Add stats, establish (scaffold, publish-check, discover), and update CLI
commands with optional llm-connect bridge, validate --root for sibling repos,
pytest coverage, and documentation for sibling registry onboarding.
This commit is contained in:
2026-06-16 01:21:01 +02:00
parent fb712b4b98
commit 70a5003f6e
19 changed files with 1740 additions and 30 deletions

View File

@@ -26,21 +26,48 @@ from reuse_surface.reports import (
format_cohort_markdown,
select_cohort,
)
from reuse_surface.establish import (
discover_capabilities,
format_publish_check_markdown,
publish_check,
scaffold_next_steps,
scaffold_registry,
)
from reuse_surface.registry_update import (
apply_deterministic_suggestions,
collect_deterministic_suggestions,
format_suggestions_json,
format_suggestions_markdown,
suggest_llm_updates,
)
from reuse_surface.stats import collect_stats, format_stats_json, format_stats_markdown
from reuse_surface.registry import (
ROOT,
capability_paths,
level_at_least,
load_index,
load_index_at,
load_schema,
parse_front_matter,
parse_vector,
registry_paths,
)
def _check_index_drift(entry_paths: list[Path], index: dict[str, Any]) -> list[str]:
def _registry_root(args: argparse.Namespace) -> Path:
    """Return the registry root directory selected by the parsed arguments.

    A truthy ``root`` attribute on *args* is resolved to an absolute path.
    When the attribute is missing, ``None``, or empty, the module-level
    ``ROOT`` is returned unchanged.
    """
    override = getattr(args, "root", None)
    return Path(override).resolve() if override else ROOT
def _check_index_drift(
entry_paths: list[Path],
index: dict[str, Any],
repo_root: Path,
) -> list[str]:
warnings: list[str] = []
indexed_paths = {item["path"] for item in index.get("capabilities", [])}
file_paths = {str(path.relative_to(ROOT)) for path in entry_paths}
file_paths = {str(path.relative_to(repo_root)) for path in entry_paths}
for path in sorted(file_paths - indexed_paths):
warnings.append(f"index drift: entry file not indexed: {path}")
for path in sorted(indexed_paths - file_paths):
@@ -48,11 +75,22 @@ def _check_index_drift(entry_paths: list[Path], index: dict[str, Any]) -> list[s
return warnings
def cmd_validate(args: argparse.Namespace) -> int:
def _capability_paths_for(repo_root: Path, target: Path | None) -> list[Path]:
    """List the capability entry files to process.

    An explicit *target* is returned as a single-element list; its existence
    is not checked here.  Otherwise every ``*.md`` file directly inside the
    ``"capabilities"`` directory reported by ``registry_paths(repo_root)`` is
    returned in sorted order, skipping any file named ``.gitkeep``.
    """
    if target is None:
        capabilities_dir = registry_paths(repo_root)["capabilities"]
        entries = [
            entry
            for entry in capabilities_dir.glob("*.md")
            if entry.name != ".gitkeep"
        ]
        entries.sort()
        return entries
    return [target]
def _run_validate(
repo_root: Path,
*,
target: Path | None,
relations: bool,
) -> tuple[list[str], list[str], list[Path]]:
schema = load_schema()
validator = Draft202012Validator(schema)
target = Path(args.path) if args.path else None
paths = capability_paths(target)
paths = _capability_paths_for(repo_root, target)
errors: list[str] = []
warnings: list[str] = []
@@ -67,10 +105,23 @@ def cmd_validate(args: argparse.Namespace) -> int:
errors.append(f"{path}: {location}: {error.message}")
if not target:
index = load_index()
warnings.extend(_check_index_drift(paths, index))
if args.relations:
index_path = registry_paths(repo_root)["index"]
if index_path.exists():
index = load_index_at(index_path)
warnings.extend(_check_index_drift(paths, index, repo_root))
if relations and repo_root == ROOT:
warnings.extend(check_relations())
return errors, warnings, paths
def cmd_validate(args: argparse.Namespace) -> int:
repo_root = _registry_root(args)
target = Path(args.path) if args.path else None
if target and not target.is_absolute():
target = repo_root / target
errors, warnings, paths = _run_validate(
repo_root, target=target, relations=args.relations
)
for warning in warnings:
print(f"warning: {warning}", file=sys.stderr)
@@ -329,6 +380,117 @@ def cmd_hub_sync(args: argparse.Namespace) -> int:
return 0
def cmd_stats(args: argparse.Namespace) -> int:
    """Print registry statistics for a repository and return exit code 0.

    The repository root is ``args.path`` (current directory when unset),
    resolved to an absolute path.  ``args.hub_url`` is optional on the
    namespace and defaults to ``None`` when absent.  The collected stats are
    printed as JSON when ``args.format`` is ``"json"``; any other value
    prints the markdown rendering without an extra trailing newline.
    """
    target_root = Path(args.path or ".").resolve()
    report = collect_stats(
        target_root,
        federation_ready=args.federation_ready,
        raw_url=args.raw_url,
        hub_url=getattr(args, "hub_url", None),
    )
    if args.format != "json":
        print(format_stats_markdown(report), end="")
        return 0
    print(format_stats_json(report))
    return 0
def cmd_establish(args: argparse.Namespace) -> int:
    """Scaffold, publish-check, or discover a capability registry.

    One mode runs per call, chosen in this order of precedence:
    ``args.scaffold``, ``args.publish_check``, ``args.discover``.  The
    repository root is ``args.path`` (current directory when unset),
    resolved to an absolute path.

    Discover mode previews by default.  Files are written only when
    ``args.apply`` is set and ``args.dry_run`` is not: an explicit
    ``--dry-run`` overrides ``--apply`` so a preview request can never
    modify the repository.  After a real apply, the written registry is
    validated with warnings treated as failures.

    Returns:
        0 on success; 1 when the publish check reports ``ok`` as false, when
        no mode was selected, or when a ``ValueError`` is raised by the
        selected mode.  For an applied discover run, the exit code of
        ``cmd_validate``.
    """
    repo_root = Path(args.path or ".").resolve()
    try:
        if args.scaffold:
            created = scaffold_registry(
                repo_root, domain=args.domain, force=args.force
            )
            for path in created:
                print(f"ok: wrote {path.relative_to(repo_root)}")
            print(scaffold_next_steps(repo_root))
            return 0

        if args.publish_check:
            result = publish_check(repo_root, raw_url=args.raw_url)
            print(format_publish_check_markdown(result), end="")
            return 0 if result["ok"] else 1

        if args.discover:
            # --dry-run wins over --apply.  getattr keeps namespaces built
            # without a dry_run attribute working as before.
            apply_changes = bool(args.apply) and not getattr(
                args, "dry_run", False
            )
            result = discover_capabilities(
                repo_root,
                domain=args.domain,
                dry_run=not apply_changes,
                apply=apply_changes,
                llm_url=args.llm_url,
                context_max_files=args.context_max_files,
            )
            if result.get("dry_run"):
                print(yaml.safe_dump(result["draft"], sort_keys=False))
                return 0
            for path in result.get("written", []):
                print(f"ok: wrote {path}")
            # Validate the freshly written entries against the target repo,
            # failing on warnings so a bad discovery run is not silently kept.
            validate_args = argparse.Namespace(
                path=None,
                root=str(repo_root),
                relations=False,
                fail_on_warnings=True,
            )
            return cmd_validate(validate_args)
    except ValueError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1

    print("error: specify --scaffold, --publish-check, or --discover", file=sys.stderr)
    return 1
def cmd_update(args: argparse.Namespace) -> int:
    """Suggest or apply registry metadata updates for one or all capabilities.

    Either ``args.capability`` or ``args.all`` must be set; ``args.all``
    takes precedence when both are given.  The repository root is
    ``args.path`` (current directory when unset), resolved to an absolute
    path.

    With ``args.suggest_maturity`` the LLM-backed suggestions are printed as
    JSON (``args.format`` and ``args.apply`` are not consulted) and 0 is
    returned.  Otherwise deterministic suggestions are collected; they are
    either applied and followed by a validation run that fails on warnings
    (``args.apply``), or printed in the requested format.

    Returns:
        0 after printing suggestions, the exit code of ``cmd_validate`` after
        an apply, or 1 when no selector was given or a ``ValueError`` is
        raised along the way.
    """
    repo_root = Path(args.path or ".").resolve()
    try:
        if not (args.all or args.capability):
            print("error: specify --capability or --all", file=sys.stderr)
            return 1
        capability_id = None if args.all else args.capability

        if args.suggest_maturity:
            if args.all:
                # Every capability listed in the repository's index file.
                index = load_index_at(registry_paths(repo_root)["index"])
                cap_ids = [row["id"] for row in index.get("capabilities", [])]
            else:
                cap_ids = [args.capability]
            llm_suggestions = []
            for cap_id in cap_ids:
                llm_suggestions.append(
                    suggest_llm_updates(
                        repo_root,
                        cap_id,
                        git_since=args.from_git_since,
                        llm_url=args.llm_url,
                    )
                )
            payload = {"suggestions": llm_suggestions}
            print(json.dumps(payload, indent=2, sort_keys=True))
            return 0

        suggestions = collect_deterministic_suggestions(
            repo_root,
            capability_id=capability_id,
            git_since=args.from_git_since,
        )
        if not args.apply:
            if args.format == "json":
                print(format_suggestions_json(suggestions))
            else:
                print(format_suggestions_markdown(suggestions), end="")
            return 0

        for line in apply_deterministic_suggestions(repo_root, suggestions):
            print(f"ok: {line}")
        # Re-validate the modified registry, treating warnings as failures.
        return cmd_validate(
            argparse.Namespace(
                path=None,
                root=str(repo_root),
                relations=False,
                fail_on_warnings=True,
            )
        )
    except ValueError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1
def cmd_report_cohorts(args: argparse.Namespace) -> int:
filters = cohort_filters_from_args(args)
matches = select_cohort(filters)
@@ -399,6 +561,10 @@ def main(argv: list[str] | None = None) -> int:
action="store_true",
help="exit non-zero when warnings are present",
)
validate.add_argument(
"--root",
help="registry repo root (default: reuse-surface install root)",
)
validate.set_defaults(func=cmd_validate)
federation = subparsers.add_parser(
@@ -539,6 +705,41 @@ def main(argv: list[str] | None = None) -> int:
)
cohorts.set_defaults(func=cmd_report_cohorts)
stats = subparsers.add_parser("stats", help="registry maturity and federation stats")
stats.add_argument("--path", help="repo root (default: cwd)")
stats.add_argument("--federation-ready", action="store_true")
stats.add_argument("--raw-url", help="probe federation raw index URL")
stats.add_argument("--hub-url", help="hub base URL (or REUSE_SURFACE_URL)")
stats.add_argument("--format", choices=["markdown", "json"], default="markdown")
stats.set_defaults(func=cmd_stats)
establish = subparsers.add_parser(
"establish", help="bootstrap or discover capability registry"
)
establish.add_argument("--path", help="target repo root (default: cwd)")
establish.add_argument("--domain", default="helix_forge")
establish.add_argument("--force", action="store_true")
establish.add_argument("--scaffold", action="store_true")
establish.add_argument("--publish-check", action="store_true")
establish.add_argument("--discover", action="store_true")
establish.add_argument("--dry-run", action="store_true", help="discover preview (default)")
establish.add_argument("--apply", action="store_true", help="discover write + validate")
establish.add_argument("--raw-url", help="raw Gitea index URL for publish-check")
establish.add_argument("--llm-url", help="llm-connect base URL (or LLM_CONNECT_URL)")
establish.add_argument("--context-max-files", type=int, default=12)
establish.set_defaults(func=cmd_establish)
update = subparsers.add_parser("update", help="refresh registry metadata from repo signals")
update.add_argument("--path", help="repo root (default: cwd)")
update.add_argument("--capability", help="single capability id")
update.add_argument("--all", action="store_true")
update.add_argument("--from-git-since", help="git ref for change detection")
update.add_argument("--apply", action="store_true")
update.add_argument("--suggest-maturity", action="store_true")
update.add_argument("--llm-url", help="llm-connect base URL (or LLM_CONNECT_URL)")
update.add_argument("--format", choices=["markdown", "json"], default="markdown")
update.set_defaults(func=cmd_update)
args = parser.parse_args(argv)
return args.func(args)