generated from coulomb/repo-seed
Some checks failed
ci / validate-registry (push) Has been cancelled
Add hub sync and report cohorts CLI commands with pytest coverage, document sibling index publish contract and hub hardening path, align INTENT layout, raise external evidence on three registry entries, and close gap priorities 19-23 (priority 18 deferred on sibling index blocks).
547 lines
18 KiB
Python
547 lines
18 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import yaml
|
|
from jsonschema import Draft202012Validator
|
|
|
|
from reuse_surface.catalog import write_catalog
|
|
from reuse_surface.federation import write_federated_index
|
|
from reuse_surface import hub_client
|
|
from reuse_surface.graph import check_relations, render_mermaid, write_graph
|
|
from reuse_surface.hub_sync import (
|
|
DEFAULT_SOURCES_PATH,
|
|
build_manifest,
|
|
load_sources_manifest,
|
|
write_sources_manifest,
|
|
)
|
|
from reuse_surface.overlaps import find_overlaps
|
|
from reuse_surface.reports import (
|
|
cohort_filters_from_args,
|
|
format_cohort_json,
|
|
format_cohort_markdown,
|
|
select_cohort,
|
|
)
|
|
from reuse_surface.registry import (
|
|
ROOT,
|
|
capability_paths,
|
|
level_at_least,
|
|
load_index,
|
|
load_schema,
|
|
parse_front_matter,
|
|
parse_vector,
|
|
)
|
|
|
|
|
|
def _check_index_drift(entry_paths: list[Path], index: dict[str, Any]) -> list[str]:
|
|
warnings: list[str] = []
|
|
indexed_paths = {item["path"] for item in index.get("capabilities", [])}
|
|
file_paths = {str(path.relative_to(ROOT)) for path in entry_paths}
|
|
for path in sorted(file_paths - indexed_paths):
|
|
warnings.append(f"index drift: entry file not indexed: {path}")
|
|
for path in sorted(indexed_paths - file_paths):
|
|
warnings.append(f"index drift: index references missing file: {path}")
|
|
return warnings
|
|
|
|
|
|
def cmd_validate(args: argparse.Namespace) -> int:
    """Validate capability entries against the schema and return an exit code.

    The front matter of each capability file (only ``args.path`` when given,
    otherwise whatever ``capability_paths`` yields) is parsed and checked with
    a Draft 2020-12 validator. Parse failures and schema violations are
    errors. Without a target path the index is also checked for drift, and
    ``args.relations`` adds the relation warnings. Warnings and errors are
    printed to stderr.

    Returns:
        1 when there is any error, or any warning while
        ``args.fail_on_warnings`` is set; otherwise 0.
    """
    validator = Draft202012Validator(load_schema())
    target = Path(args.path) if args.path else None
    paths = capability_paths(target)
    problems: list[str] = []
    notices: list[str] = []

    for entry_path in paths:
        try:
            front_matter = parse_front_matter(entry_path)
        except ValueError as exc:
            problems.append(str(exc))
            continue
        violations = sorted(validator.iter_errors(front_matter), key=lambda e: e.path)
        for violation in violations:
            location = ".".join(map(str, violation.path)) or "<root>"
            problems.append(f"{entry_path}: {location}: {violation.message}")

    # Index drift is only checked when the whole registry is validated.
    if target is None:
        notices.extend(_check_index_drift(paths, load_index()))
    if args.relations:
        notices.extend(check_relations())

    for notice in notices:
        print(f"warning: {notice}", file=sys.stderr)
    for problem in problems:
        print(f"error: {problem}", file=sys.stderr)

    failed = bool(problems) or bool(args.fail_on_warnings and notices)
    if failed:
        return 1

    count = len(paths)
    suffix = "y" if count == 1 else "ies"
    print(f"ok: validated {count} capability entr{suffix}")
    return 0
|
|
|
|
|
|
def _matches_query(item: dict[str, Any], args: argparse.Namespace) -> bool:
    """Tell whether an index item passes every filter that is set on ``args``.

    Filters whose value is falsy are skipped. The item's ``vector`` is always
    parsed first. Checks run in this order: discovery minimum, availability
    minimum, exact domain, tag membership, case-insensitive consumption mode,
    and a case-insensitive keyword substring search over id, name, summary
    and tags.
    """
    vector = parse_vector(item["vector"])

    for dimension in ("discovery", "availability"):
        minimum = getattr(args, f"{dimension}_min")
        if minimum and not level_at_least(dimension, vector[dimension], minimum):
            return False

    if args.domain and args.domain != item.get("domain"):
        return False

    if args.tag and args.tag not in item.get("tags", []):
        return False

    if args.consumption_mode:
        known_modes = {mode.lower() for mode in item.get("consumption_modes", [])}
        if args.consumption_mode.lower() not in known_modes:
            return False

    if args.keyword:
        searchable = (
            item.get("id", ""),
            item.get("name", ""),
            item.get("summary", ""),
            " ".join(item.get("tags", [])),
        )
        if args.keyword.lower() not in " ".join(searchable).lower():
            return False

    return True
|
|
|
|
|
|
def cmd_query(args: argparse.Namespace) -> int:
    """Print the index capabilities that match the query filters.

    Each match is printed as an id/vector/path line followed by its summary,
    then a trailing match count. Prints ``no matches`` when nothing matches.
    Always returns 0.
    """
    capabilities = load_index().get("capabilities", [])
    hits = [entry for entry in capabilities if _matches_query(entry, args)]
    if not hits:
        print("no matches")
        return 0

    for entry in hits:
        headline = f"{entry['id']} {entry['vector']} {entry['path']}"
        print(f"{headline}\n {entry['summary']}")

    total = len(hits)
    plural = "" if total == 1 else "es"
    print(f"\n{total} match{plural}")
    return 0
|
|
|
|
|
|
def _load_indexed_entries() -> list[tuple[dict[str, Any], dict[str, Any]]]:
    """Pair every index item with the parsed front matter of its entry file.

    Returns:
        ``(index_item, front_matter)`` tuples in index order. The entry file
        is located by joining ``ROOT`` with the item's ``path``.
    """
    return [
        (item, parse_front_matter(ROOT / item["path"]))
        for item in load_index().get("capabilities", [])
    ]
|
|
|
|
|
|
def cmd_overlaps(args: argparse.Namespace) -> int:
    """Print pairs of capabilities that look like potential duplicates.

    Candidates come from ``find_overlaps`` using ``args.threshold``. Each is
    printed as ``left <> right score=… reasons``, followed by a candidate
    count, or ``no overlap candidates`` when there are none.

    Returns:
        0 on success; 1 when an indexed entry's front matter cannot be parsed.
    """
    try:
        indexed_entries = _load_indexed_entries()
    except ValueError as exc:
        # Same convention as validate/export: an unreadable entry is a
        # reported error with a non-zero exit code, not a traceback.
        print(f"error: {exc}", file=sys.stderr)
        return 1

    candidates = find_overlaps(indexed_entries, threshold=args.threshold)
    if not candidates:
        print("no overlap candidates")
        return 0

    for candidate in candidates:
        reasons = "; ".join(candidate.reasons)
        print(
            f"{candidate.left_id} <> {candidate.right_id} "
            f"score={candidate.score:.2f} {reasons}"
        )
    print(f"\n{len(candidates)} candidate{'s' if len(candidates) != 1 else ''}")
    return 0
|
|
|
|
|
|
def cmd_federation_compose(args: argparse.Namespace) -> int:
    """Compose the federated index and report how many capabilities it holds.

    ``args.refresh`` is forwarded to ``write_federated_index``. Warnings it
    returns are printed to stderr. The written file is then read back to
    count its ``capabilities``.

    Returns:
        0 on success; 1 when composing raises ``FileNotFoundError`` or
        ``ValueError``.
    """
    try:
        target, warnings = write_federated_index(refresh=args.refresh)
    except (FileNotFoundError, ValueError) as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1

    for warning in warnings:
        print(f"warning: {warning}", file=sys.stderr)

    # safe_load returns None for an empty document and the key may be null;
    # treat both as "no capabilities" instead of failing on the read-back.
    data = yaml.safe_load(target.read_text(encoding="utf-8")) or {}
    count = len(data.get("capabilities") or [])
    print(f"ok: wrote {target.relative_to(ROOT)} ({count} capabilities)")
    return 0
|
|
|
|
|
|
def cmd_graph(args: argparse.Namespace) -> int:
    """Render the relation graph as Mermaid and print or write it.

    With ``args.stdout`` the Mermaid text is printed without a trailing
    newline. Otherwise the graph file is written via ``write_graph`` and the
    HTML explorer is written to ``GRAPH_HTML``. ``args.check`` collects
    relation warnings, which are printed to stderr.

    Returns:
        1 when ``args.fail_on_warnings`` is set and warnings exist, else 0.
    """
    relation_warnings = check_relations() if args.check else []
    mermaid = render_mermaid()

    if args.stdout:
        print(mermaid, end="")
    else:
        graph_path = write_graph()
        # Imported at call time, only on the file-writing path.
        from reuse_surface.catalog import GRAPH_HTML, render_graph_explorer

        GRAPH_HTML.parent.mkdir(parents=True, exist_ok=True)
        GRAPH_HTML.write_text(render_graph_explorer(mermaid), encoding="utf-8")
        for written in (graph_path, GRAPH_HTML):
            print(f"ok: wrote {written.relative_to(ROOT)}")

    for message in relation_warnings:
        print(f"warning: {message}", file=sys.stderr)

    return 1 if (args.fail_on_warnings and relation_warnings) else 0
|
|
|
|
|
|
def cmd_catalog(args: argparse.Namespace) -> int:
    """Generate the capability catalog and list the files written.

    Passes the index, the indexed entries and the rendered Mermaid graph to
    ``write_catalog`` and prints each returned path relative to ``ROOT``.

    Returns:
        0 on success; 1 when an indexed entry's front matter cannot be parsed.
    """
    index = load_index()
    try:
        indexed_entries = _load_indexed_entries()
    except ValueError as exc:
        # Same convention as validate/export: an unreadable entry is a
        # reported error with a non-zero exit code, not a traceback.
        print(f"error: {exc}", file=sys.stderr)
        return 1

    paths = write_catalog(
        index, indexed_entries, mermaid_source=render_mermaid()
    )
    for path in paths:
        print(f"ok: wrote {path.relative_to(ROOT)}")
    return 0
|
|
|
|
|
|
def _service_url(args: argparse.Namespace) -> str | None:
|
|
return getattr(args, "base_url", None)
|
|
|
|
|
|
def cmd_serve(args: argparse.Namespace) -> int:
    """Run the hub application's ``main`` entry point and return 0.

    ``args`` is not used. The hub app is imported at call time rather than at
    module import.
    """
    from reuse_surface.hub.app import main as run_hub_service

    run_hub_service()
    return 0
|
|
|
|
|
|
def cmd_hub_status(args: argparse.Namespace) -> int:
    """Query the hub status endpoint and print a one-line summary.

    Returns:
        0 when the hub answers with HTTP 200; 1 when the client raises
        ``ValueError`` or the hub returns any other status.
    """
    try:
        code, payload = hub_client.hub_status(_service_url(args))
    except ValueError as err:
        print(f"error: {err}", file=sys.stderr)
        return 1

    if code == 200:
        service = payload.get("service")
        version = payload.get("version")
        state = payload.get("status")
        print(f"ok: {service} {version} ({state})")
        return 0

    print(f"error: hub returned {code}: {payload}", file=sys.stderr)
    return 1
|
|
|
|
|
|
def cmd_hub_list(args: argparse.Namespace) -> int:
    """List the repos registered with the hub, one tab-separated row each.

    Each row holds the repo slug, ``enabled``/``disabled`` and the URL. A
    final line reports the payload's ``count``.

    Returns:
        0 when the hub answers with HTTP 200; 1 when the client raises
        ``ValueError`` or the hub returns any other status.
    """
    try:
        code, payload = hub_client.hub_list(_service_url(args))
    except ValueError as err:
        print(f"error: {err}", file=sys.stderr)
        return 1

    if code != 200:
        print(f"error: hub returned {code}: {payload}", file=sys.stderr)
        return 1

    for registration in payload.get("repos", []):
        state = "enabled" if registration.get("enabled") else "disabled"
        row = (registration["repo"], state, registration.get("url", ""))
        print(f"{row[0]}\t{row[1]}\t{row[2]}")

    total = payload.get("count", 0)
    print(f"\n{total} registration(s)")
    return 0
|
|
|
|
|
|
def cmd_hub_show(args: argparse.Namespace) -> int:
    """Fetch the registration for ``args.repo`` and print it as YAML.

    Returns:
        0 when the hub answers with HTTP 200; 1 when the client raises
        ``ValueError`` or the hub returns any other status.
    """
    try:
        code, payload = hub_client.hub_show(args.repo, _service_url(args))
    except ValueError as err:
        print(f"error: {err}", file=sys.stderr)
        return 1

    if code == 200:
        rendered = yaml.safe_dump(payload, sort_keys=False)
        print(rendered)
        return 0

    print(f"error: hub returned {code}: {payload}", file=sys.stderr)
    return 1
|
|
|
|
|
|
def cmd_hub_register(args: argparse.Namespace) -> int:
    """Register a repo index URL with the hub.

    The request body carries repo, url, domain, enabled and required;
    ``description`` is added only when it is non-empty.

    Returns:
        0 when the hub answers with HTTP 201; 1 when the client raises
        ``ValueError`` or the hub returns any other status.
    """
    body: dict[str, Any] = dict(
        repo=args.repo,
        url=args.url,
        domain=args.domain,
        enabled=args.enabled,
        required=args.required,
    )
    if args.description:
        body["description"] = args.description

    try:
        code, payload = hub_client.hub_register(body, _service_url(args))
    except ValueError as err:
        print(f"error: {err}", file=sys.stderr)
        return 1

    if code == 201:
        print(f"ok: registered {args.repo}")
        return 0

    print(f"error: hub returned {code}: {payload}", file=sys.stderr)
    return 1
|
|
|
|
|
|
def cmd_hub_update(args: argparse.Namespace) -> int:
    """Update selected fields of a repo registration on the hub.

    Only the options that are not ``None`` (url, enabled, required, domain,
    description) are sent.

    Returns:
        0 when the hub answers with HTTP 200; 1 when no field was given, the
        client raises ``ValueError``, or the hub returns any other status.
    """
    updatable = ("url", "enabled", "required", "domain", "description")
    body: dict[str, Any] = {}
    for field in updatable:
        value = getattr(args, field)
        if value is not None:
            body[field] = value

    if not body:
        print("error: no fields to update", file=sys.stderr)
        return 1

    try:
        code, payload = hub_client.hub_update(args.repo, body, _service_url(args))
    except ValueError as err:
        print(f"error: {err}", file=sys.stderr)
        return 1

    if code == 200:
        print(f"ok: updated {args.repo}")
        return 0

    print(f"error: hub returned {code}: {payload}", file=sys.stderr)
    return 1
|
|
|
|
|
|
def cmd_hub_sync(args: argparse.Namespace) -> int:
    """Write the federation sources manifest from the hub's registrations.

    The manifest goes to ``args.output`` or ``DEFAULT_SOURCES_PATH``. With
    ``args.merge`` the existing manifest at that path is loaded and passed to
    ``build_manifest``. With ``args.dry_run`` the manifest is printed as YAML
    and nothing is written.

    Returns:
        0 on success; 1 when the client raises ``ValueError`` or the hub
        returns a status other than 200.
    """
    try:
        status, payload = hub_client.hub_list(_service_url(args))
    except ValueError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1
    if status != 200:
        print(f"error: hub returned {status}: {payload}", file=sys.stderr)
        return 1

    output = Path(args.output) if args.output else DEFAULT_SOURCES_PATH
    existing = load_sources_manifest(output) if args.merge else None
    manifest = build_manifest(payload, existing, merge=args.merge)
    if args.dry_run:
        print(yaml.safe_dump(manifest, sort_keys=False))
        return 0

    written = write_sources_manifest(manifest, output)
    try:
        shown = written.relative_to(ROOT)
    except ValueError:
        # --output may point outside ROOT; the file is already written, so
        # report the path as-is instead of failing on the display step.
        shown = written
    print(f"ok: wrote {shown} ({len(manifest['sources'])} source(s))")
    return 0
|
|
|
|
|
|
def cmd_report_cohorts(args: argparse.Namespace) -> int:
    """Print the capability cohort selected by the report filters.

    Output is JSON when ``args.format`` is ``"json"``; otherwise Markdown,
    printed without an extra trailing newline. Always returns 0.
    """
    filters = cohort_filters_from_args(args)
    cohort = select_cohort(filters)

    if args.format != "json":
        print(format_cohort_markdown(cohort, filters), end="")
        return 0

    print(format_cohort_json(cohort, filters))
    return 0
|
|
|
|
|
|
def cmd_export(args: argparse.Namespace) -> int:
    """Export the index together with each entry's front matter as one bundle.

    The bundle holds the index's version (default 1), domain and updated
    values plus one ``{"index", "entry"}`` pair per capability. It is printed
    to stdout as JSON or YAML according to ``args.format``; a capability
    count is then printed to stderr.

    Returns:
        0 on success; 1 when any entry's front matter fails to parse, in
        which case the errors go to stderr and nothing is exported.
    """
    index = load_index()
    exported: list[dict[str, Any]] = []
    failures: list[str] = []

    for item in index.get("capabilities", []):
        try:
            front_matter = parse_front_matter(ROOT / item["path"])
        except ValueError as exc:
            failures.append(str(exc))
        else:
            exported.append({"index": item, "entry": front_matter})

    if failures:
        for failure in failures:
            print(f"error: {failure}", file=sys.stderr)
        return 1

    bundle: dict[str, Any] = {
        "version": index.get("version", 1),
        "domain": index.get("domain"),
        "updated": index.get("updated"),
        "capabilities": exported,
    }

    if args.format == "json":
        rendered = json.dumps(bundle, indent=2, sort_keys=True)
    else:
        rendered = yaml.safe_dump(bundle, sort_keys=False)
    print(rendered)

    # The summary goes to stderr so stdout carries only the bundle.
    print(f"# exported {len(exported)} capabilities", file=sys.stderr)
    return 0
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Build the ``reuse-surface`` argument parser and dispatch a subcommand.

    Args:
        argv: Arguments to parse; ``None`` lets argparse read ``sys.argv[1:]``.

    Returns:
        The exit code returned by the selected ``cmd_*`` handler.
    """
    parser = argparse.ArgumentParser(prog="reuse-surface")
    commands = parser.add_subparsers(dest="command", required=True)

    # --- validate ---------------------------------------------------------
    validate_cmd = commands.add_parser("validate", help="validate capability entries")
    validate_cmd.add_argument(
        "path",
        nargs="?",
        help="optional capability markdown file; defaults to all entries",
    )
    validate_cmd.add_argument(
        "--relations",
        action="store_true",
        help="check relation cycles and broken references",
    )
    validate_cmd.add_argument(
        "--fail-on-warnings",
        action="store_true",
        help="exit non-zero when warnings are present",
    )
    validate_cmd.set_defaults(func=cmd_validate)

    # --- federation compose -----------------------------------------------
    federation_cmd = commands.add_parser("federation", help="federation index operations")
    federation_actions = federation_cmd.add_subparsers(
        dest="federation_command", required=True
    )
    compose_cmd = federation_actions.add_parser("compose", help="compose federated index")
    compose_cmd.add_argument(
        "--refresh",
        action="store_true",
        help="bypass remote index cache and refetch URL sources",
    )
    compose_cmd.set_defaults(func=cmd_federation_compose)

    # --- query ------------------------------------------------------------
    query_cmd = commands.add_parser("query", help="query capability index")
    for option in (
        "--discovery-min",
        "--availability-min",
        "--domain",
        "--tag",
        "--consumption-mode",
        "--keyword",
    ):
        query_cmd.add_argument(option)
    query_cmd.set_defaults(func=cmd_query)

    # --- export -----------------------------------------------------------
    export_cmd = commands.add_parser("export", help="export registry bundle")
    export_cmd.add_argument("--format", choices=["yaml", "json"], default="yaml")
    export_cmd.set_defaults(func=cmd_export)

    # --- overlaps ---------------------------------------------------------
    overlaps_cmd = commands.add_parser(
        "overlaps", help="detect potential duplicate capabilities"
    )
    overlaps_cmd.add_argument(
        "--threshold",
        type=float,
        default=0.28,
        help="token similarity threshold (0-1)",
    )
    overlaps_cmd.set_defaults(func=cmd_overlaps)

    # --- catalog ----------------------------------------------------------
    catalog_cmd = commands.add_parser(
        "catalog", help="generate human-readable capability catalog"
    )
    catalog_cmd.set_defaults(func=cmd_catalog)

    # --- graph ------------------------------------------------------------
    graph_cmd = commands.add_parser("graph", help="generate relation graph")
    graph_cmd.add_argument(
        "--stdout",
        action="store_true",
        help="print Mermaid to stdout instead of writing docs/graph/",
    )
    graph_cmd.add_argument(
        "--check",
        action="store_true",
        help="report depends_on cycles and broken relation references",
    )
    graph_cmd.add_argument(
        "--fail-on-warnings",
        action="store_true",
        help="exit non-zero when relation warnings are present",
    )
    graph_cmd.set_defaults(func=cmd_graph)

    # --- serve ------------------------------------------------------------
    serve_cmd = commands.add_parser("serve", help="run federation service API")
    serve_cmd.set_defaults(func=cmd_serve)

    # --- hub client -------------------------------------------------------
    hub_cmd = commands.add_parser("hub", help="federation service client")
    hub_cmd.add_argument("--base-url", help="service base URL (or set REUSE_SURFACE_URL)")
    hub_actions = hub_cmd.add_subparsers(dest="hub_command", required=True)

    status_cmd = hub_actions.add_parser("status", help="check hub health")
    status_cmd.set_defaults(func=cmd_hub_status)

    list_cmd = hub_actions.add_parser("list", help="list registered repos")
    list_cmd.set_defaults(func=cmd_hub_list)

    show_cmd = hub_actions.add_parser("show", help="show one registration")
    show_cmd.add_argument("--repo", required=True)
    show_cmd.set_defaults(func=cmd_hub_show)

    register_cmd = hub_actions.add_parser("register", help="register a repo index URL")
    register_cmd.add_argument("--repo", required=True)
    register_cmd.add_argument("--url", required=True)
    register_cmd.add_argument("--domain", default="helix_forge")
    register_cmd.add_argument("--description")
    register_cmd.add_argument(
        "--enabled", action=argparse.BooleanOptionalAction, default=True
    )
    register_cmd.add_argument("--required", action="store_true")
    register_cmd.set_defaults(func=cmd_hub_register)

    update_cmd = hub_actions.add_parser("update", help="update a repo registration")
    update_cmd.add_argument("--repo", required=True)
    update_cmd.add_argument("--url")
    update_cmd.add_argument("--domain")
    update_cmd.add_argument("--description")
    # Both flags default to None so the handler can tell "not given" from False.
    update_cmd.add_argument(
        "--enabled", action=argparse.BooleanOptionalAction, default=None
    )
    update_cmd.add_argument(
        "--required", action=argparse.BooleanOptionalAction, default=None
    )
    update_cmd.set_defaults(func=cmd_hub_update)

    sync_cmd = hub_actions.add_parser(
        "sync", help="write federation sources.yaml from hub registrations"
    )
    sync_cmd.add_argument(
        "--output",
        help=f"manifest path (default: {DEFAULT_SOURCES_PATH.relative_to(ROOT)})",
    )
    sync_cmd.add_argument(
        "--merge",
        action="store_true",
        help="keep local index sources not overridden by hub repo slugs",
    )
    sync_cmd.add_argument(
        "--dry-run",
        action="store_true",
        help="print manifest without writing",
    )
    sync_cmd.set_defaults(func=cmd_hub_sync)

    # --- report cohorts ---------------------------------------------------
    report_cmd = commands.add_parser("report", help="planning and analytics reports")
    report_actions = report_cmd.add_subparsers(dest="report_command", required=True)
    cohorts_cmd = report_actions.add_parser(
        "cohorts", help="export capability cohorts by maturity filters"
    )
    cohorts_cmd.add_argument(
        "--planning-min", help="discovery minimum (implies availability-max A1)"
    )
    cohorts_cmd.add_argument("--implementation-min", help="availability minimum")
    cohorts_cmd.add_argument("--discovery-min")
    cohorts_cmd.add_argument("--availability-min")
    cohorts_cmd.add_argument("--availability-max")
    cohorts_cmd.add_argument("--domain")
    cohorts_cmd.add_argument("--format", choices=["markdown", "json"], default="markdown")
    cohorts_cmd.set_defaults(func=cmd_report_cohorts)

    parsed = parser.parse_args(argv)
    handler = parsed.func
    return handler(parsed)
|
|
|
|
|
|
# Script entry point: exit the process with the code returned by main().
if __name__ == "__main__":
    sys.exit(main())