Complete REUSE-WP-0004: CI, overlap detection, and catalog generation
Some checks failed
ci / validate-registry (push) Has been cancelled

Add Gitea CI workflow for registry validation, reuse-surface overlaps and
catalog commands, generated catalog artifacts, and documentation updates
closing gap analysis priorities 9-11.
This commit is contained in:
2026-06-15 01:20:31 +02:00
parent 5c5023c000
commit c366fc4a4e
12 changed files with 538 additions and 12 deletions

View File

@@ -9,9 +9,9 @@ from typing import Any
import yaml
from jsonschema import Draft202012Validator
from reuse_surface.catalog import write_catalog
from reuse_surface.overlaps import find_overlaps
from reuse_surface.registry import (
CAPABILITIES_DIR,
INDEX_PATH,
ROOT,
capability_paths,
level_at_least,
@@ -115,6 +115,40 @@ def cmd_query(args: argparse.Namespace) -> int:
return 0
def _load_indexed_entries() -> list[tuple[dict[str, Any], dict[str, Any]]]:
    """Pair each capability listed in the index with its parsed front matter.

    The index is read via ``load_index()``; each item under its
    ``"capabilities"`` key (an empty list when the key is absent) has its
    ``"path"`` value resolved against ``ROOT`` and passed to
    ``parse_front_matter``.

    Returns:
        A list of ``(index_item, front_matter)`` tuples, in index order.

    Raises:
        KeyError: If an index item has no ``"path"`` key.
    """
    listed = load_index().get("capabilities", [])
    return [
        (entry, parse_front_matter(ROOT / entry["path"]))
        for entry in listed
    ]
def cmd_overlaps(args: argparse.Namespace) -> int:
    """Print overlap candidates among the indexed capabilities.

    Args:
        args: Parsed CLI arguments; ``args.threshold`` is forwarded to
            ``find_overlaps`` as its ``threshold`` keyword.

    Returns:
        Always ``0``, whether or not any candidates were reported.
    """
    matches = find_overlaps(_load_indexed_entries(), threshold=args.threshold)

    # Nothing to report: emit a single notice and finish.
    if not matches:
        print("no overlap candidates")
        return 0

    for match in matches:
        joined_reasons = "; ".join(match.reasons)
        pair = f"{match.left_id} <> {match.right_id} "
        detail = f"score={match.score:.2f} {joined_reasons}"
        print(pair + detail)

    # Summary line, preceded by a blank line, with simple pluralisation.
    total = len(matches)
    suffix = "" if total == 1 else "s"
    print(f"\n{total} candidate{suffix}")
    return 0
def cmd_catalog(args: argparse.Namespace) -> int:
    """Write the capability catalog and report the files produced.

    ``write_catalog`` is called with the loaded index and the indexed
    entries; it is expected to return a pair of paths, each of which is
    printed relative to ``ROOT``.

    Args:
        args: Parsed CLI arguments (not used by this command).

    Returns:
        Always ``0``.
    """
    registry_index = load_index()
    entries = _load_indexed_entries()
    # Unpack exactly two paths so an unexpected return shape still fails loudly.
    first_path, second_path = write_catalog(registry_index, entries)
    for written in (first_path, second_path):
        print(f"ok: wrote {written.relative_to(ROOT)}")
    return 0
def cmd_export(args: argparse.Namespace) -> int:
index = load_index()
bundle: dict[str, Any] = {
@@ -184,6 +218,22 @@ def main(argv: list[str] | None = None) -> int:
)
export.set_defaults(func=cmd_export)
overlaps = subparsers.add_parser(
"overlaps", help="detect potential duplicate capabilities"
)
overlaps.add_argument(
"--threshold",
type=float,
default=0.28,
help="token similarity threshold (0-1)",
)
overlaps.set_defaults(func=cmd_overlaps)
catalog = subparsers.add_parser(
"catalog", help="generate human-readable capability catalog"
)
catalog.set_defaults(func=cmd_catalog)
args = parser.parse_args(argv)
return args.func(args)