generated from coulomb/repo-seed
Complete REUSE-WP-0004: CI, overlap detection, and catalog generation
Some checks failed
ci / validate-registry (push) Has been cancelled
Some checks failed
ci / validate-registry (push) Has been cancelled
Add Gitea CI workflow for registry validation, reuse-surface overlaps and catalog commands, generated catalog artifacts, and documentation updates closing gap analysis priorities 9-11.
This commit is contained in:
@@ -9,9 +9,9 @@ from typing import Any
|
||||
import yaml
|
||||
from jsonschema import Draft202012Validator
|
||||
|
||||
from reuse_surface.catalog import write_catalog
|
||||
from reuse_surface.overlaps import find_overlaps
|
||||
from reuse_surface.registry import (
|
||||
CAPABILITIES_DIR,
|
||||
INDEX_PATH,
|
||||
ROOT,
|
||||
capability_paths,
|
||||
level_at_least,
|
||||
@@ -115,6 +115,40 @@ def cmd_query(args: argparse.Namespace) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
def _load_indexed_entries() -> list[tuple[dict[str, Any], dict[str, Any]]]:
    """Pair every capability listed in the index with its parsed front matter.

    Returns:
        One ``(index_item, front_matter)`` tuple per item under the index's
        ``"capabilities"`` key, in index order. The list is empty when that
        key is absent.

    Raises:
        KeyError: If an index item has no ``"path"`` key.
    """
    listed = load_index().get("capabilities", [])
    # Each item's "path" is resolved against ROOT before it is parsed.
    return [(entry, parse_front_matter(ROOT / entry["path"])) for entry in listed]
|
||||
|
||||
|
||||
def cmd_overlaps(args: argparse.Namespace) -> int:
    """Print the overlap candidates found among the indexed capabilities.

    Args:
        args: Parsed CLI arguments; only ``args.threshold`` is read and it is
            forwarded unchanged to ``find_overlaps``.

    Returns:
        Always ``0``, whether or not any candidates were reported.
    """
    matches = find_overlaps(_load_indexed_entries(), threshold=args.threshold)
    if not matches:
        print("no overlap candidates")
        return 0

    # One line per candidate pair: both ids, the score to two decimals, and
    # the reasons joined with "; ".
    for match in matches:
        why = "; ".join(match.reasons)
        print(f"{match.left_id} <> {match.right_id} score={match.score:.2f} {why}")

    # Trailing summary, separated from the listing by a blank line.
    total = len(matches)
    plural = "" if total == 1 else "s"
    print(f"\n{total} candidate{plural}")
    return 0
|
||||
|
||||
|
||||
def cmd_catalog(args: argparse.Namespace) -> int:
    """Generate the capability catalog and report the files that were written.

    Args:
        args: Parsed CLI arguments; not read by this command.

    Returns:
        Always ``0``.
    """
    # NOTE: the index is read here and again inside _load_indexed_entries().
    md_path, html_path = write_catalog(load_index(), _load_indexed_entries())

    # Report each output path relative to ROOT, Markdown first.
    for written in (md_path, html_path):
        print(f"ok: wrote {written.relative_to(ROOT)}")
    return 0
|
||||
|
||||
|
||||
def cmd_export(args: argparse.Namespace) -> int:
|
||||
index = load_index()
|
||||
bundle: dict[str, Any] = {
|
||||
@@ -184,6 +218,22 @@ def main(argv: list[str] | None = None) -> int:
|
||||
)
|
||||
export.set_defaults(func=cmd_export)
|
||||
|
||||
overlaps = subparsers.add_parser(
|
||||
"overlaps", help="detect potential duplicate capabilities"
|
||||
)
|
||||
overlaps.add_argument(
|
||||
"--threshold",
|
||||
type=float,
|
||||
default=0.28,
|
||||
help="token similarity threshold (0-1)",
|
||||
)
|
||||
overlaps.set_defaults(func=cmd_overlaps)
|
||||
|
||||
catalog = subparsers.add_parser(
|
||||
"catalog", help="generate human-readable capability catalog"
|
||||
)
|
||||
catalog.set_defaults(func=cmd_catalog)
|
||||
|
||||
args = parser.parse_args(argv)
|
||||
return args.func(args)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user