Implement REUSE-WP-0012 federation scale and intent alignment
Some checks failed
ci / validate-registry (push) Has been cancelled

Add hub sync and report cohorts CLI commands with pytest coverage, document
sibling index publish contract and hub hardening path, align INTENT layout,
raise external evidence on three registry entries, and close gap priorities
19-23 (priority 18 deferred on sibling index blocks).
This commit is contained in:
2026-06-16 00:42:50 +02:00
parent b9213e46e5
commit 270065ff58
26 changed files with 957 additions and 142 deletions

View File

@@ -13,7 +13,19 @@ from reuse_surface.catalog import write_catalog
from reuse_surface.federation import write_federated_index
from reuse_surface import hub_client
from reuse_surface.graph import check_relations, render_mermaid, write_graph
from reuse_surface.hub_sync import (
DEFAULT_SOURCES_PATH,
build_manifest,
load_sources_manifest,
write_sources_manifest,
)
from reuse_surface.overlaps import find_overlaps
from reuse_surface.reports import (
cohort_filters_from_args,
format_cohort_json,
format_cohort_markdown,
select_cohort,
)
from reuse_surface.registry import (
ROOT,
capability_paths,
@@ -294,6 +306,39 @@ def cmd_hub_update(args: argparse.Namespace) -> int:
return 0
def cmd_hub_sync(args: argparse.Namespace) -> int:
    """Build the federation sources manifest from the hub's registrations.

    The registration list is fetched with ``hub_client.hub_list`` and turned
    into a manifest by ``build_manifest``. With ``--merge`` the manifest
    currently at the output path is loaded and passed along so it can be
    merged. With ``--dry-run`` the manifest is printed as YAML and nothing
    is written.

    Args:
        args: Parsed CLI arguments; reads ``output``, ``merge`` and
            ``dry_run`` (plus whatever ``_service_url`` needs).

    Returns:
        0 on success; 1 when the hub call raises ``ValueError`` or the hub
        answers with a status other than 200.
    """
    try:
        status, payload = hub_client.hub_list(_service_url(args))
    except ValueError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1
    if status != 200:
        print(f"error: hub returned {status}: {payload}", file=sys.stderr)
        return 1

    output = Path(args.output) if args.output else DEFAULT_SOURCES_PATH
    existing = load_sources_manifest(output) if args.merge else None
    manifest = build_manifest(payload, existing, merge=args.merge)
    if args.dry_run:
        print(yaml.safe_dump(manifest, sort_keys=False))
        return 0

    written = write_sources_manifest(manifest, output)
    # --output may be relative or live outside ROOT; relative_to() raises
    # ValueError in that case, which used to crash the command *after* the
    # manifest had already been written. Fall back to the path as given.
    try:
        shown = written.relative_to(ROOT)
    except ValueError:
        shown = written
    print(f"ok: wrote {shown} ({len(manifest['sources'])} source(s))")
    return 0
def cmd_report_cohorts(args: argparse.Namespace) -> int:
    """Print a capability cohort report in the format chosen by ``args.format``.

    Filters are derived from ``args`` with ``cohort_filters_from_args`` and
    applied with ``select_cohort``. ``"json"`` prints the JSON rendering;
    any other value prints the Markdown rendering, which already ends with
    a newline and is therefore printed without an extra one.

    Returns:
        Always 0.
    """
    criteria = cohort_filters_from_args(args)
    selected = select_cohort(criteria)
    if args.format != "json":
        print(format_cohort_markdown(selected, criteria), end="")
        return 0
    print(format_cohort_json(selected, criteria))
    return 0
def cmd_export(args: argparse.Namespace) -> int:
index = load_index()
bundle: dict[str, Any] = {
@@ -457,6 +502,43 @@ def main(argv: list[str] | None = None) -> int:
hub_update.add_argument("--required", action=argparse.BooleanOptionalAction, default=None)
hub_update.set_defaults(func=cmd_hub_update)
hub_sync = hub_sub.add_parser(
"sync", help="write federation sources.yaml from hub registrations"
)
hub_sync.add_argument(
"--output",
help=f"manifest path (default: {DEFAULT_SOURCES_PATH.relative_to(ROOT)})",
)
hub_sync.add_argument(
"--merge",
action="store_true",
help="keep local index sources not overridden by hub repo slugs",
)
hub_sync.add_argument(
"--dry-run",
action="store_true",
help="print manifest without writing",
)
hub_sync.set_defaults(func=cmd_hub_sync)
report = subparsers.add_parser("report", help="planning and analytics reports")
report_sub = report.add_subparsers(dest="report_command", required=True)
cohorts = report_sub.add_parser(
"cohorts", help="export capability cohorts by maturity filters"
)
cohorts.add_argument("--planning-min", help="discovery minimum (implies availability-max A1)")
cohorts.add_argument("--implementation-min", help="availability minimum")
cohorts.add_argument("--discovery-min")
cohorts.add_argument("--availability-min")
cohorts.add_argument("--availability-max")
cohorts.add_argument("--domain")
cohorts.add_argument(
"--format",
choices=["markdown", "json"],
default="markdown",
)
cohorts.set_defaults(func=cmd_report_cohorts)
args = parser.parse_args(argv)
return args.func(args)

100
reuse_surface/hub_sync.py Normal file
View File

@@ -0,0 +1,100 @@
from __future__ import annotations
from pathlib import Path
from typing import Any
import yaml
from reuse_surface.registry import ROOT
DEFAULT_SOURCES_PATH = ROOT / "registry" / "federation" / "sources.yaml"
def registration_to_source(registration: dict[str, Any]) -> dict[str, Any]:
    """Convert one hub registration record into a sources-manifest entry.

    ``repo`` and ``url`` are mandatory (a missing key raises ``KeyError``).
    ``enabled``, ``required`` and ``domain`` fall back to ``True``,
    ``False`` and ``"helix_forge"``. The optional keys ``description``,
    ``cache_ttl_seconds``, ``auth_env`` and ``auth_header`` are copied only
    when present and not ``None``.
    """
    entry: dict[str, Any] = {
        "repo": registration["repo"],
        "url": registration["url"],
    }
    defaults = (("enabled", True), ("required", False), ("domain", "helix_forge"))
    for key, fallback in defaults:
        entry[key] = registration.get(key, fallback)

    passthrough = ("description", "cache_ttl_seconds", "auth_env", "auth_header")
    entry.update(
        (key, registration[key])
        for key in passthrough
        if registration.get(key) is not None
    )
    return entry
def sources_from_hub_payload(
    payload: dict[str, Any],
    *,
    enabled_only: bool = True,
) -> list[dict[str, Any]]:
    """Extract manifest source entries from a hub listing payload.

    Args:
        payload: Hub response; registrations are read from its ``"repos"``
            key. A missing or ``None`` value is treated as no registrations.
        enabled_only: When true, registrations whose ``enabled`` flag is
            falsy are dropped (a missing flag counts as enabled).

    Returns:
        Entries produced by ``registration_to_source``, sorted by ``repo``.
        Registrations without a ``url`` or without a ``repo`` are skipped.
    """
    # `or []` also covers an explicit `"repos": null`, which .get's default
    # would not.
    repos = payload.get("repos") or []
    sources: list[dict[str, Any]] = []
    for registration in repos:
        if enabled_only and not registration.get("enabled", True):
            continue
        # Both keys are required downstream (conversion and the sort below);
        # an incomplete record is skipped instead of raising KeyError.
        if not registration.get("url") or not registration.get("repo"):
            continue
        sources.append(registration_to_source(registration))
    return sorted(sources, key=lambda item: item["repo"])
def merge_sources(
    hub_sources: list[dict[str, Any]],
    existing_sources: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Combine hub-derived sources with locally declared index sources.

    Every hub source is kept. An existing source is kept only when it has
    an ``"index"`` key and its ``repo`` is not already provided by the hub.

    Args:
        hub_sources: Entries built from the hub; each must carry ``repo``.
        existing_sources: Entries from the manifest currently on disk.

    Returns:
        A new list sorted by ``repo``; entries without a ``repo`` sort first.
    """
    hub_repos = {source["repo"] for source in hub_sources}
    kept_local = [
        source
        for source in existing_sources
        if source.get("repo") not in hub_repos and "index" in source
    ]
    # The filter above tolerates a local entry with no `repo`, so the sort
    # key must too; indexing item["repo"] here raised KeyError for it.
    return sorted(
        [*hub_sources, *kept_local],
        key=lambda item: item.get("repo") or "",
    )
def build_manifest(
    hub_payload: dict[str, Any],
    existing: dict[str, Any] | None = None,
    *,
    merge: bool = False,
) -> dict[str, Any]:
    """Assemble a sources manifest from a hub payload.

    Args:
        hub_payload: Hub listing passed to ``sources_from_hub_payload``.
        existing: Manifest already on disk, if any. Its ``version``,
            ``domain`` and ``collision_policy`` are carried over; when it is
            ``None`` or empty the defaults ``1``, ``"helix_forge"`` and
            ``"warn"`` are used.
        merge: When true and ``existing`` is non-empty, the existing sources
            are merged in via ``merge_sources``.

    Returns:
        A dict with keys ``version``, ``domain``, ``collision_policy`` and
        ``sources``, in that order.
    """
    hub_sources = sources_from_hub_payload(hub_payload)
    if merge and existing:
        # A hand-edited manifest with a bare `sources:` key parses to None;
        # treat it as an empty list rather than crashing the merge.
        sources = merge_sources(hub_sources, existing.get("sources") or [])
    else:
        sources = hub_sources

    base = existing or {}
    return {
        "version": base.get("version", 1),
        "domain": base.get("domain", "helix_forge"),
        "collision_policy": base.get("collision_policy", "warn"),
        "sources": sources,
    }
def load_sources_manifest(path: Path) -> dict[str, Any]:
    """Load a sources manifest from ``path``.

    Returns a fresh default manifest (version 1, domain ``"helix_forge"``,
    collision policy ``"warn"``, no sources) when the file does not exist
    or contains no YAML document. Otherwise returns whatever
    ``yaml.safe_load`` parses from the file, read as UTF-8.
    """
    default: dict[str, Any] = {
        "version": 1,
        "domain": "helix_forge",
        "collision_policy": "warn",
        "sources": [],
    }
    if not path.exists():
        return default
    loaded = yaml.safe_load(path.read_text(encoding="utf-8"))
    # safe_load yields None for an empty file; hand back the default so the
    # declared dict return type holds for callers.
    return default if loaded is None else loaded
def write_sources_manifest(manifest: dict[str, Any], path: Path = DEFAULT_SOURCES_PATH) -> Path:
    """Serialize ``manifest`` as YAML to ``path`` and return ``path``.

    Missing parent directories are created first. Keys keep their insertion
    order, non-ASCII characters are written unescaped, and the file is
    encoded as UTF-8.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    rendered = yaml.safe_dump(manifest, sort_keys=False, allow_unicode=True)
    path.write_text(rendered, encoding="utf-8")
    return path

87
reuse_surface/reports.py Normal file
View File

@@ -0,0 +1,87 @@
from __future__ import annotations
import json
from typing import Any
from reuse_surface.registry import level_at_least, load_index, parse_vector
def _availability_at_most(current: str, maximum: str) -> bool:
    """Return whether ``current`` ranks at or below ``maximum``.

    Ranks are positions in ``LEVEL_ORDERS["availability"]``; a level that is
    not in that sequence raises ``ValueError`` from ``index``.
    """
    from reuse_surface.registry import LEVEL_ORDERS

    ladder = LEVEL_ORDERS["availability"]
    current_rank = ladder.index(current)
    maximum_rank = ladder.index(maximum)
    return current_rank <= maximum_rank
def cohort_filters_from_args(args: Any) -> dict[str, str | None]:
    """Translate parsed CLI arguments into a cohort filter mapping.

    The result always has the keys ``discovery_min``, ``availability_min``,
    ``availability_max`` and ``domain``; attributes missing from ``args``
    become ``None``. Two shortcuts override the plain values:

    * ``planning_min`` replaces ``discovery_min`` and, when no
      ``availability_max`` was given, caps availability at ``"A1"``.
    * ``implementation_min`` replaces ``availability_min``.
    """
    plain_keys = ("discovery_min", "availability_min", "availability_max", "domain")
    filters: dict[str, str | None] = {
        key: getattr(args, key, None) for key in plain_keys
    }

    planning_min = getattr(args, "planning_min", None)
    if planning_min:
        filters["discovery_min"] = planning_min
        if not filters["availability_max"]:
            filters["availability_max"] = "A1"

    implementation_min = getattr(args, "implementation_min", None)
    if implementation_min:
        filters["availability_min"] = implementation_min
    return filters
def select_cohort(
    filters: dict[str, str | None],
    index: dict[str, Any] | None = None,
) -> list[dict[str, Any]]:
    """Return the capabilities of ``index`` that satisfy every active filter.

    Args:
        filters: Mapping with optional ``discovery_min``,
            ``availability_min``, ``availability_max`` and ``domain``
            entries; falsy or missing entries are not applied.
        index: Index to search, read from its ``"capabilities"`` list.
            Only ``None`` triggers ``load_index()``; an explicitly passed
            empty index yields no matches.

    Returns:
        Matching capability items, in index order. Minimum levels are
        checked with ``level_at_least`` against the item's parsed
        ``vector``; ``domain`` must equal the item's ``domain`` exactly.
    """
    # `index or load_index()` discarded an explicit empty index and loaded
    # the real registry instead; only fall back when nothing was supplied.
    data = load_index() if index is None else index

    discovery_min = filters.get("discovery_min")
    availability_min = filters.get("availability_min")
    availability_max = filters.get("availability_max")
    domain = filters.get("domain")

    matches: list[dict[str, Any]] = []
    for item in data.get("capabilities", []):
        vector = parse_vector(item["vector"])
        if discovery_min and not level_at_least(
            "discovery", vector["discovery"], discovery_min
        ):
            continue
        if availability_min and not level_at_least(
            "availability", vector["availability"], availability_min
        ):
            continue
        if availability_max and not _availability_at_most(
            vector["availability"], availability_max
        ):
            continue
        if domain and item.get("domain") != domain:
            continue
        matches.append(item)
    return matches
def format_cohort_markdown(
    matches: list[dict[str, Any]],
    filters: dict[str, str | None],
) -> str:
    """Render a cohort as a Markdown report ending in a newline.

    The report has a title, then a ``Filters:`` bullet list of the truthy
    filters sorted by key (omitted when none are set). It ends with either
    a placeholder line when ``matches`` is empty, or a table of id, vector
    and comma-joined consumption modes followed by a count line.
    """
    out = ["# Capability cohort report", ""]

    applied = sorted((key, value) for key, value in filters.items() if value)
    if applied:
        out.append("Filters:")
        out.extend(f"- `{key}`: `{value}`" for key, value in applied)
        out.append("")

    if matches:
        out += ["| ID | Vector | Consumption modes |", "|---|---|---|"]
        for item in matches:
            modes = ", ".join(item.get("consumption_modes", []))
            out.append(f"| `{item['id']}` | {item['vector']} | {modes} |")
        total = len(matches)
        suffix = "y" if total == 1 else "ies"
        out += ["", f"**{total}** capabilit{suffix}."]
    else:
        out.append("_No capabilities matched._")

    return "\n".join(out) + "\n"
def format_cohort_json(matches: list[dict[str, Any]], filters: dict[str, str | None]) -> str:
    """Render a cohort as a JSON document.

    The document has ``count`` (number of matches), ``filters`` (only the
    truthy filter entries) and ``capabilities`` (the matches themselves).
    It is indented by two spaces with keys sorted at every level.
    """
    applied: dict[str, str | None] = {}
    for name, setting in filters.items():
        if setting:
            applied[name] = setting

    document = dict(count=len(matches), filters=applied, capabilities=matches)
    return json.dumps(document, indent=2, sort_keys=True)