Implement REUSE-WP-0012 federation scale and intent alignment

Add hub sync and report cohorts CLI commands with pytest coverage, document sibling index publish contract and hub hardening path, align INTENT layout, raise external evidence on three registry entries, and close gap priorities 19-23 (priority 18 deferred on sibling index blocks).
2026-06-16 00:42:50 +02:00
parent b9213e46e5
commit 270065ff58
26 changed files with 957 additions and 142 deletions
--- a/reuse_surface/cli.py
+++ b/reuse_surface/cli.py
@@ -13,7 +13,19 @@ from reuse_surface.catalog import write_catalog
 from reuse_surface.federation import write_federated_index
 from reuse_surface import hub_client
 from reuse_surface.graph import check_relations, render_mermaid, write_graph
+from reuse_surface.hub_sync import (
+    DEFAULT_SOURCES_PATH,
+    build_manifest,
+    load_sources_manifest,
+    write_sources_manifest,
+)
 from reuse_surface.overlaps import find_overlaps
+from reuse_surface.reports import (
+    cohort_filters_from_args,
+    format_cohort_json,
+    format_cohort_markdown,
+    select_cohort,
+)
 from reuse_surface.registry import (
    ROOT,
    capability_paths,
@@ -294,6 +306,39 @@ def cmd_hub_update(args: argparse.Namespace) -> int:
    return 0


+def cmd_hub_sync(args: argparse.Namespace) -> int:
+    """Build the federation sources manifest from the hub's registrations.
+
+    The registration list is fetched from the hub and converted into a
+    manifest. With ``--merge`` the manifest currently at the output path is
+    loaded first and handed to the builder. With ``--dry-run`` the result is
+    printed as YAML instead of being written.
+
+    Returns:
+        0 on success; 1 when the hub call raises ``ValueError`` or the hub
+        answers with a status other than 200 (the error goes to stderr).
+    """
+    try:
+        status, payload = hub_client.hub_list(_service_url(args))
+    except ValueError as exc:
+        print(f"error: {exc}", file=sys.stderr)
+        return 1
+    if status != 200:
+        print(f"error: hub returned {status}: {payload}", file=sys.stderr)
+        return 1
+    output = Path(args.output) if args.output else DEFAULT_SOURCES_PATH
+    existing = load_sources_manifest(output) if args.merge else None
+    manifest = build_manifest(payload, existing, merge=args.merge)
+    if args.dry_run:
+        print(yaml.safe_dump(manifest, sort_keys=False))
+        return 0
+    written = write_sources_manifest(manifest, output)
+    try:
+        shown = written.relative_to(ROOT)
+    except ValueError:
+        # --output may be a relative path or live outside the repository
+        # root; relative_to() raises then, so report the path as given
+        # rather than crashing after the file has already been written.
+        shown = written
+    print(f"ok: wrote {shown} ({len(manifest['sources'])} source(s))")
+    return 0
+
+
+def cmd_report_cohorts(args: argparse.Namespace) -> int:
+    """Print the capability cohort selected by the CLI filters.
+
+    The filters are derived from ``args``, the matching capabilities are
+    selected, and the result is printed as JSON when ``args.format`` is
+    ``"json"`` and as markdown otherwise. Always returns 0.
+    """
+    active_filters = cohort_filters_from_args(args)
+    selected = select_cohort(active_filters)
+    if args.format == "json":
+        print(format_cohort_json(selected, active_filters))
+        return 0
+    # The markdown text is printed without an extra trailing newline.
+    print(format_cohort_markdown(selected, active_filters), end="")
+    return 0
+
+
 def cmd_export(args: argparse.Namespace) -> int:
    index = load_index()
    bundle: dict[str, Any] = {
@@ -457,6 +502,43 @@ def main(argv: list[str] | None = None) -> int:
    hub_update.add_argument("--required", action=argparse.BooleanOptionalAction, default=None)
    hub_update.set_defaults(func=cmd_hub_update)

+    hub_sync = hub_sub.add_parser(
+        "sync", help="write federation sources.yaml from hub registrations"
+    )
+    hub_sync.add_argument(
+        "--output",
+        help=f"manifest path (default: {DEFAULT_SOURCES_PATH.relative_to(ROOT)})",
+    )
+    hub_sync.add_argument(
+        "--merge",
+        action="store_true",
+        help="keep local index sources not overridden by hub repo slugs",
+    )
+    hub_sync.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="print manifest without writing",
+    )
+    hub_sync.set_defaults(func=cmd_hub_sync)
+
+    report = subparsers.add_parser("report", help="planning and analytics reports")
+    report_sub = report.add_subparsers(dest="report_command", required=True)
+    cohorts = report_sub.add_parser(
+        "cohorts", help="export capability cohorts by maturity filters"
+    )
+    cohorts.add_argument("--planning-min", help="discovery minimum (implies availability-max A1)")
+    cohorts.add_argument("--implementation-min", help="availability minimum")
+    cohorts.add_argument("--discovery-min")
+    cohorts.add_argument("--availability-min")
+    cohorts.add_argument("--availability-max")
+    cohorts.add_argument("--domain")
+    cohorts.add_argument(
+        "--format",
+        choices=["markdown", "json"],
+        default="markdown",
+    )
+    cohorts.set_defaults(func=cmd_report_cohorts)
+
    args = parser.parse_args(argv)
    return args.func(args)

--- a/reuse_surface/hub_sync.py
+++ b/reuse_surface/hub_sync.py
@@ -0,0 +1,100 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from reuse_surface.registry import ROOT
+
+DEFAULT_SOURCES_PATH = ROOT / "registry" / "federation" / "sources.yaml"
+
+
+def registration_to_source(registration: dict[str, Any]) -> dict[str, Any]:
+    """Translate one hub registration record into a sources-manifest entry.
+
+    ``repo`` and ``url`` are mandatory (a missing key raises ``KeyError``).
+    ``enabled``, ``required`` and ``domain`` default to ``True``, ``False``
+    and ``"helix_forge"``. ``description``, ``cache_ttl_seconds``,
+    ``auth_env`` and ``auth_header`` are copied only when present with a
+    value other than ``None``; any other registration key is dropped.
+    """
+    entry: dict[str, Any] = {
+        "repo": registration["repo"],
+        "url": registration["url"],
+        "enabled": registration.get("enabled", True),
+        "required": registration.get("required", False),
+        "domain": registration.get("domain", "helix_forge"),
+    }
+    passthrough = ("description", "cache_ttl_seconds", "auth_env", "auth_header")
+    entry.update(
+        (key, registration[key])
+        for key in passthrough
+        if registration.get(key) is not None
+    )
+    return entry
+
+
+def sources_from_hub_payload(
+    payload: dict[str, Any],
+    *,
+    enabled_only: bool = True,
+) -> list[dict[str, Any]]:
+    """Convert the hub's ``repos`` list into manifest sources sorted by repo.
+
+    Registrations without a truthy ``url`` are always skipped. When
+    ``enabled_only`` is set, registrations whose ``enabled`` flag is falsy
+    are skipped too (a missing flag counts as enabled).
+    """
+
+    def _is_wanted(registration: dict[str, Any]) -> bool:
+        if enabled_only and not registration.get("enabled", True):
+            return False
+        return bool(registration.get("url"))
+
+    converted = [
+        registration_to_source(registration)
+        for registration in payload.get("repos", [])
+        if _is_wanted(registration)
+    ]
+    converted.sort(key=lambda item: item["repo"])
+    return converted
+
+
+def merge_sources(
+    hub_sources: list[dict[str, Any]],
+    existing_sources: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    """Combine hub-derived sources with locally maintained index sources.
+
+    Hub entries always win: an existing source whose ``repo`` matches a hub
+    entry is dropped. Of the remaining existing sources, only those that
+    carry an ``index`` key are kept.
+
+    Returns:
+        A new list sorted by ``repo``; entries without a ``repo`` sort first.
+    """
+    hub_repos = {source["repo"] for source in hub_sources}
+    kept_local = [
+        source
+        # A manifest with a null ``sources`` value yields None here.
+        for source in existing_sources or []
+        if source.get("repo") not in hub_repos and "index" in source
+    ]
+    # Kept local entries are only required to have "index", so "repo" may be
+    # absent or null; use "" as the sort key instead of raising in the sort.
+    return sorted(
+        [*hub_sources, *kept_local],
+        key=lambda item: item.get("repo") or "",
+    )
+
+
+def build_manifest(
+    hub_payload: dict[str, Any],
+    existing: dict[str, Any] | None = None,
+    *,
+    merge: bool = False,
+) -> dict[str, Any]:
+    """Assemble the sources manifest from a hub payload.
+
+    ``version``, ``domain`` and ``collision_policy`` are taken from
+    ``existing`` when it is non-empty and otherwise default to ``1``,
+    ``"helix_forge"`` and ``"warn"``. The hub sources are merged with the
+    existing manifest's sources only when ``merge`` is set and ``existing``
+    is non-empty; otherwise the hub sources are used as-is.
+    """
+    hub_sources = sources_from_hub_payload(hub_payload)
+    sources = hub_sources
+    if merge and existing:
+        sources = merge_sources(hub_sources, existing.get("sources", []))
+    # An absent or empty manifest contributes no settings, so defaults apply.
+    settings = existing or {}
+    return {
+        "version": settings.get("version", 1),
+        "domain": settings.get("domain", "helix_forge"),
+        "collision_policy": settings.get("collision_policy", "warn"),
+        "sources": sources,
+    }
+
+
+def load_sources_manifest(path: Path) -> dict[str, Any]:
+    """Load the sources manifest at ``path``.
+
+    Returns the default empty manifest (version 1, domain ``helix_forge``,
+    collision policy ``warn``, no sources) when the file does not exist or
+    holds no YAML document (empty, whitespace-only or comment-only).
+    Otherwise returns whatever the YAML document parses to.
+    """
+    empty_manifest: dict[str, Any] = {
+        "version": 1,
+        "domain": "helix_forge",
+        "collision_policy": "warn",
+        "sources": [],
+    }
+    if not path.exists():
+        return empty_manifest
+    text = path.read_text(encoding="utf-8")
+    # safe_load() yields None for a file without a document; returning that
+    # would break callers that expect a mapping.
+    loaded = yaml.safe_load(text) if text.strip() else None
+    if loaded is None:
+        return empty_manifest
+    return loaded
+
+
+def write_sources_manifest(manifest: dict[str, Any], path: Path = DEFAULT_SOURCES_PATH) -> Path:
+    """Serialize ``manifest`` as YAML to ``path`` and return ``path``.
+
+    Missing parent directories are created first. Keys are written in their
+    existing order, unicode is emitted unescaped, and the file is UTF-8.
+    """
+    target_dir = path.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    rendered = yaml.safe_dump(manifest, sort_keys=False, allow_unicode=True)
+    path.write_text(rendered, encoding="utf-8")
+    return path
--- a/reuse_surface/reports.py
+++ b/reuse_surface/reports.py
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+import json
+from typing import Any
+
+from reuse_surface.registry import level_at_least, load_index, parse_vector
+
+
+def _availability_at_most(current: str, maximum: str) -> bool:
+    """Return whether ``current`` ranks at or below ``maximum``.
+
+    Rank is the position of each level in ``LEVEL_ORDERS["availability"]``.
+    A level missing from that sequence makes ``.index`` raise (presumably
+    ``ValueError``, as for a list or tuple; confirm against the registry).
+    """
+    from reuse_surface.registry import LEVEL_ORDERS
+
+    ranking = LEVEL_ORDERS["availability"]
+    current_rank = ranking.index(current)
+    maximum_rank = ranking.index(maximum)
+    return current_rank <= maximum_rank
+
+
+def cohort_filters_from_args(args: Any) -> dict[str, str | None]:
+    """Derive the cohort filter mapping from parsed CLI arguments.
+
+    The four base filters are read from ``args``, with ``None`` for any
+    missing attribute. A truthy ``planning_min`` overrides ``discovery_min``
+    and sets ``availability_max`` to ``"A1"`` unless one was given. A truthy
+    ``implementation_min`` overrides ``availability_min``.
+    """
+    base_names = ("discovery_min", "availability_min", "availability_max", "domain")
+    filters: dict[str, str | None] = {
+        name: getattr(args, name, None) for name in base_names
+    }
+
+    planning_min = getattr(args, "planning_min", None)
+    if planning_min:
+        filters["discovery_min"] = planning_min
+        if not filters["availability_max"]:
+            filters["availability_max"] = "A1"
+
+    implementation_min = getattr(args, "implementation_min", None)
+    if implementation_min:
+        filters["availability_min"] = implementation_min
+    return filters
+
+
+def select_cohort(
+    filters: dict[str, str | None],
+    index: dict[str, Any] | None = None,
+) -> list[dict[str, Any]]:
+    """Return the capabilities in ``index`` that satisfy every active filter.
+
+    Args:
+        filters: Mapping with optional ``discovery_min``, ``availability_min``,
+            ``availability_max`` and ``domain`` entries; falsy values are
+            ignored.
+        index: Index to search. The registry index is loaded only when this
+            is ``None``, so an explicitly passed empty index yields no
+            matches.
+
+    Returns:
+        The matching capability entries, in index order.
+    """
+    # ``index or load_index()`` would swap an explicit empty index for the
+    # real registry, so test for None rather than truthiness.
+    data = load_index() if index is None else index
+    matches: list[dict[str, Any]] = []
+    for item in data.get("capabilities", []):
+        vector = parse_vector(item["vector"])
+        if filters.get("discovery_min") and not level_at_least(
+            "discovery", vector["discovery"], filters["discovery_min"]
+        ):
+            continue
+        if filters.get("availability_min") and not level_at_least(
+            "availability", vector["availability"], filters["availability_min"]
+        ):
+            continue
+        if filters.get("availability_max") and not _availability_at_most(
+            vector["availability"], filters["availability_max"]
+        ):
+            continue
+        if filters.get("domain") and item.get("domain") != filters["domain"]:
+            continue
+        matches.append(item)
+    return matches
+
+
+def format_cohort_markdown(
+    matches: list[dict[str, Any]],
+    filters: dict[str, str | None],
+) -> str:
+    """Render a cohort as a markdown report that ends with a newline.
+
+    The report has a title, then the active (truthy) filters sorted by key,
+    then either a table of ID, vector and consumption modes followed by a
+    count line, or a "no matches" note when ``matches`` is empty.
+    """
+    out = ["# Capability cohort report", ""]
+    active = sorted((key, value) for key, value in filters.items() if value)
+    if active:
+        out.append("Filters:")
+        out.extend(f"- `{key}`: `{value}`" for key, value in active)
+        out.append("")
+    if matches:
+        out.append("| ID | Vector | Consumption modes |")
+        out.append("|---|---|---|")
+        for item in matches:
+            modes = ", ".join(item.get("consumption_modes", []))
+            out.append(f"| `{item['id']}` | {item['vector']} | {modes} |")
+        total = len(matches)
+        suffix = "y" if total == 1 else "ies"
+        out.append("")
+        out.append(f"**{total}** capabilit{suffix}.")
+    else:
+        out.append("_No capabilities matched._")
+    return "\n".join(out) + "\n"
+
+
+def format_cohort_json(matches: list[dict[str, Any]], filters: dict[str, str | None]) -> str:
+    """Render a cohort as a JSON document with sorted keys and 2-space indent.
+
+    The document carries the match ``count``, the active (truthy)
+    ``filters`` and the matching ``capabilities`` themselves.
+    """
+    active_filters: dict[str, str] = {}
+    for key, value in filters.items():
+        if value:
+            active_filters[key] = value
+    return json.dumps(
+        {
+            "count": len(matches),
+            "filters": active_filters,
+            "capabilities": matches,
+        },
+        indent=2,
+        sort_keys=True,
+    )