Files
reuse-surface/reuse_surface/hub_sync.py
tegwick 270065ff58
Some checks failed
ci / validate-registry (push) Has been cancelled
Implement REUSE-WP-0012 federation scale and intent alignment
Add hub sync and report cohorts CLI commands with pytest coverage, document
sibling index publish contract and hub hardening path, align INTENT layout,
raise external evidence on three registry entries, and close gap priorities
19-23 (priority 18 deferred on sibling index blocks).
2026-06-16 00:42:50 +02:00

100 lines
2.9 KiB
Python

from __future__ import annotations
from pathlib import Path
from typing import Any
import yaml
from reuse_surface.registry import ROOT
# Default location of the federation sources manifest, resolved under ROOT
# (imported from reuse_surface.registry). Used as the default target path of
# write_sources_manifest.
DEFAULT_SOURCES_PATH = ROOT / "registry" / "federation" / "sources.yaml"
def registration_to_source(registration: dict[str, Any]) -> dict[str, Any]:
    """Build a sources-manifest entry from a single hub registration record.

    ``repo`` and ``url`` are mandatory and raise ``KeyError`` when absent.
    ``enabled``, ``required`` and ``domain`` fall back to ``True``, ``False``
    and ``"helix_forge"`` respectively. The optional keys ``description``,
    ``cache_ttl_seconds``, ``auth_env`` and ``auth_header`` are copied only
    when present with a non-``None`` value; any other key is ignored.
    """
    passthrough_keys = (
        "description",
        "cache_ttl_seconds",
        "auth_env",
        "auth_header",
    )
    entry: dict[str, Any] = {
        "repo": registration["repo"],
        "url": registration["url"],
        "enabled": registration.get("enabled", True),
        "required": registration.get("required", False),
        "domain": registration.get("domain", "helix_forge"),
    }
    # Falsy-but-meaningful values (0, "", False) are kept; only None is dropped.
    entry.update(
        {
            key: registration[key]
            for key in passthrough_keys
            if registration.get(key) is not None
        }
    )
    return entry
def sources_from_hub_payload(
    payload: dict[str, Any],
    *,
    enabled_only: bool = True,
) -> list[dict[str, Any]]:
    """Turn the ``repos`` list of a hub payload into sorted source entries.

    Registrations without a truthy ``url`` are always skipped. When
    *enabled_only* is true, registrations whose ``enabled`` flag is falsy are
    skipped as well (a missing flag counts as enabled). Each remaining
    registration is converted with ``registration_to_source`` and the result
    is ordered by its ``repo`` value.
    """
    entries: list[dict[str, Any]] = [
        registration_to_source(record)
        for record in payload.get("repos", [])
        if (not enabled_only or record.get("enabled", True)) and record.get("url")
    ]
    entries.sort(key=lambda entry: entry["repo"])
    return entries
def merge_sources(
    hub_sources: list[dict[str, Any]],
    existing_sources: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Combine hub-derived sources with selected entries of an existing manifest.

    All *hub_sources* are kept. An entry from *existing_sources* is carried
    over only when its ``repo`` is not already supplied by the hub and it has
    an ``index`` key; every other existing entry is dropped. The combined
    list is returned sorted by ``repo``; neither input list is modified.
    """
    claimed_by_hub = {entry["repo"] for entry in hub_sources}
    carried_over = [
        entry
        for entry in existing_sources
        if entry.get("repo") not in claimed_by_hub and "index" in entry
    ]
    combined = [*hub_sources, *carried_over]
    combined.sort(key=lambda entry: entry["repo"])
    return combined
def build_manifest(
    hub_payload: dict[str, Any],
    existing: dict[str, Any] | None = None,
    *,
    merge: bool = False,
) -> dict[str, Any]:
    """Assemble a sources manifest from a hub payload.

    The ``sources`` list comes from ``sources_from_hub_payload``. When *merge*
    is true and a non-empty *existing* manifest is given, its ``sources`` are
    folded in through ``merge_sources``. ``version``, ``domain`` and
    ``collision_policy`` are taken from *existing* when available and
    otherwise default to ``1``, ``"helix_forge"`` and ``"warn"``.
    """
    # A missing or empty existing manifest contributes no header values.
    previous: dict[str, Any] = existing if existing else {}
    sources = sources_from_hub_payload(hub_payload)
    if merge and existing:
        sources = merge_sources(sources, existing.get("sources", []))
    return {
        "version": previous.get("version", 1),
        "domain": previous.get("domain", "helix_forge"),
        "collision_policy": previous.get("collision_policy", "warn"),
        "sources": sources,
    }
def load_sources_manifest(path: Path) -> dict[str, Any]:
    """Load the sources manifest stored at *path*.

    Returns a fresh default manifest (version ``1``, domain ``"helix_forge"``,
    collision policy ``"warn"``, no sources) when the file does not exist or
    contains no YAML document. Otherwise returns the parsed file content.

    Raises whatever ``Path.read_text`` or ``yaml.safe_load`` raise for an
    unreadable or malformed file.
    """
    default_manifest: dict[str, Any] = {
        "version": 1,
        "domain": "helix_forge",
        "collision_policy": "warn",
        "sources": [],
    }
    if not path.exists():
        return default_manifest
    loaded = yaml.safe_load(path.read_text(encoding="utf-8"))
    # safe_load yields None for an empty or comment-only file; treat that like
    # a missing manifest so callers always receive a mapping, not None.
    if loaded is None:
        return default_manifest
    return loaded
def write_sources_manifest(manifest: dict[str, Any], path: Path = DEFAULT_SOURCES_PATH) -> Path:
    """Write *manifest* to *path* as UTF-8 YAML and return *path*.

    Missing parent directories are created first. Keys are emitted in the
    mapping's own order (no sorting) and non-ASCII characters are written
    unescaped.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    rendered = yaml.safe_dump(manifest, sort_keys=False, allow_unicode=True)
    path.write_text(rendered, encoding="utf-8")
    return path