Complete REUSE-WP-0005: registry federation and relation graphs
Some checks failed
ci / validate-registry (push) Has been cancelled

Add federation manifest and schema, federation compose and graph CLI commands,
relation cycle/reference checks, federated index and Mermaid graph artifacts,
RegistryFederation guide, and CI validation updates.
This commit is contained in:
2026-06-15 01:43:02 +02:00
parent f218a5305c
commit 40ab8dded0
15 changed files with 700 additions and 20 deletions

View File

@@ -10,6 +10,8 @@ import yaml
from jsonschema import Draft202012Validator
from reuse_surface.catalog import write_catalog
from reuse_surface.federation import write_federated_index
from reuse_surface.graph import check_relations, render_mermaid, write_graph
from reuse_surface.overlaps import find_overlaps
from reuse_surface.registry import (
ROOT,
@@ -54,6 +56,8 @@ def cmd_validate(args: argparse.Namespace) -> int:
if not target:
index = load_index()
warnings.extend(_check_index_drift(paths, index))
if args.relations:
warnings.extend(check_relations())
for warning in warnings:
print(f"warning: {warning}", file=sys.stderr)
@@ -140,6 +144,35 @@ def cmd_overlaps(args: argparse.Namespace) -> int:
return 0
def cmd_federation_compose(args: argparse.Namespace) -> int:
    """Handle ``federation compose``: write the federated index and report it.

    Returns 1 when ``write_federated_index`` raises ``FileNotFoundError`` or
    ``ValueError`` (the message is printed to stderr). Otherwise prints each
    warning to stderr, a one-line summary to stdout, and returns 0.
    """
    try:
        written_path, notes = write_federated_index()
    except (FileNotFoundError, ValueError) as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1

    for note in notes:
        print(f"warning: {note}", file=sys.stderr)

    import yaml

    # Read the file back so the reported count reflects what is on disk.
    composed = yaml.safe_load(written_path.read_text(encoding="utf-8"))
    total = len(composed.get("capabilities", []))
    print(f"ok: wrote {written_path.relative_to(ROOT)} ({total} capabilities)")
    return 0
def cmd_graph(args: argparse.Namespace) -> int:
    """Handle ``graph``: emit the Mermaid relation graph.

    With ``--stdout`` the rendered graph is printed; otherwise it is written
    via ``write_graph()`` and the written path is reported. With ``--check``
    the findings of ``check_relations()`` are printed to stderr as warnings.
    Always returns 0; relation findings are warnings, not failures.
    """
    # Run the relation check first so its findings are gathered before output.
    findings: list[str] = check_relations() if args.check else []

    if args.stdout:
        # Render only when the text is actually printed; write_graph() renders
        # on its own, so rendering up front would do the work twice.
        print(render_mermaid(), end="")
    else:
        written = write_graph()
        print(f"ok: wrote {written.relative_to(ROOT)}")

    for finding in findings:
        print(f"warning: {finding}", file=sys.stderr)
    return 0
def cmd_catalog(args: argparse.Namespace) -> int:
index = load_index()
indexed_entries = _load_indexed_entries()
@@ -199,8 +232,20 @@ def main(argv: list[str] | None = None) -> int:
nargs="?",
help="optional capability markdown file; defaults to all entries",
)
validate.add_argument(
"--relations",
action="store_true",
help="check relation cycles and broken references",
)
validate.set_defaults(func=cmd_validate)
federation = subparsers.add_parser(
"federation", help="federation index operations"
)
federation_sub = federation.add_subparsers(dest="federation_command", required=True)
compose = federation_sub.add_parser("compose", help="compose federated index")
compose.set_defaults(func=cmd_federation_compose)
query = subparsers.add_parser("query", help="query capability index")
query.add_argument("--discovery-min")
query.add_argument("--availability-min")
@@ -234,6 +279,19 @@ def main(argv: list[str] | None = None) -> int:
)
catalog.set_defaults(func=cmd_catalog)
graph = subparsers.add_parser("graph", help="generate relation graph")
graph.add_argument(
"--stdout",
action="store_true",
help="print Mermaid to stdout instead of writing docs/graph/",
)
graph.add_argument(
"--check",
action="store_true",
help="report depends_on cycles and broken relation references",
)
graph.set_defaults(func=cmd_graph)
args = parser.parse_args(argv)
return args.func(args)

111
reuse_surface/federation.py Normal file
View File

@@ -0,0 +1,111 @@
from __future__ import annotations
import sys
from datetime import date
from pathlib import Path
from typing import Any
import yaml
from jsonschema import Draft202012Validator
from reuse_surface.registry import ROOT
MANIFEST_PATH = ROOT / "registry" / "federation" / "sources.yaml"
SCHEMA_PATH = ROOT / "schemas" / "federation.schema.yaml"
FEDERATED_INDEX_PATH = ROOT / "registry" / "indexes" / "federated.yaml"
def _expand_path(index_path: str) -> Path:
return Path(index_path).expanduser()
def load_federation_manifest(path: Path | None = None) -> dict[str, Any]:
    """Load the federation manifest and validate it against its schema.

    Args:
        path: Manifest file to read; ``MANIFEST_PATH`` is used when omitted.

    Returns:
        The parsed manifest.

    Raises:
        ValueError: The manifest violates the schema at ``SCHEMA_PATH``; the
            message joins every validation error message with "; ".
    """
    source = path if path else MANIFEST_PATH
    with source.open(encoding="utf-8") as stream:
        document = yaml.safe_load(stream)

    schema_text = SCHEMA_PATH.read_text(encoding="utf-8")
    checker = Draft202012Validator(yaml.safe_load(schema_text))
    problems = sorted(checker.iter_errors(document), key=lambda err: err.path)
    if not problems:
        return document

    joined = "; ".join(problem.message for problem in problems)
    raise ValueError(f"invalid federation manifest: {joined}")
def _resolve_index_path(index_value: str) -> Path:
    """Turn a manifest ``index`` value into a filesystem path.

    Absolute paths (after ``~`` expansion) are returned as-is; relative paths
    are anchored at ``ROOT`` and resolved.
    """
    candidate = _expand_path(index_value)
    if candidate.is_absolute():
        return candidate
    return (ROOT / candidate).resolve()
def compose_federated_index(
manifest: dict[str, Any] | None = None,
) -> tuple[dict[str, Any], list[str]]:
manifest = manifest or load_federation_manifest()
warnings: list[str] = []
merged: list[dict[str, Any]] = []
seen_ids: dict[str, str] = {}
source_summaries: list[dict[str, Any]] = []
for source in manifest["sources"]:
if not source.get("enabled", False):
continue
index_path = _resolve_index_path(source["index"])
if not index_path.exists():
message = f"missing index for {source['repo']}: {index_path}"
if source.get("required", False):
raise FileNotFoundError(message)
warnings.append(message)
continue
with index_path.open(encoding="utf-8") as handle:
index_data = yaml.safe_load(handle)
count = 0
for item in index_data.get("capabilities", []):
cap_id = item["id"]
if cap_id in seen_ids:
warnings.append(
f"duplicate id {cap_id}: {seen_ids[cap_id]} and {source['repo']}"
)
else:
seen_ids[cap_id] = source["repo"]
federated_item = dict(item)
federated_item["source_repo"] = source["repo"]
federated_item["source_index"] = source["index"]
merged.append(federated_item)
count += 1
source_summaries.append(
{
"repo": source["repo"],
"index": source["index"],
"count": count,
}
)
federated = {
"version": manifest.get("version", 1),
"updated": date.today().isoformat(),
"domain": manifest.get("domain"),
"collision_policy": manifest.get("collision_policy", "warn"),
"sources": source_summaries,
"capabilities": sorted(merged, key=lambda item: item["id"]),
}
return federated, warnings
def write_federated_index(
    output_path: Path | None = None,
    manifest: dict[str, Any] | None = None,
) -> tuple[Path, list[str]]:
    """Compose the federated index and write it to disk as YAML.

    Args:
        output_path: Destination file; ``FEDERATED_INDEX_PATH`` when omitted.
        manifest: Passed through to ``compose_federated_index``.

    Returns:
        ``(path, warnings)``: the file written and the composition warnings.
    """
    composed, notes = compose_federated_index(manifest)

    destination = output_path if output_path else FEDERATED_INDEX_PATH
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Comment banner telling readers the file is generated.
    banner = (
        "# Composed federated capability index. Regenerate with:\n"
        "# reuse-surface federation compose\n"
    )
    body = yaml.safe_dump(composed, sort_keys=False)
    destination.write_text(banner + body, encoding="utf-8")
    return destination, notes

153
reuse_surface/graph.py Normal file
View File

@@ -0,0 +1,153 @@
from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from reuse_surface.federation import FEDERATED_INDEX_PATH, compose_federated_index
from reuse_surface.registry import ROOT, load_index, parse_front_matter
GRAPH_PATH = ROOT / "docs" / "graph" / "capability-graph.mmd"
RELATION_TYPES = [
"depends_on",
"supports",
"used_by",
"related_to",
"specializes",
"generalizes",
"replaces",
"wraps",
]
@dataclass
class RelationEdge:
    """One directed relation between two capabilities."""

    # Id of the capability whose entry declares the relation.
    source_id: str
    # Id the relation points at.
    target_id: str
    # Relation key under which the target was listed (e.g. "depends_on").
    relation_type: str
def _node_id(capability_id: str) -> str:
return re.sub(r"[^a-zA-Z0-9_]", "_", capability_id)
def _load_local_relations() -> dict[str, dict[str, list[str]]]:
    """Collect the relations declared by every locally indexed capability.

    Walks the index returned by ``load_index()``, parses the front matter of
    each entry file that exists under ``ROOT`` and keeps only relation values
    that are lists.

    Returns:
        Mapping of capability id (from the entry's front matter) to a mapping
        of relation type -> list of targets.
    """
    collected: dict[str, dict[str, list[str]]] = {}
    for listed in load_index().get("capabilities", []):
        entry_file = ROOT / listed["path"]
        if entry_file.exists():
            front_matter = parse_front_matter(entry_file)
            declared = front_matter.get("relations") or {}
            per_type: dict[str, list[str]] = {}
            for kind, targets in declared.items():
                # Non-list relation values are ignored.
                if isinstance(targets, list):
                    per_type[kind] = list(targets)
            collected[front_matter["id"]] = per_type
    return collected
def _known_ids() -> set[str]:
    """Return the ids of all capabilities in the federated index.

    Reads the file at ``FEDERATED_INDEX_PATH`` when it exists; otherwise the
    index is composed in memory with ``compose_federated_index()``. An empty
    file or a null ``capabilities`` key yields an empty set.
    """
    if FEDERATED_INDEX_PATH.exists():
        import yaml

        # safe_load returns None for an empty document; fall back to an empty
        # mapping so the lookup below cannot fail on ``None.get``.
        data = yaml.safe_load(FEDERATED_INDEX_PATH.read_text(encoding="utf-8")) or {}
    else:
        data, _ = compose_federated_index()
    return {item["id"] for item in data.get("capabilities") or []}
def collect_edges() -> list[RelationEdge]:
    """Flatten the local relation maps into a list of ``RelationEdge`` items.

    Edges appear in the iteration order of ``_load_local_relations()``: by
    source capability, then relation type, then target.
    """
    return [
        RelationEdge(
            source_id=origin,
            target_id=destination,
            relation_type=kind,
        )
        for origin, by_kind in _load_local_relations().items()
        for kind, destinations in by_kind.items()
        for destination in destinations
    ]
def find_depends_on_cycles() -> list[list[str]]:
    """Find cycles among the local ``depends_on`` relations.

    Runs a depth-first search over the ``depends_on`` targets of every local
    capability. Each time an edge leads back to a node on the current search
    trail, the trail from that node onward is recorded, closed by the node
    itself (for example ``["a", "b", "a"]``).
    """
    adjacency: dict[str, list[str]] = {}
    for cap_id, relation_map in _load_local_relations().items():
        adjacency[cap_id] = list(relation_map.get("depends_on", []))

    found: list[list[str]] = []
    seen: set[str] = set()  # nodes already entered at least once
    on_trail: set[str] = set()  # nodes on the current search trail
    trail: list[str] = []  # the current trail, in visiting order

    def walk(current: str) -> None:
        seen.add(current)
        on_trail.add(current)
        trail.append(current)
        for successor in adjacency.get(current, []):
            if successor in on_trail:
                # Back edge: everything from the successor onward is a cycle.
                loop_start = trail.index(successor)
                found.append([*trail[loop_start:], successor])
            elif successor not in seen:
                walk(successor)
        trail.pop()
        on_trail.remove(current)

    for start in adjacency:
        if start not in seen:
            walk(start)
    return found
def find_broken_references(known: set[str] | None = None) -> list[str]:
    """Report relations whose target is not a known capability id.

    Args:
        known: Ids considered valid targets. Only when this is ``None`` are
            the ids loaded with ``_known_ids()``; an explicitly passed empty
            set is honoured, so every relation is then reported.

    Returns:
        One ``"broken relation: ..."`` message per edge with an unknown target.
    """
    # ``known or ...`` would discard a deliberately empty set, so test for
    # None explicitly.
    if known is None:
        known = _known_ids()
    return [
        f"broken relation: {edge.source_id} "
        f"{edge.relation_type} -> {edge.target_id}"
        for edge in collect_edges()
        if edge.target_id not in known
    ]
def check_relations() -> list[str]:
    """Return all relation warnings: dependency cycles, then broken references."""
    cycle_notes = [
        f"depends_on cycle: {' -> '.join(cycle)}"
        for cycle in find_depends_on_cycles()
    ]
    return cycle_notes + find_broken_references()
def _node_labels() -> dict[str, str]:
    """Build the node label for every capability in the local index.

    Each label is the capability id and its ``vector`` value, separated by
    ``<br/>``.
    """
    listed = load_index().get("capabilities", [])
    return {item["id"]: f"{item['id']}<br/>{item['vector']}" for item in listed}
def render_mermaid() -> str:
    """Render the capability relation graph as Mermaid ``graph LR`` text.

    Nodes (one per indexed capability, sorted by id) are listed first,
    followed by one labelled arrow per relation edge. The text ends with a
    newline.
    """
    labels = _node_labels()
    edges = collect_edges()

    node_lines = [
        f' {_node_id(cap_id)}["{label}"]'
        for cap_id, label in sorted(labels.items())
    ]
    edge_lines = [
        f" {_node_id(edge.source_id)} -->|{edge.relation_type}| {_node_id(edge.target_id)}"
        for edge in edges
    ]
    return "\n".join(["graph LR", *node_lines, *edge_lines]) + "\n"
def write_graph(path: Path | None = None) -> Path:
    """Write the rendered Mermaid graph to disk and return the file path.

    Args:
        path: Destination file; ``GRAPH_PATH`` when omitted. Missing parent
            directories are created.
    """
    destination = path if path else GRAPH_PATH
    destination.parent.mkdir(parents=True, exist_ok=True)
    rendered = render_mermaid()
    destination.write_text(rendered, encoding="utf-8")
    return destination