Files
reuse-surface/reuse_surface/reports.py
tegwick 81c55e598f
Some checks failed
ci / validate-registry (push) Has been cancelled
REUSE-WP-0015: dedup owner entries, add report gaps (T02/T03/T05)
Remove 17 owner-migrated capabilities from reuse-surface index (keep
activity-core stub). Add report gaps CLI, roster stats + gaps CI steps.
T01 remains operator-blocked on Gitea publish.
2026-06-16 02:22:17 +02:00

211 lines
7.1 KiB
Python

from __future__ import annotations
import json
from pathlib import Path
from typing import Any
import yaml
from reuse_surface.registry import ROOT, level_at_least, load_index, parse_vector
def _availability_at_most(current: str, maximum: str) -> bool:
    """Return True when *current* does not rank above *maximum*.

    Both levels are located in ``LEVEL_ORDERS["availability"]`` and their
    positions are compared. Any error raised by ``.index()`` for a level that
    is not in that ordering propagates to the caller.
    """
    # Resolved at call time rather than at module import.
    from reuse_surface.registry import LEVEL_ORDERS

    ranking = LEVEL_ORDERS["availability"]
    current_rank = ranking.index(current)
    maximum_rank = ranking.index(maximum)
    return current_rank <= maximum_rank
def cohort_filters_from_args(args: Any) -> dict[str, str | None]:
    """Translate parsed CLI arguments into a cohort filter mapping.

    The result always has the keys ``discovery_min``, ``availability_min``,
    ``availability_max`` and ``domain``; each is read from the attribute of
    the same name on *args* and is ``None`` when that attribute is absent.

    Two shorthand attributes are layered on top:

    * ``planning_min`` (when truthy) replaces ``discovery_min`` and, if no
      ``availability_max`` was given, caps availability at ``"A1"``.
    * ``implementation_min`` (when truthy) replaces ``availability_min``.
    """
    direct_keys = ("discovery_min", "availability_min", "availability_max", "domain")
    selected: dict[str, str | None] = {
        name: getattr(args, name, None) for name in direct_keys
    }

    planning_floor = getattr(args, "planning_min", None)
    if planning_floor:
        selected["discovery_min"] = planning_floor
        # Keep an explicit ceiling; only default it when none was supplied.
        if not selected["availability_max"]:
            selected["availability_max"] = "A1"

    implementation_floor = getattr(args, "implementation_min", None)
    if implementation_floor:
        selected["availability_min"] = implementation_floor

    return selected
def select_cohort(
    filters: dict[str, str | None],
    index: dict[str, Any] | None = None,
) -> list[dict[str, Any]]:
    """Return the capabilities that satisfy every active filter.

    Args:
        filters: Mapping with optional ``discovery_min``, ``availability_min``,
            ``availability_max`` and ``domain`` entries. Missing or falsy
            entries are ignored.
        index: Registry index to search. Only ``None`` triggers a call to
            ``load_index()``; an explicitly supplied mapping is used as-is,
            even when it is empty.

    Returns:
        The matching capability rows, in index order.

    Raises:
        KeyError: If a capability row has no ``"vector"`` entry.
    """
    # `index or load_index()` would silently swap an explicit empty index for
    # the global registry, so compare against None instead.
    data = load_index() if index is None else index

    # The filter values do not change per row; look them up once.
    discovery_min = filters.get("discovery_min")
    availability_min = filters.get("availability_min")
    availability_max = filters.get("availability_max")
    domain = filters.get("domain")

    matches: list[dict[str, Any]] = []
    # `or []` tolerates an index whose "capabilities" key is present but null.
    for item in data.get("capabilities") or []:
        vector = parse_vector(item["vector"])
        if discovery_min and not level_at_least(
            "discovery", vector["discovery"], discovery_min
        ):
            continue
        if availability_min and not level_at_least(
            "availability", vector["availability"], availability_min
        ):
            continue
        if availability_max and not _availability_at_most(
            vector["availability"], availability_max
        ):
            continue
        if domain and item.get("domain") != domain:
            continue
        matches.append(item)
    return matches
def format_cohort_markdown(
    matches: list[dict[str, Any]],
    filters: dict[str, str | None],
) -> str:
    """Render a cohort selection as a Markdown report.

    The report starts with a title, lists the filters that carry a truthy
    value (sorted by name), and then shows either a placeholder sentence when
    *matches* is empty or a table of ID / vector / consumption modes followed
    by a capability count. The returned text ends with a newline.
    """
    out = ["# Capability cohort report", ""]

    applied = sorted((name, setting) for name, setting in filters.items() if setting)
    if applied:
        out.append("Filters:")
        out.extend(f"- `{name}`: `{setting}`" for name, setting in applied)
        out.append("")

    if matches:
        out.extend(["| ID | Vector | Consumption modes |", "|---|---|---|"])
        for entry in matches:
            mode_list = ", ".join(entry.get("consumption_modes", []))
            out.append(f"| `{entry['id']}` | {entry['vector']} | {mode_list} |")
        total = len(matches)
        # "capability" for exactly one row, "capabilities" otherwise.
        noun_ending = "y" if total == 1 else "ies"
        out.append("")
        out.append(f"**{total}** capabilit{noun_ending}.")
    else:
        out.append("_No capabilities matched._")

    return "\n".join(out) + "\n"
def format_cohort_json(matches: list[dict[str, Any]], filters: dict[str, str | None]) -> str:
    """Serialise a cohort selection as an indented, key-sorted JSON document.

    The document carries the number of matches (``count``), the filters that
    have a truthy value (``filters``) and the matched rows themselves
    (``capabilities``).
    """
    active_filters: dict[str, str | None] = {}
    for name, setting in filters.items():
        if setting:
            active_filters[name] = setting

    document = {
        "capabilities": matches,
        "count": len(matches),
        "filters": active_filters,
    }
    return json.dumps(document, sort_keys=True, indent=2)
def collect_gap_report(
    roster_path: Path,
    *,
    index: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build a gap report from a roster YAML file and the local index.

    Args:
        roster_path: Path to the roster YAML file. Its ``repos`` list and
            ``summary`` mapping are read; an empty file is treated as an
            empty roster.
        index: Local registry index. Only ``None`` triggers a call to
            ``load_index()``; an explicitly supplied mapping is used as-is.

    Returns:
        A dict with these keys:

        * ``roster_path`` - the roster path as a string.
        * ``summary`` - the roster's ``summary`` mapping (``{}`` if absent).
        * ``publish_fail`` - repos whose ``publish_check`` is ``"fail"``.
        * ``empty_scaffold_count`` / ``empty_scaffolds`` - repos with status
          ``"established"`` and a ``capability_count`` of 0 (or no count).
        * ``seeded_repos`` - repos with a truthy ``seed_from_reuse_surface``.
        * ``dedup_pending_local_owners`` - local index capability ids grouped
          by owner, excluding the owners ``"reuse-surface"`` and ``"unknown"``.
        * ``local_capability_count`` - number of rows in the local index.

    Raises:
        KeyError: If a reported repo row lacks ``"slug"`` or a local
            capability row lacks ``"id"``.
    """
    # yaml.safe_load returns None for an empty document; fall back to an empty
    # mapping so the .get() calls below do not raise AttributeError.
    roster = yaml.safe_load(roster_path.read_text(encoding="utf-8")) or {}
    # `or` also covers keys that are present but explicitly null in the YAML.
    repos = roster.get("repos") or []
    summary = roster.get("summary") or {}

    # An explicitly passed (even empty) index must not be replaced by the
    # global registry, so test for None rather than truthiness.
    local_index = load_index() if index is None else index
    local_capabilities = local_index.get("capabilities") or []

    local_by_owner: dict[str, list[str]] = {}
    for row in local_capabilities:
        owner = row.get("owner") or "unknown"
        local_by_owner.setdefault(owner, []).append(row["id"])

    publish_fail = [r for r in repos if r.get("publish_check") == "fail"]
    empty_scaffolds = [
        r
        for r in repos
        if r.get("status") == "established" and r.get("capability_count", 0) == 0
    ]
    seeded = [r for r in repos if r.get("seed_from_reuse_surface")]
    # Rows owned by reuse-surface itself (or with no owner) are not stubs.
    dedup_pending = [
        {
            "slug": owner,
            "local_ids": ids,
        }
        for owner, ids in sorted(local_by_owner.items())
        if owner not in {"reuse-surface", "unknown"}
    ]

    return {
        "roster_path": str(roster_path),
        "summary": summary,
        "publish_fail": [
            {
                "slug": r["slug"],
                "hub_registered": r.get("hub_registered"),
                "publish_note": r.get("publish_note"),
            }
            for r in publish_fail
        ],
        "empty_scaffold_count": len(empty_scaffolds),
        "empty_scaffolds": [r["slug"] for r in empty_scaffolds],
        "seeded_repos": [
            {
                "slug": r["slug"],
                "seed_capability_ids": r.get("seed_capability_ids", []),
                "publish_check": r.get("publish_check"),
            }
            for r in seeded
        ],
        "dedup_pending_local_owners": dedup_pending,
        "local_capability_count": len(local_capabilities),
    }
def format_gap_markdown(report: dict[str, Any]) -> str:
    """Render a gap report mapping as Markdown.

    Args:
        report: Mapping with the keys produced by ``collect_gap_report``.
            Only ``roster_path`` is required; every other section falls back
            to an empty value when its key is missing.

    Returns:
        Markdown text ending with a newline, containing the roster path, an
        optional workstation summary line, and the sections "Publish
        blocked", "Local index owner stubs", "Empty scaffolds" and
        "Seed-ready repos", followed by the local capability count.

    Raises:
        KeyError: If ``roster_path`` is missing, or a listed item has no
            ``slug`` (or, for owner stubs, no ``local_ids``).
    """
    lines = ["# Registry gap report", "", f"**Roster:** `{report['roster_path']}`"]

    summary = report.get("summary") or {}
    if summary:
        total = summary.get("total", "?")
        lines.append(
            f"**Workstation:** {summary.get('established', '?')}/{total} established; "
            f"publish pass {summary.get('publish_pass', '?')}/{total}"
        )
    lines.append("")

    fails = report.get("publish_fail") or []
    lines.append(f"## Publish blocked ({len(fails)})")
    if fails:
        for item in fails:
            note = item.get("publish_note") or ""
            # Separate the note from the slug; without a separator the note
            # text ran straight into the closing backtick.
            suffix = f": {note}" if note else ""
            lines.append(f"- `{item['slug']}`{suffix}")
    else:
        lines.append("- none")
    lines.append("")

    dedup = report.get("dedup_pending_local_owners") or []
    lines.append(f"## Local index owner stubs ({len(dedup)})")
    if dedup:
        for item in dedup:
            ids = ", ".join(f"`{cap_id}`" for cap_id in item["local_ids"])
            lines.append(f"- **{item['slug']}:** {ids}")
    else:
        lines.append("- none (owner rows migrated to canonical repos)")
    lines.append("")

    empty_count = report.get("empty_scaffold_count", 0)
    lines.append(f"## Empty scaffolds ({empty_count})")
    slugs = report.get("empty_scaffolds") or []
    if slugs:
        lines.extend(f"- `{slug}`" for slug in slugs)
    else:
        lines.append("- none")
    lines.append("")

    seeded = report.get("seeded_repos") or []
    lines.append(f"## Seed-ready repos ({len(seeded)})")
    for item in seeded:
        # The key is usually present with a None value, so a .get() default
        # alone would print "None"; `or` covers both missing and null.
        publish = item.get("publish_check") or "?"
        lines.append(f"- `{item['slug']}` (publish: {publish})")
    lines.append("")

    lines.append(
        f"**Local reuse-surface capabilities:** {report.get('local_capability_count', 0)}"
    )
    return "\n".join(lines) + "\n"
def format_gap_json(report: dict[str, Any]) -> str:
    """Serialise a gap report as JSON with sorted keys and two-space indent."""
    encoded = json.dumps(report, sort_keys=True, indent=2)
    return encoded
def default_roster_path() -> Path:
    """Return the roster file location used when the caller supplies none.

    The path is the registry ``ROOT`` joined with the roster's relative
    location under ``registry/federation``.
    """
    relative_location = "registry/federation/local-repo-roster.yaml"
    return ROOT / relative_location