#!/usr/bin/env python3
"""gitea_inventory.py — compare Gitea repos against state-hub registered repos.

Outputs three sections:
  1. Registered   — in both Gitea and state-hub (shows last_state_synced_at)
  2. Unregistered — on Gitea but not in state-hub (candidates for onboarding)
  3. Hub-only     — in state-hub but no matching Gitea repo (local/stale)

Usage:
    uv run python scripts/gitea_inventory.py [--json] [--api-base URL]
    make gitea-inventory
    make gitea-inventory JSON=1

Environment (from .env or shell):
    GITEA_URL    — base URL, e.g. http://92.205.130.254:32166
    GITEA_TOKEN  — personal access token (needs read:repo scope)
    API_BASE     — state-hub API, default http://127.0.0.1:8000
"""

from __future__ import annotations

import argparse
import json
import os
import sys
import urllib.parse
import urllib.request
from datetime import datetime, timezone

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------

DEFAULT_API_BASE = "http://127.0.0.1:8000"
DEFAULT_GITEA_URL = "http://92.205.130.254:32166"
GITEA_ORG = "coulomb"

# Page size requested from Gitea list endpoints.
_PAGE_LIMIT = 50


def _load_env() -> None:
    """Load .env file from state-hub root if present (simple key=value parser).

    Blank lines, ``#`` comments and lines without ``=`` are ignored.
    Surrounding single/double quotes are stripped from values.  Variables
    already present in the process environment are never overwritten.
    """
    env_path = os.path.normpath(
        os.path.join(os.path.dirname(__file__), "..", ".env")
    )
    if not os.path.exists(env_path):
        return
    # Explicit encoding so parsing does not depend on the platform default.
    with open(env_path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, val = line.partition("=")
            key = key.strip()
            val = val.strip().strip('"').strip("'")
            if key and key not in os.environ:
                os.environ[key] = val


# ---------------------------------------------------------------------------
# HTTP helpers
# ---------------------------------------------------------------------------

def _get(url: str, token: str | None = None) -> dict | list:
    """GET *url* and return the decoded JSON body.

    When *token* is given it is sent as ``Authorization: token <token>``.
    This is deliberately best-effort: on any failure (network, HTTP status,
    invalid JSON) the error is reported on stderr and ``[]`` is returned so
    the caller can still render a partial report.
    """
    req = urllib.request.Request(url)
    if token:
        req.add_header("Authorization", f"token {token}")
    req.add_header("Accept", "application/json")
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read().decode())
    except Exception as exc:  # best-effort boundary: report and continue
        print(f" HTTP error: {url} → {exc}", file=sys.stderr)
        return []


def _gitea_pages(base_url: str, path: str, token: str | None) -> list[dict]:
    """Paginate through a Gitea list endpoint and return all items.

    Pages are requested until one comes back empty (or is not a list, which
    is also what ``_get`` yields on error).  A short page is NOT treated as
    the last page: if the server caps the page size below the requested
    limit, every page is "short" and stopping there would silently truncate
    the inventory.
    """
    results: list[dict] = []
    previous: list | None = None
    page = 1
    while True:
        url = f"{base_url}{path}?limit={_PAGE_LIMIT}&page={page}"
        data = _get(url, token)
        if not isinstance(data, list) or not data:
            break
        # Guard against an endpoint that ignores ``page`` and keeps
        # returning the same items, which would otherwise loop forever.
        if data == previous:
            break
        results.extend(data)
        previous = data
        page += 1
    return results


# ---------------------------------------------------------------------------
# Fetch data
# ---------------------------------------------------------------------------

def fetch_gitea_repos(gitea_url: str, token: str | None) -> list[dict]:
    """Return all repos in the coulomb org (+ user repos if token is set).

    Entries are de-duplicated by ``full_name``; the first occurrence wins,
    so org repos take precedence over user repos.
    """
    org_repos = _gitea_pages(gitea_url, f"/api/v1/orgs/{GITEA_ORG}/repos", token)
    # Also fetch user repos that may not be in the org
    user_repos = _gitea_pages(gitea_url, "/api/v1/user/repos", token) if token else []

    # Deduplicate by full_name
    seen: set[str] = set()
    combined = []
    for r in org_repos + user_repos:
        name = r.get("full_name", "")
        if name not in seen:
            seen.add(name)
            combined.append(r)
    return combined


def fetch_hub_repos(api_base: str) -> list[dict]:
    """Return the repos registered in state-hub (``GET {api_base}/repos/``).

    Returns ``[]`` when the request fails or when the payload is not a JSON
    list (for example an error object), so callers always get a list.
    """
    data = _get(f"{api_base}/repos/")
    if isinstance(data, list):
        return data
    print(
        f" Unexpected response from {api_base}/repos/ (expected a JSON list)",
        file=sys.stderr,
    )
    return []


# ---------------------------------------------------------------------------
# Match logic
# ---------------------------------------------------------------------------

def _ordered_slug_candidates(gitea_repo: dict) -> list[str]:
    """Slug candidates for a Gitea repo, most specific first, no duplicates."""
    # ``name`` may be present but null in the payload; treat that as empty.
    name = gitea_repo.get("name") or ""
    # state-hub slugs are kebab-case; gitea names may use _ or -
    ordered = [
        name,
        name.replace("_", "-"),
        name.lower(),
        name.lower().replace("_", "-"),
    ]
    return list(dict.fromkeys(ordered))


def _slug_candidates(gitea_repo: dict) -> set[str]:
    """Slug candidates from a Gitea repo entry."""
    return set(_ordered_slug_candidates(gitea_repo))


def build_report(gitea_repos: list[dict], hub_repos: list[dict]) -> dict:
    """Classify repos into registered / unregistered / hub-only.

    Args:
        gitea_repos: Repo objects as returned by the Gitea API.
        hub_repos: Repo objects as returned by state-hub.  Entries that are
            not dicts or have no ``slug`` cannot be matched and are skipped.

    Returns:
        A dict with ``generated_at`` (UTC ISO timestamp) and the three lists
        ``registered``, ``unregistered`` and ``hub_only``.
    """
    hub_by_slug: dict[str, dict] = {
        r["slug"]: r
        for r in hub_repos
        if isinstance(r, dict) and r.get("slug")
    }
    hub_matched: set[str] = set()

    registered = []
    unregistered = []

    for gr in gitea_repos:
        # Candidates are tried in a fixed order (exact name first) so the
        # match is deterministic when the hub holds several slug variants.
        matched_slug = next(
            (c for c in _ordered_slug_candidates(gr) if c in hub_by_slug),
            None,
        )
        if matched_slug:
            hub_matched.add(matched_slug)
            hr = hub_by_slug[matched_slug]
            registered.append({
                "slug": matched_slug,
                "gitea_name": gr.get("name"),
                "gitea_url": gr.get("html_url"),
                "domain": hr.get("domain_slug"),
                "status": hr.get("status"),
                "last_state_synced_at": hr.get("last_state_synced_at"),
                "last_sbom_at": hr.get("last_sbom_at"),
            })
        else:
            unregistered.append({
                "gitea_name": gr.get("name"),
                "gitea_url": gr.get("html_url"),
                "description": gr.get("description") or "",
                "language": gr.get("language") or "",
                "stars": gr.get("stars_count", 0),
            })

    hub_only = [
        {"slug": slug, "domain": r.get("domain_slug"), "status": r.get("status")}
        for slug, r in hub_by_slug.items()
        if slug not in hub_matched
    ]

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "registered": registered,
        "unregistered": unregistered,
        "hub_only": hub_only,
    }


# ---------------------------------------------------------------------------
# Rendering
# ---------------------------------------------------------------------------

def _age(ts: str | None) -> str:
    """Render an ISO-8601 timestamp as a coarse age ("5m ago", "3h ago", "2d ago").

    Returns ``"never"`` for a missing timestamp and the input unchanged when
    it cannot be parsed.
    """
    if not ts:
        return "never"
    if not isinstance(ts, str):
        return str(ts)
    try:
        dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
    except ValueError:
        return ts
    if dt.tzinfo is None:
        # NOTE(review): naive timestamps are assumed to be UTC — confirm
        # against what state-hub actually emits.
        dt = dt.replace(tzinfo=timezone.utc)
    delta = datetime.now(timezone.utc) - dt
    # Clamp so a timestamp slightly in the future (clock skew) does not
    # render as a negative age.
    seconds = max(delta.total_seconds(), 0)
    hours = int(seconds // 3600)
    if hours < 1:
        return f"{int(seconds // 60)}m ago"
    if hours < 24:
        return f"{hours}h ago"
    return f"{delta.days}d ago"


def render_text(report: dict) -> str:
    """Render a report produced by ``build_report`` as human-readable text."""
    SEP = "=" * 70
    lines = [
        "Custodian Gitea Inventory",
        f"Generated: {report['generated_at']}",
        SEP,
    ]

    # Registered
    reg = report["registered"]
    lines.append(f"\n✅ REGISTERED ({len(reg)}) — in both Gitea and state-hub")
    if reg:
        lines.append(f" {'slug':<30} {'domain':<20} {'synced':<15} {'sbom'}")
        lines.append(f" {'-'*28} {'-'*18} {'-'*13} {'-'*13}")
        for r in sorted(reg, key=lambda x: x["slug"]):
            synced = _age(r["last_state_synced_at"])
            sbom = _age(r["last_sbom_at"])
            lines.append(f" {r['slug']:<30} {(r['domain'] or ''):<20} {synced:<15} {sbom}")
    else:
        lines.append(" (none)")

    # Unregistered
    unreg = report["unregistered"]
    lines.append(f"\n⚠ UNREGISTERED ({len(unreg)}) — on Gitea but not in state-hub")
    if unreg:
        # ``gitea_name`` can be None; coerce for sorting so mixed None/str
        # names do not raise TypeError.
        for r in sorted(unreg, key=lambda x: x["gitea_name"] or ""):
            desc = f" — {r['description']}" if r["description"] else ""
            lang = f" [{r['language']}]" if r["language"] else ""
            lines.append(f" {r['gitea_name']}{lang}{desc}")
        lines.append("\n To onboard: make register-project DOMAIN= PROJECT_PATH=/home/worsch/")
    else:
        lines.append(" (none — all Gitea repos are registered 🎉)")

    # Hub-only
    hub_only = report["hub_only"]
    lines.append(f"\n🔵 HUB-ONLY ({len(hub_only)}) — in state-hub but no matching Gitea repo")
    if hub_only:
        for r in sorted(hub_only, key=lambda x: x["slug"]):
            lines.append(f" {r['slug']:<30} domain={r['domain'] or '?'} status={r['status']}")
    else:
        lines.append(" (none)")

    lines.append(f"\n{SEP}")
    return "\n".join(lines)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main() -> None:
    """CLI entry point: fetch both inventories and print the comparison.

    Progress and warnings go to stderr so stdout carries only the report
    (text, or JSON with ``--json``).
    """
    _load_env()

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--json", action="store_true", help="Output JSON instead of text")
    parser.add_argument("--api-base", default=os.environ.get("API_BASE", DEFAULT_API_BASE))
    args = parser.parse_args()

    gitea_url = os.environ.get("GITEA_URL", DEFAULT_GITEA_URL).rstrip("/")
    token = os.environ.get("GITEA_TOKEN") or None

    if not token:
        print("⚠ GITEA_TOKEN not set — only public repos will be visible", file=sys.stderr)

    print("Fetching Gitea repos...", file=sys.stderr)
    gitea_repos = fetch_gitea_repos(gitea_url, token)
    print(f" {len(gitea_repos)} repos found on Gitea", file=sys.stderr)

    print("Fetching state-hub repos...", file=sys.stderr)
    hub_repos = fetch_hub_repos(args.api_base)
    print(f" {len(hub_repos)} repos registered in hub", file=sys.stderr)

    report = build_report(gitea_repos, hub_repos)

    if args.json:
        print(json.dumps(report, indent=2))
    else:
        print(render_text(report))


if __name__ == "__main__":
    main()