generated from coulomb/repo-seed
- Migration e2f3a4b5c6d7: add last_state_synced_at to managed_repos
- consistency_check.py: PATCH last_state_synced_at after a fix run;
fix `~` being treated as a non-empty state_hub_task_id (C-03 vs C-11);
fix _inject_task_id_into_block skipping injection when the field exists
with a null value
- install_hooks.sh: idempotent post-commit hook installer for all
registered repos (make install-hooks REPO= / install-hooks-all)
- gitea_inventory.py: compare coulomb Gitea org against state-hub
registered repos — registered / unregistered / hub-only sections
- infra/README.md: document systemd user timer + crontab fallback
- systemd user timer: custodian-sync.{service,timer} runs
fix-consistency-all every 15 min (enabled)
- dashboard/src/repo-sync.md: Repo Sync Health page — sync age table,
unregistered Gitea repos, hub-only repos
- api/routers/repos.py: GET /repos/{slug}/dispatch endpoint returning
active goal, pending tasks per workstream, human interventions
- mcp_server/server.py: get_repo_dispatch() MCP tool
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
273 lines
9.4 KiB
Python
273 lines
9.4 KiB
Python
#!/usr/bin/env python3
|
|
"""gitea_inventory.py — compare Gitea repos against state-hub registered repos.
|
|
|
|
Outputs three sections:
|
|
1. Registered — in both Gitea and state-hub (shows last_state_synced_at)
|
|
2. Unregistered — on Gitea but not in state-hub (candidates for onboarding)
|
|
3. Hub-only — in state-hub but no matching Gitea repo (local/stale)
|
|
|
|
Usage:
|
|
uv run python scripts/gitea_inventory.py [--json] [--api-base URL]
|
|
make gitea-inventory
|
|
make gitea-inventory JSON=1
|
|
|
|
Environment (from .env or shell):
|
|
GITEA_URL — base URL, e.g. http://92.205.130.254:32166
|
|
GITEA_TOKEN — personal access token (needs read:repo scope)
|
|
API_BASE — state-hub API, default http://127.0.0.1:8000
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
import urllib.parse
|
|
import urllib.request
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Config
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Fallback state-hub API root, used when neither --api-base nor API_BASE is given.
DEFAULT_API_BASE = "http://127.0.0.1:8000"
# Fallback Gitea root, used when GITEA_URL is not set in the environment.
DEFAULT_GITEA_URL = "http://92.205.130.254:32166"
# Gitea organisation whose repositories are listed by fetch_gitea_repos().
GITEA_ORG = "coulomb"
|
|
|
|
|
|
def _load_env() -> None:
    """Load ``KEY=value`` pairs from the ``.env`` file one directory above this script.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored.  Variables that already exist in ``os.environ`` are never
    overwritten, so values set in the shell win over the file.  A missing
    file is not an error.
    """
    env_path = os.path.normpath(
        os.path.join(os.path.dirname(__file__), "..", ".env")
    )
    if not os.path.exists(env_path):
        return
    # Decode explicitly as UTF-8 so parsing does not depend on the locale.
    with open(env_path, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, val = line.partition("=")
            key = key.strip()
            val = val.strip()
            # Remove one pair of *matching* surrounding quotes only.  Stripping
            # quote characters independently would also eat a quote that is
            # part of the value itself (e.g. TOKEN="abc'").
            if len(val) >= 2 and val[0] == val[-1] and val[0] in "\"'":
                val = val[1:-1]
            if key and key not in os.environ:
                os.environ[key] = val
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# HTTP helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _get(url: str, token: str | None = None) -> dict | list:
    """Fetch *url* and return its decoded JSON body.

    When *token* is given it is sent as ``Authorization: token <token>``.
    Any failure while opening, reading or parsing the response is reported
    on stderr and an empty list is returned instead of raising.
    """
    headers = {}
    if token:
        headers["Authorization"] = f"token {token}"
    headers["Accept"] = "application/json"
    request = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            body = response.read()
            return json.loads(body.decode())
    except Exception as exc:
        # Best effort by design: callers treat [] as "nothing available".
        print(f" HTTP error: {url} → {exc}", file=sys.stderr)
        return []
|
|
|
|
|
|
def _gitea_pages(base_url: str, path: str, token: str | None) -> list[dict]:
    """Collect every item from a paginated Gitea list endpoint.

    Requests ``{base_url}{path}`` page by page, asking for 50 items per page,
    and concatenates the results.  Paging stops at the first page that is
    empty or is not a JSON list.  ``_get`` returns an empty list on HTTP
    errors, so a failing request also ends the walk and whatever was gathered
    so far is returned.
    """
    results: list[dict] = []
    previous: list | None = None
    page = 1
    while True:
        query = urllib.parse.urlencode({"limit": 50, "page": page})
        data = _get(f"{base_url}{path}?{query}", token)
        # Stop only on an empty (or non-list) page.  A page shorter than the
        # requested limit is not proof of the last page: the server may cap
        # the page size below 50, and stopping early would silently truncate
        # the inventory.
        if not isinstance(data, list) or not data:
            break
        # Guard against a server that ignores ``page`` and keeps returning
        # the same content, which would otherwise loop forever.
        if data == previous:
            break
        results.extend(data)
        previous = data
        page += 1
    return results
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fetch data
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def fetch_gitea_repos(gitea_url: str, token: str | None) -> list[dict]:
    """List the organisation's repos, plus the token owner's repos when a token is set.

    Entries are de-duplicated on ``full_name``; the first occurrence wins, so
    organisation entries take precedence over user entries.
    """
    repos = _gitea_pages(gitea_url, f"/api/v1/orgs/{GITEA_ORG}/repos", token)
    if token:
        # User repos may live outside the org; only queried when a token is available.
        repos = repos + _gitea_pages(gitea_url, "/api/v1/user/repos", token)

    # dicts keep insertion order, so setdefault retains the first entry per name.
    unique: dict[str, dict] = {}
    for repo in repos:
        unique.setdefault(repo.get("full_name", ""), repo)
    return list(unique.values())
|
|
|
|
|
|
def fetch_hub_repos(api_base: str) -> list[dict]:
    """Return the repos registered in state-hub (``GET {api_base}/repos/``).

    Always returns a list.  An HTTP failure (``_get`` yields ``[]``) or a
    response that is not a JSON array results in an empty list, so callers
    can iterate the result without further type checks.
    """
    data = _get(f"{api_base}/repos/")
    if isinstance(data, list):
        return data
    if data:
        # A non-empty non-list body (e.g. an error object) is not an
        # inventory; iterating a dict would yield its string keys instead of
        # repo records.
        print(
            f" Unexpected response from {api_base}/repos/ (expected a JSON list)",
            file=sys.stderr,
        )
    return []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Match logic
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _slug_candidates(gitea_repo: dict) -> set[str]:
    """Return the spellings of a Gitea repo name that may equal a state-hub slug.

    The set holds the name as-is and lower-cased, and each of those with
    underscores turned into hyphens.  A missing ``name`` yields ``{""}``.
    """
    raw = gitea_repo.get("name", "")
    # state-hub slugs are kebab-case, while Gitea names may contain "_" or "-".
    return {
        variant
        for spelling in (raw, raw.lower())
        for variant in (spelling, spelling.replace("_", "-"))
    }
|
|
|
|
|
|
def build_report(gitea_repos: list[dict], hub_repos: list[dict]) -> dict:
    """Classify repos as registered, unregistered or hub-only.

    Args:
        gitea_repos: Repo objects from the Gitea API; ``name``, ``html_url``,
            ``description``, ``language`` and ``stars_count`` are read.
        hub_repos: Repo records from state-hub; ``slug``, ``domain_slug``,
            ``status``, ``last_state_synced_at`` and ``last_sbom_at`` are read.

    Returns:
        A dict with ``generated_at`` (current UTC time, ISO-8601) and the
        lists ``registered`` (in both), ``unregistered`` (Gitea only) and
        ``hub_only`` (state-hub only).
    """
    # Records without a slug cannot be matched or displayed, so skip them
    # instead of failing with KeyError.
    hub_by_slug: dict[str, dict] = {
        r["slug"]: r for r in hub_repos if r.get("slug")
    }
    hub_matched: set[str] = set()

    registered = []
    unregistered = []

    for gr in gitea_repos:
        name = gr.get("name", "")
        # _slug_candidates returns a set, whose iteration order is arbitrary.
        # Rank the exact Gitea name first and the rest alphabetically so the
        # same hub record is chosen on every run when several slugs match.
        candidates = sorted(_slug_candidates(gr), key=lambda c: (c != name, c))
        matched_slug = next((c for c in candidates if c in hub_by_slug), None)
        if matched_slug is not None:
            hub_matched.add(matched_slug)
            hr = hub_by_slug[matched_slug]
            registered.append({
                "slug": matched_slug,
                "gitea_name": gr.get("name"),
                "gitea_url": gr.get("html_url"),
                "domain": hr.get("domain_slug"),
                "status": hr.get("status"),
                "last_state_synced_at": hr.get("last_state_synced_at"),
                "last_sbom_at": hr.get("last_sbom_at"),
            })
        else:
            unregistered.append({
                "gitea_name": gr.get("name"),
                "gitea_url": gr.get("html_url"),
                "description": gr.get("description") or "",
                "language": gr.get("language") or "",
                "stars": gr.get("stars_count", 0),
            })

    # Whatever no Gitea repo claimed exists only in state-hub.
    hub_only = [
        {"slug": slug, "domain": r.get("domain_slug"), "status": r.get("status")}
        for slug, r in hub_by_slug.items()
        if slug not in hub_matched
    ]

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "registered": registered,
        "unregistered": unregistered,
        "hub_only": hub_only,
    }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Rendering
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _age(ts: str | None) -> str:
    """Render an ISO-8601 timestamp as a coarse age such as ``"5h ago"``.

    Returns ``"never"`` for a missing or empty value, ``"<n>m ago"`` below
    one hour, ``"<n>h ago"`` below one day and ``"<n>d ago"`` otherwise.  A
    value that cannot be parsed is returned unchanged.
    """
    if not ts:
        return "never"
    try:
        # fromisoformat() on older Pythons rejects a trailing "Z".
        dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
    except (AttributeError, TypeError, ValueError):
        # Not an ISO-8601 string: show the raw value rather than fail.
        return ts
    if dt.tzinfo is None:
        # Subtracting a naive datetime from an aware one raises TypeError,
        # which previously made offset-less timestamps fall through to the
        # raw string.  NOTE(review): assumes such timestamps are UTC — confirm
        # against the state-hub API.
        dt = dt.replace(tzinfo=timezone.utc)
    # Clamp at zero so a timestamp slightly in the future (clock skew) reads
    # "0m ago" instead of a negative age.
    seconds = max((datetime.now(timezone.utc) - dt).total_seconds(), 0.0)
    hours = int(seconds // 3600)
    if hours < 1:
        return f"{int(seconds // 60)}m ago"
    if hours < 24:
        return f"{hours}h ago"
    return f"{int(seconds // 86400)}d ago"
|
|
|
|
|
|
def render_text(report: dict) -> str:
    """Format a report from ``build_report`` as a plain-text summary.

    The output has a title block followed by three sections (registered,
    unregistered, hub-only), each sorted by name and each showing a
    ``(none)`` placeholder when empty, and ends with a separator line.
    """
    rule = "=" * 70
    out = [
        "Custodian Gitea Inventory",
        f"Generated: {report['generated_at']}",
        rule,
    ]

    # --- Registered: table of slug / domain / sync age / SBOM age ---------
    registered = report["registered"]
    out.append(f"\n✅ REGISTERED ({len(registered)}) — in both Gitea and state-hub")
    if not registered:
        out.append(" (none)")
    else:
        out.append(f" {'slug':<30} {'domain':<20} {'synced':<15} {'sbom'}")
        out.append(f" {'-'*28} {'-'*18} {'-'*13} {'-'*13}")
        for row in sorted(registered, key=lambda item: item["slug"]):
            synced = _age(row["last_state_synced_at"])
            sbom = _age(row["last_sbom_at"])
            out.append(f" {row['slug']:<30} {(row['domain'] or ''):<20} {synced:<15} {sbom}")

    # --- Unregistered: one line per repo, then the onboarding hint --------
    unregistered = report["unregistered"]
    out.append(f"\n⚠ UNREGISTERED ({len(unregistered)}) — on Gitea but not in state-hub")
    if not unregistered:
        out.append(" (none — all Gitea repos are registered 🎉)")
    else:
        for row in sorted(unregistered, key=lambda item: item["gitea_name"]):
            lang = ""
            if row["language"]:
                lang = f" [{row['language']}]"
            desc = ""
            if row["description"]:
                desc = f" — {row['description']}"
            out.append(f" {row['gitea_name']}{lang}{desc}")
        out.append(f"\n To onboard: make register-project DOMAIN=<domain> PROJECT_PATH=/home/worsch/<slug>")

    # --- Hub-only: slug with its domain and status ------------------------
    hub_only = report["hub_only"]
    out.append(f"\n🔵 HUB-ONLY ({len(hub_only)}) — in state-hub but no matching Gitea repo")
    if not hub_only:
        out.append(" (none)")
    else:
        out.extend(
            f" {row['slug']:<30} domain={row['domain'] or '?'} status={row['status']}"
            for row in sorted(hub_only, key=lambda item: item["slug"])
        )

    out.append(f"\n{rule}")
    return "\n".join(out)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def main() -> None:
    """Command-line entry point: fetch both inventories and print the report.

    Loads ``.env``, then reads ``GITEA_URL``, ``GITEA_TOKEN`` and ``API_BASE``
    from the environment (``--api-base`` overrides ``API_BASE``).  Progress
    messages go to stderr; the report goes to stdout as text, or as JSON when
    ``--json`` is given.
    """
    # Load .env first so its values can serve as argparse defaults below.
    _load_env()

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--json", action="store_true", help="Output JSON instead of text")
    parser.add_argument("--api-base", default=os.environ.get("API_BASE", DEFAULT_API_BASE))
    args = parser.parse_args()

    # Normalise both base URLs the same way: endpoint paths are appended with
    # a leading "/", so a trailing slash would produce "//repos/"-style URLs.
    api_base = args.api_base.rstrip("/")
    gitea_url = os.environ.get("GITEA_URL", DEFAULT_GITEA_URL).rstrip("/")
    token = os.environ.get("GITEA_TOKEN") or None

    if not token:
        print("⚠ GITEA_TOKEN not set — only public repos will be visible", file=sys.stderr)

    print("Fetching Gitea repos...", file=sys.stderr)
    gitea_repos = fetch_gitea_repos(gitea_url, token)
    print(f" {len(gitea_repos)} repos found on Gitea", file=sys.stderr)

    print("Fetching state-hub repos...", file=sys.stderr)
    hub_repos = fetch_hub_repos(api_base)
    print(f" {len(hub_repos)} repos registered in hub", file=sys.stderr)

    report = build_report(gitea_repos, hub_repos)

    if args.json:
        print(json.dumps(report, indent=2))
    else:
        print(render_text(report))
|
|
|
|
|
|
# Run the inventory only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|