feat(CUST-WP-0014): repo sync automation & Gitea inventory

- Migration e2f3a4b5c6d7: add last_state_synced_at to managed_repos
- consistency_check.py: PATCH last_state_synced_at after fix run;
  fix ~ treated as non-empty state_hub_task_id (C-03 vs C-11);
  fix _inject_task_id_into_block skipping injection when field exists
  with null value
- install_hooks.sh: idempotent post-commit hook installer for all
  registered repos (make install-hooks REPO= / install-hooks-all)
- gitea_inventory.py: compare coulomb Gitea org against state-hub
  registered repos — registered / unregistered / hub-only sections
- infra/README.md: document systemd user timer + crontab fallback
- systemd user timer: custodian-sync.{service,timer} runs
  fix-consistency-all every 15 min (enabled)
- dashboard/src/repo-sync.md: Repo Sync Health page — sync age table,
  unregistered Gitea repos, hub-only repos
- api/routers/repos.py: GET /repos/{slug}/dispatch endpoint returning
  active goal, pending tasks per workstream, human interventions
- mcp_server/server.py: get_repo_dispatch() MCP tool

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-16 01:41:16 +01:00
parent a2db606dcc
commit 5e7a72e144
14 changed files with 912 additions and 5 deletions

View File

@@ -191,9 +191,18 @@ def _inject_task_id_into_block(
task_meta = _parse_yaml_block(block_content.strip())
if str(task_meta.get("id", "")) != match_id:
return m.group(0)
if field_name in task_meta:
existing_val = task_meta.get(field_name)
if existing_val is not None and str(existing_val).strip() not in ("", "~", "null", "None", "none"):
return m.group(0)
new_content = block_content.rstrip() + f"\n{field_name}: \"{field_value}\""
# Replace existing null/~ line if present, otherwise append
new_content = re.sub(
rf"^{re.escape(field_name)}:.*$",
f'{field_name}: "{field_value}"',
block_content,
flags=re.MULTILINE,
)
if new_content == block_content:
new_content = block_content.rstrip() + f"\n{field_name}: \"{field_value}\""
return f"```task\n{new_content}\n```"
new_text = _TASK_BLOCK_RE.sub(_replace, text)
@@ -461,7 +470,10 @@ def check_repo(api_base: str, repo_slug: str) -> ConsistencyReport:
if task.get("_parse_error"):
continue
t_id = str(task.get("id", "")).strip()
t_sh_id = str(task.get("state_hub_task_id", "")).strip().strip('"')
_raw_sh = task.get("state_hub_task_id")
t_sh_id = "" if _raw_sh is None else str(_raw_sh).strip().strip('"')
if t_sh_id in ("~", "null", "None", "none"):
t_sh_id = ""
t_status = str(task.get("status", "")).strip()
if t_sh_id:
@@ -724,6 +736,12 @@ def fix_repo(api_base: str, repo_slug: str) -> ConsistencyReport:
except Exception as e:
report.fixes_applied.append(f"{issue.check_id} ERROR: {e}")
# Record that a sync run happened for this repo
from datetime import timezone as _tz
import datetime as _dt
now_iso = _dt.datetime.now(_tz.utc).isoformat()
_api_patch(api_base, f"/repos/{repo_slug}/", {"last_state_synced_at": now_iso})
return report

272
scripts/gitea_inventory.py Normal file
View File

@@ -0,0 +1,272 @@
#!/usr/bin/env python3
"""gitea_inventory.py — compare Gitea repos against state-hub registered repos.
Outputs three sections:
1. Registered — in both Gitea and state-hub (shows last_state_synced_at)
2. Unregistered — on Gitea but not in state-hub (candidates for onboarding)
3. Hub-only — in state-hub but no matching Gitea repo (local/stale)
Usage:
uv run python scripts/gitea_inventory.py [--json] [--api-base URL]
make gitea-inventory
make gitea-inventory JSON=1
Environment (from .env or shell):
GITEA_URL — base URL, e.g. http://92.205.130.254:32166
GITEA_TOKEN — personal access token (needs read:repo scope)
API_BASE — state-hub API, default http://127.0.0.1:8000
"""
from __future__ import annotations
import argparse
import json
import os
import sys
import urllib.parse
import urllib.request
from datetime import datetime, timezone
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
# Fallback state-hub API base URL, used when neither --api-base nor the
# API_BASE environment variable supplies one.
DEFAULT_API_BASE = "http://127.0.0.1:8000"
# Fallback Gitea base URL, used when GITEA_URL is not set in the environment.
DEFAULT_GITEA_URL = "http://92.205.130.254:32166"
# Gitea organisation whose repositories are listed by the inventory.
GITEA_ORG = "coulomb"
def _load_env() -> None:
    """Populate ``os.environ`` from the ``.env`` file one level above this script.

    The parser is deliberately minimal: blank lines, ``#`` comment lines and
    lines without ``=`` are ignored; everything before the first ``=`` is the
    key and everything after it is the value, with surrounding whitespace and
    quote characters stripped.  Variables already present in the environment
    are never overwritten, so the shell always wins over the file.
    """
    dotenv = os.path.normpath(
        os.path.join(os.path.dirname(__file__), "..", ".env")
    )
    if not os.path.exists(dotenv):
        return
    with open(dotenv) as handle:
        for raw in handle:
            entry = raw.strip()
            if not entry or entry.startswith("#") or "=" not in entry:
                continue
            name, _, value = entry.partition("=")
            name = name.strip()
            value = value.strip().strip('"').strip("'")
            if name:
                # setdefault keeps any value that is already exported.
                os.environ.setdefault(name, value)
# ---------------------------------------------------------------------------
# HTTP helpers
# ---------------------------------------------------------------------------
def _get(url: str, token: str | None = None) -> dict | list:
req = urllib.request.Request(url)
if token:
req.add_header("Authorization", f"token {token}")
req.add_header("Accept", "application/json")
try:
with urllib.request.urlopen(req, timeout=10) as resp:
return json.loads(resp.read().decode())
except Exception as exc:
print(f" HTTP error: {url}{exc}", file=sys.stderr)
return []
def _gitea_pages(base_url: str, path: str, token: str | None) -> list[dict]:
    """Collect every item from a paginated Gitea list endpoint.

    Pages of 50 are requested starting at page 1.  Collection stops at the
    first response that is empty, is not a list, or holds fewer than 50
    items (i.e. the last page).
    """
    collected = []
    page_no = 1
    keep_going = True
    while keep_going:
        batch = _get(f"{base_url}{path}?limit=50&page={page_no}", token)
        if isinstance(batch, list) and batch:
            collected.extend(batch)
            # A short page means there is nothing left to fetch.
            keep_going = len(batch) >= 50
            page_no += 1
        else:
            keep_going = False
    return collected
# ---------------------------------------------------------------------------
# Fetch data
# ---------------------------------------------------------------------------
def fetch_gitea_repos(gitea_url: str, token: str | None) -> list[dict]:
    """List the repos of the ``GITEA_ORG`` organisation, plus the token user's.

    The user listing is only requested when a token is available.  Entries
    are de-duplicated on ``full_name``; the first occurrence wins, and
    organisation repos are listed before user repos.
    """
    listing = _gitea_pages(gitea_url, f"/api/v1/orgs/{GITEA_ORG}/repos", token)
    if token:
        # Repos owned by the user may live outside the organisation.
        listing = listing + _gitea_pages(gitea_url, "/api/v1/user/repos", token)
    unique: dict[str, dict] = {}
    for repo in listing:
        # Insertion-ordered dict: setdefault keeps the first repo per name.
        unique.setdefault(repo.get("full_name", ""), repo)
    return list(unique.values())
def fetch_hub_repos(api_base: str) -> list[dict]:
    """Return the repos registered in state-hub (``GET {api_base}/repos/``).

    Returns:
        The decoded JSON array, or an empty list when the request failed or
        the response was not a JSON array.
    """
    payload = _get(f"{api_base}/repos/")
    # _get can return a dict (any JSON object).  Only a list is a usable repo
    # listing; letting a dict through would make callers iterate its keys.
    return payload if isinstance(payload, list) else []
# ---------------------------------------------------------------------------
# Match logic
# ---------------------------------------------------------------------------
def _slug_candidates(gitea_repo: dict) -> set[str]:
    """Return the possible state-hub slugs for a Gitea repo entry.

    The candidates are the repo ``name`` as-is and lower-cased, each also
    with underscores turned into hyphens (hub slugs are kebab-case while
    Gitea names may use either separator).
    """
    raw = gitea_repo.get("name", "")
    return {
        variant
        for base in (raw, raw.lower())
        for variant in (base, base.replace("_", "-"))
    }
def build_report(gitea_repos: list[dict], hub_repos: list[dict]) -> dict:
    """Classify repos into registered / unregistered / hub-only sections.

    Args:
        gitea_repos: Repo entries as returned by the Gitea API.
        hub_repos: Repo entries from state-hub; each must carry a ``slug``.

    Returns:
        A dict with ``generated_at`` (UTC ISO timestamp) and three lists:
        ``registered`` (a Gitea repo whose slug candidate exists in the hub),
        ``unregistered`` (Gitea repo with no hub match) and ``hub_only``
        (hub repo that no Gitea repo matched).
    """
    hub_index: dict[str, dict] = {entry["slug"]: entry for entry in hub_repos}
    seen_in_gitea: set[str] = set()
    registered = []
    unregistered = []

    for repo in gitea_repos:
        # First slug candidate that the hub knows about, if any.
        hit = None
        for candidate in _slug_candidates(repo):
            if candidate in hub_index:
                hit = candidate
                break

        if not hit:
            unregistered.append({
                "gitea_name": repo.get("name"),
                "gitea_url": repo.get("html_url"),
                "description": repo.get("description") or "",
                "language": repo.get("language") or "",
                "stars": repo.get("stars_count", 0),
            })
            continue

        seen_in_gitea.add(hit)
        hub_entry = hub_index[hit]
        registered.append({
            "slug": hit,
            "gitea_name": repo.get("name"),
            "gitea_url": repo.get("html_url"),
            "domain": hub_entry.get("domain_slug"),
            "status": hub_entry.get("status"),
            "last_state_synced_at": hub_entry.get("last_state_synced_at"),
            "last_sbom_at": hub_entry.get("last_sbom_at"),
        })

    hub_only = []
    for slug, entry in hub_index.items():
        if slug in seen_in_gitea:
            continue
        hub_only.append(
            {"slug": slug, "domain": entry.get("domain_slug"), "status": entry.get("status")}
        )

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "registered": registered,
        "unregistered": unregistered,
        "hub_only": hub_only,
    }
# ---------------------------------------------------------------------------
# Rendering
# ---------------------------------------------------------------------------
def _age(ts: str | None) -> str:
if not ts:
return "never"
try:
dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
delta = datetime.now(timezone.utc) - dt
h = int(delta.total_seconds() // 3600)
if h < 1:
return f"{int(delta.total_seconds() // 60)}m ago"
if h < 24:
return f"{h}h ago"
return f"{delta.days}d ago"
except Exception:
return ts
def render_text(report: dict) -> str:
    """Format an inventory report as a plain-text summary.

    Args:
        report: Mapping with ``generated_at`` plus the ``registered``,
            ``unregistered`` and ``hub_only`` lists of entry dicts.

    Returns:
        A multi-line string with one section per list; each section is
        sorted by slug (or by Gitea name for unregistered repos).
    """
    SEP = "=" * 70
    lines = [
        "Custodian Gitea Inventory",
        f"Generated: {report['generated_at']}",
        SEP,
    ]

    # Registered
    reg = report["registered"]
    lines.append(f"\n✅ REGISTERED ({len(reg)}) — in both Gitea and state-hub")
    if reg:
        lines.append(f" {'slug':<30} {'domain':<20} {'synced':<15} {'sbom'}")
        lines.append(f" {'-'*28} {'-'*18} {'-'*13} {'-'*13}")
        for r in sorted(reg, key=lambda x: x["slug"]):
            synced = _age(r["last_state_synced_at"])
            sbom = _age(r["last_sbom_at"])
            lines.append(f" {r['slug']:<30} {(r['domain'] or ''):<20} {synced:<15} {sbom}")
    else:
        lines.append(" (none)")

    # Unregistered
    unreg = report["unregistered"]
    lines.append(f"\n⚠ UNREGISTERED ({len(unreg)}) — on Gitea but not in state-hub")
    if unreg:
        for r in sorted(unreg, key=lambda x: x["gitea_name"]):
            # Separate the description from the name/language; previously it
            # was glued straight onto them ("repo [Go]Some text").
            desc = f" — {r['description']}" if r["description"] else ""
            lang = f" [{r['language']}]" if r["language"] else ""
            lines.append(f" {r['gitea_name']}{lang}{desc}")
        lines.append(f"\n To onboard: make register-project DOMAIN=<domain> PROJECT_PATH=/home/worsch/<slug>")
    else:
        lines.append(" (none — all Gitea repos are registered 🎉)")

    # Hub-only
    hub_only = report["hub_only"]
    lines.append(f"\n🔵 HUB-ONLY ({len(hub_only)}) — in state-hub but no matching Gitea repo")
    if hub_only:
        for r in sorted(hub_only, key=lambda x: x["slug"]):
            lines.append(f" {r['slug']:<30} domain={r['domain'] or '?'} status={r['status']}")
    else:
        lines.append(" (none)")

    lines.append(f"\n{SEP}")
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> None:
    """CLI entry point: fetch both inventories and print the comparison.

    Progress and warnings go to stderr so that stdout carries only the
    report (text by default, JSON with ``--json``).
    """
    _load_env()

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--json", action="store_true", help="Output JSON instead of text")
    parser.add_argument("--api-base", default=os.environ.get("API_BASE", DEFAULT_API_BASE))
    args = parser.parse_args()

    def note(message: str) -> None:
        # Diagnostics are kept off stdout so the report can be piped.
        print(message, file=sys.stderr)

    gitea_url = os.environ.get("GITEA_URL", DEFAULT_GITEA_URL).rstrip("/")
    token = os.environ.get("GITEA_TOKEN") or None
    if not token:
        note("⚠ GITEA_TOKEN not set — only public repos will be visible")

    note("Fetching Gitea repos...")
    gitea_repos = fetch_gitea_repos(gitea_url, token)
    note(f" {len(gitea_repos)} repos found on Gitea")

    note("Fetching state-hub repos...")
    hub_repos = fetch_hub_repos(args.api_base)
    note(f" {len(hub_repos)} repos registered in hub")

    report = build_report(gitea_repos, hub_repos)
    rendered = json.dumps(report, indent=2) if args.json else render_text(report)
    print(rendered)


if __name__ == "__main__":
    main()

149
scripts/install_hooks.sh Executable file
View File

@@ -0,0 +1,149 @@
#!/usr/bin/env bash
# install_hooks.sh — install a custodian post-commit sync hook into registered repos.
#
# Usage:
# ./install_hooks.sh --repo <slug> # install into one repo
# ./install_hooks.sh --all # install into all registered repos
# ./install_hooks.sh --repo <slug> --remove # remove hook from one repo
# ./install_hooks.sh --all --remove # remove hook from all repos
#
# The hook runs `make fix-consistency REPO=<slug>` in the state-hub after each
# commit, keeping the hub in sync with workplan file changes automatically.
#
# Idempotent: the hook block is guarded by a marker comment. Running twice is safe.
# Abort on command failure, on use of an unset variable, and on a failure
# anywhere in a pipeline.
set -euo pipefail
# Absolute path of the directory one level above this script; the generated
# hook cd's here to run make.
STATEHUB_DIR="$(cd "$(dirname "$0")/.." && pwd)"
# state-hub API base URL; override with the STATE_HUB_API environment variable.
API_BASE="${STATE_HUB_API:-http://127.0.0.1:8000}"
# Comment that opens the managed hook block ("<marker>-end" closes it); its
# presence in a hook file is what makes install/remove idempotent.
MARKER="# custodian-sync-hook"
# Print the command-line synopsis and terminate the script with status 1.
usage() {
  printf 'Usage: %s --repo <slug> | --all [--remove]\n' "$0"
  exit 1
}
# ── Arg parsing ───────────────────────────────────────────────────────────────
# Parsed options: the single repo slug (--repo), whether to process every
# registered repo (--all), and whether to remove instead of install (--remove).
REPO_SLUG=""
DO_ALL=false
REMOVE=false
while [[ $# -gt 0 ]]; do
  case "$1" in
    --repo)
      # Under `set -u` a trailing "--repo" with no value would abort with an
      # opaque "unbound variable" error; report it and show the usage instead.
      if [[ -z "${2:-}" ]]; then
        echo "Missing value for --repo" >&2
        usage
      fi
      REPO_SLUG="$2"
      shift 2
      ;;
    --all) DO_ALL=true; shift ;;
    --remove) REMOVE=true; shift ;;
    -h|--help) usage ;;
    *) echo "Unknown argument: $1"; usage ;;
  esac
done
# At least one of --repo / --all is required.
if [[ -z "$REPO_SLUG" && "$DO_ALL" == false ]]; then usage; fi
# ── Helper: resolve local path for a repo slug ───────────────────────────────
# resolve_path <slug>
# Print the local checkout directory for a repo slug, or an empty line when
# none can be found. The local_path registered in the API is preferred; the
# fallback is the conventional /home/<current user>/<slug>.
resolve_path() {
  local slug="$1"
  local registered
  # Any failure of the lookup (API down, bad JSON) just yields an empty value.
  registered=$(curl -sf "${API_BASE}/repos/${slug}/" | python3 -c \
    "import json,sys; d=json.load(sys.stdin); print(d.get('local_path') or '')" 2>/dev/null || true)
  if [[ -z "$registered" || ! -d "$registered" ]]; then
    local by_convention="/home/$(whoami)/${slug}"
    if [[ -d "$by_convention" ]]; then
      echo "$by_convention"
    else
      echo ""
    fi
    return
  fi
  echo "$registered"
}
# ── Helper: install hook into one repo ───────────────────────────────────────
# install_hook <slug>
# Install the custodian post-commit block into the repo's .git/hooks/post-commit.
# Skips repos without a resolvable local path or without a .git directory, and
# does nothing when the marker is already present (idempotent). An existing
# hook is kept: the managed block is placed in front of its content.
install_hook() {
  local slug="$1"
  local repo_path
  repo_path=$(resolve_path "$slug")
  if [[ -z "$repo_path" ]]; then
    echo "$slug: no local path found — skipping"
    return
  fi
  if [[ ! -d "$repo_path/.git" ]]; then
    echo "$slug: $repo_path is not a git repo — skipping"
    return
  fi
  local hook_file="$repo_path/.git/hooks/post-commit"
  if [[ -f "$hook_file" ]] && grep -q "$MARKER" "$hook_file"; then
    echo "$slug: hook already present at $hook_file"
    return
  fi
  # The heredoc is expanded now, so the hook gets the literal URL, state-hub
  # directory and slug baked in. They are quoted inside the generated script
  # so unusual characters cannot be word-split or globbed when the hook runs.
  local hook_block
  hook_block=$(cat <<BLOCK
${MARKER} — managed by custodian, do not edit this block
if curl -sf "${API_BASE}/state/health" >/dev/null 2>&1; then
(cd "${STATEHUB_DIR}" && make fix-consistency REPO="${slug}" >/dev/null 2>&1 &)
fi
${MARKER}-end
BLOCK
)
  # .git/hooks can be absent (e.g. a template without hooks); without this the
  # redirect below fails and `set -e` would abort the whole run.
  mkdir -p "$repo_path/.git/hooks"
  if [[ -f "$hook_file" ]]; then
    # Prepend to existing hook.
    # NOTE(review): the old content (including its own shebang line, which then
    # becomes a plain comment) is run by bash afterwards — a hook written for
    # another interpreter would need manual merging.
    local existing
    existing=$(cat "$hook_file")
    printf '#!/usr/bin/env bash\n%s\n\n%s\n' "$hook_block" "$existing" > "$hook_file"
  else
    printf '#!/usr/bin/env bash\n%s\n' "$hook_block" > "$hook_file"
  fi
  chmod +x "$hook_file"
  echo "$slug: hook installed at $hook_file"
}
# ── Helper: remove hook from one repo ────────────────────────────────────────
# remove_hook <slug>
# Strip the custodian block (marker line through "<marker>-end" line) from the
# repo's post-commit hook. Everything else in the hook file is left in place.
remove_hook() {
  local slug="$1"
  local repo_path hook_file
  repo_path=$(resolve_path "$slug")
  hook_file="$repo_path/.git/hooks/post-commit"
  if [[ -z "$repo_path" || ! -f "$hook_file" ]]; then
    echo " $slug: no hook file found — skipping"
    return
  fi
  if grep -q "$MARKER" "$hook_file"; then
    # Remove the marked block (between MARKER and MARKER-end inclusive)
    python3 - "$hook_file" <<'PY'
import sys, re
path = sys.argv[1]
text = open(path).read()
cleaned = re.sub(
r'# custodian-sync-hook.*?# custodian-sync-hook-end\n?',
'',
text,
flags=re.DOTALL,
)
open(path, 'w').write(cleaned)
PY
    echo " 🗑 $slug: hook block removed from $hook_file"
  else
    echo " $slug: custodian marker not found in hook — skipping"
  fi
}
# ── Collect repo slugs ────────────────────────────────────────────────────────
# Build the list of slugs to process: every repo with status "active" from the
# API for --all, otherwise just the one given with --repo.
if $DO_ALL; then
  # Fetch first, in the main shell: a curl failure inside <( … ) is invisible
  # to `set -e` and would silently produce an empty list ("0 repo(s) … Done.").
  if ! repos_json=$(curl -sf "${API_BASE}/repos/"); then
    echo "Error: could not fetch repo list from ${API_BASE}/repos/" >&2
    exit 1
  fi
  mapfile -t SLUGS < <(printf '%s' "$repos_json" | python3 -c \
    "import json,sys; [print(r['slug']) for r in json.load(sys.stdin) if r.get('status') == 'active']")
else
  SLUGS=("$REPO_SLUG")
fi
echo "Custodian sync hook — $( $REMOVE && echo 'removing' || echo 'installing' ) for ${#SLUGS[@]} repo(s)"
for slug in "${SLUGS[@]}"; do
  if $REMOVE; then remove_hook "$slug"; else install_hook "$slug"; fi
done
echo "Done."