diff --git a/state-hub/.env.example b/state-hub/.env.example index b279dce..3c94cbf 100644 --- a/state-hub/.env.example +++ b/state-hub/.env.example @@ -11,3 +11,7 @@ PGADMIN_PASSWORD=admin # API API_BASE=http://127.0.0.1:8000 + +# Gitea (for gitea_inventory.py) +GITEA_URL=http://92.205.130.254:32166 +GITEA_TOKEN= diff --git a/state-hub/Makefile b/state-hub/Makefile index d010cbd..7dce318 100644 --- a/state-hub/Makefile +++ b/state-hub/Makefile @@ -1,4 +1,4 @@ -.PHONY: install install-cli db db-tools migrate seed api dashboard check start clean register-project validate-adr add-domain rename-domain add-repo list-repos cleanup-stale tunnel tunnel-daemon tunnel-loop tunnel-status tunnel-stop +.PHONY: install install-cli db db-tools migrate seed api dashboard check start clean register-project validate-adr add-domain rename-domain add-repo list-repos cleanup-stale tunnel tunnel-daemon tunnel-loop tunnel-status tunnel-stop install-hooks install-hooks-all gitea-inventory COMPOSE = docker compose -f infra/docker-compose.yml --env-file .env @@ -190,5 +190,24 @@ fix-consistency-all: cleanup-stale: uv run python scripts/cleanup_stale_tasks.py +## Install custodian post-commit sync hook into one repo: make install-hooks REPO=marki-docx +install-hooks: + @test -n "$(REPO)" || (echo "ERROR: REPO is required. Usage: make install-hooks REPO="; exit 1) + bash scripts/install_hooks.sh --repo "$(REPO)" + +## Install custodian post-commit sync hook into all active registered repos +install-hooks-all: + bash scripts/install_hooks.sh --all + +## Remove custodian post-commit sync hook from one repo: make remove-hooks REPO=marki-docx +remove-hooks: + @test -n "$(REPO)" || (echo "ERROR: REPO is required. 
Usage: make remove-hooks REPO="; exit 1) + bash scripts/install_hooks.sh --repo "$(REPO)" --remove + +## Compare Gitea coulomb org repos against state-hub registered repos +## Requires GITEA_TOKEN in env or .env: make gitea-inventory GITEA_TOKEN= +gitea-inventory: + uv run python scripts/gitea_inventory.py $(if $(JSON),--json) + clean: $(COMPOSE) down -v diff --git a/state-hub/api/models/managed_repo.py b/state-hub/api/models/managed_repo.py index b67b1da..9823114 100644 --- a/state-hub/api/models/managed_repo.py +++ b/state-hub/api/models/managed_repo.py @@ -30,6 +30,9 @@ class ManagedRepo(Base, TimestampMixin): last_sbom_at: Mapped[datetime | None] = mapped_column( DateTime(timezone=True), nullable=True ) + last_state_synced_at: Mapped[datetime | None] = mapped_column( + DateTime(timezone=True), nullable=True + ) domain: Mapped["Domain"] = relationship( # noqa: F821 "Domain", back_populates="repos", lazy="selectin" diff --git a/state-hub/api/routers/repos.py b/state-hub/api/routers/repos.py index 469ef2a..a9587b4 100644 --- a/state-hub/api/routers/repos.py +++ b/state-hub/api/routers/repos.py @@ -7,7 +7,17 @@ from sqlalchemy.ext.asyncio import AsyncSession from api.database import get_session from api.models.domain import Domain from api.models.managed_repo import ManagedRepo -from api.schemas.managed_repo import RepoCreate, RepoRead, RepoUpdate +from api.models.repo_goal import RepoGoal +from api.models.task import Task +from api.models.workstream import Workstream +from api.schemas.managed_repo import ( + DispatchTask, + DispatchWorkstream, + RepoCreate, + RepoDispatch, + RepoRead, + RepoUpdate, +) router = APIRouter(prefix="/repos", tags=["repos"]) @@ -91,6 +101,86 @@ async def archive_repo( return repo +@router.get("/{slug}/dispatch", response_model=RepoDispatch) +async def get_repo_dispatch( + slug: str, + session: AsyncSession = Depends(get_session), +) -> RepoDispatch: + """Return active workstreams, pending tasks, and goal for a repo. 
+ + This endpoint is the foundation for autonomous agent sessions: an agent can + call it at session start to discover what work is pending without needing to + read state-hub summary or scan workplan files manually. + """ + repo = await _get_repo_by_slug(slug, session) + + # Active goal + goal_result = await session.execute( + select(RepoGoal) + .where(RepoGoal.repo_id == repo.id, RepoGoal.status == "active") + .order_by(RepoGoal.priority) + .limit(1) + ) + goal_obj = goal_result.scalar_one_or_none() + active_goal = None + if goal_obj: + active_goal = { + "id": str(goal_obj.id), + "title": goal_obj.title, + "description": goal_obj.description, + "priority": goal_obj.priority, + } + + # Active workstreams + ws_result = await session.execute( + select(Workstream) + .where(Workstream.repo_id == repo.id, Workstream.status == "active") + .order_by(Workstream.created_at) + ) + workstreams = list(ws_result.scalars().all()) + + dispatch_workstreams: list[DispatchWorkstream] = [] + all_interventions: list[DispatchTask] = [] + + for ws in workstreams: + task_result = await session.execute( + select(Task) + .where(Task.workstream_id == ws.id, Task.status.in_(["todo", "in_progress"])) + .order_by(Task.created_at) + ) + tasks = list(task_result.scalars().all()) + + pending = [ + DispatchTask( + id=t.id, + title=t.title, + priority=t.priority, + status=t.status, + needs_human=t.needs_human, + ) + for t in tasks + ] + interventions = [t for t in pending if t.needs_human] + all_interventions.extend(interventions) + + dispatch_workstreams.append( + DispatchWorkstream( + id=ws.id, + title=ws.title, + status=ws.status, + pending_tasks=pending, + ) + ) + + return RepoDispatch( + repo_slug=slug, + active_goal=active_goal, + active_workstreams=dispatch_workstreams, + human_interventions=all_interventions, + last_state_synced_at=repo.last_state_synced_at, + ) + + async def _get_repo_by_slug(slug: str, session: AsyncSession) -> ManagedRepo: result = await 
session.execute(select(ManagedRepo).where(ManagedRepo.slug == slug)) repo = result.scalar_one_or_none() diff --git a/state-hub/api/schemas/managed_repo.py b/state-hub/api/schemas/managed_repo.py index 1aa8582..48e534b 100644 --- a/state-hub/api/schemas/managed_repo.py +++ b/state-hub/api/schemas/managed_repo.py @@ -1,5 +1,6 @@ import uuid from datetime import datetime +from typing import Any from pydantic import BaseModel, ConfigDict @@ -20,6 +21,7 @@ class RepoUpdate(BaseModel): remote_url: str | None = None description: str | None = None topic_id: uuid.UUID | None = None + last_state_synced_at: datetime | None = None class RepoRead(BaseModel): @@ -36,5 +38,29 @@ class RepoRead(BaseModel): topic_id: uuid.UUID | None = None sbom_source: str | None = None last_sbom_at: datetime | None = None + last_state_synced_at: datetime | None = None created_at: datetime updated_at: datetime + + +class DispatchTask(BaseModel): + id: uuid.UUID + title: str + priority: str + status: str + needs_human: bool + + +class DispatchWorkstream(BaseModel): + id: uuid.UUID + title: str + status: str + pending_tasks: list[DispatchTask] + + +class RepoDispatch(BaseModel): + repo_slug: str + active_goal: dict[str, Any] | None + active_workstreams: list[DispatchWorkstream] + human_interventions: list[DispatchTask] + last_state_synced_at: datetime | None diff --git a/state-hub/dashboard/observablehq.config.js b/state-hub/dashboard/observablehq.config.js index 74f9c59..a7552e4 100644 --- a/state-hub/dashboard/observablehq.config.js +++ b/state-hub/dashboard/observablehq.config.js @@ -26,6 +26,7 @@ export default { // ── Functional Report Views ──────────────────────────────────────────────── { name: "Contributions", path: "/contributions" }, { name: "SBOM", path: "/sbom" }, + { name: "Repo Sync", path: "/repo-sync" }, { name: "Progress", path: "/progress" }, // ── Policy ──────────────────────────────────────────────────────────────── { diff --git 
a/state-hub/dashboard/src/data/gitea-inventory.json.py b/state-hub/dashboard/src/data/gitea-inventory.json.py new file mode 100644 index 0000000..2e11542 --- /dev/null +++ b/state-hub/dashboard/src/data/gitea-inventory.json.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +"""Observable data loader: runs gitea_inventory.py and returns JSON output.""" +import json +import os +import subprocess +import sys + +SCRIPTS_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "..", "scripts") +SCRIPTS_DIR = os.path.normpath(SCRIPTS_DIR) +PYTHON = os.path.join(os.path.dirname(sys.executable), "python") +if not os.path.exists(PYTHON): + PYTHON = sys.executable + +API_BASE = os.environ.get("API_BASE", "http://127.0.0.1:8000") + +try: + result = subprocess.run( + [PYTHON, os.path.join(SCRIPTS_DIR, "gitea_inventory.py"), "--json", + "--api-base", API_BASE], + capture_output=True, text=True, timeout=30, + ) + if result.returncode == 0 and result.stdout.strip(): + print(result.stdout) + else: + print(json.dumps({ + "error": result.stderr or "empty output", + "registered": [], "unregistered": [], "hub_only": [], + })) +except Exception as exc: + print(json.dumps({ + "error": str(exc), + "registered": [], "unregistered": [], "hub_only": [], + })) diff --git a/state-hub/dashboard/src/repo-sync.md b/state-hub/dashboard/src/repo-sync.md new file mode 100644 index 0000000..84c1b5a --- /dev/null +++ b/state-hub/dashboard/src/repo-sync.md @@ -0,0 +1,159 @@ +--- +title: Repo Sync Health +--- + +# Repo Sync Health + +```js +const repoData = await FileAttachment("data/repos.json").json(); +const inventory = await FileAttachment("data/gitea-inventory.json").json(); + +const repos = Array.isArray(repoData) ? repoData : (repoData.repos ?? 
[]); +``` + +```js +// Helpers +function ageMs(ts) { + if (!ts) return Infinity; + return Date.now() - new Date(ts).getTime(); +} + +function fmtAge(ts) { + if (!ts) return "never"; + const ms = ageMs(ts); + const m = Math.floor(ms / 60000); + if (m < 60) return `${m}m ago`; + const h = Math.floor(m / 60); + if (h < 24) return `${h}h ago`; + return `${Math.floor(h / 24)}d ago`; +} + +function syncColor(ts) { + if (!ts) return "var(--theme-red)"; + const h = ageMs(ts) / 3600000; + if (h < 1) return "var(--theme-green)"; + if (h < 24) return "var(--theme-orange)"; + return "var(--theme-red)"; +} +``` + +## Registered Repos — Sync Status + +```js +const activeRepos = repos.filter(r => r.status === "active"); +const staleCount = activeRepos.filter(r => !r.last_state_synced_at || ageMs(r.last_state_synced_at) > 86400000).length; +const freshCount = activeRepos.filter(r => r.last_state_synced_at && ageMs(r.last_state_synced_at) < 3600000).length; +``` + +```js +display(html` +
+
+
${freshCount}
+
synced < 1h
+
+
+
${staleCount}
+
stale / never
+
+
+
${activeRepos.length}
+
total active
+
+
+`); +``` + +```js +const table = html` + + + + + + + + + + + ${activeRepos + .sort((a, b) => ageMs(a.last_state_synced_at) - ageMs(b.last_state_synced_at)) + .map(r => html` + + + + + + `) + } + +
RepoDomainLast SyncedLast SBOMStatus
${r.slug}${r.domain_slug}${fmtAge(r.last_state_synced_at)}${fmtAge(r.last_sbom_at)} + ${r.status} +
`; +display(table); +``` + +--- + +## Gitea Inventory — Unregistered Repos + +_Repos on Gitea (`coulomb` org) not yet tracked by the state-hub._ + +```js +const unregistered = inventory.unregistered ?? []; +``` + +```js +if (unregistered.length === 0) { + display(html`

🎉 All Gitea repos are registered!

`); +} else { + display(html` + + + + + + + + + + + ${unregistered.map(r => html` + + + + + `)} + +
RepoLanguageDescriptionOnboard
+ ${r.gitea_name} + ${r.language || "—"}${r.description || "—"} + make register-project DOMAIN=? PROJECT_PATH=/home/worsch/${r.gitea_name} +
+ `); +} +``` + +--- + +## Hub-Only Repos + +_Registered in the state-hub but no matching Gitea repo found._ + +```js +const hubOnly = inventory.hub_only ?? []; +``` + +```js +if (hubOnly.length === 0) { + display(html`

None — all hub repos have a Gitea counterpart.

`); +} else { + display(html`
    ${hubOnly.map(r => html`
  • ${r.slug} — domain: ${r.domain}, status: ${r.status}
  • `)}
`); +} +``` + +--- + +_Sync legend: 🟢 < 1h   🟠 1–24h   🔴 > 24h or never_ + +_Gitea token required for full inventory — set GITEA_TOKEN in state-hub/.env._ diff --git a/state-hub/infra/README.md b/state-hub/infra/README.md new file mode 100644 index 0000000..eac4e42 --- /dev/null +++ b/state-hub/infra/README.md @@ -0,0 +1,89 @@ +# State Hub Infrastructure + +## Docker (PostgreSQL) + +```bash +# Start postgres (required for API) +make db + +# Start postgres + pgadmin +make db-tools +``` + +The compose file is `infra/docker-compose.yml`. Copy `.env.example` to `.env` and set +`POSTGRES_PASSWORD` before starting. + +--- + +## Periodic Repo Sync — systemd user timer + +The custodian sync timer runs `fix-consistency-all` every 15 minutes, keeping +workplan file state in sync with the state-hub DB automatically (belt-and-suspenders +alongside the per-repo git post-commit hooks). + +### Installed unit files + +| File | Location | +|------|----------| +| `custodian-sync.service` | `~/.config/systemd/user/custodian-sync.service` | +| `custodian-sync.timer` | `~/.config/systemd/user/custodian-sync.timer` | + +### Management commands + +```bash +# Check status +systemctl --user status custodian-sync.timer +systemctl --user list-timers custodian-sync.timer + +# View recent logs +journalctl --user -u custodian-sync.service -n 50 + +# Trigger immediately (for testing) +systemctl --user start custodian-sync.service + +# Disable +systemctl --user disable --now custodian-sync.timer + +# Re-enable +systemctl --user enable --now custodian-sync.timer +``` + +### Guard condition + +The service uses `ExecStartPre` to check the API is reachable before running: +``` +ExecStartPre=/usr/bin/curl -sf http://127.0.0.1:8000/state/health +``` +If the API is offline, the pre-check fails and systemd skips that sync run, marking the unit as failed (the timer will retry +in 15 minutes). + +### WSL2 note + +systemd user mode works in WSL2 when `systemd=true` is set under `[boot]` in `/etc/wsl.conf`. 
+If systemd is not available, fall back to crontab: + +```bash +# Crontab fallback (run crontab -e and add): +*/15 * * * * curl -sf http://127.0.0.1:8000/state/health && cd ~/the-custodian/state-hub && .venv/bin/python scripts/consistency_check.py --all --fix >> /tmp/custodian-sync.log 2>&1 +``` + +--- + +## Post-commit hooks + +Each registered repo can have a custodian sync hook installed that triggers +`fix-consistency` automatically after every commit: + +```bash +# Install into one repo +make install-hooks REPO=marki-docx + +# Install into all active registered repos +make install-hooks-all + +# Remove from one repo +make remove-hooks REPO=marki-docx +``` + +The hook is idempotent (guarded by `# custodian-sync-hook` marker) and runs +in the background so it does not block the commit. diff --git a/state-hub/mcp_server/server.py b/state-hub/mcp_server/server.py index fb17b37..836a6bb 100644 --- a/state-hub/mcp_server/server.py +++ b/state-hub/mcp_server/server.py @@ -1416,6 +1416,24 @@ def update_repo_goal( return json.dumps(goal, indent=2) +@mcp.tool() +def get_repo_dispatch(repo_slug: str) -> str: + """Return active workstreams, pending tasks, and goal for a repo. + + Use this at the start of a repo agent session to discover what work is + pending without needing to read the full state summary or scan workplan + files. The response includes: + - active_goal: the highest-priority active repo goal + - active_workstreams: list of active workstreams with pending tasks + - human_interventions: tasks that need human input (needs_human=true) + - last_state_synced_at: when the repo was last synced to the hub + + Args: + repo_slug: Slug of the repository (e.g. 
'marki-docx') + """ + return json.dumps(_get(f"/repos/{repo_slug}/dispatch"), indent=2) + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/state-hub/migrations/versions/e2f3a4b5c6d7_add_last_state_synced_at_to_repos.py b/state-hub/migrations/versions/e2f3a4b5c6d7_add_last_state_synced_at_to_repos.py new file mode 100644 index 0000000..c7b5111 --- /dev/null +++ b/state-hub/migrations/versions/e2f3a4b5c6d7_add_last_state_synced_at_to_repos.py @@ -0,0 +1,26 @@ +"""Add last_state_synced_at to managed_repos + +Revision ID: e2f3a4b5c6d7 +Revises: d6e7f8a9b0c1 +Create Date: 2026-03-16 00:00:00.000000 +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "e2f3a4b5c6d7" +down_revision: Union[str, None] = "d6e7f8a9b0c1" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "managed_repos", + sa.Column("last_state_synced_at", sa.DateTime(timezone=True), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("managed_repos", "last_state_synced_at") diff --git a/state-hub/scripts/consistency_check.py b/state-hub/scripts/consistency_check.py index e160ef6..ab14107 100644 --- a/state-hub/scripts/consistency_check.py +++ b/state-hub/scripts/consistency_check.py @@ -191,9 +191,18 @@ def _inject_task_id_into_block( task_meta = _parse_yaml_block(block_content.strip()) if str(task_meta.get("id", "")) != match_id: return m.group(0) - if field_name in task_meta: + existing_val = task_meta.get(field_name) + if existing_val is not None and str(existing_val).strip() not in ("", "~", "null", "None", "none"): return m.group(0) - new_content = block_content.rstrip() + f"\n{field_name}: \"{field_value}\"" + # Replace existing null/~ line if present, otherwise append + new_content = re.sub( + 
rf"^{re.escape(field_name)}:.*$", + f'{field_name}: "{field_value}"', + block_content, + flags=re.MULTILINE, + ) + if new_content == block_content: + new_content = block_content.rstrip() + f"\n{field_name}: \"{field_value}\"" return f"```task\n{new_content}\n```" new_text = _TASK_BLOCK_RE.sub(_replace, text) @@ -461,7 +470,10 @@ def check_repo(api_base: str, repo_slug: str) -> ConsistencyReport: if task.get("_parse_error"): continue t_id = str(task.get("id", "")).strip() - t_sh_id = str(task.get("state_hub_task_id", "")).strip().strip('"') + _raw_sh = task.get("state_hub_task_id") + t_sh_id = "" if _raw_sh is None else str(_raw_sh).strip().strip('"') + if t_sh_id in ("~", "null", "None", "none"): + t_sh_id = "" t_status = str(task.get("status", "")).strip() if t_sh_id: @@ -724,6 +736,12 @@ def fix_repo(api_base: str, repo_slug: str) -> ConsistencyReport: except Exception as e: report.fixes_applied.append(f"{issue.check_id} ERROR: {e}") + # Record that a sync run happened for this repo + from datetime import timezone as _tz + import datetime as _dt + now_iso = _dt.datetime.now(_tz.utc).isoformat() + _api_patch(api_base, f"/repos/{repo_slug}/", {"last_state_synced_at": now_iso}) + return report diff --git a/state-hub/scripts/gitea_inventory.py b/state-hub/scripts/gitea_inventory.py new file mode 100644 index 0000000..409180d --- /dev/null +++ b/state-hub/scripts/gitea_inventory.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 +"""gitea_inventory.py — compare Gitea repos against state-hub registered repos. + +Outputs three sections: + 1. Registered — in both Gitea and state-hub (shows last_state_synced_at) + 2. Unregistered — on Gitea but not in state-hub (candidates for onboarding) + 3. Hub-only — in state-hub but no matching Gitea repo (local/stale) + +Usage: + uv run python scripts/gitea_inventory.py [--json] [--api-base URL] + make gitea-inventory + make gitea-inventory JSON=1 + +Environment (from .env or shell): + GITEA_URL — base URL, e.g. 
http://92.205.130.254:32166 + GITEA_TOKEN — personal access token (needs read:repo scope) + API_BASE — state-hub API, default http://127.0.0.1:8000 +""" +from __future__ import annotations + +import argparse +import json +import os +import sys +import urllib.parse +import urllib.request +from datetime import datetime, timezone + + +# --------------------------------------------------------------------------- +# Config +# --------------------------------------------------------------------------- + +DEFAULT_API_BASE = "http://127.0.0.1:8000" +DEFAULT_GITEA_URL = "http://92.205.130.254:32166" +GITEA_ORG = "coulomb" + + +def _load_env() -> None: + """Load .env file from state-hub root if present (simple key=value parser).""" + env_path = os.path.join(os.path.dirname(__file__), "..", ".env") + env_path = os.path.normpath(env_path) + if not os.path.exists(env_path): + return + with open(env_path) as f: + for line in f: + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, _, val = line.partition("=") + key = key.strip() + val = val.strip().strip('"').strip("'") + if key and key not in os.environ: + os.environ[key] = val + + +# --------------------------------------------------------------------------- +# HTTP helpers +# --------------------------------------------------------------------------- + +def _get(url: str, token: str | None = None) -> dict | list: + req = urllib.request.Request(url) + if token: + req.add_header("Authorization", f"token {token}") + req.add_header("Accept", "application/json") + try: + with urllib.request.urlopen(req, timeout=10) as resp: + return json.loads(resp.read().decode()) + except Exception as exc: + print(f" HTTP error: {url} → {exc}", file=sys.stderr) + return [] + + +def _gitea_pages(base_url: str, path: str, token: str | None) -> list[dict]: + """Paginate through a Gitea list endpoint.""" + results = [] + page = 1 + while True: + url = f"{base_url}{path}?limit=50&page={page}" + data = 
_get(url, token) + if not isinstance(data, list) or not data: + break + results.extend(data) + if len(data) < 50: + break + page += 1 + return results + + +# --------------------------------------------------------------------------- +# Fetch data +# --------------------------------------------------------------------------- + +def fetch_gitea_repos(gitea_url: str, token: str | None) -> list[dict]: + """Return all repos in the coulomb org (+ user repos if token is set).""" + org_repos = _gitea_pages(gitea_url, f"/api/v1/orgs/{GITEA_ORG}/repos", token) + # Also fetch user repos that may not be in the org + user_repos = _gitea_pages(gitea_url, "/api/v1/user/repos", token) if token else [] + # Deduplicate by full_name + seen: set[str] = set() + combined = [] + for r in org_repos + user_repos: + name = r.get("full_name", "") + if name not in seen: + seen.add(name) + combined.append(r) + return combined + + +def fetch_hub_repos(api_base: str) -> list[dict]: + return _get(f"{api_base}/repos/") or [] # type: ignore[return-value] + + +# --------------------------------------------------------------------------- +# Match logic +# --------------------------------------------------------------------------- + +def _slug_candidates(gitea_repo: dict) -> set[str]: + """Slug candidates from a Gitea repo entry.""" + name = gitea_repo.get("name", "") + # state-hub slugs are kebab-case; gitea names may use _ or - + return {name, name.replace("_", "-"), name.lower(), name.lower().replace("_", "-")} + + +def build_report(gitea_repos: list[dict], hub_repos: list[dict]) -> dict: + hub_by_slug: dict[str, dict] = {r["slug"]: r for r in hub_repos} + hub_matched: set[str] = set() + + registered = [] + unregistered = [] + + for gr in gitea_repos: + candidates = _slug_candidates(gr) + matched_slug = next((c for c in candidates if c in hub_by_slug), None) + if matched_slug: + hub_matched.add(matched_slug) + hr = hub_by_slug[matched_slug] + registered.append({ + "slug": matched_slug, + 
"gitea_name": gr.get("name"), + "gitea_url": gr.get("html_url"), + "domain": hr.get("domain_slug"), + "status": hr.get("status"), + "last_state_synced_at": hr.get("last_state_synced_at"), + "last_sbom_at": hr.get("last_sbom_at"), + }) + else: + unregistered.append({ + "gitea_name": gr.get("name"), + "gitea_url": gr.get("html_url"), + "description": gr.get("description") or "", + "language": gr.get("language") or "", + "stars": gr.get("stars_count", 0), + }) + + hub_only = [ + {"slug": slug, "domain": r.get("domain_slug"), "status": r.get("status")} + for slug, r in hub_by_slug.items() + if slug not in hub_matched + ] + + return { + "generated_at": datetime.now(timezone.utc).isoformat(), + "registered": registered, + "unregistered": unregistered, + "hub_only": hub_only, + } + + +# --------------------------------------------------------------------------- +# Rendering +# --------------------------------------------------------------------------- + +def _age(ts: str | None) -> str: + if not ts: + return "never" + try: + dt = datetime.fromisoformat(ts.replace("Z", "+00:00")) + delta = datetime.now(timezone.utc) - dt + h = int(delta.total_seconds() // 3600) + if h < 1: + return f"{int(delta.total_seconds() // 60)}m ago" + if h < 24: + return f"{h}h ago" + return f"{delta.days}d ago" + except Exception: + return ts + + +def render_text(report: dict) -> str: + SEP = "=" * 70 + lines = [ + "Custodian Gitea Inventory", + f"Generated: {report['generated_at']}", + SEP, + ] + + # Registered + reg = report["registered"] + lines.append(f"\n✅ REGISTERED ({len(reg)}) — in both Gitea and state-hub") + if reg: + lines.append(f" {'slug':<30} {'domain':<20} {'synced':<15} {'sbom'}") + lines.append(f" {'-'*28} {'-'*18} {'-'*13} {'-'*13}") + for r in sorted(reg, key=lambda x: x["slug"]): + synced = _age(r["last_state_synced_at"]) + sbom = _age(r["last_sbom_at"]) + lines.append(f" {r['slug']:<30} {(r['domain'] or ''):<20} {synced:<15} {sbom}") + else: + lines.append(" (none)") + + # 
Unregistered + unreg = report["unregistered"] + lines.append(f"\n⚠ UNREGISTERED ({len(unreg)}) — on Gitea but not in state-hub") + if unreg: + for r in sorted(unreg, key=lambda x: x["gitea_name"]): + desc = f" — {r['description']}" if r["description"] else "" + lang = f" [{r['language']}]" if r["language"] else "" + lines.append(f" {r['gitea_name']}{lang}{desc}") + lines.append(f"\n To onboard: make register-project DOMAIN= PROJECT_PATH=/home/worsch/") + else: + lines.append(" (none — all Gitea repos are registered 🎉)") + + # Hub-only + hub_only = report["hub_only"] + lines.append(f"\n🔵 HUB-ONLY ({len(hub_only)}) — in state-hub but no matching Gitea repo") + if hub_only: + for r in sorted(hub_only, key=lambda x: x["slug"]): + lines.append(f" {r['slug']:<30} domain={r['domain'] or '?'} status={r['status']}") + else: + lines.append(" (none)") + + lines.append(f"\n{SEP}") + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main() -> None: + _load_env() + + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--json", action="store_true", help="Output JSON instead of text") + parser.add_argument("--api-base", default=os.environ.get("API_BASE", DEFAULT_API_BASE)) + args = parser.parse_args() + + gitea_url = os.environ.get("GITEA_URL", DEFAULT_GITEA_URL).rstrip("/") + token = os.environ.get("GITEA_TOKEN") or None + + if not token: + print("⚠ GITEA_TOKEN not set — only public repos will be visible", file=sys.stderr) + + print("Fetching Gitea repos...", file=sys.stderr) + gitea_repos = fetch_gitea_repos(gitea_url, token) + print(f" {len(gitea_repos)} repos found on Gitea", file=sys.stderr) + + print("Fetching state-hub repos...", file=sys.stderr) + hub_repos = fetch_hub_repos(args.api_base) + print(f" {len(hub_repos)} repos registered in hub", 
file=sys.stderr) + + report = build_report(gitea_repos, hub_repos) + + if args.json: + print(json.dumps(report, indent=2)) + else: + print(render_text(report)) + + +if __name__ == "__main__": + main() diff --git a/state-hub/scripts/install_hooks.sh b/state-hub/scripts/install_hooks.sh new file mode 100755 index 0000000..4159dbc --- /dev/null +++ b/state-hub/scripts/install_hooks.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash +# install_hooks.sh — install a custodian post-commit sync hook into registered repos. +# +# Usage: +# ./install_hooks.sh --repo # install into one repo +# ./install_hooks.sh --all # install into all registered repos +# ./install_hooks.sh --repo --remove # remove hook from one repo +# ./install_hooks.sh --all --remove # remove hook from all repos +# +# The hook runs `make fix-consistency REPO=` in the state-hub after each +# commit, keeping the hub in sync with workplan file changes automatically. +# +# Idempotent: the hook block is guarded by a marker comment. Running twice is safe. + +set -euo pipefail + +STATEHUB_DIR="$(cd "$(dirname "$0")/.." 
&& pwd)" +API_BASE="${STATE_HUB_API:-http://127.0.0.1:8000}" +MARKER="# custodian-sync-hook" + +usage() { + echo "Usage: $0 --repo | --all [--remove]" + exit 1 +} + +# ── Arg parsing ─────────────────────────────────────────────────────────────── +REPO_SLUG="" +DO_ALL=false +REMOVE=false + +while [[ $# -gt 0 ]]; do + case "$1" in + --repo) REPO_SLUG="$2"; shift 2 ;; + --all) DO_ALL=true; shift ;; + --remove) REMOVE=true; shift ;; + -h|--help) usage ;; + *) echo "Unknown argument: $1"; usage ;; + esac +done + +if [[ -z "$REPO_SLUG" && "$DO_ALL" == false ]]; then usage; fi + +# ── Helper: resolve local path for a repo slug ─────────────────────────────── +resolve_path() { + local slug="$1" + # Try the registered local_path first + local api_path + api_path=$(curl -sf "${API_BASE}/repos/${slug}/" | python3 -c \ + "import json,sys; d=json.load(sys.stdin); print(d.get('local_path') or '')" 2>/dev/null || true) + if [[ -n "$api_path" && -d "$api_path" ]]; then + echo "$api_path" + return + fi + # Fall back to convention: /home// + local conventional="/home/$(whoami)/${slug}" + if [[ -d "$conventional" ]]; then + echo "$conventional" + return + fi + echo "" +} + +# ── Helper: install hook into one repo ─────────────────────────────────────── +install_hook() { + local slug="$1" + local repo_path + repo_path=$(resolve_path "$slug") + if [[ -z "$repo_path" ]]; then + echo " ⚠ $slug: no local path found — skipping" + return + fi + if [[ ! 
-d "$repo_path/.git" ]]; then + echo " ⚠ $slug: $repo_path is not a git repo — skipping" + return + fi + + local hook_file="$repo_path/.git/hooks/post-commit" + local hook_block + hook_block=$(cat </dev/null 2>&1; then + (cd "${STATEHUB_DIR}" && make fix-consistency REPO=${slug} >/dev/null 2>&1 &) +fi +${MARKER}-end +BLOCK +) + + if [[ -f "$hook_file" ]] && grep -q "$MARKER" "$hook_file"; then + echo " ✓ $slug: hook already present at $hook_file" + return + fi + + if [[ -f "$hook_file" ]]; then + # Prepend to existing hook + local existing + existing=$(cat "$hook_file") + printf '#!/usr/bin/env bash\n%s\n\n%s\n' "$hook_block" "$existing" > "$hook_file" + else + printf '#!/usr/bin/env bash\n%s\n' "$hook_block" > "$hook_file" + fi + chmod +x "$hook_file" + echo " ✅ $slug: hook installed at $hook_file" +} + +# ── Helper: remove hook from one repo ──────────────────────────────────────── +remove_hook() { + local slug="$1" + local repo_path + repo_path=$(resolve_path "$slug") + if [[ -z "$repo_path" || ! -f "$repo_path/.git/hooks/post-commit" ]]; then + echo " – $slug: no hook file found — skipping" + return + fi + + local hook_file="$repo_path/.git/hooks/post-commit" + if ! 
grep -q "$MARKER" "$hook_file"; then + echo " – $slug: custodian marker not found in hook — skipping" + return + fi + + # Remove the marked block (between MARKER and MARKER-end inclusive) + python3 - "$hook_file" <<'PY' +import sys, re +path = sys.argv[1] +text = open(path).read() +cleaned = re.sub( + r'# custodian-sync-hook.*?# custodian-sync-hook-end\n?', + '', + text, + flags=re.DOTALL, +) +open(path, 'w').write(cleaned) +PY + echo " 🗑 $slug: hook block removed from $hook_file" +} + +# ── Collect repo slugs ──────────────────────────────────────────────────────── +if $DO_ALL; then + mapfile -t SLUGS < <(curl -sf "${API_BASE}/repos/" | python3 -c \ + "import json,sys; [print(r['slug']) for r in json.load(sys.stdin) if r.get('status') == 'active']") +else + SLUGS=("$REPO_SLUG") +fi + +echo "Custodian sync hook — $( $REMOVE && echo 'removing' || echo 'installing' ) for ${#SLUGS[@]} repo(s)" +for slug in "${SLUGS[@]}"; do + if $REMOVE; then remove_hook "$slug"; else install_hook "$slug"; fi +done +echo "Done." diff --git a/workplans/CUST-WP-0014-repo-sync-automation.md b/workplans/CUST-WP-0014-repo-sync-automation.md new file mode 100644 index 0000000..6f8639d --- /dev/null +++ b/workplans/CUST-WP-0014-repo-sync-automation.md @@ -0,0 +1,246 @@ +--- +id: CUST-WP-0014 +type: workplan +title: Repo Sync Automation & Gitea Inventory +domain: custodian +repo: the-custodian +status: done +state_hub_workstream_id: 27ea80bd-76bf-44a7-b0ed-e09748d5390b +created: 2026-03-16 +updated: 2026-03-16 +--- + +# CUST-WP-0014 — Repo Sync Automation & Gitea Inventory + +## Problem + +When a repo agent completes work and commits, the state-hub does not automatically +learn about it. Task statuses in workplan `.md` files go unsynced until a human +manually runs `make fix-consistency REPO=`. This breaks the episodic memory +loop: future sessions see stale hub state and give wrong orientation. + +In parallel, the custodian only tracks repos that have been manually registered. 
+All other repos living on Gitea (`http://92.205.130.254:32166`, org `coulomb`) are +invisible — no workplan tracking, no SBOM, no goal alignment. + +## Goal + +1. **Automatic sync**: after every commit in a registered repo, the state-hub learns + about it within seconds — no agent discipline required. +2. **Gitea inventory**: the hub knows about every repo on Gitea; unregistered repos + are surfaced so they can be onboarded or explicitly marked out-of-scope. +3. **Sync timestamp**: every registered repo carries a `last_state_synced_at` + timestamp so health dashboards can detect stale repos at a glance. +4. **Dispatch endpoint** (Tier 3): the hub can tell any repo what active workplan + it should be working on and what tasks are pending — foundation for autonomous + agent sessions. + +## Architecture + +``` +┌──────────────┐ post-commit hook ┌───────────────────────────┐ +│ repo agent │ ──────────────────► │ fix-consistency REPO=x │ +│ (any repo) │ │ → updates task statuses │ +└──────────────┘ │ → sets last_state_synced │ + └───────────────────────────┘ + ▲ +┌──────────────────────────┐ cron (15 min) │ +│ fix-consistency-all │ ─────────────────────┘ (belt & suspenders) +└──────────────────────────┘ + +┌─────────────────────────────┐ +│ Gitea API (:32166/coulomb) │ ──► gitea_inventory.py ──► surface gaps +└─────────────────────────────┘ + +┌─────────────────────────────────────┐ +│ GET /repos/{slug}/dispatch │ ──► active workplan + pending tasks +└─────────────────────────────────────┘ for autonomous agent sessions +``` + +--- + +## Task: Add `last_state_synced_at` to managed_repos + +```task +id: CUST-WP-0014-T01 +status: todo +priority: high +state_hub_task_id: "f35c86a9-d927-4543-9e74-ff32cadcc766" +``` + +Migration: add `last_state_synced_at: DateTime (nullable)` to `managed_repos`. +Update `consistency_check.py` to PATCH this field to `utcnow()` after every +successful `--fix` run via `PATCH /repos/{slug}/` (add endpoint if missing). 
+Update `RepoRead` schema to include the field. + +Acceptance: `GET /repos/the-custodian/` shows `last_state_synced_at` non-null +after running `make fix-consistency REPO=the-custodian`. + +--- + +## Task: Git post-commit hook installer + +```task +id: CUST-WP-0014-T02 +status: todo +priority: high +state_hub_task_id: "97c831d9-d915-4b77-9dd6-929ff24dfd5e" +``` + +Create `state-hub/scripts/install_hooks.sh`: +- Accepts `--repo <slug>` or `--all` (iterates `GET /repos/`) +- Resolves repo path from slug (convention: `/home/worsch/<slug>` or via a + `local_path` field — see T05) +- Writes `.git/hooks/post-commit` that calls: + ```bash + cd ~/the-custodian/state-hub && make fix-consistency REPO=<slug> + ``` +- Idempotent: prepends block guarded by `# custodian-sync-hook` marker if hook + already exists; skips if marker present +- Makes hook executable + +Add `make install-hooks REPO=<slug>` and `make install-hooks-all` Makefile targets. + +Acceptance: commit in `marki-docx` → `last_state_synced_at` updates within 2s. + +--- + +## Task: Periodic cron sync (belt-and-suspenders) + +```task +id: CUST-WP-0014-T03 +status: todo +priority: medium +state_hub_task_id: "06be1c0b-893b-4fbb-967c-9842ba59ffaa" +``` + +Add a cron entry (via systemd user timer or direct crontab) that runs: +``` +cd ~/the-custodian/state-hub && make fix-consistency-all +``` +every 15 minutes when the state-hub API is reachable. Use a guard: +```bash +curl -sf http://127.0.0.1:8000/state/health || exit 0 +``` + +Document the timer setup in `state-hub/infra/README.md` (systemd user timer +preferred on WSL2 if systemd is available; otherwise crontab fallback). + +Acceptance: after stopping all agents for 15 min and making a manual workplan +edit, `last_state_synced_at` updates without human intervention. 
+ +--- + +## Task: Gitea repo discovery tool + +```task +id: CUST-WP-0014-T04 +status: todo +priority: high +state_hub_task_id: "f05a04e4-10f3-4c41-a73f-057f0dea5126" +``` + +Create `state-hub/scripts/gitea_inventory.py`: +- Reads Gitea base URL + token from env (`GITEA_URL`, `GITEA_TOKEN`) or `.env` +- Calls `GET /api/v1/orgs/coulomb/repos?limit=50&page=N` (paginate) +- Also includes user repos if needed: `GET /api/v1/user/repos` +- Compares result against `GET /repos/` from state-hub +- Outputs three sections: + 1. **Registered** — in both (show `last_state_synced_at`) + 2. **Unregistered** — on Gitea but not in hub (candidate for onboarding) + 3. **Hub-only** — in hub but no matching Gitea remote (stale or local-only) + +Add `make gitea-inventory` Makefile target. + +Add `GITEA_URL=http://92.205.130.254:32166` and `GITEA_TOKEN=` to `.env.example`. + +Acceptance: running `make gitea-inventory` with a valid token prints a clear +three-section report. + +--- + +## Task: Dashboard — Repo Sync Health page + +```task +id: CUST-WP-0014-T05 +status: todo +priority: medium +state_hub_task_id: "ceae2737-4762-49e5-ae41-9eca3ca79dda" +``` + +Add `dashboard/src/repo-sync.md` Observable page: +- Table: all registered repos, `last_state_synced_at` (age in h/m), colour-coded + (green < 1h, orange 1–24h, red > 24h or null) +- Section: Gitea repos not yet registered (calls a new data loader that wraps + `gitea_inventory.py --json`) +- Inline "Register" action placeholder (links to `make register-project` docs) + +Add to nav in `observablehq.config.js`. + +--- + +## Task: Dispatch endpoint + +```task +id: CUST-WP-0014-T06 +status: todo +priority: low +state_hub_task_id: "86b646f3-a966-4ff4-9c9f-8684f1e81c54" +``` + +Add `GET /repos/{slug}/dispatch` router in `api/routers/repos.py`: + +Response shape: +```json +{ + "repo_slug": "marki-docx", + "active_goal": { "id": "...", "title": "...", "description": "..." 
}, + "active_workstreams": [ + { + "id": "...", + "title": "...", + "workplan_file": "workplans/MRKD-WP-0001-level1-core.md", + "pending_tasks": [ + { "id": "...", "title": "...", "priority": "high", "needs_human": false } + ] + } + ], + "human_interventions": [...], + "last_state_synced_at": "2026-03-16T..." +} +``` + +`workplan_file` is derived from the workstream's `slug` field matched against +known workplan naming conventions — or stored explicitly (stretch: add +`workplan_path` column to workstreams). + +This endpoint is the foundation for a cron-triggered autonomous agent session: +```bash +curl http://127.0.0.1:8000/repos/marki-docx/dispatch | \ + claude --print "You are the marki-docx agent. $(cat -)" +``` + +MCP tool: `get_repo_dispatch(repo_slug)`. + +--- + +## Milestones + +| # | Milestone | Tasks | +|---|-----------|-------| +| M1 | Sync timestamp live | T01 | +| M2 | Auto-sync on commit | T01, T02 | +| M3 | Belt-and-suspenders | T03 | +| M4 | Gitea inventory visible | T04, T05 | +| M5 | Dispatch endpoint ready | T06 | + +## Dependencies + +- Consistency engine (CUST-WP-0008) — completed ✓ +- `managed_repos` table (v0.5) — live ✓ + +## Out of Scope + +- Autonomous agent scheduling (that builds on T06 but is a separate workplan) +- Gitea webhook integration (post-commit hook covers the same use case locally) +- Multi-user Gitea orgs beyond `coulomb`