#!/usr/bin/env bash
# Prove POST /admin/sync works without restarting the activity-core worker.
#
# Flow: snapshot the worker pod, optionally run an operator-supplied fixture
# command, call POST /admin/sync, snapshot the worker pod again, require the
# pod uid and restart count to be unchanged, then post an evidence note to
# State Hub. Any failing gate posts a "failed" note naming that gate.
#
# Environment:
#   ACTIVITY_CORE_NAMESPACE                  namespace (default: activity-core)
#   ACTIVITY_CORE_CLUSTER_HOST               ssh host that runs kubectl
#                                            (default: railiance01); the value
#                                            "local" runs kubectl on this host
#   ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL        must be 1 when the host is "local"
#   ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND optional command run on the
#                                            cluster executor before the sync
#   ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE 1 = abort if no fixture command
#   STATE_HUB_URL                            State Hub base URL
#                                            (default: http://127.0.0.1:8000)
#   STATE_HUB_EVIDENCE_WORKSTREAM_ID         workstream id attached to the note
#   STATE_HUB_EVIDENCE_TASK_ID               task id attached to the note
#
# Exit status: 0 on success, 2 on configuration errors, otherwise the status
# of the failing command.

# -E (errtrace) is required: without it the ERR trap is not inherited by shell
# functions, so a failure inside cluster_bash would exit through `set -e`
# without ever posting the failure evidence.
set -Eeuo pipefail

NAMESPACE="${ACTIVITY_CORE_NAMESPACE:-activity-core}"
CLUSTER_HOST="${ACTIVITY_CORE_CLUSTER_HOST:-railiance01}"
STATE_HUB_URL="${STATE_HUB_URL:-http://127.0.0.1:8000}"
ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL="${ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL:-0}"
ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND="${ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND:-}"
ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE="${ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE:-0}"
EVIDENCE_WORKSTREAM_ID="${STATE_HUB_EVIDENCE_WORKSTREAM_ID:-2c9e8e96-ec6a-433c-9e6d-0efbcd18679e}"
EVIDENCE_TASK_ID="${STATE_HUB_EVIDENCE_TASK_ID:-60f3387d-3d14-42a9-b8a3-725a86468510}"
STARTED_AT="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"

# Mutable run state. CURRENT_GATE names the step in progress so a failure can
# be attributed to it; the *_JSON values are filled in as the gates complete.
CURRENT_GATE=startup
BEFORE_JSON=""
AFTER_JSON=""
FIXTURE_STATUS=skipped
SYNC_RESPONSE_JSON=""
EVIDENCE_NOTE_JSON=""

# Exported because the Python in post_evidence reads them via os.environ.
export NAMESPACE CLUSTER_HOST STATE_HUB_URL EVIDENCE_WORKSTREAM_ID EVIDENCE_TASK_ID
export STARTED_AT BEFORE_JSON AFTER_JSON FIXTURE_STATUS SYNC_RESPONSE_JSON

#######################################
# Print a progress message with the script's tag.
# Arguments: message words
# Outputs:   one tagged line on stdout
#######################################
log() {
  printf '[activity-core-admin-sync-smoke] %s\n' "$*"
}

#######################################
# Shell-quote a value so it can be embedded in a script string.
# Arguments: $1 - value to quote
# Outputs:   the %q-quoted value on stdout (no trailing newline)
#######################################
quote() {
  printf '%q' "$1"
}

#######################################
# Run a script on the cluster executor.
# Globals:   CLUSTER_HOST (read) - ssh target; empty means run locally
# Arguments: $1 - script text, fed to `bash -s` on stdin
# Outputs:   whatever the script writes
# Returns:   exit status of ssh / bash
#######################################
cluster_bash() {
  if [[ -n "$CLUSTER_HOST" ]]; then
    ssh "$CLUSTER_HOST" "bash -s" <<<"$1"
  else
    bash -s <<<"$1"
  fi
}

#######################################
# POST an evidence note to "$STATE_HUB_URL/progress/".
# Globals:   reads STATE_HUB_URL, CLUSTER_HOST, NAMESPACE, BEFORE_JSON,
#            AFTER_JSON, FIXTURE_STATUS, SYNC_RESPONSE_JSON, STARTED_AT,
#            EVIDENCE_WORKSTREAM_ID, EVIDENCE_TASK_ID from the environment;
#            exports EVIDENCE_STATUS and FAILING_GATE
# Arguments: $1 - status ("passed" selects the success summary; any other
#                 value selects the failure summary)
#            $2 - name of the failing gate (optional)
# Outputs:   the State Hub response body on stdout
# Returns:   exit status of python3 (non-zero if the request fails)
#######################################
post_evidence() {
  local status="$1" failing_gate="${2:-}"
  export EVIDENCE_STATUS="$status" FAILING_GATE="$failing_gate"
  python3 - <<'PY'
import json, os, sys, urllib.request


def env_json(name):
    # Decode an environment variable as JSON; keep undecodable text under
    # "raw" so the evidence note still records what was captured.
    raw = os.environ.get(name, "")
    if not raw:
        return None
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return {"raw": raw}


status = os.environ["EVIDENCE_STATUS"]
failing_gate = os.environ.get("FAILING_GATE") or None
detail = {
    "producer": "railiance-cluster",
    "verification": "activity-core no-restart admin sync smoke",
    "status": status,
    "failing_gate": failing_gate,
    "cluster_host": os.environ.get("CLUSTER_HOST") or "local-kubectl",
    "namespace": os.environ.get("NAMESPACE"),
    "worker_before": env_json("BEFORE_JSON"),
    "worker_after": env_json("AFTER_JSON"),
    "fixture_status": os.environ.get("FIXTURE_STATUS"),
    "sync_response": env_json("SYNC_RESPONSE_JSON"),
    "started_at": os.environ.get("STARTED_AT"),
}
summary = (
    "Railiance activity-core no-restart admin-sync smoke passed: POST /admin/sync returned expected counters and worker pod identity/restart count stayed stable."
    if status == "passed"
    else "Railiance activity-core no-restart admin-sync smoke failed"
    + (f" at {failing_gate}" if failing_gate else "")
    + "; see non-secret evidence detail."
)
payload = {"summary": summary, "event_type": "note", "author": "railiance-cluster", "detail": detail}
if os.environ.get("EVIDENCE_WORKSTREAM_ID"):
    payload["workstream_id"] = os.environ["EVIDENCE_WORKSTREAM_ID"]
if os.environ.get("EVIDENCE_TASK_ID"):
    payload["task_id"] = os.environ["EVIDENCE_TASK_ID"]
req = urllib.request.Request(
    os.environ["STATE_HUB_URL"].rstrip("/") + "/progress/",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req, timeout=20) as resp:
    sys.stdout.write(resp.read().decode())
PY
}

#######################################
# ERR trap handler: record the failing gate in State Hub, then exit with the
# status of the command that failed.
# Globals:   CURRENT_GATE (read)
#######################################
on_error() {
  local code=$?
  trap - ERR
  # With errtrace the trap also fires inside $(...) subshells. Post only from
  # the top-level shell; the subshell's exit status re-triggers the trap
  # there, so the note is sent exactly once.
  if ((BASH_SUBSHELL == 0)); then
    # Best effort: an unreachable State Hub must not mask the original status.
    post_evidence failed "$CURRENT_GATE" >/dev/null || true
  fi
  exit "$code"
}
trap on_error ERR

# "local" is a sentinel meaning "run kubectl on this machine"; it must be
# opted into explicitly. An empty CLUSTER_HOST makes cluster_bash skip ssh.
if [[ "$CLUSTER_HOST" == local ]]; then
  [[ "$ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL" == 1 ]] || {
    echo 'ACTIVITY_CORE_CLUSTER_HOST=local requires ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL=1' >&2
    exit 2
  }
  CLUSTER_HOST=""
fi
export CLUSTER_HOST

CURRENT_GATE='cluster executor preflight'
log "using cluster executor: ${CLUSTER_HOST:-local kubectl}"
cluster_bash 'set -euo pipefail; command -v kubectl >/dev/null; command -v python3 >/dev/null'

# Python run on the cluster executor: reads `kubectl get pod -o json` on stdin
# and prints one JSON object describing the first pod (by name) and its first
# container. It must not contain single quotes (it is a single-quoted string).
worker_snapshot_script='import json,sys
items=json.load(sys.stdin).get("items",[])
if not items:
    raise SystemExit("no actcore-worker pods found")
pod=sorted(items,key=lambda item:item["metadata"]["name"])[0]
statuses=pod["status"].get("containerStatuses") or []
if not statuses:
    raise SystemExit("pod "+pod["metadata"]["name"]+" reports no containerStatuses yet")
container=statuses[0]
print(json.dumps({"name":pod["metadata"]["name"],"uid":pod["metadata"]["uid"],"phase":pod["status"].get("phase"),"restart_count":container.get("restartCount",0),"image":container.get("image"),"image_id":container.get("imageID")}, sort_keys=True))'

#######################################
# Snapshot the actcore-worker pod through the cluster executor.
# Globals:   NAMESPACE, worker_snapshot_script (read)
# Outputs:   one JSON object (name, uid, phase, restart_count, image,
#            image_id) on stdout
# Returns:   non-zero if kubectl or the snapshot script fails
#######################################
capture_worker_snapshot() {
  cluster_bash "kubectl -n $(quote "$NAMESPACE") get pod -l app.kubernetes.io/name=actcore-worker -o json | python3 -c $(quote "$worker_snapshot_script")"
}

CURRENT_GATE='worker baseline capture'
BEFORE_JSON="$(capture_worker_snapshot)"
export BEFORE_JSON

CURRENT_GATE='admin sync fixture'
if [[ -n "$ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND" ]]; then
  log 'running operator-supplied fixture command'
  cluster_bash "$ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND"
  FIXTURE_STATUS=ran
elif [[ "$ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE" == 1 ]]; then
  echo 'ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE=1 but no fixture command was supplied' >&2
  exit 2
else
  FIXTURE_STATUS=skipped
fi
export FIXTURE_STATUS

CURRENT_GATE='POST /admin/sync'
log 'calling POST /admin/sync?definitions=true&schedules=true'
# FIXME(review): the reviewed copy of this script is truncated at this point.
# The surviving text reads
#     SYNC_RESPONSE_JSON="$( cluster_bash "$(cat < {after['uid']}") ...
# Everything between `cat <` and `{after['uid']}` is gone: the remote script
# that issues the POST, any validation of its response, and the start of the
# stability check. Restore the SYNC_RESPONSE_JSON assignment from version
# control here. Until then the guard below stops the run, so the script can
# never report a pass without having called the endpoint.
if [[ -z "$SYNC_RESPONSE_JSON" ]]; then
  echo 'POST /admin/sync step is missing from this copy of the script; restore it from version control' >&2
  # A plain exit (not a failing command) bypasses the ERR trap on purpose: a
  # damaged script is not evidence about the cluster. 70 keeps it distinct
  # from the configuration errors above, which exit 2.
  exit 70
fi
export SYNC_RESPONSE_JSON

# NOTE(review): the gate name, the after-snapshot capture and the uid message
# are reconstructed from the surviving fragment (a before/after comparison of
# uid and restart_count, closed by PY and `export AFTER_JSON`). Confirm them
# against version control.
CURRENT_GATE='worker stability check'
AFTER_JSON="$(capture_worker_snapshot)"
# BEFORE_JSON and AFTER_JSON already carry the export attribute, so the
# comparison below sees the values assigned above.
python3 - <<'PY'
import json, os

before = json.loads(os.environ["BEFORE_JSON"])
after = json.loads(os.environ["AFTER_JSON"])
if before['uid'] != after['uid']:
    raise SystemExit(f"worker pod uid changed: {before['uid']} -> {after['uid']}")
if before['restart_count'] != after['restart_count']:
    raise SystemExit(f"worker restart count changed: {before['restart_count']} -> {after['restart_count']}")
PY
export AFTER_JSON

CURRENT_GATE='State Hub evidence note'
log 'posting non-secret evidence note to State Hub'
EVIDENCE_NOTE_JSON="$(post_evidence passed '')"

# All gates passed; nothing after this point should post failure evidence.
trap - ERR
log 'verification passed'
printf '%s\n' "$EVIDENCE_NOTE_JSON"