156 lines
6.7 KiB
Bash
Executable File
156 lines
6.7 KiB
Bash
Executable File
#!/usr/bin/env bash
# Prove POST /admin/sync works without restarting the activity-core worker.
#
# Optional environment overrides read below:
#   ACTIVITY_CORE_NAMESPACE                   namespace passed to kubectl -n
#   ACTIVITY_CORE_CLUSTER_HOST                ssh target for cluster commands
#   STATE_HUB_URL                             base URL that receives the evidence note
#   ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL         must be 1 when the cluster host is "local"
#   ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND  command run on the executor before the sync
#   ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE  1 = abort when no fixture command is given
#   STATE_HUB_EVIDENCE_WORKSTREAM_ID          workstream id attached to the evidence note
#   STATE_HUB_EVIDENCE_TASK_ID                task id attached to the evidence note
set -euo pipefail

# --- Configuration (environment value, else built-in default) ---------------
NAMESPACE="${ACTIVITY_CORE_NAMESPACE:-activity-core}"
CLUSTER_HOST="${ACTIVITY_CORE_CLUSTER_HOST:-railiance01}"
STATE_HUB_URL="${STATE_HUB_URL:-http://127.0.0.1:8000}"
ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL="${ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL:-0}"
ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND="${ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND:-}"
ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE="${ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE:-0}"
EVIDENCE_WORKSTREAM_ID="${STATE_HUB_EVIDENCE_WORKSTREAM_ID:-2c9e8e96-ec6a-433c-9e6d-0efbcd18679e}"
EVIDENCE_TASK_ID="${STATE_HUB_EVIDENCE_TASK_ID:-60f3387d-3d14-42a9-b8a3-725a86468510}"

# --- Run state, filled in as the gates progress -----------------------------
STARTED_AT="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
CURRENT_GATE=startup     # name of the gate in progress; reported on failure
BEFORE_JSON=""           # worker pod snapshot taken before the sync
AFTER_JSON=""            # worker pod snapshot taken after the sync
FIXTURE_STATUS=skipped
SYNC_RESPONSE_JSON=""
EVIDENCE_NOTE_JSON=""

# Exported so the embedded Python in post_evidence can read them from its
# environment. Later assignments to an exported variable update the exported
# value, so exporting once here is enough.
export NAMESPACE CLUSTER_HOST STATE_HUB_URL EVIDENCE_WORKSTREAM_ID EVIDENCE_TASK_ID
export STARTED_AT BEFORE_JSON AFTER_JSON FIXTURE_STATUS SYNC_RESPONSE_JSON
|
|
|
|
# Write one line to stdout: the script tag followed by all arguments joined
# into a single string.
log() {
  local -r tag='[activity-core-admin-sync-smoke]'
  printf '%s %s\n' "$tag" "$*"
}
|
|
# Print $1 escaped with printf %q so it can be pasted into a shell command
# line as a single word. No trailing newline is written.
quote() {
  local escaped
  printf -v escaped '%q' "$1"
  printf '%s' "$escaped"
}
|
|
#######################################
# Run a shell script on the cluster executor.
# Globals:   CLUSTER_HOST (read) - ssh target; empty means run locally
# Arguments: $1 - script text, fed to "bash -s" on stdin
# Outputs:   whatever the script writes to stdout/stderr
# Returns:   exit status of the ssh / bash invocation
#######################################
cluster_bash() {
  local script="$1"
  local rc=0
  # Capture the status instead of letting errexit kill the shell inside this
  # function: without errtrace the ERR trap is not inherited by functions, so
  # an in-function exit would bypass the caller's ERR handler. Returning the
  # status makes the failure surface at the call site, where the trap applies.
  if [[ -n "$CLUSTER_HOST" ]]; then
    ssh "$CLUSTER_HOST" "bash -s" <<<"$script" || rc=$?
  else
    bash -s <<<"$script" || rc=$?
  fi
  return "$rc"
}
|
|
|
|
#######################################
# POST a progress note describing this run to "$STATE_HUB_URL/progress/".
# Globals:
#   Exports EVIDENCE_STATUS and FAILING_GATE for the embedded Python.
#   The Python reads from its environment: STATE_HUB_URL (required),
#   CLUSTER_HOST, NAMESPACE, BEFORE_JSON, AFTER_JSON, FIXTURE_STATUS,
#   SYNC_RESPONSE_JSON, STARTED_AT, EVIDENCE_WORKSTREAM_ID, EVIDENCE_TASK_ID.
# Arguments:
#   $1 - status; "passed" selects the success summary, anything else the
#        failure summary
#   $2 - optional name of the failing gate
# Outputs:   the HTTP response body, written to stdout
# Returns:   exit status of python3 (non-zero when the request raises)
#######################################
post_evidence() {
  local status="$1" failing_gate="${2:-}"
  export EVIDENCE_STATUS="$status" FAILING_GATE="$failing_gate"
  python3 - <<'PY'
import json, os, sys, urllib.request


def env_json(name):
    """Return the env var parsed as JSON, None when unset or empty, or
    {"raw": value} when the value is not valid JSON."""
    raw = os.environ.get(name, "")
    if not raw:
        return None
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return {"raw": raw}


status = os.environ["EVIDENCE_STATUS"]
# An empty FAILING_GATE is reported as null.
failing_gate = os.environ.get("FAILING_GATE") or None
detail = {
    "producer": "railiance-cluster",
    "verification": "activity-core no-restart admin sync smoke",
    "status": status,
    "failing_gate": failing_gate,
    "cluster_host": os.environ.get("CLUSTER_HOST") or "local-kubectl",
    "namespace": os.environ.get("NAMESPACE"),
    "worker_before": env_json("BEFORE_JSON"),
    "worker_after": env_json("AFTER_JSON"),
    "fixture_status": os.environ.get("FIXTURE_STATUS"),
    "sync_response": env_json("SYNC_RESPONSE_JSON"),
    "started_at": os.environ.get("STARTED_AT"),
}
summary = (
    "Railiance activity-core no-restart admin-sync smoke passed: POST /admin/sync returned expected counters and worker pod identity/restart count stayed stable."
    if status == "passed"
    else "Railiance activity-core no-restart admin-sync smoke failed" + (f" at {failing_gate}" if failing_gate else "") + "; see non-secret evidence detail."
)
payload = {"summary": summary, "event_type": "note", "author": "railiance-cluster", "detail": detail}
# The ids are attached only when they are non-empty.
if os.environ.get("EVIDENCE_WORKSTREAM_ID"):
    payload["workstream_id"] = os.environ["EVIDENCE_WORKSTREAM_ID"]
if os.environ.get("EVIDENCE_TASK_ID"):
    payload["task_id"] = os.environ["EVIDENCE_TASK_ID"]
req = urllib.request.Request(os.environ["STATE_HUB_URL"].rstrip("/") + "/progress/", data=json.dumps(payload).encode(), headers={"Content-Type": "application/json"}, method="POST")
with urllib.request.urlopen(req, timeout=20) as resp:
    sys.stdout.write(resp.read().decode())
PY
}
|
|
|
|
# ERR handler: report the failing gate to State Hub on a best-effort basis,
# then exit with the status of the command that failed.
on_error() {
  local exit_status=$?   # must stay first so $? is still the failing status
  trap - ERR             # disarm so the reporting below cannot re-enter
  if ! post_evidence failed "$CURRENT_GATE" >/dev/null; then
    : # a failed evidence post must not replace the original exit status
  fi
  exit "$exit_status"
}
|
|
# From here on a failing top-level command is reported through on_error.
trap on_error ERR

# "local" selects the local kubectl; it needs an explicit opt-in. An empty
# CLUSTER_HOST is what makes cluster_bash run locally instead of over ssh.
if [[ "$CLUSTER_HOST" == local ]]; then
  # A plain exit does not fire the ERR trap, so this configuration error
  # ends the run without posting an evidence note.
  [[ "$ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL" == 1 ]] || { echo 'ACTIVITY_CORE_CLUSTER_HOST=local requires ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL=1' >&2; exit 2; }
  CLUSTER_HOST=""
fi
export CLUSTER_HOST

# Gate: the executor must provide both kubectl and python3.
CURRENT_GATE='cluster executor preflight'
log "using cluster executor: ${CLUSTER_HOST:-local kubectl}"
cluster_bash 'set -euo pipefail; command -v kubectl >/dev/null; command -v python3 >/dev/null'
|
|
|
|
# Python filter run on the executor. It reads `kubectl get pod -o json` from
# stdin, takes the pod whose name sorts first, and prints a one-line JSON
# snapshot of that pod's identity and its first container's restart count and
# image. The text must not contain single quotes: it is a single-quoted shell
# string.
worker_snapshot_script='import json,sys
items=json.load(sys.stdin).get("items",[])
if not items: raise SystemExit("no actcore-worker pods found")
pod=sorted(items,key=lambda item:item["metadata"]["name"])[0]
statuses=pod["status"].get("containerStatuses") or []
if not statuses: raise SystemExit("worker pod "+pod["metadata"]["name"]+" has no container statuses")
container=statuses[0]
print(json.dumps({"name":pod["metadata"]["name"],"uid":pod["metadata"]["uid"],"phase":pod["status"].get("phase"),"restart_count":container.get("restartCount",0),"image":container.get("image"),"image_id":container.get("imageID")}, sort_keys=True))'

# Gate: record the worker pod state before the sync is triggered.
CURRENT_GATE='worker baseline capture'
BEFORE_JSON="$(cluster_bash "kubectl -n $(quote "$NAMESPACE") get pod -l app.kubernetes.io/name=actcore-worker -o json | python3 -c $(quote "$worker_snapshot_script")")"
export BEFORE_JSON
|
|
|
|
# Gate: optionally run an operator-supplied command on the executor before
# the sync. FIXTURE_STATUS records whether it ran and is included in the
# evidence note.
CURRENT_GATE='admin sync fixture'
if [[ -n "$ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND" ]]; then
  log 'running operator-supplied fixture command'
  cluster_bash "$ACTIVITY_CORE_ADMIN_SYNC_FIXTURE_COMMAND"
  FIXTURE_STATUS=ran
elif [[ "$ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE" == 1 ]]; then
  # A fixture was demanded but none was configured: abort with status 2.
  echo 'ACTIVITY_CORE_ADMIN_SYNC_REQUIRE_FIXTURE=1 but no fixture command was supplied' >&2
  exit 2
else
  FIXTURE_STATUS=skipped
fi
export FIXTURE_STATUS
|
|
|
|
# Gate: trigger the sync from inside the actcore-api deployment and validate
# the response shape. The outer heredoc (EOF) is unquoted so the escaped
# namespace is substituted locally; the inner heredoc (PY) is quoted and
# reaches the python process in the pod verbatim. The Python exits non-zero
# when a required section or key is missing or when "errors" is non-empty,
# and otherwise prints the response as JSON with sorted keys.
CURRENT_GATE='POST /admin/sync'
log 'calling POST /admin/sync?definitions=true&schedules=true'
SYNC_RESPONSE_JSON="$(
  cluster_bash "$(cat <<EOF
set -euo pipefail
kubectl -n $(quote "$NAMESPACE") exec -i deploy/actcore-api -- python - <<'PY'
import json, urllib.request
req = urllib.request.Request('http://localhost:8010/admin/sync?definitions=true&schedules=true', method='POST')
with urllib.request.urlopen(req, timeout=60) as resp:
    payload = json.loads(resp.read().decode())
required = [('definitions','synced'),('schedules','upserted'),('schedules','paused'),('schedules','deleted_orphans'),('errors',None)]
for section, key in required:
    if section not in payload:
        raise SystemExit(f'missing sync response section {section!r}')
    if key is not None and key not in payload[section]:
        raise SystemExit(f'missing sync response key {section}.{key}')
if payload.get('errors'):
    raise SystemExit('admin sync returned errors: ' + json.dumps(payload['errors']))
print(json.dumps(payload, sort_keys=True))
PY
EOF
)"
)"
export SYNC_RESPONSE_JSON
|
|
|
|
# Gate: take a second worker snapshot and fail when the pod uid or the
# restart count differs from the baseline.
CURRENT_GATE='worker no-restart verification'
AFTER_JSON="$(cluster_bash "kubectl -n $(quote "$NAMESPACE") get pod -l app.kubernetes.io/name=actcore-worker -o json | python3 -c $(quote "$worker_snapshot_script")")"
# Export before the comparison: the Python below reads AFTER_JSON from its
# environment.
export AFTER_JSON
python3 - <<'PY'
import json, os
before = json.loads(os.environ['BEFORE_JSON'])
after = json.loads(os.environ['AFTER_JSON'])
if before['uid'] != after['uid']:
    raise SystemExit(f"worker pod changed uid: {before['uid']} -> {after['uid']}")
if before['restart_count'] != after['restart_count']:
    raise SystemExit(f"worker restart count changed: {before['restart_count']} -> {after['restart_count']}")
PY
|
|
|
|
# Gate: publish the success note. If the post itself fails, the ERR trap is
# still armed and reports this gate as the failing one.
CURRENT_GATE='State Hub evidence note'
log 'posting non-secret evidence note to State Hub'
EVIDENCE_NOTE_JSON="$(post_evidence passed '')"

# All gates passed: disarm the failure handler and print the State Hub
# response.
trap - ERR
log 'verification passed'
printf '%s\n' "$EVIDENCE_NOTE_JSON"
|