Files
railiance-cluster/tests/smoke_kube.sh
tegwick 660a63c674
Some checks failed
railiance-tests / smoke (push) Has been cancelled
feat(pgpool): implement WP-0003 T01-T04 — permanent fix for pgpool-password bug
T01: helm/gitea-values.yaml with postgresql-ha.pgpool.adminPassword
     (fill REPLACE_WITH_PGPOOL_ADMIN_PASSWORD before helm upgrade)
T02: tests/smoke_kube.sh — add pgpool and postgresql-ha pod health checks
T03: tests/test_ha_failover.sh — D3 HA failover test script
T04: docs/incidents/2026-03-10-pgpool-missing-secret.md + README link

Also: make test-ha-failover target, Makefile .PHONY updated.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-10 14:16:22 +01:00

66 lines
3.3 KiB
Bash

#!/usr/bin/env bash
set -euo pipefail

# Running totals of passed / failed checks.
PASS=0
FAIL=0

# ok MESSAGE... — print an "[OK]" line and count one passed check.
ok() {
  printf '%s\n' "[OK] $*"
  PASS=$((PASS + 1))
}

# fail MESSAGE... — print a "[FAIL]" line and count one failed check.
fail() {
  printf '%s\n' "[FAIL] $*"
  FAIL=$((FAIL + 1))
}
# ── Node Ready ───────────────────────────────────────────────────────────────
# Passes when the `kubectl get nodes` listing contains " Ready" anywhere.
# The listing is captured before matching instead of being piped into
# `grep -q`: under `set -o pipefail`, grep exiting on its first match can
# SIGPIPE kubectl and turn a real match into a failed pipeline.
# A kubectl error is treated as an empty listing (check fails).
NODES_OUT=$(kubectl get nodes 2>/dev/null) || NODES_OUT=""
if [[ "$NODES_OUT" == *" Ready"* ]]; then
  ok "Node(s) in Ready state"
else
  fail "No nodes in Ready state"
fi
# ── Helm ─────────────────────────────────────────────────────────────────────
# Run helm once and reuse its output, so the version printed in the report
# comes from the same invocation whose exit status was checked.
if HELM_VERSION=$(helm version --short 2>/dev/null); then
  ok "helm version: ${HELM_VERSION}"
else
  fail "helm not available or erroring"
fi
# ── CoreDNS ──────────────────────────────────────────────────────────────────
# Passes when the listing of kube-system pods labelled k8s-app=kube-dns
# contains "Running". Captured before matching so that `grep -q` exiting
# early cannot SIGPIPE kubectl and fail the pipeline under `pipefail`.
# A kubectl error is treated as an empty listing (check fails).
COREDNS_OUT=$(kubectl get pods -n kube-system -l k8s-app=kube-dns 2>/dev/null) \
  || COREDNS_OUT=""
if [[ "$COREDNS_OUT" == *"Running"* ]]; then
  ok "CoreDNS pod running in kube-system"
else
  fail "CoreDNS pod not running in kube-system"
fi
# ── Traefik ──────────────────────────────────────────────────────────────────
# Passes when the listing of kube-system pods labelled
# app.kubernetes.io/name=traefik contains "Running". Captured before matching
# so that `grep -q` exiting early cannot SIGPIPE kubectl and fail the
# pipeline under `pipefail`.
# A kubectl error is treated as an empty listing (check fails).
TRAEFIK_OUT=$(kubectl get pods -n kube-system -l app.kubernetes.io/name=traefik 2>/dev/null) \
  || TRAEFIK_OUT=""
if [[ "$TRAEFIK_OUT" == *"Running"* ]]; then
  ok "Traefik ingress controller running in kube-system"
else
  fail "Traefik ingress controller not running in kube-system"
fi
# ── postgresql-ha pods ───────────────────────────────────────────────────────
# Take one snapshot of the pod listing and derive both counts from it.
# Counting is done with awk (always exits 0) rather than a `grep -v | wc -l`
# pipeline: with `set -euo pipefail`, a grep that selects no lines exits 1,
# which fails the command substitution and aborts the script — and that is
# exactly what happens when every pod is Running, or when no pods exist.
# A kubectl error is treated as an empty listing.
PG_PODS=$(kubectl get pods -n default -l app.kubernetes.io/name=postgresql-ha 2>/dev/null) \
  || PG_PODS=""
# Pod rows = non-empty lines that are not the "NAME ..." header.
PG_TOTAL=$(awk '!/^NAME/ && NF { n++ } END { print n + 0 }' <<<"$PG_PODS")
# Rows whose third column (STATUS) is anything other than "Running".
PG_NOT_RUNNING=$(awk '!/^NAME/ && NF && $3 != "Running" { n++ } END { print n + 0 }' <<<"$PG_PODS")
if [[ "$PG_TOTAL" -eq 0 ]]; then
  fail "No postgresql-ha pods found (is Gitea deployed?)"
elif [[ "$PG_NOT_RUNNING" -eq 0 ]]; then
  ok "All postgresql-ha pods Running"
else
  fail "${PG_NOT_RUNNING} postgresql-ha pod(s) not in Running state"
fi
# ── pgpool (D3 requirement) ───────────────────────────────────────────────────
# pgpool CrashLoopBackOff is silent and only surfaces on pod restart/failover.
# A passing check here means the pgpool-password secret key is present.
#
# Capture first, parse second: a `grep -v "^NAME"` stage that selects nothing
# (no pgpool pod listed) exits 1, which under `set -euo pipefail` aborts the
# script before the "not found" failure below can be reported.
# A kubectl error is treated as an empty listing.
PGPOOL_PODS=$(kubectl get pods -n default -l app.kubernetes.io/component=pgpool 2>/dev/null) \
  || PGPOOL_PODS=""
# STATUS (third column) of the first listed pod; empty when nothing is listed.
PGPOOL_STATE=$(awk '!/^NAME/ && NF { print $3; exit }' <<<"$PGPOOL_PODS")
if [[ "$PGPOOL_STATE" == "Running" ]]; then
  ok "pgpool pod Running"
else
  fail "pgpool pod not Running (state: ${PGPOOL_STATE:-not found}) — check pgpool-password secret key"
fi
# ── Summary ──────────────────────────────────────────────────────────────────
# Print the tally after a blank separator line. The closing arithmetic test
# is the last command, so its status is non-zero when any check failed.
printf '\n'
printf '%s\n' "Results: ${PASS} passed, ${FAIL} failed"
(( FAIL == 0 ))