From 3297ac1f6c695d67085ddafffd0d5fe9c9c8f5ee Mon Sep 17 00:00:00 2001 From: Bernd Worsch Date: Tue, 10 Mar 2026 13:42:54 +0000 Subject: [PATCH] =?UTF-8?q?fix(test):=20correct=20ha-failover=20test=20?= =?UTF-8?q?=E2=80=94=20wrong=20URL,=20wrong=20pod=20label,=20missing=20kub?= =?UTF-8?q?ectl?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three bugs: - GITEA_URL defaulted to localhost:3000; Gitea NodePort is 32166 - Pod label app.kubernetes.io/name=postgresql-ha matched pgpool pod too; added component=postgresql to target only postgres nodes - Used bare 'kubectl' which is not on PATH; switched to 'k3s kubectl' Co-Authored-By: Claude Sonnet 4.6 --- Makefile | 2 +- tests/test_ha_failover.sh | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 7e0d4c3..ed9ea83 100644 --- a/Makefile +++ b/Makefile @@ -20,4 +20,4 @@ help: ## Show this help /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } \ /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) }' $(MAKEFILE_LIST) -.PHONY: k3s-install smoke help +.PHONY: k3s-install smoke test-ha-failover help diff --git a/tests/test_ha_failover.sh b/tests/test_ha_failover.sh index 4cec32d..add5bbd 100755 --- a/tests/test_ha_failover.sh +++ b/tests/test_ha_failover.sh @@ -14,8 +14,9 @@ set -uo pipefail -GITEA_URL="${1:-http://localhost:3000}" +GITEA_URL="${1:-http://localhost:32166}" NAMESPACE="default" +KUBECTL="k3s kubectl" FAILOVER_TIMEOUT=60 # seconds to wait for repmgr promotion RECOVERY_TIMEOUT=120 # seconds to wait for all pods Running again PASS=0 @@ -26,19 +27,20 @@ fail() { echo "[FAIL] $*"; ((FAIL++)) || true; } info() { echo "[INFO] $*"; } # ── Pre-flight ──────────────────────────────────────────────────────────────── -info "Target cluster: $(kubectl config current-context 2>/dev/null || echo 'default')" +info "Target cluster: $($KUBECTL config current-context 2>/dev/null || echo 'default')" 
info "Gitea URL: ${GITEA_URL}" info "Namespace: ${NAMESPACE}" echo "" -# Confirm postgresql-ha primary pod exists -PRIMARY_POD=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha \ +# Confirm a postgresql node pod exists (component=postgresql excludes pgpool) +PRIMARY_POD=$($KUBECTL get pods -n "${NAMESPACE}" \ + -l app.kubernetes.io/name=postgresql-ha,app.kubernetes.io/component=postgresql \ -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) if [[ -z "$PRIMARY_POD" ]]; then - fail "No postgresql-ha pods found — is Gitea deployed?" + fail "No postgresql-ha postgresql pods found — is Gitea deployed?" exit 1 fi -info "Primary pod to kill: ${PRIMARY_POD}" +info "PostgreSQL pod to kill: ${PRIMARY_POD}" # ── Baseline: Gitea accessible before failover ──────────────────────────────── info "Checking Gitea baseline..." @@ -52,14 +54,14 @@ fi # ── Trigger failover: kill primary pod ─────────────────────────────────────── info "Deleting primary pod ${PRIMARY_POD} to trigger failover..." -kubectl delete pod -n "${NAMESPACE}" "${PRIMARY_POD}" --grace-period=0 +$KUBECTL delete pod -n "${NAMESPACE}" "${PRIMARY_POD}" --grace-period=0 FAILOVER_START=$(date +%s) # ── Wait for repmgr promotion ───────────────────────────────────────────────── info "Waiting up to ${FAILOVER_TIMEOUT}s for a replica to be promoted..." PROMOTED=false while (( $(date +%s) - FAILOVER_START < FAILOVER_TIMEOUT )); do - RUNNING=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \ + RUNNING=$($KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha,app.kubernetes.io/component=postgresql 2>/dev/null \ | grep " Running " | wc -l) if [[ "$RUNNING" -ge 1 ]]; then PROMOTED=true @@ -98,7 +100,7 @@ fi info "Checking pgpool state..." 
PGPOOL_OK=false for i in $(seq 1 20); do - PGPOOL_STATE=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/component=pgpool 2>/dev/null \ + PGPOOL_STATE=$($KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/component=pgpool 2>/dev/null \ | grep -v "^NAME" | awk '{print $3}' | head -1) if [[ "$PGPOOL_STATE" == "Running" ]]; then PGPOOL_OK=true @@ -118,9 +120,9 @@ info "Waiting up to ${RECOVERY_TIMEOUT}s for all postgresql-ha pods to return to ALL_OK=false RECOVERY_START=$(date +%s) while (( $(date +%s) - RECOVERY_START < RECOVERY_TIMEOUT )); do - TOTAL=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \ + TOTAL=$($KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \ | grep -v "^NAME" | wc -l) - RUNNING=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \ + RUNNING=$($KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \ | grep " Running " | wc -l) if [[ "$TOTAL" -gt 0 && "$TOTAL" -eq "$RUNNING" ]]; then ALL_OK=true @@ -135,7 +137,7 @@ if $ALL_OK; then ok "All postgresql-ha pods recovered to Running" else fail "Not all postgresql-ha pods recovered within ${RECOVERY_TIMEOUT}s" - kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null || true + $KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null || true fi # ── Summary ───────────────────────────────────────────────────────────────────