diff --git a/Makefile b/Makefile index 7e0d4c3..ed9ea83 100644 --- a/Makefile +++ b/Makefile @@ -20,4 +20,4 @@ help: ## Show this help /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } \ /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) }' $(MAKEFILE_LIST) -.PHONY: k3s-install smoke help +.PHONY: k3s-install smoke test-ha-failover help diff --git a/tests/test_ha_failover.sh b/tests/test_ha_failover.sh index 4cec32d..add5bbd 100755 --- a/tests/test_ha_failover.sh +++ b/tests/test_ha_failover.sh @@ -14,8 +14,9 @@ set -uo pipefail -GITEA_URL="${1:-http://localhost:3000}" +GITEA_URL="${1:-http://localhost:32166}" NAMESPACE="default" +KUBECTL="k3s kubectl" FAILOVER_TIMEOUT=60 # seconds to wait for repmgr promotion RECOVERY_TIMEOUT=120 # seconds to wait for all pods Running again PASS=0 @@ -26,19 +27,20 @@ fail() { echo "[FAIL] $*"; ((FAIL++)) || true; } info() { echo "[INFO] $*"; } # ── Pre-flight ──────────────────────────────────────────────────────────────── -info "Target cluster: $(kubectl config current-context 2>/dev/null || echo 'default')" +info "Target cluster: $($KUBECTL config current-context 2>/dev/null || echo 'default')" info "Gitea URL: ${GITEA_URL}" info "Namespace: ${NAMESPACE}" echo "" -# Confirm postgresql-ha primary pod exists -PRIMARY_POD=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha \ +# Confirm a postgresql node pod exists (component=postgresql excludes pgpool) +PRIMARY_POD=$($KUBECTL get pods -n "${NAMESPACE}" \ + -l app.kubernetes.io/name=postgresql-ha,app.kubernetes.io/component=postgresql \ -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) if [[ -z "$PRIMARY_POD" ]]; then - fail "No postgresql-ha pods found — is Gitea deployed?" + fail "No postgresql-ha postgresql pods found — is Gitea deployed?" exit 1 fi -info "Primary pod to kill: ${PRIMARY_POD}" +info "PostgreSQL pod to kill: ${PRIMARY_POD}" # ── Baseline: Gitea accessible before failover ──────────────────────────────── info "Checking Gitea baseline..." @@ -52,14 +54,14 @@ fi # ── Trigger failover: kill primary pod ─────────────────────────────────────── info "Deleting primary pod ${PRIMARY_POD} to trigger failover..." -kubectl delete pod -n "${NAMESPACE}" "${PRIMARY_POD}" --grace-period=0 +$KUBECTL delete pod -n "${NAMESPACE}" "${PRIMARY_POD}" --grace-period=0 FAILOVER_START=$(date +%s) # ── Wait for repmgr promotion ───────────────────────────────────────────────── info "Waiting up to ${FAILOVER_TIMEOUT}s for a replica to be promoted..." PROMOTED=false while (( $(date +%s) - FAILOVER_START < FAILOVER_TIMEOUT )); do - RUNNING=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \ + RUNNING=$($KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \ | grep " Running " | wc -l) if [[ "$RUNNING" -ge 1 ]]; then PROMOTED=true @@ -98,7 +100,7 @@ fi info "Checking pgpool state..." PGPOOL_OK=false for i in $(seq 1 20); do - PGPOOL_STATE=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/component=pgpool 2>/dev/null \ + PGPOOL_STATE=$($KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/component=pgpool 2>/dev/null \ | grep -v "^NAME" | awk '{print $3}' | head -1) if [[ "$PGPOOL_STATE" == "Running" ]]; then PGPOOL_OK=true @@ -118,9 +120,9 @@ info "Waiting up to ${RECOVERY_TIMEOUT}s for all postgresql-ha pods to return to ALL_OK=false RECOVERY_START=$(date +%s) while (( $(date +%s) - RECOVERY_START < RECOVERY_TIMEOUT )); do - TOTAL=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \ + TOTAL=$($KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \ | grep -v "^NAME" | wc -l) - RUNNING=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \ + RUNNING=$($KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \ | grep " Running " | wc -l) if [[ "$TOTAL" -gt 0 && "$TOTAL" -eq "$RUNNING" ]]; then ALL_OK=true @@ -135,7 +137,7 @@ if $ALL_OK; then ok "All postgresql-ha pods recovered to Running" else fail "Not all postgresql-ha pods recovered within ${RECOVERY_TIMEOUT}s" - kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null || true + $KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null || true fi # ── Summary ───────────────────────────────────────────────────────────────────