fix(test): correct ha-failover test — wrong URL, wrong pod label, missing kubectl
Three bugs:
- GITEA_URL defaulted to localhost:3000; Gitea NodePort is 32166
- Pod label app.kubernetes.io/name=postgresql-ha matched the pgpool pod too; added app.kubernetes.io/component=postgresql to the selector to target only postgres nodes
- Used bare 'kubectl', which is not on PATH; switched to 'k3s kubectl'

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2
Makefile
2
Makefile
@@ -20,4 +20,4 @@ help: ## Show this help
|
|||||||
/^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } \
|
/^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } \
|
||||||
/^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) }' $(MAKEFILE_LIST)
|
/^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) }' $(MAKEFILE_LIST)
|
||||||
|
|
||||||
.PHONY: k3s-install smoke help
|
.PHONY: k3s-install smoke test-ha-failover help
|
||||||
|
|||||||
@@ -14,8 +14,9 @@
|
|||||||
|
|
||||||
set -uo pipefail
|
set -uo pipefail
|
||||||
|
|
||||||
GITEA_URL="${1:-http://localhost:3000}"
|
GITEA_URL="${1:-http://localhost:32166}"
|
||||||
NAMESPACE="default"
|
NAMESPACE="default"
|
||||||
|
KUBECTL="k3s kubectl"
|
||||||
FAILOVER_TIMEOUT=60 # seconds to wait for repmgr promotion
|
FAILOVER_TIMEOUT=60 # seconds to wait for repmgr promotion
|
||||||
RECOVERY_TIMEOUT=120 # seconds to wait for all pods Running again
|
RECOVERY_TIMEOUT=120 # seconds to wait for all pods Running again
|
||||||
PASS=0
|
PASS=0
|
||||||
@@ -26,19 +27,20 @@ fail() { echo "[FAIL] $*"; ((FAIL++)) || true; }
|
|||||||
info() { echo "[INFO] $*"; }
|
info() { echo "[INFO] $*"; }
|
||||||
|
|
||||||
# ── Pre-flight ────────────────────────────────────────────────────────────────
|
# ── Pre-flight ────────────────────────────────────────────────────────────────
|
||||||
info "Target cluster: $(kubectl config current-context 2>/dev/null || echo 'default')"
|
info "Target cluster: $($KUBECTL config current-context 2>/dev/null || echo 'default')"
|
||||||
info "Gitea URL: ${GITEA_URL}"
|
info "Gitea URL: ${GITEA_URL}"
|
||||||
info "Namespace: ${NAMESPACE}"
|
info "Namespace: ${NAMESPACE}"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# Confirm postgresql-ha primary pod exists
|
# Confirm a postgresql node pod exists (component=postgresql excludes pgpool)
|
||||||
PRIMARY_POD=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha \
|
PRIMARY_POD=$($KUBECTL get pods -n "${NAMESPACE}" \
|
||||||
|
-l app.kubernetes.io/name=postgresql-ha,app.kubernetes.io/component=postgresql \
|
||||||
-o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
|
-o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
|
||||||
if [[ -z "$PRIMARY_POD" ]]; then
|
if [[ -z "$PRIMARY_POD" ]]; then
|
||||||
fail "No postgresql-ha pods found — is Gitea deployed?"
|
fail "No postgresql-ha postgresql pods found — is Gitea deployed?"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
info "Primary pod to kill: ${PRIMARY_POD}"
|
info "PostgreSQL pod to kill: ${PRIMARY_POD}"
|
||||||
|
|
||||||
# ── Baseline: Gitea accessible before failover ────────────────────────────────
|
# ── Baseline: Gitea accessible before failover ────────────────────────────────
|
||||||
info "Checking Gitea baseline..."
|
info "Checking Gitea baseline..."
|
||||||
@@ -52,14 +54,14 @@ fi
|
|||||||
|
|
||||||
# ── Trigger failover: kill primary pod ───────────────────────────────────────
|
# ── Trigger failover: kill primary pod ───────────────────────────────────────
|
||||||
info "Deleting primary pod ${PRIMARY_POD} to trigger failover..."
|
info "Deleting primary pod ${PRIMARY_POD} to trigger failover..."
|
||||||
kubectl delete pod -n "${NAMESPACE}" "${PRIMARY_POD}" --grace-period=0
|
$KUBECTL delete pod -n "${NAMESPACE}" "${PRIMARY_POD}" --grace-period=0
|
||||||
FAILOVER_START=$(date +%s)
|
FAILOVER_START=$(date +%s)
|
||||||
|
|
||||||
# ── Wait for repmgr promotion ─────────────────────────────────────────────────
|
# ── Wait for repmgr promotion ─────────────────────────────────────────────────
|
||||||
info "Waiting up to ${FAILOVER_TIMEOUT}s for a replica to be promoted..."
|
info "Waiting up to ${FAILOVER_TIMEOUT}s for a replica to be promoted..."
|
||||||
PROMOTED=false
|
PROMOTED=false
|
||||||
while (( $(date +%s) - FAILOVER_START < FAILOVER_TIMEOUT )); do
|
while (( $(date +%s) - FAILOVER_START < FAILOVER_TIMEOUT )); do
|
||||||
RUNNING=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \
|
RUNNING=$($KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \
|
||||||
| grep " Running " | wc -l)
|
| grep " Running " | wc -l)
|
||||||
if [[ "$RUNNING" -ge 1 ]]; then
|
if [[ "$RUNNING" -ge 1 ]]; then
|
||||||
PROMOTED=true
|
PROMOTED=true
|
||||||
@@ -98,7 +100,7 @@ fi
|
|||||||
info "Checking pgpool state..."
|
info "Checking pgpool state..."
|
||||||
PGPOOL_OK=false
|
PGPOOL_OK=false
|
||||||
for i in $(seq 1 20); do
|
for i in $(seq 1 20); do
|
||||||
PGPOOL_STATE=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/component=pgpool 2>/dev/null \
|
PGPOOL_STATE=$($KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/component=pgpool 2>/dev/null \
|
||||||
| grep -v "^NAME" | awk '{print $3}' | head -1)
|
| grep -v "^NAME" | awk '{print $3}' | head -1)
|
||||||
if [[ "$PGPOOL_STATE" == "Running" ]]; then
|
if [[ "$PGPOOL_STATE" == "Running" ]]; then
|
||||||
PGPOOL_OK=true
|
PGPOOL_OK=true
|
||||||
@@ -118,9 +120,9 @@ info "Waiting up to ${RECOVERY_TIMEOUT}s for all postgresql-ha pods to return to
|
|||||||
ALL_OK=false
|
ALL_OK=false
|
||||||
RECOVERY_START=$(date +%s)
|
RECOVERY_START=$(date +%s)
|
||||||
while (( $(date +%s) - RECOVERY_START < RECOVERY_TIMEOUT )); do
|
while (( $(date +%s) - RECOVERY_START < RECOVERY_TIMEOUT )); do
|
||||||
TOTAL=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \
|
TOTAL=$($KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \
|
||||||
| grep -v "^NAME" | wc -l)
|
| grep -v "^NAME" | wc -l)
|
||||||
RUNNING=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \
|
RUNNING=$($KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null \
|
||||||
| grep " Running " | wc -l)
|
| grep " Running " | wc -l)
|
||||||
if [[ "$TOTAL" -gt 0 && "$TOTAL" -eq "$RUNNING" ]]; then
|
if [[ "$TOTAL" -gt 0 && "$TOTAL" -eq "$RUNNING" ]]; then
|
||||||
ALL_OK=true
|
ALL_OK=true
|
||||||
@@ -135,7 +137,7 @@ if $ALL_OK; then
|
|||||||
ok "All postgresql-ha pods recovered to Running"
|
ok "All postgresql-ha pods recovered to Running"
|
||||||
else
|
else
|
||||||
fail "Not all postgresql-ha pods recovered within ${RECOVERY_TIMEOUT}s"
|
fail "Not all postgresql-ha pods recovered within ${RECOVERY_TIMEOUT}s"
|
||||||
kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null || true
|
$KUBECTL get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=postgresql-ha 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ── Summary ───────────────────────────────────────────────────────────────────
|
# ── Summary ───────────────────────────────────────────────────────────────────
|
||||||
|
|||||||
Reference in New Issue
Block a user