From 00e688bd8ea14836956f7ba05d6a3aea1362684d Mon Sep 17 00:00:00 2001
From: tegwick
Date: Fri, 15 May 2026 09:22:16 +0200
Subject: [PATCH] fix(WP-0004): live deployment fixes from integration test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Dockerfile: copy alembic.ini + migrations/ so actcore-migrate works
- docker-compose.railiance.yml:
  - Temporal: add dynamicconfig volume mount + correct DYNAMIC_CONFIG_FILE_PATH
  - Temporal: healthcheck uses 'temporal operator cluster health' (not tctl)
  - NATS: add monitoring port -m 8222 for wget-based healthcheck
  - actcore-api healthcheck: use Python urllib (curl absent from slim image)
- api.py: fix /health Temporal probe — Client has no describe_namespace;
  use workflow_service.get_system_info(GetSystemInfoRequest()) instead
- Makefile: grep -Eh to suppress filename prefix when MAKEFILE_LIST has
  multiple files (.env included via -include)

All 8 services start cleanly; /health returns {"status":"ok",...} HTTP 200;
SIGTERM drains worker cleanly within grace period; make help correct.

Co-Authored-By: Claude Sonnet 4.6 --- Dockerfile | 2 ++ Makefile | 2 +- docker-compose.railiance.yml | 13 ++++++++----- src/activity_core/api.py | 3 ++- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index c4b2c04..ddbbfb8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,6 +11,8 @@ FROM python:3.12-slim AS runtime WORKDIR /app COPY --from=builder /app/.venv /app/.venv COPY --from=builder /app/src /app/src +COPY alembic.ini ./ +COPY migrations/ ./migrations/ COPY activity-definitions/ ./activity-definitions/ COPY event-types/ ./event-types/ COPY tasks/ ./tasks/ diff --git a/Makefile b/Makefile index 7425731..bf1c62d 100644 --- a/Makefile +++ b/Makefile @@ -49,6 +49,6 @@ start-event-router: ## Start NATS event router # ── Help ────────────────────────────────────────────────────────────────────── help: ## Show this help message - @grep -E '^[a-zA-Z_-]+:.*?##' $(MAKEFILE_LIST) | \ + @grep -Eh '^[a-zA-Z_-]+:.*?##' $(MAKEFILE_LIST) | \ awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-24s\033[0m %s\n", $$1, $$2}' | \ sort diff --git a/docker-compose.railiance.yml b/docker-compose.railiance.yml index c16e9b8..7faefde 100644 --- a/docker-compose.railiance.yml +++ b/docker-compose.railiance.yml @@ -29,14 +29,16 @@ services: POSTGRES_USER: temporal POSTGRES_PWD: temporal POSTGRES_SEEDS: temporal-db - DYNAMIC_CONFIG_FILE_PATH: /etc/temporal/dynamicconfig.yaml + DYNAMIC_CONFIG_FILE_PATH: config/dynamicconfig/development-sql.yaml ENABLE_ES: "false" VISIBILITY_DBNAME: temporal_visibility TEMPORAL_ADDRESS: temporal:7233 + volumes: + - ./dynamicconfig:/etc/temporal/config/dynamicconfig networks: - actcore-net healthcheck: - test: ["CMD-SHELL", "tctl --address temporal:7233 cluster health 2>&1 | grep -q SERVING"] + test: ["CMD", "temporal", "operator", "cluster", "health", "--address", "temporal:7233"] interval: 10s timeout: 10s retries: 20 @@ -59,15 +61,16 @@ services: # ── NATS with JetStream 
─────────────────────────────────────────────────────── nats: image: nats:2.10-alpine - command: ["-js", "-sd", "/data"] + command: ["-js", "-sd", "/data", "-m", "8222"] volumes: - nats-data:/data ports: - "4222:4222" + - "8222:8222" networks: - actcore-net healthcheck: - test: ["CMD-SHELL", "nats-server --help > /dev/null 2>&1 || wget -q -O- http://localhost:8222/healthz | grep -q ok"] + test: ["CMD-SHELL", "wget -qO- http://localhost:8222/healthz | grep -q ok"] interval: 5s timeout: 5s retries: 10 @@ -141,7 +144,7 @@ services: - actcore-net restart: unless-stopped healthcheck: - test: ["CMD-SHELL", "curl -sf http://localhost:8010/health"] + test: ["CMD", "python", "-c", "import urllib.request,sys; r=urllib.request.urlopen('http://localhost:8010/health'); sys.exit(0 if r.status==200 else 1)"] interval: 10s timeout: 5s retries: 5 diff --git a/src/activity_core/api.py b/src/activity_core/api.py index cd3be69..2b527ab 100644 --- a/src/activity_core/api.py +++ b/src/activity_core/api.py @@ -34,6 +34,7 @@ from fastapi.responses import JSONResponse from pydantic import BaseModel from sqlalchemy import select, text from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine +from temporalio.api.workflowservice.v1 import GetSystemInfoRequest from temporalio.client import Client from activity_core.models import ActivityDefinition, CronTriggerConfig @@ -289,7 +290,7 @@ async def health() -> JSONResponse: pass try: - await _get_temporal().describe_namespace(TEMPORAL_NAMESPACE) + await _get_temporal().workflow_service.get_system_info(GetSystemInfoRequest()) temporal_ok = True except Exception: pass