fix(WP-0004): live deployment fixes from integration test

- Dockerfile: copy alembic.ini + migrations/ so actcore-migrate works
- docker-compose.railiance.yml:
    - Temporal: add dynamicconfig volume mount + correct DYNAMIC_CONFIG_FILE_PATH
    - Temporal: healthcheck uses 'temporal operator cluster health' (not tctl)
    - NATS: add monitoring port -m 8222 for wget-based healthcheck
    - actcore-api healthcheck: use Python urllib (curl absent from slim image)
- api.py: fix /health Temporal probe — Client has no describe_namespace;
    use workflow_service.get_system_info(GetSystemInfoRequest()) instead
- Makefile: use grep -Eh in the help target to suppress the filename prefix
    when MAKEFILE_LIST contains multiple files (.env is pulled in via -include)

All 8 services start cleanly; /health returns HTTP 200 with
{"status":"ok",...}; SIGTERM drains the worker cleanly within the grace
period; 'make help' output is correct.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-15 09:22:16 +02:00
parent 94bd34231c
commit 00e688bd8e
4 changed files with 13 additions and 7 deletions

View File

@@ -11,6 +11,8 @@ FROM python:3.12-slim AS runtime
WORKDIR /app
COPY --from=builder /app/.venv /app/.venv
COPY --from=builder /app/src /app/src
COPY alembic.ini ./
COPY migrations/ ./migrations/
COPY activity-definitions/ ./activity-definitions/
COPY event-types/ ./event-types/
COPY tasks/ ./tasks/

View File

@@ -49,6 +49,6 @@ start-event-router: ## Start NATS event router
# ── Help ──────────────────────────────────────────────────────────────────────
help: ## Show this help message
@grep -E '^[a-zA-Z_-]+:.*?##' $(MAKEFILE_LIST) | \
@grep -Eh '^[a-zA-Z_-]+:.*?##' $(MAKEFILE_LIST) | \
awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-24s\033[0m %s\n", $$1, $$2}' | \
sort

View File

@@ -29,14 +29,16 @@ services:
POSTGRES_USER: temporal
POSTGRES_PWD: temporal
POSTGRES_SEEDS: temporal-db
DYNAMIC_CONFIG_FILE_PATH: /etc/temporal/dynamicconfig.yaml
DYNAMIC_CONFIG_FILE_PATH: config/dynamicconfig/development-sql.yaml
ENABLE_ES: "false"
VISIBILITY_DBNAME: temporal_visibility
TEMPORAL_ADDRESS: temporal:7233
volumes:
- ./dynamicconfig:/etc/temporal/config/dynamicconfig
networks:
- actcore-net
healthcheck:
test: ["CMD-SHELL", "tctl --address temporal:7233 cluster health 2>&1 | grep -q SERVING"]
test: ["CMD", "temporal", "operator", "cluster", "health", "--address", "temporal:7233"]
interval: 10s
timeout: 10s
retries: 20
@@ -59,15 +61,16 @@ services:
# ── NATS with JetStream ───────────────────────────────────────────────────────
nats:
image: nats:2.10-alpine
command: ["-js", "-sd", "/data"]
command: ["-js", "-sd", "/data", "-m", "8222"]
volumes:
- nats-data:/data
ports:
- "4222:4222"
- "8222:8222"
networks:
- actcore-net
healthcheck:
test: ["CMD-SHELL", "nats-server --help > /dev/null 2>&1 || wget -q -O- http://localhost:8222/healthz | grep -q ok"]
test: ["CMD-SHELL", "wget -qO- http://localhost:8222/healthz | grep -q ok"]
interval: 5s
timeout: 5s
retries: 10
@@ -141,7 +144,7 @@ services:
- actcore-net
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "curl -sf http://localhost:8010/health"]
test: ["CMD", "python", "-c", "import urllib.request,sys; r=urllib.request.urlopen('http://localhost:8010/health'); sys.exit(0 if r.status==200 else 1)"]
interval: 10s
timeout: 5s
retries: 5

View File

@@ -34,6 +34,7 @@ from fastapi.responses import JSONResponse
from pydantic import BaseModel
from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from temporalio.api.workflowservice.v1 import GetSystemInfoRequest
from temporalio.client import Client
from activity_core.models import ActivityDefinition, CronTriggerConfig
@@ -289,7 +290,7 @@ async def health() -> JSONResponse:
pass
try:
await _get_temporal().describe_namespace(TEMPORAL_NAMESPACE)
await _get_temporal().workflow_service.get_system_info(GetSystemInfoRequest())
temporal_ok = True
except Exception:
pass