generated from coulomb/repo-seed
feat(WP-0004): railiance deployment & service ops
- Dockerfile (multi-stage, uv-based, slim runtime)
- .dockerignore
- docker-compose.railiance.yml (Temporal + NATS + PG, no Elasticsearch)
- GET /health endpoint (db + temporal probes, 200/503)
- .env.example (complete env var reference)
- Makefile: migrate, sync-all, dev-up/down, railiance-up/down, start-worker, start-api, start-event-router, help targets; extracted sync-event-types Python to scripts/sync_event_types.py
- SIGTERM graceful shutdown in worker.py and event_router.py
- docs/runbook.md: Railiance deployment section

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -30,8 +30,9 @@ from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
||||
from temporalio.client import Client
|
||||
|
||||
@@ -275,6 +276,32 @@ async def trigger_definition(definition_id: uuid.UUID) -> dict[str, str]:
|
||||
|
||||
# T42: Curator gate — event type approval endpoint
|
||||
|
||||
@app.get("/health")
async def health() -> JSONResponse:
    """Report whether the API's backing services are reachable.

    Two independent probes are run; a failure in one does not skip the other:

    * database -- executes ``SELECT 1`` through a session from ``_get_db()``;
    * Temporal -- calls ``describe_namespace(TEMPORAL_NAMESPACE)`` on the
      object returned by ``_get_temporal()``.

    Returns:
        A ``JSONResponse`` with body ``{"status", "db", "temporal"}``.
        ``status`` is ``"ok"`` with HTTP 200 when both probes succeed,
        otherwise ``"degraded"`` with HTTP 503.
    """
    # Function-scope import keeps this block self-contained regardless of
    # what the rest of the module already imports.
    import logging

    log = logging.getLogger(__name__)

    db_ok = False
    temporal_ok = False

    # A health endpoint must never raise, so the broad catch is deliberate.
    # The failure is logged rather than silently dropped so that a 503 can
    # be diagnosed from the service logs.
    try:
        async with _get_db()() as session:
            await session.execute(text("SELECT 1"))
            db_ok = True
    except Exception:
        log.warning("Health check: database probe failed", exc_info=True)

    try:
        # NOTE(review): the temporalio ``Client`` public API does not appear
        # to expose ``describe_namespace`` directly (it is reached through
        # ``client.workflow_service``). Confirm what ``_get_temporal()``
        # returns; if it is a plain ``Client`` this probe always fails with
        # AttributeError, which the log line below will now make visible.
        await _get_temporal().describe_namespace(TEMPORAL_NAMESPACE)
        temporal_ok = True
    except Exception:
        log.warning("Health check: Temporal probe failed", exc_info=True)

    status = "ok" if db_ok and temporal_ok else "degraded"
    code = 200 if status == "ok" else 503
    return JSONResponse(
        {"status": status, "db": db_ok, "temporal": temporal_ok},
        status_code=code,
    )
|
||||
|
||||
|
||||
@app.post("/event-types/{type_id}/approve", status_code=200)
|
||||
async def approve_event_type(type_id: str) -> dict[str, str]:
|
||||
"""Approve a pending event type, setting its status to 'active'.
|
||||
|
||||
@@ -23,6 +23,7 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
@@ -202,12 +203,19 @@ class EventRouter:
|
||||
_CONSUMER_NAME,
|
||||
)
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
stop = asyncio.Event()
|
||||
loop.add_signal_handler(signal.SIGTERM, stop.set)
|
||||
loop.add_signal_handler(signal.SIGINT, stop.set)
|
||||
|
||||
try:
|
||||
await asyncio.Future() # run until cancelled
|
||||
await stop.wait()
|
||||
logger.info("Shutdown signal received — draining event router")
|
||||
finally:
|
||||
await sub.unsubscribe()
|
||||
await self._nc.drain()
|
||||
await engine.dispose()
|
||||
logger.info("Event router stopped cleanly")
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
|
||||
@@ -26,6 +26,7 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
|
||||
from temporalio.client import Client
|
||||
from temporalio.runtime import PrometheusConfig, Runtime, TelemetryConfig
|
||||
@@ -102,12 +103,21 @@ async def run() -> None:
|
||||
activities=[persist_task_instance],
|
||||
)
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
stop = asyncio.Event()
|
||||
loop.add_signal_handler(signal.SIGTERM, stop.set)
|
||||
loop.add_signal_handler(signal.SIGINT, stop.set)
|
||||
|
||||
async with orchestrator_worker, task_worker:
|
||||
print(
|
||||
f"Workers running — queues: {ORCHESTRATOR_TASK_QUEUE!r}, "
|
||||
f"{TASK_EXECUTION_TASK_QUEUE!r} (namespace={TEMPORAL_NAMESPACE!r})"
|
||||
logger.info(
|
||||
"Workers running — queues: %r, %r (namespace=%r)",
|
||||
ORCHESTRATOR_TASK_QUEUE,
|
||||
TASK_EXECUTION_TASK_QUEUE,
|
||||
TEMPORAL_NAMESPACE,
|
||||
)
|
||||
await asyncio.Future() # run until cancelled
|
||||
await stop.wait()
|
||||
logger.info("Shutdown signal received — draining workers")
|
||||
logger.info("Workers stopped cleanly")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user