feat(WP-0002): complete Triggers & Ops workstream

Delivers all 12 tasks (T22–T33): Temporal Schedule manager + startup
sync, NATS JetStream event router, FastAPI CRUD + manual trigger,
Prometheus metrics wiring, custom search-attribute tagging, and
operational runbook. Marks workplan status as done.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-28 01:04:43 +01:00
parent 9f15296e25
commit ea5fbe0bf3
14 changed files with 1612 additions and 48 deletions

266
src/activity_core/api.py Normal file
View File

@@ -0,0 +1,266 @@
"""FastAPI REST API for activity-core.
T30: CRUD for ActivityDefinition + manual one-shot trigger.
Endpoints:
GET /activity-definitions/ — list all
GET /activity-definitions/{id} — get one
POST /activity-definitions/ — create
PUT /activity-definitions/{id} — update
DELETE /activity-definitions/{id} — delete
POST /activity-definitions/{id}/trigger — manual one-shot run
Schedule lifecycle:
- POST/PUT with trigger_type='cron' upserts a Temporal Schedule.
- DELETE removes the Temporal Schedule if present.
- /trigger starts RunActivityWorkflow directly (works for any trigger_type).
Run with:
TEMPORAL_HOST=localhost:7233 \
ACTCORE_DB_URL=postgresql+asyncpg://actcore:actcore@localhost:5433/actcore \
uv run uvicorn activity_core.api:app --port 8010
"""
from __future__ import annotations
import os
import uuid
from contextlib import asynccontextmanager
from datetime import datetime, timezone
from typing import Any
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from temporalio.client import Client
from activity_core.models import ActivityDefinition, CronTriggerConfig
from activity_core.orm import ActivityDefinition as ActivityDefinitionRow
from activity_core.schedule_manager import delete_schedule, upsert_schedule
TEMPORAL_HOST = os.environ.get("TEMPORAL_HOST", "localhost:7233")
TEMPORAL_NAMESPACE = os.environ.get("TEMPORAL_NAMESPACE", "default")
_ORCHESTRATOR_TASK_QUEUE = "orchestrator-tq"
# --- App state ---------------------------------------------------------------
_session_factory: async_sessionmaker[AsyncSession] | None = None
_temporal_client: Client | None = None
@asynccontextmanager
async def lifespan(app: FastAPI):  # type: ignore[type-arg]
    """Set up and tear down the process-wide DB and Temporal handles.

    Startup reads ``ACTCORE_DB_URL``, creates the async SQLAlchemy engine and
    session factory, and connects the Temporal client; both are stored in the
    module globals ``_session_factory`` and ``_temporal_client``.  Shutdown
    disposes the engine and clears the globals.

    Raises:
        RuntimeError: if ``ACTCORE_DB_URL`` is unset or empty.
    """
    global _session_factory, _temporal_client
    db_url = os.environ.get("ACTCORE_DB_URL")
    if not db_url:
        raise RuntimeError("ACTCORE_DB_URL is required")
    engine = create_async_engine(db_url)
    try:
        _session_factory = async_sessionmaker(engine, expire_on_commit=False)
        _temporal_client = await Client.connect(TEMPORAL_HOST, namespace=TEMPORAL_NAMESPACE)
        yield
    finally:
        # Reached on normal shutdown, on a failed Temporal connect, and when
        # the lifespan task is cancelled, so the engine is always disposed.
        _session_factory = None
        _temporal_client = None
        await engine.dispose()
app = FastAPI(title="activity-core API", lifespan=lifespan)
def _get_db() -> async_sessionmaker[AsyncSession]:
    """Return the process-wide session factory stored in ``_session_factory``.

    Raises:
        RuntimeError: if the factory has not been initialised yet.
    """
    if _session_factory is None:
        # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
        raise RuntimeError("database session factory is not initialised")
    return _session_factory
def _get_temporal() -> Client:
    """Return the process-wide Temporal client stored in ``_temporal_client``.

    Raises:
        RuntimeError: if the client has not been initialised yet.
    """
    if _temporal_client is None:
        # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
        raise RuntimeError("Temporal client is not initialised")
    return _temporal_client
# --- Schemas -----------------------------------------------------------------
class ActivityDefinitionCreate(BaseModel):
    # Request body accepted by POST /activity-definitions/ (see create_definition).
    name: str
    enabled: bool = True
    # Raw trigger settings; create_definition reads its "trigger_type" key.
    trigger_config: dict[str, Any]
    # NOTE(review): mutable list defaults — presumably pydantic copies defaults
    # per instance; confirm before relying on it.
    context_sources: list[dict[str, Any]] = []
    task_templates: list[dict[str, Any]] = []
    dedupe_key_strategy: str = "skip"
    version: int = 1
class ActivityDefinitionUpdate(BaseModel):
    # Request body accepted by PUT /activity-definitions/{id}.
    # Every field defaults to None; update_definition only copies fields whose
    # value is not None onto the stored row, so None means "leave unchanged".
    name: str | None = None
    enabled: bool | None = None
    trigger_config: dict[str, Any] | None = None
    context_sources: list[dict[str, Any]] | None = None
    task_templates: list[dict[str, Any]] | None = None
    dedupe_key_strategy: str | None = None
    version: int | None = None
class ActivityDefinitionResponse(BaseModel):
    # Response model of the list/get/create/update endpoints; instances are
    # built from an ORM row by _row_to_response, one attribute per field.
    id: uuid.UUID
    name: str
    enabled: bool
    # Stored separately from trigger_config; the create/update handlers copy
    # it from trigger_config["trigger_type"].
    trigger_type: str
    trigger_config: dict[str, Any]
    context_sources: list[dict[str, Any]]
    task_templates: list[dict[str, Any]]
    dedupe_key_strategy: str
    version: int
    created_at: datetime
    updated_at: datetime
def _row_to_response(row: ActivityDefinitionRow) -> ActivityDefinitionResponse:
    """Build the API response model by copying each exposed attribute off the row."""
    exposed = (
        "id",
        "name",
        "enabled",
        "trigger_type",
        "trigger_config",
        "context_sources",
        "task_templates",
        "dedupe_key_strategy",
        "version",
        "created_at",
        "updated_at",
    )
    payload = {attr: getattr(row, attr) for attr in exposed}
    return ActivityDefinitionResponse(**payload)
async def _upsert_schedule_if_cron(row: ActivityDefinitionRow) -> None:
    """Upsert a Temporal Schedule for the row if it uses a cron trigger.

    The row is converted to the domain ``ActivityDefinition``; if its parsed
    ``trigger_config`` is a ``CronTriggerConfig`` the schedule is upserted.

    Schedule management is best-effort: any failure is logged with its
    traceback and swallowed so the surrounding API call still succeeds.
    """
    import logging  # local import: this module has no file-level logging import

    try:
        defn = ActivityDefinition.model_validate(
            {
                "id": row.id,
                "name": row.name,
                "enabled": row.enabled,
                "trigger_config": row.trigger_config,
                "context_sources": row.context_sources,
                "task_templates": row.task_templates,
                "dedupe_key_strategy": row.dedupe_key_strategy,
                "version": row.version,
            }
        )
        if isinstance(defn.trigger_config, CronTriggerConfig):
            await upsert_schedule(_get_temporal(), defn)
    except Exception:
        # Still don't fail the API call, but leave a trace: a silently missing
        # schedule is otherwise impossible to diagnose.
        logging.getLogger(__name__).exception(
            "failed to upsert Temporal Schedule for activity definition %s", row.id
        )
# --- Routes ------------------------------------------------------------------
@app.get("/activity-definitions/", response_model=list[ActivityDefinitionResponse])
async def list_definitions() -> list[ActivityDefinitionResponse]:
    """Return every stored ActivityDefinition, converted to the response model."""
    session_factory = _get_db()
    async with session_factory() as db:
        query_result = await db.scalars(select(ActivityDefinitionRow))
        found = query_result.all()
        return list(map(_row_to_response, found))
@app.get("/activity-definitions/{definition_id}", response_model=ActivityDefinitionResponse)
async def get_definition(definition_id: uuid.UUID) -> ActivityDefinitionResponse:
    """Look up a single ActivityDefinition by primary key; 404 if it is absent."""
    session_factory = _get_db()
    async with session_factory() as db:
        found = await db.get(ActivityDefinitionRow, definition_id)
        if found is not None:
            return _row_to_response(found)
        raise HTTPException(status_code=404, detail="ActivityDefinition not found")
@app.post("/activity-definitions/", response_model=ActivityDefinitionResponse, status_code=201)
async def create_definition(body: ActivityDefinitionCreate) -> ActivityDefinitionResponse:
    """Persist a new ActivityDefinition, then best-effort upsert its cron schedule.

    The stored ``trigger_type`` column is taken from
    ``trigger_config["trigger_type"]`` and falls back to an empty string when
    that key is missing.
    """
    declared_type = body.trigger_config.get("trigger_type", "")
    new_row = ActivityDefinitionRow(
        id=uuid.uuid4(),
        name=body.name,
        enabled=body.enabled,
        trigger_type=declared_type,
        trigger_config=body.trigger_config,
        context_sources=body.context_sources,
        task_templates=body.task_templates,
        dedupe_key_strategy=body.dedupe_key_strategy,
        version=body.version,
    )
    session_factory = _get_db()
    async with session_factory() as db:
        # The inner context manager commits on exit.
        async with db.begin():
            db.add(new_row)
        # NOTE(review): the response reads created_at/updated_at straight off
        # the in-memory row — confirm the ORM populates them on insert.
        await _upsert_schedule_if_cron(new_row)
        return _row_to_response(new_row)
@app.put("/activity-definitions/{definition_id}", response_model=ActivityDefinitionResponse)
async def update_definition(
    definition_id: uuid.UUID, body: ActivityDefinitionUpdate
) -> ActivityDefinitionResponse:
    """Update an ActivityDefinition. Re-upserts the Temporal Schedule if trigger_type='cron'.

    Only fields of ``body`` that are not ``None`` are applied.  When
    ``trigger_config`` is supplied, ``trigger_type`` is refreshed from its
    ``"trigger_type"`` key (the stored value is kept if the key is absent).

    Raises:
        HTTPException: 404 if no definition with ``definition_id`` exists.
    """
    Session = _get_db()
    async with Session() as session:
        # The whole read-modify-write runs inside ONE explicit transaction.
        # session.get() autobegins a transaction, so opening session.begin()
        # only afterwards raises InvalidRequestError ("a transaction is
        # already begun") and the update could never commit.
        async with session.begin():
            row = await session.get(ActivityDefinitionRow, definition_id)
            if row is None:
                # Propagating out of begin() rolls the (empty) transaction back.
                raise HTTPException(status_code=404, detail="ActivityDefinition not found")
            if body.name is not None:
                row.name = body.name
            if body.enabled is not None:
                row.enabled = body.enabled
            if body.trigger_config is not None:
                row.trigger_config = body.trigger_config
                row.trigger_type = body.trigger_config.get("trigger_type", row.trigger_type)
            if body.context_sources is not None:
                row.context_sources = body.context_sources
            if body.task_templates is not None:
                row.task_templates = body.task_templates
            if body.dedupe_key_strategy is not None:
                row.dedupe_key_strategy = body.dedupe_key_strategy
            if body.version is not None:
                row.version = body.version
            # The row is already attached to the session; leaving the
            # begin() block flushes and commits the changes.
        # NOTE(review): if the trigger type changed away from 'cron', the old
        # Temporal Schedule is not removed here — confirm that later
        # reconciliation is the intended cleanup path.
        await _upsert_schedule_if_cron(row)
        return _row_to_response(row)
@app.delete("/activity-definitions/{definition_id}", status_code=204)
async def delete_definition(definition_id: uuid.UUID) -> None:
    """Delete an ActivityDefinition and its Temporal Schedule if present.

    The database row is removed first; the schedule is deleted afterwards.

    Raises:
        HTTPException: 404 if no definition with ``definition_id`` exists.
    """
    Session = _get_db()
    async with Session() as session:
        # Lookup and delete share one explicit transaction: session.get()
        # autobegins, so calling session.begin() after it raises
        # InvalidRequestError ("a transaction is already begun").
        async with session.begin():
            row = await session.get(ActivityDefinitionRow, definition_id)
            if row is None:
                raise HTTPException(status_code=404, detail="ActivityDefinition not found")
            await session.delete(row)
    # Only reached after the row deletion has committed.
    await delete_schedule(_get_temporal(), definition_id)
@app.post("/activity-definitions/{definition_id}/trigger", status_code=202)
async def trigger_definition(definition_id: uuid.UUID) -> dict[str, str]:
    """Start a one-off RunActivityWorkflow for an existing ActivityDefinition.

    Returns the started workflow's ID and the generated ``manual-<uuid>``
    trigger key.  Responds 404 when the definition does not exist.
    """
    session_factory = _get_db()
    async with session_factory() as db:
        exists = (await db.get(ActivityDefinitionRow, definition_id)) is not None
        if not exists:
            raise HTTPException(status_code=404, detail="ActivityDefinition not found")
        # A fresh random key per call, so every manual trigger gets its own
        # workflow ID.
        manual_key = f"manual-{uuid.uuid4()}"
        fired_at = datetime.now(tz=timezone.utc).isoformat()
        started = await _get_temporal().start_workflow(
            "RunActivityWorkflow",
            args=[str(definition_id), manual_key, fired_at],
            id=f"activity-{definition_id}:{manual_key}",
            task_queue=_ORCHESTRATOR_TASK_QUEUE,
        )
        return {"workflow_id": started.id, "trigger_key": manual_key}

View File

@@ -0,0 +1,226 @@
"""Event Router — NATS JetStream consumer that routes events to RunActivityWorkflow.
T26: EventRouter class — connects to NATS JetStream, subscribes to activity.>
T27: Routing rules — match event.type + payload filters to enabled ActivityDefinitions
T28: Start/signal workflow from Event Router with idempotent workflow ID
Stream: ACTIVITY_EVENTS
Subject: activity.>
Consumer: activity-core-event-router (durable, push-based)
Message ack happens only after the workflow has been successfully started,
giving at-least-once delivery semantics.
Usage:
NATS_URL=nats://localhost:4222 \
ACTCORE_DB_URL=postgresql+asyncpg://... \
TEMPORAL_HOST=localhost:7233 \
python -m activity_core.event_router
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
import uuid
from datetime import datetime, timezone
from typing import Any
import nats
import nats.js.api
from nats.aio.client import Client as NATSClient
from nats.js.client import JetStreamContext
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from temporalio.client import Client as TemporalClient
from temporalio.common import WorkflowIDConflictPolicy
from temporalio.exceptions import WorkflowAlreadyStartedError
from activity_core.models import EventEnvelope, EventTriggerConfig
from activity_core.orm import ActivityDefinition as ActivityDefinitionRow
logger = logging.getLogger(__name__)
NATS_URL = os.environ.get("NATS_URL", "nats://localhost:4222")
TEMPORAL_HOST = os.environ.get("TEMPORAL_HOST", "localhost:7233")
TEMPORAL_NAMESPACE = os.environ.get("TEMPORAL_NAMESPACE", "default")
_STREAM_NAME = "ACTIVITY_EVENTS"
_SUBJECT = "activity.>"
_CONSUMER_NAME = "activity-core-event-router"
_ORCHESTRATOR_TASK_QUEUE = "orchestrator-tq"
class EventRouter:
    """Subscribes to NATS JetStream and routes incoming events to Temporal workflows.

    Each message is decoded into an ``EventEnvelope``, matched against the
    enabled event-trigger ActivityDefinitions loaded from the database, and a
    ``RunActivityWorkflow`` is started for every match.  A message is acked
    only after all of its dispatches have returned.
    """

    def __init__(
        self,
        nats_url: str,
        temporal_client: TemporalClient,
        db_url: str,
    ) -> None:
        """Store connection settings; no I/O happens until ``start()``."""
        self._nats_url = nats_url
        self._temporal = temporal_client
        self._db_url = db_url
        self._nc: NATSClient | None = None
        self._js: JetStreamContext | None = None
        self._session_factory: async_sessionmaker[AsyncSession] | None = None

    async def _ensure_stream(self, js: JetStreamContext) -> None:
        """Create the ACTIVITY_EVENTS stream if no stream covers the subject.

        Only "not found" triggers creation; any other lookup failure
        (connectivity, permissions) propagates to the caller.
        """
        from nats.js.errors import NotFoundError  # local: not imported at file level

        try:
            await js.find_stream_name_by_subject(_SUBJECT)
            return
        except NotFoundError:
            pass
        await js.add_stream(
            nats.js.api.StreamConfig(
                name=_STREAM_NAME,
                subjects=[_SUBJECT],
            )
        )
        logger.info("created JetStream stream %r", _STREAM_NAME)

    # T27: Load all enabled event-trigger ActivityDefinitions from DB.
    async def _load_event_definitions(
        self,
    ) -> list[tuple[str, EventTriggerConfig]]:
        """Return list of (activity_id, EventTriggerConfig) for enabled event defs.

        Rows whose ``trigger_config`` fails validation are logged and skipped.

        Raises:
            RuntimeError: if called before ``start()`` created the session factory.
        """
        if self._session_factory is None:
            # Explicit raise instead of ``assert`` so the guard survives -O.
            raise RuntimeError("EventRouter.start() must run before loading definitions")
        async with self._session_factory() as session:
            rows = (
                await session.scalars(
                    select(ActivityDefinitionRow).where(
                        ActivityDefinitionRow.trigger_type == "event",
                        ActivityDefinitionRow.enabled.is_(True),
                    )
                )
            ).all()
        result: list[tuple[str, EventTriggerConfig]] = []
        for row in rows:
            try:
                cfg = EventTriggerConfig.model_validate(row.trigger_config)
            except Exception:
                logger.warning(
                    "skipping malformed trigger_config for activity %s",
                    row.id,
                    exc_info=True,
                )
                continue
            result.append((str(row.id), cfg))
        return result

    # T27: Match an envelope against the routing rules.
    def _matches(self, envelope: EventEnvelope, cfg: EventTriggerConfig) -> bool:
        """Return True if the envelope matches the EventTriggerConfig.

        The event type must be equal, and every filter key/value pair must
        equal the value found under that key in ``envelope.payload``.
        """
        if envelope.type != cfg.event_type:
            return False
        return not any(
            envelope.payload.get(key) != value for key, value in cfg.filters.items()
        )

    # T28: Start RunActivityWorkflow for a matched activity.
    async def _dispatch(self, activity_id: str, envelope: EventEnvelope) -> None:
        """Start RunActivityWorkflow for one matched activity.

        Workflow ID is deterministic: activity-{activity_id}:{event_id}

        Two policies together make redelivery idempotent:
        - ``id_conflict_policy=FAIL`` rejects the start while a run with this
          ID is still open;
        - ``id_reuse_policy=REJECT_DUPLICATE`` rejects it after that run has
          closed.  Without it the default reuse policy would start a second
          run for a message redelivered after the first one finished.
        """
        from temporalio.common import WorkflowIDReusePolicy  # local: not imported at file level

        workflow_id = f"activity-{activity_id}:{envelope.event_id}"
        try:
            await self._temporal.start_workflow(
                "RunActivityWorkflow",
                args=[activity_id, envelope.event_id, envelope.occurred_at.isoformat()],
                id=workflow_id,
                task_queue=_ORCHESTRATOR_TASK_QUEUE,
                id_conflict_policy=WorkflowIDConflictPolicy.FAIL,
                id_reuse_policy=WorkflowIDReusePolicy.REJECT_DUPLICATE,
            )
        except WorkflowAlreadyStartedError:
            # Duplicate delivery — workflow already running or completed; safe to skip.
            logger.debug("duplicate event %r for activity %s — skipped", envelope.event_id, activity_id)
            return
        logger.info(
            "started workflow %r for event %r (activity %s)",
            workflow_id,
            envelope.event_id,
            activity_id,
        )

    async def _handle_message(self, msg: Any) -> None:
        """Decode a NATS message, match it against routing rules, and dispatch.

        Unparsable messages are terminated rather than nak'd: a nak asks for
        redelivery, and a payload that cannot be parsed now will not parse on
        the next attempt either.

        If loading definitions or a dispatch raises, the exception propagates
        and the message is left un-acked so JetStream redelivers it.
        """
        try:
            raw = json.loads(msg.data.decode())
            envelope = EventEnvelope.model_validate(raw)
        except Exception:
            logger.warning(
                "failed to parse event envelope from NATS message — terminating",
                exc_info=True,
            )
            await msg.term()
            return

        # T27: Reload routing table per message so hot changes take effect.
        event_defs = await self._load_event_definitions()
        matched = [aid for aid, cfg in event_defs if self._matches(envelope, cfg)]
        if not matched:
            logger.debug("event %r type=%r matched no definitions", envelope.event_id, envelope.type)
            await msg.ack()
            return

        # T28: Start a workflow for each matched activity.
        for activity_id in matched:
            await self._dispatch(activity_id, envelope)

        # Ack only after all dispatches succeed (at-least-once guarantee).
        await msg.ack()

    async def start(self) -> None:
        """Connect to NATS, set up the stream/consumer, and begin processing.

        Blocks until cancelled.  The subscription, NATS connection and DB
        engine are released on the way out, including when setup itself fails
        part-way through.
        """
        engine = create_async_engine(self._db_url)
        self._session_factory = async_sessionmaker(engine, expire_on_commit=False)
        nc: NATSClient | None = None
        sub = None
        try:
            nc = await nats.connect(self._nats_url)
            self._nc = nc
            self._js = nc.jetstream()
            await self._ensure_stream(self._js)

            # Durable push consumer — survives restarts, replays unacked messages.
            sub = await self._js.subscribe(
                _SUBJECT,
                durable=_CONSUMER_NAME,
                cb=self._handle_message,
                manual_ack=True,
            )
            logger.info(
                "EventRouter listening on subject %r (stream=%r, consumer=%r)",
                _SUBJECT,
                _STREAM_NAME,
                _CONSUMER_NAME,
            )
            await asyncio.Future()  # run until cancelled
        finally:
            try:
                if sub is not None:
                    await sub.unsubscribe()
                if nc is not None:
                    await nc.drain()
            finally:
                # Dispose the engine even if the NATS teardown itself raises.
                await engine.dispose()
async def main() -> None:
    """Entry point: configure logging, connect to Temporal, and run the router.

    Raises:
        RuntimeError: if ``ACTCORE_DB_URL`` is unset or empty.
    """
    logging.basicConfig(level=logging.INFO)

    database_url = os.environ.get("ACTCORE_DB_URL")
    if not database_url:
        raise RuntimeError("ACTCORE_DB_URL is required")

    temporal = await TemporalClient.connect(TEMPORAL_HOST, namespace=TEMPORAL_NAMESPACE)
    event_router = EventRouter(
        nats_url=NATS_URL, temporal_client=temporal, db_url=database_url
    )
    await event_router.start()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,163 @@
"""Temporal Schedule management for activity-core.
T22: upsert_schedule, delete_schedule, list_schedules
T24: misfire_policy → ScheduleOverlapPolicy mapping (all three policies)
Schedule ID convention: activity-schedule-{activity_definition.id}
Workflow triggered: RunActivityWorkflow on orchestrator-tq
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from uuid import UUID
from temporalio.client import (
Client,
Schedule,
ScheduleActionStartWorkflow,
ScheduleBackfill,
ScheduleHandle,
ScheduleOverlapPolicy,
SchedulePolicy,
ScheduleSpec,
ScheduleState,
ScheduleUpdate,
ScheduleUpdateInput,
)
from temporalio.service import RPCError
from activity_core.models import ActivityDefinition, CronTriggerConfig
_ORCHESTRATOR_TASK_QUEUE = "orchestrator-tq"
# Trigger_key sentinel used when a workflow is started by a Temporal Schedule.
# RunActivityWorkflow detects this value and derives run dedup key from workflow_id.
SCHEDULED_TRIGGER_KEY = "scheduled"
# T24: misfire_policy → ScheduleOverlapPolicy
_MISFIRE_TO_OVERLAP: dict[str, ScheduleOverlapPolicy] = {
"skip": ScheduleOverlapPolicy.SKIP,
"catchup": ScheduleOverlapPolicy.BUFFER_ALL,
"compress": ScheduleOverlapPolicy.BUFFER_ONE,
}
def schedule_id(activity_id: str | UUID) -> str:
    """Build the Temporal Schedule ID for an ActivityDefinition.

    The ID is the fixed prefix ``activity-schedule-`` followed by the string
    form of ``activity_id``.
    """
    return "activity-schedule-" + str(activity_id)
def _overlap_policy(misfire_policy: str) -> ScheduleOverlapPolicy:
    """Translate a misfire policy name into a ScheduleOverlapPolicy.

    Names missing from ``_MISFIRE_TO_OVERLAP`` fall back to ``SKIP``.
    """
    try:
        return _MISFIRE_TO_OVERLAP[misfire_policy]
    except KeyError:
        return ScheduleOverlapPolicy.SKIP
def _build_schedule(defn: ActivityDefinition) -> Schedule:
    """Assemble the Temporal Schedule for a cron-triggered ActivityDefinition.

    The schedule starts ``RunActivityWorkflow`` on the orchestrator task queue
    with args ``[activity id, SCHEDULED_TRIGGER_KEY, None]``.  It is created
    paused when the definition is disabled.
    """
    cron_cfg = defn.trigger_config
    assert isinstance(cron_cfg, CronTriggerConfig)

    # A falsy jitter_seconds (0 or None) means "no jitter".
    jitter = timedelta(seconds=cron_cfg.jitter_seconds) if cron_cfg.jitter_seconds else None

    # NOTE(review): the workflow ID embeds the literal text
    # "${firstScheduledTime}", intended to give every fire its own ID —
    # confirm Temporal actually substitutes this placeholder.
    return Schedule(
        action=ScheduleActionStartWorkflow(
            "RunActivityWorkflow",
            args=[str(defn.id), SCHEDULED_TRIGGER_KEY, None],
            id=f"activity-{defn.id}:${{firstScheduledTime}}",
            task_queue=_ORCHESTRATOR_TASK_QUEUE,
        ),
        spec=ScheduleSpec(
            cron_expressions=[cron_cfg.cron_expression],
            timezone_name=cron_cfg.timezone,
            jitter=jitter,
        ),
        policy=SchedulePolicy(overlap=_overlap_policy(cron_cfg.misfire_policy)),
        state=ScheduleState(paused=not defn.enabled),
    )
async def upsert_schedule(client: Client, defn: ActivityDefinition) -> ScheduleHandle:
    """Create or update a Temporal Schedule for a cron ActivityDefinition.

    - Only operates on definitions with trigger_type='cron'.
    - If enabled=False the schedule is created paused.
    - For misfire_policy='catchup', triggers a backfill covering the last hour
      after each upsert to replay any recently missed fires.

    Returns:
        The ScheduleHandle for the created/updated schedule.

    Raises:
        ValueError: if ``defn.trigger_config`` is not a CronTriggerConfig.
        RPCError: for any Temporal RPC failure other than "already exists".
    """
    # Local imports: these names are not part of the file-level import block.
    from temporalio.client import ScheduleAlreadyRunningError
    from temporalio.service import RPCStatusCode

    if not isinstance(defn.trigger_config, CronTriggerConfig):
        raise ValueError(
            f"upsert_schedule requires trigger_type='cron', "
            f"got {defn.trigger_config.trigger_type!r}"
        )
    cfg: CronTriggerConfig = defn.trigger_config
    sid = schedule_id(defn.id)
    sched = _build_schedule(defn)

    try:
        handle = await client.create_schedule(sid, sched)
    except (ScheduleAlreadyRunningError, RPCError) as err:
        # The SDK reports an existing schedule as ScheduleAlreadyRunningError;
        # a raw ALREADY_EXISTS RPCError is accepted too.  Every other RPC
        # failure (unavailable, permission denied, ...) is a real error and
        # must not be mistaken for "exists, go update it".
        if isinstance(err, RPCError) and err.status != RPCStatusCode.ALREADY_EXISTS:
            raise

        # Schedule already exists — update it in place.
        handle = client.get_schedule_handle(sid)

        async def _updater(input: ScheduleUpdateInput) -> ScheduleUpdate:  # noqa: ARG001
            return ScheduleUpdate(schedule=sched)

        await handle.update(_updater)
        # Sync pause state explicitly (update replaces the schedule object
        # but pause state is part of ScheduleState, already embedded above).
        if defn.enabled:
            await handle.unpause()
        else:
            await handle.pause(note="disabled via upsert_schedule")

    # T24 catchup: backfill any fires missed in the last hour.
    # NOTE(review): this runs on every upsert — confirm that replaying the
    # last hour after each update or sync cannot duplicate runs.
    if cfg.misfire_policy == "catchup":
        now = datetime.now(tz=timezone.utc)
        await handle.backfill(
            [
                ScheduleBackfill(
                    start_at=now - timedelta(hours=1),
                    end_at=now,
                    overlap=ScheduleOverlapPolicy.BUFFER_ALL,
                )
            ]
        )
    return handle
async def delete_schedule(client: Client, activity_id: str | UUID) -> None:
    """Delete the Temporal Schedule for the given activity_id.

    No-op if the schedule does not exist.

    Raises:
        RPCError: for any Temporal RPC failure other than NOT_FOUND, so a
            schedule that could not be deleted is never reported as gone.
    """
    from temporalio.service import RPCStatusCode  # local: not imported at file level

    handle = client.get_schedule_handle(schedule_id(activity_id))
    try:
        await handle.delete()
    except RPCError as err:
        if err.status != RPCStatusCode.NOT_FOUND:
            raise
        # Not found — treat as success.
async def list_schedules(client: Client) -> list[dict]:
    """List the Temporal Schedules whose ID starts with ``activity-schedule-``.

    Each result is ``{"schedule_id": <full id>, "activity_id": <id without
    the prefix>}``; schedules with any other ID are ignored.
    """
    marker = "activity-schedule-"
    cut = len(marker)
    listing = await client.list_schedules()
    return [
        {"schedule_id": item.id, "activity_id": item.id[cut:]}
        async for item in listing
        if item.id.startswith(marker)
    ]

View File

@@ -0,0 +1,123 @@
"""Bootstrap script: sync Temporal Schedules with the ActivityDefinition DB.
T23: On startup, ensures every enabled cron ActivityDefinition has a live
Temporal Schedule, and removes orphaned schedules that have no matching DB row.
Run directly:
ACTCORE_DB_URL=... uv run python -m activity_core.sync_schedules
Also called from worker.py before the worker enters its run loop.
"""
from __future__ import annotations
import asyncio
import logging
import os
import uuid
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from temporalio.client import Client
from activity_core.models import ActivityDefinition, CronTriggerConfig
from activity_core.orm import ActivityDefinition as ActivityDefinitionRow
from activity_core.schedule_manager import delete_schedule, list_schedules, upsert_schedule
logger = logging.getLogger(__name__)
TEMPORAL_HOST = os.environ.get("TEMPORAL_HOST", "localhost:7233")
TEMPORAL_NAMESPACE = os.environ.get("TEMPORAL_NAMESPACE", "default")
def _row_to_domain(row: ActivityDefinitionRow) -> ActivityDefinition:
    """Validate an ORM row into the domain ActivityDefinition used by schedule_manager."""
    carried = (
        "id",
        "name",
        "enabled",
        "trigger_config",
        "context_sources",
        "task_templates",
        "dedupe_key_strategy",
        "version",
    )
    raw = {attr: getattr(row, attr) for attr in carried}
    return ActivityDefinition.model_validate(raw)
async def sync(client: Client, db_url: str) -> None:
    """Reconcile Temporal Schedules against the ActivityDefinition table.

    Steps:
    1. Load all cron ActivityDefinitions (enabled and disabled) from Postgres.
    2. Upsert a Temporal Schedule for each one; disabled definitions get a
       paused schedule.
    3. Delete Temporal Schedules whose activity_id has no matching DB row
       (tombstone cleanup for deleted or trigger-type-changed definitions).

    A row that fails validation or whose upsert fails is logged and counted
    but does not stop the remaining rows or the orphan cleanup.

    Raises:
        RuntimeError: after the full pass, if any row failed to sync.
    """
    engine = create_async_engine(db_url)
    session_factory = async_sessionmaker(engine, expire_on_commit=False)
    try:
        async with session_factory() as session:
            rows = (
                await session.scalars(
                    select(ActivityDefinitionRow).where(
                        ActivityDefinitionRow.trigger_type == "cron"
                    )
                )
            ).all()
    finally:
        await engine.dispose()

    db_activity_ids: set[str] = set()
    upserted = 0
    skipped = 0
    failed = 0
    for row in rows:
        try:
            defn = _row_to_domain(row)
        except Exception:
            # The row exists but cannot be parsed: keep its ID so any existing
            # schedule is not deleted as an orphan below.
            db_activity_ids.add(str(row.id))
            failed += 1
            logger.exception("skipping malformed cron activity definition %s", row.id)
            continue
        if not isinstance(defn.trigger_config, CronTriggerConfig):
            continue  # should not happen given the WHERE clause, but guard anyway
        db_activity_ids.add(str(defn.id))
        try:
            # upsert_schedule derives the paused state from defn.enabled, so
            # enabled and disabled definitions go through the same call.
            await upsert_schedule(client, defn)
        except Exception:
            failed += 1
            logger.exception("failed to upsert schedule for activity %s", defn.id)
            continue
        if defn.enabled:
            upserted += 1
            logger.info("upserted schedule for activity %s (%s)", defn.id, defn.name)
        else:
            skipped += 1
            logger.info("upserted paused schedule for disabled activity %s", defn.id)

    # Tombstone cleanup: remove Temporal Schedules with no matching DB row.
    existing_schedules = await list_schedules(client)
    deleted = 0
    for entry in existing_schedules:
        if entry["activity_id"] not in db_activity_ids:
            await delete_schedule(client, entry["activity_id"])
            deleted += 1
            logger.info("deleted orphaned schedule %s", entry["schedule_id"])

    logger.info(
        "sync_schedules complete — upserted=%d skipped_disabled=%d deleted_orphans=%d failed=%d",
        upserted,
        skipped,
        deleted,
        failed,
    )
    if failed:
        # Keep the "raises on failure" contract so callers and the CLI exit
        # status still see a problem, but only after every row had its turn.
        raise RuntimeError(f"{failed} activity definition(s) failed to sync")
async def main() -> None:
    """CLI entry point: connect to Temporal and run one reconciliation pass.

    Raises:
        RuntimeError: if ``ACTCORE_DB_URL`` is unset or empty.
    """
    logging.basicConfig(level=logging.INFO)

    database_url = os.environ.get("ACTCORE_DB_URL")
    if not database_url:
        raise RuntimeError("ACTCORE_DB_URL is required")

    temporal = await Client.connect(TEMPORAL_HOST, namespace=TEMPORAL_NAMESPACE)
    await sync(temporal, database_url)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -4,23 +4,31 @@ Starts two workers (wired up in T20):
- orchestrator-tq: RunActivityWorkflow + its activities
- task-execution-tq: TaskExecutorWorkflow
T23: Calls sync_schedules before entering the worker run loop to ensure
all cron ActivityDefinitions have live Temporal Schedules.
T31: Exposes Prometheus metrics via the Temporal SDK runtime on :9090/metrics.
Run with:
TEMPORAL_HOST=localhost:7233 \
ACTCORE_DB_URL=postgresql+asyncpg://actcore:actcore@localhost:5433/actcore \
python -m activity_core.worker
Environment variables:
TEMPORAL_HOST Temporal frontend address (default: localhost:7233)
TEMPORAL_NAMESPACE Temporal namespace (default: default)
ACTCORE_DB_URL App DB connection string (required)
TEMPORAL_HOST Temporal frontend address (default: localhost:7233)
TEMPORAL_NAMESPACE Temporal namespace (default: default)
ACTCORE_DB_URL App DB connection string (required)
PROMETHEUS_BIND_ADDR Prometheus metrics bind (default: 0.0.0.0:9090)
"""
from __future__ import annotations
import asyncio
import logging
import os
from temporalio.client import Client
from temporalio.runtime import PrometheusConfig, Runtime, TelemetryConfig
from temporalio.worker import Worker
from activity_core.activities import (
@@ -30,10 +38,14 @@ from activity_core.activities import (
persist_task_instance,
resolve_context,
)
from activity_core.sync_schedules import sync as sync_schedules
from activity_core.workflows import RunActivityWorkflow, TaskExecutorWorkflow
logger = logging.getLogger(__name__)
TEMPORAL_HOST = os.environ.get("TEMPORAL_HOST", "localhost:7233")
TEMPORAL_NAMESPACE = os.environ.get("TEMPORAL_NAMESPACE", "default")
PROMETHEUS_BIND_ADDR = os.environ.get("PROMETHEUS_BIND_ADDR", "0.0.0.0:9090")
ORCHESTRATOR_TASK_QUEUE = "orchestrator-tq"
TASK_EXECUTION_TASK_QUEUE = "task-execution-tq"
@@ -45,7 +57,23 @@ async def run() -> None:
raise RuntimeError("ACTCORE_DB_URL is required")
init_session_factory(db_url)
client = await Client.connect(TEMPORAL_HOST, namespace=TEMPORAL_NAMESPACE)
# T31: Configure the Temporal SDK runtime to emit metrics in Prometheus format.
runtime = Runtime(
telemetry=TelemetryConfig(
metrics=PrometheusConfig(bind_address=PROMETHEUS_BIND_ADDR)
)
)
client = await Client.connect(
TEMPORAL_HOST, namespace=TEMPORAL_NAMESPACE, runtime=runtime
)
# T23: Sync Temporal Schedules with the DB before workers start accepting tasks.
logger.info("Syncing Temporal Schedules with ActivityDefinition DB...")
try:
await sync_schedules(client, db_url)
except Exception:
logger.exception("schedule sync failed — continuing worker startup")
orchestrator_worker = Worker(
client,
@@ -70,4 +98,5 @@ async def run() -> None:
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
asyncio.run(run())

View File

@@ -15,7 +15,7 @@ import uuid
from datetime import timedelta
from temporalio import workflow
from temporalio.common import RetryPolicy
from temporalio.common import RetryPolicy, SearchAttributeKey, TypedSearchAttributes, SearchAttributePair
with workflow.unsafe.imports_passed_through():
from activity_core.activities import (
@@ -25,6 +25,12 @@ with workflow.unsafe.imports_passed_through():
resolve_context,
)
from activity_core.template_engine import evaluate_templates
from activity_core.schedule_manager import SCHEDULED_TRIGGER_KEY
# T32: Custom search attributes for Temporal visibility (must be registered in Temporal first).
# Registration: temporal operator search-attribute create --name ActivityId --type Keyword
_ACTIVITY_ID_KEY = SearchAttributeKey.for_keyword("ActivityId")
_ACTIVITY_NAME_KEY = SearchAttributeKey.for_keyword("ActivityName")
_RETRY_POLICY = RetryPolicy(
initial_interval=timedelta(seconds=1),
@@ -74,6 +80,16 @@ class RunActivityWorkflow:
retry_policy=_RETRY_POLICY,
)
# T32: Tag this workflow execution with activity metadata so runs are
# filterable in the Temporal UI (requires ActivityId + ActivityName to be
# registered as custom search attributes — see docs/runbook.md).
workflow.upsert_search_attributes(
TypedSearchAttributes([
SearchAttributePair(_ACTIVITY_ID_KEY, activity_id),
SearchAttributePair(_ACTIVITY_NAME_KEY, defn.get("name", "")),
])
)
# ── 2. Resolve context ────────────────────────────────────────────────
context_snapshot: dict = await workflow.execute_activity(
resolve_context,
@@ -89,9 +105,14 @@ class RunActivityWorkflow:
# ── 4. Log the run ────────────────────────────────────────────────────
# run_id is derived deterministically so log_run retries are idempotent.
run_id = str(
uuid.uuid5(uuid.NAMESPACE_URL, f"{activity_id}:{trigger_key}")
)
# For schedule-fired runs the trigger_key is the sentinel "scheduled";
# each fire has a unique workflow_id (embeds ${firstScheduledTime}), so
# we use the workflow_id as the dedup key instead.
if trigger_key == SCHEDULED_TRIGGER_KEY:
dedup_source = workflow.info().workflow_id
else:
dedup_source = f"{activity_id}:{trigger_key}"
run_id = str(uuid.uuid5(uuid.NAMESPACE_URL, dedup_source))
await workflow.execute_activity(
log_run,
{