feat(ACTIVITY-WP-0014): explicit run-miss recovery policies (T02, T04)

Set Temporal catchup_window on cron schedules so a fire missed during a
worker/Temporal outage is no longer silently dropped. Redefine misfire_policy
into three explicit modes — skip, catchup_all, catchup_latest — mapping to
(catchup_window, overlap) pairs; legacy catchup/compress aliased. Add
catchup_window_seconds override. Remove the ad-hoc upsert-time 1h backfill in
favour of native catchup. Apply catchup_latest to daily-statehub-wsjf-triage in
the Railiance runtime manifest and document run-miss policies in the runbook.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-23 14:15:45 +02:00
parent ffc0ee2cb7
commit a83b117f60
6 changed files with 181 additions and 29 deletions

View File

@@ -37,6 +37,7 @@ def _make_defn(
misfire_policy: str = "skip",
enabled: bool = True,
jitter: int = 0,
catchup_window_seconds: int | None = None,
) -> ActivityDefinition:
return ActivityDefinition(
id=uuid.uuid4(),
@@ -46,6 +47,7 @@ def _make_defn(
cron_expression=cron,
misfire_policy=misfire_policy,
jitter_seconds=jitter,
catchup_window_seconds=catchup_window_seconds,
),
)
@@ -186,6 +188,76 @@ async def test_misfire_policy_compress_sets_overlap_buffer_one(env: WorkflowEnvi
await delete_schedule(env.client, defn.id)
# ── ACTIVITY-WP-0014: explicit run-miss policies + catchup window ────────────
@pytest.mark.asyncio
async def test_skip_sets_short_catchup_window(env: WorkflowEnvironment) -> None:
    """skip = run on trigger or skip: tiny grace window, no real recovery.

    Upserts a schedule with ``misfire_policy="skip"`` and checks that the
    described schedule policy reports ``ScheduleOverlapPolicy.SKIP`` together
    with a 60-second catchup window.
    """
    defn = _make_defn(misfire_policy="skip")
    await upsert_schedule(env.client, defn)
    try:
        handle = env.client.get_schedule_handle(schedule_id(defn.id))
        policy = (await handle.describe()).schedule.policy
        assert policy.overlap == ScheduleOverlapPolicy.SKIP
        assert policy.catchup_window == timedelta(seconds=60)
    finally:
        # Delete in ``finally`` so a failed assertion does not leave the
        # schedule behind.
        await delete_schedule(env.client, defn.id)
@pytest.mark.asyncio
async def test_catchup_all_recovers_full_window(env: WorkflowEnvironment) -> None:
    """catchup_all = recover every missed fire: long window, BUFFER_ALL.

    Upserts a schedule with ``misfire_policy="catchup_all"`` and checks that
    the described schedule policy reports ``ScheduleOverlapPolicy.BUFFER_ALL``
    together with a 365-day catchup window.
    """
    defn = _make_defn(misfire_policy="catchup_all")
    await upsert_schedule(env.client, defn)
    try:
        handle = env.client.get_schedule_handle(schedule_id(defn.id))
        policy = (await handle.describe()).schedule.policy
        assert policy.overlap == ScheduleOverlapPolicy.BUFFER_ALL
        assert policy.catchup_window == timedelta(days=365)
    finally:
        # Delete in ``finally`` so a failed assertion does not leave the
        # schedule behind.
        await delete_schedule(env.client, defn.id)
@pytest.mark.asyncio
async def test_catchup_latest_does_not_accumulate(env: WorkflowEnvironment) -> None:
    """catchup_latest = recover only the most recent missed fire: BUFFER_ONE.

    Upserts a schedule with ``misfire_policy="catchup_latest"`` and checks
    that the described schedule policy reports
    ``ScheduleOverlapPolicy.BUFFER_ONE`` together with a 24-hour catchup
    window.
    """
    defn = _make_defn(misfire_policy="catchup_latest")
    await upsert_schedule(env.client, defn)
    try:
        handle = env.client.get_schedule_handle(schedule_id(defn.id))
        policy = (await handle.describe()).schedule.policy
        assert policy.overlap == ScheduleOverlapPolicy.BUFFER_ONE
        assert policy.catchup_window == timedelta(hours=24)
    finally:
        # Delete in ``finally`` so a failed assertion does not leave the
        # schedule behind.
        await delete_schedule(env.client, defn.id)
@pytest.mark.asyncio
async def test_legacy_aliases_map_to_explicit_policies(env: WorkflowEnvironment) -> None:
    """Legacy catchup/compress keep working and pick up the new catchup windows.

    Upserts one schedule per legacy ``misfire_policy`` value and checks the
    catchup window each one reports: 365 days for ``"catchup"`` and 24 hours
    for ``"compress"``.
    """
    catchup = _make_defn(misfire_policy="catchup")
    compress = _make_defn(misfire_policy="compress")

    # Track what was actually created so cleanup only touches schedules that
    # exist, even if the second upsert (or a later step) fails.
    created = []
    try:
        for defn in (catchup, compress):
            await upsert_schedule(env.client, defn)
            created.append(defn)

        d1 = await env.client.get_schedule_handle(schedule_id(catchup.id)).describe()
        d2 = await env.client.get_schedule_handle(schedule_id(compress.id)).describe()
        assert d1.schedule.policy.catchup_window == timedelta(days=365)
        assert d2.schedule.policy.catchup_window == timedelta(hours=24)
    finally:
        # Same deletion order as creation: catchup first, then compress.
        for defn in created:
            await delete_schedule(env.client, defn.id)
@pytest.mark.asyncio
async def test_explicit_catchup_window_override(env: WorkflowEnvironment) -> None:
    """An explicit catchup_window_seconds overrides the per-policy default.

    Upserts a ``"skip"`` schedule with ``catchup_window_seconds=7200`` and
    checks that the described schedule policy reports a 2-hour catchup
    window.
    """
    defn = _make_defn(misfire_policy="skip", catchup_window_seconds=7200)
    await upsert_schedule(env.client, defn)
    try:
        handle = env.client.get_schedule_handle(schedule_id(defn.id))
        desc = await handle.describe()
        assert desc.schedule.policy.catchup_window == timedelta(hours=2)
    finally:
        # Delete in ``finally`` so a failed assertion does not leave the
        # schedule behind.
        await delete_schedule(env.client, defn.id)
@pytest.mark.asyncio
async def test_schedule_smoke_test_creates_one_shot_schedule(
env: WorkflowEnvironment,