feat(ACTIVITY-WP-0014): missed-fire detection & alert sink (T03)

Add activity_core/schedule_health: a pure evaluate_schedule_health() verdict
(built on Temporal's num_actions_missed_catchup_window plus a staleness check),
an async check_schedule_health() reader, and post_missed_fire_alert() that emits
a schedule_miss State Hub progress event. Makes a missed fire visible even under
misfire_policy=skip, where Temporal drops it by design. Also adds unit tests for
the verdict logic.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-23 14:25:33 +02:00
parent 77af65afb2
commit 053d18b24a
3 changed files with 269 additions and 2 deletions

View File

@@ -0,0 +1,81 @@
"""ACTIVITY-WP-0014 T03: missed-fire detection verdict tests."""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from activity_core.schedule_health import evaluate_schedule_health
# Fixed, timezone-aware (UTC) reference instant; the tests pass it as ``now``
# so the verdicts do not depend on the wall clock.
NOW = datetime(2026, 6, 23, 12, 0, tzinfo=timezone.utc)
def test_healthy_when_recent_fire_and_no_drops() -> None:
    """A recent fire with zero dropped catch-up fires yields a clean verdict."""
    five_minutes_ago = NOW - timedelta(minutes=5)
    hourly = timedelta(hours=1)

    verdict = evaluate_schedule_health(
        activity_id="a1",
        missed_catchup_window=0,
        last_fired_at=five_minutes_ago,
        now=NOW,
        expected_interval=hourly,
    )

    # Healthy, not missed, and no explanatory reasons attached.
    assert verdict.healthy is True
    assert verdict.missed is False
    assert verdict.reasons == []
def test_unhealthy_when_catchup_window_dropped_fires() -> None:
    """A non-zero dropped-fire count flags a miss even when the last fire is recent."""
    five_minutes_ago = NOW - timedelta(minutes=5)

    verdict = evaluate_schedule_health(
        activity_id="a1",
        missed_catchup_window=2,
        last_fired_at=five_minutes_ago,
        now=NOW,
    )

    assert verdict.missed is True
    # The leading reason must report the dropped-fire count.
    leading_reason = verdict.reasons[0]
    assert "2 fire(s) dropped" in leading_reason
def test_unhealthy_when_last_fire_too_stale() -> None:
    """A daily schedule whose last fire is two days old is reported as missed."""
    two_days = timedelta(days=2)
    daily = timedelta(days=1)

    verdict = evaluate_schedule_health(
        activity_id="daily",
        missed_catchup_window=0,
        last_fired_at=NOW - two_days,
        now=NOW,
        expected_interval=daily,
    )

    assert verdict.missed is True
    # At least one reason must mention the exceeded interval.
    assert any("exceeding the expected" in reason for reason in verdict.reasons)
    # The reported staleness is exactly the gap between the last fire and now.
    assert verdict.staleness == two_days
def test_within_tolerance_is_healthy() -> None:
    """Being five minutes overdue with a ten-minute tolerance is still healthy."""
    daily = timedelta(days=1)
    # One interval plus five minutes ago: overdue, but inside the tolerance.
    slightly_overdue_fire = NOW - timedelta(days=1, minutes=5)

    verdict = evaluate_schedule_health(
        activity_id="daily",
        missed_catchup_window=0,
        last_fired_at=slightly_overdue_fire,
        now=NOW,
        expected_interval=daily,
        tolerance=timedelta(minutes=10),
    )

    assert verdict.healthy is True
def test_no_fire_recorded_for_due_schedule_is_unhealthy() -> None:
    """With an expected interval set, a schedule that never fired is a miss."""
    daily = timedelta(days=1)

    verdict = evaluate_schedule_health(
        activity_id="daily",
        missed_catchup_window=0,
        last_fired_at=None,
        now=NOW,
        expected_interval=daily,
    )

    assert verdict.missed is True
    # The leading reason must call out the absence of any recorded fire.
    leading_reason = verdict.reasons[0]
    assert "no recorded fire" in leading_reason
def test_no_interval_and_no_fire_is_not_flagged() -> None:
    """No recorded fire and no expected interval must not be flagged as a miss."""
    # Absence of a fire alone proves nothing when no expected interval is
    # supplied, so the verdict has to stay healthy.
    verdict = evaluate_schedule_health(
        activity_id="event-ish",
        missed_catchup_window=0,
        last_fired_at=None,
        now=NOW,
    )

    assert verdict.healthy is True