generated from coulomb/repo-seed
T1: systemd --user units (ops-warden-worker.{service,timer}) + scripts/install-worker-timer.sh
(--enable opt-in, cron fallback documented) + examples/worker.env.example. Kill switch:
`systemctl --user disable --now ops-warden-worker.timer` or WORKER_ENABLED=0. Installed and
ENABLED — verified a real systemd run (Result=success, used the llm brain) and the timer is
active (next run +15min).
T2: hardened worker-tick.sh — State Hub /state/health precheck → graceful skip (exit 0) when
unreachable; worker-run failure logged but never fails the unit (retry next tick). Verified
hub-down skip and a live tick.
Conservative tier only; nothing auto-sent. Kill switch is one command.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
62 lines
2.5 KiB
Bash
Executable File
62 lines
2.5 KiB
Bash
Executable File
#!/usr/bin/env bash
# Scheduled tick for the ops-warden conservative worker (WARDEN-WP-0020 T4).
#
# Triages NEW State Hub coordination requests into $WARDEN_STATE_DIR/worker-digest.md
# (drafted replies you approve) and posts ONE progress note. Conservative tier: it NEVER
# sends to other agents and never marks messages read. Safe to schedule.
#
# DISABLED by default. Enable with a cron entry (every 15 min), e.g.:
#   */15 * * * * /home/worsch/ops-warden/scripts/worker-tick.sh >> ~/.local/state/warden/worker-tick.log 2>&1
#
# Environment read by this script:
#   WARDEN_STATE_DIR  state directory (default: $HOME/.local/state/warden)
#   WORKER_ENABLED    set to 0 to turn every tick into a logged no-op
#   WORKER_BRAIN      llm (default; needs llm-connect) or rule (offline, deterministic)
#   WARDEN_HUB_URL    State Hub base URL (default: http://127.0.0.1:8000)
#   LLM_CONNECT_URL   llm-connect base URL. When it is empty and the brain is llm, the
#                     tick opens a short-lived kubectl port-forward to
#                     activity-core/llm-connect and tears it down on exit.
set -euo pipefail

# Repository root = parent of the directory holding this script.
# `cd` output is discarded because, with CDPATH set, cd prints the directory it
# resolved and that text would otherwise be captured in front of `pwd`'s output.
# `--` keeps a path that starts with '-' from being parsed as an option.
ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." >/dev/null && pwd)"

# Directory for the lock file and worker state; created on first use.
STATE="${WARDEN_STATE_DIR:-$HOME/.local/state/warden}"
mkdir -p -- "$STATE"

# Master off-switch (env file / WORKER_ENABLED) — skip without touching the timer.
# Unset or empty means enabled. Besides 0, the usual "off" spellings (false, no,
# off; any letter case) also disable the tick, so a kill switch written as
# WORKER_ENABLED=false is not silently ignored. The log line reports the value
# that was actually set.
worker_enabled="${WORKER_ENABLED:-1}"
case "${worker_enabled,,}" in
  0 | false | no | off)
    echo "$(date -Is) tick: WORKER_ENABLED=$worker_enabled; skip"
    exit 0
    ;;
esac

# Single-instance guard: two ticks must never run at the same time.
# Descriptor 9 is opened on the lock file and is not closed anywhere below, so a
# lock taken here is held until this shell exits. A non-blocking attempt that
# fails is logged and treated as a normal skip.
exec 9>"$STATE/worker-tick.lock"
if ! flock -n 9; then
  echo "$(date -Is) tick: another run holds the lock; skip"
  exit 0
fi

# Effective settings for this tick: environment overrides with built-in defaults.
BRAIN="${WORKER_BRAIN:-llm}"
HUB_URL="${WARDEN_HUB_URL:-http://127.0.0.1:8000}"
LLM_URL="${LLM_CONNECT_URL:-}"

# PID of the background port-forward; stays empty unless one is started later.
PF_PID=""

# EXIT handler: stop the port-forward if this tick started one.
# Always returns 0; an error from kill is suppressed.
cleanup() {
  if [[ -n "$PF_PID" ]]; then
    kill "$PF_PID" 2>/dev/null || true
  fi
}
trap cleanup EXIT

# Graceful skip if the State Hub is unreachable — a transient outage is not a fault.
# ${HUB_URL%/} drops one trailing slash for the probe only, so a base URL
# configured as "http://host:8000/" is checked at "/state/health" rather than
# "//state/health" (which a strict router may answer with 404, turning every
# tick into a skip). HUB_URL itself is left untouched.
# curl: -f makes HTTP errors a failure, -m 6 caps the probe at six seconds.
if ! curl -fsS -m 6 "${HUB_URL%/}/state/health" >/dev/null 2>&1; then
  echo "$(date -Is) tick: State Hub unreachable at $HUB_URL; skip"
  exit 0
fi

# Provide an llm-connect endpoint when the llm brain is selected but no URL was given.
# With kubectl available a local port-forward (18080 -> deploy/llm-connect:8080) is
# started in the background; its PID goes into PF_PID so the EXIT handler can stop it.
# Without kubectl, or when the port-forward exits during start-up, the tick falls
# back to the rule brain instead of pointing the worker at a dead port.
if [[ "$BRAIN" == "llm" && -z "$LLM_URL" ]]; then
  if command -v kubectl >/dev/null 2>&1; then
    # 9>&- closes the tick-lock descriptor in the child. Otherwise a port-forward
    # that outlives this shell (e.g. the shell is SIGKILLed, so the EXIT handler
    # never runs) would keep the flock and make every later tick skip.
    kubectl -n activity-core port-forward deploy/llm-connect 18080:8080 >/dev/null 2>&1 9>&- &
    PF_PID=$!

    # Wait up to ~4 s (20 x 0.2 s) for the forward to accept connections, leaving
    # early once it answers or once the kubectl process is gone.
    for _ in {1..20}; do
      if ! kill -0 "$PF_PID" 2>/dev/null; then
        break
      fi
      # Without -f any HTTP response counts as success: only connectivity matters.
      if curl -s -o /dev/null -m 1 "http://127.0.0.1:18080/" 2>/dev/null; then
        break
      fi
      sleep 0.2
    done

    if kill -0 "$PF_PID" 2>/dev/null; then
      LLM_URL="http://127.0.0.1:18080"
    else
      echo "$(date -Is) tick: kubectl port-forward exited early; falling back to rule brain"
      PF_PID=""
      BRAIN="rule"
    fi
  else
    echo "$(date -Is) tick: kubectl unavailable; falling back to rule brain"
    BRAIN="rule"
  fi
fi

# Log the configuration this tick is about to run with.
echo "$(date -Is) tick: brain=$BRAIN hub=$HUB_URL"

# Run the worker once, handing it the resolved endpoints through its environment.
# A non-zero result (e.g. a transient hub/llm hiccup) is only logged: the script
# still exits 0 so the scheduler does not record a failure, and the next tick
# retries. Real bugs still surface in the log.
worker_rc=0
LLM_CONNECT_URL="$LLM_URL" WARDEN_HUB_URL="$HUB_URL" \
  uv run --directory "$ROOT" warden worker run --execute --brain "$BRAIN" || worker_rc=$?
if ((worker_rc != 0)); then
  echo "$(date -Is) tick: worker run returned non-zero; will retry next tick"
fi
exit 0