generated from coulomb/repo-seed
feat(WARDEN-WP-0021): T1+T2 — scheduled worker tick enabled (systemd --user timer)
T1: systemd --user units (ops-warden-worker.{service,timer}) + scripts/install-worker-timer.sh
(--enable opt-in, cron fallback documented) + examples/worker.env.example. Kill switch:
`systemctl --user disable --now ops-warden-worker.timer` or WORKER_ENABLED=0. Installed and
ENABLED — verified a real systemd run (Result=success, used the llm brain) and the timer is
active (next run +15min).
T2: hardened worker-tick.sh — State Hub /state/health precheck → graceful skip (exit 0) when
unreachable; worker-run failure logged but never fails the unit (retry next tick). Verified
hub-down skip and a live tick.
Conservative tier only; nothing auto-sent. Kill switch is one command.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
15
examples/worker.env.example
Normal file
15
examples/worker.env.example
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# ops-warden scheduled worker config (WARDEN-WP-0021).
|
||||||
|
# Installed to ~/.config/warden/worker.env and loaded by the systemd --user service.
|
||||||
|
# No secret values belong here.
|
||||||
|
|
||||||
|
# State Hub URL the worker reads its inbox from (railiance01 after cust-wp-0011).
|
||||||
|
WARDEN_HUB_URL=http://127.0.0.1:8000
|
||||||
|
|
||||||
|
# Planner: 'llm' (llm-connect; smarter) or 'rule' (offline, deterministic fallback).
|
||||||
|
WORKER_BRAIN=llm
|
||||||
|
|
||||||
|
# Master on/off for the tick without touching the timer. 0 = skip every run.
|
||||||
|
WORKER_ENABLED=1
|
||||||
|
|
||||||
|
# Optional: set a reachable llm-connect URL to skip the per-tick kubectl port-forward.
|
||||||
|
# LLM_CONNECT_URL=http://127.0.0.1:18080
|
||||||
41
scripts/install-worker-timer.sh
Executable file
41
scripts/install-worker-timer.sh
Executable file
@@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env bash
# Install (and optionally enable) the ops-warden conservative worker systemd --user timer.
# WARDEN-WP-0021 T1. Build-stage, conservative tier only (triage + draft, never auto-send).
#
#   ./scripts/install-worker-timer.sh            # install units + env, DISABLED
#   ./scripts/install-worker-timer.sh --enable   # install + start the 15-min timer
#
# Kill switch (one command):
#   systemctl --user disable --now ops-warden-worker.timer
#   (or set WORKER_ENABLED=0 in ~/.config/warden/worker.env)
#
# What it does, in order:
#   1. bails out with the cron fallback hint when systemctl is missing;
#   2. creates ~/.config/systemd/user and ~/.config/warden;
#   3. seeds worker.env from the example (mode 600) only if it does not exist yet;
#   4. renders the service unit (replacing @ROOT@ with the repo path) and copies the timer;
#   5. reloads the user manager and, with --enable, enables + starts the timer.
set -euo pipefail

ROOT="$(cd "$(dirname "$0")/.." && pwd)"
UNIT_DIR="$HOME/.config/systemd/user"
ENV_FILE="$HOME/.config/warden/worker.env"

if ! command -v systemctl >/dev/null 2>&1; then
  echo "systemctl not found — this host has no systemd. Use the cron fallback:" >&2
  echo " */15 * * * * $ROOT/scripts/worker-tick.sh >> ~/.local/state/warden/worker-tick.log 2>&1" >&2
  exit 1
fi

# The service template uses an unquoted "ExecStart=@ROOT@/scripts/worker-tick.sh"; systemd
# splits that line on whitespace, so a repo path with spaces would yield a broken unit.
# Fail before writing anything rather than install a unit that cannot start.
if [[ "$ROOT" =~ [[:space:]] ]]; then
  echo "repo path contains whitespace, which systemd would split in ExecStart=: $ROOT" >&2
  exit 1
fi

mkdir -p "$UNIT_DIR" "$(dirname "$ENV_FILE")"
if [[ ! -f "$ENV_FILE" ]]; then
  # Never overwrite an existing env file: it may carry local edits (e.g. WORKER_ENABLED=0).
  install -m 600 "$ROOT/examples/worker.env.example" "$ENV_FILE"
  echo "wrote $ENV_FILE (review it)"
fi

# Substitute the repo path into the service unit at install time.
# Escape the characters that are special on the replacement side of s#...#...#
# (backslash, '&', and the '#' delimiter) so an unusual path is inserted literally.
root_escaped="$(printf '%s' "$ROOT" | sed 's/[\\&#]/\\&/g')"
sed "s#@ROOT@#${root_escaped}#g" "$ROOT/systemd/ops-warden-worker.service" > "$UNIT_DIR/ops-warden-worker.service"
cp "$ROOT/systemd/ops-warden-worker.timer" "$UNIT_DIR/ops-warden-worker.timer"
systemctl --user daemon-reload
echo "installed: ops-warden-worker.{service,timer} → $UNIT_DIR"

# Enabling is opt-in: without --enable the units are installed but the timer stays off.
if [[ "${1:-}" == "--enable" ]]; then
  systemctl --user enable --now ops-warden-worker.timer
  echo "ENABLED — next runs: systemctl --user list-timers ops-warden-worker.timer"
else
  echo "not enabled. start with: systemctl --user enable --now ops-warden-worker.timer"
fi
echo "kill switch: systemctl --user disable --now ops-warden-worker.timer (or WORKER_ENABLED=0 in $ENV_FILE)"
|
||||||
@@ -16,16 +16,29 @@ ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
|||||||
STATE="${WARDEN_STATE_DIR:-$HOME/.local/state/warden}"
|
STATE="${WARDEN_STATE_DIR:-$HOME/.local/state/warden}"
|
||||||
mkdir -p "$STATE"
|
mkdir -p "$STATE"
|
||||||
|
|
||||||
|
# Master off-switch (env file / WORKER_ENABLED=0) — skip without touching the timer.
|
||||||
|
if [[ "${WORKER_ENABLED:-1}" == "0" ]]; then
|
||||||
|
echo "$(date -Is) tick: WORKER_ENABLED=0; skip"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
# Concurrency guard — never let two ticks overlap.
|
# Concurrency guard — never let two ticks overlap.
|
||||||
exec 9>"$STATE/worker-tick.lock"
|
exec 9>"$STATE/worker-tick.lock"
|
||||||
flock -n 9 || { echo "$(date -Is) tick: another run holds the lock; skip"; exit 0; }
|
flock -n 9 || { echo "$(date -Is) tick: another run holds the lock; skip"; exit 0; }
|
||||||
|
|
||||||
BRAIN="${WORKER_BRAIN:-llm}"
|
BRAIN="${WORKER_BRAIN:-llm}"
|
||||||
|
HUB_URL="${WARDEN_HUB_URL:-http://127.0.0.1:8000}"
|
||||||
LLM_URL="${LLM_CONNECT_URL:-}"
|
LLM_URL="${LLM_CONNECT_URL:-}"
|
||||||
PF_PID=""
|
PF_PID=""
|
||||||
cleanup() { [[ -n "$PF_PID" ]] && kill "$PF_PID" 2>/dev/null || true; }
|
cleanup() { [[ -n "$PF_PID" ]] && kill "$PF_PID" 2>/dev/null || true; }
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
# Graceful skip if the State Hub is unreachable — a transient outage is not a fault.
|
||||||
|
if ! curl -fsS -m 6 "$HUB_URL/state/health" >/dev/null 2>&1; then
|
||||||
|
echo "$(date -Is) tick: State Hub unreachable at $HUB_URL; skip"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ "$BRAIN" == "llm" && -z "$LLM_URL" ]]; then
|
if [[ "$BRAIN" == "llm" && -z "$LLM_URL" ]]; then
|
||||||
if command -v kubectl >/dev/null 2>&1; then
|
if command -v kubectl >/dev/null 2>&1; then
|
||||||
kubectl -n activity-core port-forward deploy/llm-connect 18080:8080 >/dev/null 2>&1 &
|
kubectl -n activity-core port-forward deploy/llm-connect 18080:8080 >/dev/null 2>&1 &
|
||||||
@@ -38,5 +51,11 @@ if [[ "$BRAIN" == "llm" && -z "$LLM_URL" ]]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "$(date -Is) tick: brain=$BRAIN"
|
echo "$(date -Is) tick: brain=$BRAIN hub=$HUB_URL"
|
||||||
LLM_CONNECT_URL="$LLM_URL" uv run --directory "$ROOT" warden worker run --execute --brain "$BRAIN"
|
# A worker-run failure (transient hub/llm hiccup) is logged but never fails the unit —
|
||||||
|
# the next tick retries. Real bugs still surface in the log.
|
||||||
|
if ! LLM_CONNECT_URL="$LLM_URL" WARDEN_HUB_URL="$HUB_URL" \
|
||||||
|
uv run --directory "$ROOT" warden worker run --execute --brain "$BRAIN"; then
|
||||||
|
echo "$(date -Is) tick: worker run returned non-zero; will retry next tick"
|
||||||
|
fi
|
||||||
|
exit 0
|
||||||
|
|||||||
14
systemd/ops-warden-worker.service
Normal file
14
systemd/ops-warden-worker.service
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=ops-warden conservative coordination worker (one tick)
|
||||||
|
Documentation=https://gitea.coulomb.social/coulomb/ops-warden
|
||||||
|
After=network-online.target
|
||||||
|
Wants=network-online.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
# uv lives in ~/.local/bin; kubectl in /usr/local/bin or /usr/bin.
|
||||||
|
Environment=PATH=%h/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||||
|
EnvironmentFile=%h/.config/warden/worker.env
|
||||||
|
ExecStart=@ROOT@/scripts/worker-tick.sh
|
||||||
|
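# Graceful skips (hub down, WORKER_ENABLED=0) exit 0, so they do not mark the unit failed;
# no Restart= is configured, so the unit never restart-loops. Cap a single tick at 180 s.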
|
||||||
|
TimeoutStartSec=180
|
||||||
11
systemd/ops-warden-worker.timer
Normal file
11
systemd/ops-warden-worker.timer
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Run the ops-warden conservative worker tick every 15 minutes
|
||||||
|
|
||||||
|
[Timer]
|
||||||
|
OnBootSec=2min
|
||||||
|
OnUnitActiveSec=15min
|
||||||
|
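# NOTE(review): Persistent= only affects OnCalendar= timers (see systemd.timer(5)). With the
# monotonic OnBootSec=/OnUnitActiveSec= triggers used here it has no effect, so a run missed
# while the machine was off or asleep is NOT caught up. Switch to OnCalendar=*:0/15 if
# catch-up is required.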
|
||||||
|
Persistent=true
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=timers.target
|
||||||
@@ -4,7 +4,7 @@ type: workplan
|
|||||||
title: "Enable the scheduled worker tick — conservative inbox triage, unattended"
|
title: "Enable the scheduled worker tick — conservative inbox triage, unattended"
|
||||||
domain: infotech
|
domain: infotech
|
||||||
repo: ops-warden
|
repo: ops-warden
|
||||||
status: proposed
|
status: active
|
||||||
owner: claude
|
owner: claude
|
||||||
topic_slug: custodian
|
topic_slug: custodian
|
||||||
planning_priority: high
|
planning_priority: high
|
||||||
@@ -57,7 +57,7 @@ the tick already honors that env var.
|
|||||||
|
|
||||||
```task
|
```task
|
||||||
id: WARDEN-WP-0021-T01
|
id: WARDEN-WP-0021-T01
|
||||||
status: todo
|
status: done
|
||||||
priority: high
|
priority: high
|
||||||
state_hub_task_id: "10451fe6-7fab-4ae0-8494-e6cfdfbcf8cf"
|
state_hub_task_id: "10451fe6-7fab-4ae0-8494-e6cfdfbcf8cf"
|
||||||
```
|
```
|
||||||
@@ -73,7 +73,7 @@ state_hub_task_id: "10451fe6-7fab-4ae0-8494-e6cfdfbcf8cf"
|
|||||||
|
|
||||||
```task
|
```task
|
||||||
id: WARDEN-WP-0021-T02
|
id: WARDEN-WP-0021-T02
|
||||||
status: todo
|
status: done
|
||||||
priority: high
|
priority: high
|
||||||
state_hub_task_id: "1f35f816-1af5-46ff-b48c-1715f3ae5784"
|
state_hub_task_id: "1f35f816-1af5-46ff-b48c-1715f3ae5784"
|
||||||
```
|
```
|
||||||
|
|||||||
Reference in New Issue
Block a user