diff --git a/examples/worker.env.example b/examples/worker.env.example
new file mode 100644
index 0000000..e304e86
--- /dev/null
+++ b/examples/worker.env.example
@@ -0,0 +1,15 @@
+# ops-warden scheduled worker config (WARDEN-WP-0021).
+# Installed to ~/.config/warden/worker.env and loaded by the systemd --user service.
+# No secret values belong here.
+
+# State Hub URL the worker reads its inbox from (railiance01 after cust-wp-0011).
+WARDEN_HUB_URL=http://127.0.0.1:8000
+
+# Planner: 'llm' (llm-connect; smarter) or 'rule' (offline, deterministic fallback).
+WORKER_BRAIN=llm
+
+# Master on/off for the tick without touching the timer. 0 = skip every run.
+WORKER_ENABLED=1
+
+# Optional: set a reachable llm-connect URL to skip the per-tick kubectl port-forward.
+# LLM_CONNECT_URL=http://127.0.0.1:18080
diff --git a/scripts/install-worker-timer.sh b/scripts/install-worker-timer.sh
new file mode 100755
index 0000000..6a9ca49
--- /dev/null
+++ b/scripts/install-worker-timer.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# Install (and optionally enable) the ops-warden conservative worker systemd --user timer.
+# WARDEN-WP-0021 T1. Build-stage, conservative tier only (triage + draft, never auto-send).
+#
+#   ./scripts/install-worker-timer.sh            # install units + env, DISABLED
+#   ./scripts/install-worker-timer.sh --enable   # install + start the 15-min timer
+#
+# Kill switch (one command):
+#   systemctl --user disable --now ops-warden-worker.timer
+#   (or set WORKER_ENABLED=0 in ~/.config/warden/worker.env)
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+UNIT_DIR="$HOME/.config/systemd/user"
+ENV_FILE="$HOME/.config/warden/worker.env"
+
+if ! command -v systemctl >/dev/null 2>&1; then
+  echo "systemctl not found — this host has no systemd. Use the cron fallback:" >&2
+  echo " */15 * * * * $ROOT/scripts/worker-tick.sh >> ~/.local/state/warden/worker-tick.log 2>&1" >&2
+  exit 1
+fi
+
+mkdir -p "$UNIT_DIR" "$(dirname "$ENV_FILE")"
+if [[ ! -f "$ENV_FILE" ]]; then
+  install -m 600 "$ROOT/examples/worker.env.example" "$ENV_FILE"
+  echo "wrote $ENV_FILE (review it)"
+fi
+
+# Substitute the repo path into the service unit at install time.
+sed "s#@ROOT@#$ROOT#g" "$ROOT/systemd/ops-warden-worker.service" > "$UNIT_DIR/ops-warden-worker.service"
+cp "$ROOT/systemd/ops-warden-worker.timer" "$UNIT_DIR/ops-warden-worker.timer"
+systemctl --user daemon-reload
+echo "installed: ops-warden-worker.{service,timer} → $UNIT_DIR"
+
+if [[ "${1:-}" == "--enable" ]]; then
+  systemctl --user enable --now ops-warden-worker.timer
+  echo "ENABLED — next runs: systemctl --user list-timers ops-warden-worker.timer"
+else
+  echo "not enabled. start with: systemctl --user enable --now ops-warden-worker.timer"
+fi
+echo "kill switch: systemctl --user disable --now ops-warden-worker.timer (or WORKER_ENABLED=0 in $ENV_FILE)"
diff --git a/scripts/worker-tick.sh b/scripts/worker-tick.sh
index ce13852..5d2415c 100755
--- a/scripts/worker-tick.sh
+++ b/scripts/worker-tick.sh
@@ -16,16 +16,29 @@ ROOT="$(cd "$(dirname "$0")/.." && pwd)"
 STATE="${WARDEN_STATE_DIR:-$HOME/.local/state/warden}"
 mkdir -p "$STATE"
 
+# Master off-switch (env file / WORKER_ENABLED=0) — skip without touching the timer.
+if [[ "${WORKER_ENABLED:-1}" == "0" ]]; then
+  echo "$(date -Is) tick: WORKER_ENABLED=0; skip"
+  exit 0
+fi
+
 # Concurrency guard — never let two ticks overlap.
 exec 9>"$STATE/worker-tick.lock"
 flock -n 9 || { echo "$(date -Is) tick: another run holds the lock; skip"; exit 0; }
 
 BRAIN="${WORKER_BRAIN:-llm}"
+HUB_URL="${WARDEN_HUB_URL:-http://127.0.0.1:8000}"
 LLM_URL="${LLM_CONNECT_URL:-}"
 PF_PID=""
 cleanup() { [[ -n "$PF_PID" ]] && kill "$PF_PID" 2>/dev/null || true; }
 trap cleanup EXIT
 
+# Graceful skip if the State Hub is unreachable — a transient outage is not a fault.
+if ! curl -fsS -m 6 "$HUB_URL/state/health" >/dev/null 2>&1; then
+  echo "$(date -Is) tick: State Hub unreachable at $HUB_URL; skip"
+  exit 0
+fi
+
 if [[ "$BRAIN" == "llm" && -z "$LLM_URL" ]]; then
   if command -v kubectl >/dev/null 2>&1; then
     kubectl -n activity-core port-forward deploy/llm-connect 18080:8080 >/dev/null 2>&1 &
@@ -38,5 +51,11 @@ if [[ "$BRAIN" == "llm" && -z "$LLM_URL" ]]; then
   fi
 fi
 
-echo "$(date -Is) tick: brain=$BRAIN"
-LLM_CONNECT_URL="$LLM_URL" uv run --directory "$ROOT" warden worker run --execute --brain "$BRAIN"
+echo "$(date -Is) tick: brain=$BRAIN hub=$HUB_URL"
+# A worker-run failure (transient hub/llm hiccup) is logged but never fails the unit —
+# the next tick retries. Real bugs still surface in the log.
+if ! LLM_CONNECT_URL="$LLM_URL" WARDEN_HUB_URL="$HUB_URL" \
+  uv run --directory "$ROOT" warden worker run --execute --brain "$BRAIN"; then
+  echo "$(date -Is) tick: worker run returned non-zero; will retry next tick"
+fi
+exit 0
diff --git a/systemd/ops-warden-worker.service b/systemd/ops-warden-worker.service
new file mode 100644
index 0000000..a412dc6
--- /dev/null
+++ b/systemd/ops-warden-worker.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=ops-warden conservative coordination worker (one tick)
+Documentation=https://gitea.coulomb.social/coulomb/ops-warden
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=oneshot
+# uv lives in ~/.local/bin; kubectl in /usr/local/bin or /usr/bin.
+Environment=PATH=%h/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+EnvironmentFile=%h/.config/warden/worker.env
+ExecStart=@ROOT@/scripts/worker-tick.sh
+# A graceful skip (hub down, WORKER_ENABLED=0) exits 0; never restart-loop.
+TimeoutStartSec=180
diff --git a/systemd/ops-warden-worker.timer b/systemd/ops-warden-worker.timer
new file mode 100644
index 0000000..52be230
--- /dev/null
+++ b/systemd/ops-warden-worker.timer
@@ -0,0 +1,11 @@
+[Unit]
+Description=Run the ops-warden conservative worker tick every 15 minutes
+
+[Timer]
+OnBootSec=2min
+OnCalendar=*:0/15
+# Catch up one missed run after downtime, but don't stack (Persistent= needs OnCalendar=).
+Persistent=true
+
+[Install]
+WantedBy=timers.target
diff --git a/workplans/WARDEN-WP-0021-enable-scheduled-worker-tick.md b/workplans/WARDEN-WP-0021-enable-scheduled-worker-tick.md
index 10907ba..4ee8afe 100644
--- a/workplans/WARDEN-WP-0021-enable-scheduled-worker-tick.md
+++ b/workplans/WARDEN-WP-0021-enable-scheduled-worker-tick.md
@@ -4,7 +4,7 @@ type: workplan
 title: "Enable the scheduled worker tick — conservative inbox triage, unattended"
 domain: infotech
 repo: ops-warden
-status: proposed
+status: active
 owner: claude
 topic_slug: custodian
 planning_priority: high
@@ -57,7 +57,7 @@ the tick already honors that env var.
 
 ```task
 id: WARDEN-WP-0021-T01
-status: todo
+status: done
 priority: high
 state_hub_task_id: "10451fe6-7fab-4ae0-8494-e6cfdfbcf8cf"
 ```
@@ -73,7 +73,7 @@ state_hub_task_id: "10451fe6-7fab-4ae0-8494-e6cfdfbcf8cf"
 
 ```task
 id: WARDEN-WP-0021-T02
-status: todo
+status: done
 priority: high
 state_hub_task_id: "1f35f816-1af5-46ff-b48c-1715f3ae5784"
 ```