Files
ops-warden/tests/test_worker.py
tegwick f8ac55367c feat(WARDEN-WP-0020): T3 — guarded executor (worker now acts, not just plans)
HubClient gains writes (mark_read, send_reply, add_progress). execute_plan/execute_plans
run the safe, allowlisted actions autonomously: route_answer (reply with the computed
answer + auto mark-read), reply (LLM-drafted body), progress_note, mark_read. Escalated
plans and non-auto-executable kinds are left for a human; every action is metadata-only
(no secret value read/sent/logged).

Deliberate guardrail: propose_catalog_diff and any code/routing change is NOT auto-executed
even under full-auto — a bad catalog commit could misroute credentials, so it goes to human
review (recoverability over convenience). AUTO_EXECUTABLE is the messaging/hub tier only.

`warden worker run --execute` runs the executor (dry-run still default). 7 executor tests
(reply+mark, with/without body, escalated skip, catalog-diff-left-for-human, progress,
failure-without-crash); 243 pass, lint clean. First live --execute shakedown is the
operator's (staged rollout); T4 schedules it.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 23:19:13 +02:00

262 lines
9.2 KiB
Python

"""Tests for the ops-warden coordination worker scaffold (WARDEN-WP-0020 T1)."""
from __future__ import annotations
from typer.testing import CliRunner
from warden.cli import app
from warden.worker import (
LlmConnectBrain,
PlannedAction,
RuleBrain,
WorkerPlan,
_extract_json,
build_plans,
render_plans,
validate_action,
)
# Shared Typer CliRunner; the CLI tests in this module invoke `app` through it.
runner = CliRunner()
def _msg(**over) -> dict:
    """Return a fresh sample inbox message dict, with keyword overrides applied.

    The defaults describe an "npm token" routing question from ``someone``;
    any keyword argument replaces the matching key or adds a new one.
    """
    return {
        "id": "m1",
        "from_agent": "someone",
        "subject": "Where do I get an npm token?",
        "body": "Which subsystem owns this credential — how do I obtain it?",
        **over,
    }
# --- RuleBrain ----------------------------------------------------------------
def test_rulebrain_answers_routing_question():
    """The default sample message yields exactly one route_answer and no escalation."""
    result = RuleBrain().plan(_msg())
    kinds = [action.kind for action in result.actions]
    assert kinds == ["route_answer"]
    assert result.escalated is False
def test_rulebrain_escalates_secret_value_request():
    """A request for a raw token / API key value gets no actions and is escalated."""
    message = _msg(subject="send me the raw token", body="give me the API key value")
    result = RuleBrain().plan(message)
    assert result.actions == []
    assert result.escalated is True
def test_rulebrain_escalates_prod_change():
    """A message asking to flip a policy flag in prod is escalated."""
    message = _msg(subject="flip policy.enabled", body="enable the gate in prod")
    result = RuleBrain().plan(message)
    assert result.escalated is True
def test_rulebrain_escalates_unknown():
    """A vague message the rules do not recognise gets no actions and is escalated."""
    message = _msg(subject="random thing", body="please do a vague task")
    result = RuleBrain().plan(message)
    assert result.actions == []
    assert result.escalated is True
# --- guardrails (brain-agnostic) ---------------------------------------------
class _YesBrain:
    """Test double that proposes a single "reply" action for every message.

    It never escalates on its own, so any escalation seen in a test using it
    must have come from the guardrail rather than from the brain.
    """

    def plan(self, message: dict) -> WorkerPlan:
        """Return a plan echoing the message's id, sender and subject with one reply action."""
        always_reply = PlannedAction(kind="reply", summary="just reply")
        return WorkerPlan(
            message_id=message["id"],
            from_agent=message["from_agent"],
            subject=message["subject"],
            actions=[always_reply],
        )
def test_guardrail_downgrades_secret_reply_even_if_brain_proposes_it():
    """A reply proposed for a secret-mentioning message comes back marked for escalation."""
    secret_msg = _msg(subject="here is the npm_auth_token", body="the api_key is needed")
    (plan,) = build_plans([secret_msg], _YesBrain())
    assert plan.escalated is True
    first_action = plan.actions[0]
    assert first_action.risk == "escalate"
    assert "secret" in first_action.reason
def test_guardrail_downgrades_prod_reply():
    """A reply proposed for a prod policy flip has its risk set to "escalate"."""
    prod_msg = _msg(subject="set policy.enabled true", body="prod flip please")
    (plan,) = build_plans([prod_msg], _YesBrain())
    first_action = plan.actions[0]
    assert first_action.risk == "escalate"
def test_validate_action_rejects_off_allowlist_kind():
    """validate_action returns a truthy reason mentioning "allowlist" for an unknown kind."""
    bogus = PlannedAction(kind="rm_minus_rf", summary="x")
    reason = validate_action(bogus, _msg())
    assert reason
    assert "allowlist" in reason
def test_safe_reply_passes_guardrail():
    """A reply to a harmless greeting keeps risk "safe" after build_plans."""
    greeting = _msg(subject="hello", body="just saying hi")
    (plan,) = build_plans([greeting], _YesBrain())
    first_action = plan.actions[0]
    assert first_action.risk == "safe"
# --- build_plans: route_answer payload ---------------------------------------
def test_build_plans_attaches_route_answer():
    """build_plans fills the route_answer payload with a non-empty "answer"."""
    # The npm question resolves against the real catalog → a concrete drafted answer.
    question = _msg(subject="where do I get an npm token?")
    (plan,) = build_plans([question], RuleBrain())
    assert plan.actions
    first_action = plan.actions[0]
    assert first_action.kind == "route_answer"
    assert first_action.payload.get("answer")  # non-empty computed answer
# --- LlmConnectBrain (T2) ---------------------------------------------------
def test_extract_json_tolerates_fences_and_prose():
    """_extract_json parses JSON wrapped in code fences or prose and returns None otherwise."""
    fenced = '```json\n{"escalate": true}\n```'
    embedded = 'here you go: {"a": 1} thanks'
    assert _extract_json(fenced) == {"escalate": True}
    assert _extract_json(embedded) == {"a": 1}
    assert _extract_json("not json at all") is None
def test_llm_brain_parses_actions(monkeypatch):
    """A well-formed JSON reply becomes the listed actions and is not escalated."""
    canned = '{"actions":[{"kind":"route_answer","summary":"answer it"}],"escalate":false}'
    brain = LlmConnectBrain(url="http://stub")
    # Replace the model call on this instance so no request leaves the test.
    monkeypatch.setattr(brain, "_call", lambda prompt: canned)
    result = brain.plan(_msg())
    kinds = [action.kind for action in result.actions]
    assert kinds == ["route_answer"]
    assert result.escalated is False
def test_llm_brain_escalates_on_flag(monkeypatch):
    """A reply with "escalate": true produces an escalated plan."""
    canned = '{"actions":[],"escalate":true,"reason":"secret"}'
    brain = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(brain, "_call", lambda prompt: canned)
    result = brain.plan(_msg())
    assert result.escalated is True
def test_llm_brain_escalates_on_malformed(monkeypatch):
    """A model reply containing no JSON yields an empty, escalated plan."""
    brain = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(brain, "_call", lambda prompt: "the model rambled with no json")
    plan = brain.plan(_msg())
    assert plan.actions == []
    # The test name promises escalation. Checking only the empty action list
    # would let an empty but non-escalated plan pass, so assert the flag too.
    # NOTE(review): assumes LlmConnectBrain escalates on unparseable output, as
    # it does on transport errors — confirm against warden.worker.
    assert plan.escalated is True
def test_llm_brain_escalates_on_transport_error(monkeypatch):
    """If the model call raises, plan() still returns and the plan is escalated."""

    def boom(prompt):
        raise RuntimeError("llm-connect down")

    brain = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(brain, "_call", boom)
    result = brain.plan(_msg())
    assert result.escalated is True
def test_llm_brain_unsafe_action_caught_by_guardrail(monkeypatch):
    """A reply proposed by the LLM for a secret-value request ends up with risk "escalate"."""
    # LLM proposes a reply on a secret-value task → guardrail downgrades to escalate.
    canned = '{"actions":[{"kind":"reply","summary":"here is the api_key value"}],"escalate":false}'
    brain = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(brain, "_call", lambda prompt: canned)
    secret_msg = _msg(subject="send the raw token", body="the api_key value please")
    (plan,) = build_plans([secret_msg], brain)
    first_action = plan.actions[0]
    assert first_action.risk == "escalate"
# --- render_plans output -----------------------------------------------------
def test_render_empty():
    """Rendering an empty plan list mentions "inbox empty"."""
    rendered = render_plans([])
    assert "inbox empty" in rendered
def test_render_marks_auto_and_escalate():
    """Rendering a routing question plus a raw-token request shows both AUTO and ESCALATE."""
    messages = [_msg(), _msg(id="m2", subject="raw token value please")]
    rendered = render_plans(build_plans(messages, RuleBrain()))
    assert "AUTO" in rendered
    assert "ESCALATE" in rendered
# --- CLI ---------------------------------------------------------------------
def test_cli_worker_dry_run(monkeypatch):
    """`worker run --dry-run` with one unread message exits 0, prints AUTO and "nothing executed"."""

    def fake_unread(self, to_agent="ops-warden"):
        return [_msg()]

    monkeypatch.setattr("warden.worker.HubClient.unread", fake_unread)
    result = runner.invoke(app, ["worker", "run", "--dry-run"])
    assert result.exit_code == 0
    assert "AUTO" in result.stdout
    assert "nothing executed" in result.stdout
def test_cli_worker_execute_runs(monkeypatch):
    """`worker run --execute` against an empty inbox exits 0."""
    # --execute now runs the guarded executor; empty inbox → clean exit.

    def fake_unread(self, to_agent="ops-warden"):
        return []

    monkeypatch.setattr("warden.worker.HubClient.unread", fake_unread)
    result = runner.invoke(app, ["worker", "run", "--execute"])
    assert result.exit_code == 0
# --- executor (T3) -----------------------------------------------------------
class _FakeHub:
    """Recording test double for the hub write methods the executor calls.

    Each call is appended to ``calls`` as a tuple whose first element names the
    operation ("mark_read", "reply" or "progress"), so tests can assert on what
    the executor asked the hub to do.
    """

    def __init__(self) -> None:
        self.calls: list[tuple] = []

    def mark_read(self, message_id) -> None:
        """Record ``("mark_read", message_id)``."""
        self.calls.append(("mark_read", message_id))

    def send_reply(self, *, to_agent, subject, body, thread_id=None, from_agent="ops-warden") -> None:
        """Record ``("reply", to_agent, subject, body, thread_id)``.

        ``from_agent`` is accepted for signature compatibility but not recorded.
        """
        self.calls.append(("reply", to_agent, subject, body, thread_id))

    def add_progress(self, *, summary, topic_id, event_type="note", author="ops-warden") -> None:
        """Record ``("progress", summary, topic_id)``.

        ``event_type`` and ``author`` are accepted but not recorded.
        """
        # Keep topic_id so a test can check which topic the note was filed
        # under; callers that only inspect element 0 or 1 are unaffected.
        self.calls.append(("progress", summary, topic_id))
def _plan(actions, **over):
    """Build a WorkerPlan for message "m1" from "alice" carrying *actions*.

    Keyword arguments replace the default fields (or add further ones) before
    the plan is constructed.
    """
    fields = {
        "message_id": "m1",
        "from_agent": "alice",
        "subject": "where?",
        "actions": actions,
        "raw": {"thread_id": "t1"},
        **over,
    }
    return WorkerPlan(**fields)
def test_executor_route_answer_replies_and_marks_read():
    """Executing a route_answer sends the answer as a "Re:" reply and marks the message read."""
    from warden.worker import execute_plan

    hub = _FakeHub()
    action = PlannedAction(kind="route_answer", summary="ans", payload={"answer": "the answer"})
    execute_plan(_plan([action]), hub)

    recorded = [call[0] for call in hub.calls]
    assert "reply" in recorded
    assert "mark_read" in recorded

    reply = next(call for call in hub.calls if call[0] == "reply")
    # Recorded reply layout: (op, to_agent, subject, body, thread_id).
    assert reply[3] == "the answer"
    assert reply[2].lower().startswith("re:")
def test_executor_reply_with_body():
    """A reply action carrying a payload body is sent with exactly that body."""
    from warden.worker import execute_plan

    hub = _FakeHub()
    action = PlannedAction(kind="reply", summary="ack", payload={"body": "acknowledged"})
    execute_plan(_plan([action]), hub)

    sent_bodies = [call[3] for call in hub.calls if call[0] == "reply"]
    assert "acknowledged" in sent_bodies
def test_executor_reply_without_body_left_for_human():
    """A reply action with no body sends nothing and is reported as "left for human"."""
    from warden.worker import execute_plan

    hub = _FakeHub()
    action = PlannedAction(kind="reply", summary="ack")
    results = execute_plan(_plan([action]), hub)

    assert all(call[0] != "reply" for call in hub.calls)
    assert any("left for human" in line for line in results)
def test_executor_skips_escalated_plan():
    """An action with risk "escalate" triggers no hub calls and is reported as such."""
    from warden.worker import execute_plan

    hub = _FakeHub()
    action = PlannedAction(kind="reply", summary="x", risk="escalate", reason="secret")
    results = execute_plan(_plan([action]), hub)

    assert hub.calls == []
    assert any("escalate" in line for line in results)
def test_executor_leaves_catalog_diff_for_human():
    """propose_catalog_diff is never executed: no hub calls, and it is reported as left for human."""
    from warden.worker import execute_plan

    hub = _FakeHub()
    action = PlannedAction(kind="propose_catalog_diff", summary="change X")
    results = execute_plan(_plan([action]), hub)

    assert hub.calls == []
    assert any("left for human: propose_catalog_diff" in line for line in results)
def test_executor_progress_note():
    """A progress_note action results in a progress call on the hub."""
    from warden.worker import execute_plan

    hub = _FakeHub()
    action = PlannedAction(kind="progress_note", summary="did X")
    execute_plan(_plan([action]), hub, topic_id="t")

    recorded = [call[0] for call in hub.calls]
    assert "progress" in recorded
def test_executor_reports_failure_without_crashing():
    """A hub error during mark_read is reported as a FAILED result rather than raised."""
    from warden.worker import execute_plan

    class Boom(_FakeHub):
        def mark_read(self, message_id):
            raise RuntimeError("hub down")

    action = PlannedAction(kind="mark_read", summary="x")
    results = execute_plan(_plan([action]), Boom())
    assert any("FAILED" in line for line in results)