"""Tests for the ops-warden coordination worker scaffold (WARDEN-WP-0020 T1).""" from __future__ import annotations from typer.testing import CliRunner from warden.cli import app from warden.worker import ( LlmConnectBrain, PlannedAction, RuleBrain, WorkerPlan, _extract_json, build_plans, render_plans, validate_action, ) runner = CliRunner() def _msg(**over) -> dict: base = { "id": "m1", "from_agent": "someone", "subject": "Where do I get an npm token?", "body": "Which subsystem owns this credential — how do I obtain it?", } base.update(over) return base # --- RuleBrain ---------------------------------------------------------------- def test_rulebrain_answers_routing_question(): plan = RuleBrain().plan(_msg()) assert [a.kind for a in plan.actions] == ["route_answer"] assert plan.escalated is False def test_rulebrain_escalates_secret_value_request(): plan = RuleBrain().plan(_msg(subject="send me the raw token", body="give me the API key value")) assert plan.actions == [] assert plan.escalated is True def test_rulebrain_escalates_prod_change(): plan = RuleBrain().plan(_msg(subject="flip policy.enabled", body="enable the gate in prod")) assert plan.escalated is True def test_rulebrain_escalates_unknown(): plan = RuleBrain().plan(_msg(subject="random thing", body="please do a vague task")) assert plan.actions == [] assert plan.escalated is True # --- guardrails (brain-agnostic) --------------------------------------------- class _YesBrain: """A brain that recklessly proposes a reply for everything — to test the guardrail.""" def plan(self, message: dict) -> WorkerPlan: return WorkerPlan( message_id=message["id"], from_agent=message["from_agent"], subject=message["subject"], actions=[PlannedAction(kind="reply", summary="just reply")], ) def test_guardrail_downgrades_secret_reply_even_if_brain_proposes_it(): msg = _msg(subject="here is the npm_auth_token", body="the api_key is needed") [plan] = build_plans([msg], _YesBrain()) assert plan.escalated is True assert plan.actions[0].risk == "escalate" assert "secret" in plan.actions[0].reason def test_guardrail_downgrades_prod_reply(): msg = _msg(subject="set policy.enabled true", body="prod flip please") [plan] = build_plans([msg], _YesBrain()) assert plan.actions[0].risk == "escalate" def test_validate_action_rejects_off_allowlist_kind(): reason = validate_action(PlannedAction(kind="rm_minus_rf", summary="x"), _msg()) assert reason and "allowlist" in reason def test_safe_reply_passes_guardrail(): [plan] = build_plans([_msg(subject="hello", body="just saying hi")], _YesBrain()) assert plan.actions[0].risk == "safe" # --- rendering --------------------------------------------------------------- def test_build_plans_attaches_route_answer(): # The npm question resolves against the real catalog → a concrete drafted answer. [plan] = build_plans([_msg(subject="where do I get an npm token?")], RuleBrain()) assert plan.actions and plan.actions[0].kind == "route_answer" assert plan.actions[0].payload.get("answer") # non-empty computed answer # --- LlmConnectBrain (T2) --------------------------------------------------- def test_extract_json_tolerates_fences_and_prose(): assert _extract_json('```json\n{"escalate": true}\n```') == {"escalate": True} assert _extract_json('here you go: {"a": 1} thanks') == {"a": 1} assert _extract_json("not json at all") is None def test_llm_brain_parses_actions(monkeypatch): brain = LlmConnectBrain(url="http://stub") monkeypatch.setattr( brain, "_call", lambda prompt: '{"actions":[{"kind":"route_answer","summary":"answer it"}],"escalate":false}', ) plan = brain.plan(_msg()) assert [a.kind for a in plan.actions] == ["route_answer"] assert plan.escalated is False def test_llm_brain_escalates_on_flag(monkeypatch): brain = LlmConnectBrain(url="http://stub") monkeypatch.setattr(brain, "_call", lambda prompt: '{"actions":[],"escalate":true,"reason":"secret"}') assert brain.plan(_msg()).escalated is True def test_llm_brain_escalates_on_malformed(monkeypatch): brain = LlmConnectBrain(url="http://stub") monkeypatch.setattr(brain, "_call", lambda prompt: "the model rambled with no json") assert brain.plan(_msg()).actions == [] def test_llm_brain_escalates_on_transport_error(monkeypatch): brain = LlmConnectBrain(url="http://stub") def boom(prompt): raise RuntimeError("llm-connect down") monkeypatch.setattr(brain, "_call", boom) assert brain.plan(_msg()).escalated is True def test_llm_brain_unsafe_action_caught_by_guardrail(monkeypatch): # LLM proposes a reply on a secret-value task → guardrail downgrades to escalate. brain = LlmConnectBrain(url="http://stub") monkeypatch.setattr( brain, "_call", lambda prompt: '{"actions":[{"kind":"reply","summary":"here is the api_key value"}],"escalate":false}', ) msg = _msg(subject="send the raw token", body="the api_key value please") [plan] = build_plans([msg], brain) assert plan.actions[0].risk == "escalate" def test_render_empty(): assert "inbox empty" in render_plans([]) def test_render_marks_auto_and_escalate(): plans = build_plans([_msg(), _msg(id="m2", subject="raw token value please")], RuleBrain()) out = render_plans(plans) assert "AUTO" in out and "ESCALATE" in out # --- CLI --------------------------------------------------------------------- def test_cli_worker_dry_run(monkeypatch): monkeypatch.setattr("warden.worker.HubClient.unread", lambda self, to_agent="ops-warden": [_msg()]) r = runner.invoke(app, ["worker", "run", "--dry-run"]) assert r.exit_code == 0 assert "AUTO" in r.stdout assert "nothing executed" in r.stdout def test_cli_worker_execute_runs(monkeypatch): # --execute now runs the guarded executor; empty inbox → clean exit. monkeypatch.setattr("warden.worker.HubClient.unread", lambda self, to_agent="ops-warden": []) r = runner.invoke(app, ["worker", "run", "--execute"]) assert r.exit_code == 0 # --- executor (T3) ----------------------------------------------------------- class _FakeHub: def __init__(self): self.calls = [] def mark_read(self, message_id): self.calls.append(("mark_read", message_id)) def send_reply(self, *, to_agent, subject, body, thread_id=None, from_agent="ops-warden"): self.calls.append(("reply", to_agent, subject, body, thread_id)) def add_progress(self, *, summary, topic_id, event_type="note", author="ops-warden"): self.calls.append(("progress", summary)) def _plan(actions, **over): base = dict(message_id="m1", from_agent="alice", subject="where?", actions=actions, raw={"thread_id": "t1"}) base.update(over) return WorkerPlan(**base) def test_executor_route_answer_replies_and_marks_read(): from warden.worker import execute_plan hub = _FakeHub() a = PlannedAction(kind="route_answer", summary="ans", payload={"answer": "the answer"}) execute_plan(_plan([a]), hub) kinds = [c[0] for c in hub.calls] assert "reply" in kinds and "mark_read" in kinds reply = next(c for c in hub.calls if c[0] == "reply") assert reply[3] == "the answer" and reply[2].lower().startswith("re:") def test_executor_reply_with_body(): from warden.worker import execute_plan hub = _FakeHub() a = PlannedAction(kind="reply", summary="ack", payload={"body": "acknowledged"}) execute_plan(_plan([a]), hub) assert any(c[0] == "reply" and c[3] == "acknowledged" for c in hub.calls) def test_executor_reply_without_body_left_for_human(): from warden.worker import execute_plan hub = _FakeHub() out = execute_plan(_plan([PlannedAction(kind="reply", summary="ack")]), hub) assert not any(c[0] == "reply" for c in hub.calls) assert any("left for human" in r for r in out) def test_executor_skips_escalated_plan(): from warden.worker import execute_plan hub = _FakeHub() a = PlannedAction(kind="reply", summary="x", risk="escalate", reason="secret") out = execute_plan(_plan([a]), hub) assert hub.calls == [] assert any("escalate" in r for r in out) def test_executor_leaves_catalog_diff_for_human(): from warden.worker import execute_plan hub = _FakeHub() out = execute_plan(_plan([PlannedAction(kind="propose_catalog_diff", summary="change X")]), hub) assert hub.calls == [] assert any("left for human: propose_catalog_diff" in r for r in out) def test_executor_progress_note(): from warden.worker import execute_plan hub = _FakeHub() execute_plan(_plan([PlannedAction(kind="progress_note", summary="did X")]), hub, topic_id="t") assert any(c[0] == "progress" for c in hub.calls) def test_executor_reports_failure_without_crashing(): from warden.worker import execute_plan class Boom(_FakeHub): def mark_read(self, message_id): raise RuntimeError("hub down") out = execute_plan(_plan([PlannedAction(kind="mark_read", summary="x")]), Boom()) assert any("FAILED" in r for r in out)