generated from coulomb/repo-seed
llm-connect is operational (operator set OPENROUTER_API_KEY). Contract discovered from
the running service: POST /execute {"prompt":...} -> {"content":...}.
LlmConnectBrain embeds the fixed charter + the inbox message as untrusted data, calls
/execute, and parses a JSON action plan (_extract_json tolerates fences/prose), escalating
defensively on malformed/empty/transport errors. The build_plans guardrail still enforces
the allowlist + no-secret invariant on whatever the model returns — the LLM cannot widen
ops-warden's authority. `warden worker run --brain rule|llm` selects the planner.
Live-verified on the real inbox: the LLM brain planned a sensible reply+mark_read for a
secrets-engine coordination message and correctly escalated a secret-custody request as
out-of-lane — better classification than the deterministic RuleBrain.
6 new tests, 236 pass, lint clean. T3 (guarded executor) and T4 (scheduling) remain.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
178 lines
6.1 KiB
Python
178 lines
6.1 KiB
Python
"""Tests for the ops-warden coordination worker scaffold (WARDEN-WP-0020 T1)."""
|
|
from __future__ import annotations
|
|
|
|
from typer.testing import CliRunner
|
|
|
|
from warden.cli import app
|
|
from warden.worker import (
|
|
LlmConnectBrain,
|
|
PlannedAction,
|
|
RuleBrain,
|
|
WorkerPlan,
|
|
_extract_json,
|
|
build_plans,
|
|
render_plans,
|
|
validate_action,
|
|
)
|
|
|
|
# Shared Typer CLI test runner; the CLI tests in this module invoke `app` through it.
runner = CliRunner()
|
|
|
|
|
|
def _msg(**over) -> dict:
|
|
base = {
|
|
"id": "m1",
|
|
"from_agent": "someone",
|
|
"subject": "Where do I get an npm token?",
|
|
"body": "Which subsystem owns this credential — how do I obtain it?",
|
|
}
|
|
base.update(over)
|
|
return base
|
|
|
|
|
|
# --- RuleBrain ----------------------------------------------------------------
|
|
|
|
def test_rulebrain_answers_routing_question():
    """The default routing question yields exactly one route_answer action, not escalated."""
    result = RuleBrain().plan(_msg())
    kinds = [action.kind for action in result.actions]
    assert kinds == ["route_answer"]
    assert result.escalated is False
|
|
|
|
|
|
def test_rulebrain_escalates_secret_value_request():
    """A request for a raw token / API key value gets no actions and is escalated."""
    brain = RuleBrain()
    secret_ask = _msg(subject="send me the raw token", body="give me the API key value")
    outcome = brain.plan(secret_ask)
    assert outcome.actions == []
    assert outcome.escalated is True
|
|
|
|
|
|
def test_rulebrain_escalates_prod_change():
    """A request to flip policy.enabled in prod is escalated."""
    brain = RuleBrain()
    prod_ask = _msg(subject="flip policy.enabled", body="enable the gate in prod")
    outcome = brain.plan(prod_ask)
    assert outcome.escalated is True
|
|
|
|
|
|
def test_rulebrain_escalates_unknown():
    """A vague, unrecognised request gets no actions and is escalated."""
    brain = RuleBrain()
    vague_ask = _msg(subject="random thing", body="please do a vague task")
    outcome = brain.plan(vague_ask)
    assert outcome.actions == []
    assert outcome.escalated is True
|
|
|
|
|
|
# --- guardrails (brain-agnostic) ---------------------------------------------
|
|
|
|
class _YesBrain:
    """Test double that proposes a ``reply`` for every message, used to exercise the guardrail."""

    def plan(self, message: dict) -> WorkerPlan:
        """Return a plan echoing the message's id, sender and subject with a single reply action."""
        msg_id, sender, subject = (message[key] for key in ("id", "from_agent", "subject"))
        return WorkerPlan(
            message_id=msg_id,
            from_agent=sender,
            subject=subject,
            actions=[PlannedAction(kind="reply", summary="just reply")],
        )
|
|
|
|
|
|
def test_guardrail_downgrades_secret_reply_even_if_brain_proposes_it():
    """build_plans marks a reply to a secret-bearing message as escalate, with a 'secret' reason."""
    secret_msg = _msg(subject="here is the npm_auth_token", body="the api_key is needed")
    (plan,) = build_plans([secret_msg], _YesBrain())
    assert plan.escalated is True
    first_action = plan.actions[0]
    assert first_action.risk == "escalate"
    assert "secret" in first_action.reason
|
|
|
|
|
|
def test_guardrail_downgrades_prod_reply():
    """build_plans marks a reply to a prod policy-flip message as escalate."""
    prod_msg = _msg(subject="set policy.enabled true", body="prod flip please")
    (plan,) = build_plans([prod_msg], _YesBrain())
    first_action = plan.actions[0]
    assert first_action.risk == "escalate"
|
|
|
|
|
|
def test_validate_action_rejects_off_allowlist_kind():
    """validate_action returns a non-empty reason mentioning the allowlist for an unknown kind."""
    rogue_action = PlannedAction(kind="rm_minus_rf", summary="x")
    verdict = validate_action(rogue_action, _msg())
    assert verdict
    assert "allowlist" in verdict
|
|
|
|
|
|
def test_safe_reply_passes_guardrail():
    """A reply to a harmless greeting keeps risk 'safe' after build_plans."""
    greeting = _msg(subject="hello", body="just saying hi")
    (plan,) = build_plans([greeting], _YesBrain())
    first_action = plan.actions[0]
    assert first_action.risk == "safe"
|
|
|
|
|
|
# --- plan building (route_answer payload) -------------------------------------
|
|
|
|
def test_build_plans_attaches_route_answer():
    """build_plans with RuleBrain yields a route_answer action carrying a non-empty answer."""
    # The npm question resolves against the real catalog → a concrete drafted answer.
    npm_question = _msg(subject="where do I get an npm token?")
    (plan,) = build_plans([npm_question], RuleBrain())
    assert plan.actions
    assert plan.actions[0].kind == "route_answer"
    drafted = plan.actions[0].payload.get("answer")
    assert drafted  # non-empty computed answer
|
|
|
|
|
|
# --- LlmConnectBrain (T2) ---------------------------------------------------
|
|
|
|
def test_extract_json_tolerates_fences_and_prose():
    """_extract_json parses fenced JSON and JSON embedded in prose; returns None when none is present."""
    fenced = '```json\n{"escalate": true}\n```'
    embedded = 'here you go: {"a": 1} thanks'
    assert _extract_json(fenced) == {"escalate": True}
    assert _extract_json(embedded) == {"a": 1}
    assert _extract_json("not json at all") is None
|
|
|
|
|
|
def test_llm_brain_parses_actions(monkeypatch):
    """A well-formed JSON reply from the stubbed ``_call`` becomes a non-escalated route_answer plan."""
    canned = '{"actions":[{"kind":"route_answer","summary":"answer it"}],"escalate":false}'

    def fake_call(prompt):
        # Stand-in for the network call: always returns the canned model output.
        return canned

    llm = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(llm, "_call", fake_call)
    result = llm.plan(_msg())
    kinds = [action.kind for action in result.actions]
    assert kinds == ["route_answer"]
    assert result.escalated is False
|
|
|
|
|
|
def test_llm_brain_escalates_on_flag(monkeypatch):
    """When the model's JSON sets ``escalate`` to true, the resulting plan is escalated."""
    canned = '{"actions":[],"escalate":true,"reason":"secret"}'

    def fake_call(prompt):
        return canned

    llm = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(llm, "_call", fake_call)
    result = llm.plan(_msg())
    assert result.escalated is True
|
|
|
|
|
|
def test_llm_brain_escalates_on_malformed(monkeypatch):
    """A model reply containing no JSON yields a plan with no actions that is escalated."""
    brain = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(brain, "_call", lambda prompt: "the model rambled with no json")
    plan = brain.plan(_msg())
    assert plan.actions == []
    # The test name promises escalation, so pin it explicitly: an empty action list
    # alone would also pass for a brain that silently drops the message.
    assert plan.escalated is True
|
|
|
|
|
|
def test_llm_brain_escalates_on_transport_error(monkeypatch):
    """If ``_call`` raises, ``plan`` still returns a plan and that plan is escalated."""
    llm = LlmConnectBrain(url="http://stub")

    def boom(prompt):
        # Simulates the llm-connect service being unreachable.
        raise RuntimeError("llm-connect down")

    monkeypatch.setattr(llm, "_call", boom)
    result = llm.plan(_msg())
    assert result.escalated is True
|
|
|
|
|
|
def test_llm_brain_unsafe_action_caught_by_guardrail(monkeypatch):
    """build_plans marks an LLM-proposed reply to a secret-value request as escalate."""
    # LLM proposes a reply on a secret-value task → guardrail downgrades to escalate.
    canned = '{"actions":[{"kind":"reply","summary":"here is the api_key value"}],"escalate":false}'

    def fake_call(prompt):
        return canned

    llm = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(llm, "_call", fake_call)
    secret_ask = _msg(subject="send the raw token", body="the api_key value please")
    (plan,) = build_plans([secret_ask], llm)
    first_action = plan.actions[0]
    assert first_action.risk == "escalate"
|
|
|
|
|
|
def test_render_empty():
    """Rendering an empty plan list produces text containing 'inbox empty'."""
    rendered = render_plans([])
    assert "inbox empty" in rendered
|
|
|
|
|
|
def test_render_marks_auto_and_escalate():
    """Rendered output for a routing question plus a raw-token request contains AUTO and ESCALATE."""
    inbox = [_msg(), _msg(id="m2", subject="raw token value please")]
    rendered = render_plans(build_plans(inbox, RuleBrain()))
    assert "AUTO" in rendered
    assert "ESCALATE" in rendered
|
|
|
|
|
|
# --- CLI ---------------------------------------------------------------------
|
|
|
|
def test_cli_worker_dry_run(monkeypatch):
    """``worker run --dry-run`` exits 0 and its stdout contains 'AUTO' and 'nothing executed'."""

    def fake_unread(self, to_agent="ops-warden"):
        # Replaces HubClient.unread so the test feeds in one canned message.
        return [_msg()]

    monkeypatch.setattr("warden.worker.HubClient.unread", fake_unread)
    result = runner.invoke(app, ["worker", "run", "--dry-run"])
    assert result.exit_code == 0
    output = result.stdout
    assert "AUTO" in output
    assert "nothing executed" in output
|
|
|
|
|
|
def test_cli_worker_execute_rejected():
    """``worker run --execute`` exits with status 2."""
    # --execute is refused until the guarded executor lands (WP-0020 T3); message is on stderr.
    cli_args = ["worker", "run", "--execute"]
    outcome = runner.invoke(app, cli_args)
    assert outcome.exit_code == 2
|