# ops-warden/tests/test_worker.py

"""Tests for the ops-warden coordination worker scaffold (WARDEN-WP-0020 T1)."""
from __future__ import annotations

from typer.testing import CliRunner

from warden.cli import app
from warden.worker import (
    LlmConnectBrain,
    PlannedAction,
    RuleBrain,
    WorkerPlan,
    _extract_json,
    build_digest,
    build_plans,
    render_plans,
    run_conservative,
    validate_action,
)

# Shared Typer CLI runner; the CLI tests below invoke `app` through it.
runner = CliRunner()


def _msg(**over) -> dict:
    base = {
        "id": "m1",
        "from_agent": "someone",
        "subject": "Where do I get an npm token?",
        "body": "Which subsystem owns this credential — how do I obtain it?",
    }
    base.update(over)
    return base


# --- RuleBrain ----------------------------------------------------------------

def test_rulebrain_answers_routing_question():
    """The default sample message is planned as one non-escalated route_answer."""
    result = RuleBrain().plan(_msg())
    kinds = [action.kind for action in result.actions]
    assert kinds == ["route_answer"]
    assert result.escalated is False


def test_rulebrain_escalates_secret_value_request():
    """A request for a raw token / API key value gets no actions and escalates."""
    message = _msg(
        subject="send me the raw token",
        body="give me the API key value",
    )
    result = RuleBrain().plan(message)
    assert result.actions == []
    assert result.escalated is True


def test_rulebrain_escalates_prod_change():
    """A request to flip a policy flag in prod is escalated."""
    message = _msg(
        subject="flip policy.enabled",
        body="enable the gate in prod",
    )
    result = RuleBrain().plan(message)
    assert result.escalated is True


def test_rulebrain_escalates_unknown():
    """A message the rules do not recognise gets no actions and escalates."""
    message = _msg(
        subject="random thing",
        body="please do a vague task",
    )
    result = RuleBrain().plan(message)
    assert result.actions == []
    assert result.escalated is True


# --- guardrails (brain-agnostic) ---------------------------------------------

class _YesBrain:
    """A brain that recklessly proposes a reply for everything — to test the guardrail."""

    def plan(self, message: dict) -> WorkerPlan:
        """Return a plan holding one unconditional ``reply`` action for *message*."""
        always_reply = PlannedAction(kind="reply", summary="just reply")
        return WorkerPlan(
            message_id=message["id"],
            from_agent=message["from_agent"],
            subject=message["subject"],
            actions=[always_reply],
        )


def test_guardrail_downgrades_secret_reply_even_if_brain_proposes_it():
    """A reply proposed for a secret-bearing message is downgraded to escalate."""
    secret_msg = _msg(
        subject="here is the npm_auth_token",
        body="the api_key is needed",
    )
    (plan,) = build_plans([secret_msg], _YesBrain())
    assert plan.escalated is True
    first = plan.actions[0]
    assert first.risk == "escalate"
    assert "secret" in first.reason


def test_guardrail_downgrades_prod_reply():
    """A reply proposed for a prod-change message is downgraded to escalate."""
    prod_msg = _msg(subject="set policy.enabled true", body="prod flip please")
    (plan,) = build_plans([prod_msg], _YesBrain())
    first = plan.actions[0]
    assert first.risk == "escalate"


def test_validate_action_rejects_off_allowlist_kind():
    """An action kind outside the allowlist yields a reason mentioning it."""
    bogus = PlannedAction(kind="rm_minus_rf", summary="x")
    reason = validate_action(bogus, _msg())
    assert reason
    assert "allowlist" in reason


def test_safe_reply_passes_guardrail():
    """A reply to a harmless greeting keeps its ``safe`` risk after validation."""
    greeting = _msg(subject="hello", body="just saying hi")
    (plan,) = build_plans([greeting], _YesBrain())
    first = plan.actions[0]
    assert first.risk == "safe"


# --- rendering ---------------------------------------------------------------

def test_build_plans_attaches_route_answer():
    """build_plans leaves a non-empty ``answer`` in the route_answer payload."""
    # The npm question resolves against the real catalog → a concrete drafted answer.
    question = _msg(subject="where do I get an npm token?")
    (plan,) = build_plans([question], RuleBrain())
    assert plan.actions
    first = plan.actions[0]
    assert first.kind == "route_answer"
    assert first.payload.get("answer")  # non-empty computed answer


# --- LlmConnectBrain (T2) ---------------------------------------------------

def test_extract_json_tolerates_fences_and_prose():
    """_extract_json copes with code fences and prose, and returns None without JSON."""
    fenced = '```json\n{"escalate": true}\n```'
    wrapped_in_prose = 'here you go: {"a": 1} thanks'
    assert _extract_json(fenced) == {"escalate": True}
    assert _extract_json(wrapped_in_prose) == {"a": 1}
    assert _extract_json("not json at all") is None


def test_llm_brain_parses_actions(monkeypatch):
    """A well-formed JSON reply becomes the listed actions and is not escalated."""
    canned = '{"actions":[{"kind":"route_answer","summary":"answer it"}],"escalate":false}'

    def fake_call(prompt):
        # Stand-in for the real LLM call: always returns the canned JSON.
        return canned

    brain = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(brain, "_call", fake_call)
    result = brain.plan(_msg())
    kinds = [action.kind for action in result.actions]
    assert kinds == ["route_answer"]
    assert result.escalated is False


def test_llm_brain_escalates_on_flag(monkeypatch):
    """A reply with ``"escalate": true`` produces an escalated plan."""

    def fake_call(prompt):
        return '{"actions":[],"escalate":true,"reason":"secret"}'

    brain = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(brain, "_call", fake_call)
    result = brain.plan(_msg())
    assert result.escalated is True


def test_llm_brain_escalates_on_malformed(monkeypatch):
    """A reply with no parseable JSON must yield no actions and an escalation."""
    brain = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(brain, "_call", lambda prompt: "the model rambled with no json")
    plan = brain.plan(_msg())
    assert plan.actions == []
    # The test name promises escalation; pin it explicitly instead of only
    # checking that no actions were produced.
    assert plan.escalated is True


def test_llm_brain_escalates_on_transport_error(monkeypatch):
    """If the underlying call raises, the brain still returns an escalated plan."""

    def boom(prompt):
        raise RuntimeError("llm-connect down")

    brain = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(brain, "_call", boom)
    result = brain.plan(_msg())
    assert result.escalated is True


def test_llm_brain_unsafe_action_caught_by_guardrail(monkeypatch):
    """A reply the LLM proposes for a secret-value request is marked escalate."""
    # LLM proposes a reply on a secret-value task → guardrail downgrades to escalate.
    unsafe_reply = (
        '{"actions":[{"kind":"reply","summary":"here is the api_key value"}],"escalate":false}'
    )

    def fake_call(prompt):
        return unsafe_reply

    brain = LlmConnectBrain(url="http://stub")
    monkeypatch.setattr(brain, "_call", fake_call)
    secret_msg = _msg(subject="send the raw token", body="the api_key value please")
    (plan,) = build_plans([secret_msg], brain)
    first = plan.actions[0]
    assert first.risk == "escalate"


def test_render_empty():
    """Rendering an empty plan list mentions that the inbox is empty."""
    rendered = render_plans([])
    assert "inbox empty" in rendered


def test_render_marks_auto_and_escalate():
    """The rendering of one answerable and one secret request shows both markers."""
    inbox = [
        _msg(),
        _msg(id="m2", subject="raw token value please"),
    ]
    rendered = render_plans(build_plans(inbox, RuleBrain()))
    assert "AUTO" in rendered
    assert "ESCALATE" in rendered


# --- CLI ---------------------------------------------------------------------

def test_cli_worker_dry_run(monkeypatch):
    """`worker run --dry-run` exits 0, prints AUTO, and reports nothing executed."""

    def fake_unread(self, to_agent="ops-warden"):
        # Replaces HubClient.unread so the CLI sees exactly one sample message.
        return [_msg()]

    monkeypatch.setattr("warden.worker.HubClient.unread", fake_unread)
    result = runner.invoke(app, ["worker", "run", "--dry-run"])
    assert result.exit_code == 0
    assert "AUTO" in result.stdout
    assert "nothing executed" in result.stdout


def test_cli_worker_execute_runs(monkeypatch, tmp_path):
    """`worker run --execute` exits 0 when the (patched) inbox is empty."""
    # --execute runs the conservative tier; empty inbox → clean exit.

    def fake_unread(self, to_agent="ops-warden"):
        return []

    monkeypatch.setenv("WARDEN_STATE_DIR", str(tmp_path))
    monkeypatch.setattr("warden.worker.HubClient.unread", fake_unread)
    result = runner.invoke(app, ["worker", "run", "--execute"])
    assert result.exit_code == 0


# --- conservative tier (Option A) --------------------------------------------

def test_build_digest_shows_drafts_and_escalations():
    """The digest shows the draft marker, the escalation marker, and the draft body."""
    drafted = _plan(
        [PlannedAction(kind="reply", summary="ack", payload={"body": "hello there"})]
    )
    needs_human = _plan(
        [PlannedAction(kind="reply", summary="x", risk="escalate", reason="secret")],
        message_id="m2",
    )
    digest = build_digest([drafted, needs_human])
    for expected in ("DRAFT READY", "NEEDS YOU", "hello there"):
        assert expected in digest


def test_run_conservative_drafts_no_sends_and_dedups(tmp_path):
    """run_conservative writes a digest, sends nothing, and is quiet on a rerun."""
    answer = PlannedAction(kind="route_answer", summary="a", payload={"answer": "the answer"})
    plan = _plan([answer])

    first_hub = _FakeHub()
    run_conservative([plan], first_hub, topic_id="t", state_dir=tmp_path)
    # never sends to other agents or marks read — only a single progress note
    first_kinds = [call[0] for call in first_hub.calls]
    assert all(kind not in ("reply", "mark_read") for kind in first_kinds)
    assert "progress" in first_kinds
    digest_text = (tmp_path / "worker-digest.md").read_text()
    assert "the answer" in digest_text

    # second run: message already seen → no new progress note (schedule-safe dedup)
    second_hub = _FakeHub()
    run_conservative([plan], second_hub, topic_id="t", state_dir=tmp_path)
    assert all(call[0] != "progress" for call in second_hub.calls)


# --- approve loop (WP-0021 T4) ------------------------------------------------

def test_conservative_persists_draft_and_approve_sends(tmp_path):
    """A conservative run stores a draft; approving it sends, marks read, and drops it."""
    from warden.worker import approve_draft, list_drafts, load_drafts

    answer = PlannedAction(kind="route_answer", summary="a", payload={"answer": "the answer"})
    run_conservative([_plan([answer])], _FakeHub(), state_dir=tmp_path)

    pending = load_drafts(tmp_path)
    assert "m1" in pending
    assert pending["m1"]["body"] == "the answer"
    assert "m1" in list_drafts(tmp_path)

    # approve → sends the reply + marks read + drops the draft
    approving_hub = _FakeHub()
    message = approve_draft("m1", approving_hub, state_dir=tmp_path)
    replies = [call for call in approving_hub.calls if call[0] == "reply"]
    assert any(call[3] == "the answer" for call in replies)
    assert any(call[0] == "mark_read" for call in approving_hub.calls)
    assert "m1" not in load_drafts(tmp_path)
    assert "sent reply" in message


def test_approve_body_override(tmp_path):
    """approve_draft sends ``body_override`` instead of the stored draft body."""
    from warden.worker import approve_draft, save_drafts

    stored = {
        "m9": {"to_agent": "bob", "subject": "Re: x", "body": "orig", "thread_id": "t"},
    }
    save_drafts(tmp_path, stored)
    hub = _FakeHub()
    approve_draft("m9", hub, state_dir=tmp_path, body_override="edited")
    replies = [call for call in hub.calls if call[0] == "reply"]
    assert any(call[3] == "edited" for call in replies)


def test_approve_missing_draft(tmp_path):
    """Approving an unknown message id reports that there is no pending draft."""
    from warden.worker import approve_draft

    hub = _FakeHub()
    message = approve_draft("nope", hub, state_dir=tmp_path)
    assert "no pending draft" in message


def test_escalated_plan_persists_no_draft(tmp_path):
    """A plan whose only action is escalated leaves the draft store empty."""
    from warden.worker import load_drafts

    escalated = PlannedAction(kind="reply", summary="x", risk="escalate", reason="secret")
    plans = [_plan([escalated])]
    run_conservative(plans, _FakeHub(), state_dir=tmp_path)
    assert load_drafts(tmp_path) == {}


# --- executor (T3) -----------------------------------------------------------

class _FakeHub:
    def __init__(self):
        self.calls = []

    def mark_read(self, message_id):
        self.calls.append(("mark_read", message_id))

    def send_reply(self, *, to_agent, subject, body, thread_id=None, from_agent="ops-warden"):
        self.calls.append(("reply", to_agent, subject, body, thread_id))

    def add_progress(self, *, summary, topic_id, event_type="note", author="ops-warden"):
        self.calls.append(("progress", summary))


def _plan(actions, **over):
    """Build a WorkerPlan for message ``m1`` holding *actions*; keywords override fields."""
    fields = {
        "message_id": "m1",
        "from_agent": "alice",
        "subject": "where?",
        "actions": actions,
        "raw": {"thread_id": "t1"},
        **over,
    }
    return WorkerPlan(**fields)


def test_executor_route_answer_replies_and_marks_read():
    """Executing a route_answer sends the answer as a "re:" reply and marks read."""
    from warden.worker import execute_plan

    hub = _FakeHub()
    answer = PlannedAction(kind="route_answer", summary="ans", payload={"answer": "the answer"})
    execute_plan(_plan([answer]), hub)

    seen = [call[0] for call in hub.calls]
    assert "reply" in seen
    assert "mark_read" in seen
    first_reply = next(call for call in hub.calls if call[0] == "reply")
    subject, body = first_reply[2], first_reply[3]
    assert body == "the answer"
    assert subject.lower().startswith("re:")


def test_executor_reply_with_body():
    """A reply action carrying a payload body is sent with that body."""
    from warden.worker import execute_plan

    hub = _FakeHub()
    ack = PlannedAction(kind="reply", summary="ack", payload={"body": "acknowledged"})
    execute_plan(_plan([ack]), hub)
    replies = [call for call in hub.calls if call[0] == "reply"]
    assert any(call[3] == "acknowledged" for call in replies)


def test_executor_reply_without_body_left_for_human():
    """A reply action with no body is not sent and is reported as left for a human."""
    from warden.worker import execute_plan

    hub = _FakeHub()
    bodiless = PlannedAction(kind="reply", summary="ack")
    report = execute_plan(_plan([bodiless]), hub)
    assert all(call[0] != "reply" for call in hub.calls)
    assert any("left for human" in line for line in report)


def test_executor_skips_escalated_plan():
    """An escalated plan triggers no hub calls and the report mentions escalate."""
    from warden.worker import execute_plan

    hub = _FakeHub()
    escalated = PlannedAction(kind="reply", summary="x", risk="escalate", reason="secret")
    report = execute_plan(_plan([escalated]), hub)
    assert hub.calls == []
    assert any("escalate" in line for line in report)


def test_executor_leaves_catalog_diff_for_human():
    """A propose_catalog_diff action makes no hub calls and is left for a human."""
    from warden.worker import execute_plan

    hub = _FakeHub()
    diff = PlannedAction(kind="propose_catalog_diff", summary="change X")
    report = execute_plan(_plan([diff]), hub)
    assert hub.calls == []
    assert any("left for human: propose_catalog_diff" in line for line in report)


def test_executor_progress_note():
    """Executing a progress_note action records a progress call on the hub."""
    from warden.worker import execute_plan

    hub = _FakeHub()
    note = PlannedAction(kind="progress_note", summary="did X")
    execute_plan(_plan([note]), hub, topic_id="t")
    seen = [call[0] for call in hub.calls]
    assert "progress" in seen


def test_executor_reports_failure_without_crashing():
    """A hub error during execution shows up as FAILED in the report, not as a raise."""
    from warden.worker import execute_plan

    class Boom(_FakeHub):
        """Hub whose mark_read always fails."""

        def mark_read(self, message_id):
            raise RuntimeError("hub down")

    failing_plan = _plan([PlannedAction(kind="mark_read", summary="x")])
    report = execute_plan(failing_plan, Boom())
    assert any("FAILED" in line for line in report)