Files
the-custodian/runtime/tests/test_actions.py
tegwick 2fdbcb5d7a feat(CUST-WP-0001): implement Custodian Agent Runtime bootstrap
T2 complete: OODA loop skeleton with LLM integration, bounded actions,
and 32 offline unit tests.

Deliverables:
- runtime/agent.py     — CLI entry point (--domain/--all/--dry-run/--llm)
- runtime/context.py   — Observe: fetch_state + build_context
- runtime/actions.py   — Act: parse_plan + execute (3 sanctioned writes)
- runtime/README.md    — usage guide and architecture overview
- runtime/tests/       — 32 tests, fully offline
- runtime/pyproject.toml — standalone package with llm-connect dep
- canon/architecture/adr-002-custodian-agent-runtime-design.md

Key design decisions (ADR-002):
- Lives in runtime/ (not a new repo) — tight canon/state-hub coupling
- ClaudeCodeAdapter by default (local-first, no API key)
- Single-pass synchronous OODA for v0.1 simplicity
- Exactly 3 sanctioned write ops: add_progress_event, update_task_status, flag_for_human
- LLM returns JSON block in markdown for structured+auditable output

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-12 22:36:24 +01:00

180 lines
6.7 KiB
Python

"""Tests for the bounded action executor (actions.py).
All API calls are mocked — no live state-hub required.
"""
from __future__ import annotations
from unittest.mock import MagicMock, call, patch
import pytest
from actions import (
SANCTIONED_ACTIONS,
execute,
parse_plan,
)
# ---------------------------------------------------------------------------
# parse_plan
# ---------------------------------------------------------------------------
class TestParsePlan:
    """Checks on how ``parse_plan`` turns a raw LLM reply into a plan dict."""

    def test_extracts_json_from_markdown(self):
        """A fenced ``json`` block wrapped in prose is parsed into the plan."""
        reply = "".join(
            [
                "Here is my analysis.\n\n",
                "```json\n",
                '{"observations": ["all good"], "progress_events": [], ',
                '"tasks_to_update": [], "tasks_to_flag": []}\n',
                "```\n\n",
                "Let me know if you need anything else.",
            ]
        )

        parsed = parse_plan(reply)

        assert parsed["observations"] == ["all good"]
        assert parsed["progress_events"] == []

    def test_returns_empty_plan_on_no_json_block(self):
        """Plain text without a fenced block yields the empty plan."""
        expected = dict(progress_events=[], tasks_to_update=[], tasks_to_flag=[])

        assert parse_plan("Just some text with no JSON block.") == expected

    def test_returns_empty_plan_on_malformed_json(self):
        """A fenced block whose content is not valid JSON yields the empty plan."""
        expected = dict(progress_events=[], tasks_to_update=[], tasks_to_flag=[])
        reply = "```json\n{broken json\n```"

        assert parse_plan(reply) == expected

    def test_handles_multiple_json_blocks_uses_first(self):
        """When two fenced blocks are present, the first one is the plan."""
        first_block = (
            '```json\n{"progress_events": [{"summary": "first"}], '
            '"tasks_to_update": [], "tasks_to_flag": []}\n```\n'
        )
        second_block = (
            '```json\n{"progress_events": [{"summary": "second"}], '
            '"tasks_to_update": [], "tasks_to_flag": []}\n```'
        )

        parsed = parse_plan(first_block + second_block)

        first_event = parsed["progress_events"][0]
        assert first_event["summary"] == "first"

    def test_missing_keys_are_defaulted(self):
        """All three action keys exist even if the JSON only has observations."""
        parsed = parse_plan('```json\n{"observations": ["noted"]}\n```')

        for key in ("progress_events", "tasks_to_update", "tasks_to_flag"):
            assert key in parsed
# ---------------------------------------------------------------------------
# execute — dry run
# ---------------------------------------------------------------------------
class TestExecuteDryRun:
    """``execute(plan, dry_run=True)`` must report actions without calling the API."""

    def test_dry_run_makes_no_api_calls(self):
        """A plan with one entry of each kind triggers no httpx POST or PATCH."""
        plan = {
            "progress_events": [
                {"summary": "test", "workstream_id": None, "event_type": "note"}
            ],
            "tasks_to_update": [{"task_id": "t1", "status": "done"}],
            "tasks_to_flag": [{"task_id": "t2", "note": "needs review"}],
        }
        with patch("httpx.post") as mock_post, patch("httpx.patch") as mock_patch:
            # The return value is irrelevant here; only the absence of calls matters.
            execute(plan, dry_run=True)
        mock_post.assert_not_called()
        mock_patch.assert_not_called()

    def test_dry_run_returns_descriptions(self):
        """A single progress event yields exactly one descriptive result string."""
        plan = {
            "progress_events": [{"summary": "test note", "event_type": "note"}],
            "tasks_to_update": [],
            "tasks_to_flag": [],
        }
        # httpx is patched so that a dry-run regression can never reach a real
        # state-hub; this keeps the test offline like the rest of the module.
        with patch("httpx.post"), patch("httpx.patch"):
            results = execute(plan, dry_run=True)
        assert len(results) == 1
        assert "test note" in results[0] or "dry-run" in results[0].lower()
# ---------------------------------------------------------------------------
# execute — live (mocked API)
# ---------------------------------------------------------------------------
class TestExecuteLive:
    """Non-dry-run ``execute`` behaviour, with every httpx call replaced by a mock."""

    def _ok_response(self, data: dict | None = None) -> MagicMock:
        """Build a mock HTTP response with status 201 and a JSON body.

        Args:
            data: Body returned by ``resp.json()``. Only ``None`` selects the
                default ``{"id": "new-id"}``; an explicit empty dict is kept.

        Returns:
            A ``MagicMock`` whose ``raise_for_status`` is a no-op mock.
        """
        resp = MagicMock()
        resp.status_code = 201
        # Compare against None rather than truthiness so ``{}`` is honoured.
        resp.json.return_value = {"id": "new-id"} if data is None else data
        resp.raise_for_status = MagicMock()
        return resp

    def test_posts_progress_event(self):
        """A progress event is sent with one POST whose JSON carries the summary."""
        plan = {
            "progress_events": [
                {"summary": "session note", "workstream_id": "ws-1", "event_type": "note"}
            ],
            "tasks_to_update": [],
            "tasks_to_flag": [],
        }
        with patch("httpx.post") as mock_post:
            mock_post.return_value = self._ok_response()
            execute(plan)
        mock_post.assert_called_once()
        payload = mock_post.call_args[1]["json"]
        assert payload["summary"] == "session note"

    def test_patches_task_status(self):
        """A task update issues one PATCH whose URL contains the task id."""
        plan = {
            "progress_events": [],
            "tasks_to_update": [{"task_id": "abc-123", "status": "done"}],
            "tasks_to_flag": [],
        }
        with patch("httpx.patch") as mock_patch:
            mock_patch.return_value = self._ok_response()
            execute(plan)
        mock_patch.assert_called_once()
        call_url = mock_patch.call_args[0][0]
        assert "abc-123" in call_url

    def test_flags_task_for_human(self):
        """Flagging a task issues one PATCH with ``needs_human`` and the note."""
        plan = {
            "progress_events": [],
            "tasks_to_update": [],
            "tasks_to_flag": [{"task_id": "t99", "note": "needs human review"}],
        }
        with patch("httpx.patch") as mock_patch:
            mock_patch.return_value = self._ok_response()
            execute(plan)
        mock_patch.assert_called_once()
        payload = mock_patch.call_args[1]["json"]
        assert payload.get("needs_human") is True
        assert "needs human review" in payload.get("intervention_note", "")

    def test_gracefully_handles_api_error(self):
        """A failing POST does not raise; the failure is reported in the results."""
        plan = {
            "progress_events": [{"summary": "test", "event_type": "note"}],
            "tasks_to_update": [],
            "tasks_to_flag": [],
        }
        with patch("httpx.post") as mock_post:
            mock_post.side_effect = Exception("Connection refused")
            # Should not raise — errors are logged in results
            results = execute(plan)
        assert any("error" in r.lower() or "failed" in r.lower() for r in results)

    def test_empty_plan_returns_empty_results(self):
        """An empty plan produces no results and performs no HTTP calls."""
        plan = {"progress_events": [], "tasks_to_update": [], "tasks_to_flag": []}
        # Patched because this is a non-dry-run call: a regression must fail the
        # test rather than contact a real state-hub.
        with patch("httpx.post") as mock_post, patch("httpx.patch") as mock_patch:
            results = execute(plan)
        assert results == []
        mock_post.assert_not_called()
        mock_patch.assert_not_called()
# ---------------------------------------------------------------------------
# SANCTIONED_ACTIONS constant
# ---------------------------------------------------------------------------
class TestSanctionedActions:
    """Guards on the contents of the ``SANCTIONED_ACTIONS`` constant."""

    def test_only_three_sanctioned_actions(self):
        """The constant holds exactly three entries."""
        action_count = len(SANCTIONED_ACTIONS)
        assert action_count == 3

    def test_contains_expected_actions(self):
        """Each of the three expected write operations is present."""
        expected_names = ("add_progress_event", "update_task_status", "flag_for_human")
        for name in expected_names:
            assert name in SANCTIONED_ACTIONS

    def test_no_destructive_actions_sanctioned(self):
        """No action name contains a destructive verb, ignoring case."""
        forbidden_words = ("delete", "destroy", "drop")
        for action in SANCTIONED_ACTIONS:
            lowered = action.lower()
            for word in forbidden_words:
                assert word not in lowered