feat(CUST-WP-0001): implement Custodian Agent Runtime bootstrap
T2 complete: OODA loop skeleton with LLM integration, bounded actions, and 32 offline unit tests.

Deliverables:
- runtime/agent.py — CLI entry point (--domain/--all/--dry-run/--llm)
- runtime/context.py — Observe: fetch_state + build_context
- runtime/actions.py — Act: parse_plan + execute (3 sanctioned writes)
- runtime/README.md — usage guide and architecture overview
- runtime/tests/ — 32 tests, fully offline
- runtime/pyproject.toml — standalone package with llm-connect dep
- canon/architecture/adr-002-custodian-agent-runtime-design.md

Key design decisions (ADR-002):
- Lives in runtime/ (not a new repo) — tight canon/state-hub coupling
- ClaudeCodeAdapter by default (local-first, no API key)
- Single-pass synchronous OODA for v0.1 simplicity
- Exactly 3 sanctioned write ops: add_progress_event, update_task_status, flag_for_human
- LLM returns JSON block in markdown for structured+auditable output

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
179
runtime/tests/test_actions.py
Normal file
179
runtime/tests/test_actions.py
Normal file
@@ -0,0 +1,179 @@
|
||||
"""Tests for the bounded action executor (actions.py).
|
||||
|
||||
All API calls are mocked — no live state-hub required.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, call, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from actions import (
|
||||
SANCTIONED_ACTIONS,
|
||||
execute,
|
||||
parse_plan,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# parse_plan
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestParsePlan:
    """Checks on ``parse_plan``: pulling the JSON plan out of an LLM reply."""

    # What these tests expect back when no usable JSON block is found.
    _EMPTY_PLAN = {"progress_events": [], "tasks_to_update": [], "tasks_to_flag": []}

    def test_extracts_json_from_markdown(self):
        """A fenced JSON block surrounded by chatter is located and decoded."""
        preamble = "Here is my analysis.\n\n"
        fenced = (
            "```json\n"
            '{"observations": ["all good"], "progress_events": [], '
            '"tasks_to_update": [], "tasks_to_flag": []}\n'
            "```\n\n"
        )
        closing = "Let me know if you need anything else."

        parsed = parse_plan(preamble + fenced + closing)

        assert parsed["observations"] == ["all good"]
        assert parsed["progress_events"] == []

    def test_returns_empty_plan_on_no_json_block(self):
        """Plain text without any fenced block yields the empty plan."""
        parsed = parse_plan("Just some text with no JSON block.")
        assert parsed == self._EMPTY_PLAN

    def test_returns_empty_plan_on_malformed_json(self):
        """A fenced block whose content is not valid JSON yields the empty plan."""
        broken_reply = "```json\n{broken json\n```"
        parsed = parse_plan(broken_reply)
        assert parsed == self._EMPTY_PLAN

    def test_handles_multiple_json_blocks_uses_first(self):
        """With two fenced blocks in the reply, the first one is the plan."""

        def fenced(label):
            # One fenced block carrying a single progress event with this summary.
            return (
                '```json\n{"progress_events": [{"summary": "' + label + '"}], '
                '"tasks_to_update": [], "tasks_to_flag": []}\n```'
            )

        reply = "\n".join([fenced("first"), fenced("second")])

        parsed = parse_plan(reply)

        first_event = parsed["progress_events"][0]
        assert first_event["summary"] == "first"

    def test_missing_keys_are_defaulted(self):
        """Action lists absent from the JSON are still present in the plan."""
        parsed = parse_plan('```json\n{"observations": ["noted"]}\n```')
        for required_key in ("progress_events", "tasks_to_update", "tasks_to_flag"):
            assert required_key in parsed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# execute — dry run
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestExecuteDryRun:
    """Checks on ``execute(..., dry_run=True)``."""

    def test_dry_run_makes_no_api_calls(self):
        """A plan populating all three action lists must not touch ``httpx``."""
        events = [{"summary": "test", "workstream_id": None, "event_type": "note"}]
        updates = [{"task_id": "t1", "status": "done"}]
        flags = [{"task_id": "t2", "note": "needs review"}]
        full_plan = {
            "progress_events": events,
            "tasks_to_update": updates,
            "tasks_to_flag": flags,
        }

        with patch("httpx.post") as post_spy:
            with patch("httpx.patch") as patch_spy:
                execute(full_plan, dry_run=True)

        # The mocks keep their call history after the patches are undone.
        post_spy.assert_not_called()
        patch_spy.assert_not_called()

    def test_dry_run_returns_descriptions(self):
        """A single planned event produces exactly one descriptive result."""
        single_note_plan = {
            "progress_events": [{"summary": "test note", "event_type": "note"}],
            "tasks_to_update": [],
            "tasks_to_flag": [],
        }

        outcome = execute(single_note_plan, dry_run=True)

        assert len(outcome) == 1
        description = outcome[0]
        # Either the event summary is echoed or the entry is labelled as a dry run.
        assert "test note" in description or "dry-run" in description.lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# execute — live (mocked API)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestExecuteLive:
    """Checks on ``execute`` in live mode, with every ``httpx`` call mocked."""

    def _ok_response(self, data: dict | None = None):
        """Build a stand-in for a successful HTTP response.

        Args:
            data: Body for ``.json()`` to return. When omitted, the placeholder
                ``{"id": "new-id"}`` is used. An explicitly passed empty dict
                is returned as-is.

        Returns:
            A ``MagicMock`` with ``status_code`` 201, a canned ``json()``
            payload and a no-op ``raise_for_status``.
        """
        resp = MagicMock()
        resp.status_code = 201
        # Compare against None so a caller can deliberately request an empty body.
        resp.json.return_value = {"id": "new-id"} if data is None else data
        resp.raise_for_status = MagicMock()
        return resp

    def test_posts_progress_event(self):
        """A progress event is sent with one POST whose JSON carries the summary."""
        plan = {
            "progress_events": [
                {"summary": "session note", "workstream_id": "ws-1", "event_type": "note"}
            ],
            "tasks_to_update": [],
            "tasks_to_flag": [],
        }
        with patch("httpx.post") as mock_post:
            mock_post.return_value = self._ok_response()
            execute(plan)

        mock_post.assert_called_once()
        _, kwargs = mock_post.call_args
        assert kwargs["json"]["summary"] == "session note"

    def test_patches_task_status(self):
        """A status update issues one PATCH whose URL contains the task id."""
        plan = {
            "progress_events": [],
            "tasks_to_update": [{"task_id": "abc-123", "status": "done"}],
            "tasks_to_flag": [],
        }
        with patch("httpx.patch") as mock_patch:
            mock_patch.return_value = self._ok_response()
            execute(plan)

        mock_patch.assert_called_once()
        # The URL is expected as the first positional argument of the call.
        args, _ = mock_patch.call_args
        assert "abc-123" in args[0]

    def test_flags_task_for_human(self):
        """Flagging issues one PATCH setting ``needs_human`` and the note."""
        plan = {
            "progress_events": [],
            "tasks_to_update": [],
            "tasks_to_flag": [{"task_id": "t99", "note": "needs human review"}],
        }
        with patch("httpx.patch") as mock_patch:
            mock_patch.return_value = self._ok_response()
            execute(plan)

        mock_patch.assert_called_once()
        _, kwargs = mock_patch.call_args
        payload = kwargs["json"]
        assert payload.get("needs_human") is True
        assert "needs human review" in payload.get("intervention_note", "")

    def test_gracefully_handles_api_error(self):
        """A failing POST must not propagate; it shows up in the results."""
        plan = {
            "progress_events": [{"summary": "test", "event_type": "note"}],
            "tasks_to_update": [],
            "tasks_to_flag": [],
        }
        with patch("httpx.post") as mock_post:
            mock_post.side_effect = Exception("Connection refused")
            # Should not raise — errors are logged in results
            results = execute(plan)

        assert any("error" in r.lower() or "failed" in r.lower() for r in results)

    def test_empty_plan_returns_empty_results(self):
        """An empty plan yields no results and triggers no HTTP traffic."""
        plan = {"progress_events": [], "tasks_to_update": [], "tasks_to_flag": []}
        # Patch httpx here too, so this live-mode call can never reach a real
        # state-hub even if execute() were to regress.
        with patch("httpx.post") as mock_post, patch("httpx.patch") as mock_patch:
            results = execute(plan)

        assert results == []
        mock_post.assert_not_called()
        mock_patch.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SANCTIONED_ACTIONS constant
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSanctionedActions:
    """Checks on the ``SANCTIONED_ACTIONS`` constant."""

    def test_only_three_sanctioned_actions(self):
        """The collection holds exactly three entries."""
        count = len(SANCTIONED_ACTIONS)
        assert count == 3

    def test_contains_expected_actions(self):
        """Each of the three expected action names is present."""
        for expected in ("add_progress_event", "update_task_status", "flag_for_human"):
            assert expected in SANCTIONED_ACTIONS

    def test_no_destructive_actions_sanctioned(self):
        """No entry contains a destructive verb, compared case-insensitively."""
        for action in SANCTIONED_ACTIONS:
            lowered = action.lower()
            for forbidden in ("delete", "destroy", "drop"):
                assert forbidden not in lowered
|
||||
Reference in New Issue
Block a user