T2 complete: OODA loop skeleton with LLM integration, bounded actions, and 32 offline unit tests.

Deliverables:
- runtime/agent.py — CLI entry point (--domain/--all/--dry-run/--llm)
- runtime/context.py — Observe: fetch_state + build_context
- runtime/actions.py — Act: parse_plan + execute (3 sanctioned writes)
- runtime/README.md — usage guide and architecture overview
- runtime/tests/ — 32 tests, fully offline
- runtime/pyproject.toml — standalone package with llm-connect dep
- canon/architecture/adr-002-custodian-agent-runtime-design.md

Key design decisions (ADR-002):
- Lives in runtime/ (not a new repo) — tight canon/state-hub coupling
- ClaudeCodeAdapter by default (local-first, no API key)
- Single-pass synchronous OODA for v0.1 simplicity
- Exactly 3 sanctioned write ops: add_progress_event, update_task_status, flag_for_human
- LLM returns JSON block in markdown for structured+auditable output

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
180 lines
6.7 KiB
Python
180 lines
6.7 KiB
Python
"""Tests for the bounded action executor (actions.py).

All API calls are mocked — no live state-hub required.
"""
|
|
from __future__ import annotations
|
|
|
|
from unittest.mock import MagicMock, call, patch
|
|
|
|
import pytest
|
|
|
|
from actions import (
|
|
SANCTIONED_ACTIONS,
|
|
execute,
|
|
parse_plan,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# parse_plan
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestParsePlan:
    """Checks how parse_plan turns raw LLM text into a plan dict."""

    # Plan these tests expect when no usable JSON block can be extracted.
    EMPTY_PLAN = {"progress_events": [], "tasks_to_update": [], "tasks_to_flag": []}

    def test_extracts_json_from_markdown(self):
        """A fenced JSON block surrounded by prose is parsed into the plan."""
        llm_reply = (
            "Here is my analysis.\n\n"
            "```json\n"
            '{"observations": ["all good"], "progress_events": [], '
            '"tasks_to_update": [], "tasks_to_flag": []}\n'
            "```\n\n"
            "Let me know if you need anything else."
        )
        parsed = parse_plan(llm_reply)
        assert parsed["observations"] == ["all good"]
        assert parsed["progress_events"] == []

    def test_returns_empty_plan_on_no_json_block(self):
        """Text without any fenced JSON block yields the empty plan."""
        parsed = parse_plan("Just some text with no JSON block.")
        assert parsed == self.EMPTY_PLAN

    def test_returns_empty_plan_on_malformed_json(self):
        """A fenced block whose content is not valid JSON yields the empty plan."""
        llm_reply = "```json\n{broken json\n```"
        parsed = parse_plan(llm_reply)
        assert parsed == self.EMPTY_PLAN

    def test_handles_multiple_json_blocks_uses_first(self):
        """When two fenced JSON blocks are present, the first one wins."""
        llm_reply = (
            "```json\n{\"progress_events\": [{\"summary\": \"first\"}], "
            "\"tasks_to_update\": [], \"tasks_to_flag\": []}\n```\n"
            "```json\n{\"progress_events\": [{\"summary\": \"second\"}], "
            "\"tasks_to_update\": [], \"tasks_to_flag\": []}\n```"
        )
        parsed = parse_plan(llm_reply)
        first_event = parsed["progress_events"][0]
        assert first_event["summary"] == "first"

    def test_missing_keys_are_defaulted(self):
        """Keys absent from the JSON block are still present in the plan."""
        llm_reply = '```json\n{"observations": ["noted"]}\n```'
        parsed = parse_plan(llm_reply)
        for required_key in ("progress_events", "tasks_to_update", "tasks_to_flag"):
            assert required_key in parsed
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# execute — dry run
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestExecuteDryRun:
    """Checks execute(plan, dry_run=True): nothing is sent, results are returned."""

    def test_dry_run_makes_no_api_calls(self):
        """A plan with one action of each kind triggers no httpx.post/httpx.patch."""
        plan = {
            "progress_events": [{"summary": "test", "workstream_id": None, "event_type": "note"}],
            "tasks_to_update": [{"task_id": "t1", "status": "done"}],
            "tasks_to_flag": [{"task_id": "t2", "note": "needs review"}],
        }
        with patch("httpx.post") as mock_post, patch("httpx.patch") as mock_patch:
            # Return value is irrelevant here; only the absence of calls matters.
            execute(plan, dry_run=True)
        mock_post.assert_not_called()
        mock_patch.assert_not_called()

    def test_dry_run_returns_descriptions(self):
        """A single progress event yields exactly one result string in dry-run mode."""
        plan = {
            "progress_events": [{"summary": "test note", "event_type": "note"}],
            "tasks_to_update": [],
            "tasks_to_flag": [],
        }
        # Patch the HTTP entry points so that a dry-run regression fails this
        # test instead of attempting a real request; the suite must stay offline.
        with patch("httpx.post") as mock_post, patch("httpx.patch") as mock_patch:
            results = execute(plan, dry_run=True)
        mock_post.assert_not_called()
        mock_patch.assert_not_called()
        assert len(results) == 1
        assert "test note" in results[0] or "dry-run" in results[0].lower()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# execute — live (mocked API)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestExecuteLive:
    """Checks execute(plan) in live mode with httpx.post/httpx.patch mocked."""

    def _ok_response(self, data: dict | None = None):
        """Build a mock HTTP response with status 201 and a JSON body.

        Args:
            data: Body returned by ``resp.json()``. When omitted (``None``),
                ``{"id": "new-id"}`` is used. An explicitly passed empty dict
                is returned as-is rather than being replaced by the default.

        Returns:
            A ``MagicMock`` standing in for a successful response.
        """
        resp = MagicMock()
        resp.status_code = 201
        # Compare against None (not truthiness) so that {} is a valid body.
        resp.json.return_value = data if data is not None else {"id": "new-id"}
        resp.raise_for_status = MagicMock()
        return resp

    def test_posts_progress_event(self):
        """A progress event is sent with one httpx.post carrying its summary."""
        plan = {
            "progress_events": [
                {"summary": "session note", "workstream_id": "ws-1", "event_type": "note"}
            ],
            "tasks_to_update": [],
            "tasks_to_flag": [],
        }
        with patch("httpx.post") as mock_post:
            mock_post.return_value = self._ok_response()
            execute(plan)
        mock_post.assert_called_once()
        # call_args[1] is the kwargs dict of the recorded call.
        payload = mock_post.call_args[1]["json"]
        assert payload["summary"] == "session note"

    def test_patches_task_status(self):
        """A task status update is sent with one httpx.patch whose URL has the task id."""
        plan = {
            "progress_events": [],
            "tasks_to_update": [{"task_id": "abc-123", "status": "done"}],
            "tasks_to_flag": [],
        }
        with patch("httpx.patch") as mock_patch:
            mock_patch.return_value = self._ok_response()
            execute(plan)
        mock_patch.assert_called_once()
        # call_args[0][0] is the first positional argument, expected to be the URL.
        call_url = mock_patch.call_args[0][0]
        assert "abc-123" in call_url

    def test_flags_task_for_human(self):
        """Flagging a task sends needs_human=True and the note via httpx.patch."""
        plan = {
            "progress_events": [],
            "tasks_to_update": [],
            "tasks_to_flag": [{"task_id": "t99", "note": "needs human review"}],
        }
        with patch("httpx.patch") as mock_patch:
            mock_patch.return_value = self._ok_response()
            execute(plan)
        mock_patch.assert_called_once()
        payload = mock_patch.call_args[1]["json"]
        assert payload.get("needs_human") is True
        assert "needs human review" in payload.get("intervention_note", "")

    def test_gracefully_handles_api_error(self):
        """An exception raised by httpx.post is reported in results, not propagated."""
        plan = {
            "progress_events": [{"summary": "test", "event_type": "note"}],
            "tasks_to_update": [],
            "tasks_to_flag": [],
        }
        with patch("httpx.post") as mock_post:
            mock_post.side_effect = Exception("Connection refused")
            # Should not raise — errors are logged in results
            results = execute(plan)
        assert any("error" in r.lower() or "failed" in r.lower() for r in results)

    def test_empty_plan_returns_empty_results(self):
        """An empty plan produces no results and no HTTP calls."""
        plan = {"progress_events": [], "tasks_to_update": [], "tasks_to_flag": []}
        # Patch the HTTP entry points so this live-mode call can never reach a
        # real service, even if execute regresses.
        with patch("httpx.post") as mock_post, patch("httpx.patch") as mock_patch:
            results = execute(plan)
        mock_post.assert_not_called()
        mock_patch.assert_not_called()
        assert results == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# sanctioned_actions constant
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSanctionedActions:
    """Pins the contents of the SANCTIONED_ACTIONS constant."""

    def test_only_three_sanctioned_actions(self):
        """The constant holds exactly three entries."""
        action_count = len(SANCTIONED_ACTIONS)
        assert action_count == 3

    def test_contains_expected_actions(self):
        """Each of the three expected action names is present."""
        for expected_name in ("add_progress_event", "update_task_status", "flag_for_human"):
            assert expected_name in SANCTIONED_ACTIONS

    def test_no_destructive_actions_sanctioned(self):
        """No action name contains a destructive verb (case-insensitive)."""
        for action in SANCTIONED_ACTIONS:
            lowered = action.lower()
            for forbidden in ("delete", "destroy", "drop"):
                assert forbidden not in lowered
|