feat(CUST-WP-0001): implement Custodian Agent Runtime bootstrap

T2 complete: OODA loop skeleton with LLM integration, bounded actions, and 32 offline unit tests.

Deliverables:
- runtime/agent.py — CLI entry point (--domain/--all/--dry-run/--llm)
- runtime/context.py — Observe: fetch_state + build_context
- runtime/actions.py — Act: parse_plan + execute (3 sanctioned writes)
- runtime/README.md — usage guide and architecture overview
- runtime/tests/ — 32 tests, fully offline
- runtime/pyproject.toml — standalone package with llm-connect dep
- canon/architecture/adr-002-custodian-agent-runtime-design.md

Key design decisions (ADR-002):
- Lives in runtime/ (not a new repo) — tight canon/state-hub coupling
- ClaudeCodeAdapter by default (local-first, no API key)
- Single-pass synchronous OODA for v0.1 simplicity
- Exactly 3 sanctioned write ops: add_progress_event, update_task_status, flag_for_human
- LLM returns JSON block in markdown for structured+auditable output

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-12 22:36:24 +01:00
parent 5358d417ec
commit 2fdbcb5d7a
11 changed files with 1227 additions and 3 deletions
--- a/runtime/tests/test_actions.py
+++ b/runtime/tests/test_actions.py
@@ -0,0 +1,179 @@
+"""Tests for the bounded action executor (actions.py).
+
+All API calls are mocked — no live state-hub required.
+"""
+from __future__ import annotations
+
+from unittest.mock import MagicMock, call, patch
+
+import pytest
+
+from actions import (
+    SANCTIONED_ACTIONS,
+    execute,
+    parse_plan,
+)
+
+
+# ---------------------------------------------------------------------------
+# parse_plan
+# ---------------------------------------------------------------------------
+
+class TestParsePlan:
+    """parse_plan: turning a markdown-formatted reply into a plan dict."""
+
+    @staticmethod
+    def _fenced(body: str) -> str:
+        """Wrap *body* in a ```json fence, the shape the replies in these tests use."""
+        return "```json\n" + body + "\n```"
+
+    def test_extracts_json_from_markdown(self):
+        """A fenced JSON block surrounded by prose is decoded into the plan."""
+        payload = (
+            '{"observations": ["all good"], "progress_events": [], '
+            '"tasks_to_update": [], "tasks_to_flag": []}'
+        )
+        reply = (
+            "Here is my analysis.\n\n"
+            + self._fenced(payload)
+            + "\n\n"
+            + "Let me know if you need anything else."
+        )
+        parsed = parse_plan(reply)
+        assert parsed["observations"] == ["all good"]
+        assert parsed["progress_events"] == []
+
+    def test_returns_empty_plan_on_no_json_block(self):
+        """Plain text without a fenced block yields the empty plan."""
+        expected = {"progress_events": [], "tasks_to_update": [], "tasks_to_flag": []}
+        parsed = parse_plan("Just some text with no JSON block.")
+        assert parsed == expected
+
+    def test_returns_empty_plan_on_malformed_json(self):
+        """A fenced block whose content is not valid JSON yields the empty plan."""
+        expected = {"progress_events": [], "tasks_to_update": [], "tasks_to_flag": []}
+        parsed = parse_plan(self._fenced("{broken json"))
+        assert parsed == expected
+
+    def test_handles_multiple_json_blocks_uses_first(self):
+        """When two fenced blocks are present, the first one is used."""
+        template = (
+            '{"progress_events": [{"summary": "%s"}], '
+            '"tasks_to_update": [], "tasks_to_flag": []}'
+        )
+        reply = (
+            self._fenced(template % "first")
+            + "\n"
+            + self._fenced(template % "second")
+        )
+        parsed = parse_plan(reply)
+        assert parsed["progress_events"][0]["summary"] == "first"
+
+    def test_missing_keys_are_defaulted(self):
+        """Keys absent from the reply's JSON are still present in the plan."""
+        parsed = parse_plan(self._fenced('{"observations": ["noted"]}'))
+        for key in ("progress_events", "tasks_to_update", "tasks_to_flag"):
+            assert key in parsed
+
+
+# ---------------------------------------------------------------------------
+# execute — dry run
+# ---------------------------------------------------------------------------
+
+class TestExecuteDryRun:
+    """execute(plan, dry_run=True): results are produced without HTTP traffic."""
+
+    def test_dry_run_makes_no_api_calls(self):
+        """With one action of each kind planned, neither POST nor PATCH fires."""
+        plan = {
+            "progress_events": [{"summary": "test", "workstream_id": None, "event_type": "note"}],
+            "tasks_to_update": [{"task_id": "t1", "status": "done"}],
+            "tasks_to_flag": [{"task_id": "t2", "note": "needs review"}],
+        }
+        with patch("httpx.post") as mock_post, patch("httpx.patch") as mock_patch:
+            # The return value is not inspected here; only the absence of
+            # HTTP calls matters.
+            execute(plan, dry_run=True)
+        mock_post.assert_not_called()
+        mock_patch.assert_not_called()
+
+    def test_dry_run_returns_descriptions(self):
+        """A single planned event yields exactly one descriptive result string."""
+        plan = {
+            "progress_events": [{"summary": "test note", "event_type": "note"}],
+            "tasks_to_update": [],
+            "tasks_to_flag": [],
+        }
+        # Patched so that a dry-run regression cannot reach a live service;
+        # this keeps the test offline like the rest of the module.
+        with patch("httpx.post"), patch("httpx.patch"):
+            results = execute(plan, dry_run=True)
+        assert len(results) == 1
+        assert "test note" in results[0] or "dry-run" in results[0].lower()
+
+
+# ---------------------------------------------------------------------------
+# execute — live (mocked API)
+# ---------------------------------------------------------------------------
+
+class TestExecuteLive:
+    """execute(plan) in live mode, with every httpx call replaced by a mock."""
+
+    def _ok_response(self, data: dict | None = None):
+        """Build a mock response with status 201 and a no-op raise_for_status.
+
+        ``data`` is what ``.json()`` returns; when omitted it is
+        ``{"id": "new-id"}``.
+        """
+        resp = MagicMock()
+        resp.status_code = 201
+        # Test ``is None`` rather than truthiness so that an explicitly
+        # passed empty dict is returned as given.
+        resp.json.return_value = {"id": "new-id"} if data is None else data
+        resp.raise_for_status = MagicMock()
+        return resp
+
+    def test_posts_progress_event(self):
+        """A planned progress event is POSTed once with its summary in the JSON body."""
+        plan = {
+            "progress_events": [
+                {"summary": "session note", "workstream_id": "ws-1", "event_type": "note"}
+            ],
+            "tasks_to_update": [],
+            "tasks_to_flag": [],
+        }
+        with patch("httpx.post") as mock_post:
+            mock_post.return_value = self._ok_response()
+            execute(plan)
+        mock_post.assert_called_once()
+        payload = mock_post.call_args[1]["json"]
+        assert payload["summary"] == "session note"
+
+    def test_patches_task_status(self):
+        """A task status update issues one PATCH whose URL contains the task id."""
+        plan = {
+            "progress_events": [],
+            "tasks_to_update": [{"task_id": "abc-123", "status": "done"}],
+            "tasks_to_flag": [],
+        }
+        with patch("httpx.patch") as mock_patch:
+            mock_patch.return_value = self._ok_response()
+            execute(plan)
+        mock_patch.assert_called_once()
+        call_url = mock_patch.call_args[0][0]
+        assert "abc-123" in call_url
+
+    def test_flags_task_for_human(self):
+        """Flagging a task PATCHes needs_human=True together with the note text."""
+        plan = {
+            "progress_events": [],
+            "tasks_to_update": [],
+            "tasks_to_flag": [{"task_id": "t99", "note": "needs human review"}],
+        }
+        with patch("httpx.patch") as mock_patch:
+            mock_patch.return_value = self._ok_response()
+            execute(plan)
+        mock_patch.assert_called_once()
+        payload = mock_patch.call_args[1]["json"]
+        assert payload.get("needs_human") is True
+        assert "needs human review" in payload.get("intervention_note", "")
+
+    def test_gracefully_handles_api_error(self):
+        """An exception raised by the HTTP call is reported in results, not raised."""
+        plan = {
+            "progress_events": [{"summary": "test", "event_type": "note"}],
+            "tasks_to_update": [],
+            "tasks_to_flag": [],
+        }
+        with patch("httpx.post") as mock_post:
+            mock_post.side_effect = Exception("Connection refused")
+            # Should not raise — errors are logged in results
+            results = execute(plan)
+        assert any("error" in r.lower() or "failed" in r.lower() for r in results)
+
+    def test_empty_plan_returns_empty_results(self):
+        """A plan with no actions yields no results and performs no HTTP calls."""
+        plan = {"progress_events": [], "tasks_to_update": [], "tasks_to_flag": []}
+        # Mocked so that a regression cannot reach a live service; this
+        # keeps the test offline like the rest of the module.
+        with patch("httpx.post") as mock_post, patch("httpx.patch") as mock_patch:
+            results = execute(plan)
+        assert results == []
+        mock_post.assert_not_called()
+        mock_patch.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# SANCTIONED_ACTIONS constant
+# ---------------------------------------------------------------------------
+
+class TestSanctionedActions:
+    """Pins the size and contents of the SANCTIONED_ACTIONS constant."""
+
+    def test_only_three_sanctioned_actions(self):
+        """The constant holds exactly three entries."""
+        count = len(SANCTIONED_ACTIONS)
+        assert count == 3
+
+    def test_contains_expected_actions(self):
+        """Each of the three expected action names is present."""
+        for name in ("add_progress_event", "update_task_status", "flag_for_human"):
+            assert name in SANCTIONED_ACTIONS
+
+    def test_no_destructive_actions_sanctioned(self):
+        """No action name contains 'delete', 'destroy' or 'drop' (case-insensitive)."""
+        forbidden = ("delete", "destroy", "drop")
+        for action in SANCTIONED_ACTIONS:
+            lowered = action.lower()
+            for word in forbidden:
+                assert word not in lowered