feat(CUST-WP-0001): implement Custodian Agent Runtime bootstrap

T2 complete: OODA loop skeleton with LLM integration, bounded actions,
and 32 offline unit tests.

Deliverables:
- runtime/agent.py     — CLI entry point (--domain/--all/--dry-run/--llm)
- runtime/context.py   — Observe: fetch_state + build_context
- runtime/actions.py   — Act: parse_plan + execute (3 sanctioned writes)
- runtime/README.md    — usage guide and architecture overview
- runtime/tests/       — 32 tests, fully offline
- runtime/pyproject.toml — standalone package with llm-connect dep
- canon/architecture/adr-002-custodian-agent-runtime-design.md

Key design decisions (ADR-002):
- Lives in runtime/ (not a new repo) — tight canon/state-hub coupling
- ClaudeCodeAdapter by default (local-first, no API key)
- Single-pass synchronous OODA for v0.1 simplicity
- Exactly 3 sanctioned write ops: add_progress_event, update_task_status, flag_for_human
- LLM returns JSON block in markdown for structured+auditable output

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-12 22:36:24 +01:00
parent 5358d417ec
commit 2fdbcb5d7a
11 changed files with 1227 additions and 3 deletions

View File

View File

@@ -0,0 +1,179 @@
"""Tests for the bounded action executor (actions.py).
All API calls are mocked — no live state-hub required.
"""
from __future__ import annotations
from unittest.mock import MagicMock, call, patch
import pytest
from actions import (
SANCTIONED_ACTIONS,
execute,
parse_plan,
)
# ---------------------------------------------------------------------------
# parse_plan
# ---------------------------------------------------------------------------
class TestParsePlan:
    """Checks on how ``parse_plan`` turns raw LLM text into a plan dict."""

    def test_extracts_json_from_markdown(self):
        """A fenced JSON block embedded in surrounding prose is decoded."""
        llm_output = (
            "Here is my analysis.\n\n"
            "```json\n"
            '{"observations": ["all good"], "progress_events": [], '
            '"tasks_to_update": [], "tasks_to_flag": []}\n'
            "```\n\n"
            "Let me know if you need anything else."
        )
        parsed = parse_plan(llm_output)
        assert parsed["observations"] == ["all good"]
        assert parsed["progress_events"] == []

    def test_returns_empty_plan_on_no_json_block(self):
        """Text without any fenced JSON block yields the empty plan."""
        expected = {"progress_events": [], "tasks_to_update": [], "tasks_to_flag": []}
        assert parse_plan("Just some text with no JSON block.") == expected

    def test_returns_empty_plan_on_malformed_json(self):
        """A fenced block that is not valid JSON yields the empty plan."""
        expected = {"progress_events": [], "tasks_to_update": [], "tasks_to_flag": []}
        broken = "```json\n{broken json\n```"
        assert parse_plan(broken) == expected

    def test_handles_multiple_json_blocks_uses_first(self):
        """When two fenced blocks are present, the first one is used."""
        llm_output = (
            "```json\n{\"progress_events\": [{\"summary\": \"first\"}], "
            "\"tasks_to_update\": [], \"tasks_to_flag\": []}\n```\n"
            "```json\n{\"progress_events\": [{\"summary\": \"second\"}], "
            "\"tasks_to_update\": [], \"tasks_to_flag\": []}\n```"
        )
        first_event = parse_plan(llm_output)["progress_events"][0]
        assert first_event["summary"] == "first"

    def test_missing_keys_are_defaulted(self):
        """Action keys absent from the JSON are still present in the plan."""
        parsed = parse_plan('```json\n{"observations": ["noted"]}\n```')
        for key in ("progress_events", "tasks_to_update", "tasks_to_flag"):
            assert key in parsed
# ---------------------------------------------------------------------------
# execute — dry run
# ---------------------------------------------------------------------------
class TestExecuteDryRun:
    """``execute(plan, dry_run=True)`` must report actions without sending them."""

    def test_dry_run_makes_no_api_calls(self):
        """A plan with one action of each kind issues no POST and no PATCH."""
        plan = {
            "progress_events": [{"summary": "test", "workstream_id": None, "event_type": "note"}],
            "tasks_to_update": [{"task_id": "t1", "status": "done"}],
            "tasks_to_flag": [{"task_id": "t2", "note": "needs review"}],
        }
        with patch("httpx.post") as mock_post, patch("httpx.patch") as mock_patch:
            # The return value is irrelevant here; only the absence of calls matters.
            execute(plan, dry_run=True)
        mock_post.assert_not_called()
        mock_patch.assert_not_called()

    def test_dry_run_returns_descriptions(self):
        """One planned action produces exactly one descriptive result entry."""
        plan = {
            "progress_events": [{"summary": "test note", "event_type": "note"}],
            "tasks_to_update": [],
            "tasks_to_flag": [],
        }
        # Patch the write calls so a dry-run regression cannot reach a live
        # service; this keeps the test offline like the rest of the module.
        with patch("httpx.post"), patch("httpx.patch"):
            results = execute(plan, dry_run=True)
        assert len(results) == 1
        assert "test note" in results[0] or "dry-run" in results[0].lower()
# ---------------------------------------------------------------------------
# execute — live (mocked API)
# ---------------------------------------------------------------------------
class TestExecuteLive:
    """``execute`` in live mode, with the httpx write calls replaced by mocks."""

    def _ok_response(self, data: dict | None = None):
        """Build a mock response with status 201 whose ``json()`` returns *data*.

        When *data* is omitted the payload defaults to ``{"id": "new-id"}``.
        ``raise_for_status`` is a no-op mock.
        """
        resp = MagicMock()
        resp.status_code = 201
        # Compare against None so an explicitly passed empty dict is honoured
        # instead of being replaced by the default payload.
        resp.json.return_value = {"id": "new-id"} if data is None else data
        resp.raise_for_status = MagicMock()
        return resp

    def test_posts_progress_event(self):
        """A progress event is sent with a single POST carrying its summary."""
        plan = {
            "progress_events": [
                {"summary": "session note", "workstream_id": "ws-1", "event_type": "note"}
            ],
            "tasks_to_update": [],
            "tasks_to_flag": [],
        }
        with patch("httpx.post") as mock_post:
            mock_post.return_value = self._ok_response()
            execute(plan)
        mock_post.assert_called_once()
        # call_args[1] holds the keyword arguments of the recorded call.
        payload = mock_post.call_args[1]["json"]
        assert payload["summary"] == "session note"

    def test_patches_task_status(self):
        """A status update is sent with a single PATCH whose URL names the task."""
        plan = {
            "progress_events": [],
            "tasks_to_update": [{"task_id": "abc-123", "status": "done"}],
            "tasks_to_flag": [],
        }
        with patch("httpx.patch") as mock_patch:
            mock_patch.return_value = self._ok_response()
            execute(plan)
        mock_patch.assert_called_once()
        # call_args[0][0] is the first positional argument, i.e. the URL.
        call_url = mock_patch.call_args[0][0]
        assert "abc-123" in call_url

    def test_flags_task_for_human(self):
        """Flagging sends one PATCH with ``needs_human`` set and the note attached."""
        plan = {
            "progress_events": [],
            "tasks_to_update": [],
            "tasks_to_flag": [{"task_id": "t99", "note": "needs human review"}],
        }
        with patch("httpx.patch") as mock_patch:
            mock_patch.return_value = self._ok_response()
            execute(plan)
        mock_patch.assert_called_once()
        payload = mock_patch.call_args[1]["json"]
        assert payload.get("needs_human") is True
        assert "needs human review" in payload.get("intervention_note", "")

    def test_gracefully_handles_api_error(self):
        """A failing POST is reported in the results rather than raised."""
        plan = {
            "progress_events": [{"summary": "test", "event_type": "note"}],
            "tasks_to_update": [],
            "tasks_to_flag": [],
        }
        with patch("httpx.post") as mock_post:
            mock_post.side_effect = Exception("Connection refused")
            # Should not raise — errors are logged in results
            results = execute(plan)
        assert any("error" in r.lower() or "failed" in r.lower() for r in results)

    def test_empty_plan_returns_empty_results(self):
        """A plan with no actions produces an empty result list."""
        plan = {"progress_events": [], "tasks_to_update": [], "tasks_to_flag": []}
        # Patched purely as a safety net so the test can never hit the network.
        with patch("httpx.post"), patch("httpx.patch"):
            results = execute(plan)
        assert results == []
# ---------------------------------------------------------------------------
# sanctioned_actions constant
# ---------------------------------------------------------------------------
class TestSanctionedActions:
    """Guards on the contents of the ``SANCTIONED_ACTIONS`` constant."""

    def test_only_three_sanctioned_actions(self):
        """The constant holds exactly three entries."""
        assert len(SANCTIONED_ACTIONS) == 3

    def test_contains_expected_actions(self):
        """Each of the three expected write operations is listed."""
        for expected in ("add_progress_event", "update_task_status", "flag_for_human"):
            assert expected in SANCTIONED_ACTIONS

    def test_no_destructive_actions_sanctioned(self):
        """No sanctioned action name contains a destructive verb."""
        for action in SANCTIONED_ACTIONS:
            lowered = action.lower()
            for verb in ("delete", "destroy", "drop"):
                assert verb not in lowered

View File

@@ -0,0 +1,164 @@
"""Tests for the observation and context-building layer (context.py).
All tests are offline — httpx is mocked so no live state-hub required.
"""
from __future__ import annotations
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from context import (
build_context,
fetch_state,
load_constitution,
CONSTITUTION_PATH,
API_BASE,
)
# ---------------------------------------------------------------------------
# fetch_state
# ---------------------------------------------------------------------------
class TestFetchState:
    """``fetch_state`` with ``httpx.get`` mocked out."""

    def _mock_response(self, data: dict, status: int = 200):
        """Build a mock response with the given status whose ``json()`` returns *data*.

        ``raise_for_status`` is a no-op mock unless a test overrides it.
        """
        resp = MagicMock()
        resp.status_code = status
        resp.json.return_value = data
        resp.raise_for_status = MagicMock()
        return resp

    def test_fetch_state_calls_summary_endpoint(self):
        """Without a domain, a single GET goes to the ``/state/summary`` URL."""
        state_data = {"totals": {"tasks": {"todo": 5}}, "topics": []}
        with patch("httpx.get") as mock_get:
            mock_get.return_value = self._mock_response(state_data)
            fetch_state(domain=None)
        mock_get.assert_called_once()
        # call_args[0][0] is the first positional argument, i.e. the URL.
        call_url = mock_get.call_args[0][0]
        assert "/state/summary" in call_url

    def test_fetch_state_with_domain_calls_domain_endpoint(self):
        """With a domain, the requested URL contains the domain name."""
        domain_data = {"domain": "custodian", "workstreams": []}
        with patch("httpx.get") as mock_get:
            mock_get.return_value = self._mock_response(domain_data)
            fetch_state(domain="custodian")
        call_url = mock_get.call_args[0][0]
        assert "custodian" in call_url

    def test_fetch_state_returns_dict(self):
        """A successful fetch returns a dict."""
        state_data = {"totals": {}, "topics": []}
        with patch("httpx.get") as mock_get:
            mock_get.return_value = self._mock_response(state_data)
            result = fetch_state()
        assert isinstance(result, dict)

    def test_fetch_state_handles_connection_error(self):
        """A GET that raises results in an empty dict, not an exception."""
        with patch("httpx.get") as mock_get:
            mock_get.side_effect = Exception("Connection refused")
            result = fetch_state()
        # Graceful degradation: returns empty dict, does not raise
        assert result == {}

    def test_fetch_state_handles_non_200(self):
        """A response whose ``raise_for_status`` raises results in an empty dict."""
        # Reuse the shared helper so the mock carries a real status code. The
        # body is deliberately non-empty: if the error were ignored, the
        # result would not compare equal to {} and the test would fail.
        resp = self._mock_response({"unexpected": True}, status=503)
        resp.raise_for_status.side_effect = Exception("503 Service Unavailable")
        with patch("httpx.get") as mock_get:
            mock_get.return_value = resp
            result = fetch_state()
        assert result == {}

    def test_fetch_state_custom_api_base(self):
        """An explicit ``api_base`` shows up in the requested URL."""
        with patch("httpx.get") as mock_get:
            mock_get.return_value = self._mock_response({})
            fetch_state(api_base="http://localhost:9999")
        call_url = mock_get.call_args[0][0]
        assert "localhost:9999" in call_url
# ---------------------------------------------------------------------------
# load_constitution
# ---------------------------------------------------------------------------
class TestLoadConstitution:
    """``load_constitution`` against the real file and against a missing one."""

    def test_load_constitution_returns_non_empty_string(self):
        """The loaded constitution is a string longer than 100 characters."""
        constitution = load_constitution()
        assert isinstance(constitution, str)
        assert len(constitution) > 100

    def test_load_constitution_contains_key_clauses(self):
        """The text mentions "Custodian" and, in any casing, "forbidden"."""
        constitution = load_constitution()
        assert "Custodian" in constitution
        # The case-insensitive check also covers the capitalised "Forbidden".
        assert "forbidden" in constitution.lower()

    def test_constitution_path_exists(self):
        """``CONSTITUTION_PATH`` points at an existing file."""
        hint = (
            f"Constitution not found at {CONSTITUTION_PATH}. "
            "The path is hardcoded relative to this file — check context.py."
        )
        assert CONSTITUTION_PATH.exists(), hint

    def test_load_constitution_with_missing_file(self, tmp_path, monkeypatch):
        """If constitution is missing, return a minimal fallback, not an exception."""
        import context

        absent = tmp_path / "nonexistent.md"
        monkeypatch.setattr(context, "CONSTITUTION_PATH", absent)
        fallback = load_constitution()
        # Should return fallback, not crash
        assert isinstance(fallback, str)
        assert len(fallback) > 0
# ---------------------------------------------------------------------------
# build_context
# ---------------------------------------------------------------------------
class TestBuildContext:
    """``build_context`` output for a variety of state dicts."""

    def _minimal_state(self) -> dict:
        """Return a small state dict with totals and empty blocker lists."""
        totals = {
            "tasks": {"todo": 3, "done": 10},
            "workstreams": {"active": 2, "completed": 5},
            "decisions": {"open": 0},
        }
        return {
            "totals": totals,
            "blocking_decisions": [],
            "blocked_tasks": [],
            "open_workstreams": [],
        }

    def test_build_context_returns_string(self):
        """The built context is a string."""
        prompt = build_context(self._minimal_state(), "## Constitution\nBe safe.")
        assert isinstance(prompt, str)

    def test_build_context_includes_constitution(self):
        """Constitution text passed in appears in the built context."""
        prompt = build_context(self._minimal_state(), "## Constitution\nBe safe.")
        assert "Be safe" in prompt

    def test_build_context_includes_task_counts(self):
        """The todo count from the state appears in the built context."""
        prompt = build_context(self._minimal_state(), "")
        assert "3" in prompt  # todo count

    def test_build_context_mentions_blocked_tasks_when_present(self):
        """A blocked task is surfaced by title or by the word "blocked"."""
        blocked = [
            {"id": "t1", "title": "Deploy postgres", "blocking_reason": "no cluster"}
        ]
        state = {**self._minimal_state(), "blocked_tasks": blocked}
        prompt = build_context(state, "")
        assert "Deploy postgres" in prompt or "blocked" in prompt.lower()

    def test_build_context_mentions_blocking_decisions_when_present(self):
        """A blocking decision is surfaced by title or by the word "decision"."""
        pending = [
            {"id": "d1", "title": "Which DB?", "type": "pending"}
        ]
        state = {**self._minimal_state(), "blocking_decisions": pending}
        prompt = build_context(state, "")
        assert "Which DB?" in prompt or "decision" in prompt.lower()

    def test_build_context_with_empty_state_does_not_crash(self):
        """An empty state dict still produces a string."""
        prompt = build_context({}, "some constitution")
        assert isinstance(prompt, str)

    def test_build_context_includes_json_response_instruction(self):
        """The prompt must instruct the LLM to return a JSON block."""
        prompt = build_context(self._minimal_state(), "")
        assert "```json" in prompt or "JSON" in prompt