feat: WP-0001 foundation + WP-0002 core extensions

WP-0001 — Foundation & GAAF Baseline
- SCOPE.md, ARCHITECTURE-LAYERS.md, contracts/ tree
- .claude/rules/ stubs filled (architecture, stack, boundary)
- 57 tests (pytest), pyproject.toml with ruff+mypy, CI workflow

WP-0002 — Core Extensions (FR-4 + FR-3)
- FR-4: BudgetTracker (thread-safe) + LLMBudgetExceededError + optional RunConfig.budget_tracker + enforcement in all adapters
- FR-3: async_execute_prompt on LLMAdapter ABC (asyncio.to_thread fallback) + native asyncio.create_subprocess_exec in ClaudeCodeAdapter

81 tests passing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 22:24:14 +00:00
parent 57b346bb8b
commit d71f4114d1
28 changed files with 1601 additions and 26 deletions
--- a/tests/test_budget.py
+++ b/tests/test_budget.py
@@ -0,0 +1,152 @@
+"""
+Tests for BudgetTracker (FR-4) and LLMBudgetExceededError.
+"""
+
+import threading
+import pytest
+
+from llm_connect.models import BudgetTracker, RunConfig
+from llm_connect.adapter import MockLLMAdapter
+from llm_connect.exceptions import LLMBudgetExceededError, LLMError
+
+
+class TestBudgetTracker:
+    """Unit tests for BudgetTracker accounting, validation and concurrent use."""
+
+    def test_initial_state(self):
+        """A fresh tracker exposes its total, zero spend and the full remainder."""
+        t = BudgetTracker(total=1000)
+        assert t.total == 1000
+        assert t.spent == 0
+        assert t.remaining() == 1000
+
+    def test_consume_updates_spent(self):
+        """consume() adds to spent and lowers remaining() by the same amount."""
+        t = BudgetTracker(total=1000)
+        t.consume(300)
+        assert t.spent == 300
+        assert t.remaining() == 700
+
+    def test_consume_multiple_times(self):
+        """Successive consume() calls accumulate."""
+        t = BudgetTracker(total=1000)
+        t.consume(400)
+        t.consume(400)
+        assert t.spent == 800
+        assert t.remaining() == 200
+
+    def test_consume_exact_budget(self):
+        """Spending exactly the total is allowed and leaves nothing remaining."""
+        t = BudgetTracker(total=100)
+        t.consume(100)
+        assert t.spent == 100
+        assert t.remaining() == 0
+
+    def test_consume_exceeds_budget_raises(self):
+        """A consume() that would go past the total raises LLMBudgetExceededError."""
+        t = BudgetTracker(total=100)
+        t.consume(60)
+        with pytest.raises(LLMBudgetExceededError):
+            t.consume(50)
+
+    def test_exceeded_error_carries_details(self):
+        """The raised error exposes total, spent and the rejected request size."""
+        t = BudgetTracker(total=100)
+        t.consume(80)
+        with pytest.raises(LLMBudgetExceededError) as exc_info:
+            t.consume(30)
+        err = exc_info.value
+        assert err.total == 100
+        assert err.spent == 80
+        assert err.requested == 30
+
+    def test_exceeded_error_is_subclass_of_llm_error(self):
+        """Callers catching the base LLMError also catch budget overruns."""
+        # Build the tracker outside the raises-block so that only consume()
+        # can satisfy the expectation, never an error from the constructor.
+        t = BudgetTracker(total=10)
+        with pytest.raises(LLMError) as exc_info:
+            t.consume(20)
+        assert isinstance(exc_info.value, LLMBudgetExceededError)
+
+    def test_remaining_never_negative(self):
+        """remaining() stays at zero even after a rejected overspend attempt."""
+        t = BudgetTracker(total=100)
+        t.consume(100)
+        assert t.remaining() == 0
+        # A rejected consume() must not push the balance below zero.
+        with pytest.raises(LLMBudgetExceededError):
+            t.consume(1)
+        assert t.remaining() == 0
+
+    def test_invalid_total_raises(self):
+        """Zero and negative totals are rejected with ValueError."""
+        with pytest.raises(ValueError):
+            BudgetTracker(total=0)
+        with pytest.raises(ValueError):
+            BudgetTracker(total=-1)
+
+    def test_repr(self):
+        """repr() mentions both the total and the amount spent."""
+        t = BudgetTracker(total=500)
+        t.consume(100)
+        r = repr(t)
+        assert "500" in r
+        assert "100" in r
+
+    def test_thread_safety(self):
+        """Concurrent consume() calls must not corrupt state or allow overspend."""
+        total = 1000
+        attempts = 15
+        t = BudgetTracker(total=total)
+        errors = []
+        # Hold every worker at the gate until all have started, so the
+        # consume() calls contend instead of running one after another.
+        start_gate = threading.Barrier(attempts)
+
+        def consume_100():
+            start_gate.wait()
+            try:
+                t.consume(100)
+            except LLMBudgetExceededError:
+                errors.append(1)
+
+        threads = [threading.Thread(target=consume_100) for _ in range(attempts)]
+        for th in threads:
+            th.start()
+        for th in threads:
+            th.join()
+
+        # At most 10 consumes of 100 can succeed within a budget of 1000
+        assert t.spent <= total
+        assert len(errors) == 5  # 15 attempts, 10 succeed, 5 fail
+
+
+class TestBudgetEnforcementInAdapter:
+    """Tests that an adapter charges and enforces RunConfig.budget_tracker."""
+
+    def test_single_call_consumes_budget(self):
+        """One successful call records some usage on the tracker."""
+        tracker = BudgetTracker(total=10000)
+        config = RunConfig(budget_tracker=tracker)
+        adapter = MockLLMAdapter(mock_response="hello world")
+        adapter.execute_prompt("test prompt", config)
+        assert tracker.spent > 0
+
+    def test_exhausted_budget_raises_before_call(self):
+        """With nothing left to spend, the adapter raises without being invoked."""
+        tracker = BudgetTracker(total=1)
+        tracker.consume(1)  # exhaust it
+        config = RunConfig(budget_tracker=tracker)
+        adapter = MockLLMAdapter()
+        with pytest.raises(LLMBudgetExceededError):
+            adapter.execute_prompt("any prompt", config)
+        # Adapter should not have been called
+        assert adapter.call_count == 0
+
+    def test_delegation_chain_shared_tracker(self):
+        """A → B → C sharing the same tracker enforces the cap across all calls."""
+        tracker = BudgetTracker(total=10000)
+        config = RunConfig(budget_tracker=tracker)
+        adapter = MockLLMAdapter(mock_response="response")
+
+        spent_after_each = []
+        for prompt in ("call A", "call B", "call C"):
+            adapter.execute_prompt(prompt, config)
+            spent_after_each.append(tracker.spent)
+
+        assert adapter.call_count == 3
+        assert tracker.spent > 0
+        # Every call in the chain must be charged to the shared tracker,
+        # not only the first one.
+        assert spent_after_each[0] < spent_after_each[1] < spent_after_each[2]
+
+    def test_budget_exceeded_mid_chain(self):
+        """Chain stops when budget is exhausted between calls."""
+        # NOTE(review): the exact per-call cost depends on how MockLLMAdapter
+        # estimates tokens, which is not visible in this file. The test only
+        # relies on a single call fitting within the budget of 200 and on
+        # repeated calls eventually exceeding it.
+        adapter = MockLLMAdapter(mock_response="r " * 50)
+        tracker = BudgetTracker(total=200)
+        config = RunConfig(budget_tracker=tracker)
+
+        # First call succeeds
+        adapter.execute_prompt("p " * 100, config)
+        assert adapter.call_count == 1
+        # Eventually exhausts the budget
+        with pytest.raises(LLMBudgetExceededError):
+            for _ in range(10):
+                adapter.execute_prompt("p " * 100, config)
+        # The cap must still hold after the failing call.
+        assert tracker.spent <= tracker.total
+
+    def test_no_tracker_has_no_effect(self):
+        """Adapters work normally when no budget_tracker is set."""
+        config = RunConfig()  # no budget_tracker
+        adapter = MockLLMAdapter()
+        response = adapter.execute_prompt("hello", config)
+        assert response.content == "Mock LLM response"