diff --git a/llm_connect/adapter.py b/llm_connect/adapter.py index 4742e70..129a33f 100644 --- a/llm_connect/adapter.py +++ b/llm_connect/adapter.py @@ -10,6 +10,7 @@ from abc import ABC, abstractmethod from typing import Dict, Any from llm_connect.models import RunConfig, LLMResponse, BudgetTracker +from llm_connect.exceptions import LLMBudgetExceededError class LLMAdapter(ABC): @@ -79,14 +80,12 @@ class LLMAdapter(ABC): def _preflight_budget(self, config: RunConfig) -> None: """Raise ``LLMBudgetExceededError`` if the budget is already exhausted.""" if config.budget_tracker is not None and config.budget_tracker.remaining() == 0: - from llm_connect.exceptions import LLMBudgetExceededError tracker = config.budget_tracker raise LLMBudgetExceededError( "Token budget exhausted before making request", total=tracker.total, spent=tracker.spent, requested=0, - context={"total": tracker.total, "spent": tracker.spent}, ) def _consume_budget(self, config: RunConfig, response: LLMResponse) -> None: @@ -135,13 +134,15 @@ class MockLLMAdapter(LLMAdapter): self.last_prompt = prompt self.last_config = config + prompt_tokens = len(prompt.split()) + completion_tokens = len(self.mock_response.split()) response = LLMResponse( content=self.mock_response, model=config.model_name, usage={ - "prompt_tokens": len(prompt.split()), - "completion_tokens": len(self.mock_response.split()), - "total_tokens": len(prompt.split()) + len(self.mock_response.split()), + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, }, finish_reason="stop", metadata={"mock": True}, diff --git a/llm_connect/exceptions.py b/llm_connect/exceptions.py index 165a92b..a6b257c 100644 --- a/llm_connect/exceptions.py +++ b/llm_connect/exceptions.py @@ -82,6 +82,8 @@ class LLMBudgetExceededError(LLMError): cause: Optional[Exception] = None, context: Optional[Dict[str, Any]] = None, ): + if context is None: + context = {"total": total, "spent": spent, "requested": requested} super().__init__(message, cause=cause, context=context) self.total = total self.spent = spent diff --git a/llm_connect/gemini.py b/llm_connect/gemini.py index 171c176..7d5db8d 100644 --- a/llm_connect/gemini.py +++ b/llm_connect/gemini.py @@ -2,7 +2,6 @@ Google Gemini adapter — calls the Generative Language REST API directly. """ -import asyncio import time from typing import Optional, Dict, Any @@ -111,10 +110,6 @@ class GeminiAdapter(LLMAdapter): self._consume_budget(config, response) return response - async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse: - """Async wrapper — runs execute_prompt in a thread executor.""" - return await asyncio.to_thread(self.execute_prompt, prompt, config) - def validate_config(self, config: RunConfig) -> bool: if not self._api_key: return False diff --git a/llm_connect/models.py b/llm_connect/models.py index b456e6c..cd40a6a 100644 --- a/llm_connect/models.py +++ b/llm_connect/models.py @@ -9,6 +9,8 @@ import threading from dataclasses import dataclass, field from typing import Dict, Any, Optional +from llm_connect.exceptions import LLMBudgetExceededError + class BudgetTracker: """Shared token budget for a call or delegation chain. @@ -36,8 +38,6 @@ class BudgetTracker: def consume(self, tokens: int) -> None: """Record *tokens* as spent. Raises ``LLMBudgetExceededError`` if cap exceeded.""" - from llm_connect.exceptions import LLMBudgetExceededError # avoid circular at module load - with self._lock: new_spent = self.spent + tokens if new_spent > self.total: @@ -46,7 +46,6 @@ class BudgetTracker: total=self.total, spent=self.spent, requested=tokens, - context={"total": self.total, "spent": self.spent, "requested": tokens}, ) self.spent = new_spent diff --git a/llm_connect/openai.py b/llm_connect/openai.py index 9528fbc..c0c76d2 100644 --- a/llm_connect/openai.py +++ b/llm_connect/openai.py @@ -2,7 +2,6 @@ OpenAI (ChatGPT) adapter — calls the OpenAI chat completions API. """ -import asyncio import time from typing import Optional, Dict, Any @@ -100,10 +99,6 @@ class OpenAIAdapter(LLMAdapter): self._consume_budget(config, response) return response - async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse: - """Async wrapper — runs execute_prompt in a thread executor.""" - return await asyncio.to_thread(self.execute_prompt, prompt, config) - def validate_config(self, config: RunConfig) -> bool: if not self._api_key: return False diff --git a/llm_connect/openrouter.py b/llm_connect/openrouter.py index 8cba1c1..4f9c69b 100644 --- a/llm_connect/openrouter.py +++ b/llm_connect/openrouter.py @@ -2,7 +2,6 @@ OpenRouter adapter — calls the OpenAI-compatible chat completions API. """ -import asyncio import time from typing import Optional, Dict, Any @@ -108,10 +107,6 @@ class OpenRouterAdapter(LLMAdapter): self._consume_budget(config, response) return response - async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse: - """Async wrapper — runs execute_prompt in a thread executor.""" - return await asyncio.to_thread(self.execute_prompt, prompt, config) - def validate_config(self, config: RunConfig) -> bool: if not self._api_key: return False