generated from coulomb/repo-seed
refactor: simplify post-WP-0002 cleanup
- Remove redundant async_execute_prompt overrides from the OpenAI/Gemini/OpenRouter adapters (identical to the base class default — the asyncio import is also removed)
- Cache the prompt.split() result in MockLLMAdapter to avoid double evaluation
- Promote deferred LLMBudgetExceededError imports to module level in models.py and adapter.py (no circular dependency)
- Auto-populate the context dict in LLMBudgetExceededError.__init__ so callers need not pass a redundant context= kwarg

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,6 +10,7 @@ from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any
|
||||
|
||||
from llm_connect.models import RunConfig, LLMResponse, BudgetTracker
|
||||
from llm_connect.exceptions import LLMBudgetExceededError
|
||||
|
||||
|
||||
class LLMAdapter(ABC):
|
||||
@@ -79,14 +80,12 @@ class LLMAdapter(ABC):
|
||||
def _preflight_budget(self, config: RunConfig) -> None:
|
||||
"""Raise ``LLMBudgetExceededError`` if the budget is already exhausted."""
|
||||
if config.budget_tracker is not None and config.budget_tracker.remaining() == 0:
|
||||
from llm_connect.exceptions import LLMBudgetExceededError
|
||||
tracker = config.budget_tracker
|
||||
raise LLMBudgetExceededError(
|
||||
"Token budget exhausted before making request",
|
||||
total=tracker.total,
|
||||
spent=tracker.spent,
|
||||
requested=0,
|
||||
context={"total": tracker.total, "spent": tracker.spent},
|
||||
)
|
||||
|
||||
def _consume_budget(self, config: RunConfig, response: LLMResponse) -> None:
|
||||
@@ -135,13 +134,15 @@ class MockLLMAdapter(LLMAdapter):
|
||||
self.last_prompt = prompt
|
||||
self.last_config = config
|
||||
|
||||
prompt_tokens = len(prompt.split())
|
||||
completion_tokens = len(self.mock_response.split())
|
||||
response = LLMResponse(
|
||||
content=self.mock_response,
|
||||
model=config.model_name,
|
||||
usage={
|
||||
"prompt_tokens": len(prompt.split()),
|
||||
"completion_tokens": len(self.mock_response.split()),
|
||||
"total_tokens": len(prompt.split()) + len(self.mock_response.split()),
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
},
|
||||
finish_reason="stop",
|
||||
metadata={"mock": True},
|
||||
|
||||
@@ -82,6 +82,8 @@ class LLMBudgetExceededError(LLMError):
|
||||
cause: Optional[Exception] = None,
|
||||
context: Optional[Dict[str, Any]] = None,
|
||||
):
|
||||
if context is None:
|
||||
context = {"total": total, "spent": spent, "requested": requested}
|
||||
super().__init__(message, cause=cause, context=context)
|
||||
self.total = total
|
||||
self.spent = spent
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
Google Gemini adapter — calls the Generative Language REST API directly.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
@@ -111,10 +110,6 @@ class GeminiAdapter(LLMAdapter):
|
||||
self._consume_budget(config, response)
|
||||
return response
|
||||
|
||||
async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
|
||||
"""Async wrapper — runs execute_prompt in a thread executor."""
|
||||
return await asyncio.to_thread(self.execute_prompt, prompt, config)
|
||||
|
||||
def validate_config(self, config: RunConfig) -> bool:
|
||||
if not self._api_key:
|
||||
return False
|
||||
|
||||
@@ -9,6 +9,8 @@ import threading
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
from llm_connect.exceptions import LLMBudgetExceededError
|
||||
|
||||
|
||||
class BudgetTracker:
|
||||
"""Shared token budget for a call or delegation chain.
|
||||
@@ -36,8 +38,6 @@ class BudgetTracker:
|
||||
|
||||
def consume(self, tokens: int) -> None:
|
||||
"""Record *tokens* as spent. Raises ``LLMBudgetExceededError`` if cap exceeded."""
|
||||
from llm_connect.exceptions import LLMBudgetExceededError # avoid circular at module load
|
||||
|
||||
with self._lock:
|
||||
new_spent = self.spent + tokens
|
||||
if new_spent > self.total:
|
||||
@@ -46,7 +46,6 @@ class BudgetTracker:
|
||||
total=self.total,
|
||||
spent=self.spent,
|
||||
requested=tokens,
|
||||
context={"total": self.total, "spent": self.spent, "requested": tokens},
|
||||
)
|
||||
self.spent = new_spent
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
OpenAI (ChatGPT) adapter — calls the OpenAI chat completions API.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
@@ -100,10 +99,6 @@ class OpenAIAdapter(LLMAdapter):
|
||||
self._consume_budget(config, response)
|
||||
return response
|
||||
|
||||
async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
|
||||
"""Async wrapper — runs execute_prompt in a thread executor."""
|
||||
return await asyncio.to_thread(self.execute_prompt, prompt, config)
|
||||
|
||||
def validate_config(self, config: RunConfig) -> bool:
|
||||
if not self._api_key:
|
||||
return False
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
OpenRouter adapter — calls the OpenAI-compatible chat completions API.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
@@ -108,10 +107,6 @@ class OpenRouterAdapter(LLMAdapter):
|
||||
self._consume_budget(config, response)
|
||||
return response
|
||||
|
||||
async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
|
||||
"""Async wrapper — runs execute_prompt in a thread executor."""
|
||||
return await asyncio.to_thread(self.execute_prompt, prompt, config)
|
||||
|
||||
def validate_config(self, config: RunConfig) -> bool:
|
||||
if not self._api_key:
|
||||
return False
|
||||
|
||||
Reference in New Issue
Block a user