generated from coulomb/repo-seed
Implement llm-connect ADHOC diagnostics
This commit is contained in:
@@ -1,221 +1,151 @@
|
||||
"""
|
||||
OpenRouter adapter — calls the OpenAI-compatible chat completions API.
|
||||
"""
|
||||
|
||||
import time
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
from llm_connect.adapter import LLMAdapter
|
||||
from llm_connect.models import RunConfig, LLMResponse
|
||||
from llm_connect.config import LLMConfig, resolve_api_key, find_project_root
|
||||
from llm_connect._http import post_json
|
||||
from llm_connect.exceptions import (
|
||||
LLMConfigurationError,
|
||||
LLMAPIError,
|
||||
LLMRateLimitError,
|
||||
)
|
||||
|
||||
# Fallback model id: used only when no model is supplied through the adapter
# constructor, LLMConfig.model, or (absent both) RunConfig.model_name.
_DEFAULT_MODEL = "anthropic/claude-sonnet-4"
|
||||
|
||||
|
||||
class OpenRouterAdapter(LLMAdapter):
    """LLM adapter that calls the OpenRouter chat completions endpoint.

    Constructor args override values from *config*; *config* overrides
    global defaults. The model used for a given call is resolved as:
    ``constructor model > LLMConfig.model > RunConfig.model_name > default``.
    ``RunConfig.model_name`` is consulted only when neither the constructor
    nor *config* named a model.
    """

    def __init__(
        self,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        config: Optional[LLMConfig] = None,
        system_prompt: Optional[str] = None,
        extra_headers: Optional[Dict[str, str]] = None,
        max_retries: Optional[int] = None,
    ):
        """Build an adapter from explicit arguments layered over *config*.

        Args:
            model: Model id; takes precedence over ``config.model``.
            api_key: API key; falls back to ``config.api_key``. The result is
                handed to ``resolve_api_key`` together with the
                ``OPENROUTER_API_KEY`` env var name and, when a project root
                is found, the ``apikey-openrouter.txt`` file in it.
            api_base: Base URL; falls back to ``config.api_base``. Trailing
                slashes are stripped.
            config: Base configuration; a default ``LLMConfig()`` is used
                when omitted.
            system_prompt: When truthy, sent as a leading ``system`` message
                on every call.
            extra_headers: Extra HTTP headers, merged after ``Authorization``
                (so a same-named key here replaces it).
            max_retries: Retry count after the first attempt; falls back to
                ``config.max_retries`` when ``None``.
        """
        self._config = config or LLMConfig()
        # Track whether the model was explicitly supplied (constructor or
        # LLMConfig). Comparing self._model to _DEFAULT_MODEL is not enough —
        # callers who pass --model anthropic/claude-sonnet-4 happen to match
        # the default and would otherwise be misrouted to RunConfig.model_name
        # (which defaults to "gpt-4" — quietly sending every call to OpenAI's
        # gpt-4 model, which is what broke the activity-core CUST-WP-0045
        # canary on 2026-06-02).
        self._explicit_model = model is not None or self._config.model is not None
        self._model = model or self._config.model or _DEFAULT_MODEL
        self._api_base = (api_base or self._config.api_base).rstrip("/")
        self._system_prompt = system_prompt
        self._extra_headers = extra_headers or {}
        self._max_retries = max_retries if max_retries is not None else self._config.max_retries

        # Resolve API key
        root = find_project_root()
        key_file_paths = [root / "apikey-openrouter.txt"] if root else []
        self._api_key = resolve_api_key(
            explicit=api_key or self._config.api_key,
            env_var="OPENROUTER_API_KEY",
            key_file_paths=key_file_paths,
        )

    # ── LLMAdapter interface ────────────────────────────────────────

    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Send *prompt* as one user message and return the parsed completion.

        Args:
            prompt: Text of the user message.
            config: Per-call settings (temperature, max_tokens, timeout,
                model_params, and a fallback model name).

        Returns:
            An ``LLMResponse`` with the first choice's content, token usage,
            finish reason, and provider metadata (latency, response id).

        Raises:
            LLMRateLimitError: Rate limiting persisted through all retries.
            LLMAPIError: A non-retryable API error, or a 5xx on the final
                attempt.
        """
        self._preflight_budget(config)
        # Explicit constructor/LLMConfig model wins; only fall back to the
        # per-call RunConfig.model_name when the adapter wasn't told what to
        # use. RunConfig.model_name defaults to "gpt-4", so falling back
        # unconditionally would silently misroute callers.
        if self._explicit_model:
            model = self._model
        else:
            model = config.model_name or self._model

        messages: list[Dict[str, str]] = []
        if self._system_prompt:
            messages.append({"role": "system", "content": self._system_prompt})
        messages.append({"role": "user", "content": prompt})

        payload: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "temperature": config.temperature,
            "max_tokens": config.max_tokens,
        }
        if config.model_params:
            _merge_model_params(payload, config.model_params)

        headers = {
            "Authorization": f"Bearer {self._api_key}",
            **self._extra_headers,
        }
        url = f"{self._api_base}/chat/completions"

        # perf_counter is monotonic, so the latency cannot go negative or
        # jump if the wall clock is adjusted mid-request.
        start = time.perf_counter()
        data = self._post_with_retries(url, payload, headers, config.timeout_seconds)
        latency = time.perf_counter() - start

        # Parse response. ``or`` fallbacks (rather than .get defaults) also
        # cover keys that are present but JSON null, and an empty "choices"
        # list, both of which would otherwise raise here.
        choices = data.get("choices") or [{}]
        choice = choices[0] or {}
        message = choice.get("message") or {}
        content = message.get("content") or ""
        finish_reason = choice.get("finish_reason", "stop")
        usage = data.get("usage") or {}

        response = LLMResponse(
            content=content,
            model=data.get("model") or model,
            usage={
                "prompt_tokens": usage.get("prompt_tokens") or 0,
                "completion_tokens": usage.get("completion_tokens") or 0,
                "total_tokens": usage.get("total_tokens") or 0,
            },
            finish_reason=finish_reason,
            metadata={
                "provider": "openrouter",
                "latency_seconds": round(latency, 3),
                "response_id": data.get("id", ""),
            },
        )
        self._consume_budget(config, response)
        return response

    def validate_config(self, config: RunConfig) -> bool:
        """Return True when this adapter can serve a call with *config*.

        Fails when no API key was resolved, when neither the adapter nor
        *config* names a model, or when the temperature is outside
        ``[0.0, 2.0]``.
        """
        if not self._api_key:
            return False
        if not (self._model or config.model_name):
            return False
        if not (0.0 <= config.temperature <= 2.0):
            return False
        return True

    # ── Internals ───────────────────────────────────────────────────

    def _post_with_retries(
        self,
        url: str,
        payload: Dict[str, Any],
        headers: Dict[str, str],
        timeout: int,
    ) -> Dict[str, Any]:
        """POST *payload* to *url*, retrying rate limits and 5xx errors.

        Makes up to ``max_retries + 1`` attempts, sleeping ``2 ** attempt``
        seconds (1, 2, 4, ...) between them.

        Raises:
            LLMRateLimitError: Every attempt was rate limited.
            LLMAPIError: Immediately for non-5xx errors; for 5xx errors once
                the retries are used up.
        """
        # Clamp so a negative setting still performs one attempt instead of
        # skipping the loop and reaching ``raise None`` below.
        retries = max(self._max_retries, 0)
        last_exc: Optional[Exception] = None
        for attempt in range(retries + 1):
            try:
                return post_json(url, payload, headers, timeout=timeout)
            except LLMRateLimitError as exc:
                last_exc = exc
                if attempt < retries:
                    time.sleep(2 ** attempt)
            except LLMAPIError as exc:
                # A missing/None status code must not raise TypeError and
                # mask the real API error; treat it as non-retryable.
                status = getattr(exc, "status_code", None)
                retryable = isinstance(status, int) and status >= 500
                if not (retryable and attempt < retries):
                    raise
                last_exc = exc
                time.sleep(2 ** attempt)
        # Only reachable after the final attempt was rate limited.
        raise last_exc  # type: ignore[misc]
|
||||
|
||||
|
||||
# OpenAI Chat Completions fields that map straight through from model_params.
# Anything not in this set is provider-specific and must be either translated
# or dropped — we never blind-merge into the payload, because OpenRouter
# rejects unknown top-level fields with HTTP 400.
_OPENAI_PASSTHROUGH_FIELDS = frozenset({
    "top_p", "n", "stream", "stop", "presence_penalty",
    "frequency_penalty", "logit_bias", "user", "seed",
    "tools", "tool_choice", "response_format",
    "logprobs", "top_logprobs", "parallel_tool_calls",
})

# Provider-specific model_params keys that have no OpenAI Chat Completions
# equivalent and must be silently dropped to keep payloads valid.
_DROPPED_NON_OPENAI_FIELDS = frozenset({
    "reasoning_effort",   # Claude CLI / Anthropic-specific
    "max_depth",          # llm-connect's own depth knob
    "claude_cli_path",    # adapter wiring leak
    "json_schema",        # translated below into response_format
})


def _merge_model_params(payload: Dict[str, Any], model_params: Dict[str, Any]) -> None:
    """Merge RunConfig.model_params into an OpenAI Chat Completions payload.

    Pass-through whitelisted OpenAI keys, translate json_schema into the
    proper response_format wrapper, drop known provider-specific fields,
    and ignore anything else rather than letting it through and triggering
    a 400 from OpenRouter (the failure mode that hit CUST-WP-0045 on
    2026-06-02 — reasoning_effort and a top-level json_schema were merged
    into the body and the API rejected both).

    Args:
        payload: Request body; mutated in place.
        model_params: Caller-supplied parameters. Empty or ``None`` is a
            no-op.

    An explicit ``response_format`` (already in *payload* or supplied in
    *model_params*) always wins over one derived from ``json_schema``.
    A ``json_schema`` given as a string is parsed as JSON; if it does not
    parse, or does not yield a dict, it is dropped.
    """
    if not model_params:
        return

    schema = model_params.get("json_schema")
    # A caller-supplied response_format is passed through by the loop below
    # and would overwrite a derived wrapper anyway, so skip building one.
    has_explicit_format = (
        "response_format" in payload or "response_format" in model_params
    )
    if schema is not None and not has_explicit_format:
        if isinstance(schema, str):
            import json

            try:
                schema = json.loads(schema)
            except (ValueError, TypeError):
                # Best effort: an unparseable schema is dropped, not fatal.
                schema = None
        if isinstance(schema, dict):
            # strict=False: OpenAI's strict mode requires additionalProperties
            # to be false on every object and every property in the required
            # list. Most application-supplied schemas are not written that
            # way (the activity-core daily-triage schema, for example, has
            # neither). With strict=False, OpenRouter still honours the
            # schema as a soft constraint and the model's output remains
            # structured. Callers can opt back into strict by including
            # `strict: true` themselves in a custom `response_format`.
            payload["response_format"] = {
                "type": "json_schema",
                "json_schema": {
                    "name": "structured_output",
                    "schema": schema,
                    "strict": False,
                },
            }

    for key, value in model_params.items():
        if key in _DROPPED_NON_OPENAI_FIELDS:
            continue
        if key in _OPENAI_PASSTHROUGH_FIELDS:
            payload[key] = value
        # else: silently drop unknown keys rather than risk a 400.
|
||||
"""
|
||||
OpenRouter adapter - calls the OpenAI-compatible chat completions API.
|
||||
"""
|
||||
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from llm_connect._http import post_json
|
||||
from llm_connect._payload import merge_openai_chat_model_params
|
||||
from llm_connect.adapter import LLMAdapter
|
||||
from llm_connect.config import LLMConfig, find_project_root, resolve_api_key
|
||||
from llm_connect.exceptions import LLMAPIError, LLMRateLimitError
|
||||
from llm_connect.models import LLMResponse, RunConfig
|
||||
|
||||
# Fallback model id: used only when no model is supplied through the adapter
# constructor, LLMConfig.model, or (absent both) RunConfig.model_name.
_DEFAULT_MODEL = "anthropic/claude-sonnet-4"
|
||||
|
||||
|
||||
class OpenRouterAdapter(LLMAdapter):
    """LLM adapter that calls the OpenRouter chat completions endpoint.

    Constructor args override values from *config*; *config* overrides
    global defaults. The model used for a given call is resolved as:
    ``constructor model > LLMConfig.model > RunConfig.model_name > default``.
    ``RunConfig.model_name`` is consulted only when neither the constructor
    nor *config* named a model.
    """

    def __init__(
        self,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        config: Optional[LLMConfig] = None,
        system_prompt: Optional[str] = None,
        extra_headers: Optional[Dict[str, str]] = None,
        max_retries: Optional[int] = None,
    ):
        """Build an adapter from explicit arguments layered over *config*.

        Args:
            model: Model id; takes precedence over ``config.model``.
            api_key: API key; falls back to ``config.api_key``. The result is
                handed to ``resolve_api_key`` together with the
                ``OPENROUTER_API_KEY`` env var name and, when a project root
                is found, the ``apikey-openrouter.txt`` file in it.
            api_base: Base URL; falls back to ``config.api_base``. Trailing
                slashes are stripped.
            config: Base configuration; a default ``LLMConfig()`` is used
                when omitted.
            system_prompt: When truthy, sent as a leading ``system`` message
                on every call.
            extra_headers: Extra HTTP headers, merged after ``Authorization``
                (so a same-named key here replaces it).
            max_retries: Retry count after the first attempt; falls back to
                ``config.max_retries`` when ``None``.
        """
        self._config = config or LLMConfig()
        # Track whether the model was explicitly supplied (constructor or
        # LLMConfig). Comparing self._model to _DEFAULT_MODEL is not enough:
        # callers who pass --model anthropic/claude-sonnet-4 happen to match
        # the default and would otherwise be misrouted to RunConfig.model_name
        # (which defaults to "gpt-4", quietly sending every call to OpenAI's
        # gpt-4 model, which is what broke the activity-core CUST-WP-0045
        # canary on 2026-06-02).
        self._explicit_model = model is not None or self._config.model is not None
        self._model = model or self._config.model or _DEFAULT_MODEL
        self._api_base = (api_base or self._config.api_base).rstrip("/")
        self._system_prompt = system_prompt
        self._extra_headers = extra_headers or {}
        self._max_retries = max_retries if max_retries is not None else self._config.max_retries

        root = find_project_root()
        key_file_paths = [root / "apikey-openrouter.txt"] if root else []
        self._api_key = resolve_api_key(
            explicit=api_key or self._config.api_key,
            env_var="OPENROUTER_API_KEY",
            key_file_paths=key_file_paths,
        )

    # LLMAdapter interface

    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Send *prompt* as one user message and return the parsed completion.

        Args:
            prompt: Text of the user message.
            config: Per-call settings (temperature, max_tokens, timeout,
                model_params, and a fallback model name).

        Returns:
            An ``LLMResponse`` with the first choice's content, token usage,
            finish reason, and provider metadata (latency, response id).

        Raises:
            LLMRateLimitError: Rate limiting persisted through all retries.
            LLMAPIError: A non-retryable API error, or a 5xx on the final
                attempt.
        """
        self._preflight_budget(config)
        # Explicit constructor/LLMConfig model wins; only fall back to the
        # per-call RunConfig.model_name when the adapter was not told what to
        # use. RunConfig.model_name defaults to "gpt-4", so falling back
        # unconditionally would silently misroute callers.
        if self._explicit_model:
            model = self._model
        else:
            model = config.model_name or self._model

        messages: list[Dict[str, str]] = []
        if self._system_prompt:
            messages.append({"role": "system", "content": self._system_prompt})
        messages.append({"role": "user", "content": prompt})

        payload: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "temperature": config.temperature,
            "max_tokens": config.max_tokens,
        }
        if config.model_params:
            merge_openai_chat_model_params(payload, config.model_params)

        headers = {
            "Authorization": f"Bearer {self._api_key}",
            **self._extra_headers,
        }
        url = f"{self._api_base}/chat/completions"

        # perf_counter is monotonic, so the latency cannot go negative or
        # jump if the wall clock is adjusted mid-request.
        start = time.perf_counter()
        data = self._post_with_retries(url, payload, headers, config.timeout_seconds)
        latency = time.perf_counter() - start

        # Parse response. ``or`` fallbacks (rather than .get defaults) also
        # cover keys that are present but JSON null, and an empty "choices"
        # list, both of which would otherwise raise here.
        choices = data.get("choices") or [{}]
        choice = choices[0] or {}
        message = choice.get("message") or {}
        content = message.get("content") or ""
        finish_reason = choice.get("finish_reason", "stop")
        usage = data.get("usage") or {}

        response = LLMResponse(
            content=content,
            model=data.get("model") or model,
            usage={
                "prompt_tokens": usage.get("prompt_tokens") or 0,
                "completion_tokens": usage.get("completion_tokens") or 0,
                "total_tokens": usage.get("total_tokens") or 0,
            },
            finish_reason=finish_reason,
            metadata={
                "provider": "openrouter",
                "latency_seconds": round(latency, 3),
                "response_id": data.get("id", ""),
            },
        )
        self._consume_budget(config, response)
        return response

    def validate_config(self, config: RunConfig) -> bool:
        """Return True when this adapter can serve a call with *config*.

        Fails when no API key was resolved, when neither the adapter nor
        *config* names a model, or when the temperature is outside
        ``[0.0, 2.0]``.
        """
        if not self._api_key:
            return False
        if not (self._model or config.model_name):
            return False
        if not (0.0 <= config.temperature <= 2.0):
            return False
        return True

    # Internals

    def _post_with_retries(
        self,
        url: str,
        payload: Dict[str, Any],
        headers: Dict[str, str],
        timeout: int,
    ) -> Dict[str, Any]:
        """POST *payload* to *url*, retrying rate limits and 5xx errors.

        Makes up to ``max_retries + 1`` attempts, sleeping ``2 ** attempt``
        seconds (1, 2, 4, ...) between them.

        Raises:
            LLMRateLimitError: Every attempt was rate limited.
            LLMAPIError: Immediately for non-5xx errors; for 5xx errors once
                the retries are used up.
        """
        # Clamp so a negative setting still performs one attempt instead of
        # skipping the loop and reaching ``raise None`` below.
        retries = max(self._max_retries, 0)
        last_exc: Optional[Exception] = None
        for attempt in range(retries + 1):
            try:
                return post_json(url, payload, headers, timeout=timeout)
            except LLMRateLimitError as exc:
                last_exc = exc
                if attempt < retries:
                    time.sleep(2 ** attempt)
            except LLMAPIError as exc:
                # A missing/None status code must not raise TypeError and
                # mask the real API error; treat it as non-retryable.
                status = getattr(exc, "status_code", None)
                retryable = isinstance(status, int) and status >= 500
                if not (retryable and attempt < retries):
                    raise
                last_exc = exc
                time.sleep(2 ** attempt)
        # Only reachable after the final attempt was rate limited.
        raise last_exc  # type: ignore[misc]
|
||||
|
||||
Reference in New Issue
Block a user