Files
llm-connect/llm_connect/openrouter.py
tegwick 583ab57a59
Some checks failed
CI / test (3.10) (push) Has been cancelled
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
Set response_format json_schema strict=False in OpenRouter adapter
The previous strict=True default rejected the activity-core daily-triage
schema (and most real-world application schemas) because OpenAI strict
mode requires additionalProperties:false on every object and requires
every property to be listed in `required`. Application-supplied schemas typically
do not meet that bar — adding additionalProperties recursively at the
adapter would be surprising and may break callers that rely on extra
fields. Flipping strict to False keeps the schema as a soft constraint;
the model still produces structured output and the activity-core
canary's 400 from OpenRouter goes away.

Callers who need strict enforcement can pass response_format directly
via model_params, where the adapter's pass-through handling preserves
the strict flag they set.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 14:18:33 +02:00

207 lines
8.3 KiB
Python

"""
OpenRouter adapter — calls the OpenAI-compatible chat completions API.
"""
import time
from typing import Optional, Dict, Any
from llm_connect.adapter import LLMAdapter
from llm_connect.models import RunConfig, LLMResponse
from llm_connect.config import LLMConfig, resolve_api_key, find_project_root
from llm_connect._http import post_json
from llm_connect.exceptions import (
LLMConfigurationError,
LLMAPIError,
LLMRateLimitError,
)
# Fallback model identifier: used when neither the constructor argument nor
# LLMConfig.model names a model. While the adapter is on this fallback,
# RunConfig.model_name may still replace it per call (see
# OpenRouterAdapter.execute_prompt).
_DEFAULT_MODEL = "anthropic/claude-sonnet-4"
class OpenRouterAdapter(LLMAdapter):
    """LLM adapter that calls the OpenRouter chat completions endpoint.

    Constructor args override values from *config*; *config* overrides
    global defaults. The model used for a given call is resolved as:
    ``constructor model > RunConfig.model_name > default``.
    """

    def __init__(
        self,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        config: Optional[LLMConfig] = None,
        system_prompt: Optional[str] = None,
        extra_headers: Optional[Dict[str, str]] = None,
        max_retries: Optional[int] = None,
    ):
        """Resolve model, endpoint, retry policy and API key.

        Args:
            model: Model identifier. Falls back to ``config.model`` and then
                to ``_DEFAULT_MODEL``.
            api_key: Explicit API key. Falls back to ``config.api_key``; the
                result is handed to ``resolve_api_key`` together with the
                ``OPENROUTER_API_KEY`` environment variable name and, when a
                project root is found, ``<root>/apikey-openrouter.txt``.
            api_base: Base URL of the API. Falls back to ``config.api_base``;
                trailing slashes are stripped.
            config: Adapter configuration. A default ``LLMConfig()`` is used
                when omitted.
            system_prompt: Optional system message placed before the user
                message of every request.
            extra_headers: Extra HTTP headers sent with every request.
            max_retries: Number of retries after the first attempt. Falls
                back to ``config.max_retries`` when ``None``.
        """
        self._config = config or LLMConfig()
        self._model = model or self._config.model or _DEFAULT_MODEL
        self._api_base = (api_base or self._config.api_base).rstrip("/")
        self._system_prompt = system_prompt
        self._extra_headers = extra_headers or {}
        self._max_retries = max_retries if max_retries is not None else self._config.max_retries

        # Resolve API key
        root = find_project_root()
        key_file_paths = [root / "apikey-openrouter.txt"] if root else []
        self._api_key = resolve_api_key(
            explicit=api_key or self._config.api_key,
            env_var="OPENROUTER_API_KEY",
            key_file_paths=key_file_paths,
        )

    # ── LLMAdapter interface ────────────────────────────────────────

    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Send *prompt* as a single user message and return the parsed reply.

        Args:
            prompt: Text of the user message.
            config: Per-call settings; ``temperature``, ``max_tokens``,
                ``model_name``, ``model_params`` and ``timeout_seconds`` are
                read here.

        Returns:
            An ``LLMResponse`` carrying the reply text, the model reported by
            the API (or the requested one), token usage, finish reason and
            provider metadata (latency, response id).

        Raises:
            LLMRateLimitError: Rate limiting persisted through every attempt.
            LLMAPIError: The API returned a non-retryable error, or a 5xx
                error on the final attempt.
        """
        # Budget hooks are not defined in this class; presumably inherited
        # from LLMAdapter.
        self._preflight_budget(config)

        # NOTE(review): the adapter cannot tell "model left unset" apart from
        # "model explicitly set to the default", so an explicitly requested
        # _DEFAULT_MODEL is still replaced by config.model_name. Confirm that
        # this matches the precedence promised in the class docstring.
        model = self._model if self._model != _DEFAULT_MODEL else (config.model_name or self._model)

        messages: list[Dict[str, str]] = []
        if self._system_prompt:
            messages.append({"role": "system", "content": self._system_prompt})
        messages.append({"role": "user", "content": prompt})

        payload: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "temperature": config.temperature,
            "max_tokens": config.max_tokens,
        }
        if config.model_params:
            _merge_model_params(payload, config.model_params)

        # Extra headers are spread last, so a same-named entry replaces the
        # Authorization header built here.
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            **self._extra_headers,
        }
        url = f"{self._api_base}/chat/completions"

        # perf_counter is monotonic: the measured latency cannot go negative
        # or jump when the wall clock is adjusted mid-request.
        start = time.perf_counter()
        data = self._post_with_retries(url, payload, headers, config.timeout_seconds)
        latency = time.perf_counter() - start

        # Parse response. Every level may be missing, null or empty (for
        # example ``"choices": []`` or ``"content": null`` on a tool-call
        # reply), so fall back to neutral values instead of raising.
        choices = data.get("choices") or [{}]
        choice = choices[0] or {}
        message = choice.get("message") or {}
        content = message.get("content") or ""
        finish_reason = choice.get("finish_reason", "stop")
        usage = data.get("usage") or {}

        response = LLMResponse(
            content=content,
            model=data.get("model", model),
            usage={
                "prompt_tokens": usage.get("prompt_tokens", 0),
                "completion_tokens": usage.get("completion_tokens", 0),
                "total_tokens": usage.get("total_tokens", 0),
            },
            finish_reason=finish_reason,
            metadata={
                "provider": "openrouter",
                "latency_seconds": round(latency, 3),
                "response_id": data.get("id", ""),
            },
        )
        self._consume_budget(config, response)
        return response

    def validate_config(self, config: RunConfig) -> bool:
        """Return True when an API key and a model are available and the
        temperature lies within ``[0.0, 2.0]``."""
        if not self._api_key:
            return False
        if not (self._model or config.model_name):
            return False
        if not (0.0 <= config.temperature <= 2.0):
            return False
        return True

    # ── Internals ───────────────────────────────────────────────────

    def _post_with_retries(
        self,
        url: str,
        payload: Dict[str, Any],
        headers: Dict[str, str],
        timeout: int,
    ) -> Dict[str, Any]:
        """POST *payload* as JSON, retrying rate limits and 5xx errors.

        Up to ``max_retries`` retries follow the first attempt, sleeping
        ``2 ** attempt`` seconds (1, 2, 4, ...) between attempts.

        Args:
            url: Endpoint to post to.
            payload: JSON request body.
            headers: HTTP headers for the request.
            timeout: Timeout forwarded to ``post_json``.

        Returns:
            The decoded JSON response returned by ``post_json``.

        Raises:
            LLMRateLimitError: Every attempt was rate limited.
            LLMAPIError: A non-5xx API error occurred, or a 5xx error
                occurred on the final attempt.
        """
        # A negative retry count would skip the loop entirely and end in
        # ``raise None``; treat it as "no retries" so one attempt is made.
        retries = max(0, self._max_retries)
        last_exc: Optional[Exception] = None
        for attempt in range(retries + 1):
            try:
                return post_json(url, payload, headers, timeout=timeout)
            except LLMRateLimitError as exc:
                last_exc = exc
                if attempt < retries:
                    time.sleep(2 ** attempt)
            except LLMAPIError as exc:
                # An error without a numeric status must not be compared
                # against 500: that would raise TypeError and hide the
                # original API error.
                status = getattr(exc, "status_code", None)
                if status is not None and status >= 500 and attempt < retries:
                    last_exc = exc
                    time.sleep(2 ** attempt)
                else:
                    raise
        # Reached only when the final attempt was rate limited; the loop ran
        # at least once, so last_exc is set.
        raise last_exc  # type: ignore[misc]
# Whitelist of model_params keys that are valid OpenAI Chat Completions
# request fields and are copied into the payload unchanged. Keys outside this
# set are provider-specific and must be translated or dropped; nothing is
# merged blindly, because OpenRouter rejects unknown top-level fields with
# HTTP 400.
_OPENAI_PASSTHROUGH_FIELDS = frozenset(
    (
        # sampling
        "top_p",
        "presence_penalty",
        "frequency_penalty",
        "logit_bias",
        "seed",
        # generation control
        "n",
        "stream",
        "stop",
        # tool calling
        "tools",
        "tool_choice",
        "parallel_tool_calls",
        # output shape / diagnostics
        "response_format",
        "logprobs",
        "top_logprobs",
        # request attribution
        "user",
    )
)
# model_params keys with no OpenAI Chat Completions counterpart. They are
# removed silently so the outgoing payload stays valid.
_DROPPED_NON_OPENAI_FIELDS = frozenset(
    (
        # Claude CLI / Anthropic-specific
        "reasoning_effort",
        # llm-connect's own depth knob
        "max_depth",
        # adapter wiring leak
        "claude_cli_path",
        # never sent as-is: _merge_model_params translates it into response_format
        "json_schema",
    )
)
def _merge_model_params(payload: Dict[str, Any], model_params: Dict[str, Any]) -> None:
    """Fold RunConfig.model_params into an OpenAI Chat Completions payload.

    *payload* is modified in place; nothing is returned. Three rules apply:

    * ``json_schema`` (a dict, or a string holding JSON that decodes to a
      dict) is wrapped into a ``response_format`` entry of type
      ``json_schema``, unless *payload* already has a ``response_format``.
      A string that fails to decode, or any non-dict schema, is ignored.
    * Keys listed in ``_OPENAI_PASSTHROUGH_FIELDS`` are copied verbatim.
      This happens after the schema translation, so an explicit
      ``response_format`` in *model_params* replaces the generated one.
    * Every other key, whether a known provider-specific field or simply
      unknown, is left out rather than risking a 400 from OpenRouter. That
      was the failure mode that hit CUST-WP-0045 on 2026-06-02, when
      ``reasoning_effort`` and a top-level ``json_schema`` were merged into
      the body and the API rejected both.
    """
    raw_schema = model_params.get("json_schema")
    if raw_schema is not None and "response_format" not in payload:
        parsed_schema = raw_schema
        if isinstance(raw_schema, str):
            import json as _json

            try:
                parsed_schema = _json.loads(raw_schema)
            except (ValueError, TypeError):
                parsed_schema = None

        if isinstance(parsed_schema, dict):
            # strict stays False on purpose. OpenAI's strict mode demands
            # additionalProperties: false on every object and every property
            # listed in `required`; most application-supplied schemas (the
            # activity-core daily-triage schema among them) do not satisfy
            # that. Non-strict still uses the schema as a soft constraint.
            # Callers who want strict enforcement pass their own
            # `response_format` with `strict: true`.
            wrapper = {
                "name": "structured_output",
                "schema": parsed_schema,
                "strict": False,
            }
            payload["response_format"] = {"type": "json_schema", "json_schema": wrapper}

    # Copy whitelisted keys only; dropped and unknown keys never reach the payload.
    payload.update(
        (name, setting)
        for name, setting in model_params.items()
        if name not in _DROPPED_NON_OPENAI_FIELDS and name in _OPENAI_PASSTHROUGH_FIELDS
    )