""" OpenRouter adapter — calls the OpenAI-compatible chat completions API. """ import time from typing import Optional, Dict, Any from llm_connect.adapter import LLMAdapter from llm_connect.models import RunConfig, LLMResponse from llm_connect.config import LLMConfig, resolve_api_key, find_project_root from llm_connect._http import post_json from llm_connect.exceptions import ( LLMConfigurationError, LLMAPIError, LLMRateLimitError, ) _DEFAULT_MODEL = "anthropic/claude-sonnet-4" class OpenRouterAdapter(LLMAdapter): """LLM adapter that calls the OpenRouter chat completions endpoint. Constructor args override values from *config*; *config* overrides global defaults. The model used for a given call is resolved as: ``constructor model > RunConfig.model_name > default``. """ def __init__( self, model: Optional[str] = None, api_key: Optional[str] = None, api_base: Optional[str] = None, config: Optional[LLMConfig] = None, system_prompt: Optional[str] = None, extra_headers: Optional[Dict[str, str]] = None, max_retries: Optional[int] = None, ): self._config = config or LLMConfig() # Track whether the model was explicitly supplied (constructor or # LLMConfig). Comparing self._model to _DEFAULT_MODEL is not enough — # callers who pass --model anthropic/claude-sonnet-4 happen to match # the default and would otherwise be misrouted to RunConfig.model_name # (which defaults to "gpt-4" — quietly sending every call to OpenAI's # gpt-4 model, which is what broke the activity-core CUST-WP-0045 # canary on 2026-06-02). self._explicit_model = model is not None or self._config.model is not None self._model = model or self._config.model or _DEFAULT_MODEL self._api_base = (api_base or self._config.api_base).rstrip("/") self._system_prompt = system_prompt self._extra_headers = extra_headers or {} self._max_retries = max_retries if max_retries is not None else self._config.max_retries # Resolve API key root = find_project_root() key_file_paths = [root / "apikey-openrouter.txt"] if root else [] self._api_key = resolve_api_key( explicit=api_key or self._config.api_key, env_var="OPENROUTER_API_KEY", key_file_paths=key_file_paths, ) # ── LLMAdapter interface ──────────────────────────────────────── def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse: self._preflight_budget(config) # Explicit constructor/LLMConfig model wins; only fall back to the # per-call RunConfig.model_name when the adapter wasn't told what to # use. RunConfig.model_name defaults to "gpt-4", so falling back # unconditionally would silently misroute callers. if self._explicit_model: model = self._model else: model = config.model_name or self._model messages: list[Dict[str, str]] = [] if self._system_prompt: messages.append({"role": "system", "content": self._system_prompt}) messages.append({"role": "user", "content": prompt}) payload: Dict[str, Any] = { "model": model, "messages": messages, "temperature": config.temperature, "max_tokens": config.max_tokens, } if config.model_params: _merge_model_params(payload, config.model_params) headers = { "Authorization": f"Bearer {self._api_key}", **self._extra_headers, } url = f"{self._api_base}/chat/completions" start = time.time() data = self._post_with_retries(url, payload, headers, config.timeout_seconds) latency = time.time() - start # Parse response choice = data.get("choices", [{}])[0] content = choice.get("message", {}).get("content", "") finish_reason = choice.get("finish_reason", "stop") usage = data.get("usage", {}) response = LLMResponse( content=content, model=data.get("model", model), usage={ "prompt_tokens": usage.get("prompt_tokens", 0), "completion_tokens": usage.get("completion_tokens", 0), "total_tokens": usage.get("total_tokens", 0), }, finish_reason=finish_reason, metadata={ "provider": "openrouter", "latency_seconds": round(latency, 3), "response_id": data.get("id", ""), }, ) self._consume_budget(config, response) return response def validate_config(self, config: RunConfig) -> bool: if not self._api_key: return False if not (self._model or config.model_name): return False if not (0.0 <= config.temperature <= 2.0): return False return True # ── Internals ─────────────────────────────────────────────────── def _post_with_retries( self, url: str, payload: Dict[str, Any], headers: Dict[str, str], timeout: int, ) -> Dict[str, Any]: last_exc: Optional[Exception] = None for attempt in range(self._max_retries + 1): try: return post_json(url, payload, headers, timeout=timeout) except LLMRateLimitError as exc: last_exc = exc if attempt < self._max_retries: time.sleep(2 ** attempt) except LLMAPIError as exc: if exc.status_code >= 500 and attempt < self._max_retries: last_exc = exc time.sleep(2 ** attempt) else: raise raise last_exc # type: ignore[misc] # OpenAI Chat Completions fields that map straight through from model_params. # Anything not in this set is provider-specific and must be either translated # or dropped — we never blind-merge into the payload, because OpenRouter # rejects unknown top-level fields with HTTP 400. _OPENAI_PASSTHROUGH_FIELDS = frozenset({ "top_p", "n", "stream", "stop", "presence_penalty", "frequency_penalty", "logit_bias", "user", "seed", "tools", "tool_choice", "response_format", "logprobs", "top_logprobs", "parallel_tool_calls", }) # Provider-specific model_params keys that have no OpenAI Chat Completions # equivalent and must be silently dropped to keep payloads valid. _DROPPED_NON_OPENAI_FIELDS = frozenset({ "reasoning_effort", # Claude CLI / Anthropic-specific "max_depth", # llm-connect's own depth knob "claude_cli_path", # adapter wiring leak "json_schema", # translated below into response_format }) def _merge_model_params(payload: Dict[str, Any], model_params: Dict[str, Any]) -> None: """Merge RunConfig.model_params into an OpenAI Chat Completions payload. Pass-through whitelisted OpenAI keys, translate json_schema into the proper response_format wrapper, drop known provider-specific fields, and ignore anything else rather than letting it through and triggering a 400 from OpenRouter (the failure mode that hit CUST-WP-0045 on 2026-06-02 — reasoning_effort and a top-level json_schema were merged into the body and the API rejected both). """ schema = model_params.get("json_schema") if schema is not None and "response_format" not in payload: if isinstance(schema, str): try: import json as _json schema = _json.loads(schema) except (ValueError, TypeError): schema = None if isinstance(schema, dict): # strict=False: OpenAI's strict mode requires additionalProperties # to be false on every object and every property in the required # list. Most application-supplied schemas are not written that # way (the activity-core daily-triage schema, for example, has # neither). With strict=False, OpenRouter still honours the # schema as a soft constraint and the model's output remains # structured. Callers can opt back into strict by including # `strict: true` themselves in a custom `response_format`. payload["response_format"] = { "type": "json_schema", "json_schema": { "name": "structured_output", "schema": schema, "strict": False, }, } for key, value in model_params.items(): if key in _DROPPED_NON_OPENAI_FIELDS: continue if key in _OPENAI_PASSTHROUGH_FIELDS: payload[key] = value # else: silently drop unknown keys rather than risk a 400.