# llm-connect/llm_connect/openrouter.py

"""
OpenRouter adapter — calls the OpenAI-compatible chat completions API.
"""

import time
from typing import Optional, Dict, Any

from llm_connect.adapter import LLMAdapter
from llm_connect.models import RunConfig, LLMResponse
from llm_connect.config import LLMConfig, resolve_api_key, find_project_root
from llm_connect._http import post_json
from llm_connect.exceptions import (
    LLMConfigurationError,
    LLMAPIError,
    LLMRateLimitError,
)

# Fallback OpenRouter model identifier, used when neither the adapter's
# constructor nor its LLMConfig names a model.  While the adapter is still on
# this fallback, a per-call RunConfig.model_name may replace it.
_DEFAULT_MODEL = "anthropic/claude-sonnet-4"


class OpenRouterAdapter(LLMAdapter):
    """LLM adapter that calls the OpenRouter chat completions endpoint.

    Constructor args override values from *config*; *config* overrides
    global defaults.  The model used for a given call is resolved as:
    ``constructor model > RunConfig.model_name > default``.

    A model that comes from ``LLMConfig.model`` is used as-is unless it is
    equal to the built-in default, in which case ``RunConfig.model_name``
    may still replace it for a given call.
    """

    def __init__(
        self,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        config: Optional[LLMConfig] = None,
        system_prompt: Optional[str] = None,
        extra_headers: Optional[Dict[str, str]] = None,
        max_retries: Optional[int] = None,
    ):
        """Build an adapter, layering constructor args over *config*.

        Args:
            model: Model identifier; when given it is pinned for every call.
            api_key: Explicit API key; falls back to ``config.api_key``, the
                ``OPENROUTER_API_KEY`` environment variable, then an
                ``apikey-openrouter.txt`` file in the project root.
            api_base: Base URL of the API; trailing slashes are stripped.
            config: Library-level configuration; a default ``LLMConfig()`` is
                created when omitted.
            system_prompt: Optional system message prepended to every call.
            extra_headers: Extra HTTP headers sent with every request.
            max_retries: Number of retries after the first attempt; falls back
                to ``config.max_retries`` when ``None``.
        """
        self._config = config or LLMConfig()
        # Remember whether the caller passed a model explicitly.  Comparing
        # the resolved value with _DEFAULT_MODEL alone cannot tell "caller
        # asked for the default model" apart from "nobody chose a model".
        self._model_pinned = bool(model)
        self._model = model or self._config.model or _DEFAULT_MODEL
        self._api_base = (api_base or self._config.api_base).rstrip("/")
        self._system_prompt = system_prompt
        self._extra_headers = extra_headers or {}
        self._max_retries = max_retries if max_retries is not None else self._config.max_retries

        # Resolve API key
        root = find_project_root()
        key_file_paths = [root / "apikey-openrouter.txt"] if root else []
        self._api_key = resolve_api_key(
            explicit=api_key or self._config.api_key,
            env_var="OPENROUTER_API_KEY",
            key_file_paths=key_file_paths,
        )

    # ── LLMAdapter interface ────────────────────────────────────────

    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Send *prompt* as a single user message and return the reply.

        Args:
            prompt: Text of the user message.
            config: Per-call settings (temperature, max_tokens, timeout,
                optional model name and extra model parameters).

        Returns:
            An ``LLMResponse`` carrying the reply text, the model name, token
            usage, the finish reason and provider metadata.

        Raises:
            Whatever ``_post_with_retries`` re-raises once retries are
            exhausted or the error is not retryable.
        """
        # Budget hooks are not defined in this class; presumably inherited
        # from LLMAdapter — confirm against the base class.
        self._preflight_budget(config)
        model = self._resolve_model(config)

        messages: list[Dict[str, str]] = []
        if self._system_prompt:
            messages.append({"role": "system", "content": self._system_prompt})
        messages.append({"role": "user", "content": prompt})

        payload: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "temperature": config.temperature,
            "max_tokens": config.max_tokens,
        }
        if config.model_params:
            _merge_model_params(payload, config.model_params)

        headers = {
            "Authorization": f"Bearer {self._api_key}",
            **self._extra_headers,
        }
        url = f"{self._api_base}/chat/completions"

        # perf_counter is monotonic, so the measured latency cannot go
        # negative or jump when the wall clock is adjusted mid-request.
        start = time.perf_counter()
        data = self._post_with_retries(url, payload, headers, config.timeout_seconds)
        latency = time.perf_counter() - start

        # Parse response.  Every level may be absent, empty or JSON null
        # (e.g. "choices": [] or "content": null), so normalise each one
        # instead of relying on .get() defaults, which only cover *missing*
        # keys.
        choices = data.get("choices") or [{}]
        choice = choices[0] or {}
        message = choice.get("message") or {}
        content = message.get("content") or ""
        finish_reason = choice.get("finish_reason", "stop")
        usage = data.get("usage") or {}

        response = LLMResponse(
            content=content,
            model=data.get("model", model),
            usage={
                "prompt_tokens": usage.get("prompt_tokens") or 0,
                "completion_tokens": usage.get("completion_tokens") or 0,
                "total_tokens": usage.get("total_tokens") or 0,
            },
            finish_reason=finish_reason,
            metadata={
                "provider": "openrouter",
                "latency_seconds": round(latency, 3),
                "response_id": data.get("id", ""),
            },
        )
        self._consume_budget(config, response)
        return response

    def validate_config(self, config: RunConfig) -> bool:
        """Return True when this adapter can run a call with *config*.

        Requires a resolved API key, a model name from either the adapter or
        *config*, and a temperature within ``[0.0, 2.0]``.
        """
        if not self._api_key:
            return False
        if not (self._model or config.model_name):
            return False
        if not (0.0 <= config.temperature <= 2.0):
            return False
        return True

    # ── Internals ───────────────────────────────────────────────────

    def _resolve_model(self, config: RunConfig) -> str:
        """Pick the model for one call.

        A model passed to the constructor always wins, even when it happens
        to equal the built-in default.  Otherwise a non-default adapter model
        is used, and only an adapter still on the default defers to
        ``config.model_name``.
        """
        if self._model_pinned or self._model != _DEFAULT_MODEL:
            return self._model
        return config.model_name or self._model

    def _post_with_retries(
        self,
        url: str,
        payload: Dict[str, Any],
        headers: Dict[str, str],
        timeout: int,
    ) -> Dict[str, Any]:
        """POST *payload* as JSON, retrying transient failures.

        Rate-limit errors and API errors with a 5xx status are retried up to
        ``max_retries`` times with exponential backoff (1s, 2s, 4s, ...).
        Any other API error is re-raised immediately.

        Returns:
            The decoded JSON response body.

        Raises:
            LLMRateLimitError: Still rate limited after the final attempt.
            LLMAPIError: Non-retryable error, or a 5xx on the final attempt.
        """
        # Clamp so that a negative or missing retry count still performs one
        # attempt instead of skipping the loop entirely.
        retries = max(self._max_retries or 0, 0)
        for attempt in range(retries + 1):
            try:
                return post_json(url, payload, headers, timeout=timeout)
            except LLMRateLimitError:
                if attempt >= retries:
                    raise
            except LLMAPIError as exc:
                # An error without a numeric status (presumably a
                # transport-level failure — confirm against _http) is not
                # retried, and must not crash the comparison below.
                status = getattr(exc, "status_code", None)
                is_server_error = isinstance(status, int) and status >= 500
                if not is_server_error or attempt >= retries:
                    raise
            time.sleep(2 ** attempt)
        # Unreachable: the final iteration always returns or raises.
        raise RuntimeError("retry loop exited without a result")


# Whitelist of OpenAI Chat Completions request fields that are copied from
# model_params into the payload unchanged.  Keys outside this set are
# provider-specific: they are translated or dropped, never blind-merged,
# because OpenRouter answers unknown top-level fields with HTTP 400.
_OPENAI_PASSTHROUGH_FIELDS = frozenset((
    "top_p",
    "n",
    "stream",
    "stop",
    "presence_penalty",
    "frequency_penalty",
    "logit_bias",
    "user",
    "seed",
    "tools",
    "tool_choice",
    "response_format",
    "logprobs",
    "top_logprobs",
    "parallel_tool_calls",
))

# model_params keys with no OpenAI Chat Completions counterpart.  They are
# discarded without notice so the request body stays valid.
_DROPPED_NON_OPENAI_FIELDS = frozenset((
    "reasoning_effort",  # Claude CLI / Anthropic-specific
    "max_depth",         # llm-connect's own depth knob
    "claude_cli_path",   # adapter wiring leak
    "json_schema",       # translated into response_format by the merger
))


def _merge_model_params(payload: Dict[str, Any], model_params: Dict[str, Any]) -> None:
    """Fold RunConfig.model_params into a Chat Completions payload, in place.

    Three things happen:

    * ``json_schema`` (a dict, or a JSON string that decodes to a dict) is
      wrapped into a ``response_format`` entry, unless the payload already
      carries one.
    * Keys on the OpenAI whitelist are copied across as-is; an explicit
      ``response_format`` in *model_params* therefore replaces the wrapper
      built from ``json_schema``.
    * Known provider-specific keys and any unrecognised keys are skipped,
      since letting them through triggers a 400 from OpenRouter (the failure
      mode that hit CUST-WP-0045 on 2026-06-02, when reasoning_effort and a
      top-level json_schema were merged into the body and both were
      rejected).

    Args:
        payload: Request body under construction; mutated in place.
        model_params: Caller-supplied extra parameters; not mutated.
    """
    declared = model_params.get("json_schema")
    if declared is not None and "response_format" not in payload:
        if isinstance(declared, str):
            import json as _json

            try:
                declared = _json.loads(declared)
            except (ValueError, TypeError):
                # Undecodable schema text: carry on without a schema.
                declared = None
        if isinstance(declared, dict):
            # strict is left off on purpose.  OpenAI's strict mode demands
            # additionalProperties=false on every object and every property
            # listed as required, which most application schemas do not
            # satisfy (the activity-core daily-triage schema has neither).
            # Non-strict still applies the schema as a soft constraint, and
            # callers who want strict can send their own `response_format`
            # with `strict: true`.
            wrapper = {
                "name": "structured_output",
                "schema": declared,
                "strict": False,
            }
            payload["response_format"] = {
                "type": "json_schema",
                "json_schema": wrapper,
            }

    # Copy whitelisted keys only; everything else is left out of the payload.
    forwarded = {
        name: setting
        for name, setting in model_params.items()
        if name not in _DROPPED_NON_OPENAI_FIELDS
        and name in _OPENAI_PASSTHROUGH_FIELDS
    }
    payload.update(forwarded)