Implement llm-connect ADHOC diagnostics

2026-06-03 11:56:21 +02:00
parent 79c899b694
commit 24f4c09d42
17 changed files with 1618 additions and 611 deletions
--- a/llm_connect/openrouter.py
+++ b/llm_connect/openrouter.py
@@ -1,221 +1,151 @@
-"""
-OpenRouter adapter — calls the OpenAI-compatible chat completions API.
-"""
-
-import time
-from typing import Optional, Dict, Any
-
-from llm_connect.adapter import LLMAdapter
-from llm_connect.models import RunConfig, LLMResponse
-from llm_connect.config import LLMConfig, resolve_api_key, find_project_root
-from llm_connect._http import post_json
-from llm_connect.exceptions import (
-    LLMConfigurationError,
-    LLMAPIError,
-    LLMRateLimitError,
-)
-
-_DEFAULT_MODEL = "anthropic/claude-sonnet-4"
-
-
-class OpenRouterAdapter(LLMAdapter):
-    """LLM adapter that calls the OpenRouter chat completions endpoint.
-
-    Constructor args override values from *config*; *config* overrides
-    global defaults.  The model used for a given call is resolved as:
-    ``constructor model > RunConfig.model_name > default``.
-    """
-
-    def __init__(
-        self,
-        model: Optional[str] = None,
-        api_key: Optional[str] = None,
-        api_base: Optional[str] = None,
-        config: Optional[LLMConfig] = None,
-        system_prompt: Optional[str] = None,
-        extra_headers: Optional[Dict[str, str]] = None,
-        max_retries: Optional[int] = None,
-    ):
-        self._config = config or LLMConfig()
-        # Track whether the model was explicitly supplied (constructor or
-        # LLMConfig). Comparing self._model to _DEFAULT_MODEL is not enough —
-        # callers who pass --model anthropic/claude-sonnet-4 happen to match
-        # the default and would otherwise be misrouted to RunConfig.model_name
-        # (which defaults to "gpt-4" — quietly sending every call to OpenAI's
-        # gpt-4 model, which is what broke the activity-core CUST-WP-0045
-        # canary on 2026-06-02).
-        self._explicit_model = model is not None or self._config.model is not None
-        self._model = model or self._config.model or _DEFAULT_MODEL
-        self._api_base = (api_base or self._config.api_base).rstrip("/")
-        self._system_prompt = system_prompt
-        self._extra_headers = extra_headers or {}
-        self._max_retries = max_retries if max_retries is not None else self._config.max_retries
-
-        # Resolve API key
-        root = find_project_root()
-        key_file_paths = [root / "apikey-openrouter.txt"] if root else []
-        self._api_key = resolve_api_key(
-            explicit=api_key or self._config.api_key,
-            env_var="OPENROUTER_API_KEY",
-            key_file_paths=key_file_paths,
-        )
-
-    # ── LLMAdapter interface ────────────────────────────────────────
-
-    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
-        self._preflight_budget(config)
-        # Explicit constructor/LLMConfig model wins; only fall back to the
-        # per-call RunConfig.model_name when the adapter wasn't told what to
-        # use. RunConfig.model_name defaults to "gpt-4", so falling back
-        # unconditionally would silently misroute callers.
-        if self._explicit_model:
-            model = self._model
-        else:
-            model = config.model_name or self._model
-
-        messages: list[Dict[str, str]] = []
-        if self._system_prompt:
-            messages.append({"role": "system", "content": self._system_prompt})
-        messages.append({"role": "user", "content": prompt})
-
-        payload: Dict[str, Any] = {
-            "model": model,
-            "messages": messages,
-            "temperature": config.temperature,
-            "max_tokens": config.max_tokens,
-        }
-        if config.model_params:
-            _merge_model_params(payload, config.model_params)
-
-        headers = {
-            "Authorization": f"Bearer {self._api_key}",
-            **self._extra_headers,
-        }
-        url = f"{self._api_base}/chat/completions"
-
-        start = time.time()
-        data = self._post_with_retries(url, payload, headers, config.timeout_seconds)
-        latency = time.time() - start
-
-        # Parse response
-        choice = data.get("choices", [{}])[0]
-        content = choice.get("message", {}).get("content", "")
-        finish_reason = choice.get("finish_reason", "stop")
-        usage = data.get("usage", {})
-
-        response = LLMResponse(
-            content=content,
-            model=data.get("model", model),
-            usage={
-                "prompt_tokens": usage.get("prompt_tokens", 0),
-                "completion_tokens": usage.get("completion_tokens", 0),
-                "total_tokens": usage.get("total_tokens", 0),
-            },
-            finish_reason=finish_reason,
-            metadata={
-                "provider": "openrouter",
-                "latency_seconds": round(latency, 3),
-                "response_id": data.get("id", ""),
-            },
-        )
-        self._consume_budget(config, response)
-        return response
-
-    def validate_config(self, config: RunConfig) -> bool:
-        if not self._api_key:
-            return False
-        if not (self._model or config.model_name):
-            return False
-        if not (0.0 <= config.temperature <= 2.0):
-            return False
-        return True
-
-    # ── Internals ───────────────────────────────────────────────────
-
-    def _post_with_retries(
-        self,
-        url: str,
-        payload: Dict[str, Any],
-        headers: Dict[str, str],
-        timeout: int,
-    ) -> Dict[str, Any]:
-        last_exc: Optional[Exception] = None
-        for attempt in range(self._max_retries + 1):
-            try:
-                return post_json(url, payload, headers, timeout=timeout)
-            except LLMRateLimitError as exc:
-                last_exc = exc
-                if attempt < self._max_retries:
-                    time.sleep(2 ** attempt)
-            except LLMAPIError as exc:
-                if exc.status_code >= 500 and attempt < self._max_retries:
-                    last_exc = exc
-                    time.sleep(2 ** attempt)
-                else:
-                    raise
-        raise last_exc  # type: ignore[misc]
-
-
-# OpenAI Chat Completions fields that map straight through from model_params.
-# Anything not in this set is provider-specific and must be either translated
-# or dropped — we never blind-merge into the payload, because OpenRouter
-# rejects unknown top-level fields with HTTP 400.
-_OPENAI_PASSTHROUGH_FIELDS = frozenset({
-    "top_p", "n", "stream", "stop", "presence_penalty",
-    "frequency_penalty", "logit_bias", "user", "seed",
-    "tools", "tool_choice", "response_format",
-    "logprobs", "top_logprobs", "parallel_tool_calls",
-})
-
-# Provider-specific model_params keys that have no OpenAI Chat Completions
-# equivalent and must be silently dropped to keep payloads valid.
-_DROPPED_NON_OPENAI_FIELDS = frozenset({
-    "reasoning_effort",  # Claude CLI / Anthropic-specific
-    "max_depth",         # llm-connect's own depth knob
-    "claude_cli_path",   # adapter wiring leak
-    "json_schema",       # translated below into response_format
-})
-
-
-def _merge_model_params(payload: Dict[str, Any], model_params: Dict[str, Any]) -> None:
-    """Merge RunConfig.model_params into an OpenAI Chat Completions payload.
-
-    Pass-through whitelisted OpenAI keys, translate json_schema into the
-    proper response_format wrapper, drop known provider-specific fields,
-    and ignore anything else rather than letting it through and triggering
-    a 400 from OpenRouter (the failure mode that hit CUST-WP-0045 on
-    2026-06-02 — reasoning_effort and a top-level json_schema were merged
-    into the body and the API rejected both).
-    """
-    schema = model_params.get("json_schema")
-    if schema is not None and "response_format" not in payload:
-        if isinstance(schema, str):
-            try:
-                import json as _json
-                schema = _json.loads(schema)
-            except (ValueError, TypeError):
-                schema = None
-        if isinstance(schema, dict):
-            # strict=False: OpenAI's strict mode requires additionalProperties
-            # to be false on every object and every property in the required
-            # list. Most application-supplied schemas are not written that
-            # way (the activity-core daily-triage schema, for example, has
-            # neither). With strict=False, OpenRouter still honours the
-            # schema as a soft constraint and the model's output remains
-            # structured. Callers can opt back into strict by including
-            # `strict: true` themselves in a custom `response_format`.
-            payload["response_format"] = {
-                "type": "json_schema",
-                "json_schema": {
-                    "name": "structured_output",
-                    "schema": schema,
-                    "strict": False,
-                },
-            }
-
-    for key, value in model_params.items():
-        if key in _DROPPED_NON_OPENAI_FIELDS:
-            continue
-        if key in _OPENAI_PASSTHROUGH_FIELDS:
-            payload[key] = value
-        # else: silently drop unknown keys rather than risk a 400.
+"""
+OpenRouter adapter - calls the OpenAI-compatible chat completions API.
+"""
+
+import time
+from typing import Any, Dict, Optional
+
+from llm_connect._http import post_json
+from llm_connect._payload import merge_openai_chat_model_params
+from llm_connect.adapter import LLMAdapter
+from llm_connect.config import LLMConfig, find_project_root, resolve_api_key
+from llm_connect.exceptions import LLMAPIError, LLMRateLimitError
+from llm_connect.models import LLMResponse, RunConfig
+
+_DEFAULT_MODEL = "anthropic/claude-sonnet-4"
+
+
+class OpenRouterAdapter(LLMAdapter):
+    """LLM adapter that calls the OpenRouter chat completions endpoint.
+
+    Constructor args override values from *config*; *config* overrides
+    global defaults. The model used for a given call is resolved as:
+    ``constructor model > LLMConfig.model > RunConfig.model_name > default``.
+    """
+
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        config: Optional[LLMConfig] = None,
+        system_prompt: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
+        max_retries: Optional[int] = None,
+    ):
+        """Resolve the adapter's settings and its OpenRouter API key.
+
+        Each explicit argument is preferred over the matching field of
+        *config*; a fresh ``LLMConfig()`` is used when *config* is omitted.
+        The key is looked up through ``resolve_api_key`` from the explicit
+        value, the ``OPENROUTER_API_KEY`` environment variable name and an
+        ``apikey-openrouter.txt`` file in the project root (when one is
+        found).
+        """
+        settings = config or LLMConfig()
+        self._config = settings
+
+        # Remember whether a model was explicitly supplied (constructor or
+        # LLMConfig). Testing self._model against _DEFAULT_MODEL cannot tell:
+        # a caller passing --model anthropic/claude-sonnet-4 matches the
+        # default and would be misrouted to RunConfig.model_name, which
+        # defaults to "gpt-4" (this is what broke the activity-core
+        # CUST-WP-0045 canary on 2026-06-02).
+        self._explicit_model = not (model is None and settings.model is None)
+        self._model = model or settings.model or _DEFAULT_MODEL
+
+        base_url = api_base or settings.api_base
+        self._api_base = base_url.rstrip("/")
+        self._system_prompt = system_prompt
+        self._extra_headers = extra_headers or {}
+        if max_retries is None:
+            self._max_retries = settings.max_retries
+        else:
+            self._max_retries = max_retries
+
+        project_root = find_project_root()
+        candidate_key_files = []
+        if project_root:
+            candidate_key_files.append(project_root / "apikey-openrouter.txt")
+        self._api_key = resolve_api_key(
+            explicit=api_key or settings.api_key,
+            env_var="OPENROUTER_API_KEY",
+            key_file_paths=candidate_key_files,
+        )
+
+    # LLMAdapter interface
+
+    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
+        """Send *prompt* as a single chat-completions request.
+
+        Args:
+            prompt: Text sent as the ``user`` message; the adapter's system
+                prompt, when set, is sent ahead of it.
+            config: Per-call settings (temperature, max_tokens, timeout,
+                optional ``model_params`` and the fallback ``model_name``).
+
+        Returns:
+            An ``LLMResponse`` built from the first choice of the reply, with
+            token usage and provider/latency/response-id metadata.
+
+        Raises:
+            Whatever ``_post_with_retries`` raises once retries are exhausted
+            or the error is not retryable.
+        """
+        self._preflight_budget(config)
+        # Explicit constructor/LLMConfig model wins; only fall back to the
+        # per-call RunConfig.model_name when the adapter was not told what to
+        # use. RunConfig.model_name defaults to "gpt-4", so falling back
+        # unconditionally would silently misroute callers.
+        if self._explicit_model:
+            model = self._model
+        else:
+            model = config.model_name or self._model
+
+        messages: list[Dict[str, str]] = []
+        if self._system_prompt:
+            messages.append({"role": "system", "content": self._system_prompt})
+        messages.append({"role": "user", "content": prompt})
+
+        payload: Dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "temperature": config.temperature,
+            "max_tokens": config.max_tokens,
+        }
+        if config.model_params:
+            merge_openai_chat_model_params(payload, config.model_params)
+
+        headers = {
+            "Authorization": f"Bearer {self._api_key}",
+            **self._extra_headers,
+        }
+        url = f"{self._api_base}/chat/completions"
+
+        # Monotonic clock: wall-clock adjustments must not skew the latency.
+        start = time.monotonic()
+        data = self._post_with_retries(url, payload, headers, config.timeout_seconds)
+        latency = time.monotonic() - start
+
+        # A key that is present but null or empty bypasses dict.get()
+        # defaults, so normalise with ``or``: an empty "choices" list must
+        # not raise IndexError, and a null "message"/"content"/"usage" must
+        # not crash or leak None into the response.
+        choices = data.get("choices") or [{}]
+        choice = choices[0] or {}
+        message = choice.get("message") or {}
+        content = message.get("content") or ""
+        finish_reason = choice.get("finish_reason", "stop")
+        usage = data.get("usage") or {}
+
+        response = LLMResponse(
+            content=content,
+            model=data.get("model") or model,
+            usage={
+                "prompt_tokens": usage.get("prompt_tokens") or 0,
+                "completion_tokens": usage.get("completion_tokens") or 0,
+                "total_tokens": usage.get("total_tokens") or 0,
+            },
+            finish_reason=finish_reason,
+            metadata={
+                "provider": "openrouter",
+                "latency_seconds": round(latency, 3),
+                "response_id": data.get("id", ""),
+            },
+        )
+        self._consume_budget(config, response)
+        return response
+
+    def validate_config(self, config: RunConfig) -> bool:
+        """Report whether the adapter and *config* are usable for a call.
+
+        Requires a resolved API key, a model name from either the adapter or
+        *config*, and a temperature within ``[0.0, 2.0]``.
+        """
+        # Short-circuit order mirrors the checks: key, then model, then
+        # temperature, so later fields are not read once a check has failed.
+        if self._api_key and (self._model or config.model_name):
+            return bool(0.0 <= config.temperature <= 2.0)
+        return False
+
+    # Internals
+
+    def _post_with_retries(
+        self,
+        url: str,
+        payload: Dict[str, Any],
+        headers: Dict[str, str],
+        timeout: int,
+    ) -> Dict[str, Any]:
+        """POST *payload* to *url*, retrying transient failures.
+
+        Rate-limit errors and API errors with a status of 500 or above are
+        retried up to ``self._max_retries`` times, sleeping ``2 ** attempt``
+        seconds between attempts. Any other ``LLMAPIError`` is re-raised
+        immediately.
+
+        Returns:
+            The decoded JSON body returned by ``post_json``.
+
+        Raises:
+            LLMRateLimitError: The final attempt was still rate limited.
+            LLMAPIError: A non-retryable error occurred, or the final attempt
+                still failed with a server error.
+        """
+        # Always make at least one attempt. With a negative retry count the
+        # loop would not run and the trailing ``raise`` would raise None,
+        # which is a TypeError, without ever sending the request.
+        retries = max(self._max_retries, 0)
+        last_exc: Optional[Exception] = None
+        for attempt in range(retries + 1):
+            try:
+                return post_json(url, payload, headers, timeout=timeout)
+            except LLMRateLimitError as exc:
+                last_exc = exc
+            except LLMAPIError as exc:
+                # NOTE(review): status_code is presumably unset/None for
+                # failures without an HTTP status; confirm against
+                # post_json. Comparing None with 500 would raise TypeError
+                # and mask the real error, so treat it as non-retryable.
+                status = getattr(exc, "status_code", None)
+                if status is None or status < 500:
+                    raise
+                last_exc = exc
+            if attempt < retries:
+                time.sleep(2 ** attempt)
+        # Reached only after the final attempt failed with a retryable
+        # error, so last_exc is always set here.
+        raise last_exc  # type: ignore[misc]