Implement llm-connect ADHOC diagnostics

2026-06-03 11:56:21 +02:00
parent 79c899b694
commit 24f4c09d42
17 changed files with 1618 additions and 611 deletions
--- a/llm_connect/claude_code.py
+++ b/llm_connect/claude_code.py
@@ -1,277 +1,289 @@
-"""
-Claude Code CLI adapter — runs the ``claude`` CLI as a subprocess.
-"""
-
-import asyncio
-import json
-import os
-import subprocess
-from pathlib import Path
-from typing import Optional
-
-from llm_connect.adapter import LLMAdapter
-from llm_connect.models import RunConfig, LLMResponse
-from llm_connect.config import LLMConfig
-from llm_connect._token_estimator import estimate_tokens
-from llm_connect.exceptions import (
-    LLMSubprocessError,
-    LLMTimeoutError,
-)
-
-
-class ClaudeCodeAdapter(LLMAdapter):
-    """LLM adapter that shells out to the ``claude`` CLI with ``--print``.
-
-    The compiled prompt is piped via **stdin** to avoid shell argument
-    length limits (compiled prompts can exceed 30 KB).
-    """
-
-    def __init__(
-        self,
-        cli_path: Optional[str] = None,
-        model: Optional[str] = None,
-        config: Optional[LLMConfig] = None,
-    ):
-        self._config = config or LLMConfig(provider="claude-code")
-        self._cli_path = cli_path or self._resolve_cli_path()
-        self._model = model
-
-    # ── LLMAdapter interface ────────────────────────────────────────
-
-    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
-        self._preflight_budget(config)
-        cmd = self._build_command(config)
-
-        timeout = config.timeout_seconds or self._config.timeout_seconds
-
-        try:
-            result = subprocess.run(
-                cmd,
-                input=prompt,
-                capture_output=True,
-                text=True,
-                timeout=timeout,
-            )
-        except subprocess.TimeoutExpired as exc:
-            raise LLMTimeoutError(
-                f"claude CLI timed out after {timeout}s",
-                cause=exc,
-            ) from exc
-
-        if result.returncode != 0:
-            raise LLMSubprocessError(
-                f"claude CLI exited with code {result.returncode}",
-                return_code=result.returncode,
-                stderr=result.stderr,
-            )
-
-        content = _unwrap_cli_json_envelope(result.stdout, config)
-        prompt_tokens = estimate_tokens(prompt)
-        completion_tokens = estimate_tokens(content)
-
-        response = LLMResponse(
-            content=content,
-            model=self._model or "claude-code-cli",
-            usage={
-                "prompt_tokens": prompt_tokens,
-                "completion_tokens": completion_tokens,
-                "total_tokens": prompt_tokens + completion_tokens,
-            },
-            finish_reason="stop",
-            metadata={
-                "provider": "claude-code",
-                "cli_path": self._cli_path,
-            },
-        )
-        self._consume_budget(config, response)
-        return response
-
-    async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
-        """Native async implementation using asyncio.create_subprocess_exec."""
-        self._preflight_budget(config)
-        cmd = self._build_command(config)
-
-        timeout = config.timeout_seconds or self._config.timeout_seconds
-
-        try:
-            proc = await asyncio.create_subprocess_exec(
-                *cmd,
-                stdin=asyncio.subprocess.PIPE,
-                stdout=asyncio.subprocess.PIPE,
-                stderr=asyncio.subprocess.PIPE,
-            )
-            stdout_bytes, stderr_bytes = await asyncio.wait_for(
-                proc.communicate(input=prompt.encode()),
-                timeout=timeout,
-            )
-        except asyncio.TimeoutError as exc:
-            raise LLMTimeoutError(
-                f"claude CLI timed out after {timeout}s",
-                cause=exc,
-            ) from exc
-
-        if proc.returncode != 0:
-            raise LLMSubprocessError(
-                f"claude CLI exited with code {proc.returncode}",
-                return_code=proc.returncode,
-                stderr=stderr_bytes.decode(),
-            )
-
-        content = _unwrap_cli_json_envelope(stdout_bytes.decode(), config)
-        prompt_tokens = estimate_tokens(prompt)
-        completion_tokens = estimate_tokens(content)
-
-        response = LLMResponse(
-            content=content,
-            model=self._model or "claude-code-cli",
-            usage={
-                "prompt_tokens": prompt_tokens,
-                "completion_tokens": completion_tokens,
-                "total_tokens": prompt_tokens + completion_tokens,
-            },
-            finish_reason="stop",
-            metadata={
-                "provider": "claude-code",
-                "cli_path": self._cli_path,
-                "async": True,
-            },
-        )
-        self._consume_budget(config, response)
-        return response
-
-    def validate_config(self, config: RunConfig) -> bool:
-        try:
-            result = subprocess.run(
-                [self._cli_path, "--version"],
-                capture_output=True,
-                text=True,
-                timeout=10,
-            )
-            return result.returncode == 0
-        except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
-            return False
-
-    def _build_command(self, config: RunConfig) -> list[str]:
-        cmd = [self._cli_path, "--print"]
-        if self._model:
-            cmd.extend(["--model", self._model])
-
-        json_schema = _json_schema_arg(config)
-        if json_schema:
-            cmd.extend(["--json-schema", json_schema])
-            # With --json-schema alone the CLI prints conversational text on
-            # stdout while the structured payload ships on a sidecar channel
-            # callers cannot reach. --output-format json forces the structured
-            # response (wrapped in an envelope) onto stdout.
-            cmd.extend(["--output-format", "json"])
-        return cmd
-
-    def _resolve_cli_path(self) -> str:
-        configured = (
-            os.environ.get("LLM_CONNECT_CLAUDE_CLI_PATH")
-            or os.environ.get("CLAUDE_CLI_PATH")
-            or self._config.claude_cli_path
-        )
-        if configured and configured != "claude":
-            return configured
-
-        local_cli = Path.home() / ".local" / "bin" / "claude"
-        if local_cli.exists():
-            return str(local_cli)
-        return configured or "claude"
-
-
-def _json_schema_arg(config: RunConfig) -> str | None:
-    schema = (config.model_params or {}).get("json_schema")
-    if not schema:
-        return None
-    if isinstance(schema, str):
-        return schema
-    if isinstance(schema, dict):
-        return json.dumps(schema, separators=(",", ":"))
-    return None
-
-
-# Envelope field names Claude Code's `--output-format json` is known to use
-# for the model's primary textual response. Used as a fall-back when no field
-# carries a JSON-parseable payload (e.g. plain prose generation).
-_ENVELOPE_TEXT_FIELDS = ("result", "result_text", "content", "text", "output")
-
-
-def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str:
-    """Extract the model's payload from Claude CLI's --output-format json envelope.
-
-    Only runs when --json-schema was set (the only code path that adds
-    --output-format json to the CLI invocation). Other callers keep the raw
-    stdout behavior unchanged.
-
-    Strategy: when --json-schema is set the caller wants JSON back, so prefer
-    any envelope field whose value is itself valid JSON (dict, list, or a
-    string that parses as JSON). This handles two observed envelope shapes:
-
-    1. Short prompts where the model emits the structured payload directly
-       in the `result` field as a JSON-encoded string.
-    2. Longer prompts where the model emits a conversational preamble in
-       `result` and the schema-enforced JSON in a separate field (the exact
-       field name varies across CLI versions).
-
-    Fall back to the first text field only when no JSON-bearing field exists,
-    so non-schema callers via this code path still see the model's prose.
-    Surface the raw envelope as a last resort so the operator can see what
-    shape arrived and extend the strategy.
-    """
-    if not _json_schema_arg(config):
-        return stdout
-    text = stdout.strip()
-    if not text:
-        return stdout
-    try:
-        envelope = json.loads(text)
-    except json.JSONDecodeError:
-        return stdout
-    if not isinstance(envelope, dict):
-        return stdout
-
-    json_payload = _find_json_payload(envelope)
-    if json_payload is not None:
-        return json_payload
-
-    for key in _ENVELOPE_TEXT_FIELDS:
-        value = envelope.get(key)
-        if isinstance(value, str):
-            return value
-        if isinstance(value, (dict, list)):
-            return json.dumps(value)
-
-    return stdout
-
-
-def _find_json_payload(envelope: dict) -> str | None:
-    """Return the first envelope value that represents valid JSON.
-
-    Insertion order is preserved by Python dicts, so this prefers fields the
-    CLI lists earliest in its envelope. Skips obvious metadata keys (cost,
-    usage, timing) so we never accidentally pick a numeric or telemetry value.
-    """
-    for key, value in envelope.items():
-        if key in _ENVELOPE_METADATA_KEYS:
-            continue
-        if isinstance(value, (dict, list)):
-            return json.dumps(value)
-        if isinstance(value, str):
-            stripped = value.strip()
-            if stripped.startswith(("{", "[")):
-                try:
-                    json.loads(stripped)
-                except json.JSONDecodeError:
-                    continue
-                return stripped
-    return None
-
-
-# Envelope keys that carry telemetry, never the model payload.
-_ENVELOPE_METADATA_KEYS = frozenset({
-    "type", "subtype", "model", "usage", "total_cost_usd", "cost_usd",
-    "duration_ms", "duration_api_ms", "num_turns", "session_id",
-    "is_error", "stop_reason", "permission_denials", "uuid",
-})
+"""
+Claude Code CLI adapter - runs the ``claude`` CLI as a subprocess.
+"""
+
+import asyncio
+import json
+import os
+import subprocess
+from pathlib import Path
+from typing import Optional
+
+from llm_connect._diagnostics import (
+    record_adapter_transformation,
+    record_provider_request,
+    record_provider_response,
+)
+from llm_connect._token_estimator import estimate_tokens
+from llm_connect.adapter import LLMAdapter
+from llm_connect.config import LLMConfig
+from llm_connect.exceptions import LLMSubprocessError, LLMTimeoutError
+from llm_connect.models import LLMResponse, RunConfig
+
+
+class ClaudeCodeAdapter(LLMAdapter):
+    """LLM adapter that shells out to the ``claude`` CLI with ``--print``.
+
+    The compiled prompt is piped via stdin to avoid shell argument length
+    limits. Compiled prompts can exceed 30 KB.
+    """
+
+    def __init__(
+        self,
+        cli_path: Optional[str] = None,
+        model: Optional[str] = None,
+        config: Optional[LLMConfig] = None,
+    ):
+        """Create the adapter.
+
+        Args:
+            cli_path: Explicit path to the ``claude`` executable. When omitted
+                (or empty) the path comes from ``_resolve_cli_path``.
+            model: Optional model name forwarded to the CLI via ``--model``.
+            config: Adapter configuration; defaults to
+                ``LLMConfig(provider="claude-code")``.
+        """
+        # _config must be assigned first: _resolve_cli_path reads it.
+        self._config = config or LLMConfig(provider="claude-code")
+        self._cli_path = cli_path or self._resolve_cli_path()
+        self._model = model
+
+    # LLMAdapter interface
+
+    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
+        """Run the CLI synchronously with ``prompt`` on stdin.
+
+        Args:
+            prompt: Text written to the CLI's stdin.
+            config: Per-run settings; ``timeout_seconds`` overrides the
+                adapter-level timeout when truthy.
+
+        Returns:
+            An ``LLMResponse`` whose content is the CLI's stdout (unwrapped
+            from the JSON envelope when a JSON schema was requested). Token
+            usage figures are estimates produced by ``estimate_tokens``.
+
+        Raises:
+            LLMTimeoutError: The CLI did not finish within the timeout.
+            LLMSubprocessError: The CLI exited with a non-zero status.
+        """
+        self._preflight_budget(config)
+        cmd = self._build_command(config)
+
+        timeout = config.timeout_seconds or self._config.timeout_seconds
+        record_provider_request(command=cmd, payload={"stdin": prompt})
+
+        try:
+            # subprocess.run kills and waits for the child itself when the
+            # timeout expires, so no explicit cleanup is needed here.
+            result = subprocess.run(
+                cmd,
+                input=prompt,
+                capture_output=True,
+                text=True,
+                timeout=timeout,
+            )
+        except subprocess.TimeoutExpired as exc:
+            raise LLMTimeoutError(
+                f"claude CLI timed out after {timeout}s",
+                cause=exc,
+            ) from exc
+
+        # Recorded before the return-code check so failures are captured too.
+        record_provider_response(
+            status=result.returncode,
+            body={"stdout": result.stdout, "stderr": result.stderr},
+        )
+        if result.returncode != 0:
+            raise LLMSubprocessError(
+                f"claude CLI exited with code {result.returncode}",
+                return_code=result.returncode,
+                stderr=result.stderr,
+            )
+
+        content = _unwrap_cli_json_envelope(result.stdout, config)
+        prompt_tokens = estimate_tokens(prompt)
+        completion_tokens = estimate_tokens(content)
+
+        response = LLMResponse(
+            content=content,
+            model=self._model or "claude-code-cli",
+            usage={
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens,
+            },
+            finish_reason="stop",
+            metadata={
+                "provider": "claude-code",
+                "cli_path": self._cli_path,
+            },
+        )
+        self._consume_budget(config, response)
+        return response
+
+    async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
+        """Native async implementation using asyncio.create_subprocess_exec.
+
+        Mirrors ``execute_prompt``; the response metadata additionally carries
+        ``"async": True``.
+
+        Raises:
+            LLMTimeoutError: The CLI did not finish within the timeout. The
+                child process is killed and reaped before this is raised.
+            LLMSubprocessError: The CLI exited with a non-zero status.
+        """
+        self._preflight_budget(config)
+        cmd = self._build_command(config)
+
+        timeout = config.timeout_seconds or self._config.timeout_seconds
+        record_provider_request(command=cmd, payload={"stdin": prompt})
+
+        proc = None
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdin=asyncio.subprocess.PIPE,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+            stdout_bytes, stderr_bytes = await asyncio.wait_for(
+                proc.communicate(input=prompt.encode()),
+                timeout=timeout,
+            )
+        except asyncio.TimeoutError as exc:
+            # wait_for only cancels communicate(); unlike subprocess.run it
+            # does not stop the child, which would otherwise keep running.
+            if proc is not None:
+                await self._kill_and_reap(proc)
+            raise LLMTimeoutError(
+                f"claude CLI timed out after {timeout}s",
+                cause=exc,
+            ) from exc
+
+        stdout = stdout_bytes.decode()
+        stderr = stderr_bytes.decode()
+        record_provider_response(
+            status=proc.returncode,
+            body={"stdout": stdout, "stderr": stderr},
+        )
+        if proc.returncode != 0:
+            raise LLMSubprocessError(
+                f"claude CLI exited with code {proc.returncode}",
+                return_code=proc.returncode,
+                stderr=stderr,
+            )
+
+        content = _unwrap_cli_json_envelope(stdout, config)
+        prompt_tokens = estimate_tokens(prompt)
+        completion_tokens = estimate_tokens(content)
+
+        response = LLMResponse(
+            content=content,
+            model=self._model or "claude-code-cli",
+            usage={
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens,
+            },
+            finish_reason="stop",
+            metadata={
+                "provider": "claude-code",
+                "cli_path": self._cli_path,
+                "async": True,
+            },
+        )
+        self._consume_budget(config, response)
+        return response
+
+    @staticmethod
+    async def _kill_and_reap(proc: "asyncio.subprocess.Process") -> None:
+        """Kill a still-running child process and wait for it to exit."""
+        if proc.returncode is not None:
+            return
+        try:
+            proc.kill()
+        except ProcessLookupError:
+            # The child exited between the returncode check and kill().
+            return
+        await proc.wait()
+
+    def validate_config(self, config: RunConfig) -> bool:
+        """Return True when ``<cli> --version`` exits with status 0.
+
+        ``config`` is accepted for interface compatibility and is not read.
+        A timeout (10 s), a missing executable or any other ``OSError``
+        yields False instead of raising.
+        """
+        try:
+            result = subprocess.run(
+                [self._cli_path, "--version"],
+                capture_output=True,
+                text=True,
+                timeout=10,
+            )
+            return result.returncode == 0
+        except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+            return False
+
+    def _build_command(self, config: RunConfig) -> list[str]:
+        """Assemble the CLI argument vector for one run."""
+        cmd = [self._cli_path, "--print"]
+        if self._model:
+            cmd.extend(["--model", self._model])
+
+        json_schema = _json_schema_arg(config)
+        if json_schema:
+            cmd.extend(["--json-schema", json_schema])
+            # With --json-schema alone the CLI prints conversational text on
+            # stdout while the structured payload ships on a sidecar channel
+            # callers cannot reach. --output-format json forces the structured
+            # response (wrapped in an envelope) onto stdout.
+            cmd.extend(["--output-format", "json"])
+        return cmd
+
+    def _resolve_cli_path(self) -> str:
+        """Pick the ``claude`` executable to run.
+
+        Precedence: ``LLM_CONNECT_CLAUDE_CLI_PATH``, then ``CLAUDE_CLI_PATH``,
+        then ``config.claude_cli_path``. A value other than the bare name
+        ``"claude"`` is used as-is; otherwise ``~/.local/bin/claude`` is
+        preferred when it exists, falling back to ``"claude"``.
+        """
+        configured = (
+            os.environ.get("LLM_CONNECT_CLAUDE_CLI_PATH")
+            or os.environ.get("CLAUDE_CLI_PATH")
+            or self._config.claude_cli_path
+        )
+        if configured and configured != "claude":
+            return configured
+
+        local_cli = Path.home() / ".local" / "bin" / "claude"
+        if local_cli.exists():
+            return str(local_cli)
+        return configured or "claude"
+
+
+def _json_schema_arg(config: RunConfig) -> str | None:
+    """Return the ``--json-schema`` argument for this run, or None.
+
+    Reads ``json_schema`` from ``config.model_params``. A non-empty string is
+    passed through unchanged, a non-empty dict is serialised as compact JSON,
+    and anything else (missing, empty, other types) yields None.
+    """
+    params = config.model_params or {}
+    raw = params.get("json_schema")
+    if isinstance(raw, str) and raw:
+        return raw
+    if isinstance(raw, dict) and raw:
+        return json.dumps(raw, separators=(",", ":"))
+    return None
+
+
+# Envelope field names Claude Code's --output-format json is known to use for
+# the model's primary textual response. Used as a fallback when no field carries
+# a JSON-parseable payload, such as plain prose generation.
+# Order matters: the fields are probed left to right and the first one holding
+# a str, dict or list value wins.
+_ENVELOPE_TEXT_FIELDS = ("result", "result_text", "content", "text", "output")
+
+
+def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str:
+    """Extract the model's payload from Claude CLI's --output-format json envelope.
+
+    Only runs when a JSON schema was requested for this run; otherwise, and
+    whenever stdout is blank, is not valid JSON, or is not a JSON object, the
+    raw stdout is returned unchanged.
+
+    Strategy: prefer the first envelope field that itself carries JSON (see
+    ``_find_json_payload``); failing that, use the first known text field;
+    failing that, return the raw stdout. Any successful unwrap is routed
+    through ``_record_unwrap``.
+    """
+    schema_requested = bool(_json_schema_arg(config))
+    trimmed = stdout.strip()
+    if not (schema_requested and trimmed):
+        return stdout
+
+    try:
+        parsed = json.loads(trimmed)
+    except json.JSONDecodeError:
+        return stdout
+    if not isinstance(parsed, dict):
+        return stdout
+
+    unwrapped = _find_json_payload(parsed)
+    if unwrapped is None:
+        # No JSON-bearing field: fall back to the known text fields in order.
+        for field in _ENVELOPE_TEXT_FIELDS:
+            candidate = parsed.get(field)
+            if isinstance(candidate, str):
+                unwrapped = candidate
+                break
+            if isinstance(candidate, (dict, list)):
+                unwrapped = json.dumps(candidate)
+                break
+
+    if unwrapped is None:
+        return stdout
+    return _record_unwrap(stdout, unwrapped)
+
+
+def _find_json_payload(envelope: dict) -> str | None:
+    """Return the first envelope value that represents valid JSON.
+
+    Fields are visited in the envelope's own order and telemetry keys are
+    skipped. A dict or list value is re-serialised with ``json.dumps``; a
+    string value qualifies only when, after stripping, it starts with ``{``
+    or ``[`` and parses as JSON, in which case the stripped text is returned.
+    Returns None when no field qualifies.
+    """
+    for name, candidate in envelope.items():
+        if name in _ENVELOPE_METADATA_KEYS:
+            continue
+        if isinstance(candidate, (dict, list)):
+            return json.dumps(candidate)
+        if not isinstance(candidate, str):
+            continue
+        text = candidate.strip()
+        if text[:1] not in ("{", "["):
+            continue
+        try:
+            json.loads(text)
+        except json.JSONDecodeError:
+            continue
+        return text
+    return None
+
+
+# Envelope keys that carry telemetry, never the model payload.
+# The payload search returns the first dict/list-valued field it meets, so any
+# dict- or list-valued telemetry field missing from this set can be mistaken
+# for the model's answer.
+# NOTE(review): "modelUsage" is the camelCase per-model usage map emitted next
+# to "usage" by current Claude Code CLI builds - confirm against the CLI
+# version in use.
+_ENVELOPE_METADATA_KEYS = frozenset(
+    {
+        "type",
+        "subtype",
+        "model",
+        "usage",
+        "modelUsage",
+        "total_cost_usd",
+        "cost_usd",
+        "duration_ms",
+        "duration_api_ms",
+        "num_turns",
+        "session_id",
+        "is_error",
+        "stop_reason",
+        "permission_denials",
+        "uuid",
+    }
+)
+
+
+def _record_unwrap(stdout: str, content: str) -> str:
+    """Return ``content``, recording a diagnostic when it differs from ``stdout``."""
+    unchanged = content == stdout
+    if not unchanged:
+        record_adapter_transformation("unwrap_cli_envelope", stdout, content)
+    return content