llm-connect/llm_connect/claude_code.py

"""
Claude Code CLI adapter - runs the ``claude`` CLI as a subprocess.
"""

import asyncio
import json
import os
import subprocess
from pathlib import Path
from typing import Optional

from llm_connect._diagnostics import (
    record_adapter_transformation,
    record_provider_request,
    record_provider_response,
)
from llm_connect._token_estimator import estimate_tokens
from llm_connect.adapter import LLMAdapter
from llm_connect.config import LLMConfig
from llm_connect.exceptions import LLMSubprocessError, LLMTimeoutError
from llm_connect.models import LLMResponse, RunConfig


class ClaudeCodeAdapter(LLMAdapter):
    """LLM adapter that shells out to the ``claude`` CLI with ``--print``.

    The compiled prompt is piped via stdin to avoid shell argument length
    limits. Compiled prompts can exceed 30 KB.

    Both the blocking and the asyncio entry points share the same
    post-processing (diagnostics, exit-code check, envelope unwrapping, token
    estimation, budget accounting) through ``_finalize_response``.
    """

    def __init__(
        self,
        cli_path: Optional[str] = None,
        model: Optional[str] = None,
        config: Optional[LLMConfig] = None,
    ):
        """Create the adapter.

        Args:
            cli_path: Explicit path to the ``claude`` executable. When omitted
                the path is resolved by ``_resolve_cli_path``.
            model: Optional model name forwarded to the CLI via ``--model``.
            config: Adapter configuration; defaults to
                ``LLMConfig(provider="claude-code")``.
        """
        # _config must be assigned first: _resolve_cli_path() reads it.
        self._config = config or LLMConfig(provider="claude-code")
        self._cli_path = cli_path or self._resolve_cli_path()
        self._model = model

    # LLMAdapter interface

    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Run the CLI synchronously with *prompt* on stdin.

        Args:
            prompt: Compiled prompt text, written to the child's stdin.
            config: Per-run settings; ``timeout_seconds`` overrides the
                adapter-level timeout when truthy.

        Returns:
            The CLI output wrapped in an ``LLMResponse``. Token counts come
            from ``estimate_tokens``, not from the CLI.

        Raises:
            LLMTimeoutError: The CLI did not finish within the timeout.
            LLMSubprocessError: The CLI exited with a non-zero status.
        """
        self._preflight_budget(config)
        cmd = self._build_command(config)

        timeout = config.timeout_seconds or self._config.timeout_seconds
        record_provider_request(command=cmd, payload={"stdin": prompt})

        try:
            result = subprocess.run(
                cmd,
                input=prompt,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as exc:
            raise LLMTimeoutError(
                f"claude CLI timed out after {timeout}s",
                cause=exc,
            ) from exc

        return self._finalize_response(
            prompt,
            config,
            result.returncode,
            result.stdout,
            result.stderr,
        )

    async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Native async implementation using asyncio.create_subprocess_exec.

        Behaves like ``execute_prompt`` but never blocks the event loop. The
        response metadata additionally carries ``"async": True``.

        Raises:
            LLMTimeoutError: The CLI did not finish within the timeout.
            LLMSubprocessError: The CLI exited with a non-zero status.
        """
        self._preflight_budget(config)
        cmd = self._build_command(config)

        timeout = config.timeout_seconds or self._config.timeout_seconds
        record_provider_request(command=cmd, payload={"stdin": prompt})

        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout_bytes, stderr_bytes = await asyncio.wait_for(
                proc.communicate(input=prompt.encode()),
                timeout=timeout,
            )
        except asyncio.TimeoutError as exc:
            raise LLMTimeoutError(
                f"claude CLI timed out after {timeout}s",
                cause=exc,
            ) from exc
        finally:
            # wait_for() only cancels communicate(); the child itself keeps
            # running after a timeout or task cancellation. Kill and reap it
            # so no orphaned CLI process is left behind, mirroring what
            # subprocess.run() does on the synchronous path.
            if proc.returncode is None:
                try:
                    proc.kill()
                except ProcessLookupError:
                    # The child exited between the check and the kill.
                    pass
                await proc.wait()

        return self._finalize_response(
            prompt,
            config,
            proc.returncode,
            stdout_bytes.decode(),
            stderr_bytes.decode(),
            via_async=True,
        )

    def validate_config(self, config: RunConfig) -> bool:
        """Report whether the configured CLI can be executed.

        Runs ``<cli> --version`` with a 10 second timeout. *config* is not
        consulted by this check.

        Returns:
            ``True`` when the command exits with status 0; ``False`` when it
            exits non-zero, times out, or cannot be launched.
        """
        try:
            result = subprocess.run(
                [self._cli_path, "--version"],
                capture_output=True,
                text=True,
                timeout=10,
            )
            return result.returncode == 0
        except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
            return False

    def _finalize_response(
        self,
        prompt: str,
        config: RunConfig,
        returncode: int | None,
        stdout: str,
        stderr: str,
        *,
        via_async: bool = False,
    ) -> LLMResponse:
        """Turn a finished CLI invocation into an ``LLMResponse``.

        Records the raw provider response, raises on a non-zero exit status,
        unwraps the JSON envelope when applicable, estimates token usage and
        charges the budget.

        Raises:
            LLMSubprocessError: *returncode* is not 0.
        """
        record_provider_response(
            status=returncode,
            body={"stdout": stdout, "stderr": stderr},
        )
        if returncode != 0:
            raise LLMSubprocessError(
                f"claude CLI exited with code {returncode}",
                return_code=returncode,
                stderr=stderr,
            )

        content = _unwrap_cli_json_envelope(stdout, config)
        prompt_tokens = estimate_tokens(prompt)
        completion_tokens = estimate_tokens(content)

        metadata = {
            "provider": "claude-code",
            "cli_path": self._cli_path,
        }
        if via_async:
            metadata["async"] = True

        response = LLMResponse(
            content=content,
            model=self._model or "claude-code-cli",
            usage={
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
            finish_reason="stop",
            metadata=metadata,
        )
        self._consume_budget(config, response)
        return response

    def _build_command(self, config: RunConfig) -> list[str]:
        """Assemble the argv list for one CLI invocation.

        Always starts with ``<cli> --print``; adds ``--model`` when a model
        was given and ``--json-schema``/``--output-format json`` when the run
        config carries a JSON schema.
        """
        cmd = [self._cli_path, "--print"]
        if self._model:
            cmd.extend(["--model", self._model])

        json_schema = _json_schema_arg(config)
        if json_schema:
            cmd.extend(["--json-schema", json_schema])
            # With --json-schema alone the CLI prints conversational text on
            # stdout while the structured payload ships on a sidecar channel
            # callers cannot reach. --output-format json forces the structured
            # response (wrapped in an envelope) onto stdout.
            cmd.extend(["--output-format", "json"])
        return cmd

    def _resolve_cli_path(self) -> str:
        """Pick the ``claude`` executable to run.

        Precedence: ``LLM_CONNECT_CLAUDE_CLI_PATH``, then ``CLAUDE_CLI_PATH``,
        then ``claude_cli_path`` from the adapter config. A value other than
        the bare name ``"claude"`` is used as-is. Otherwise
        ``~/.local/bin/claude`` is preferred when it exists, falling back to
        ``"claude"`` (resolved through ``PATH`` at launch time).
        """
        configured = (
            os.environ.get("LLM_CONNECT_CLAUDE_CLI_PATH")
            or os.environ.get("CLAUDE_CLI_PATH")
            or self._config.claude_cli_path
        )
        if configured and configured != "claude":
            return configured

        local_cli = Path.home() / ".local" / "bin" / "claude"
        if local_cli.exists():
            return str(local_cli)
        # At this point ``configured`` is either falsy or exactly "claude".
        return "claude"


def _json_schema_arg(config: RunConfig) -> str | None:
    schema = (config.model_params or {}).get("json_schema")
    if not schema:
        return None
    if isinstance(schema, str):
        return schema
    if isinstance(schema, dict):
        return json.dumps(schema, separators=(",", ":"))
    return None


# Envelope field names Claude Code's --output-format json is known to use for
# the model's primary textual response. Used as a fallback when no field carries
# a JSON-parseable payload, such as plain prose generation.
_ENVELOPE_TEXT_FIELDS = ("result", "result_text", "content", "text", "output")


def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str:
    """Pull the model's payload out of the CLI's --output-format json envelope.

    Unwrapping is attempted only when *config* carries a JSON schema (the
    case in which --json-schema was passed). In every other situation, and
    whenever stdout is blank, is not valid JSON, is not a JSON object, or
    holds no recognisable payload, the raw *stdout* is returned unchanged.
    """
    if not _json_schema_arg(config):
        return stdout

    trimmed = stdout.strip()
    envelope = None
    if trimmed:
        try:
            envelope = json.loads(trimmed)
        except json.JSONDecodeError:
            envelope = None
    if not isinstance(envelope, dict):
        return stdout

    # First choice: any field that already carries structured JSON.
    structured = _find_json_payload(envelope)
    if structured is not None:
        return _record_unwrap(stdout, structured)

    # Fallback: the first well-known text field holding a usable value.
    for candidate in map(envelope.get, _ENVELOPE_TEXT_FIELDS):
        if isinstance(candidate, str):
            unwrapped = candidate
        elif isinstance(candidate, (dict, list)):
            unwrapped = json.dumps(candidate)
        else:
            continue
        return _record_unwrap(stdout, unwrapped)

    return stdout


def _find_json_payload(envelope: dict) -> str | None:
    """Return the first non-metadata envelope value that is valid JSON.

    Keys listed in ``_ENVELOPE_METADATA_KEYS`` are skipped. A dict or list
    value is serialised with ``json.dumps``; a string value qualifies when,
    after stripping whitespace, it starts with ``{`` or ``[`` and parses as
    JSON, in which case the stripped string is returned. ``None`` means no
    field qualified.
    """
    for name, candidate in envelope.items():
        if name in _ENVELOPE_METADATA_KEYS:
            continue
        if isinstance(candidate, (dict, list)):
            return json.dumps(candidate)
        if not isinstance(candidate, str):
            continue

        text = candidate.strip()
        # Cheap pre-filter so ordinary prose is never handed to the parser.
        if text[:1] not in ("{", "["):
            continue
        try:
            json.loads(text)
        except json.JSONDecodeError:
            continue
        else:
            return text
    return None


# Envelope keys that carry telemetry, never the model payload.
_ENVELOPE_METADATA_KEYS = frozenset(
    {
        "type",
        "subtype",
        "model",
        "usage",
        "total_cost_usd",
        "cost_usd",
        "duration_ms",
        "duration_api_ms",
        "num_turns",
        "session_id",
        "is_error",
        "stop_reason",
        "permission_denials",
        "uuid",
    }
)


def _record_unwrap(stdout: str, content: str) -> str:
    if content != stdout:
        record_adapter_transformation("unwrap_cli_envelope", stdout, content)
    return content