Files
llm-connect/llm_connect/claude_code.py
tegwick 24f4c09d42
Some checks failed
CI / test (3.10) (push) Has been cancelled
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
Implement llm-connect ADHOC diagnostics
2026-06-03 11:56:21 +02:00

290 lines
9.3 KiB
Python

"""
Claude Code CLI adapter - runs the ``claude`` CLI as a subprocess.
"""
import asyncio
import json
import os
import subprocess
from pathlib import Path
from typing import Optional
from llm_connect._diagnostics import (
record_adapter_transformation,
record_provider_request,
record_provider_response,
)
from llm_connect._token_estimator import estimate_tokens
from llm_connect.adapter import LLMAdapter
from llm_connect.config import LLMConfig
from llm_connect.exceptions import LLMSubprocessError, LLMTimeoutError
from llm_connect.models import LLMResponse, RunConfig
class ClaudeCodeAdapter(LLMAdapter):
    """LLM adapter that shells out to the ``claude`` CLI with ``--print``.

    The compiled prompt is piped via stdin to avoid shell argument length
    limits. Compiled prompts can exceed 30 KB.
    """

    def __init__(
        self,
        cli_path: Optional[str] = None,
        model: Optional[str] = None,
        config: Optional[LLMConfig] = None,
    ):
        """Create an adapter.

        Args:
            cli_path: Explicit path to the ``claude`` executable. When falsy,
                the path is resolved by :meth:`_resolve_cli_path`.
            model: Model name forwarded via ``--model``; the flag is omitted
                when this is falsy.
            config: Adapter configuration. Defaults to
                ``LLMConfig(provider="claude-code")``.
        """
        # ``_config`` must be assigned first: ``_resolve_cli_path`` reads it.
        self._config = config or LLMConfig(provider="claude-code")
        self._cli_path = cli_path or self._resolve_cli_path()
        self._model = model

    # LLMAdapter interface

    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Run the CLI synchronously and return the model response.

        Args:
            prompt: Compiled prompt text, written to the CLI's stdin.
            config: Per-run settings; ``timeout_seconds`` overrides the
                adapter-level timeout when truthy.

        Returns:
            The response with estimated token usage.

        Raises:
            LLMTimeoutError: The CLI did not finish within the timeout.
            LLMSubprocessError: The CLI exited with a non-zero status.
        """
        # NOTE(review): _preflight_budget / _consume_budget are not defined in
        # this class; presumably inherited from LLMAdapter - confirm.
        self._preflight_budget(config)
        cmd = self._build_command(config)
        timeout = config.timeout_seconds or self._config.timeout_seconds
        record_provider_request(command=cmd, payload={"stdin": prompt})
        try:
            result = subprocess.run(
                cmd,
                input=prompt,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as exc:
            raise LLMTimeoutError(
                f"claude CLI timed out after {timeout}s",
                cause=exc,
            ) from exc
        record_provider_response(
            status=result.returncode,
            body={"stdout": result.stdout, "stderr": result.stderr},
        )
        self._raise_on_failure(result.returncode, result.stderr)
        response = self._build_response(prompt, result.stdout, config)
        self._consume_budget(config, response)
        return response

    async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Native async implementation using asyncio.create_subprocess_exec.

        Mirrors :meth:`execute_prompt`; the response metadata additionally
        carries ``"async": True``.

        Raises:
            LLMTimeoutError: The CLI did not finish within the timeout. The
                child process is killed and reaped before this is raised.
            LLMSubprocessError: The CLI exited with a non-zero status.
        """
        self._preflight_budget(config)
        cmd = self._build_command(config)
        timeout = config.timeout_seconds or self._config.timeout_seconds
        record_provider_request(command=cmd, payload={"stdin": prompt})
        proc = None
        try:
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            stdout_bytes, stderr_bytes = await asyncio.wait_for(
                proc.communicate(input=prompt.encode()),
                timeout=timeout,
            )
        except asyncio.TimeoutError as exc:
            # wait_for only cancels communicate(); unlike subprocess.run it
            # does not stop the child, so terminate it here to avoid leaving
            # an orphaned CLI process behind.
            if proc is not None:
                await self._kill_and_reap(proc)
            raise LLMTimeoutError(
                f"claude CLI timed out after {timeout}s",
                cause=exc,
            ) from exc
        stdout = stdout_bytes.decode()
        stderr = stderr_bytes.decode()
        record_provider_response(
            status=proc.returncode,
            body={"stdout": stdout, "stderr": stderr},
        )
        self._raise_on_failure(proc.returncode, stderr)
        response = self._build_response(prompt, stdout, config, is_async=True)
        self._consume_budget(config, response)
        return response

    def validate_config(self, config: RunConfig) -> bool:
        """Return True when ``<cli> --version`` runs and exits with status 0.

        ``config`` is accepted for interface compatibility and is not
        inspected. A timeout (10 s) or any OS-level launch failure yields
        False instead of raising.
        """
        try:
            result = subprocess.run(
                [self._cli_path, "--version"],
                capture_output=True,
                text=True,
                timeout=10,
            )
        except (subprocess.TimeoutExpired, OSError):
            # OSError also covers FileNotFoundError (missing executable).
            return False
        return result.returncode == 0

    # Internal helpers

    @staticmethod
    async def _kill_and_reap(proc: asyncio.subprocess.Process) -> None:
        """Kill *proc* and wait for it so no zombie process is left behind."""
        try:
            proc.kill()
        except ProcessLookupError:
            # The process exited on its own between the timeout and the kill.
            pass
        await proc.wait()

    @staticmethod
    def _raise_on_failure(return_code: int | None, stderr: str) -> None:
        """Raise LLMSubprocessError when the CLI exit status is not zero."""
        if return_code != 0:
            raise LLMSubprocessError(
                f"claude CLI exited with code {return_code}",
                return_code=return_code,
                stderr=stderr,
            )

    def _build_response(
        self,
        prompt: str,
        stdout: str,
        config: RunConfig,
        *,
        is_async: bool = False,
    ) -> LLMResponse:
        """Turn raw CLI stdout into an LLMResponse with estimated usage.

        Token counts come from ``estimate_tokens`` applied to the prompt and
        to the unwrapped content; they are estimates, not CLI-reported values.
        """
        content = _unwrap_cli_json_envelope(stdout, config)
        prompt_tokens = estimate_tokens(prompt)
        completion_tokens = estimate_tokens(content)
        metadata = {
            "provider": "claude-code",
            "cli_path": self._cli_path,
        }
        if is_async:
            metadata["async"] = True
        return LLMResponse(
            content=content,
            model=self._model or "claude-code-cli",
            usage={
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
            finish_reason="stop",
            metadata=metadata,
        )

    def _build_command(self, config: RunConfig) -> list[str]:
        """Assemble the argv list for a ``claude --print`` invocation."""
        cmd = [self._cli_path, "--print"]
        if self._model:
            cmd.extend(["--model", self._model])
        json_schema = _json_schema_arg(config)
        if json_schema:
            cmd.extend(["--json-schema", json_schema])
            # With --json-schema alone the CLI prints conversational text on
            # stdout while the structured payload ships on a sidecar channel
            # callers cannot reach. --output-format json forces the structured
            # response (wrapped in an envelope) onto stdout.
            cmd.extend(["--output-format", "json"])
        return cmd

    def _resolve_cli_path(self) -> str:
        """Pick the ``claude`` executable path.

        Precedence: ``LLM_CONNECT_CLAUDE_CLI_PATH``, then ``CLAUDE_CLI_PATH``,
        then ``self._config.claude_cli_path``. A configured value other than
        the bare name ``"claude"`` is returned as-is. Otherwise
        ``~/.local/bin/claude`` is used when it exists, falling back to the
        configured value or the bare name ``"claude"``.
        """
        configured = (
            os.environ.get("LLM_CONNECT_CLAUDE_CLI_PATH")
            or os.environ.get("CLAUDE_CLI_PATH")
            or self._config.claude_cli_path
        )
        if configured and configured != "claude":
            return configured
        local_cli = Path.home() / ".local" / "bin" / "claude"
        if local_cli.exists():
            return str(local_cli)
        return configured or "claude"
def _json_schema_arg(config: RunConfig) -> str | None:
    """Return the ``--json-schema`` argument value for *config*, if any.

    Reads ``config.model_params["json_schema"]``. A non-empty string is passed
    through unchanged; a non-empty dict is serialised to compact JSON. Missing,
    empty, or otherwise-typed values produce ``None``.
    """
    raw_schema = (config.model_params or {}).get("json_schema")
    if raw_schema:
        if isinstance(raw_schema, dict):
            # Compact separators keep the command-line argument short.
            return json.dumps(raw_schema, separators=(",", ":"))
        if isinstance(raw_schema, str):
            return raw_schema
    return None
# Envelope field names Claude Code's --output-format json is known to use for
# the model's primary textual response. Used as a fallback when no field carries
# a JSON-parseable payload, such as plain prose generation. The names are probed
# in the order listed; the first one holding a str, dict, or list wins.
_ENVELOPE_TEXT_FIELDS = ("result", "result_text", "content", "text", "output")
def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str:
    """Extract the model's payload from Claude CLI's --output-format json envelope.

    Only runs when --json-schema was set. Other callers keep the raw stdout
    behavior unchanged. The raw stdout is also returned untouched when it is
    blank, is not valid JSON, is not a JSON object, or holds no usable field.
    """
    trimmed = stdout.strip()
    if not (_json_schema_arg(config) and trimmed):
        return stdout

    try:
        parsed = json.loads(trimmed)
    except json.JSONDecodeError:
        return stdout
    if not isinstance(parsed, dict):
        return stdout

    # A JSON-shaped value anywhere in the envelope takes priority; the known
    # text fields are consulted only when none is found.
    payload = _find_json_payload(parsed)
    if payload is None:
        for field_name in _ENVELOPE_TEXT_FIELDS:
            candidate = parsed.get(field_name)
            if isinstance(candidate, str):
                payload = candidate
                break
            if isinstance(candidate, (dict, list)):
                payload = json.dumps(candidate)
                break

    return stdout if payload is None else _record_unwrap(stdout, payload)
def _find_json_payload(envelope: dict) -> str | None:
"""Return the first envelope value that represents valid JSON."""
for key, value in envelope.items():
if key in _ENVELOPE_METADATA_KEYS:
continue
if isinstance(value, (dict, list)):
return json.dumps(value)
if isinstance(value, str):
stripped = value.strip()
if stripped.startswith(("{", "[")):
try:
json.loads(stripped)
except json.JSONDecodeError:
continue
return stripped
return None
# Envelope keys that carry telemetry, never the model payload.
_ENVELOPE_METADATA_KEYS = frozenset(
{
"type",
"subtype",
"model",
"usage",
"total_cost_usd",
"cost_usd",
"duration_ms",
"duration_api_ms",
"num_turns",
"session_id",
"is_error",
"stop_reason",
"permission_denials",
"uuid",
}
)
def _record_unwrap(stdout: str, content: str) -> str:
    """Return *content*, recording a diagnostic when it differs from *stdout*.

    The transformation is reported under the name ``"unwrap_cli_envelope"``;
    nothing is recorded when unwrapping left the text unchanged.
    """
    was_transformed = content != stdout
    if was_transformed:
        record_adapter_transformation("unwrap_cli_envelope", stdout, content)
    return content