Files
llm-connect/llm_connect/claude_code.py
tegwick 9de0f495db
Some checks failed
CI / test (3.10) (push) Has been cancelled
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
Pass --output-format json with --json-schema and unwrap CLI envelope
The Claude Code adapter previously passed --json-schema alone. On Claude
CLI 2.1.160 that combination still emits the model's conversational
preamble on stdout while the schema-enforced structured payload ships on
a sidecar channel the adapter cannot read. Result: callers requesting
structured output got prose that fails JSON parsing downstream — exactly
the failure mode the activity-core CUST-WP-0045 daily triage canary hit
on 2026-06-01 ("Triage report generated and returned via structured
output. Key signals:..." → json.loads error at column 1).

Fix: when --json-schema is set, also pass --output-format json. The CLI
then writes a JSON envelope on stdout. The adapter unwraps it by
probing a small allowlist of known text-bearing fields (result,
result_text, content, text, output). Unknown envelope shapes fall
through to raw stdout so the operator can introspect the structure and
extend the allowlist.

The unwrap path is only triggered when --json-schema was set, so non-
schema callers keep the existing raw-stdout behavior.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 10:20:24 +02:00

231 lines
8.1 KiB
Python

"""
Claude Code CLI adapter — runs the ``claude`` CLI as a subprocess.
"""
import asyncio
import json
import os
import subprocess
from pathlib import Path
from typing import Optional
from llm_connect.adapter import LLMAdapter
from llm_connect.models import RunConfig, LLMResponse
from llm_connect.config import LLMConfig
from llm_connect._token_estimator import estimate_tokens
from llm_connect.exceptions import (
LLMSubprocessError,
LLMTimeoutError,
)
class ClaudeCodeAdapter(LLMAdapter):
    """LLM adapter that shells out to the ``claude`` CLI with ``--print``.

    The compiled prompt is piped via **stdin** to avoid shell argument
    length limits (compiled prompts can exceed 30 KB).

    Both the sync and async paths exchange UTF-8 with the CLI; undecodable
    bytes in the CLI's output are replaced rather than raising.
    """

    def __init__(
        self,
        cli_path: Optional[str] = None,
        model: Optional[str] = None,
        config: Optional[LLMConfig] = None,
    ):
        """Create the adapter.

        Args:
            cli_path: Explicit path to the ``claude`` executable. When
                omitted, the path is resolved by ``_resolve_cli_path``.
            model: Optional model name forwarded to the CLI via ``--model``.
            config: Adapter configuration; defaults to
                ``LLMConfig(provider="claude-code")``.
        """
        # _config must be set first: _resolve_cli_path reads it.
        self._config = config or LLMConfig(provider="claude-code")
        self._cli_path = cli_path or self._resolve_cli_path()
        self._model = model

    # ── LLMAdapter interface ────────────────────────────────────────

    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Run the CLI synchronously and return its response.

        Args:
            prompt: Compiled prompt text, written to the CLI's stdin.
            config: Per-run settings (timeout, model params).

        Returns:
            The CLI output wrapped in an ``LLMResponse``. Token usage is
            estimated locally, not reported by the CLI.

        Raises:
            LLMTimeoutError: The CLI did not finish within the timeout.
            LLMSubprocessError: The CLI exited with a non-zero status.
        """
        self._preflight_budget(config)
        cmd = self._build_command(config)
        timeout = config.timeout_seconds or self._config.timeout_seconds
        try:
            result = subprocess.run(
                cmd,
                input=prompt,
                capture_output=True,
                text=True,
                # Explicit UTF-8 keeps this path consistent with the async
                # path instead of depending on the process locale.
                encoding="utf-8",
                errors="replace",
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as exc:
            raise LLMTimeoutError(
                f"claude CLI timed out after {timeout}s",
                cause=exc,
            ) from exc
        if result.returncode != 0:
            raise LLMSubprocessError(
                f"claude CLI exited with code {result.returncode}",
                return_code=result.returncode,
                stderr=result.stderr,
            )
        content = _unwrap_cli_json_envelope(result.stdout, config)
        response = self._build_response(prompt, content)
        self._consume_budget(config, response)
        return response

    async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Native async implementation using asyncio.create_subprocess_exec.

        Args:
            prompt: Compiled prompt text, written to the CLI's stdin.
            config: Per-run settings (timeout, model params).

        Returns:
            The CLI output wrapped in an ``LLMResponse`` whose metadata
            carries ``"async": True``.

        Raises:
            LLMTimeoutError: The CLI did not finish within the timeout; the
                child process is killed and reaped before raising.
            LLMSubprocessError: The CLI exited with a non-zero status.
        """
        self._preflight_budget(config)
        cmd = self._build_command(config)
        timeout = config.timeout_seconds or self._config.timeout_seconds
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout_bytes, stderr_bytes = await asyncio.wait_for(
                proc.communicate(input=prompt.encode("utf-8", errors="replace")),
                timeout=timeout,
            )
        except asyncio.TimeoutError as exc:
            # wait_for() only cancels communicate(); without an explicit
            # kill the child keeps running and is never reaped.
            if proc.returncode is None:
                try:
                    proc.kill()
                except ProcessLookupError:
                    # The child exited between the check and kill().
                    pass
                await proc.wait()
            raise LLMTimeoutError(
                f"claude CLI timed out after {timeout}s",
                cause=exc,
            ) from exc
        if proc.returncode != 0:
            raise LLMSubprocessError(
                f"claude CLI exited with code {proc.returncode}",
                return_code=proc.returncode,
                stderr=stderr_bytes.decode("utf-8", errors="replace"),
            )
        content = _unwrap_cli_json_envelope(
            stdout_bytes.decode("utf-8", errors="replace"), config
        )
        response = self._build_response(prompt, content, is_async=True)
        self._consume_budget(config, response)
        return response

    def validate_config(self, config: RunConfig) -> bool:
        """Return True when ``<cli_path> --version`` exits with status 0.

        ``config`` is accepted for interface compatibility but not inspected.
        A missing executable, an OS error, or a 10 s timeout yields False.
        """
        try:
            result = subprocess.run(
                [self._cli_path, "--version"],
                capture_output=True,
                text=True,
                timeout=10,
            )
            return result.returncode == 0
        except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
            return False

    # ── Internals ───────────────────────────────────────────────────

    def _build_response(
        self, prompt: str, content: str, *, is_async: bool = False
    ) -> LLMResponse:
        """Wrap CLI output in an ``LLMResponse`` with estimated token usage."""
        prompt_tokens = estimate_tokens(prompt)
        completion_tokens = estimate_tokens(content)
        metadata = {
            "provider": "claude-code",
            "cli_path": self._cli_path,
        }
        if is_async:
            metadata["async"] = True
        return LLMResponse(
            content=content,
            model=self._model or "claude-code-cli",
            usage={
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
            finish_reason="stop",
            metadata=metadata,
        )

    def _build_command(self, config: RunConfig) -> list[str]:
        """Assemble the argv list for one CLI invocation."""
        cmd = [self._cli_path, "--print"]
        if self._model:
            cmd.extend(["--model", self._model])
        json_schema = _json_schema_arg(config)
        if json_schema:
            cmd.extend(["--json-schema", json_schema])
            # With --json-schema alone the CLI prints conversational text on
            # stdout while the structured payload ships on a sidecar channel
            # callers cannot reach. --output-format json forces the structured
            # response (wrapped in an envelope) onto stdout.
            cmd.extend(["--output-format", "json"])
        return cmd

    def _resolve_cli_path(self) -> str:
        """Pick the ``claude`` executable to run.

        Precedence: ``LLM_CONNECT_CLAUDE_CLI_PATH``, then ``CLAUDE_CLI_PATH``,
        then ``claude_cli_path`` from the adapter config. A configured value
        other than the bare name ``"claude"`` is returned as-is. Otherwise
        ``~/.local/bin/claude`` is used when it exists, falling back to the
        configured value or ``"claude"`` (resolved via ``PATH`` at exec time).
        """
        configured = (
            os.environ.get("LLM_CONNECT_CLAUDE_CLI_PATH")
            or os.environ.get("CLAUDE_CLI_PATH")
            or self._config.claude_cli_path
        )
        if configured and configured != "claude":
            return configured
        local_cli = Path.home() / ".local" / "bin" / "claude"
        if local_cli.exists():
            return str(local_cli)
        return configured or "claude"
def _json_schema_arg(config: RunConfig) -> str | None:
    """Return the ``json_schema`` model param as a ``--json-schema`` value.

    A string schema is passed through unchanged; a dict schema is
    serialized to compact JSON. A missing, empty, or otherwise-typed
    schema yields ``None``.
    """
    params = config.model_params or {}
    schema = params.get("json_schema")
    if not schema:
        return None
    if isinstance(schema, dict):
        # Compact separators keep the command-line argument short.
        return json.dumps(schema, separators=(",", ":"))
    return schema if isinstance(schema, str) else None
# Field names Claude Code's `--output-format json` envelope is known to use
# for the model's response. Probed in order; the first usable match wins.
#
# ``structured_output`` is probed first: when --json-schema is combined with
# --output-format json, the CLI places the schema-validated payload there,
# while ``result`` can still hold the model's conversational text. Probing
# ``result`` first would hand callers prose instead of the structured data.
# NOTE(review): confirm the field name against the installed CLI version.
#
# If none match (because the envelope shape is something we haven't seen),
# we return the raw envelope string so the caller still gets the data and
# can introspect it.
_ENVELOPE_TEXT_FIELDS = (
    "structured_output",
    "result",
    "result_text",
    "content",
    "text",
    "output",
)


def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str:
    """Extract the model's payload from Claude CLI's --output-format json envelope.

    Only attempts unwrap when --json-schema was set, because that's the only
    code path that adds --output-format json to the CLI invocation. Other
    paths keep raw stdout (current behavior preserved).

    Args:
        stdout: Raw standard output captured from the CLI.
        config: Run configuration; consulted only to see whether a JSON
            schema was requested.

    Returns:
        The first allowlisted envelope field that holds a string (returned
        as-is) or a dict/list (re-serialized as JSON). ``stdout`` is returned
        unchanged when no schema was requested, when the output is empty or
        not a JSON object, or when no allowlisted field holds a usable value.
    """
    if not _json_schema_arg(config):
        return stdout
    text = stdout.strip()
    if not text:
        return stdout
    try:
        envelope = json.loads(text)
    except json.JSONDecodeError:
        return stdout
    if not isinstance(envelope, dict):
        return stdout
    for key in _ENVELOPE_TEXT_FIELDS:
        # Absent keys and null/scalar values are skipped so a later field
        # can still supply the payload.
        value = envelope.get(key)
        if isinstance(value, str):
            return value
        if isinstance(value, (dict, list)):
            return json.dumps(value)
    # Unknown envelope shape — surface it raw so the operator can see it
    # in the validation error and we can update _ENVELOPE_TEXT_FIELDS.
    return stdout