generated from coulomb/repo-seed
Implement llm-connect ADHOC diagnostics
This commit is contained in:
@@ -1,277 +1,289 @@
|
||||
"""
|
||||
Claude Code CLI adapter — runs the ``claude`` CLI as a subprocess.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from llm_connect.adapter import LLMAdapter
|
||||
from llm_connect.models import RunConfig, LLMResponse
|
||||
from llm_connect.config import LLMConfig
|
||||
from llm_connect._token_estimator import estimate_tokens
|
||||
from llm_connect.exceptions import (
|
||||
LLMSubprocessError,
|
||||
LLMTimeoutError,
|
||||
)
|
||||
|
||||
|
||||
class ClaudeCodeAdapter(LLMAdapter):
    """LLM adapter that shells out to the ``claude`` CLI with ``--print``.

    The compiled prompt is piped via **stdin** to avoid shell argument
    length limits (compiled prompts can exceed 30 KB).
    """

    def __init__(
        self,
        cli_path: Optional[str] = None,
        model: Optional[str] = None,
        config: Optional[LLMConfig] = None,
    ):
        """Store the adapter configuration, CLI path and model override.

        Args:
            cli_path: Explicit path to the ``claude`` executable. When falsy
                the path comes from ``_resolve_cli_path``.
            model: Model name passed to the CLI via ``--model``; the flag is
                omitted when this is falsy.
            config: Adapter configuration. Defaults to
                ``LLMConfig(provider="claude-code")``.
        """
        self._config = config or LLMConfig(provider="claude-code")
        self._cli_path = cli_path or self._resolve_cli_path()
        self._model = model

    # ── LLMAdapter interface ────────────────────────────────────────

    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Run the CLI synchronously and wrap its stdout in an ``LLMResponse``.

        Args:
            prompt: Text written to the CLI's stdin.
            config: Per-run settings; ``timeout_seconds`` overrides the
                adapter-level timeout when truthy.

        Raises:
            LLMTimeoutError: The CLI did not finish within the timeout.
            LLMSubprocessError: The CLI exited with a non-zero status.
        """
        # Budget hooks are not defined in this class; presumably inherited
        # from LLMAdapter.
        self._preflight_budget(config)
        cmd = self._build_command(config)

        timeout = config.timeout_seconds or self._config.timeout_seconds

        try:
            # subprocess.run kills and reaps the child itself on timeout.
            result = subprocess.run(
                cmd,
                input=prompt,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as exc:
            raise LLMTimeoutError(
                f"claude CLI timed out after {timeout}s",
                cause=exc,
            ) from exc

        if result.returncode != 0:
            raise LLMSubprocessError(
                f"claude CLI exited with code {result.returncode}",
                return_code=result.returncode,
                stderr=result.stderr,
            )

        response = self._build_response(prompt, result.stdout, config)
        self._consume_budget(config, response)
        return response

    async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Native async implementation using asyncio.create_subprocess_exec.

        Raises:
            LLMTimeoutError: The CLI did not finish within the timeout. The
                child process is killed and reaped before raising.
            LLMSubprocessError: The CLI exited with a non-zero status.
        """
        self._preflight_budget(config)
        cmd = self._build_command(config)

        timeout = config.timeout_seconds or self._config.timeout_seconds

        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout_bytes, stderr_bytes = await asyncio.wait_for(
                proc.communicate(input=prompt.encode()),
                timeout=timeout,
            )
        except asyncio.TimeoutError as exc:
            # wait_for only cancels communicate(); without an explicit kill
            # the CLI keeps running after the caller has given up on it.
            await self._kill_and_reap(proc)
            raise LLMTimeoutError(
                f"claude CLI timed out after {timeout}s",
                cause=exc,
            ) from exc

        if proc.returncode != 0:
            raise LLMSubprocessError(
                f"claude CLI exited with code {proc.returncode}",
                return_code=proc.returncode,
                stderr=stderr_bytes.decode(),
            )

        response = self._build_response(
            prompt,
            stdout_bytes.decode(),
            config,
            extra_metadata={"async": True},
        )
        self._consume_budget(config, response)
        return response

    def validate_config(self, config: RunConfig) -> bool:
        """Return True when ``<cli> --version`` exits with status 0.

        ``config`` is accepted for interface compatibility and is not read.
        A timeout (10 s) or any OS-level launch failure yields False.
        """
        try:
            result = subprocess.run(
                [self._cli_path, "--version"],
                capture_output=True,
                text=True,
                timeout=10,
            )
            return result.returncode == 0
        except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
            return False

    def _build_command(self, config: RunConfig) -> list[str]:
        """Assemble the argv list for one CLI invocation."""
        cmd = [self._cli_path, "--print"]
        if self._model:
            cmd.extend(["--model", self._model])

        json_schema = _json_schema_arg(config)
        if json_schema:
            cmd.extend(["--json-schema", json_schema])
            # With --json-schema alone the CLI prints conversational text on
            # stdout while the structured payload ships on a sidecar channel
            # callers cannot reach. --output-format json forces the structured
            # response (wrapped in an envelope) onto stdout.
            cmd.extend(["--output-format", "json"])
        return cmd

    def _resolve_cli_path(self) -> str:
        """Pick the CLI executable path.

        Precedence: ``LLM_CONNECT_CLAUDE_CLI_PATH``, ``CLAUDE_CLI_PATH``, then
        ``self._config.claude_cli_path``. A value other than the bare name
        ``"claude"`` is returned as-is; otherwise ``~/.local/bin/claude`` is
        used when it exists, falling back to ``"claude"``.
        """
        configured = (
            os.environ.get("LLM_CONNECT_CLAUDE_CLI_PATH")
            or os.environ.get("CLAUDE_CLI_PATH")
            or self._config.claude_cli_path
        )
        if configured and configured != "claude":
            return configured

        local_cli = Path.home() / ".local" / "bin" / "claude"
        if local_cli.exists():
            return str(local_cli)
        return configured or "claude"

    def _build_response(
        self,
        prompt: str,
        stdout: str,
        config: RunConfig,
        extra_metadata: Optional[dict] = None,
    ) -> LLMResponse:
        """Turn raw CLI stdout into an ``LLMResponse`` with estimated usage."""
        content = _unwrap_cli_json_envelope(stdout, config)
        prompt_tokens = estimate_tokens(prompt)
        completion_tokens = estimate_tokens(content)

        metadata = {
            "provider": "claude-code",
            "cli_path": self._cli_path,
        }
        if extra_metadata:
            metadata.update(extra_metadata)

        return LLMResponse(
            content=content,
            model=self._model or "claude-code-cli",
            usage={
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
            finish_reason="stop",
            metadata=metadata,
        )

    @staticmethod
    async def _kill_and_reap(proc) -> None:
        """Kill a still-running child process and wait for it to exit."""
        if proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                # The process exited between the check and the kill.
                pass
        await proc.wait()
|
||||
|
||||
|
||||
def _json_schema_arg(config: RunConfig) -> str | None:
    """Return the ``json_schema`` model parameter as a CLI argument string.

    A non-empty string is passed through untouched and a non-empty dict is
    serialized as compact JSON. Anything else (missing key, empty value, or
    another type) yields ``None``.
    """
    params = config.model_params or {}
    raw = params.get("json_schema")
    if isinstance(raw, str):
        return raw or None
    if isinstance(raw, dict) and raw:
        return json.dumps(raw, separators=(",", ":"))
    return None
|
||||
|
||||
|
||||
# Keys under which Claude Code's `--output-format json` envelope is known to
# carry the model's primary text. They are tried in this order as a fall-back
# when no envelope field holds a JSON-parseable payload (e.g. plain prose
# generation).
_ENVELOPE_TEXT_FIELDS = (
    "result",
    "result_text",
    "content",
    "text",
    "output",
)
|
||||
|
||||
|
||||
def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str:
    """Pull the model's payload out of the CLI's ``--output-format json`` envelope.

    The envelope is only expected when a JSON schema was requested, so runs
    without one get ``stdout`` back untouched. The same happens when stdout
    is blank, is not valid JSON, or is JSON that is not an object.

    For a parsed envelope the lookup order is:

    1. The first field that carries JSON (see ``_find_json_payload``).
    2. The first key of ``_ENVELOPE_TEXT_FIELDS`` holding a string, or a
       dict/list (re-serialized with ``json.dumps``).
    3. The raw ``stdout``, so the operator can inspect an unknown envelope
       shape.
    """
    schema_requested = bool(_json_schema_arg(config))
    trimmed = stdout.strip() if schema_requested else ""
    if not trimmed:
        return stdout

    try:
        parsed = json.loads(trimmed)
    except json.JSONDecodeError:
        return stdout
    if not isinstance(parsed, dict):
        return stdout

    structured = _find_json_payload(parsed)
    if structured is not None:
        return structured

    for field in _ENVELOPE_TEXT_FIELDS:
        candidate = parsed.get(field)
        if isinstance(candidate, str):
            return candidate
        if isinstance(candidate, (dict, list)):
            return json.dumps(candidate)

    return stdout
|
||||
|
||||
|
||||
def _find_json_payload(envelope: dict) -> str | None:
|
||||
"""Return the first envelope value that represents valid JSON.
|
||||
|
||||
Insertion order is preserved by Python dicts, so this prefers fields the
|
||||
CLI lists earliest in its envelope. Skips obvious metadata keys (cost,
|
||||
usage, timing) so we never accidentally pick a numeric or telemetry value.
|
||||
"""
|
||||
for key, value in envelope.items():
|
||||
if key in _ENVELOPE_METADATA_KEYS:
|
||||
continue
|
||||
if isinstance(value, (dict, list)):
|
||||
return json.dumps(value)
|
||||
if isinstance(value, str):
|
||||
stripped = value.strip()
|
||||
if stripped.startswith(("{", "[")):
|
||||
try:
|
||||
json.loads(stripped)
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
return stripped
|
||||
return None
|
||||
|
||||
|
||||
# Envelope keys that carry telemetry, never the model payload.
# "modelUsage" is a per-model usage dict; without it here, a scan for the
# first dict/list-valued field could return it as the payload.
_ENVELOPE_METADATA_KEYS = frozenset({
    "type", "subtype", "model", "usage", "modelUsage",
    "total_cost_usd", "cost_usd",
    "duration_ms", "duration_api_ms", "num_turns", "session_id",
    "is_error", "stop_reason", "permission_denials", "uuid",
})
|
||||
"""
|
||||
Claude Code CLI adapter - runs the ``claude`` CLI as a subprocess.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from llm_connect._diagnostics import (
|
||||
record_adapter_transformation,
|
||||
record_provider_request,
|
||||
record_provider_response,
|
||||
)
|
||||
from llm_connect._token_estimator import estimate_tokens
|
||||
from llm_connect.adapter import LLMAdapter
|
||||
from llm_connect.config import LLMConfig
|
||||
from llm_connect.exceptions import LLMSubprocessError, LLMTimeoutError
|
||||
from llm_connect.models import LLMResponse, RunConfig
|
||||
|
||||
|
||||
class ClaudeCodeAdapter(LLMAdapter):
    """LLM adapter that shells out to the ``claude`` CLI with ``--print``.

    The compiled prompt is piped via stdin to avoid shell argument length
    limits. Compiled prompts can exceed 30 KB.
    """

    def __init__(
        self,
        cli_path: Optional[str] = None,
        model: Optional[str] = None,
        config: Optional[LLMConfig] = None,
    ):
        """Store the adapter configuration, CLI path and model override.

        Args:
            cli_path: Explicit path to the ``claude`` executable. When falsy
                the path comes from ``_resolve_cli_path``.
            model: Model name passed to the CLI via ``--model``; the flag is
                omitted when this is falsy.
            config: Adapter configuration. Defaults to
                ``LLMConfig(provider="claude-code")``.
        """
        self._config = config or LLMConfig(provider="claude-code")
        self._cli_path = cli_path or self._resolve_cli_path()
        self._model = model

    # LLMAdapter interface

    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Run the CLI synchronously and wrap its stdout in an ``LLMResponse``.

        The command and stdin payload are recorded before the run, and the
        exit status plus both output streams after it, via the diagnostics
        helpers.

        Args:
            prompt: Text written to the CLI's stdin.
            config: Per-run settings; ``timeout_seconds`` overrides the
                adapter-level timeout when truthy.

        Raises:
            LLMTimeoutError: The CLI did not finish within the timeout.
            LLMSubprocessError: The CLI exited with a non-zero status.
        """
        # Budget hooks are not defined in this class; presumably inherited
        # from LLMAdapter.
        self._preflight_budget(config)
        cmd = self._build_command(config)

        timeout = config.timeout_seconds or self._config.timeout_seconds
        record_provider_request(command=cmd, payload={"stdin": prompt})

        try:
            # subprocess.run kills and reaps the child itself on timeout.
            result = subprocess.run(
                cmd,
                input=prompt,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as exc:
            raise LLMTimeoutError(
                f"claude CLI timed out after {timeout}s",
                cause=exc,
            ) from exc

        # Recorded before the exit-code check so failed runs are captured too.
        record_provider_response(
            status=result.returncode,
            body={"stdout": result.stdout, "stderr": result.stderr},
        )
        if result.returncode != 0:
            raise LLMSubprocessError(
                f"claude CLI exited with code {result.returncode}",
                return_code=result.returncode,
                stderr=result.stderr,
            )

        response = self._build_response(prompt, result.stdout, config)
        self._consume_budget(config, response)
        return response

    async def async_execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Native async implementation using asyncio.create_subprocess_exec.

        Raises:
            LLMTimeoutError: The CLI did not finish within the timeout. The
                child process is killed and reaped before raising.
            LLMSubprocessError: The CLI exited with a non-zero status.
        """
        self._preflight_budget(config)
        cmd = self._build_command(config)

        timeout = config.timeout_seconds or self._config.timeout_seconds
        record_provider_request(command=cmd, payload={"stdin": prompt})

        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout_bytes, stderr_bytes = await asyncio.wait_for(
                proc.communicate(input=prompt.encode()),
                timeout=timeout,
            )
        except asyncio.TimeoutError as exc:
            # wait_for only cancels communicate(); without an explicit kill
            # the CLI keeps running after the caller has given up on it.
            await self._kill_and_reap(proc)
            raise LLMTimeoutError(
                f"claude CLI timed out after {timeout}s",
                cause=exc,
            ) from exc

        stdout = stdout_bytes.decode()
        stderr = stderr_bytes.decode()
        # Recorded before the exit-code check so failed runs are captured too.
        record_provider_response(
            status=proc.returncode,
            body={"stdout": stdout, "stderr": stderr},
        )
        if proc.returncode != 0:
            raise LLMSubprocessError(
                f"claude CLI exited with code {proc.returncode}",
                return_code=proc.returncode,
                stderr=stderr,
            )

        response = self._build_response(
            prompt,
            stdout,
            config,
            extra_metadata={"async": True},
        )
        self._consume_budget(config, response)
        return response

    def validate_config(self, config: RunConfig) -> bool:
        """Return True when ``<cli> --version`` exits with status 0.

        ``config`` is accepted for interface compatibility and is not read.
        A timeout (10 s) or any OS-level launch failure yields False.
        """
        try:
            result = subprocess.run(
                [self._cli_path, "--version"],
                capture_output=True,
                text=True,
                timeout=10,
            )
            return result.returncode == 0
        except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
            return False

    def _build_command(self, config: RunConfig) -> list[str]:
        """Assemble the argv list for one CLI invocation."""
        cmd = [self._cli_path, "--print"]
        if self._model:
            cmd.extend(["--model", self._model])

        json_schema = _json_schema_arg(config)
        if json_schema:
            cmd.extend(["--json-schema", json_schema])
            # With --json-schema alone the CLI prints conversational text on
            # stdout while the structured payload ships on a sidecar channel
            # callers cannot reach. --output-format json forces the structured
            # response (wrapped in an envelope) onto stdout.
            cmd.extend(["--output-format", "json"])
        return cmd

    def _resolve_cli_path(self) -> str:
        """Pick the CLI executable path.

        Precedence: ``LLM_CONNECT_CLAUDE_CLI_PATH``, ``CLAUDE_CLI_PATH``, then
        ``self._config.claude_cli_path``. A value other than the bare name
        ``"claude"`` is returned as-is; otherwise ``~/.local/bin/claude`` is
        used when it exists, falling back to ``"claude"``.
        """
        configured = (
            os.environ.get("LLM_CONNECT_CLAUDE_CLI_PATH")
            or os.environ.get("CLAUDE_CLI_PATH")
            or self._config.claude_cli_path
        )
        if configured and configured != "claude":
            return configured

        local_cli = Path.home() / ".local" / "bin" / "claude"
        if local_cli.exists():
            return str(local_cli)
        return configured or "claude"

    def _build_response(
        self,
        prompt: str,
        stdout: str,
        config: RunConfig,
        extra_metadata: Optional[dict] = None,
    ) -> LLMResponse:
        """Turn raw CLI stdout into an ``LLMResponse`` with estimated usage."""
        content = _unwrap_cli_json_envelope(stdout, config)
        prompt_tokens = estimate_tokens(prompt)
        completion_tokens = estimate_tokens(content)

        metadata = {
            "provider": "claude-code",
            "cli_path": self._cli_path,
        }
        if extra_metadata:
            metadata.update(extra_metadata)

        return LLMResponse(
            content=content,
            model=self._model or "claude-code-cli",
            usage={
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
            finish_reason="stop",
            metadata=metadata,
        )

    @staticmethod
    async def _kill_and_reap(proc) -> None:
        """Kill a still-running child process and wait for it to exit."""
        if proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                # The process exited between the check and the kill.
                pass
        await proc.wait()
|
||||
|
||||
|
||||
def _json_schema_arg(config: RunConfig) -> str | None:
    """Return the ``json_schema`` model parameter as a CLI argument string.

    A non-empty string is passed through untouched and a non-empty dict is
    serialized as compact JSON. Anything else (missing key, empty value, or
    another type) yields ``None``.
    """
    params = config.model_params or {}
    raw = params.get("json_schema")
    if isinstance(raw, str):
        return raw or None
    if isinstance(raw, dict) and raw:
        return json.dumps(raw, separators=(",", ":"))
    return None
|
||||
|
||||
|
||||
# Keys under which Claude Code's --output-format json envelope is known to
# carry the model's primary text. They are tried in this order as a fallback
# when no envelope field holds a JSON-parseable payload, such as plain prose
# generation.
_ENVELOPE_TEXT_FIELDS = (
    "result",
    "result_text",
    "content",
    "text",
    "output",
)
|
||||
|
||||
|
||||
def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str:
    """Pull the model's payload out of the CLI's --output-format json envelope.

    The envelope is only expected when a JSON schema was requested, so runs
    without one get ``stdout`` back untouched. The same happens when stdout
    is blank, is not valid JSON, or is JSON that is not an object.

    For a parsed envelope the lookup order is:

    1. The first field that carries JSON (see ``_find_json_payload``).
    2. The first key of ``_ENVELOPE_TEXT_FIELDS`` holding a string, or a
       dict/list (re-serialized with ``json.dumps``).
    3. The raw ``stdout``.

    Results from steps 1 and 2 are returned through ``_record_unwrap``.
    """
    schema_requested = bool(_json_schema_arg(config))
    trimmed = stdout.strip() if schema_requested else ""
    if not trimmed:
        return stdout

    try:
        parsed = json.loads(trimmed)
    except json.JSONDecodeError:
        return stdout
    if not isinstance(parsed, dict):
        return stdout

    structured = _find_json_payload(parsed)
    if structured is not None:
        return _record_unwrap(stdout, structured)

    for field in _ENVELOPE_TEXT_FIELDS:
        candidate = parsed.get(field)
        if isinstance(candidate, str):
            return _record_unwrap(stdout, candidate)
        if isinstance(candidate, (dict, list)):
            return _record_unwrap(stdout, json.dumps(candidate))

    return stdout
|
||||
|
||||
|
||||
def _find_json_payload(envelope: dict) -> str | None:
|
||||
"""Return the first envelope value that represents valid JSON."""
|
||||
for key, value in envelope.items():
|
||||
if key in _ENVELOPE_METADATA_KEYS:
|
||||
continue
|
||||
if isinstance(value, (dict, list)):
|
||||
return json.dumps(value)
|
||||
if isinstance(value, str):
|
||||
stripped = value.strip()
|
||||
if stripped.startswith(("{", "[")):
|
||||
try:
|
||||
json.loads(stripped)
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
return stripped
|
||||
return None
|
||||
|
||||
|
||||
# Envelope keys that carry telemetry, never the model payload.
# "modelUsage" is a per-model usage dict; without it here, a scan for the
# first dict/list-valued field could return it as the payload.
_ENVELOPE_METADATA_KEYS = frozenset(
    {
        "type",
        "subtype",
        "model",
        "usage",
        "modelUsage",
        "total_cost_usd",
        "cost_usd",
        "duration_ms",
        "duration_api_ms",
        "num_turns",
        "session_id",
        "is_error",
        "stop_reason",
        "permission_denials",
        "uuid",
    }
)
|
||||
|
||||
|
||||
def _record_unwrap(stdout: str, content: str) -> str:
    """Return ``content``, recording a transformation when it differs from ``stdout``.

    When the two strings are equal nothing is recorded. Otherwise
    ``record_adapter_transformation`` is called with the label
    ``"unwrap_cli_envelope"``, the raw stdout and the extracted content.
    """
    if content == stdout:
        return content
    record_adapter_transformation("unwrap_cli_envelope", stdout, content)
    return content
|
||||
|
||||
Reference in New Issue
Block a user