Prefer JSON-bearing envelope fields, skip metadata, in Claude CLI unwrap
Some checks failed
CI / test (3.10) (push) Has been cancelled
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled

The first CUST-WP-0045 canary retry after 9de0f49 still failed schema
validation with `Expecting value: line 1 column 1 (char 0)`. The original
allowlist returned envelope.result verbatim, which on longer prompts
carries the model's conversational preamble ("Triage report generated
and returned via structured output. Key signals: ..."), not the
schema-enforced JSON. The actual structured payload lives in a different
envelope field whose name varies across CLI versions.

Make the unwrap order-aware:
  1. Scan envelope fields and return the first one whose value parses as
     JSON (dict, list, or a string that loads cleanly). Skip well-known
     metadata keys (type, usage, total_cost_usd, etc.) so telemetry can
     never be mistaken for the model payload.
  2. Fall back to the original text-field allowlist only when no field
     carries JSON, so non-schema callers via this same code path still
     see the model's prose.
  3. Surface the raw envelope as last resort.

This is robust against unknown envelope shapes — as long as the schema-
enforced JSON appears in a non-metadata field, and no earlier non-metadata
field also carries JSON (the first JSON-bearing field wins), the adapter
will find it.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-02 12:44:25 +02:00
parent 9de0f495db
commit 435da49263
2 changed files with 129 additions and 37 deletions

View File

@@ -192,20 +192,33 @@ def _json_schema_arg(config: RunConfig) -> str | None:
return None return None
# Field names Claude Code's `--output-format json` envelope is known to use # Envelope field names Claude Code's `--output-format json` is known to use
# for the model's primary textual response. Probed in order; the first match # for the model's primary textual response. Used as a fall-back when no field
# wins. If none match (because the envelope shape is something we haven't # carries a JSON-parseable payload (e.g. plain prose generation).
# seen), we return the raw envelope string so the caller still gets the data
# and can introspect it.
_ENVELOPE_TEXT_FIELDS = ("result", "result_text", "content", "text", "output") _ENVELOPE_TEXT_FIELDS = ("result", "result_text", "content", "text", "output")
def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str: def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str:
"""Extract the model's payload from Claude CLI's --output-format json envelope. """Extract the model's payload from Claude CLI's --output-format json envelope.
Only attempts unwrap when --json-schema was set, because that's the only Only runs when --json-schema was set (the only code path that adds
code path that adds --output-format json to the CLI invocation. Other --output-format json to the CLI invocation). Other callers keep the raw
paths keep raw stdout (current behavior preserved). stdout behavior unchanged.
Strategy: when --json-schema is set the caller wants JSON back, so prefer
any envelope field whose value is itself valid JSON (dict, list, or a
string that parses as JSON). This handles two observed envelope shapes:
1. Short prompts where the model emits the structured payload directly
in the `result` field as a JSON-encoded string.
2. Longer prompts where the model emits a conversational preamble in
`result` and the schema-enforced JSON in a separate field (the exact
field name varies across CLI versions).
Fall back to the first text field only when no JSON-bearing field exists,
so non-schema callers via this code path still see the model's prose.
Surface the raw envelope as a last resort so the operator can see what
shape arrived and extend the strategy.
""" """
if not _json_schema_arg(config): if not _json_schema_arg(config):
return stdout return stdout
@@ -218,13 +231,47 @@ def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str:
return stdout return stdout
if not isinstance(envelope, dict): if not isinstance(envelope, dict):
return stdout return stdout
json_payload = _find_json_payload(envelope)
if json_payload is not None:
return json_payload
for key in _ENVELOPE_TEXT_FIELDS: for key in _ENVELOPE_TEXT_FIELDS:
if key in envelope: value = envelope.get(key)
value = envelope[key] if isinstance(value, str):
if isinstance(value, str): return value
return value if isinstance(value, (dict, list)):
if isinstance(value, (dict, list)): return json.dumps(value)
return json.dumps(value)
# Unknown envelope shape — surface it raw so the operator can see it
# in the validation error and we can update _ENVELOPE_TEXT_FIELDS.
return stdout return stdout
def _find_json_payload(envelope: dict) -> str | None:
"""Return the first envelope value that represents valid JSON.
Insertion order is preserved by Python dicts, so this prefers fields the
CLI lists earliest in its envelope. Skips obvious metadata keys (cost,
usage, timing) so we never accidentally pick a numeric or telemetry value.
"""
for key, value in envelope.items():
if key in _ENVELOPE_METADATA_KEYS:
continue
if isinstance(value, (dict, list)):
return json.dumps(value)
if isinstance(value, str):
stripped = value.strip()
if stripped.startswith(("{", "[")):
try:
json.loads(stripped)
except json.JSONDecodeError:
continue
return stripped
return None
# Envelope keys that carry telemetry, never the model payload.
_ENVELOPE_METADATA_KEYS = frozenset({
"type", "subtype", "model", "usage", "total_cost_usd", "cost_usd",
"duration_ms", "duration_api_ms", "num_turns", "session_id",
"is_error", "stop_reason", "permission_denials", "uuid",
})

View File

@@ -16,11 +16,13 @@ def test_execute_prompt_passes_json_schema_to_claude_cli(monkeypatch):
calls["capture_output"] = capture_output calls["capture_output"] = capture_output
calls["text"] = text calls["text"] = text
calls["timeout"] = timeout calls["timeout"] = timeout
return SimpleNamespace( # With --output-format json the CLI returns an envelope.
returncode=0, envelope = {
stdout='{"summary":"ok","recommendations":[]}', "type": "result",
stderr="", "result": '{"summary":"ok","recommendations":[]}',
) }
import json as _json
return SimpleNamespace(returncode=0, stdout=_json.dumps(envelope), stderr="")
monkeypatch.setattr("llm_connect.claude_code.subprocess.run", fake_run) monkeypatch.setattr("llm_connect.claude_code.subprocess.run", fake_run)
adapter = ClaudeCodeAdapter(cli_path="/custom/claude") adapter = ClaudeCodeAdapter(cli_path="/custom/claude")
@@ -33,22 +35,18 @@ def test_execute_prompt_passes_json_schema_to_claude_cli(monkeypatch):
), ),
) )
assert calls == { assert calls["cmd"] == [
"cmd": [ "/custom/claude",
"/custom/claude", "--print",
"--print", "--json-schema",
"--json-schema", '{"type":"object"}',
'{"type":"object"}', "--output-format",
"--output-format", "json",
"json", ]
], assert calls["input"] == "Produce a report."
"input": "Produce a report.", assert calls["timeout"] == 42
"capture_output": True, # Envelope's result field carries the schema-enforced JSON; the adapter
"text": True, # unwraps it before returning to the caller.
"timeout": 42,
}
# Stdout shape that does not match any known envelope field is returned
# verbatim so the caller can introspect and we can extend the field list.
assert response.content == '{"summary":"ok","recommendations":[]}' assert response.content == '{"summary":"ok","recommendations":[]}'
@@ -80,6 +78,53 @@ def test_execute_prompt_unwraps_cli_json_envelope_result_field(monkeypatch):
assert response.content == '{"summary":"ok","recommendations":[]}' assert response.content == '{"summary":"ok","recommendations":[]}'
def test_execute_prompt_prefers_json_field_over_prose_preamble(monkeypatch):
    """A prose preamble in `result` must not shadow the JSON payload.

    The fake CLI envelope carries conversational text in `result` and the
    JSON string in `structured_result`; the adapter is expected to return
    the JSON, not the preamble.
    """
    import json as _json

    expected = '{"summary":"healthy","recommendations":[]}'
    stdout = _json.dumps(
        {
            "type": "result",
            "result": "Triage report generated and returned via structured output. Key signals: healthy.",
            "structured_result": expected,
            "total_cost_usd": 0.002,
        }
    )

    def fake_run(cmd, input, capture_output, text, timeout):  # noqa: ANN001
        return SimpleNamespace(returncode=0, stdout=stdout, stderr="")

    monkeypatch.setattr("llm_connect.claude_code.subprocess.run", fake_run)

    response = ClaudeCodeAdapter(cli_path="/custom/claude").execute_prompt(
        "Long triage prompt.",
        RunConfig(model_params={"json_schema": {"type": "object"}}),
    )

    assert response.content == expected
def test_execute_prompt_skips_envelope_metadata_keys(monkeypatch):
    """JSON-looking values under metadata keys are never returned as content.

    `type` and `usage` act as decoys placed ahead of `result`; only the
    `result` string should come back from the adapter.
    """
    import json as _json

    decoy_envelope = {
        "type": '{"this":"is_metadata"}',  # decoy string
        "usage": {"input_tokens": 5},  # decoy dict
        "result": '{"summary":"ok"}',
    }

    def fake_run(cmd, input, capture_output, text, timeout):  # noqa: ANN001
        return SimpleNamespace(
            returncode=0, stdout=_json.dumps(decoy_envelope), stderr=""
        )

    monkeypatch.setattr("llm_connect.claude_code.subprocess.run", fake_run)

    config = RunConfig(model_params={"json_schema": {"type": "object"}})
    response = ClaudeCodeAdapter(cli_path="/custom/claude").execute_prompt(
        "Prompt.", config
    )

    assert response.content == '{"summary":"ok"}'
def test_execute_prompt_no_unwrap_without_json_schema(monkeypatch): def test_execute_prompt_no_unwrap_without_json_schema(monkeypatch):
"""Without --json-schema we do not pass --output-format json, so the """Without --json-schema we do not pass --output-format json, so the
envelope unwrap path stays inert and raw stdout passes through.""" envelope unwrap path stays inert and raw stdout passes through."""