From 435da49263bb3159669a0eabf26b87080da8cc75 Mon Sep 17 00:00:00 2001 From: tegwick Date: Tue, 2 Jun 2026 12:44:25 +0200 Subject: [PATCH] Prefer JSON-bearing envelope fields, skip metadata, in Claude CLI unwrap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first CUST-WP-0045 canary retry after 9de0f49 still failed schema validation with `Expecting value: line 1 column 1 (char 0)`. The original allowlist returned envelope.result verbatim, which on longer prompts carries the model's conversational preamble ("Triage report generated and returned via structured output. Key signals: ..."), not the schema-enforced JSON. The actual structured payload lives in a different envelope field whose name varies across CLI versions. Make the unwrap order-aware: 1. Scan envelope fields and return the first one whose value parses as JSON (dict, list, or a string that loads cleanly). Skip well-known metadata keys (type, usage, total_cost_usd, etc.) so telemetry can never be mistaken for the model payload. 2. Fall back to the original text-field allowlist only when no field carries JSON, so non-schema callers via this same code path still see the model's prose. 3. Surface the raw envelope as a last resort. This is robust against unknown envelope shapes — as long as the schema-enforced JSON appears somewhere in a non-metadata field, the adapter will find it. 
Co-Authored-By: Claude Opus 4.7 --- llm_connect/claude_code.py | 79 +++++++++++++++++++++++++++------- tests/test_claude_code.py | 87 +++++++++++++++++++++++++++++--------- 2 files changed, 129 insertions(+), 37 deletions(-) diff --git a/llm_connect/claude_code.py b/llm_connect/claude_code.py index ab1c3cf..6a9c08f 100644 --- a/llm_connect/claude_code.py +++ b/llm_connect/claude_code.py @@ -192,20 +192,33 @@ def _json_schema_arg(config: RunConfig) -> str | None: return None -# Field names Claude Code's `--output-format json` envelope is known to use -# for the model's primary textual response. Probed in order; the first match -# wins. If none match (because the envelope shape is something we haven't -# seen), we return the raw envelope string so the caller still gets the data -# and can introspect it. +# Envelope field names Claude Code's `--output-format json` is known to use +# for the model's primary textual response. Used as a fall-back when no field +# carries a JSON-parseable payload (e.g. plain prose generation). _ENVELOPE_TEXT_FIELDS = ("result", "result_text", "content", "text", "output") def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str: """Extract the model's payload from Claude CLI's --output-format json envelope. - Only attempts unwrap when --json-schema was set, because that's the only - code path that adds --output-format json to the CLI invocation. Other - paths keep raw stdout (current behavior preserved). + Only runs when --json-schema was set (the only code path that adds + --output-format json to the CLI invocation). Other callers keep the raw + stdout behavior unchanged. + + Strategy: when --json-schema is set the caller wants JSON back, so prefer + any envelope field whose value is itself valid JSON (dict, list, or a + string that parses as JSON). This handles two observed envelope shapes: + + 1. Short prompts where the model emits the structured payload directly + in the `result` field as a JSON-encoded string. + 2. 
Longer prompts where the model emits a conversational preamble in + `result` and the schema-enforced JSON in a separate field (the exact + field name varies across CLI versions). + + Fall back to the first text field only when no JSON-bearing field exists, + so non-schema callers via this code path still see the model's prose. + Surface the raw envelope as a last resort so the operator can see what + shape arrived and extend the strategy. """ if not _json_schema_arg(config): return stdout @@ -218,13 +231,47 @@ def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str: return stdout if not isinstance(envelope, dict): return stdout + + json_payload = _find_json_payload(envelope) + if json_payload is not None: + return json_payload + for key in _ENVELOPE_TEXT_FIELDS: - if key in envelope: - value = envelope[key] - if isinstance(value, str): - return value - if isinstance(value, (dict, list)): - return json.dumps(value) - # Unknown envelope shape — surface it raw so the operator can see it - # in the validation error and we can update _ENVELOPE_TEXT_FIELDS. + value = envelope.get(key) + if isinstance(value, str): + return value + if isinstance(value, (dict, list)): + return json.dumps(value) + return stdout + + +def _find_json_payload(envelope: dict) -> str | None: + """Return the first envelope value that represents valid JSON. + + Insertion order is preserved by Python dicts, so this prefers fields the + CLI lists earliest in its envelope. Skips obvious metadata keys (cost, + usage, timing) so we never accidentally pick a numeric or telemetry value. 
+ """ + for key, value in envelope.items(): + if key in _ENVELOPE_METADATA_KEYS: + continue + if isinstance(value, (dict, list)): + return json.dumps(value) + if isinstance(value, str): + stripped = value.strip() + if stripped.startswith(("{", "[")): + try: + json.loads(stripped) + except json.JSONDecodeError: + continue + return stripped + return None + + +# Envelope keys that carry telemetry, never the model payload. +_ENVELOPE_METADATA_KEYS = frozenset({ + "type", "subtype", "model", "usage", "total_cost_usd", "cost_usd", + "duration_ms", "duration_api_ms", "num_turns", "session_id", + "is_error", "stop_reason", "permission_denials", "uuid", +}) diff --git a/tests/test_claude_code.py b/tests/test_claude_code.py index 69dd309..afca4d3 100644 --- a/tests/test_claude_code.py +++ b/tests/test_claude_code.py @@ -16,11 +16,13 @@ def test_execute_prompt_passes_json_schema_to_claude_cli(monkeypatch): calls["capture_output"] = capture_output calls["text"] = text calls["timeout"] = timeout - return SimpleNamespace( - returncode=0, - stdout='{"summary":"ok","recommendations":[]}', - stderr="", - ) + # With --output-format json the CLI returns an envelope. + envelope = { + "type": "result", + "result": '{"summary":"ok","recommendations":[]}', + } + import json as _json + return SimpleNamespace(returncode=0, stdout=_json.dumps(envelope), stderr="") monkeypatch.setattr("llm_connect.claude_code.subprocess.run", fake_run) adapter = ClaudeCodeAdapter(cli_path="/custom/claude") @@ -33,22 +35,18 @@ def test_execute_prompt_passes_json_schema_to_claude_cli(monkeypatch): ), ) - assert calls == { - "cmd": [ - "/custom/claude", - "--print", - "--json-schema", - '{"type":"object"}', - "--output-format", - "json", - ], - "input": "Produce a report.", - "capture_output": True, - "text": True, - "timeout": 42, - } - # Stdout shape that does not match any known envelope field is returned - # verbatim so the caller can introspect and we can extend the field list. 
+ assert calls["cmd"] == [ + "/custom/claude", + "--print", + "--json-schema", + '{"type":"object"}', + "--output-format", + "json", + ] + assert calls["input"] == "Produce a report." + assert calls["timeout"] == 42 + # Envelope's result field carries the schema-enforced JSON; the adapter + # unwraps it before returning to the caller. assert response.content == '{"summary":"ok","recommendations":[]}' @@ -80,6 +78,53 @@ def test_execute_prompt_unwraps_cli_json_envelope_result_field(monkeypatch): assert response.content == '{"summary":"ok","recommendations":[]}' +def test_execute_prompt_prefers_json_field_over_prose_preamble(monkeypatch): + """When the model adds a prose preamble in the envelope's primary text + field but the schema-enforced JSON is in a different field, the adapter + must find and return the JSON, not the preamble.""" + def fake_run(cmd, input, capture_output, text, timeout): # noqa: ANN001 + envelope = { + "type": "result", + "result": "Triage report generated and returned via structured output. 
Key signals: healthy.", + "structured_result": '{"summary":"healthy","recommendations":[]}', + "total_cost_usd": 0.002, + } + import json as _json + return SimpleNamespace(returncode=0, stdout=_json.dumps(envelope), stderr="") + + monkeypatch.setattr("llm_connect.claude_code.subprocess.run", fake_run) + adapter = ClaudeCodeAdapter(cli_path="/custom/claude") + + response = adapter.execute_prompt( + "Long triage prompt.", + RunConfig(model_params={"json_schema": {"type": "object"}}), + ) + + assert response.content == '{"summary":"healthy","recommendations":[]}' + + +def test_execute_prompt_skips_envelope_metadata_keys(monkeypatch): + """Metadata keys like `type`, `model`, `usage` must never be returned as + the model payload, even if their values look JSON-like.""" + def fake_run(cmd, input, capture_output, text, timeout): # noqa: ANN001 + envelope = { + "type": '{"this":"is_metadata"}', # decoy + "usage": {"input_tokens": 5}, # decoy dict + "result": '{"summary":"ok"}', + } + import json as _json + return SimpleNamespace(returncode=0, stdout=_json.dumps(envelope), stderr="") + + monkeypatch.setattr("llm_connect.claude_code.subprocess.run", fake_run) + adapter = ClaudeCodeAdapter(cli_path="/custom/claude") + + response = adapter.execute_prompt( + "Prompt.", RunConfig(model_params={"json_schema": {"type": "object"}}) + ) + + assert response.content == '{"summary":"ok"}' + + def test_execute_prompt_no_unwrap_without_json_schema(monkeypatch): """Without --json-schema we do not pass --output-format json, so the envelope unwrap path stays inert and raw stdout passes through."""