Prefer JSON-bearing envelope fields, skip metadata, in Claude CLI unwrap

The first CUST-WP-0045 canary retry after 9de0f49 still failed schema validation with `Expecting value: line 1 column 1 (char 0)`. The original allowlist returned envelope.result verbatim, which on longer prompts carries the model's conversational preamble ("Triage report generated and returned via structured output. Key signals: ..."), not the schema-enforced JSON. The actual structured payload lives in a different envelope field whose name varies across CLI versions.

Make the unwrap order-aware:

1. Scan envelope fields and return the first one whose value parses as JSON (dict, list, or a string that loads cleanly). Skip well-known metadata keys (type, usage, total_cost_usd, etc.) so telemetry can never be mistaken for the model payload.
2. Fall back to the original text-field allowlist only when no field carries JSON, so non-schema callers via this same code path still see the model's prose.
3. Surface the raw envelope as last resort.

This is robust against unknown envelope shapes — as long as the schema-enforced JSON appears somewhere in a non-metadata field, the adapter will find it.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 12:44:25 +02:00
parent 9de0f495db
commit 435da49263
2 changed files with 129 additions and 37 deletions
--- a/llm_connect/claude_code.py
+++ b/llm_connect/claude_code.py
@@ -192,20 +192,33 @@ def _json_schema_arg(config: RunConfig) -> str | None:
    return None


-# Field names Claude Code's `--output-format json` envelope is known to use
-# for the model's primary textual response. Probed in order; the first match
-# wins. If none match (because the envelope shape is something we haven't
-# seen), we return the raw envelope string so the caller still gets the data
-# and can introspect it.
+# Envelope field names Claude Code's `--output-format json` is known to use
+# for the model's primary textual response. Used as a fall-back when no field
+# carries a JSON-parseable payload (e.g. plain prose generation).
 _ENVELOPE_TEXT_FIELDS = ("result", "result_text", "content", "text", "output")


 def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str:
    """Extract the model's payload from Claude CLI's --output-format json envelope.

-    Only attempts unwrap when --json-schema was set, because that's the only
-    code path that adds --output-format json to the CLI invocation. Other
-    paths keep raw stdout (current behavior preserved).
+    Only runs when --json-schema was set (the only code path that adds
+    --output-format json to the CLI invocation). Other callers keep the raw
+    stdout behavior unchanged.
+
+    Strategy: when --json-schema is set the caller wants JSON back, so prefer
+    any envelope field whose value is itself valid JSON (dict, list, or a
+    string that parses as JSON). This handles two observed envelope shapes:
+
+    1. Short prompts where the model emits the structured payload directly
+       in the `result` field as a JSON-encoded string.
+    2. Longer prompts where the model emits a conversational preamble in
+       `result` and the schema-enforced JSON in a separate field (the exact
+       field name varies across CLI versions).
+
+    Fall back to the first text field only when no JSON-bearing field exists,
+    so non-schema callers via this code path still see the model's prose.
+    Surface the raw envelope as a last resort so the operator can see what
+    shape arrived and extend the strategy.
    """
    if not _json_schema_arg(config):
        return stdout
@@ -218,13 +231,47 @@ def _unwrap_cli_json_envelope(stdout: str, config: RunConfig) -> str:
        return stdout
    if not isinstance(envelope, dict):
        return stdout
+
+    json_payload = _find_json_payload(envelope)
+    if json_payload is not None:
+        return json_payload
+
    for key in _ENVELOPE_TEXT_FIELDS:
-        if key in envelope:
-            value = envelope[key]
-            if isinstance(value, str):
-                return value
-            if isinstance(value, (dict, list)):
-                return json.dumps(value)
-    # Unknown envelope shape — surface it raw so the operator can see it
-    # in the validation error and we can update _ENVELOPE_TEXT_FIELDS.
+        value = envelope.get(key)
+        if isinstance(value, str):
+            return value
+        if isinstance(value, (dict, list)):
+            return json.dumps(value)
+
    return stdout
+
+
+def _find_json_payload(envelope: dict) -> str | None:
+    """Return the first envelope value that represents valid JSON.
+
+    Insertion order is preserved by Python dicts, so this prefers fields the
+    CLI lists earliest in its envelope. Skips obvious metadata keys (cost,
+    usage, timing) so we never accidentally pick a numeric or telemetry value.
+    """
+    for key, value in envelope.items():
+        if key in _ENVELOPE_METADATA_KEYS:
+            continue
+        if isinstance(value, (dict, list)):
+            return json.dumps(value)
+        if isinstance(value, str):
+            stripped = value.strip()
+            if stripped.startswith(("{", "[")):
+                try:
+                    json.loads(stripped)
+                except json.JSONDecodeError:
+                    continue
+                return stripped
+    return None
+
+
+# Envelope keys that carry telemetry, never the model payload.
+_ENVELOPE_METADATA_KEYS = frozenset({
+    "type", "subtype", "model", "usage", "total_cost_usd", "cost_usd",
+    "duration_ms", "duration_api_ms", "num_turns", "session_id",
+    "is_error", "stop_reason", "permission_denials", "uuid",
+})
--- a/tests/test_claude_code.py
+++ b/tests/test_claude_code.py
@@ -16,11 +16,13 @@ def test_execute_prompt_passes_json_schema_to_claude_cli(monkeypatch):
        calls["capture_output"] = capture_output
        calls["text"] = text
        calls["timeout"] = timeout
-        return SimpleNamespace(
-            returncode=0,
-            stdout='{"summary":"ok","recommendations":[]}',
-            stderr="",
-        )
+        # With --output-format json the CLI returns an envelope.
+        envelope = {
+            "type": "result",
+            "result": '{"summary":"ok","recommendations":[]}',
+        }
+        import json as _json
+        return SimpleNamespace(returncode=0, stdout=_json.dumps(envelope), stderr="")

    monkeypatch.setattr("llm_connect.claude_code.subprocess.run", fake_run)
    adapter = ClaudeCodeAdapter(cli_path="/custom/claude")
@@ -33,22 +35,18 @@ def test_execute_prompt_passes_json_schema_to_claude_cli(monkeypatch):
        ),
    )

-    assert calls == {
-        "cmd": [
-            "/custom/claude",
-            "--print",
-            "--json-schema",
-            '{"type":"object"}',
-            "--output-format",
-            "json",
-        ],
-        "input": "Produce a report.",
-        "capture_output": True,
-        "text": True,
-        "timeout": 42,
-    }
-    # Stdout shape that does not match any known envelope field is returned
-    # verbatim so the caller can introspect and we can extend the field list.
+    assert calls["cmd"] == [
+        "/custom/claude",
+        "--print",
+        "--json-schema",
+        '{"type":"object"}',
+        "--output-format",
+        "json",
+    ]
+    assert calls["input"] == "Produce a report."
+    assert calls["timeout"] == 42
+    # Envelope's result field carries the schema-enforced JSON; the adapter
+    # unwraps it before returning to the caller.
    assert response.content == '{"summary":"ok","recommendations":[]}'


@@ -80,6 +78,53 @@ def test_execute_prompt_unwraps_cli_json_envelope_result_field(monkeypatch):
    assert response.content == '{"summary":"ok","recommendations":[]}'


+def test_execute_prompt_prefers_json_field_over_prose_preamble(monkeypatch):
+    """When the model adds a prose preamble in the envelope's primary text
+    field but the schema-enforced JSON is in a different field, the adapter
+    must find and return the JSON, not the preamble."""
+    def fake_run(cmd, input, capture_output, text, timeout):  # noqa: ANN001
+        envelope = {
+            "type": "result",
+            "result": "Triage report generated and returned via structured output. Key signals: healthy.",
+            "structured_result": '{"summary":"healthy","recommendations":[]}',
+            "total_cost_usd": 0.002,
+        }
+        import json as _json
+        return SimpleNamespace(returncode=0, stdout=_json.dumps(envelope), stderr="")
+
+    monkeypatch.setattr("llm_connect.claude_code.subprocess.run", fake_run)
+    adapter = ClaudeCodeAdapter(cli_path="/custom/claude")
+
+    response = adapter.execute_prompt(
+        "Long triage prompt.",
+        RunConfig(model_params={"json_schema": {"type": "object"}}),
+    )
+
+    assert response.content == '{"summary":"healthy","recommendations":[]}'
+
+
+def test_execute_prompt_skips_envelope_metadata_keys(monkeypatch):
+    """Metadata keys like `type`, `model`, `usage` must never be returned as
+    the model payload, even if their values look JSON-like."""
+    def fake_run(cmd, input, capture_output, text, timeout):  # noqa: ANN001
+        envelope = {
+            "type": '{"this":"is_metadata"}',  # decoy
+            "usage": {"input_tokens": 5},      # decoy dict
+            "result": '{"summary":"ok"}',
+        }
+        import json as _json
+        return SimpleNamespace(returncode=0, stdout=_json.dumps(envelope), stderr="")
+
+    monkeypatch.setattr("llm_connect.claude_code.subprocess.run", fake_run)
+    adapter = ClaudeCodeAdapter(cli_path="/custom/claude")
+
+    response = adapter.execute_prompt(
+        "Prompt.", RunConfig(model_params={"json_schema": {"type": "object"}})
+    )
+
+    assert response.content == '{"summary":"ok"}'
+
+
 def test_execute_prompt_no_unwrap_without_json_schema(monkeypatch):
    """Without --json-schema we do not pass --output-format json, so the
    envelope unwrap path stays inert and raw stdout passes through."""