generated from coulomb/repo-seed
Implement llm-connect ADHOC diagnostics
This commit is contained in:
81
tests/test_payload.py
Normal file
81
tests/test_payload.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from llm_connect._payload import merge_gemini_model_params, merge_openai_chat_model_params
|
||||
|
||||
|
||||
# JSON Schema used by the payload tests: an object with a string ``summary``
# and a list-of-strings ``recommendations``, both required.
STRUCTURED_SCHEMA = {
    "type": "object",
    "properties": {
        "summary": {"type": "string"},
        "recommendations": {
            "type": "array",
            "items": {"type": "string"},
        },
    },
    "required": ["summary", "recommendations"],
}


# Model params passed to both merge helpers in the tests below; the tests
# assert which of these keys end up in the provider payload and which do not.
ACTIVITY_CORE_MODEL_PARAMS = dict(
    reasoning_effort="medium",
    max_depth=4,
    json_schema=STRUCTURED_SCHEMA,
    top_p=0.8,
)
|
||||
|
||||
|
||||
def test_openai_chat_model_params_translate_activity_core_shape():
    """Merging the activity-core params into an OpenAI chat payload.

    Expects the schema to appear as a non-strict ``response_format``,
    ``top_p`` to be copied through, and the remaining activity-core keys
    to be absent from the payload.
    """
    request = {
        "model": "gpt-4.1-mini",
        "messages": [{"role": "user", "content": "triage"}],
        "temperature": 0.2,
        "max_tokens": 200,
    }
    expected_response_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "structured_output",
            "schema": STRUCTURED_SCHEMA,
            "strict": False,
        },
    }

    # The helper's return value is not used; the test inspects the dict it was given.
    merge_openai_chat_model_params(request, ACTIVITY_CORE_MODEL_PARAMS)

    assert request["response_format"] == expected_response_format
    assert request["top_p"] == 0.8
    for dropped_key in ("reasoning_effort", "max_depth", "json_schema"):
        assert dropped_key not in request
|
||||
|
||||
|
||||
def test_openai_chat_model_params_preserve_explicit_response_format():
    """An explicit ``response_format`` is kept when ``json_schema`` is also given."""
    caller_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "custom",
            "schema": STRUCTURED_SCHEMA,
            "strict": True,
        },
    }
    model_params = {"json_schema": STRUCTURED_SCHEMA, "response_format": caller_format}
    request = {"model": "gpt-4.1-mini", "messages": []}

    merge_openai_chat_model_params(request, model_params)

    assert request["response_format"] == caller_format
|
||||
|
||||
|
||||
def test_gemini_model_params_translate_activity_core_shape():
    """Merging the activity-core params into a Gemini payload.

    Expects the schema and JSON mime type inside ``generationConfig``,
    ``top_p`` renamed to ``topP`` there, and none of the activity-core keys
    at the top level of the payload.
    """
    request = {
        "contents": [{"role": "user", "parts": [{"text": "triage"}]}],
        "generationConfig": {"temperature": 0.2, "maxOutputTokens": 200},
    }

    merge_gemini_model_params(request, ACTIVITY_CORE_MODEL_PARAMS)

    # Looked up after the merge so the assertions see whatever dict is there now.
    generation = request["generationConfig"]
    assert generation["responseMimeType"] == "application/json"
    assert generation["responseSchema"] == STRUCTURED_SCHEMA
    assert generation["topP"] == 0.8
    for dropped_key in ("reasoning_effort", "max_depth", "json_schema"):
        assert dropped_key not in request
|
||||
62
tests/test_replay.py
Normal file
62
tests/test_replay.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from llm_connect.replay import parse_audit_record
|
||||
|
||||
|
||||
# JSON Schema attached to the audit records below: an object with a string
# ``summary`` and a list-of-strings ``recommendations``, both required.
STRUCTURED_SCHEMA = {
    "type": "object",
    "properties": {
        "summary": {"type": "string"},
        "recommendations": {
            "type": "array",
            "items": {"type": "string"},
        },
    },
    "required": ["summary", "recommendations"],
}
|
||||
|
||||
|
||||
def test_replay_parses_openai_style_provider_response():
    """Replaying a record whose provider body uses the ``choices[].message`` shape."""
    recorded = '{"summary":"ok","recommendations":[]}'
    provider_body = {"choices": [{"message": {"content": recorded}}]}
    record = {
        "provider": "openrouter",
        "config": {"model_params": {"json_schema": STRUCTURED_SCHEMA}},
        "provider_response": {"status": 200, "body": provider_body},
        "parsed_content": recorded,
    }

    report = parse_audit_record(record)

    assert report["parsed_content"] == recorded
    assert report["matches_recorded_content"] is True
    assert report["structured_output"] == {"checked": True, "valid": True}
|
||||
|
||||
|
||||
def test_replay_reuses_claude_code_envelope_unwrapper():
    """Replaying a ``claude-code`` record whose body is captured stdout/stderr.

    The stdout envelope carries both a prose ``result`` and a JSON
    ``structured_result``; the expected parsed content is the latter.
    """
    recorded = '{"summary":"ok","recommendations":[]}'
    stdout = (
        '{"type":"result","result":"prose",'
        '"structured_result":"{\\"summary\\":\\"ok\\",'
        '\\"recommendations\\":[]}"}'
    )
    record = {
        "provider": "claude-code",
        "config": {"model_params": {"json_schema": STRUCTURED_SCHEMA}},
        "provider_response": {
            "status": 0,
            "body": {"stdout": stdout, "stderr": ""},
        },
        "parsed_content": recorded,
    }

    report = parse_audit_record(record)

    assert report["parsed_content"] == recorded
    assert report["matches_recorded_content"] is True
    assert report["structured_output"] == {"checked": True, "valid": True}
|
||||
@@ -2,14 +2,22 @@
|
||||
Tests for LLMServer HTTP serve mode (FR-1).
|
||||
"""
|
||||
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import json
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
import pytest
|
||||
|
||||
from llm_connect._diagnostics import (
|
||||
record_adapter_transformation,
|
||||
record_provider_request,
|
||||
record_provider_response,
|
||||
)
|
||||
from llm_connect.adapter import MockLLMAdapter, ErrorLLMAdapter
|
||||
from llm_connect.models import RunConfig
|
||||
from llm_connect.models import LLMResponse, RunConfig
|
||||
from llm_connect.server import LLMServer
|
||||
|
||||
|
||||
@@ -45,6 +53,35 @@ def _post(url: str, body: dict) -> tuple[int, dict]:
|
||||
return exc.code, json.loads(exc.read())
|
||||
|
||||
|
||||
class DiagnosticLLMAdapter(MockLLMAdapter):
    """Mock adapter that calls the diagnostics recorders around each prompt.

    Every call records one provider request, one provider response, and one
    adapter transformation, in that order, around the parent's response.
    """

    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Return the parent's mock response after recording diagnostics for it."""
        request_payload = {"prompt": prompt, "model": config.model_name}
        # Carries a bearer token; the server tests assert it is shown redacted.
        request_headers = {"Authorization": "Bearer secret-token"}
        record_provider_request(
            url="https://provider.example/v1/chat",
            payload=request_payload,
            headers=request_headers,
        )

        result = super().execute_prompt(prompt, config)
        metadata = result.metadata
        metadata["provider"] = "diagnostic"
        metadata["response_id"] = "diag-response"

        provider_body = {"id": "diag-response", "content": result.content}
        record_provider_response(status=200, body=provider_body)
        record_adapter_transformation(
            "diagnostic_transform",
            {"before": prompt},
            {"after": result.content},
        )
        return result
|
||||
|
||||
|
||||
class BarrierLLMAdapter(MockLLMAdapter):
    """Mock adapter whose calls rendezvous on a two-party barrier before replying."""

    def __init__(self):
        super().__init__(mock_response="parallel")
        # Two parties: a call only proceeds once a second call is waiting too.
        self._barrier = threading.Barrier(parties=2)

    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Wait up to 2 seconds for a second caller, then return the mock response."""
        self._barrier.wait(timeout=2.0)
        reply = super().execute_prompt(prompt, config)
        return reply
|
||||
|
||||
|
||||
class TestHealth:
|
||||
def test_health_returns_200(self, server):
|
||||
status, body = _get(f"http://127.0.0.1:{server.port}/health")
|
||||
@@ -65,6 +102,7 @@ class TestExecute:
|
||||
assert status == 200
|
||||
assert body["content"] == "hello world"
|
||||
assert body["finish_reason"] == "stop"
|
||||
assert "debug" not in body
|
||||
|
||||
def test_response_includes_usage(self, server):
|
||||
status, body = _post(
|
||||
@@ -150,3 +188,86 @@ class TestExecute:
|
||||
)
|
||||
assert status == 400
|
||||
assert "config" in body["error"]
|
||||
|
||||
def test_debug_query_returns_diagnostics(self):
    """POSTing to ``/execute?debug=1`` returns a ``debug`` section with diagnostics."""
    srv = LLMServer(adapter=DiagnosticLLMAdapter(mock_response="debug body"), port=0)
    srv.start()
    try:
        endpoint = f"http://127.0.0.1:{srv.port}/execute?debug=1"
        request_body = {"prompt": "inspect", "config": {"model_name": "diagnostic-model"}}
        status, body = _post(endpoint, request_body)
    finally:
        srv.stop()

    assert status == 200
    assert body["content"] == "debug body"

    diagnostics = body["debug"]
    provider_request = diagnostics["provider_request"]
    assert provider_request["payload"] == {
        "prompt": "inspect",
        "model": "diagnostic-model",
    }
    assert provider_request["headers_redacted"]["Authorization"] == "Bearer <redacted>"
    assert diagnostics["provider_response"]["status"] == 200
    assert diagnostics["adapter_transformations"][0]["step"] == "diagnostic_transform"
|
||||
|
||||
def test_debug_env_returns_diagnostics(self, monkeypatch):
    """With ``LLM_CONNECT_DEBUG=1`` set, a plain ``/execute`` call includes ``debug``."""
    monkeypatch.setenv("LLM_CONNECT_DEBUG", "1")
    srv = LLMServer(adapter=DiagnosticLLMAdapter(mock_response="debug body"), port=0)
    srv.start()
    try:
        endpoint = f"http://127.0.0.1:{srv.port}/execute"
        status, body = _post(endpoint, {"prompt": "inspect"})
    finally:
        srv.stop()

    assert status == 200
    assert "debug" in body
|
||||
|
||||
def test_audit_dir_records_replayable_call(self, monkeypatch, tmp_path):
    """With ``LLM_CONNECT_AUDIT_DIR`` set, one call leaves one JSON audit record.

    The HTTP response itself must not contain ``debug``; the diagnostics are
    expected in the file written under the audit directory.
    """
    monkeypatch.setenv("LLM_CONNECT_AUDIT_DIR", str(tmp_path))
    srv = LLMServer(adapter=DiagnosticLLMAdapter(mock_response="audit body"), port=0)
    srv.start()
    try:
        endpoint = f"http://127.0.0.1:{srv.port}/execute"
        request_body = {"prompt": "audit me", "config": {"model_name": "audit-model"}}
        status, body = _post(endpoint, request_body)
    finally:
        srv.stop()

    assert status == 200
    assert "debug" not in body

    audit_files = list(tmp_path.glob("*.json"))
    assert len(audit_files) == 1

    record = json.loads(audit_files[0].read_text(encoding="utf-8"))
    assert record["prompt"] == "audit me"
    assert record["config"]["model_name"] == "audit-model"
    assert record["parsed_content"] == "audit body"
    assert record["provider_request"]["headers_redacted"]["Authorization"] == "Bearer <redacted>"
    assert record["provider_response"]["body"]["id"] == "diag-response"
    assert record["latency_seconds"] >= 0
|
||||
|
||||
def test_execute_requests_run_concurrently(self):
    """Two simultaneous ``/execute`` calls both succeed and finish quickly.

    The adapter blocks each call on a two-party barrier, so both requests
    can only return 200 within the time limit if they are in flight together.
    """
    srv = LLMServer(adapter=BarrierLLMAdapter(), port=0)
    srv.start()
    try:
        endpoint = f"http://127.0.0.1:{srv.port}/execute"
        started = time.monotonic()
        with ThreadPoolExecutor(max_workers=2) as pool:
            pending = []
            for idx in range(2):
                pending.append(pool.submit(_post, endpoint, {"prompt": f"request {idx}"}))
            results = [job.result(timeout=3.0) for job in pending]
        elapsed = time.monotonic() - started
    finally:
        srv.stop()

    statuses = [status for status, _body in results]
    assert statuses == [200, 200]
    assert elapsed < 1.5
|
||||
|
||||
142
tests/test_structured_output_smoke.py
Normal file
142
tests/test_structured_output_smoke.py
Normal file
@@ -0,0 +1,142 @@
|
||||
import json
|
||||
|
||||
from llm_connect.gemini import GeminiAdapter
|
||||
from llm_connect.models import RunConfig
|
||||
from llm_connect.openai import OpenAIAdapter
|
||||
from llm_connect.openrouter import OpenRouterAdapter
|
||||
|
||||
|
||||
# JSON Schema requested in the smoke tests: an object with a string
# ``summary`` and a list-of-strings ``recommendations``, both required.
STRUCTURED_SCHEMA = {
    "type": "object",
    "properties": {
        "summary": {"type": "string"},
        "recommendations": {
            "type": "array",
            "items": {"type": "string"},
        },
    },
    "required": ["summary", "recommendations"],
}
|
||||
|
||||
|
||||
# Run configuration shared by every adapter smoke test in this file. Its
# ``model_name`` differs from the models the adapters are constructed with;
# the tests assert that the adapter's own model is the one sent and reported.
SMOKE_CONFIG = RunConfig(
    model_name="gpt-4",
    temperature=0.1,
    max_tokens=300,
    model_params=dict(
        reasoning_effort="medium",
        max_depth=3,
        json_schema=STRUCTURED_SCHEMA,
    ),
)
|
||||
|
||||
|
||||
def test_openrouter_structured_output_payload_and_model_routing(monkeypatch):
    """OpenRouter adapter sends its own model and a non-strict schema format.

    ``post_json`` is replaced by a fake that captures the outgoing request
    and returns a canned chat-completions style response.
    """
    captured: dict[str, object] = {}
    reply_text = json.dumps({"summary": "ok", "recommendations": ["keep payload clean"]})

    def fake_post_json(url, payload, headers=None, timeout=300):  # noqa: ANN001
        captured.update(url=url, payload=payload, headers=headers, timeout=timeout)
        choice = {"message": {"content": reply_text}, "finish_reason": "stop"}
        return {
            "id": "or-response",
            "model": payload["model"],
            "choices": [choice],
            "usage": {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3},
        }

    monkeypatch.setattr("llm_connect.openrouter.post_json", fake_post_json)
    adapter = OpenRouterAdapter(
        model="anthropic/claude-sonnet-4",
        api_key="or-test",
        api_base="https://openrouter.example/api/v1",
    )

    response = adapter.execute_prompt("Return JSON.", SMOKE_CONFIG)
    sent = captured["payload"]

    assert response.model == "anthropic/claude-sonnet-4"
    assert sent["model"] == "anthropic/claude-sonnet-4"
    assert sent["response_format"]["json_schema"]["schema"] == STRUCTURED_SCHEMA
    assert sent["response_format"]["json_schema"]["strict"] is False
    for dropped_key in ("reasoning_effort", "max_depth", "json_schema"):
        assert dropped_key not in sent
|
||||
|
||||
|
||||
def test_openai_structured_output_payload(monkeypatch):
    """OpenAI adapter sends its own model and the schema as ``response_format``.

    ``post_json`` is replaced by a fake that captures the outgoing payload
    and returns a canned chat-completions style response.
    """
    captured: dict[str, object] = {}
    reply_text = json.dumps({"summary": "ok", "recommendations": []})

    def fake_post_json(url, payload, headers=None, timeout=300):  # noqa: ANN001
        captured["payload"] = payload
        choice = {"message": {"content": reply_text}, "finish_reason": "stop"}
        return {
            "id": "oa-response",
            "model": payload["model"],
            "choices": [choice],
            "usage": {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3},
        }

    monkeypatch.setattr("llm_connect.openai.post_json", fake_post_json)
    adapter = OpenAIAdapter(model="gpt-4.1-mini", api_key="sk-test")

    response = adapter.execute_prompt("Return JSON.", SMOKE_CONFIG)
    sent = captured["payload"]

    assert response.model == "gpt-4.1-mini"
    assert sent["model"] == "gpt-4.1-mini"
    assert sent["response_format"]["json_schema"]["schema"] == STRUCTURED_SCHEMA
    for dropped_key in ("reasoning_effort", "max_depth", "json_schema"):
        assert dropped_key not in sent
|
||||
|
||||
|
||||
def test_gemini_structured_output_payload(monkeypatch):
    """Gemini adapter puts the schema and JSON mime type in ``generationConfig``.

    ``post_json`` is replaced by a fake that captures the outgoing request
    and returns a canned ``candidates`` style response.
    """
    captured: dict[str, object] = {}
    reply_text = json.dumps({"summary": "ok", "recommendations": []})

    def fake_post_json(url, payload, headers=None, timeout=300):  # noqa: ANN001
        captured["url"] = url
        captured["payload"] = payload
        candidate = {
            "content": {"parts": [{"text": reply_text}]},
            "finishReason": "STOP",
        }
        usage = {
            "promptTokenCount": 1,
            "candidatesTokenCount": 2,
            "totalTokenCount": 3,
        }
        return {"candidates": [candidate], "usageMetadata": usage}

    monkeypatch.setattr("llm_connect.gemini.post_json", fake_post_json)
    adapter = GeminiAdapter(model="gemini-2.5-flash", api_key="gemini-test")

    response = adapter.execute_prompt("Return JSON.", SMOKE_CONFIG)
    sent = captured["payload"]

    assert response.model == "gemini-2.5-flash"
    assert sent["generationConfig"]["responseMimeType"] == "application/json"
    assert sent["generationConfig"]["responseSchema"] == STRUCTURED_SCHEMA
    for dropped_key in ("reasoning_effort", "max_depth", "json_schema"):
        assert dropped_key not in sent
|
||||
Reference in New Issue
Block a user