generated from coulomb/repo-seed
Add activity-core LLM endpoint support
This commit is contained in:
143
tests/test_profiles.py
Normal file
143
tests/test_profiles.py
Normal file
@@ -0,0 +1,143 @@
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from llm_connect.adapter import MockLLMAdapter
|
||||
from llm_connect.exceptions import LLMConfigurationError
|
||||
from llm_connect.models import RunConfig
|
||||
from llm_connect.profiles import (
|
||||
CUSTODIAN_TRIAGE_BALANCED,
|
||||
ProfiledLLMAdapter,
|
||||
RuntimeProfile,
|
||||
default_runtime_profiles,
|
||||
)
|
||||
|
||||
|
||||
def test_profile_dispatch_merges_defaults_and_request_params():
    """A request naming a profile runs on the profile's adapter with merged config.

    The profile supplies the scalar settings and one ``model_params`` entry;
    the request supplies another ``model_params`` entry. The config seen by
    the factory-built adapter must carry both.
    """
    spawned: list[MockLLMAdapter] = []

    def build_adapter(provider: str, model: str) -> MockLLMAdapter:
        # Record each adapter so the test can inspect the config it received.
        instance = MockLLMAdapter(mock_response=f"{provider}:{model}")
        spawned.append(instance)
        return spawned[-1]

    profile_defaults = RunConfig(
        model_name="triage-model",
        temperature=0.2,
        max_tokens=1800,
        max_depth=2,
        timeout_seconds=300,
        model_params={"reasoning_effort": "medium"},
    )
    triage_profile = RuntimeProfile(
        name=CUSTODIAN_TRIAGE_BALANCED,
        provider="mock",
        model="triage-model",
        config=profile_defaults,
    )
    fallback = MockLLMAdapter(mock_response="default")
    profiled = ProfiledLLMAdapter(
        fallback,
        {triage_profile.name: triage_profile},
        adapter_factory=build_adapter,
    )

    request = RunConfig(
        model_name=CUSTODIAN_TRIAGE_BALANCED,
        model_params={"json_schema": {"type": "object"}},
    )
    reply = profiled.execute_prompt("Return JSON.", request)

    # The response reports the concrete model plus the profile it came from.
    assert reply.model == "triage-model"
    assert reply.metadata["profile"] == CUSTODIAN_TRIAGE_BALANCED
    assert reply.metadata["profile_provider"] == "mock"

    # Exactly one adapter was built through the factory.
    assert len(spawned) == 1
    effective = spawned[0].last_config
    assert effective.model_name == "triage-model"
    assert effective.temperature == 0.2
    assert effective.max_tokens == 1800
    assert effective.max_depth == 2
    assert effective.model_params == {
        "reasoning_effort": "medium",
        "json_schema": {"type": "object"},
    }
|
||||
|
||||
|
||||
def test_profile_dispatch_preserves_explicit_request_scalars():
    """Scalars set explicitly on the request take precedence over profile values.

    The profile sets ``temperature=0.2`` and ``max_tokens=1800``; the request
    sets ``0.4`` and ``123``. The adapter built by the factory must see the
    request's values.
    """
    spawned: list[MockLLMAdapter] = []

    def build_adapter(provider: str, model: str) -> MockLLMAdapter:
        # Record each adapter so the test can inspect the config it received.
        instance = MockLLMAdapter()
        spawned.append(instance)
        return spawned[-1]

    profile_defaults = RunConfig(
        model_name="triage-model",
        temperature=0.2,
        max_tokens=1800,
    )
    triage_profile = RuntimeProfile(
        name=CUSTODIAN_TRIAGE_BALANCED,
        provider="mock",
        model="triage-model",
        config=profile_defaults,
    )
    fallback = MockLLMAdapter()
    profiled = ProfiledLLMAdapter(
        fallback,
        {triage_profile.name: triage_profile},
        adapter_factory=build_adapter,
    )

    request = RunConfig(
        model_name=CUSTODIAN_TRIAGE_BALANCED,
        temperature=0.4,
        max_tokens=123,
    )
    profiled.execute_prompt("Prompt.", request)

    assert spawned[0].last_config.temperature == 0.4
    assert spawned[0].last_config.max_tokens == 123
|
||||
|
||||
|
||||
def test_non_profile_model_passes_through_to_default_adapter():
    """A model name that is not a profile is handled by the default adapter.

    With an empty profile mapping, a request for ``"gpt-4"`` must reach the
    wrapped default adapter once, with the model name unchanged.
    """
    fallback = MockLLMAdapter(mock_response="direct")
    profiled = ProfiledLLMAdapter(fallback, {})

    request = RunConfig(model_name="gpt-4")
    reply = profiled.execute_prompt("Prompt.", request)

    assert reply.content == "direct"
    assert fallback.call_count == 1
    assert fallback.last_config.model_name == "gpt-4"
|
||||
|
||||
|
||||
def test_unknown_custodian_profile_fails_without_secret_context():
    """Requesting the unregistered name ``custodian-missing`` raises a config error.

    The error's ``context`` must hold only the requested profile name.
    """
    profiled = ProfiledLLMAdapter(MockLLMAdapter(), {})
    request = RunConfig(model_name="custodian-missing")

    with pytest.raises(LLMConfigurationError) as raised:
        profiled.execute_prompt("Prompt.", request)

    failure = raised.value
    assert "Unknown LLM runtime profile" in str(failure)
    assert failure.context == {"profile": "custodian-missing"}
|
||||
|
||||
|
||||
def test_default_profiles_can_be_overridden_from_json_env(monkeypatch):
    """``LLM_CONNECT_PROFILES_JSON`` overrides the built-in profile definition.

    The environment variable holds a JSON object keyed by profile name. The
    profile returned by ``default_runtime_profiles`` must reflect the JSON
    values, not the ``provider``/``model`` arguments passed to the call.
    """
    overrides = {
        CUSTODIAN_TRIAGE_BALANCED: {
            "provider": "gemini",
            "model": "gemini-2.5-flash",
            "config": {
                "temperature": 0.1,
                "max_tokens": 900,
                "model_params": {"reasoning_effort": "low"},
            },
        }
    }
    monkeypatch.setenv("LLM_CONNECT_PROFILES_JSON", json.dumps(overrides))

    resolved = default_runtime_profiles(provider="mock", model="fallback")
    triage = resolved[CUSTODIAN_TRIAGE_BALANCED]

    assert triage.provider == "gemini"
    assert triage.model == "gemini-2.5-flash"
    assert triage.config.temperature == 0.1
    assert triage.config.max_tokens == 900
    assert triage.config.model_params == {"reasoning_effort": "low"}
|
||||
Reference in New Issue
Block a user