"""Tests for runtime-profile dispatch in ``llm_connect.profiles``."""

import json

import pytest

from llm_connect.adapter import MockLLMAdapter
from llm_connect.exceptions import LLMConfigurationError
from llm_connect.models import RunConfig
from llm_connect.profiles import (
    CUSTODIAN_TRIAGE_BALANCED,
    ProfiledLLMAdapter,
    RuntimeProfile,
    default_runtime_profiles,
)


def test_profile_dispatch_merges_defaults_and_request_params():
    """A profile-named request runs on a factory-built adapter with merged config.

    The profile supplies the scalar settings and one ``model_params`` entry;
    the request adds a second ``model_params`` entry. The adapter built by the
    factory must see both entries alongside the profile's scalars.
    """
    created: list[MockLLMAdapter] = []

    def factory(provider: str, model: str) -> MockLLMAdapter:
        # Record every adapter the factory builds so the test can inspect
        # the config that was ultimately handed to it.
        created.append(MockLLMAdapter(mock_response=f"{provider}:{model}"))
        return created[-1]

    profile = RuntimeProfile(
        name=CUSTODIAN_TRIAGE_BALANCED,
        provider="mock",
        model="triage-model",
        config=RunConfig(
            model_name="triage-model",
            temperature=0.2,
            max_tokens=1800,
            max_depth=2,
            timeout_seconds=300,
            model_params={"reasoning_effort": "medium"},
        ),
    )
    adapter = ProfiledLLMAdapter(
        MockLLMAdapter(mock_response="default"),
        {profile.name: profile},
        adapter_factory=factory,
    )

    # The request addresses the profile by name via ``model_name``.
    response = adapter.execute_prompt(
        "Return JSON.",
        RunConfig(
            model_name=CUSTODIAN_TRIAGE_BALANCED,
            model_params={"json_schema": {"type": "object"}},
        ),
    )

    assert response.model == "triage-model"
    assert response.metadata["profile"] == CUSTODIAN_TRIAGE_BALANCED
    assert response.metadata["profile_provider"] == "mock"
    assert len(created) == 1

    resolved = created[0].last_config
    assert resolved.model_name == "triage-model"
    assert resolved.temperature == 0.2
    assert resolved.max_tokens == 1800
    assert resolved.max_depth == 2
    assert resolved.model_params == {
        "reasoning_effort": "medium",
        "json_schema": {"type": "object"},
    }


def test_profile_dispatch_preserves_explicit_request_scalars():
    """Scalars set explicitly on the request win over the profile's values."""
    created: list[MockLLMAdapter] = []

    def factory(provider: str, model: str) -> MockLLMAdapter:
        created.append(MockLLMAdapter())
        return created[-1]

    profile = RuntimeProfile(
        name=CUSTODIAN_TRIAGE_BALANCED,
        provider="mock",
        model="triage-model",
        config=RunConfig(model_name="triage-model", temperature=0.2, max_tokens=1800),
    )
    adapter = ProfiledLLMAdapter(
        MockLLMAdapter(),
        {profile.name: profile},
        adapter_factory=factory,
    )

    adapter.execute_prompt(
        "Prompt.",
        RunConfig(
            model_name=CUSTODIAN_TRIAGE_BALANCED,
            temperature=0.4,
            max_tokens=123,
        ),
    )

    # Fail with a clear assertion (not an IndexError) if the factory was
    # never invoked; mirrors the check in the merge test above.
    assert len(created) == 1
    assert created[0].last_config.temperature == 0.4
    assert created[0].last_config.max_tokens == 123


def test_non_profile_model_passes_through_to_default_adapter():
    """A model name that is not a profile is handled by the default adapter."""
    default = MockLLMAdapter(mock_response="direct")
    adapter = ProfiledLLMAdapter(default, {})

    response = adapter.execute_prompt("Prompt.", RunConfig(model_name="gpt-4"))

    assert response.content == "direct"
    assert default.call_count == 1
    assert default.last_config.model_name == "gpt-4"


def test_unknown_custodian_profile_fails_without_secret_context():
    """An unregistered ``custodian-missing`` model raises a configuration error.

    The error context is expected to contain only the profile name.
    NOTE(review): the rejection is presumably triggered by the ``custodian-``
    prefix, since ``gpt-4`` passes through in the test above -- confirm
    against ``ProfiledLLMAdapter``.
    """
    adapter = ProfiledLLMAdapter(MockLLMAdapter(), {})

    with pytest.raises(LLMConfigurationError) as excinfo:
        adapter.execute_prompt("Prompt.", RunConfig(model_name="custodian-missing"))

    # These checks must live outside the ``with`` block: anything placed after
    # the raising call inside it would never execute.
    assert "Unknown LLM runtime profile" in str(excinfo.value)
    assert excinfo.value.context == {"profile": "custodian-missing"}


def test_default_profiles_can_be_overridden_from_json_env(monkeypatch):
    """``LLM_CONNECT_PROFILES_JSON`` overrides the provider/model passed in."""
    monkeypatch.setenv(
        "LLM_CONNECT_PROFILES_JSON",
        json.dumps(
            {
                CUSTODIAN_TRIAGE_BALANCED: {
                    "provider": "gemini",
                    "model": "gemini-2.5-flash",
                    "config": {
                        "temperature": 0.1,
                        "max_tokens": 900,
                        "model_params": {"reasoning_effort": "low"},
                    },
                }
            }
        ),
    )

    profiles = default_runtime_profiles(provider="mock", model="fallback")
    profile = profiles[CUSTODIAN_TRIAGE_BALANCED]

    assert profile.provider == "gemini"
    assert profile.model == "gemini-2.5-flash"
    assert profile.config.temperature == 0.1
    assert profile.config.max_tokens == 900
    assert profile.config.model_params == {"reasoning_effort": "low"}