Add activity-core LLM endpoint support

2026-06-07 19:24:45 +02:00
parent 1d9fc107ed
commit 14ba47c129
25 changed files with 2082 additions and 18 deletions
--- a/tests/test_profiles.py
+++ b/tests/test_profiles.py
@@ -0,0 +1,143 @@
+import json
+
+import pytest
+
+from llm_connect.adapter import MockLLMAdapter
+from llm_connect.exceptions import LLMConfigurationError
+from llm_connect.models import RunConfig
+from llm_connect.profiles import (
+    CUSTODIAN_TRIAGE_BALANCED,
+    ProfiledLLMAdapter,
+    RuntimeProfile,
+    default_runtime_profiles,
+)
+
+
+def test_profile_dispatch_merges_defaults_and_request_params():
+    """Profile defaults and request model_params are combined on dispatch."""
+    spawned: list[MockLLMAdapter] = []
+
+    def factory(provider: str, model: str) -> MockLLMAdapter:
+        stub = MockLLMAdapter(mock_response=f"{provider}:{model}")
+        spawned.append(stub)
+        return stub
+
+    profile_defaults = RunConfig(
+        model_name="triage-model",
+        temperature=0.2,
+        max_tokens=1800,
+        max_depth=2,
+        timeout_seconds=300,
+        model_params={"reasoning_effort": "medium"},
+    )
+    profile = RuntimeProfile(
+        name=CUSTODIAN_TRIAGE_BALANCED,
+        provider="mock",
+        model="triage-model",
+        config=profile_defaults,
+    )
+    fallback = MockLLMAdapter(mock_response="default")
+    registry = {profile.name: profile}
+    adapter = ProfiledLLMAdapter(fallback, registry, adapter_factory=factory)
+
+    request = RunConfig(
+        model_name=CUSTODIAN_TRIAGE_BALANCED,
+        model_params={"json_schema": {"type": "object"}},
+    )
+    response = adapter.execute_prompt("Return JSON.", request)
+
+    # The response must report the resolved model and the originating profile.
+    assert response.model == "triage-model"
+    assert response.metadata["profile"] == CUSTODIAN_TRIAGE_BALANCED
+    assert response.metadata["profile_provider"] == "mock"
+
+    # Exactly one adapter came from the factory; inspect the config it received.
+    assert len(spawned) == 1
+    resolved = spawned[0].last_config
+    assert resolved.model_name == "triage-model"
+    assert resolved.temperature == 0.2
+    assert resolved.max_tokens == 1800
+    assert resolved.max_depth == 2
+    expected_params = {"reasoning_effort": "medium", "json_schema": {"type": "object"}}
+    assert resolved.model_params == expected_params
+
+
+def test_profile_dispatch_preserves_explicit_request_scalars():
+    """Scalars set explicitly on the request win over the profile's defaults."""
+    spawned: list[MockLLMAdapter] = []
+
+    def factory(provider: str, model: str) -> MockLLMAdapter:
+        stub = MockLLMAdapter()
+        spawned.append(stub)
+        return stub
+
+    profile = RuntimeProfile(
+        name=CUSTODIAN_TRIAGE_BALANCED,
+        provider="mock",
+        model="triage-model",
+        config=RunConfig(model_name="triage-model", temperature=0.2, max_tokens=1800),
+    )
+    fallback = MockLLMAdapter()
+    registry = {profile.name: profile}
+    adapter = ProfiledLLMAdapter(fallback, registry, adapter_factory=factory)
+
+    request = RunConfig(
+        model_name=CUSTODIAN_TRIAGE_BALANCED,
+        temperature=0.4,
+        max_tokens=123,
+    )
+    adapter.execute_prompt("Prompt.", request)
+
+    # The request's values (0.4 / 123), not the profile's (0.2 / 1800), must arrive.
+    resolved = spawned[0].last_config
+    assert resolved.temperature == 0.4
+    assert resolved.max_tokens == 123
+
+
+def test_non_profile_model_passes_through_to_default_adapter():
+    """A non-profile model name is forwarded unchanged to the default adapter."""
+    fallback = MockLLMAdapter(mock_response="direct")
+    wrapper = ProfiledLLMAdapter(fallback, {})
+    reply = wrapper.execute_prompt("Prompt.", RunConfig(model_name="gpt-4"))
+
+    assert reply.content == "direct"
+    assert fallback.call_count == 1
+    assert fallback.last_config.model_name == "gpt-4"
+
+
+def test_unknown_custodian_profile_fails_without_secret_context():
+    """Unknown custodian profile errors expose only the profile name as context."""
+    wrapper = ProfiledLLMAdapter(MockLLMAdapter(), {})
+    with pytest.raises(LLMConfigurationError) as caught:
+        wrapper.execute_prompt("Prompt.", RunConfig(model_name="custodian-missing"))
+    error = caught.value
+    assert "Unknown LLM runtime profile" in str(error)
+    assert error.context == {"profile": "custodian-missing"}
+
+
+def test_default_profiles_can_be_overridden_from_json_env(monkeypatch):
+    """Profiles from LLM_CONNECT_PROFILES_JSON override the default profiles."""
+    # JSON payload overriding one profile's provider, model, and config.
+    override = {
+        "provider": "gemini",
+        "model": "gemini-2.5-flash",
+        "config": {
+            "temperature": 0.1,
+            "max_tokens": 900,
+            "model_params": {"reasoning_effort": "low"},
+        },
+    }
+    env_payload = json.dumps({CUSTODIAN_TRIAGE_BALANCED: override})
+    monkeypatch.setenv("LLM_CONNECT_PROFILES_JSON", env_payload)
+
+    profiles = default_runtime_profiles(provider="mock", model="fallback")
+    profile = profiles[CUSTODIAN_TRIAGE_BALANCED]
+
+    # The env values, not the "mock"/"fallback" arguments, must be reported.
+    assert profile.provider == "gemini"
+    assert profile.model == "gemini-2.5-flash"
+
+    resolved = profile.config
+    assert resolved.temperature == 0.1
+    assert resolved.max_tokens == 900
+    assert resolved.model_params == {"reasoning_effort": "low"}