generated from coulomb/repo-seed
69 lines
2.1 KiB
Python
69 lines
2.1 KiB
Python
"""llm-connect adapter for instruction execution.
|
|
|
|
activity-core deliberately talks to llm-connect over its small HTTP surface
instead of importing provider-specific SDKs. This keeps the activity worker on
owned infrastructure while leaving provider selection, API keys, and model
routing behind the existing llm-connect boundary.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from typing import Any
|
|
|
|
import httpx
|
|
|
|
|
|
class DisabledLLMClient:
    """Stand-in LLM client for when no llm-connect endpoint is configured.

    It exposes the same ``complete`` signature as the real client but never
    performs a request: every call fails immediately.
    """

    def complete(  # noqa: ARG002
        self,
        prompt: str,
        model: str = "",
        config: dict[str, Any] | None = None,
    ) -> str:
        """Always fail, because there is no endpoint to send the prompt to.

        Args:
            prompt: Ignored.
            model: Ignored.
            config: Ignored.

        Raises:
            RuntimeError: On every call.
        """
        failure_reason = "LLM_CONNECT_URL is not configured"
        raise RuntimeError(failure_reason)
|
|
|
|
|
|
class LLMConnectClient:
    """Small synchronous client for llm-connect server mode.

    Each call to :meth:`complete` sends one ``POST {base_url}/execute``
    request through ``httpx`` and returns the ``content`` string found in the
    JSON response.
    """

    def __init__(self, base_url: str, timeout_seconds: float = 300.0) -> None:
        """Remember the endpoint and timeout.

        Args:
            base_url: Root URL of the llm-connect server. Trailing slashes
                are stripped so ``/execute`` can be appended directly.
            timeout_seconds: Timeout handed to the HTTP request; also used
                (truncated to whole seconds) as the default
                ``timeout_seconds`` entry of the request config.
        """
        self.base_url = base_url.rstrip("/")
        self.timeout_seconds = timeout_seconds

    def complete(
        self,
        prompt: str,
        model: str = "",
        config: dict[str, Any] | None = None,
    ) -> str:
        """Execute ``prompt`` on llm-connect and return the response text.

        Args:
            prompt: Prompt text sent as the ``prompt`` field.
            model: Optional model name. Only applied when ``config`` does not
                already carry a ``model_name`` entry.
            config: Optional request config. It is copied, never mutated.

        Returns:
            The ``content`` string from the JSON response.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status
                (via ``raise_for_status``).
            ValueError: If the response body is not a JSON object holding a
                string ``content`` field.
        """
        resp = httpx.post(
            f"{self.base_url}/execute",
            json=self._build_payload(prompt, model, config),
            timeout=self.timeout_seconds,
        )
        resp.raise_for_status()
        return self._extract_content(resp.json())

    def _build_payload(
        self,
        prompt: str,
        model: str,
        config: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Assemble the JSON body for ``/execute`` without touching ``config``."""
        # Copy so the caller's dict is never modified by the defaults below.
        run_config = dict(config or {})
        # An explicit config["model_name"] wins over the ``model`` argument.
        if model and "model_name" not in run_config:
            run_config["model_name"] = model
        # int() truncates fractional seconds for the config default.
        run_config.setdefault("timeout_seconds", int(self.timeout_seconds))
        return {"prompt": prompt, "config": run_config}

    @staticmethod
    def _extract_content(data: object) -> str:
        """Return ``data["content"]`` or raise ``ValueError`` if it is unusable."""
        # A JSON list, string, number or null body has no ``.get``; treat it
        # like a missing field instead of leaking an AttributeError.
        content = data.get("content") if isinstance(data, dict) else None
        if not isinstance(content, str):
            raise ValueError("llm-connect response missing string content")
        return content
|
|
|
|
|
|
def get_llm_client() -> DisabledLLMClient | LLMConnectClient:
    """Build the LLM client selected by the process environment.

    ``LLM_CONNECT_URL`` chooses the client: when it is unset or blank a
    ``DisabledLLMClient`` is returned, otherwise an ``LLMConnectClient`` for
    that URL. ``LLM_CONNECT_TIMEOUT_SECONDS`` supplies the timeout and falls
    back to 300 seconds when unset or blank.

    Raises:
        ValueError: If ``LLM_CONNECT_TIMEOUT_SECONDS`` is set to something
            that is not a number.
    """
    base_url = os.environ.get("LLM_CONNECT_URL", "").strip()
    if not base_url:
        return DisabledLLMClient()

    # Treat a blank value (e.g. ``VAR=`` in an env file) like an unset one,
    # mirroring how a blank LLM_CONNECT_URL is handled above.
    raw_timeout = os.environ.get("LLM_CONNECT_TIMEOUT_SECONDS", "").strip()
    try:
        timeout = float(raw_timeout) if raw_timeout else 300.0
    except ValueError as exc:
        # Name the offending variable; float()'s own message does not.
        raise ValueError(
            "LLM_CONNECT_TIMEOUT_SECONDS must be a number of seconds, "
            f"got {raw_timeout!r}"
        ) from exc
    return LLMConnectClient(base_url, timeout)
|