generated from coulomb/repo-seed
feat: integrate llm-connect FR-1/FR-3/FR-4 into IHF bridge
Some checks failed
Test / test (push) Has been cancelled
Some checks failed
Test / test (push) Has been cancelled
FR-3 (async_execute_prompt): CollectiveProposals now invokes all agents
concurrently via callAgentsBatch → single bridge subprocess with
asyncio.gather. Latency scales with slowest agent, not sum.
FR-4 (BudgetTracker): AgentDelegations passes tokenBudget to bridge;
llm-connect enforces it natively via BudgetTracker in RunConfig.
BudgetExceededError is a first-class BridgeError variant with total/
consumed/requested fields surfaced to the operator.
FR-1 (LLMServer passthrough): bridge accepts optional serverUrl field;
if present, calls POST {serverUrl}/execute instead of spawning a new
adapter. Infrastructure ready for hot-agent pre-warming (no schema
change required).
AgentBridge.hs: adds callAgentsBatch, callAgentWithBudget,
BudgetExceededError constructor, bridgeErrorMessage helper, defaultRequest,
requestToJson. All controllers updated to use bridgeErrorMessage.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,7 @@ module Application.Helper.AgentBridge where
|
||||
|
||||
-- IHF Phase 11 — Advanced AI Federation (IHUB-WP-0012)
|
||||
-- Haskell wrapper around scripts/llm_bridge.py (llm-connect subprocess bridge).
|
||||
-- Updated to use FR-1 (server passthrough), FR-3 (async batch), FR-4 (BudgetTracker).
|
||||
|
||||
import IHP.Prelude
|
||||
import IHP.ControllerPrelude
|
||||
@@ -22,6 +23,24 @@ data BridgeRequest = BridgeRequest
|
||||
, prompt :: !Text
|
||||
, maxTokens :: !Int
|
||||
, temperature :: !Double
|
||||
-- FR-4: optional BudgetTracker fields
|
||||
, budgetTotal :: !(Maybe Int) -- cap for this call; Nothing = no budget enforcement
|
||||
, budgetSpent :: !(Maybe Int) -- tokens already consumed in delegation chain
|
||||
-- FR-1: optional running LLMServer URL; if set, HTTP POST instead of new adapter
|
||||
, serverUrl :: !(Maybe Text)
|
||||
}
|
||||
|
||||
-- | Baseline 'BridgeRequest' that callers specialise via record update.
--
-- Targets the @openrouter@ provider with an empty model and prompt, no
-- system prompt, a 2000-token completion cap and temperature 0.7. The
-- budget fields (FR-4) and the server URL (FR-1) all start as 'Nothing'.
defaultRequest :: BridgeRequest
defaultRequest =
    BridgeRequest
        { provider     = "openrouter"
        , model        = ""
        , systemPrompt = Nothing
        , prompt       = ""
        , maxTokens    = 2000
        , temperature  = 0.7
        , budgetTotal  = Nothing
        , budgetSpent  = Nothing
        , serverUrl    = Nothing
        }
|
||||
|
||||
data BridgeResponse = BridgeResponse
|
||||
@@ -30,12 +49,26 @@ data BridgeResponse = BridgeResponse
|
||||
, tokensIn :: !Int
|
||||
, tokensOut :: !Int
|
||||
, finishReason :: !Text
|
||||
, budgetSpent :: !Int -- cumulative tokens spent (0 when no tracker)
|
||||
} deriving (Show)
|
||||
|
||||
data BridgeError = BridgeError
|
||||
{ errorMessage :: !Text
|
||||
, errorType :: !Text
|
||||
} deriving (Show)
|
||||
data BridgeError
|
||||
= BridgeError
|
||||
{ errorMessage :: !Text
|
||||
, errorType :: !Text
|
||||
}
|
||||
| BudgetExceededError
|
||||
{ errorMessage :: !Text
|
||||
, budgetTotal :: !Int
|
||||
, budgetConsumed :: !Int
|
||||
, budgetRequested :: !Int
|
||||
}
|
||||
deriving (Show)
|
||||
|
||||
-- | Extract the human-readable message from either 'BridgeError' variant,
-- so callers can display it without matching on the constructor themselves.
bridgeErrorMessage :: BridgeError -> Text
bridgeErrorMessage err = case err of
    BridgeError { errorMessage = msg } -> msg
    BudgetExceededError { errorMessage = msg } -> msg
|
||||
|
||||
instance FromJSON BridgeResponse where
|
||||
parseJSON = A.withObject "BridgeResponse" \o -> BridgeResponse
|
||||
@@ -44,51 +77,120 @@ instance FromJSON BridgeResponse where
|
||||
<*> o .: "tokensIn"
|
||||
<*> o .: "tokensOut"
|
||||
<*> o .: "finishReason"
|
||||
<*> (o .:? "budgetSpent" >>= pure . fromMaybe 0)
|
||||
|
||||
instance FromJSON BridgeError where
|
||||
parseJSON = A.withObject "BridgeError" \o -> BridgeError
|
||||
<$> o .: "error"
|
||||
<*> o .: "errorType"
|
||||
parseJSON = A.withObject "BridgeError" \o -> do
|
||||
errType <- o .: "errorType"
|
||||
if errType == ("LLMBudgetExceededError" :: Text)
|
||||
then BudgetExceededError
|
||||
<$> o .: "error"
|
||||
<*> (o .:? "budgetTotal" >>= pure . fromMaybe 0)
|
||||
<*> (o .:? "budgetSpent" >>= pure . fromMaybe 0)
|
||||
<*> (o .:? "budgetRequested" >>= pure . fromMaybe 0)
|
||||
else BridgeError
|
||||
<$> o .: "error"
|
||||
<*> pure errType
|
||||
|
||||
-- ---------------------------------------------------------------------------
|
||||
-- Core bridge call
|
||||
-- | JSON serialisation of a 'BridgeRequest'.
--
-- The six core keys are always emitted. @budgetTotal@, @budgetSpent@ and
-- @serverUrl@ are emitted only when the corresponding field is 'Just', so
-- an unset optional field is absent from the object rather than @null@.
requestToJson :: BridgeRequest -> Value
requestToJson req =
    A.object (coreFields <> budgetTotalField <> budgetSpentField <> serverUrlField)
  where
    coreFields =
        [ "provider" .= req.provider
        , "model" .= req.model
        , "systemPrompt" .= req.systemPrompt
        , "prompt" .= req.prompt
        , "maxTokens" .= req.maxTokens
        , "temperature" .= req.temperature
        ]
    budgetTotalField = maybe [] (\total -> ["budgetTotal" .= total]) req.budgetTotal
    budgetSpentField = maybe [] (\spent -> ["budgetSpent" .= spent]) req.budgetSpent
    serverUrlField = maybe [] (\url -> ["serverUrl" .= url]) req.serverUrl
|
||||
|
||||
-- ---------------------------------------------------------------------------
|
||||
-- Core bridge call — single request
|
||||
|
||||
-- | Invoke the llm_bridge.py subprocess with the given request.
|
||||
callBridge :: BridgeRequest -> IO (Either BridgeError BridgeResponse)
|
||||
callBridge req = do
|
||||
let payload = LBS.toStrict . encode $ object
|
||||
[ "provider" .= req.provider
|
||||
, "model" .= req.model
|
||||
, "systemPrompt" .= req.systemPrompt
|
||||
, "prompt" .= req.prompt
|
||||
, "maxTokens" .= req.maxTokens
|
||||
, "temperature" .= req.temperature
|
||||
]
|
||||
let payload = LBS.toStrict . A.encode $ requestToJson req
|
||||
(exitCode, stdout, stderr) <-
|
||||
readProcessWithExitCode "python3" ["scripts/llm_bridge.py"] (cs payload)
|
||||
let outBytes = LBS.fromStrict (cs stdout)
|
||||
case exitCode of
|
||||
ExitSuccess ->
|
||||
case decode outBytes of
|
||||
case A.decode outBytes of
|
||||
Just v -> pure (Right v)
|
||||
Nothing -> pure (Left (BridgeError "Unparseable bridge output" "ParseError"))
|
||||
ExitFailure _ ->
|
||||
case decode outBytes of
|
||||
case A.decode outBytes of
|
||||
Just v -> pure (Left v)
|
||||
Nothing -> pure (Left (BridgeError (cs stderr) "BridgeError"))
|
||||
|
||||
-- | Call the bridge using an AgentRegistration record.
|
||||
callAgent :: AgentRegistration -> Text -> IO (Either BridgeError BridgeResponse)
|
||||
callAgent agent userPrompt =
|
||||
callBridge BridgeRequest
|
||||
callBridge defaultRequest
|
||||
{ provider = agent.provider
|
||||
, model = agent.modelName
|
||||
, systemPrompt = agent.systemPrompt
|
||||
, prompt = userPrompt
|
||||
, maxTokens = 2000
|
||||
, temperature = 0.7
|
||||
}
|
||||
|
||||
-- | Call the bridge for an agent with an explicit token budget (FR-4).
--
-- @budgetCap@ is sent both as the completion limit (@maxTokens@) and as the
-- tracker total (@budgetTotal@). @alreadySpent@ is forwarded as
-- @budgetSpent@ only when it is positive; otherwise that field stays unset.
-- Used by AgentDelegations to enforce the configured tokenBudget at the bridge level.
callAgentWithBudget :: AgentRegistration -> Text -> Int -> Int -> IO (Either BridgeError BridgeResponse)
callAgentWithBudget agent userPrompt budgetCap alreadySpent =
    callBridge budgetedRequest
  where
    -- Zero or negative prior spend is treated as "nothing spent yet".
    priorSpend
        | alreadySpent > 0 = Just alreadySpent
        | otherwise = Nothing

    budgetedRequest :: BridgeRequest
    budgetedRequest =
        defaultRequest
            { provider = agent.provider
            , model = agent.modelName
            , systemPrompt = agent.systemPrompt
            , prompt = userPrompt
            , maxTokens = budgetCap
            , budgetTotal = Just budgetCap
            , budgetSpent = priorSpend
            }
|
||||
|
||||
-- ---------------------------------------------------------------------------
|
||||
-- Batch bridge call — parallel execution via FR-3 async (single subprocess)
|
||||
|
||||
-- | Invoke all requests concurrently in a single bridge subprocess (the
-- bridge runs them with asyncio.gather) and return one result per input,
-- in the same order. This replaces sequential forM in CollectiveProposals.
--
-- The payload sent on stdin is @{"batch": [...]}@ and the bridge is expected
-- to answer with @{"results": [...]}@. Each item is decoded as a
-- 'BridgeResponse' first and as a 'BridgeError' second.
--
-- If the output cannot be decoded, or the number of results differs from the
-- number of requests, every slot is filled with a @ParseError@. Callers zip
-- the inputs with the results, so a partial list must never be returned:
-- it would pair results with the wrong request.
callBridgeBatch :: [BridgeRequest] -> IO [Either BridgeError BridgeResponse]
callBridgeBatch [] = pure []
callBridgeBatch reqs = do
    let payload = LBS.toStrict . A.encode $
            A.object ["batch" .= map requestToJson reqs]
    (_exitCode, stdout, stderr) <-
        readProcessWithExitCode "python3" ["scripts/llm_bridge.py"] (cs payload)
    let outBytes = LBS.fromStrict (cs stdout)
        diagnostics = cs stderr :: Text
        -- Keep whatever the bridge wrote to stderr: after a crash it is the
        -- only hint about what went wrong.
        unparseable
            | diagnostics == "" = "Unparseable batch output"
            | otherwise = "Unparseable batch output: " <> diagnostics
    pure $ case A.decode @A.Value outBytes of
        Just (A.Object o)
            | Just (A.Array arr) <- A.lookup "results" o ->
                let items = toList arr
                 in if length items == length reqs
                        then map parseResult items
                        else failAll "Batch output has a different number of results than requests"
        _ -> failAll unparseable
  where
    -- One identical error per request, preserving the one-result-per-input contract.
    failAll :: Text -> [Either BridgeError BridgeResponse]
    failAll msg = replicate (length reqs) (Left (BridgeError msg "ParseError"))

    -- Try the success shape first, then the error shape.
    parseResult :: A.Value -> Either BridgeError BridgeResponse
    parseResult v = case A.fromJSON v of
        A.Success resp -> Right resp
        A.Error _ -> case A.fromJSON v of
            A.Success err -> Left err
            A.Error _ -> Left (BridgeError "Unparseable batch item" "ParseError")
|
||||
|
||||
-- | Batch variant using AgentRegistration records.
--
-- Each @(agent, prompt)@ pair becomes a 'defaultRequest' with the agent's
-- provider, model name and system prompt filled in; the whole list is then
-- handed to 'callBridgeBatch' in one call.
callAgentsBatch :: [(AgentRegistration, Text)] -> IO [Either BridgeError BridgeResponse]
callAgentsBatch pairs = callBridgeBatch (map toRequest pairs)
  where
    toRequest :: (AgentRegistration, Text) -> BridgeRequest
    toRequest (agent, userPrompt) =
        defaultRequest
            { provider = agent.provider
            , model = agent.modelName
            , systemPrompt = agent.systemPrompt
            , prompt = userPrompt
            }
|
||||
|
||||
-- ---------------------------------------------------------------------------
|
||||
-- AI governance policy check
|
||||
|
||||
|
||||
@@ -1,11 +1,16 @@
|
||||
module Web.Controller.AgentDelegations where
|
||||
|
||||
-- IHF Phase 11 — Advanced AI Federation (IHUB-WP-0012 T06)
|
||||
-- Updated: delegation token budget enforced natively by llm-connect BudgetTracker (FR-4).
|
||||
|
||||
import Web.Controller.Prelude
|
||||
import Web.View.AgentDelegations.Index
|
||||
import Web.View.AgentDelegations.Show
|
||||
import Application.Helper.AgentBridge (callBridge, BridgeRequest(..))
|
||||
import Application.Helper.AgentBridge
|
||||
( callAgentWithBudget
|
||||
, BridgeError(..)
|
||||
, bridgeErrorMessage
|
||||
)
|
||||
|
||||
instance Controller AgentDelegationsController where
|
||||
|
||||
@@ -44,24 +49,32 @@ instance Controller AgentDelegationsController where
|
||||
|> set #status "pending"
|
||||
|> createRecord
|
||||
|
||||
result <- liftIO $ callBridge BridgeRequest
|
||||
{ provider = receivingAgent.provider
|
||||
, model = receivingAgent.modelName
|
||||
, systemPrompt = receivingAgent.systemPrompt
|
||||
, prompt = scope
|
||||
, maxTokens = tokenBudget
|
||||
, temperature = 0.7
|
||||
}
|
||||
-- FR-4: token budget passed to bridge → llm-connect BudgetTracker enforces it
|
||||
-- natively, raising LLMBudgetExceededError if the call would exceed the cap.
|
||||
result <- liftIO $ callAgentWithBudget receivingAgent scope tokenBudget 0
|
||||
|
||||
now <- getCurrentTime
|
||||
case result of
|
||||
Left BudgetExceededError { errorMessage, budgetTotal, budgetConsumed, budgetRequested } -> do
|
||||
delegation
|
||||
|> set #status "failed"
|
||||
|> set #result (Just . A.toJSON $ A.object
|
||||
[ "error" A..= errorMessage
|
||||
, "budgetTotal" A..= budgetTotal
|
||||
, "budgetConsumed" A..= budgetConsumed
|
||||
, "budgetRequested" A..= budgetRequested
|
||||
])
|
||||
|> set #completedAt (Just now)
|
||||
|> updateRecord
|
||||
setErrorMessage ("Budget exceeded: requested " <> show budgetRequested
|
||||
<> " tokens but only " <> show (budgetTotal - budgetConsumed) <> " remain")
|
||||
Left err -> do
|
||||
delegation
|
||||
|> set #status "failed"
|
||||
|> set #result (Just . A.toJSON $ A.object ["error" A..= err.errorMessage])
|
||||
|> set #result (Just . A.toJSON $ A.object ["error" A..= bridgeErrorMessage err])
|
||||
|> set #completedAt (Just now)
|
||||
|> updateRecord
|
||||
setErrorMessage ("Delegation failed: " <> err.errorMessage)
|
||||
setErrorMessage ("Delegation failed: " <> bridgeErrorMessage err)
|
||||
Right resp -> do
|
||||
delegation
|
||||
|> set #status "completed"
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
module Web.Controller.CollectiveProposals where
|
||||
|
||||
-- IHF Phase 11 — Advanced AI Federation (IHUB-WP-0012 T07)
|
||||
-- Updated: agents invoked concurrently via callAgentsBatch (FR-3 async).
|
||||
|
||||
import Web.Controller.Prelude
|
||||
import Web.View.CollectiveProposals.Index
|
||||
import Web.View.CollectiveProposals.Show
|
||||
import Application.Helper.AgentBridge (callAgent, BridgeResponse(..))
|
||||
import Application.Helper.AgentBridge (callAgent, callAgentsBatch, BridgeResponse(..))
|
||||
import Application.Helper.ModelRouter (resolveAllAgents)
|
||||
import Data.List (intercalate)
|
||||
|
||||
@@ -45,10 +46,15 @@ instance Controller CollectiveProposalsController where
|
||||
|> createRecord
|
||||
|
||||
agents <- resolveAllAgents hubId taskType
|
||||
contributions <- forM agents \agent -> do
|
||||
result <- liftIO $ callAgent agent prompt
|
||||
|
||||
-- FR-3: invoke all agents concurrently in a single bridge subprocess call
|
||||
-- instead of sequential forM. Latency now scales with the slowest agent,
|
||||
-- not the sum of all agents.
|
||||
results <- liftIO $ callAgentsBatch [(a, prompt) | a <- agents]
|
||||
|
||||
successContribs <- fmap catMaybes $ forM (zip agents results) \(agent, result) ->
|
||||
case result of
|
||||
Left err -> pure Nothing
|
||||
Left _ -> pure Nothing
|
||||
Right resp -> do
|
||||
contrib <- newRecord @CollectiveProposalContribution
|
||||
|> set #collectiveProposalId proposal.id
|
||||
@@ -60,22 +66,21 @@ instance Controller CollectiveProposalsController where
|
||||
|> createRecord
|
||||
pure (Just (contrib, resp))
|
||||
|
||||
let successContribs = catMaybes contributions
|
||||
consensusStatus <- if null successContribs
|
||||
then pure "divergent"
|
||||
then do
|
||||
proposal |> set #consensusStatus "divergent" |> updateRecord
|
||||
pure "divergent"
|
||||
else do
|
||||
let contribTexts = map (\(_, r) -> r.content) successContribs
|
||||
synthesisPrompt = "The following agents have independently proposed solutions. "
|
||||
<> "Synthesize a unified recommendation:\n\n"
|
||||
<> intercalate "\n---\n" contribTexts
|
||||
mSynthAgent <- resolveAllAgents hubId taskType >>= \case
|
||||
(a:_) -> pure (Just a)
|
||||
[] -> pure Nothing
|
||||
case mSynthAgent of
|
||||
Nothing -> do
|
||||
-- Synthesis uses the highest-priority agent (head of the list)
|
||||
case agents of
|
||||
[] -> do
|
||||
proposal |> set #consensusStatus "divergent" |> updateRecord
|
||||
pure "divergent"
|
||||
Just synthAgent -> do
|
||||
(synthAgent:_) -> do
|
||||
synthResult <- liftIO $ callAgent synthAgent synthesisPrompt
|
||||
case synthResult of
|
||||
Left _ -> do
|
||||
@@ -95,8 +100,7 @@ instance Controller CollectiveProposalsController where
|
||||
setSuccessMessage ("Collective proposal created (" <> consensusStatus <> ")")
|
||||
redirectTo ShowCollectiveProposalAction { collectiveProposalId = proposal.id }
|
||||
|
||||
-- | Simple consensus heuristic: if all contributions have a non-empty content
|
||||
-- and there are at least 2, mark as consensus; single contributor = pending.
|
||||
-- | Simple consensus heuristic: ≥2 successful contributions = consensus.
|
||||
detectConsensus :: [CollectiveProposalContribution] -> Text
|
||||
detectConsensus contribs
|
||||
| length contribs >= 2 = "consensus"
|
||||
|
||||
@@ -8,7 +8,7 @@ import Web.View.DecisionRecords.Edit
|
||||
import Generated.Types
|
||||
import IHP.Prelude
|
||||
import IHP.ControllerPrelude
|
||||
import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy)
|
||||
import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy, bridgeErrorMessage)
|
||||
import Application.Helper.ModelRouter (resolveAgent)
|
||||
import Data.List (intercalate)
|
||||
|
||||
@@ -227,7 +227,7 @@ instance Controller DecisionRecordsController where
|
||||
result <- liftIO $ callAgent agent userMsg
|
||||
case result of
|
||||
Left err -> do
|
||||
setErrorMessage ("Implementation proposal failed: " <> err.errorMessage)
|
||||
setErrorMessage ("Implementation proposal failed: " <> bridgeErrorMessage err)
|
||||
redirectTo ShowDecisionRecordAction { decisionRecordId }
|
||||
Right resp -> do
|
||||
newRecord @AgentProposal
|
||||
|
||||
@@ -8,7 +8,7 @@ import Web.View.RequirementCandidates.Edit
|
||||
import Generated.Types
|
||||
import IHP.Prelude
|
||||
import IHP.ControllerPrelude
|
||||
import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy)
|
||||
import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy, bridgeErrorMessage)
|
||||
import Application.Helper.ModelRouter (resolveAgent)
|
||||
import Data.List (intercalate)
|
||||
import Data.Aeson (decode, Value(..), Array)
|
||||
@@ -298,7 +298,7 @@ instance Controller RequirementCandidatesController where
|
||||
result <- liftIO $ callAgent agent userMsg
|
||||
case result of
|
||||
Left err -> do
|
||||
setErrorMessage ("Duplicate detection failed: " <> err.errorMessage)
|
||||
setErrorMessage ("Duplicate detection failed: " <> bridgeErrorMessage err)
|
||||
redirectTo ShowRequirementCandidateAction { requirementCandidateId }
|
||||
Right resp -> do
|
||||
newRecord @AgentProposal
|
||||
|
||||
@@ -11,7 +11,7 @@ import IHP.ControllerPrelude
|
||||
import Data.Aeson (toJSON, object, (.=))
|
||||
import Application.Helper.Controller (isInRegression, widgetCycleCounts)
|
||||
import Application.Helper.TypeRegistry (validateWidgetType, validatePolicyScope, activeWidgetTypes, activePolicyScopes)
|
||||
import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy)
|
||||
import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy, bridgeErrorMessage)
|
||||
import Application.Helper.ModelRouter (resolveAgent)
|
||||
import Data.List (intercalate)
|
||||
|
||||
@@ -209,7 +209,7 @@ instance Controller WidgetsController where
|
||||
result <- liftIO $ callAgent agent userMsg
|
||||
case result of
|
||||
Left err -> do
|
||||
setErrorMessage ("AI summarization failed: " <> err.errorMessage)
|
||||
setErrorMessage ("AI summarization failed: " <> bridgeErrorMessage err)
|
||||
redirectTo ShowWidgetAction { widgetId }
|
||||
Right resp -> do
|
||||
newRecord @AgentProposal
|
||||
@@ -258,7 +258,7 @@ instance Controller WidgetsController where
|
||||
result <- liftIO $ callAgent agent userMsg
|
||||
case result of
|
||||
Left err -> do
|
||||
setErrorMessage ("AI draft failed: " <> err.errorMessage)
|
||||
setErrorMessage ("AI draft failed: " <> bridgeErrorMessage err)
|
||||
redirectTo ShowWidgetAction { widgetId }
|
||||
Right resp -> do
|
||||
newRecord @AgentProposal
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
IHF llm-connect bridge — Phase 11 AI Federation (IHUB-WP-0012)
|
||||
Updated to use llm-connect FR-1 (server passthrough), FR-3 (async batch),
|
||||
FR-4 (BudgetTracker).
|
||||
|
||||
Usage:
|
||||
echo '{"provider":"openrouter","model":"...","prompt":"..."}' | python3 scripts/llm_bridge.py
|
||||
|
||||
Input JSON fields:
|
||||
SINGLE REQUEST — stdin JSON fields:
|
||||
provider — openrouter | gemini | openai | claude-code (default: openrouter)
|
||||
model — model name string (provider-specific)
|
||||
prompt — the user prompt
|
||||
@@ -13,54 +12,211 @@ Input JSON fields:
|
||||
api_key — optional; falls back to llm-connect env-var resolution
|
||||
maxTokens — max completion tokens (default: 2000)
|
||||
temperature — sampling temperature (default: 0.7)
|
||||
budgetTotal — optional int; if set, a BudgetTracker is created with this cap
|
||||
budgetSpent — optional int; tokens already consumed (for delegation chains)
|
||||
serverUrl — optional str; if set, POST to {serverUrl}/execute instead of
|
||||
spawning a local adapter (FR-1 server passthrough)
|
||||
|
||||
Output JSON (stdout, exit 0 on success):
|
||||
BATCH REQUEST — stdin JSON with "batch" key:
|
||||
batch — list of single-request objects (see above)
|
||||
All top-level fields (except batch) are ignored.
|
||||
|
||||
Output JSON — single request (stdout, exit 0 on success):
|
||||
content — generated text
|
||||
model — model name actually used
|
||||
tokensIn — prompt token count
|
||||
tokensOut — completion token count
|
||||
finishReason — stop reason string
|
||||
budgetSpent — cumulative tokens consumed from BudgetTracker after this call
|
||||
|
||||
Error JSON (stdout, exit 1 on LLMError):
|
||||
Output JSON — batch request (stdout, exit 0 even on partial failure):
|
||||
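results — list with one entry per batch item, in input order: either
{content, model, tokensIn, tokensOut, finishReason, budgetSpent}
OR {error, errorType} (plus budgetTotal, budgetSpent, budgetRequested
for LLMBudgetExceededError)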
|
||||
|
||||
Error JSON (stdout, exit 1 on LLMError for single request):
|
||||
error — error message
|
||||
errorType — exception class name
|
||||
budgetTotal — present only for LLMBudgetExceededError
|
||||
budgetSpent — present only for LLMBudgetExceededError
|
||||
budgetRequested — present only for LLMBudgetExceededError
|
||||
"""
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
sys.path.insert(0, os.path.expanduser("~/llm-connect"))
|
||||
|
||||
from llm_connect import create_adapter, RunConfig
|
||||
from llm_connect.exceptions import LLMError
|
||||
from llm_connect import create_adapter, RunConfig, BudgetTracker
|
||||
from llm_connect.exceptions import LLMError, LLMBudgetExceededError
|
||||
|
||||
|
||||
def main() -> None:
|
||||
req = json.load(sys.stdin)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Adapter / server call
|
||||
|
||||
def _make_config(req: dict, tracker: BudgetTracker | None) -> RunConfig:
    """Build the RunConfig for one request dict.

    Reads ``model`` (default ""), ``temperature`` (default 0.7) and
    ``maxTokens`` (default 2000) from ``req`` and attaches ``tracker`` as the
    config's budget tracker; ``tracker`` may be None.
    """
    settings = {
        "model_name": req.get("model", ""),
        "temperature": req.get("temperature", 0.7),
        "max_tokens": req.get("maxTokens", 2000),
        "budget_tracker": tracker,
    }
    return RunConfig(**settings)
|
||||
|
||||
|
||||
def _call_server(server_url: str, req: dict, config: RunConfig) -> dict:
    """POST the prompt and run config to a running LLMServer (FR-1).

    Sends ``{"prompt": ..., "config": ...}`` as JSON to
    ``{server_url}/execute`` (trailing slashes on ``server_url`` are dropped
    first) and returns the decoded JSON response body. The request timeout is
    ``config.timeout_seconds``.
    """
    import urllib.request

    body = json.dumps({
        "prompt": req["prompt"],
        "config": config.to_dict(),
    }).encode()
    endpoint = f"{server_url.rstrip('/')}/execute"
    http_req = urllib.request.Request(
        endpoint,
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(http_req, timeout=config.timeout_seconds) as resp:
        raw_body = resp.read()
    return json.loads(raw_body)
|
||||
|
||||
|
||||
def _execute_single(req: dict) -> dict:
|
||||
"""Execute one request dict, return a result or error dict."""
|
||||
# Build optional BudgetTracker (FR-4)
|
||||
tracker: BudgetTracker | None = None
|
||||
if "budgetTotal" in req:
|
||||
tracker = BudgetTracker(total=int(req["budgetTotal"]))
|
||||
already_spent = int(req.get("budgetSpent", 0))
|
||||
if already_spent > 0:
|
||||
tracker.consume(already_spent)
|
||||
|
||||
config = _make_config(req, tracker)
|
||||
|
||||
try:
|
||||
server_url = req.get("serverUrl")
|
||||
if server_url:
|
||||
# FR-1: delegate to running LLMServer
|
||||
raw = _call_server(server_url, req, config)
|
||||
spent = tracker.spent if tracker else 0
|
||||
return {
|
||||
"content": raw.get("content", ""),
|
||||
"model": raw.get("model", ""),
|
||||
"tokensIn": raw.get("usage", {}).get("prompt_tokens", 0),
|
||||
"tokensOut": raw.get("usage", {}).get("completion_tokens", 0),
|
||||
"finishReason": raw.get("finish_reason", "stop"),
|
||||
"budgetSpent": spent,
|
||||
}
|
||||
|
||||
adapter = create_adapter(
|
||||
provider=req.get("provider", "openrouter"),
|
||||
model=req.get("model"),
|
||||
api_key=req.get("api_key"),
|
||||
system_prompt=req.get("systemPrompt"),
|
||||
)
|
||||
config = RunConfig(
|
||||
model_name=req.get("model", ""),
|
||||
temperature=req.get("temperature", 0.7),
|
||||
max_tokens=req.get("maxTokens", 2000),
|
||||
)
|
||||
resp = adapter.execute_prompt(req["prompt"], config)
|
||||
print(json.dumps({
|
||||
"content": resp.content,
|
||||
"model": resp.model,
|
||||
"tokensIn": resp.usage.get("prompt_tokens", 0),
|
||||
"tokensOut": resp.usage.get("completion_tokens", 0),
|
||||
spent = tracker.spent if tracker else 0
|
||||
return {
|
||||
"content": resp.content,
|
||||
"model": resp.model,
|
||||
"tokensIn": resp.usage.get("prompt_tokens", 0),
|
||||
"tokensOut": resp.usage.get("completion_tokens", 0),
|
||||
"finishReason": resp.finish_reason,
|
||||
}))
|
||||
"budgetSpent": spent,
|
||||
}
|
||||
except LLMBudgetExceededError as e:
|
||||
return {
|
||||
"error": str(e),
|
||||
"errorType": "LLMBudgetExceededError",
|
||||
"budgetTotal": e.total,
|
||||
"budgetSpent": e.spent,
|
||||
"budgetRequested": e.requested,
|
||||
}
|
||||
except LLMError as e:
|
||||
json.dump({"error": str(e), "errorType": type(e).__name__}, sys.stdout)
|
||||
return {"error": str(e), "errorType": type(e).__name__}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Async batch execution (FR-3)
|
||||
|
||||
async def _execute_all_async(requests: list[dict]) -> list[dict]:
    """Run all requests concurrently and return one result dict per request (FR-3).

    Results come back in the same order as ``requests``. Each entry is either
    a success dict (content, model, tokensIn, tokensOut, finishReason,
    budgetSpent) or an error dict (error, errorType, plus the budget fields
    for LLMBudgetExceededError).

    A failure in one item never aborts the batch: every exception raised while
    handling an item, including ones that are not LLMError (missing ``prompt``,
    a non-numeric budget, an HTTP failure), is converted into that item's
    error dict.
    """
    async def _one(req: dict) -> dict:
        try:
            # Build optional BudgetTracker (FR-4). This sits inside the try so
            # a malformed budget only fails this item.
            tracker: BudgetTracker | None = None
            if "budgetTotal" in req:
                tracker = BudgetTracker(total=int(req["budgetTotal"]))
                already_spent = int(req.get("budgetSpent", 0))
                if already_spent > 0:
                    tracker.consume(already_spent)

            config = _make_config(req, tracker)

            server_url = req.get("serverUrl")
            if server_url:
                # _call_server uses blocking urllib, so run it in the default
                # executor to keep the event loop free for the other items.
                loop = asyncio.get_running_loop()
                raw = await loop.run_in_executor(
                    None, lambda: _call_server(server_url, req, config)
                )
                spent = tracker.spent if tracker else 0
                return {
                    "content": raw.get("content", ""),
                    "model": raw.get("model", ""),
                    "tokensIn": raw.get("usage", {}).get("prompt_tokens", 0),
                    "tokensOut": raw.get("usage", {}).get("completion_tokens", 0),
                    "finishReason": raw.get("finish_reason", "stop"),
                    "budgetSpent": spent,
                }

            adapter = create_adapter(
                provider=req.get("provider", "openrouter"),
                model=req.get("model"),
                api_key=req.get("api_key"),
                system_prompt=req.get("systemPrompt"),
            )
            resp = await adapter.async_execute_prompt(req["prompt"], config)
            spent = tracker.spent if tracker else 0
            return {
                "content": resp.content,
                "model": resp.model,
                "tokensIn": resp.usage.get("prompt_tokens", 0),
                "tokensOut": resp.usage.get("completion_tokens", 0),
                "finishReason": resp.finish_reason,
                "budgetSpent": spent,
            }
        except LLMBudgetExceededError as e:
            return {
                "error": str(e),
                "errorType": "LLMBudgetExceededError",
                "budgetTotal": e.total,
                "budgetSpent": e.spent,
                "budgetRequested": e.requested,
            }
        except LLMError as e:
            return {"error": str(e), "errorType": type(e).__name__}
        except Exception as e:
            # Anything else must stay confined to this item; letting it
            # propagate out of gather() would discard every other result.
            return {"error": str(e), "errorType": type(e).__name__}

    return await asyncio.gather(*[_one(r) for r in requests])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Entry point
|
||||
|
||||
def main() -> None:
    """Entry point: read one JSON document from stdin and write JSON to stdout.

    With a ``batch`` key, all items run concurrently and ``{"results": [...]}``
    is printed (FR-3). Otherwise the document is a single request: its result
    is printed on success, or written to stdout followed by exit status 1 when
    it contains an ``error`` key.
    """
    payload = json.load(sys.stdin)

    if "batch" not in payload:
        # Single request
        outcome = _execute_single(payload)
        if "error" in outcome:
            json.dump(outcome, sys.stdout)
            sys.exit(1)
        print(json.dumps(outcome))
        return

    # Batch mode: run all requests concurrently (FR-3)
    outcomes = asyncio.run(_execute_all_async(payload["batch"]))
    print(json.dumps({"results": outcomes}))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user