feat: integrate llm-connect FR-1/FR-3/FR-4 into IHF bridge

FR-3 (async_execute_prompt): CollectiveProposals now invokes all agents concurrently via callAgentsBatch → single bridge subprocess with asyncio.gather. Latency scales with slowest agent, not sum. FR-4 (BudgetTracker): AgentDelegations passes tokenBudget to bridge; llm-connect enforces it natively via BudgetTracker in RunConfig. BudgetExceededError is a first-class BridgeError variant with total/consumed/requested fields surfaced to the operator. FR-1 (LLMServer passthrough): bridge accepts optional serverUrl field; if present, calls POST {serverUrl}/execute instead of spawning a new adapter. Infrastructure ready for hot-agent pre-warming (no schema change required). AgentBridge.hs: adds callAgentsBatch, callAgentWithBudget, BudgetExceededError constructor, bridgeErrorMessage helper, defaultRequest, requestToJson. All controllers updated to use bridgeErrorMessage. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 22:48:29 +00:00
parent a400365d50
commit 674f5da0e1
7 changed files with 350 additions and 75 deletions
--- a/Application/Helper/AgentBridge.hs
+++ b/Application/Helper/AgentBridge.hs
@@ -2,6 +2,7 @@ module Application.Helper.AgentBridge where
 -- IHF Phase 11 — Advanced AI Federation (IHUB-WP-0012)
 -- Haskell wrapper around scripts/llm_bridge.py (llm-connect subprocess bridge).
 -- Updated to use FR-1 (server passthrough), FR-3 (async batch), FR-4 (BudgetTracker).
 import IHP.Prelude
 import IHP.ControllerPrelude
@@ -22,6 +23,24 @@ data BridgeRequest = BridgeRequest
    , prompt       :: !Text
    , maxTokens    :: !Int
    , temperature  :: !Double
    -- FR-4: optional BudgetTracker fields
    , budgetTotal  :: !(Maybe Int)    -- cap for this call; Nothing = no budget enforcement
    , budgetSpent  :: !(Maybe Int)    -- tokens already consumed in delegation chain
    -- FR-1: optional running LLMServer URL; if set, HTTP POST instead of new adapter
    , serverUrl    :: !(Maybe Text)
    }
 -- | Baseline 'BridgeRequest' that callers specialise with a record update.
 -- It targets the "openrouter" provider with an empty model and prompt, no
 -- system prompt, 2000 max tokens and temperature 0.7.  The optional budget
 -- and server-URL fields are all unset.
 defaultRequest :: BridgeRequest
 defaultRequest =
    BridgeRequest
        { provider = "openrouter"
        , model = ""
        , prompt = ""
        , systemPrompt = Nothing
        , maxTokens = 2000
        , temperature = 0.7
        , serverUrl = Nothing
        , budgetTotal = Nothing
        , budgetSpent = Nothing
        }
 data BridgeResponse = BridgeResponse
@@ -30,12 +49,26 @@ data BridgeResponse = BridgeResponse
    , tokensIn     :: !Int
    , tokensOut    :: !Int
    , finishReason :: !Text
    , budgetSpent  :: !Int   -- cumulative tokens spent (0 when no tracker)
    } deriving (Show)
-data BridgeError = BridgeError
+data BridgeError
-    { errorMessage :: !Text
+    = BridgeError
-    , errorType    :: !Text
+        { errorMessage :: !Text
-    } deriving (Show)
+        , errorType    :: !Text
        }
    | BudgetExceededError
        { errorMessage    :: !Text
        , budgetTotal     :: !Int
        , budgetConsumed  :: !Int
        , budgetRequested :: !Int
        }
    deriving (Show)
 -- | Extract the human-readable message carried by either 'BridgeError'
 -- variant, for display to the operator.
 bridgeErrorMessage :: BridgeError -> Text
 bridgeErrorMessage err = case err of
    BridgeError { errorMessage = msg }         -> msg
    BudgetExceededError { errorMessage = msg } -> msg
 instance FromJSON BridgeResponse where
    parseJSON = A.withObject "BridgeResponse" \o -> BridgeResponse
@@ -44,51 +77,120 @@ instance FromJSON BridgeResponse where
        <*> o .:  "tokensIn"
        <*> o .:  "tokensOut"
        <*> o .:  "finishReason"
        <*> (o .:? "budgetSpent" >>= pure . fromMaybe 0)
 instance FromJSON BridgeError where
-    parseJSON = A.withObject "BridgeError" \o -> BridgeError
+    parseJSON = A.withObject "BridgeError" \o -> do
-        <$> o .: "error"
+        errType <- o .: "errorType"
-        <*> o .: "errorType"
+        if errType == ("LLMBudgetExceededError" :: Text)
            then BudgetExceededError
                <$> o .: "error"
                <*> (o .:? "budgetTotal"     >>= pure . fromMaybe 0)
                <*> (o .:? "budgetSpent"     >>= pure . fromMaybe 0)
                <*> (o .:? "budgetRequested" >>= pure . fromMaybe 0)
            else BridgeError
                <$> o .: "error"
                <*> pure errType
 -- ---------------------------------------------------------------------------
-- Core bridge call
+-- JSON serialisation of a BridgeRequest
 -- | Serialise a 'BridgeRequest' into the JSON object sent to the bridge
 -- script.  The six core keys are always emitted ("systemPrompt" is included
 -- even when it is 'Nothing'); "budgetTotal", "budgetSpent" and "serverUrl"
 -- are emitted only when the corresponding field is 'Just'.
 requestToJson :: BridgeRequest -> Value
 requestToJson req = A.object (coreFields <> optionalFields)
  where
    coreFields =
        [ "provider" .= req.provider
        , "model" .= req.model
        , "systemPrompt" .= req.systemPrompt
        , "prompt" .= req.prompt
        , "maxTokens" .= req.maxTokens
        , "temperature" .= req.temperature
        ]
    -- Each optional field contributes either a singleton list or nothing.
    optionalFields =
        maybe [] (\total -> ["budgetTotal" .= total]) req.budgetTotal
            <> maybe [] (\spent -> ["budgetSpent" .= spent]) req.budgetSpent
            <> maybe [] (\url -> ["serverUrl" .= url]) req.serverUrl
 -- ---------------------------------------------------------------------------
 -- Core bridge call — single request
 -- | Invoke the llm_bridge.py subprocess with the given request.
 callBridge :: BridgeRequest -> IO (Either BridgeError BridgeResponse)
 callBridge req = do
-    let payload = LBS.toStrict . encode $ object
+    let payload = LBS.toStrict . A.encode $ requestToJson req
            [ "provider"     .= req.provider
            , "model"        .= req.model
            , "systemPrompt" .= req.systemPrompt
            , "prompt"       .= req.prompt
            , "maxTokens"    .= req.maxTokens
            , "temperature"  .= req.temperature
            ]
    (exitCode, stdout, stderr) <-
        readProcessWithExitCode "python3" ["scripts/llm_bridge.py"] (cs payload)
    let outBytes = LBS.fromStrict (cs stdout)
    case exitCode of
        ExitSuccess ->
-            case decode outBytes of
+            case A.decode outBytes of
                Just v  -> pure (Right v)
                Nothing -> pure (Left (BridgeError "Unparseable bridge output" "ParseError"))
        ExitFailure _ ->
-            case decode outBytes of
+            case A.decode outBytes of
                Just v  -> pure (Left v)
                Nothing -> pure (Left (BridgeError (cs stderr) "BridgeError"))
 -- | Call the bridge using an AgentRegistration record.
 callAgent :: AgentRegistration -> Text -> IO (Either BridgeError BridgeResponse)
 callAgent agent userPrompt =
-    callBridge BridgeRequest
+    callBridge defaultRequest
        { provider     = agent.provider
        , model        = agent.modelName
        , systemPrompt = agent.systemPrompt
        , prompt       = userPrompt
        , maxTokens    = 2000
        , temperature  = 0.7
        }
 -- | Call the bridge for one agent with an explicit token budget (FR-4).
 --
 -- @budgetCap@ is sent both as the request's @maxTokens@ and as its
 -- @budgetTotal@.  @alreadySpent@ is forwarded as @budgetSpent@ only when it
 -- is positive; otherwise the field is left unset.  Used by AgentDelegations
 -- so that the configured tokenBudget is enforced at the bridge level.
 callAgentWithBudget :: AgentRegistration -> Text -> Int -> Int -> IO (Either BridgeError BridgeResponse)
 callAgentWithBudget agent userPrompt budgetCap alreadySpent = callBridge request
  where
    -- A zero or negative prior spend is treated as "nothing spent yet".
    priorSpend
        | alreadySpent > 0 = Just alreadySpent
        | otherwise        = Nothing

    request :: BridgeRequest
    request = defaultRequest
        { provider     = agent.provider
        , model        = agent.modelName
        , systemPrompt = agent.systemPrompt
        , prompt       = userPrompt
        , maxTokens    = budgetCap
        , budgetTotal  = Just budgetCap
        , budgetSpent  = priorSpend
        }
 -- ---------------------------------------------------------------------------
 -- Batch bridge call — parallel execution via FR-3 async (single subprocess)
 -- | Invoke all requests concurrently in a single bridge subprocess (the
 -- script runs them with asyncio.gather).  Returns exactly one result per
 -- input, in the same order, so callers can safely zip results with requests.
 --
 -- Failure handling:
 --
 -- * If stdout is not a JSON object with a "results" array, every slot is
 --   filled with the same error.  When the subprocess exited non-zero and
 --   wrote to stderr, that text is surfaced (mirroring 'callBridge');
 --   otherwise a generic "ParseError" is reported.
 -- * If the "results" array length differs from the number of requests, every
 --   slot is an error rather than a misaligned or truncated list.
 -- * An item that parses as neither a response nor an error becomes a
 --   per-item "ParseError".
 callBridgeBatch :: [BridgeRequest] -> IO [Either BridgeError BridgeResponse]
 callBridgeBatch [] = pure []
 callBridgeBatch reqs = do
    let payload = LBS.toStrict . A.encode $
            A.object ["batch" .= map requestToJson reqs]
    (exitCode, stdout, stderr) <-
        readProcessWithExitCode "python3" ["scripts/llm_bridge.py"] (cs payload)
    let outBytes = LBS.fromStrict (cs stdout)
        errText  = cs stderr :: Text
        expected = length reqs
        failAll err = replicate expected (Left err)
        -- Prefer the subprocess's own diagnostics when it crashed.
        fallbackError = case exitCode of
            ExitFailure _ | errText /= "" -> BridgeError errText "BridgeError"
            _                             -> BridgeError "Unparseable batch output" "ParseError"
    case A.decode @A.Value outBytes of
        Just (A.Object o) | Just (A.Array arr) <- A.lookup "results" o -> do
            let results = map parseResult (toList arr)
            -- Enforce the one-result-per-request contract; a short or long
            -- array would otherwise pair results with the wrong agents.
            pure $ if length results == expected
                then results
                else failAll (BridgeError "Batch result count does not match request count" "ParseError")
        _ ->
            pure (failAll fallbackError)
  where
    -- Try the success shape first, then the error shape.
    parseResult v = case (A.fromJSON v :: A.Result BridgeResponse) of
        A.Success resp -> Right resp
        A.Error _      -> case (A.fromJSON v :: A.Result BridgeError) of
            A.Success err -> Left err
            A.Error _     -> Left (BridgeError "Unparseable batch item" "ParseError")
 -- | Batch variant of the agent call: builds one 'BridgeRequest' per
 -- (agent, prompt) pair on top of 'defaultRequest' and hands the whole list to
 -- 'callBridgeBatch'.
 callAgentsBatch :: [(AgentRegistration, Text)] -> IO [Either BridgeError BridgeResponse]
 callAgentsBatch pairs = callBridgeBatch (map toRequest pairs)
  where
    toRequest :: (AgentRegistration, Text) -> BridgeRequest
    toRequest (agent, userPrompt) = defaultRequest
        { provider     = agent.provider
        , model        = agent.modelName
        , systemPrompt = agent.systemPrompt
        , prompt       = userPrompt
        }
 -- ---------------------------------------------------------------------------
 -- AI governance policy check
--- a/Web/Controller/AgentDelegations.hs
+++ b/Web/Controller/AgentDelegations.hs
@@ -1,11 +1,16 @@
 module Web.Controller.AgentDelegations where
 -- IHF Phase 11 — Advanced AI Federation (IHUB-WP-0012 T06)
 -- Updated: delegation token budget enforced natively by llm-connect BudgetTracker (FR-4).
 import Web.Controller.Prelude
 import Web.View.AgentDelegations.Index
 import Web.View.AgentDelegations.Show
-import Application.Helper.AgentBridge (callBridge, BridgeRequest(..))
+import Application.Helper.AgentBridge
    ( callAgentWithBudget
    , BridgeError(..)
    , bridgeErrorMessage
    )
 instance Controller AgentDelegationsController where
@@ -44,24 +49,32 @@ instance Controller AgentDelegationsController where
            |> set #status "pending"
            |> createRecord
-        result <- liftIO $ callBridge BridgeRequest
+        -- FR-4: token budget passed to bridge → llm-connect BudgetTracker enforces it
-            { provider     = receivingAgent.provider
+        -- natively, raising LLMBudgetExceededError if the call would exceed the cap.
-            , model        = receivingAgent.modelName
+        result <- liftIO $ callAgentWithBudget receivingAgent scope tokenBudget 0
            , systemPrompt = receivingAgent.systemPrompt
            , prompt       = scope
            , maxTokens    = tokenBudget
            , temperature  = 0.7
            }
        now <- getCurrentTime
        case result of
            Left BudgetExceededError { errorMessage, budgetTotal, budgetConsumed, budgetRequested } -> do
                delegation
                    |> set #status "failed"
                    |> set #result (Just . A.toJSON $ A.object
                        [ "error"            A..= errorMessage
                        , "budgetTotal"      A..= budgetTotal
                        , "budgetConsumed"   A..= budgetConsumed
                        , "budgetRequested"  A..= budgetRequested
                        ])
                    |> set #completedAt (Just now)
                    |> updateRecord
                setErrorMessage ("Budget exceeded: requested " <> show budgetRequested
                    <> " tokens but only " <> show (budgetTotal - budgetConsumed) <> " remain")
            Left err -> do
                delegation
                    |> set #status "failed"
-                    |> set #result (Just . A.toJSON $ A.object ["error" A..= err.errorMessage])
+                    |> set #result (Just . A.toJSON $ A.object ["error" A..= bridgeErrorMessage err])
                    |> set #completedAt (Just now)
                    |> updateRecord
-                setErrorMessage ("Delegation failed: " <> err.errorMessage)
+                setErrorMessage ("Delegation failed: " <> bridgeErrorMessage err)
            Right resp -> do
                delegation
                    |> set #status "completed"
--- a/Web/Controller/CollectiveProposals.hs
+++ b/Web/Controller/CollectiveProposals.hs
@@ -1,11 +1,12 @@
 module Web.Controller.CollectiveProposals where
 -- IHF Phase 11 — Advanced AI Federation (IHUB-WP-0012 T07)
 -- Updated: agents invoked concurrently via callAgentsBatch (FR-3 async).
 import Web.Controller.Prelude
 import Web.View.CollectiveProposals.Index
 import Web.View.CollectiveProposals.Show
-import Application.Helper.AgentBridge (callAgent, BridgeResponse(..))
+import Application.Helper.AgentBridge (callAgent, callAgentsBatch, BridgeResponse(..))
 import Application.Helper.ModelRouter (resolveAllAgents)
 import Data.List (intercalate)
@@ -45,10 +46,15 @@ instance Controller CollectiveProposalsController where
            |> createRecord
        agents <- resolveAllAgents hubId taskType
-        contributions <- forM agents \agent -> do
+
-            result <- liftIO $ callAgent agent prompt
+        -- FR-3: invoke all agents concurrently in a single bridge subprocess call
        -- instead of sequential forM.  Latency now scales with the slowest agent,
        -- not the sum of all agents.
        results <- liftIO $ callAgentsBatch [(a, prompt) | a <- agents]
        successContribs <- fmap catMaybes $ forM (zip agents results) \(agent, result) ->
            case result of
-                Left err -> pure Nothing
+                Left _     -> pure Nothing
                Right resp -> do
                    contrib <- newRecord @CollectiveProposalContribution
                        |> set #collectiveProposalId proposal.id
@@ -60,22 +66,21 @@ instance Controller CollectiveProposalsController where
                        |> createRecord
                    pure (Just (contrib, resp))
        let successContribs = catMaybes contributions
        consensusStatus <- if null successContribs
-            then pure "divergent"
+            then do
                proposal |> set #consensusStatus "divergent" |> updateRecord
                pure "divergent"
            else do
                let contribTexts = map (\(_, r) -> r.content) successContribs
                    synthesisPrompt = "The following agents have independently proposed solutions. "
                        <> "Synthesize a unified recommendation:\n\n"
                        <> intercalate "\n---\n" contribTexts
-                mSynthAgent <- resolveAllAgents hubId taskType >>= \case
+                -- Synthesis uses the highest-priority agent (head of the list)
-                    (a:_) -> pure (Just a)
+                case agents of
-                    []    -> pure Nothing
+                    [] -> do
                case mSynthAgent of
                    Nothing -> do
                        proposal |> set #consensusStatus "divergent" |> updateRecord
                        pure "divergent"
-                    Just synthAgent -> do
+                    (synthAgent:_) -> do
                        synthResult <- liftIO $ callAgent synthAgent synthesisPrompt
                        case synthResult of
                            Left _ -> do
@@ -95,8 +100,7 @@ instance Controller CollectiveProposalsController where
        setSuccessMessage ("Collective proposal created (" <> consensusStatus <> ")")
        redirectTo ShowCollectiveProposalAction { collectiveProposalId = proposal.id }
-- | Simple consensus heuristic: if all contributions have a non-empty content
+-- | Simple consensus heuristic: ≥2 successful contributions = consensus.
 -- and there are at least 2, mark as consensus; single contributor = pending.
 detectConsensus :: [CollectiveProposalContribution] -> Text
 detectConsensus contribs
    | length contribs >= 2 = "consensus"
--- a/Web/Controller/DecisionRecords.hs
+++ b/Web/Controller/DecisionRecords.hs
@@ -8,7 +8,7 @@ import Web.View.DecisionRecords.Edit
 import Generated.Types
 import IHP.Prelude
 import IHP.ControllerPrelude
-import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy)
+import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy, bridgeErrorMessage)
 import Application.Helper.ModelRouter (resolveAgent)
 import Data.List (intercalate)
@@ -227,7 +227,7 @@ instance Controller DecisionRecordsController where
                                result <- liftIO $ callAgent agent userMsg
                                case result of
                                    Left err -> do
-                                        setErrorMessage ("Implementation proposal failed: " <> err.errorMessage)
+                                        setErrorMessage ("Implementation proposal failed: " <> bridgeErrorMessage err)
                                        redirectTo ShowDecisionRecordAction { decisionRecordId }
                                    Right resp -> do
                                        newRecord @AgentProposal
--- a/Web/Controller/RequirementCandidates.hs
+++ b/Web/Controller/RequirementCandidates.hs
@@ -8,7 +8,7 @@ import Web.View.RequirementCandidates.Edit
 import Generated.Types
 import IHP.Prelude
 import IHP.ControllerPrelude
-import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy)
+import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy, bridgeErrorMessage)
 import Application.Helper.ModelRouter (resolveAgent)
 import Data.List (intercalate)
 import Data.Aeson (decode, Value(..), Array)
@@ -298,7 +298,7 @@ instance Controller RequirementCandidatesController where
                                result <- liftIO $ callAgent agent userMsg
                                case result of
                                    Left err -> do
-                                        setErrorMessage ("Duplicate detection failed: " <> err.errorMessage)
+                                        setErrorMessage ("Duplicate detection failed: " <> bridgeErrorMessage err)
                                        redirectTo ShowRequirementCandidateAction { requirementCandidateId }
                                    Right resp -> do
                                        newRecord @AgentProposal
--- a/Web/Controller/Widgets.hs
+++ b/Web/Controller/Widgets.hs
@@ -11,7 +11,7 @@ import IHP.ControllerPrelude
 import Data.Aeson (toJSON, object, (.=))
 import Application.Helper.Controller (isInRegression, widgetCycleCounts)
 import Application.Helper.TypeRegistry (validateWidgetType, validatePolicyScope, activeWidgetTypes, activePolicyScopes)
-import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy)
+import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy, bridgeErrorMessage)
 import Application.Helper.ModelRouter (resolveAgent)
 import Data.List (intercalate)
@@ -209,7 +209,7 @@ instance Controller WidgetsController where
                        result <- liftIO $ callAgent agent userMsg
                        case result of
                            Left err -> do
-                                setErrorMessage ("AI summarization failed: " <> err.errorMessage)
+                                setErrorMessage ("AI summarization failed: " <> bridgeErrorMessage err)
                                redirectTo ShowWidgetAction { widgetId }
                            Right resp -> do
                                newRecord @AgentProposal
@@ -258,7 +258,7 @@ instance Controller WidgetsController where
                        result <- liftIO $ callAgent agent userMsg
                        case result of
                            Left err -> do
-                                setErrorMessage ("AI draft failed: " <> err.errorMessage)
+                                setErrorMessage ("AI draft failed: " <> bridgeErrorMessage err)
                                redirectTo ShowWidgetAction { widgetId }
                            Right resp -> do
                                newRecord @AgentProposal
--- a/scripts/llm_bridge.py
+++ b/scripts/llm_bridge.py
@@ -1,11 +1,10 @@
 #!/usr/bin/env python3
 """
 IHF llm-connect bridge — Phase 11 AI Federation (IHUB-WP-0012)
 Updated to use llm-connect FR-1 (server passthrough), FR-3 (async batch),
 FR-4 (BudgetTracker).
-Usage:
+SINGLE REQUEST — stdin JSON fields:
    echo '{"provider":"openrouter","model":"...","prompt":"..."}' | python3 scripts/llm_bridge.py
 Input JSON fields:
    provider      — openrouter | gemini | openai | claude-code (default: openrouter)
    model         — model name string (provider-specific)
    prompt        — the user prompt
@@ -13,54 +12,211 @@ Input JSON fields:
    api_key       — optional; falls back to llm-connect env-var resolution
    maxTokens     — max completion tokens (default: 2000)
    temperature   — sampling temperature (default: 0.7)
    budgetTotal   — optional int; if set, a BudgetTracker is created with this cap
    budgetSpent   — optional int; tokens already consumed (for delegation chains)
    serverUrl     — optional str; if set, POST to {serverUrl}/execute instead of
                    spawning a local adapter (FR-1 server passthrough)
-Output JSON (stdout, exit 0 on success):
+BATCH REQUEST — stdin JSON with "batch" key:
    batch         — list of single-request objects (see above)
    All top-level fields (except batch) are ignored.
 Output JSON — single request (stdout, exit 0 on success):
    content       — generated text
    model         — model name actually used
    tokensIn      — prompt token count
    tokensOut     — completion token count
    finishReason  — stop reason string
    budgetSpent   — cumulative tokens consumed from BudgetTracker after this call
-Error JSON (stdout, exit 1 on LLMError):
+Output JSON — batch request (stdout, exit 0 even on partial failure):
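    results       — list with one entry per batch item, in request order:
                    {content, model, tokensIn, tokensOut, finishReason, budgetSpent}
                    OR {error, errorType} per item (LLMBudgetExceededError items
                    also carry budgetTotal, budgetSpent, budgetRequested)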
 Error JSON (stdout, exit 1 on LLMError for single request):
    error         — error message
    errorType     — exception class name
    budgetTotal   — present only for LLMBudgetExceededError
    budgetSpent   — present only for LLMBudgetExceededError
    budgetRequested — present only for LLMBudgetExceededError
 """
 import sys
 import json
 import os
 import asyncio
 from typing import Any
 sys.path.insert(0, os.path.expanduser("~/llm-connect"))
-from llm_connect import create_adapter, RunConfig
+from llm_connect import create_adapter, RunConfig, BudgetTracker
-from llm_connect.exceptions import LLMError
+from llm_connect.exceptions import LLMError, LLMBudgetExceededError
-def main() -> None:
+# ---------------------------------------------------------------------------
-    req = json.load(sys.stdin)
+# Adapter / server call
 def _make_config(req: dict, tracker: BudgetTracker | None) -> RunConfig:
    """Build the RunConfig for one request dict.

    Reads "model" (default ""), "temperature" (default 0.7) and "maxTokens"
    (default 2000) from ``req`` and attaches ``tracker`` (which may be None)
    as the config's budget tracker.
    """
    settings = {
        "model_name": req.get("model", ""),
        "temperature": req.get("temperature", 0.7),
        "max_tokens": req.get("maxTokens", 2000),
        "budget_tracker": tracker,
    }
    return RunConfig(**settings)
 def _call_server(server_url: str, req: dict, config: RunConfig) -> dict:
    """POST one prompt to a running LLMServer's /execute endpoint (FR-1).

    The JSON body carries the request's "prompt" and the serialised config.
    Any trailing slash on ``server_url`` is stripped before "/execute" is
    appended.  The HTTP timeout is taken from ``config.timeout_seconds``.
    Returns the decoded JSON response body.
    """
    from urllib.request import Request, urlopen

    body = json.dumps({
        "prompt": req["prompt"],
        "config": config.to_dict(),
    }).encode()
    endpoint = f"{server_url.rstrip('/')}/execute"
    outgoing = Request(
        endpoint,
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urlopen(outgoing, timeout=config.timeout_seconds) as reply:
        raw_body = reply.read()
    return json.loads(raw_body)
 def _execute_single(req: dict) -> dict:
    """Execute one request dict, return a result or error dict."""
    # Build optional BudgetTracker (FR-4)
    tracker: BudgetTracker | None = None
    if "budgetTotal" in req:
        tracker = BudgetTracker(total=int(req["budgetTotal"]))
        already_spent = int(req.get("budgetSpent", 0))
        if already_spent > 0:
            tracker.consume(already_spent)
    config = _make_config(req, tracker)
    try:
        server_url = req.get("serverUrl")
        if server_url:
            # FR-1: delegate to running LLMServer
            raw = _call_server(server_url, req, config)
            spent = tracker.spent if tracker else 0
            return {
                "content":      raw.get("content", ""),
                "model":        raw.get("model", ""),
                "tokensIn":     raw.get("usage", {}).get("prompt_tokens", 0),
                "tokensOut":    raw.get("usage", {}).get("completion_tokens", 0),
                "finishReason": raw.get("finish_reason", "stop"),
                "budgetSpent":  spent,
            }
        adapter = create_adapter(
            provider=req.get("provider", "openrouter"),
            model=req.get("model"),
            api_key=req.get("api_key"),
            system_prompt=req.get("systemPrompt"),
        )
        config = RunConfig(
            model_name=req.get("model", ""),
            temperature=req.get("temperature", 0.7),
            max_tokens=req.get("maxTokens", 2000),
        )
        resp = adapter.execute_prompt(req["prompt"], config)
-        print(json.dumps({
+        spent = tracker.spent if tracker else 0
-            "content": resp.content,
+        return {
-            "model": resp.model,
+            "content":      resp.content,
-            "tokensIn": resp.usage.get("prompt_tokens", 0),
+            "model":        resp.model,
-            "tokensOut": resp.usage.get("completion_tokens", 0),
+            "tokensIn":     resp.usage.get("prompt_tokens", 0),
            "tokensOut":    resp.usage.get("completion_tokens", 0),
            "finishReason": resp.finish_reason,
-        }))
+            "budgetSpent":  spent,
        }
    except LLMBudgetExceededError as e:
        return {
            "error":            str(e),
            "errorType":        "LLMBudgetExceededError",
            "budgetTotal":      e.total,
            "budgetSpent":      e.spent,
            "budgetRequested":  e.requested,
        }
    except LLMError as e:
-        json.dump({"error": str(e), "errorType": type(e).__name__}, sys.stdout)
+        return {"error": str(e), "errorType": type(e).__name__}
 # ---------------------------------------------------------------------------
 # Async batch execution (FR-3)
 async def _execute_all_async(requests: list[dict]) -> list[dict]:
    """Run every request concurrently and return one dict per request (FR-3).

    Results come back in the same order as ``requests``.  Each entry is either
    a success dict (content, model, tokensIn, tokensOut, finishReason,
    budgetSpent) or an error dict (error, errorType, plus budgetTotal,
    budgetSpent and budgetRequested for LLMBudgetExceededError).

    A failure in one item never aborts the batch: any exception raised while
    handling an item is converted into that item's error dict, so the other
    items still return their results.
    """
    async def _one(req: dict) -> dict:
        try:
            # Tracker/config construction is inside the try so that a
            # malformed item (e.g. non-numeric budgetTotal) only fails itself.
            tracker: BudgetTracker | None = None
            if "budgetTotal" in req:
                tracker = BudgetTracker(total=int(req["budgetTotal"]))
                already_spent = int(req.get("budgetSpent", 0))
                if already_spent > 0:
                    tracker.consume(already_spent)

            config = _make_config(req, tracker)

            server_url = req.get("serverUrl")
            if server_url:
                # _call_server uses blocking urllib, so run it in the default
                # executor to keep the event loop free for the other items.
                loop = asyncio.get_running_loop()
                raw = await loop.run_in_executor(
                    None, lambda: _call_server(server_url, req, config)
                )
                spent = tracker.spent if tracker else 0
                return {
                    "content":      raw.get("content", ""),
                    "model":        raw.get("model", ""),
                    "tokensIn":     raw.get("usage", {}).get("prompt_tokens", 0),
                    "tokensOut":    raw.get("usage", {}).get("completion_tokens", 0),
                    "finishReason": raw.get("finish_reason", "stop"),
                    "budgetSpent":  spent,
                }

            adapter = create_adapter(
                provider=req.get("provider", "openrouter"),
                model=req.get("model"),
                api_key=req.get("api_key"),
                system_prompt=req.get("systemPrompt"),
            )
            resp = await adapter.async_execute_prompt(req["prompt"], config)
            spent = tracker.spent if tracker else 0
            return {
                "content":      resp.content,
                "model":        resp.model,
                "tokensIn":     resp.usage.get("prompt_tokens", 0),
                "tokensOut":    resp.usage.get("completion_tokens", 0),
                "finishReason": resp.finish_reason,
                "budgetSpent":  spent,
            }

        except LLMBudgetExceededError as e:
            return {
                "error":            str(e),
                "errorType":        "LLMBudgetExceededError",
                "budgetTotal":      e.total,
                "budgetSpent":      e.spent,
                "budgetRequested":  e.requested,
            }
        except LLMError as e:
            return {"error": str(e), "errorType": type(e).__name__}
        except Exception as e:
            # Non-LLM failures (missing "prompt", HTTP/URL errors, bad field
            # types) would otherwise propagate through asyncio.gather and
            # discard every other item's result.  Cancellation is a
            # BaseException and still propagates.
            return {"error": str(e), "errorType": type(e).__name__}

    return await asyncio.gather(*[_one(r) for r in requests])
 # ---------------------------------------------------------------------------
 # Entry point
 def main() -> None:
    """Entry point: read one JSON document from stdin and dispatch it.

    A document with a "batch" key is run through the concurrent batch path and
    printed as {"results": [...]}.  Anything else is treated as a single
    request: a result containing "error" is written to stdout and the process
    exits with status 1; otherwise the result is printed as JSON.
    """
    req = json.load(sys.stdin)

    if "batch" not in req:
        # Single request
        outcome = _execute_single(req)
        if "error" in outcome:
            json.dump(outcome, sys.stdout)
            sys.exit(1)
        print(json.dumps(outcome))
        return

    # Batch mode: run all requests concurrently (FR-3)
    batch_results = asyncio.run(_execute_all_async(req["batch"]))
    print(json.dumps({"results": batch_results}))
 if __name__ == "__main__":