feat: integrate llm-connect FR-1/FR-3/FR-4 into IHF bridge

FR-3 (async_execute_prompt): CollectiveProposals now invokes all agents concurrently via callAgentsBatch → single bridge subprocess with asyncio.gather. Latency scales with the slowest agent, not the sum of all agents. FR-4 (BudgetTracker): AgentDelegations passes tokenBudget to bridge; llm-connect enforces it natively via BudgetTracker in RunConfig. BudgetExceededError is a first-class BridgeError variant with total/consumed/requested fields surfaced to the operator. FR-1 (LLMServer passthrough): bridge accepts optional serverUrl field; if present, calls POST {serverUrl}/execute instead of spawning a new adapter. Infrastructure ready for hot-agent pre-warming (no schema change required). AgentBridge.hs: adds callBridgeBatch, callAgentsBatch, callAgentWithBudget, BudgetExceededError constructor, bridgeErrorMessage helper, defaultRequest, requestToJson. All controllers that display bridge errors now use bridgeErrorMessage. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 22:48:29 +00:00
parent a400365d50
commit 674f5da0e1
7 changed files with 350 additions and 75 deletions
--- a/Application/Helper/AgentBridge.hs
+++ b/Application/Helper/AgentBridge.hs
@@ -2,6 +2,7 @@ module Application.Helper.AgentBridge where

 -- IHF Phase 11 — Advanced AI Federation (IHUB-WP-0012)
 -- Haskell wrapper around scripts/llm_bridge.py (llm-connect subprocess bridge).
+-- Updated to use FR-1 (server passthrough), FR-3 (async batch), FR-4 (BudgetTracker).

 import IHP.Prelude
 import IHP.ControllerPrelude
@@ -22,6 +23,24 @@ data BridgeRequest = BridgeRequest
    , prompt       :: !Text
    , maxTokens    :: !Int
    , temperature  :: !Double
+    -- FR-4: optional BudgetTracker fields
+    , budgetTotal  :: !(Maybe Int)    -- cap for this call; Nothing = no budget enforcement
+    , budgetSpent  :: !(Maybe Int)    -- tokens already consumed in delegation chain
+    -- FR-1: optional running LLMServer URL; if set, HTTP POST instead of new adapter
+    , serverUrl    :: !(Maybe Text)
+    }
+
+-- | Baseline request: provider "openrouter", empty model and prompt, no system
+-- prompt, 2000 max tokens, temperature 0.7, no budget fields and no server
+-- URL.  Meant to be used as the base of a record update that fills in the
+-- per-call fields.
+defaultRequest :: BridgeRequest
+defaultRequest = BridgeRequest
+    { provider     = "openrouter"
+    , model        = ""
+    , systemPrompt = Nothing
+    , prompt       = ""
+    , maxTokens    = 2000
+    , temperature  = 0.7
+    , budgetTotal  = Nothing
+    , budgetSpent  = Nothing
+    , serverUrl    = Nothing
    }

 data BridgeResponse = BridgeResponse
@@ -30,12 +49,26 @@ data BridgeResponse = BridgeResponse
    , tokensIn     :: !Int
    , tokensOut    :: !Int
    , finishReason :: !Text
+    , budgetSpent  :: !Int   -- cumulative tokens spent (0 when no tracker)
    } deriving (Show)

-data BridgeError = BridgeError
-    { errorMessage :: !Text
-    , errorType    :: !Text
-    } deriving (Show)
+-- | Failure of a bridge call: either decoded from the bridge's error JSON or
+-- constructed locally (for example when the bridge output cannot be parsed).
+data BridgeError
+    = BridgeError                        -- any failure that is not a budget overrun
+        { errorMessage :: !Text          -- JSON key "error"
+        , errorType    :: !Text          -- JSON key "errorType"
+        }
+    | BudgetExceededError                -- errorType "LLMBudgetExceededError"
+        { errorMessage    :: !Text       -- JSON key "error"
+        , budgetTotal     :: !Int        -- JSON key "budgetTotal"; 0 when absent
+        , budgetConsumed  :: !Int        -- JSON key "budgetSpent"; 0 when absent
+        , budgetRequested :: !Int        -- JSON key "budgetRequested"; 0 when absent
+        }
+    deriving (Show)
+
+-- | Display text for a bridge failure.  Every 'BridgeError' constructor
+-- carries an @errorMessage@ field, so one field access covers all variants.
+bridgeErrorMessage :: BridgeError -> Text
+bridgeErrorMessage err = err.errorMessage

 instance FromJSON BridgeResponse where
    parseJSON = A.withObject "BridgeResponse" \o -> BridgeResponse
@@ -44,51 +77,120 @@ instance FromJSON BridgeResponse where
        <*> o .:  "tokensIn"
        <*> o .:  "tokensOut"
        <*> o .:  "finishReason"
+        <*> (o .:? "budgetSpent" >>= pure . fromMaybe 0)

 instance FromJSON BridgeError where
-    parseJSON = A.withObject "BridgeError" \o -> BridgeError
-        <$> o .: "error"
-        <*> o .: "errorType"
+    -- Budget overruns get their own constructor; missing budget numbers read as 0.
+    parseJSON = A.withObject "BridgeError" \o -> do
+        errType <- o .: "errorType"
+        message <- o .: "error"
+        let intOrZero key = fromMaybe 0 <$> o .:? key
+        case errType :: Text of
+            "LLMBudgetExceededError" ->
+                BudgetExceededError message
+                    <$> intOrZero "budgetTotal"
+                    <*> intOrZero "budgetSpent"
+                    <*> intOrZero "budgetRequested"
+            _ -> pure (BridgeError message errType)

 -- ---------------------------------------------------------------------------
-- Core bridge call
+-- JSON serialisation of a BridgeRequest
+
+-- | Encode a request as the JSON object read by scripts/llm_bridge.py.
+requestToJson :: BridgeRequest -> Value
+requestToJson req = A.object (requiredFields <> optionalFields)
+  where
+    -- Always emitted; an absent system prompt is encoded as JSON null.
+    requiredFields =
+        [ "provider"     .= req.provider
+        , "model"        .= req.model
+        , "systemPrompt" .= req.systemPrompt
+        , "prompt"       .= req.prompt
+        , "maxTokens"    .= req.maxTokens
+        , "temperature"  .= req.temperature
+        ]
+    -- Emitted only when set, so the key is absent rather than null.
+    optionalFields =
+        maybe [] (\total -> ["budgetTotal" .= total]) req.budgetTotal
+            <> maybe [] (\spent -> ["budgetSpent" .= spent]) req.budgetSpent
+            <> maybe [] (\url -> ["serverUrl" .= url]) req.serverUrl
+
+-- ---------------------------------------------------------------------------
+-- Core bridge call — single request

 -- | Invoke the llm_bridge.py subprocess with the given request.
 callBridge :: BridgeRequest -> IO (Either BridgeError BridgeResponse)
 callBridge req = do
-    let payload = LBS.toStrict . encode $ object
-            [ "provider"     .= req.provider
-            , "model"        .= req.model
-            , "systemPrompt" .= req.systemPrompt
-            , "prompt"       .= req.prompt
-            , "maxTokens"    .= req.maxTokens
-            , "temperature"  .= req.temperature
-            ]
+    let payload = LBS.toStrict . A.encode $ requestToJson req
    (exitCode, stdout, stderr) <-
        readProcessWithExitCode "python3" ["scripts/llm_bridge.py"] (cs payload)
    let outBytes = LBS.fromStrict (cs stdout)
    case exitCode of
        ExitSuccess ->
-            case decode outBytes of
+            case A.decode outBytes of
                Just v  -> pure (Right v)
                Nothing -> pure (Left (BridgeError "Unparseable bridge output" "ParseError"))
        ExitFailure _ ->
-            case decode outBytes of
+            case A.decode outBytes of
                Just v  -> pure (Left v)
                Nothing -> pure (Left (BridgeError (cs stderr) "BridgeError"))

 -- | Call the bridge using an AgentRegistration record.
 callAgent :: AgentRegistration -> Text -> IO (Either BridgeError BridgeResponse)
 callAgent agent userPrompt =
-    callBridge BridgeRequest
+    callBridge defaultRequest
        { provider     = agent.provider
        , model        = agent.modelName
        , systemPrompt = agent.systemPrompt
        , prompt       = userPrompt
-        , maxTokens    = 2000
-        , temperature  = 0.7
        }

+-- | Call the bridge with an explicit token budget (FR-4).
+-- Used by AgentDelegations to enforce the configured tokenBudget at the bridge level.
+--
+-- @budgetCap@ is sent as the budget total and @alreadySpent@ (when positive)
+-- as the tokens already consumed.  The completion limit is the part of the
+-- cap that is still unspent, so a chained call never asks for more tokens
+-- than remain; with @alreadySpent = 0@ it equals the cap.
+-- NOTE(review): the floor of 1 keeps the limit positive when the budget is
+-- already exhausted, on the assumption that the bridge-side budget check
+-- then rejects the call — confirm against llm-connect.
+callAgentWithBudget :: AgentRegistration -> Text -> Int -> Int -> IO (Either BridgeError BridgeResponse)
+callAgentWithBudget agent userPrompt budgetCap alreadySpent =
+    callBridge defaultRequest
+        { provider     = agent.provider
+        , model        = agent.modelName
+        , systemPrompt = agent.systemPrompt
+        , prompt       = userPrompt
+        , maxTokens    = if spentSoFar > 0 then max 1 (budgetCap - spentSoFar) else budgetCap
+        , budgetTotal  = Just budgetCap
+        , budgetSpent  = if spentSoFar > 0 then Just spentSoFar else Nothing
+        }
+  where
+    -- A negative "already spent" figure is treated as nothing spent.
+    spentSoFar = max 0 alreadySpent
+
+-- ---------------------------------------------------------------------------
+-- Batch bridge call — parallel execution via FR-3 async (single subprocess)
+
+-- | Invoke all requests concurrently in a single bridge subprocess using
+-- asyncio.gather.  Returns exactly one result per input, in the same order.
+-- This replaces sequential forM in CollectiveProposals.
+--
+-- If the bridge output cannot be decoded, or it holds a different number of
+-- results than requests were sent, every slot is reported as a @ParseError@.
+-- Callers that zip the results with their inputs therefore never pair a
+-- response with the wrong request or silently drop one.
+callBridgeBatch :: [BridgeRequest] -> IO [Either BridgeError BridgeResponse]
+callBridgeBatch [] = pure []
+callBridgeBatch reqs = do
+    let payload = LBS.toStrict . A.encode $
+            A.object ["batch" .= map requestToJson reqs]
+    -- The exit code is not inspected: the outcome is judged solely by whether
+    -- stdout decodes to a well-formed "results" array.
+    (_exitCode, stdout, _stderr) <-
+        readProcessWithExitCode "python3" ["scripts/llm_bridge.py"] (cs payload)
+    let outBytes = LBS.fromStrict (cs stdout)
+    pure $ case A.decode @A.Value outBytes of
+        Just (A.Object o) | Just (A.Array arr) <- A.lookup "results" o ->
+            let results = map parseResult (toList arr)
+            in  if length results == expected
+                    then results
+                    else failAll "Batch result count does not match request count"
+        _ ->
+            failAll "Unparseable batch output"
+  where
+    expected = length reqs
+    -- One identical failure per request keeps the output aligned with the input.
+    failAll msg = replicate expected (Left (BridgeError msg "ParseError"))
+    -- Each item is either a success object or an error object: try the
+    -- success shape first, then the error shape.
+    parseResult v = case A.fromJSON v of
+        A.Success resp -> Right resp
+        A.Error _      -> case A.fromJSON v of
+            A.Success err -> Left err
+            A.Error _     -> Left (BridgeError "Unparseable batch item" "ParseError")
+
+-- | Build one default-configured request per (agent, prompt) pair and run
+-- them all through 'callBridgeBatch'.
+callAgentsBatch :: [(AgentRegistration, Text)] -> IO [Either BridgeError BridgeResponse]
+callAgentsBatch pairs = callBridgeBatch (map toRequest pairs)
+  where
+    toRequest :: (AgentRegistration, Text) -> BridgeRequest
+    toRequest (agent, userPrompt) = defaultRequest
+        { provider     = agent.provider
+        , model        = agent.modelName
+        , systemPrompt = agent.systemPrompt
+        , prompt       = userPrompt
+        }
+
 -- ---------------------------------------------------------------------------
 -- AI governance policy check

--- a/Web/Controller/AgentDelegations.hs
+++ b/Web/Controller/AgentDelegations.hs
@@ -1,11 +1,16 @@
 module Web.Controller.AgentDelegations where

 -- IHF Phase 11 — Advanced AI Federation (IHUB-WP-0012 T06)
+-- Updated: delegation token budget enforced natively by llm-connect BudgetTracker (FR-4).

 import Web.Controller.Prelude
 import Web.View.AgentDelegations.Index
 import Web.View.AgentDelegations.Show
-import Application.Helper.AgentBridge (callBridge, BridgeRequest(..))
+import Application.Helper.AgentBridge
+    ( callAgentWithBudget
+    , BridgeError(..)
+    , bridgeErrorMessage
+    )

 instance Controller AgentDelegationsController where

@@ -44,24 +49,32 @@ instance Controller AgentDelegationsController where
            |> set #status "pending"
            |> createRecord

-        result <- liftIO $ callBridge BridgeRequest
-            { provider     = receivingAgent.provider
-            , model        = receivingAgent.modelName
-            , systemPrompt = receivingAgent.systemPrompt
-            , prompt       = scope
-            , maxTokens    = tokenBudget
-            , temperature  = 0.7
-            }
+        -- FR-4: token budget passed to bridge → llm-connect BudgetTracker enforces it
+        -- natively, raising LLMBudgetExceededError if the call would exceed the cap.
+        result <- liftIO $ callAgentWithBudget receivingAgent scope tokenBudget 0

        now <- getCurrentTime
        case result of
+            Left BudgetExceededError { errorMessage, budgetTotal, budgetConsumed, budgetRequested } -> do
+                delegation
+                    |> set #status "failed"
+                    |> set #result (Just . A.toJSON $ A.object
+                        [ "error"            A..= errorMessage
+                        , "budgetTotal"      A..= budgetTotal
+                        , "budgetConsumed"   A..= budgetConsumed
+                        , "budgetRequested"  A..= budgetRequested
+                        ])
+                    |> set #completedAt (Just now)
+                    |> updateRecord
+                setErrorMessage ("Budget exceeded: requested " <> show budgetRequested
+                    <> " tokens but only " <> show (budgetTotal - budgetConsumed) <> " remain")
            Left err -> do
                delegation
                    |> set #status "failed"
-                    |> set #result (Just . A.toJSON $ A.object ["error" A..= err.errorMessage])
+                    |> set #result (Just . A.toJSON $ A.object ["error" A..= bridgeErrorMessage err])
                    |> set #completedAt (Just now)
                    |> updateRecord
-                setErrorMessage ("Delegation failed: " <> err.errorMessage)
+                setErrorMessage ("Delegation failed: " <> bridgeErrorMessage err)
            Right resp -> do
                delegation
                    |> set #status "completed"
--- a/Web/Controller/CollectiveProposals.hs
+++ b/Web/Controller/CollectiveProposals.hs
@@ -1,11 +1,12 @@
 module Web.Controller.CollectiveProposals where

 -- IHF Phase 11 — Advanced AI Federation (IHUB-WP-0012 T07)
+-- Updated: agents invoked concurrently via callAgentsBatch (FR-3 async).

 import Web.Controller.Prelude
 import Web.View.CollectiveProposals.Index
 import Web.View.CollectiveProposals.Show
-import Application.Helper.AgentBridge (callAgent, BridgeResponse(..))
+import Application.Helper.AgentBridge (callAgent, callAgentsBatch, BridgeResponse(..))
 import Application.Helper.ModelRouter (resolveAllAgents)
 import Data.List (intercalate)

@@ -45,10 +46,15 @@ instance Controller CollectiveProposalsController where
            |> createRecord

        agents <- resolveAllAgents hubId taskType
-        contributions <- forM agents \agent -> do
-            result <- liftIO $ callAgent agent prompt
+
+        -- FR-3: invoke all agents concurrently in a single bridge subprocess call
+        -- instead of sequential forM.  Latency now scales with the slowest agent,
+        -- not the sum of all agents.
+        results <- liftIO $ callAgentsBatch [(a, prompt) | a <- agents]
+
+        successContribs <- fmap catMaybes $ forM (zip agents results) \(agent, result) ->
            case result of
-                Left err -> pure Nothing
+                Left _     -> pure Nothing
                Right resp -> do
                    contrib <- newRecord @CollectiveProposalContribution
                        |> set #collectiveProposalId proposal.id
@@ -60,22 +66,21 @@ instance Controller CollectiveProposalsController where
                        |> createRecord
                    pure (Just (contrib, resp))

-        let successContribs = catMaybes contributions
        consensusStatus <- if null successContribs
-            then pure "divergent"
+            then do
+                proposal |> set #consensusStatus "divergent" |> updateRecord
+                pure "divergent"
            else do
                let contribTexts = map (\(_, r) -> r.content) successContribs
                    synthesisPrompt = "The following agents have independently proposed solutions. "
                        <> "Synthesize a unified recommendation:\n\n"
                        <> intercalate "\n---\n" contribTexts
-                mSynthAgent <- resolveAllAgents hubId taskType >>= \case
-                    (a:_) -> pure (Just a)
-                    []    -> pure Nothing
-                case mSynthAgent of
-                    Nothing -> do
+                -- Synthesis uses the highest-priority agent (head of the list)
+                case agents of
+                    [] -> do
                        proposal |> set #consensusStatus "divergent" |> updateRecord
                        pure "divergent"
-                    Just synthAgent -> do
+                    (synthAgent:_) -> do
                        synthResult <- liftIO $ callAgent synthAgent synthesisPrompt
                        case synthResult of
                            Left _ -> do
@@ -95,8 +100,7 @@ instance Controller CollectiveProposalsController where
        setSuccessMessage ("Collective proposal created (" <> consensusStatus <> ")")
        redirectTo ShowCollectiveProposalAction { collectiveProposalId = proposal.id }

-- | Simple consensus heuristic: if all contributions have a non-empty content
-- and there are at least 2, mark as consensus; single contributor = pending.
+-- | Simple consensus heuristic: ≥2 successful contributions = consensus.
 detectConsensus :: [CollectiveProposalContribution] -> Text
 detectConsensus contribs
    | length contribs >= 2 = "consensus"
--- a/Web/Controller/DecisionRecords.hs
+++ b/Web/Controller/DecisionRecords.hs
@@ -8,7 +8,7 @@ import Web.View.DecisionRecords.Edit
 import Generated.Types
 import IHP.Prelude
 import IHP.ControllerPrelude
-import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy)
+import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy, bridgeErrorMessage)
 import Application.Helper.ModelRouter (resolveAgent)
 import Data.List (intercalate)

@@ -227,7 +227,7 @@ instance Controller DecisionRecordsController where
                                result <- liftIO $ callAgent agent userMsg
                                case result of
                                    Left err -> do
-                                        setErrorMessage ("Implementation proposal failed: " <> err.errorMessage)
+                                        setErrorMessage ("Implementation proposal failed: " <> bridgeErrorMessage err)
                                        redirectTo ShowDecisionRecordAction { decisionRecordId }
                                    Right resp -> do
                                        newRecord @AgentProposal
--- a/Web/Controller/RequirementCandidates.hs
+++ b/Web/Controller/RequirementCandidates.hs
@@ -8,7 +8,7 @@ import Web.View.RequirementCandidates.Edit
 import Generated.Types
 import IHP.Prelude
 import IHP.ControllerPrelude
-import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy)
+import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy, bridgeErrorMessage)
 import Application.Helper.ModelRouter (resolveAgent)
 import Data.List (intercalate)
 import Data.Aeson (decode, Value(..), Array)
@@ -298,7 +298,7 @@ instance Controller RequirementCandidatesController where
                                result <- liftIO $ callAgent agent userMsg
                                case result of
                                    Left err -> do
-                                        setErrorMessage ("Duplicate detection failed: " <> err.errorMessage)
+                                        setErrorMessage ("Duplicate detection failed: " <> bridgeErrorMessage err)
                                        redirectTo ShowRequirementCandidateAction { requirementCandidateId }
                                    Right resp -> do
                                        newRecord @AgentProposal
--- a/Web/Controller/Widgets.hs
+++ b/Web/Controller/Widgets.hs
@@ -11,7 +11,7 @@ import IHP.ControllerPrelude
 import Data.Aeson (toJSON, object, (.=))
 import Application.Helper.Controller (isInRegression, widgetCycleCounts)
 import Application.Helper.TypeRegistry (validateWidgetType, validatePolicyScope, activeWidgetTypes, activePolicyScopes)
-import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy)
+import Application.Helper.AgentBridge (callAgent, checkGovernancePolicy, bridgeErrorMessage)
 import Application.Helper.ModelRouter (resolveAgent)
 import Data.List (intercalate)

@@ -209,7 +209,7 @@ instance Controller WidgetsController where
                        result <- liftIO $ callAgent agent userMsg
                        case result of
                            Left err -> do
-                                setErrorMessage ("AI summarization failed: " <> err.errorMessage)
+                                setErrorMessage ("AI summarization failed: " <> bridgeErrorMessage err)
                                redirectTo ShowWidgetAction { widgetId }
                            Right resp -> do
                                newRecord @AgentProposal
@@ -258,7 +258,7 @@ instance Controller WidgetsController where
                        result <- liftIO $ callAgent agent userMsg
                        case result of
                            Left err -> do
-                                setErrorMessage ("AI draft failed: " <> err.errorMessage)
+                                setErrorMessage ("AI draft failed: " <> bridgeErrorMessage err)
                                redirectTo ShowWidgetAction { widgetId }
                            Right resp -> do
                                newRecord @AgentProposal
--- a/scripts/llm_bridge.py
+++ b/scripts/llm_bridge.py
@@ -1,11 +1,10 @@
 #!/usr/bin/env python3
 """
 IHF llm-connect bridge — Phase 11 AI Federation (IHUB-WP-0012)
+Updated to use llm-connect FR-1 (server passthrough), FR-3 (async batch),
+FR-4 (BudgetTracker).

-Usage:
-    echo '{"provider":"openrouter","model":"...","prompt":"..."}' | python3 scripts/llm_bridge.py
-
-Input JSON fields:
+SINGLE REQUEST — stdin JSON fields:
    provider      — openrouter | gemini | openai | claude-code (default: openrouter)
    model         — model name string (provider-specific)
    prompt        — the user prompt
@@ -13,54 +12,211 @@ Input JSON fields:
    api_key       — optional; falls back to llm-connect env-var resolution
    maxTokens     — max completion tokens (default: 2000)
    temperature   — sampling temperature (default: 0.7)
+    budgetTotal   — optional int; if set, a BudgetTracker is created with this cap
+    budgetSpent   — optional int; tokens already consumed (for delegation chains)
+    serverUrl     — optional str; if set, POST to {serverUrl}/execute instead of
+                    spawning a local adapter (FR-1 server passthrough)

-Output JSON (stdout, exit 0 on success):
+BATCH REQUEST — stdin JSON with "batch" key:
+    batch         — list of single-request objects (see above)
+    All top-level fields (except batch) are ignored.
+
+Output JSON — single request (stdout, exit 0 on success):
    content       — generated text
    model         — model name actually used
    tokensIn      — prompt token count
    tokensOut     — completion token count
    finishReason  — stop reason string
+    budgetSpent   — cumulative tokens consumed from BudgetTracker after this call

-Error JSON (stdout, exit 1 on LLMError):
+Output JSON — batch request (stdout, exit 0 even on partial failure):
+    results       — list of {content, model, tokensIn, tokensOut, finishReason,
+                    budgetSpent} OR {error, errorType[, budget fields]} per item
+
+Error JSON (stdout, exit 1 on LLMError for single request):
    error         — error message
    errorType     — exception class name
+    budgetTotal   — present only for LLMBudgetExceededError
+    budgetSpent   — present only for LLMBudgetExceededError
+    budgetRequested — present only for LLMBudgetExceededError
 """
 import sys
 import json
 import os
+import asyncio
+from typing import Any

 sys.path.insert(0, os.path.expanduser("~/llm-connect"))

-from llm_connect import create_adapter, RunConfig
-from llm_connect.exceptions import LLMError
+from llm_connect import create_adapter, RunConfig, BudgetTracker
+from llm_connect.exceptions import LLMError, LLMBudgetExceededError


-def main() -> None:
-    req = json.load(sys.stdin)
+# ---------------------------------------------------------------------------
+# Adapter / server call
+
+def _make_config(req: dict, tracker: BudgetTracker | None) -> RunConfig:
+    """Build the RunConfig for one request dict.
+
+    Missing keys fall back to an empty model name, temperature 0.7 and
+    2000 max tokens; ``tracker`` is passed through as the budget tracker.
+    """
+    model_name = req.get("model", "")
+    temperature = req.get("temperature", 0.7)
+    max_tokens = req.get("maxTokens", 2000)
+    return RunConfig(
+        model_name=model_name,
+        temperature=temperature,
+        max_tokens=max_tokens,
+        budget_tracker=tracker,
+    )
+
+
+def _call_server(server_url: str, req: dict, config: RunConfig) -> dict:
+    """POST the prompt and config to ``{server_url}/execute`` (FR-1).
+
+    Returns the decoded JSON body of the response.  The call blocks until
+    the server answers or ``config.timeout_seconds`` elapses.
+    """
+    import urllib.request
+
+    endpoint = server_url.rstrip("/") + "/execute"
+    body = json.dumps({"prompt": req["prompt"], "config": config.to_dict()})
+    http_req = urllib.request.Request(
+        endpoint,
+        data=body.encode(),
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    with urllib.request.urlopen(http_req, timeout=config.timeout_seconds) as resp:
+        raw_body = resp.read()
+    return json.loads(raw_body)
+
+
+def _execute_single(req: dict) -> dict:
+    """Execute one request dict and return a result dict or an error dict.
+
+    LLM errors, and transport or decoding failures (OSError / ValueError,
+    which cover urllib and JSON errors on the FR-1 server path), are returned
+    as ``{"error", "errorType", ...}`` dicts rather than raised.
+    """
+    # Build optional BudgetTracker (FR-4)
+    tracker: BudgetTracker | None = None
+    if "budgetTotal" in req:
+        tracker = BudgetTracker(total=int(req["budgetTotal"]))
+        already_spent = int(req.get("budgetSpent", 0))
+        if already_spent > 0:
+            tracker.consume(already_spent)
+
+    config = _make_config(req, tracker)

    try:
+        server_url = req.get("serverUrl")
+        if server_url:
+            # FR-1: delegate to running LLMServer
+            raw = _call_server(server_url, req, config)
+            spent = tracker.spent if tracker else 0
+            return {
+                "content":      raw.get("content", ""),
+                "model":        raw.get("model", ""),
+                "tokensIn":     raw.get("usage", {}).get("prompt_tokens", 0),
+                "tokensOut":    raw.get("usage", {}).get("completion_tokens", 0),
+                "finishReason": raw.get("finish_reason", "stop"),
+                "budgetSpent":  spent,
+            }
+
        adapter = create_adapter(
            provider=req.get("provider", "openrouter"),
            model=req.get("model"),
            api_key=req.get("api_key"),
            system_prompt=req.get("systemPrompt"),
        )
-        config = RunConfig(
-            model_name=req.get("model", ""),
-            temperature=req.get("temperature", 0.7),
-            max_tokens=req.get("maxTokens", 2000),
-        )
        resp = adapter.execute_prompt(req["prompt"], config)
-        print(json.dumps({
-            "content": resp.content,
-            "model": resp.model,
-            "tokensIn": resp.usage.get("prompt_tokens", 0),
-            "tokensOut": resp.usage.get("completion_tokens", 0),
+        spent = tracker.spent if tracker else 0
+        return {
+            "content":      resp.content,
+            "model":        resp.model,
+            "tokensIn":     resp.usage.get("prompt_tokens", 0),
+            "tokensOut":    resp.usage.get("completion_tokens", 0),
            "finishReason": resp.finish_reason,
-        }))
+            "budgetSpent":  spent,
+        }
+    except LLMBudgetExceededError as e:
+        return {
+            "error":            str(e),
+            "errorType":        "LLMBudgetExceededError",
+            "budgetTotal":      e.total,
+            "budgetSpent":      e.spent,
+            "budgetRequested":  e.requested,
+        }
    except LLMError as e:
-        json.dump({"error": str(e), "errorType": type(e).__name__}, sys.stdout)
+        return {"error": str(e), "errorType": type(e).__name__}
+    except (OSError, ValueError) as e:
+        # urllib errors subclass OSError and JSON decode errors subclass
+        # ValueError; report them as structured errors instead of a traceback.
+        return {"error": str(e), "errorType": type(e).__name__}
+
+
+# ---------------------------------------------------------------------------
+# Async batch execution (FR-3)
+
+async def _execute_all_async(requests: list[dict]) -> list[dict]:
+    """Run all requests concurrently (FR-3); return one dict per request.
+
+    Results are in the same order as ``requests``.  Each item is isolated:
+    any failure, including a malformed request or a transport error, becomes
+    an ``{"error", "errorType", ...}`` dict for that item only, so one bad
+    request cannot abort the rest of the batch.
+    """
+    async def _one(req: dict) -> dict:
+        try:
+            # Tracker construction sits inside the try so that a bad budget
+            # value fails this item rather than the whole gather.
+            tracker: BudgetTracker | None = None
+            if "budgetTotal" in req:
+                tracker = BudgetTracker(total=int(req["budgetTotal"]))
+                already_spent = int(req.get("budgetSpent", 0))
+                if already_spent > 0:
+                    tracker.consume(already_spent)
+
+            config = _make_config(req, tracker)
+
+            server_url = req.get("serverUrl")
+            if server_url:
+                # _call_server does blocking urllib I/O, so run it in the
+                # default executor to keep the event loop free.
+                loop = asyncio.get_running_loop()
+                raw = await loop.run_in_executor(
+                    None, lambda: _call_server(server_url, req, config)
+                )
+                spent = tracker.spent if tracker else 0
+                return {
+                    "content":      raw.get("content", ""),
+                    "model":        raw.get("model", ""),
+                    "tokensIn":     raw.get("usage", {}).get("prompt_tokens", 0),
+                    "tokensOut":    raw.get("usage", {}).get("completion_tokens", 0),
+                    "finishReason": raw.get("finish_reason", "stop"),
+                    "budgetSpent":  spent,
+                }
+
+            adapter = create_adapter(
+                provider=req.get("provider", "openrouter"),
+                model=req.get("model"),
+                api_key=req.get("api_key"),
+                system_prompt=req.get("systemPrompt"),
+            )
+            resp = await adapter.async_execute_prompt(req["prompt"], config)
+            spent = tracker.spent if tracker else 0
+            return {
+                "content":      resp.content,
+                "model":        resp.model,
+                "tokensIn":     resp.usage.get("prompt_tokens", 0),
+                "tokensOut":    resp.usage.get("completion_tokens", 0),
+                "finishReason": resp.finish_reason,
+                "budgetSpent":  spent,
+            }
+        except LLMBudgetExceededError as e:
+            return {
+                "error":            str(e),
+                "errorType":        "LLMBudgetExceededError",
+                "budgetTotal":      e.total,
+                "budgetSpent":      e.spent,
+                "budgetRequested":  e.requested,
+            }
+        except LLMError as e:
+            return {"error": str(e), "errorType": type(e).__name__}
+        except Exception as e:
+            # Anything else (transport error, missing "prompt", bad JSON from
+            # the server) is reported for this item only.
+            return {"error": str(e), "errorType": type(e).__name__}
+
+    return await asyncio.gather(*[_one(r) for r in requests])
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+
+def main() -> None:
+    """Read one JSON document from stdin and dispatch it.
+
+    A document with a "batch" key runs in batch mode and always prints
+    {"results": [...]}.  Anything else is a single request: the result is
+    written to stdout, and the process exits with status 1 when the result
+    is an error dict.
+    """
+    payload = json.load(sys.stdin)
+
+    if "batch" in payload:
+        # Batch mode: run all requests concurrently (FR-3)
+        batch_results = asyncio.run(_execute_all_async(payload["batch"]))
+        print(json.dumps({"results": batch_results}))
+        return
+
+    # Single request
+    outcome = _execute_single(payload)
+    if "error" in outcome:
+        json.dump(outcome, sys.stdout)
        sys.exit(1)
+    print(json.dumps(outcome))


 if __name__ == "__main__":