generated from coulomb/repo-seed
163 lines
5.6 KiB
Python
163 lines
5.6 KiB
Python
"""
Minimal HTTP server for llm_connect — serve mode (FR-1).

Exposes:
    POST /execute — run a prompt through the configured adapter
    GET /health — liveness probe

Usage (programmatic)::

    from llm_connect import MockLLMAdapter
    from llm_connect.server import LLMServer

    server = LLMServer(adapter=MockLLMAdapter(), port=8080)
    server.start()  # background thread
    # ...
    server.stop()

Usage (CLI)::

    python -m llm_connect.server --port 8080 --provider openrouter --model anthropic/claude-sonnet-4
"""
|
|
|
|
import argparse
|
|
import json
|
|
import threading
|
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
from typing import Optional
|
|
|
|
from llm_connect.adapter import LLMAdapter
|
|
from llm_connect.models import RunConfig
|
|
|
|
|
|
class _Handler(BaseHTTPRequestHandler):
    """Request handler for the two llm_connect endpoints.

    Routes:
        ``GET /health``   -> ``200 {"status": "ok"}``
        ``POST /execute`` -> runs the body's ``prompt`` through the adapter

    Every other path answers ``404``. The adapter is not handed to the
    handler directly; it is read from ``self.server.adapter``, so the owning
    server object must carry that attribute.
    """

    def log_message(self, format, *args):  # parameter name fixed by the base class
        """Discard the per-request access log line."""

    # ── GET ────────────────────────────────────────────────────────

    def do_GET(self):
        """Answer the liveness probe; anything else is a 404."""
        if self.path == "/health":
            self._respond(200, {"status": "ok"})
        else:
            self._respond(404, {"error": "not found"})

    # ── POST ───────────────────────────────────────────────────────

    def do_POST(self):
        """Validate the JSON request and run its prompt through the adapter.

        Expected body: ``{"prompt": <non-empty>, "config": {...}}`` where
        ``config`` is optional. Client mistakes are answered with ``400``,
        adapter failures with ``500``; the response body is always JSON.
        """
        if self.path != "/execute":
            self._respond(404, {"error": "not found"})
            return

        # Reject a malformed or negative Content-Length up front: int() would
        # raise on garbage, and rfile.read() with a negative size reads until
        # EOF, i.e. blocks until the client closes the connection.
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            length = -1
        if length < 0:
            self._respond(400, {"error": "invalid Content-Length header"})
            return

        raw = self.rfile.read(length)
        try:
            data = json.loads(raw)
        except (json.JSONDecodeError, ValueError):
            self._respond(400, {"error": "invalid JSON body"})
            return

        # Valid JSON is not necessarily an object ("[1, 2]", "42", "null");
        # without this check the .get() calls below raise AttributeError.
        if not isinstance(data, dict):
            self._respond(400, {"error": "JSON body must be an object"})
            return

        prompt = data.get("prompt")
        if not prompt:
            self._respond(400, {"error": "missing required field: 'prompt'"})
            return

        cfg = data.get("config", {})
        if not isinstance(cfg, dict):
            self._respond(400, {"error": "field 'config' must be an object"})
            return
        # NOTE(review): RunConfig.from_dict is defined elsewhere; presumably it
        # raises one of these for bad client-supplied values — confirm. Any
        # other exception type still propagates as before.
        try:
            config = RunConfig.from_dict(cfg)
        except (KeyError, TypeError, ValueError) as exc:
            self._respond(400, {"error": f"invalid config: {exc}"})
            return

        # Top-level boundary: whatever the adapter raises becomes a 500 so the
        # client always receives a JSON answer.
        try:
            response = self.server.adapter.execute_prompt(prompt, config)  # type: ignore[attr-defined]
            self._respond(200, response.to_dict())
        except Exception as exc:
            self._respond(500, {"error": str(exc)})

    # ── helpers ────────────────────────────────────────────────────

    def _respond(self, status: int, body: dict) -> None:
        """Send *body* as a JSON response with the given HTTP status.

        Args:
            status: HTTP status code for the response line.
            body: JSON-serialisable mapping written as the response body.
        """
        payload = json.dumps(body).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)
|
|
|
|
|
|
class LLMServer:
    """HTTP server wrapping an :class:`~llm_connect.adapter.LLMAdapter`.

    The server can run in a background thread (:meth:`start` / :meth:`stop`)
    or block the calling thread (:meth:`serve_forever`). :meth:`stop` closes
    the listening socket, so a stopped instance cannot be started again.

    Args:
        adapter: The adapter that handles ``POST /execute`` requests.
        host: Bind address (default ``"127.0.0.1"``).
        port: TCP port (default ``8080``; ``0`` picks a free port).
    """

    def __init__(
        self,
        adapter: LLMAdapter,
        host: str = "127.0.0.1",
        port: int = 8080,
    ) -> None:
        self._httpd = HTTPServer((host, port), _Handler)
        # Request handlers reach the adapter through ``self.server.adapter``.
        self._httpd.adapter = adapter  # type: ignore[attr-defined]
        self._thread: Optional[threading.Thread] = None
        # Set while a serve loop is running or about to run. stop() consults
        # it because HTTPServer.shutdown() deadlocks when no loop exists.
        self._serving = threading.Event()

    @property
    def port(self) -> int:
        """Actual bound port (useful when ``port=0`` was requested)."""
        return self._httpd.server_address[1]

    @property
    def host(self) -> str:
        """Address the listening socket is bound to."""
        return self._httpd.server_address[0]

    def start(self) -> None:
        """Start serving in a daemon background thread.

        Calling it again while the background thread is alive is a no-op, so
        two serve loops never compete for the same socket.
        """
        if self._thread is not None and self._thread.is_alive():
            return
        # Flag the loop as pending *before* the thread runs, so a stop() that
        # races with start-up still issues shutdown() instead of closing the
        # socket underneath the new thread.
        self._serving.set()
        worker = threading.Thread(target=self.serve_forever, daemon=True)
        try:
            worker.start()
        except BaseException:
            self._serving.clear()
            raise
        self._thread = worker

    def stop(self) -> None:
        """Shut down the server, join the background thread, free the socket.

        Safe to call when the server was never started and safe to call more
        than once. Must not be called from the serving thread itself.
        """
        if self._serving.is_set():
            self._httpd.shutdown()
        worker, self._thread = self._thread, None
        if worker is not None:
            worker.join()
        # Release the listening socket; shutdown() alone leaves it bound.
        self._httpd.server_close()

    def serve_forever(self) -> None:
        """Block the calling thread until interrupted."""
        self._serving.set()
        try:
            self._httpd.serve_forever()
        finally:
            self._serving.clear()
|
|
|
|
|
|
# ── CLI entry point ────────────────────────────────────────────────────────────
|
|
|
|
def _build_adapter(provider: str, model: Optional[str]) -> LLMAdapter:
    """Create the adapter for *provider* via ``llm_connect.factory``.

    The factory is imported when this function is called rather than when
    the module is loaded.

    Args:
        provider: Provider name, forwarded as the first positional argument.
        model: Model name, forwarded as the ``model`` keyword; may be ``None``.

    Returns:
        Whatever ``create_adapter`` returns for the given arguments.
    """
    from llm_connect.factory import create_adapter as make_adapter

    adapter = make_adapter(provider, model=model)
    return adapter
|
|
|
|
|
|
def main(argv=None) -> None:
    """Parse the CLI options, build the adapter and serve until Ctrl-C.

    Args:
        argv: Argument list to parse; ``None`` makes argparse fall back to
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        prog="python -m llm_connect.server",
        description="Start llm_connect HTTP serve mode.",
    )
    parser.add_argument("--port", type=int, default=8080, help="TCP port (default: 8080)")
    parser.add_argument("--host", default="127.0.0.1", help="Bind address (default: 127.0.0.1)")
    parser.add_argument("--provider", default="mock", help="Provider name passed to create_adapter")
    parser.add_argument("--model", default=None, help="Model name (optional)")
    args = parser.parse_args(argv)

    adapter = _build_adapter(args.provider, args.model)
    server = LLMServer(adapter=adapter, host=args.host, port=args.port)
    # Announce the port that was actually bound: with ``--port 0`` the OS
    # picks one, and echoing the requested value would print a useless ":0".
    print(f"llm_connect server listening on http://{args.host}:{server.port}")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down.")
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script
# (``python -m llm_connect.server``), never on import.
if __name__ == "__main__":
    main()
|