""" Minimal HTTP server for llm_connect — serve mode (FR-1). Exposes: POST /execute — run a prompt through the configured adapter GET /health — liveness probe Usage (programmatic):: from llm_connect import MockLLMAdapter from llm_connect.server import LLMServer server = LLMServer(adapter=MockLLMAdapter(), port=8080) server.start() # background thread # ... server.stop() Usage (CLI):: python -m llm_connect.server --port 8080 --provider openrouter --model anthropic/claude-sonnet-4 """ import argparse import json import threading from http.server import BaseHTTPRequestHandler, HTTPServer from typing import Optional from llm_connect.adapter import LLMAdapter from llm_connect.models import RunConfig class _Handler(BaseHTTPRequestHandler): """Request handler — adapter injected via server.adapter.""" def log_message(self, format, *args): # suppress default access log pass # ── GET ──────────────────────────────────────────────────────── def do_GET(self): if self.path == "/health": self._respond(200, {"status": "ok"}) else: self._respond(404, {"error": "not found"}) # ── POST ─────────────────────────────────────────────────────── def do_POST(self): if self.path != "/execute": self._respond(404, {"error": "not found"}) return length = int(self.headers.get("Content-Length", 0)) raw = self.rfile.read(length) try: data = json.loads(raw) except (json.JSONDecodeError, ValueError): self._respond(400, {"error": "invalid JSON body"}) return prompt = data.get("prompt") if not prompt: self._respond(400, {"error": "missing required field: 'prompt'"}) return cfg = data.get("config", {}) if not isinstance(cfg, dict): self._respond(400, {"error": "field 'config' must be an object"}) return config = RunConfig.from_dict(cfg) try: response = self.server.adapter.execute_prompt(prompt, config) # type: ignore[attr-defined] self._respond(200, response.to_dict()) except Exception as exc: self._respond(500, {"error": str(exc)}) # ── helpers ──────────────────────────────────────────────────── def _respond(self, status: int, body: dict) -> None: payload = json.dumps(body).encode() self.send_response(status) self.send_header("Content-Type", "application/json") self.send_header("Content-Length", str(len(payload))) self.end_headers() self.wfile.write(payload) class LLMServer: """HTTP server wrapping an :class:`~llm_connect.adapter.LLMAdapter`. Args: adapter: The adapter that handles ``POST /execute`` requests. host: Bind address (default ``"127.0.0.1"``). port: TCP port (default ``8080``; ``0`` picks a free port). """ def __init__( self, adapter: LLMAdapter, host: str = "127.0.0.1", port: int = 8080, ) -> None: self._httpd = HTTPServer((host, port), _Handler) self._httpd.adapter = adapter # type: ignore[attr-defined] self._thread: Optional[threading.Thread] = None @property def port(self) -> int: """Actual bound port (useful when ``port=0`` was requested).""" return self._httpd.server_address[1] @property def host(self) -> str: return self._httpd.server_address[0] def start(self) -> None: """Start serving in a daemon background thread.""" self._thread = threading.Thread(target=self._httpd.serve_forever, daemon=True) self._thread.start() def stop(self) -> None: """Shut down the server and join the background thread.""" self._httpd.shutdown() if self._thread is not None: self._thread.join() def serve_forever(self) -> None: """Block the calling thread until interrupted.""" self._httpd.serve_forever() # ── CLI entry point ──────────────────────────────────────────────────────────── def _build_adapter(provider: str, model: Optional[str]) -> LLMAdapter: from llm_connect.factory import create_adapter return create_adapter(provider, model=model) def main(argv=None) -> None: parser = argparse.ArgumentParser( prog="python -m llm_connect.server", description="Start llm_connect HTTP serve mode.", ) parser.add_argument("--port", type=int, default=8080, help="TCP port (default: 8080)") parser.add_argument("--host", default="127.0.0.1", help="Bind address (default: 127.0.0.1)") parser.add_argument("--provider", default="mock", help="Provider name passed to create_adapter") parser.add_argument("--model", default=None, help="Model name (optional)") args = parser.parse_args(argv) adapter = _build_adapter(args.provider, args.model) server = LLMServer(adapter=adapter, host=args.host, port=args.port) print(f"llm_connect server listening on http://{args.host}:{args.port}") try: server.serve_forever() except KeyboardInterrupt: print("\nShutting down.") if __name__ == "__main__": main()