"""
Minimal HTTP server for llm_connect — serve mode (FR-1).

Exposes:
    POST /execute  — run a prompt through the configured adapter
    GET  /health   — liveness probe

Usage (programmatic)::

    from llm_connect import MockLLMAdapter
    from llm_connect.server import LLMServer

    server = LLMServer(adapter=MockLLMAdapter(), port=8080)
    server.start()   # background thread
    # ...
    server.stop()

Usage (CLI)::

    python -m llm_connect.server --port 8080 --provider openrouter --model anthropic/claude-sonnet-4
"""

import argparse
import datetime as _dt
import json
import os
import re
import threading
import time
import uuid
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
from typing import Optional
from urllib.parse import parse_qs, urlsplit

from llm_connect._diagnostics import capture_diagnostics
from llm_connect.adapter import LLMAdapter
from llm_connect.models import LLMResponse, RunConfig


class _Handler(BaseHTTPRequestHandler):
    """Request handler — adapter injected via server.adapter."""

    def log_message(self, format, *args):  # suppress default access log
        pass

    # ── GET ────────────────────────────────────────────────────────

    def do_GET(self):
        """Answer ``GET /health`` with 200; every other path gets 404."""
        parsed = urlsplit(self.path)
        if parsed.path == "/health":
            self._respond(200, {"status": "ok"})
        else:
            self._respond(404, {"error": "not found"})

    # ── POST ───────────────────────────────────────────────────────

    def do_POST(self):
        """Handle ``POST /execute``.

        The body must be a JSON object with a non-empty ``prompt`` and an
        optional ``config`` object.  Malformed requests are answered with
        400, adapter (or audit) failures with 500, and success with 200
        plus ``response.to_dict()``.  When debugging is requested the
        captured diagnostics are attached under ``"debug"``; when
        ``LLM_CONNECT_AUDIT_DIR`` is set an audit record is written.
        """
        parsed = urlsplit(self.path)
        if parsed.path != "/execute":
            self._respond(404, {"error": "not found"})
            return

        debug_enabled = _debug_requested(parsed.query)
        audit_dir = os.environ.get("LLM_CONNECT_AUDIT_DIR")

        # A malformed header must not crash the handler, and a negative
        # length would make rfile.read() block until the peer closes.
        try:
            length = int(self.headers.get("Content-Length", 0))
        except ValueError:
            length = -1
        if length < 0:
            self._respond(400, {"error": "invalid Content-Length header"})
            return
        raw = self.rfile.read(length)

        try:
            data = json.loads(raw)
        except ValueError:  # includes JSONDecodeError and UnicodeDecodeError
            self._respond(400, {"error": "invalid JSON body"})
            return
        # Valid JSON that is not an object (list, string, number, ...) has
        # no .get(); reject it instead of raising AttributeError.
        if not isinstance(data, dict):
            self._respond(400, {"error": "request body must be a JSON object"})
            return

        prompt = data.get("prompt")
        if not prompt:
            self._respond(400, {"error": "missing required field: 'prompt'"})
            return

        cfg = data.get("config", {})
        if not isinstance(cfg, dict):
            self._respond(400, {"error": "field 'config' must be an object"})
            return
        # The config comes straight from the client, so a rejected value is
        # a client error.  NOTE(review): the exact exception types raised by
        # RunConfig.from_dict are not visible here — confirm this tuple.
        try:
            config = RunConfig.from_dict(cfg)
        except (TypeError, ValueError, KeyError) as exc:
            self._respond(400, {"error": f"invalid 'config': {exc}"})
            return

        # Monotonic clock: wall-clock adjustments must not skew the latency.
        start = time.monotonic()
        diagnostics_enabled = debug_enabled or bool(audit_dir)
        try:
            with capture_diagnostics(diagnostics_enabled) as diagnostics:
                response = self.server.adapter.execute_prompt(prompt, config)  # type: ignore[attr-defined]
            latency = time.monotonic() - start
            body = response.to_dict()
            # The context manager may yield None (presumably when disabled).
            debug = diagnostics.to_dict() if diagnostics is not None else None
            if debug_enabled and debug is not None:
                body["debug"] = debug
            if audit_dir:
                _write_audit_record(audit_dir, prompt, config, response, debug, latency)
            self._respond(200, body)
        except Exception as exc:
            # Top-level request boundary: report the failure to the client.
            self._respond(500, {"error": str(exc)})

    # ── helpers ────────────────────────────────────────────────────

    def _respond(self, status: int, body: dict) -> None:
        """Send *body* as a JSON response with the given HTTP *status*."""
        payload = json.dumps(body).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)


class LLMServer:
    """HTTP server wrapping an :class:`~llm_connect.adapter.LLMAdapter`.

    Args:
        adapter: The adapter that handles ``POST /execute`` requests.
        host: Bind address (default ``"127.0.0.1"``).
        port: TCP port (default ``8080``; ``0`` picks a free port).
    """

    def __init__(
        self,
        adapter: LLMAdapter,
        host: str = "127.0.0.1",
        port: int = 8080,
    ) -> None:
        self._httpd = ThreadingHTTPServer((host, port), _Handler)
        self._httpd.adapter = adapter  # type: ignore[attr-defined]
        self._thread: Optional[threading.Thread] = None
        # True while a serve loop has been requested and not yet stopped.
        # shutdown() blocks forever if no loop ever runs, so stop() needs
        # to know whether there is one to wait for.
        self._serving = False

    @property
    def port(self) -> int:
        """Actual bound port (useful when ``port=0`` was requested)."""
        return self._httpd.server_address[1]

    @property
    def host(self) -> str:
        """Address the listening socket is bound to."""
        return self._httpd.server_address[0]

    def start(self) -> None:
        """Start serving in a daemon background thread."""
        # Set the flag before the thread runs so an immediate stop() still
        # issues shutdown() and does not join a thread that never exits.
        self._serving = True
        self._thread = threading.Thread(target=self._httpd.serve_forever, daemon=True)
        self._thread.start()

    def stop(self) -> None:
        """Shut down the server, join the background thread, close the socket.

        Safe to call when the server was never started and safe to call
        more than once.  The listening socket is released, so the instance
        cannot be restarted afterwards.
        """
        if self._serving:
            self._httpd.shutdown()
            self._serving = False
        if self._thread is not None:
            self._thread.join()
            self._thread = None
        self._httpd.server_close()

    def serve_forever(self) -> None:
        """Block the calling thread until interrupted."""
        self._serving = True
        try:
            self._httpd.serve_forever()
        finally:
            self._serving = False


# ── CLI entry point ────────────────────────────────────────────────────────────


def _build_adapter(provider: str, model: Optional[str]) -> LLMAdapter:
    """Create the adapter for *provider* / *model* via ``create_adapter``."""
    # Imported lazily, only when an adapter is actually built.
    from llm_connect.factory import create_adapter

    return create_adapter(provider, model=model)


def _debug_requested(query: str) -> bool:
    """Return True if ``LLM_CONNECT_DEBUG`` or a ``debug=`` query value is truthy."""
    env = os.environ.get("LLM_CONNECT_DEBUG", "")
    if _truthy(env):
        return True
    values = parse_qs(query).get("debug", [])
    return any(_truthy(value) for value in values)


def _truthy(value: str) -> bool:
    """Return True for ``1``/``true``/``yes``/``on`` (case- and space-insensitive)."""
    return value.strip().lower() in {"1", "true", "yes", "on"}


def _write_audit_record(
    audit_dir: str,
    prompt: str,
    config: RunConfig,
    response: LLMResponse,
    debug: Optional[dict],
    latency_seconds: float,
) -> None:
    """Write one JSON audit record for a completed request into *audit_dir*.

    The directory is created if missing.  The file name combines a UTC
    timestamp with the response id from ``response.metadata`` (or a random
    hex id when absent), sanitised for use as a file name.

    Args:
        audit_dir: Directory that receives the record.
        prompt: Prompt that was executed.
        config: Run configuration used for the call.
        response: Adapter response.
        debug: Captured diagnostics dict, or ``None`` when none were captured.
        latency_seconds: Duration of the adapter call; stored rounded to 3 places.
    """
    target_dir = Path(audit_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    now = _dt.datetime.now(_dt.timezone.utc)
    response_id = str(response.metadata.get("response_id") or uuid.uuid4().hex)
    filename = f"{now.strftime('%Y%m%dT%H%M%S%fZ')}-{_safe_filename(response_id)}.json"

    diagnostics = debug or {}
    record = {
        "timestamp": now.isoformat().replace("+00:00", "Z"),
        "prompt": prompt,
        "config": config.to_dict(),
        "provider": response.metadata.get("provider"),
        "provider_request": diagnostics.get("provider_request"),
        "provider_response": diagnostics.get("provider_response"),
        "adapter_transformations": diagnostics.get("adapter_transformations", []),
        "parsed_content": response.content,
        "latency_seconds": round(latency_seconds, 3),
        "response": response.to_dict(),
    }
    (target_dir / filename).write_text(
        json.dumps(record, indent=2, sort_keys=True),
        encoding="utf-8",
    )


def _safe_filename(value: str) -> str:
    """Replace runs of characters outside ``[A-Za-z0-9_.-]`` with ``-``.

    Leading and trailing dashes are stripped; an empty result becomes
    ``"response"``.
    """
    return re.sub(r"[^A-Za-z0-9_.-]+", "-", value).strip("-") or "response"


def main(argv=None) -> None:
    """CLI entry point: parse arguments, build the adapter and serve until Ctrl-C."""
    parser = argparse.ArgumentParser(
        prog="python -m llm_connect.server",
        description="Start llm_connect HTTP serve mode.",
    )
    parser.add_argument("--port", type=int, default=8080, help="TCP port (default: 8080)")
    parser.add_argument("--host", default="127.0.0.1", help="Bind address (default: 127.0.0.1)")
    parser.add_argument("--provider", default="mock", help="Provider name passed to create_adapter")
    parser.add_argument("--model", default=None, help="Model name (optional)")
    args = parser.parse_args(argv)

    adapter = _build_adapter(args.provider, args.model)
    server = LLMServer(adapter=adapter, host=args.host, port=args.port)
    # Report the bound port, not the requested one: with --port 0 the OS
    # picks a free port and args.port would misleadingly print 0.
    print(f"llm_connect server listening on http://{args.host}:{server.port}")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down.")
    finally:
        # Release the listening socket on every exit path.
        server.stop()


if __name__ == "__main__":
    main()