Files
llm-connect/llm_connect/server.py
tegwick 24f4c09d42
Some checks failed
CI / test (3.10) (push) Has been cancelled
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
Implement llm-connect ADHOC diagnostics
2026-06-03 11:56:21 +02:00

234 lines
8.1 KiB
Python

"""
Minimal HTTP server for llm_connect — serve mode (FR-1).
Exposes:
POST /execute — run a prompt through the configured adapter
GET /health — liveness probe
Usage (programmatic)::
from llm_connect import MockLLMAdapter
from llm_connect.server import LLMServer
server = LLMServer(adapter=MockLLMAdapter(), port=8080)
server.start() # background thread
# ...
server.stop()
Usage (CLI)::
python -m llm_connect.server --port 8080 --provider openrouter --model anthropic/claude-sonnet-4
"""
import argparse
import datetime as _dt
import json
import os
import re
import threading
import time
import uuid
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
from typing import Optional
from urllib.parse import parse_qs, urlsplit
from llm_connect._diagnostics import capture_diagnostics
from llm_connect.adapter import LLMAdapter
from llm_connect.models import LLMResponse, RunConfig
class _Handler(BaseHTTPRequestHandler):
"""Request handler — adapter injected via server.adapter."""
def log_message(self, format, *args): # suppress default access log
pass
# ── GET ────────────────────────────────────────────────────────
def do_GET(self):
parsed = urlsplit(self.path)
if parsed.path == "/health":
self._respond(200, {"status": "ok"})
else:
self._respond(404, {"error": "not found"})
# ── POST ───────────────────────────────────────────────────────
def do_POST(self):
parsed = urlsplit(self.path)
if parsed.path != "/execute":
self._respond(404, {"error": "not found"})
return
debug_enabled = _debug_requested(parsed.query)
audit_dir = os.environ.get("LLM_CONNECT_AUDIT_DIR")
length = int(self.headers.get("Content-Length", 0))
raw = self.rfile.read(length)
try:
data = json.loads(raw)
except (json.JSONDecodeError, ValueError):
self._respond(400, {"error": "invalid JSON body"})
return
prompt = data.get("prompt")
if not prompt:
self._respond(400, {"error": "missing required field: 'prompt'"})
return
cfg = data.get("config", {})
if not isinstance(cfg, dict):
self._respond(400, {"error": "field 'config' must be an object"})
return
config = RunConfig.from_dict(cfg)
start = time.time()
diagnostics_enabled = debug_enabled or bool(audit_dir)
try:
with capture_diagnostics(diagnostics_enabled) as diagnostics:
response = self.server.adapter.execute_prompt(prompt, config) # type: ignore[attr-defined]
latency = time.time() - start
body = response.to_dict()
debug = diagnostics.to_dict() if diagnostics is not None else None
if debug_enabled and debug is not None:
body["debug"] = debug
if audit_dir:
_write_audit_record(audit_dir, prompt, config, response, debug, latency)
self._respond(200, body)
except Exception as exc:
self._respond(500, {"error": str(exc)})
# ── helpers ────────────────────────────────────────────────────
def _respond(self, status: int, body: dict) -> None:
payload = json.dumps(body).encode()
self.send_response(status)
self.send_header("Content-Type", "application/json")
self.send_header("Content-Length", str(len(payload)))
self.end_headers()
self.wfile.write(payload)
class LLMServer:
    """HTTP server wrapping an :class:`~llm_connect.adapter.LLMAdapter`.

    Run it either in the background with :meth:`start` / :meth:`stop`, or in
    the calling thread with :meth:`serve_forever`. :meth:`stop` also closes the
    listening socket, so an instance cannot be restarted once stopped.

    Args:
        adapter: The adapter that handles ``POST /execute`` requests.
        host: Bind address (default ``"127.0.0.1"``).
        port: TCP port (default ``8080``; ``0`` picks a free port).
    """

    def __init__(
        self,
        adapter: LLMAdapter,
        host: str = "127.0.0.1",
        port: int = 8080,
    ) -> None:
        self._httpd = ThreadingHTTPServer((host, port), _Handler)
        # The request handler reads the adapter back through ``self.server.adapter``.
        self._httpd.adapter = adapter  # type: ignore[attr-defined]
        self._thread: Optional[threading.Thread] = None
        # Set while a serve loop is running or about to run. ``stop`` consults
        # it because ``shutdown()`` blocks forever when no loop is serving.
        self._serving = threading.Event()

    @property
    def port(self) -> int:
        """Actual bound port (useful when ``port=0`` was requested)."""
        return self._httpd.server_address[1]

    @property
    def host(self) -> str:
        """Address the server socket is bound to."""
        return self._httpd.server_address[0]

    def start(self) -> None:
        """Start serving in a daemon background thread.

        Calling it again while the background thread is alive is a no-op, so
        two serve loops never run on the same socket.
        """
        if self._thread is not None and self._thread.is_alive():
            return
        # Mark as serving before the thread runs so an immediate ``stop()``
        # still requests a shutdown instead of racing the thread start-up.
        self._serving.set()
        self._thread = threading.Thread(target=self.serve_forever, daemon=True)
        self._thread.start()

    def stop(self) -> None:
        """Shut down the server, join the background thread and close the socket.

        Safe to call when the server was never started and safe to call twice.
        """
        if self._serving.is_set():
            self._httpd.shutdown()
        thread, self._thread = self._thread, None
        if thread is not None:
            thread.join()
        # Release the listening socket; ``shutdown()`` alone leaves it open.
        self._httpd.server_close()

    def serve_forever(self) -> None:
        """Block the calling thread until interrupted."""
        self._serving.set()
        try:
            self._httpd.serve_forever()
        finally:
            self._serving.clear()
# ── CLI entry point ────────────────────────────────────────────────────────────
def _build_adapter(provider: str, model: Optional[str]) -> LLMAdapter:
    """Create the adapter for ``provider`` via ``llm_connect.factory.create_adapter``.

    The factory module is imported when this function is called rather than
    when this module is imported.

    Args:
        provider: Provider name handed to ``create_adapter``.
        model: Model name forwarded as the ``model`` keyword (may be ``None``).
    """
    from llm_connect import factory

    return factory.create_adapter(provider, model=model)
def _debug_requested(query: str) -> bool:
    """Report whether debug output should be included for a request.

    Debug is on when the ``LLM_CONNECT_DEBUG`` environment variable is truthy,
    or when at least one ``debug`` parameter in ``query`` is truthy.

    Args:
        query: The raw query string of the request URL (without the ``?``).
    """
    if _truthy(os.environ.get("LLM_CONNECT_DEBUG", "")):
        return True
    for candidate in parse_qs(query).get("debug", []):
        if _truthy(candidate):
            return True
    return False
def _truthy(value: str) -> bool:
return value.strip().lower() in {"1", "true", "yes", "on"}
def _write_audit_record(
    audit_dir: str,
    prompt: str,
    config: RunConfig,
    response: LLMResponse,
    debug: dict | None,
    latency_seconds: float,
) -> None:
    """Write one JSON audit file describing a completed ``/execute`` call.

    The file is created inside ``audit_dir`` (which is created if missing) and
    is named ``<UTC timestamp>-<sanitised response id>.json``. The id comes from
    ``response.metadata["response_id"]`` when present, otherwise a random UUID
    hex string is used.

    Args:
        audit_dir: Directory that receives the audit file.
        prompt: The prompt that was executed.
        config: Run configuration; stored via ``config.to_dict()``.
        response: Adapter response; its metadata, content and dict form are stored.
        debug: Captured diagnostics dict, or ``None`` when none were captured.
        latency_seconds: Elapsed time of the call; stored rounded to 3 decimals.
    """
    out_dir = Path(audit_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    stamp = _dt.datetime.now(_dt.timezone.utc)
    ident = str(response.metadata.get("response_id") or uuid.uuid4().hex)
    name_prefix = stamp.strftime('%Y%m%dT%H%M%S%fZ')
    name_suffix = _safe_filename(ident)
    out_path = out_dir / f"{name_prefix}-{name_suffix}.json"

    # Missing diagnostics are treated as an empty mapping so lookups yield defaults.
    captured = debug if debug else {}

    entry = {}
    entry["timestamp"] = stamp.isoformat().replace("+00:00", "Z")
    entry["prompt"] = prompt
    entry["config"] = config.to_dict()
    entry["provider"] = response.metadata.get("provider")
    entry["provider_request"] = captured.get("provider_request")
    entry["provider_response"] = captured.get("provider_response")
    entry["adapter_transformations"] = captured.get("adapter_transformations", [])
    entry["parsed_content"] = response.content
    entry["latency_seconds"] = round(latency_seconds, 3)
    entry["response"] = response.to_dict()

    # Serialise fully before touching the file so a failure leaves no partial file.
    serialized = json.dumps(entry, indent=2, sort_keys=True)
    out_path.write_text(serialized, encoding="utf-8")
def _safe_filename(value: str) -> str:
return re.sub(r"[^A-Za-z0-9_.-]+", "-", value).strip("-") or "response"
def main(argv=None) -> None:
    """Command-line entry point: build an adapter and serve until interrupted.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use ``sys.argv[1:]``.

    Options are ``--port`` (default 8080), ``--host`` (default 127.0.0.1),
    ``--provider`` (default ``mock``) and ``--model`` (optional). Ctrl-C ends
    the serve loop with a short message.
    """
    parser = argparse.ArgumentParser(
        prog="python -m llm_connect.server",
        description="Start llm_connect HTTP serve mode.",
    )
    parser.add_argument("--port", type=int, default=8080, help="TCP port (default: 8080)")
    parser.add_argument("--host", default="127.0.0.1", help="Bind address (default: 127.0.0.1)")
    parser.add_argument("--provider", default="mock", help="Provider name passed to create_adapter")
    parser.add_argument("--model", default=None, help="Model name (optional)")
    args = parser.parse_args(argv)

    adapter = _build_adapter(args.provider, args.model)
    server = LLMServer(adapter=adapter, host=args.host, port=args.port)
    # Report the address actually bound: with ``--port 0`` the requested port
    # is 0 while the server listens on whichever free port was assigned.
    print(f"llm_connect server listening on http://{server.host}:{server.port}")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down.")


if __name__ == "__main__":
    main()