Files
tele-mcp/mcp-telemetry-bridge/app/main.py

135 lines
5.5 KiB
Python

import os
import re
import time
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional
from urllib.parse import quote

import httpx
from fastapi import FastAPI, Body
# Base URLs of the upstream services; each one can be overridden through
# the environment variable named in the lookup.
PROM = os.environ.get(
    "PROM_URL",
    "http://monitoring-kube-prometheus-prometheus.monitoring:9090",
)
LOKI = os.environ.get("LOKI_URL", "http://loki.logging:3100")
K8S = os.environ.get("K8S_API", "https://kubernetes.default.svc")

# Files located under the Kubernetes service-account secrets directory.
_SA_DIR = "/var/run/secrets/kubernetes.io/serviceaccount"
SERVICE_TOKEN_PATH = f"{_SA_DIR}/token"
CA_PATH = f"{_SA_DIR}/ca.crt"
NAMESPACE_PATH = f"{_SA_DIR}/namespace"
def _sa_headers() -> Dict[str, str]:
token = ""
try:
with open(SERVICE_TOKEN_PATH, "r") as f:
token = f.read().strip()
except FileNotFoundError:
pass
return {"Authorization": f"Bearer {token}"} if token else {}
def _ssl_params() -> Dict[str, Any]:
    """Return httpx client keyword arguments for TLS verification.

    Returns:
        ``{"verify": CA_PATH}`` when ``CA_PATH`` exists on disk, otherwise an
        empty dict (the client then keeps its default verification settings).
    """
    if os.path.exists(CA_PATH):
        return {"verify": CA_PATH}
    return {}
# Application object on which the route handlers in this module register.
app = FastAPI(
    title="MCP Telemetry Bridge",
    version="0.1.0",
)
# Canned queries exposed as plain-text resources, keyed by URI.
RESOURCES = [
    {"uri": resource_uri, "mimeType": "text/plain", "content": query_text}
    for resource_uri, query_text in (
        (
            "res://dashboards/top-pods-by-cpu.promql",
            'topk(10, sum by (pod, namespace) (rate(container_cpu_usage_seconds_total{container!="",image!=""}[1m])))',
        ),
        (
            "res://dashboards/pod-restarts.promql",
            "sum by (pod, namespace) (increase(kube_pod_container_status_restarts_total[10m])) > 0",
        ),
        (
            "res://dashboards/warn-events.logql",
            '{app="kube-apiserver"} |= "Warning"',
        ),
    )
]
# Tool descriptors: each tool name is paired with a JSON-schema-style object
# description of its input, generated from (property, type) pairs.
TOOLS = [
    {
        "name": tool_name,
        "inputSchema": {
            "type": "object",
            "properties": {prop: {"type": prop_type} for prop, prop_type in fields},
        },
    }
    for tool_name, fields in (
        ("promql.query", (("expr", "string"), ("range", "string"))),
        ("loki.query", (("logql", "string"), ("limit", "integer"), ("since", "string"))),
        ("k8s.get", (("kind", "string"), ("namespace", "string"), ("name", "string"))),
        ("k8s.events", (("namespace", "string"), ("since", "string"))),
        ("inventory.snapshot", ()),
    )
]
# Prompt descriptors returned alongside resources and tools by /mcp/schema.
PROMPTS = [
    {
        "name": "Triage-Now",
        "description": "Summarize current alerts, top offenders and recent warnings.",
    },
]
@app.get("/healthz")
def healthz():
    """Report a static ``ok`` status plus the current Unix time in whole seconds."""
    now = int(time.time())
    return {"status": "ok", "ts": now}
@app.get("/mcp/schema")
def mcp_schema():
    """Return the module's resource, tool and prompt descriptors in one document."""
    schema = {
        "resources": RESOURCES,
        "tools": TOOLS,
        "prompts": PROMPTS,
    }
    return schema
@app.get("/mcp/resource")
def mcp_resource(uri: str):
    """Look up one entry of ``RESOURCES`` by its exact URI.

    Args:
        uri: Resource URI to match against each entry's ``"uri"`` value.

    Returns:
        The first matching entry's uri, mimeType and content, or
        ``{"error": "not found", "uri": uri}`` when no entry matches.
    """
    match = next((entry for entry in RESOURCES if entry["uri"] == uri), None)
    if match is None:
        return {"error": "not found", "uri": uri}
    return {
        "uri": uri,
        "mimeType": match["mimeType"],
        "content": match["content"],
    }
@app.post("/tools/promql.query")
async def promql_query(payload: Dict[str, Any] = Body(...)):
    """Evaluate a PromQL expression through ``{PROM}/api/v1/query``.

    Args:
        payload: JSON body. ``expr`` is the PromQL expression (required).
            ``range`` is optional; when present the expression is wrapped as
            ``sum_over_time((expr)[range:])``. A ``range`` that already
            contains ``:`` (explicit subquery resolution) is used verbatim.

    Returns:
        The decoded JSON body of the Prometheus response, or
        ``{"error": "missing expr"}`` when no expression was supplied.
    """
    expr = payload.get("expr")
    if not expr:
        # Do not forward an empty query upstream.
        return {"error": "missing expr"}

    rng = payload.get("range")
    if rng:
        window = str(rng)
        if ":" not in window:
            # A bare `[5m]` is only valid directly on a selector. After a
            # parenthesised expression PromQL requires the subquery form
            # `[5m:]`, otherwise the query fails to parse.
            window = f"{window}:"
        query = f"sum_over_time(({expr})[{window}])"
    else:
        query = expr

    async with httpx.AsyncClient() as c:
        r = await c.get(f"{PROM}/api/v1/query", params={"query": query}, timeout=30.0)
        return r.json()
@app.post("/tools/loki.query")
async def loki_query(payload: Dict[str, Any] = Body(...)):
    """Run a LogQL query against Loki.

    Args:
        payload: JSON body. ``logql`` is the query (required). ``limit`` is
            the maximum number of entries (defaults to 100). ``since`` is an
            optional look-back duration such as ``"1h"``.

    Returns:
        The decoded JSON body of the Loki response, or
        ``{"error": "missing logql"}`` when no query was supplied.

    Without ``since`` the request goes to ``/loki/api/v1/query`` exactly as
    before. With ``since`` it goes to ``/loki/api/v1/query_range`` and the
    value is forwarded as that endpoint's ``since`` parameter, so the
    look-back window advertised in the tool schema is actually applied.
    """
    logql = payload.get("logql")
    if not logql:
        # Do not forward an empty query upstream.
        return {"error": "missing logql"}

    limit = payload.get("limit")
    if limit is None:
        # Covers both an absent key and an explicit JSON null.
        limit = 100
    params = {"query": logql, "limit": str(limit)}

    endpoint = "query"
    since = payload.get("since")
    if since:
        params["since"] = str(since)
        endpoint = "query_range"

    async with httpx.AsyncClient() as c:
        r = await c.get(f"{LOKI}/loki/api/v1/{endpoint}", params=params, timeout=30.0)
        return r.json()
@app.post("/tools/k8s.get")
async def k8s_get(payload: Dict[str, Any] = Body(...)):
    """Fetch one object or a list of objects from the Kubernetes API.

    Args:
        payload: JSON body. ``kind`` selects the resource (case-insensitive,
            plural or singular, e.g. ``pods`` / ``pod``). ``namespace`` and
            ``name`` are optional and narrow the request.

    Returns:
        The decoded JSON body of the API server response, or an ``error``
        dict when the kind is unsupported or the namespace/name combination
        cannot form a valid path.
    """
    # kind -> (API group path, resource plural, is the resource namespaced?)
    mapping = {
        "pods": ("/api/v1", "pods", True),
        "namespaces": ("/api/v1", "namespaces", False),
        "nodes": ("/api/v1", "nodes", False),
        "services": ("/api/v1", "services", True),
        "events": ("/api/v1", "events", True),
        "deployments": ("/apis/apps/v1", "deployments", True),
        "daemonsets": ("/apis/apps/v1", "daemonsets", True),
        "statefulsets": ("/apis/apps/v1", "statefulsets", True),
        "replicasets": ("/apis/apps/v1", "replicasets", True),
    }

    # `or ""` also covers an explicit JSON null, which would otherwise break
    # the .lower() call.
    kind = str(payload.get("kind") or "").lower()
    # Accept the singular form of every supported kind, not only "pod".
    entry = mapping.get(kind) or mapping.get(f"{kind}s")
    if entry is None:
        return {"error": "unsupported kind", "kind": kind}
    base, res, namespaced = entry

    # Cluster-scoped resources have no /namespaces/{ns}/ path; a supplied
    # namespace is ignored for them instead of producing an invalid URL.
    ns = payload.get("namespace") if namespaced else None
    name = payload.get("name")

    if namespaced and name and not ns:
        # A single namespaced object cannot be addressed without its namespace.
        return {"error": "namespace required", "kind": kind, "name": name}

    for field, value in (("namespace", ns), ("name", name)):
        # Dot segments survive percent-encoding and would let the caller
        # climb to a different API path.
        if value in (".", ".."):
            return {"error": "invalid " + field, field: value}

    # Percent-encode caller-supplied segments so they cannot inject extra
    # path components or a query string into a request that carries the
    # service-account token.
    url = f"{K8S}{base}"
    if ns:
        url += f"/namespaces/{quote(str(ns), safe='')}"
    url += f"/{res}"
    if name:
        url += f"/{quote(str(name), safe='')}"

    async with httpx.AsyncClient(**_ssl_params(), headers=_sa_headers()) as c:
        r = await c.get(url, timeout=30.0)
        return r.json()
# Durations accepted for `since`: a whole number followed by s, m, h or d.
_SINCE_RE = re.compile(r"^(\d+)([smhd])$")
_SINCE_UNIT_SECONDS = {"s": 1, "m": 60, "h": 3600, "d": 86400}


def _since_cutoff(since: str) -> Optional[datetime]:
    """Convert a duration such as ``15m`` into a UTC cutoff; None if unparseable."""
    parsed = _SINCE_RE.match(since.strip())
    if parsed is None:
        return None
    seconds = int(parsed.group(1)) * _SINCE_UNIT_SECONDS[parsed.group(2)]
    return datetime.now(timezone.utc) - timedelta(seconds=seconds)


def _event_time(event: Dict[str, Any]) -> Optional[datetime]:
    """Return the first parseable timestamp of an event item, or None."""
    candidates = (
        event.get("lastTimestamp"),
        event.get("eventTime"),
        (event.get("metadata") or {}).get("creationTimestamp"),
    )
    for raw in candidates:
        if not isinstance(raw, str):
            continue
        try:
            # fromisoformat on older Python versions rejects a trailing "Z".
            stamp = datetime.fromisoformat(raw.replace("Z", "+00:00"))
        except ValueError:
            continue
        # Treat a naive timestamp as UTC so it can be compared to the cutoff.
        return stamp if stamp.tzinfo else stamp.replace(tzinfo=timezone.utc)
    return None


@app.post("/tools/k8s.events")
async def k8s_events(payload: Dict[str, Any] = Body(...)):
    """List Kubernetes events, optionally limited to a namespace and time window.

    Args:
        payload: JSON body. ``namespace`` is optional; without it events from
            all namespaces are requested. ``since`` is an optional look-back
            duration (``<int>`` followed by ``s``, ``m``, ``h`` or ``d``,
            e.g. ``"30m"``).

    Returns:
        The decoded JSON body of the API server response. When ``since`` is
        given and the body has an ``items`` list, items whose timestamp is
        older than the cutoff are removed; items without a parseable
        timestamp are kept. ``{"error": "invalid since", ...}`` is returned
        when ``since`` cannot be parsed.
    """
    ns = payload.get("namespace")
    since = payload.get("since")

    cutoff = None
    if since:
        cutoff = _since_cutoff(str(since))
        if cutoff is None:
            return {"error": "invalid since", "since": since}

    url = f"{K8S}/api/v1"
    if ns:
        # Percent-encode so the namespace stays a single path segment.
        url += f"/namespaces/{quote(str(ns), safe='')}/events"
    else:
        url += "/events"

    async with httpx.AsyncClient(**_ssl_params(), headers=_sa_headers()) as c:
        r = await c.get(url, timeout=30.0)
        body = r.json()

    # Pass through anything that is not a list-shaped response (for example
    # an error body from the API server) untouched.
    if cutoff is None or not isinstance(body, dict) or not isinstance(body.get("items"), list):
        return body

    def is_recent(item: Any) -> bool:
        when = _event_time(item) if isinstance(item, dict) else None
        return when is None or when >= cutoff

    return {**body, "items": [item for item in body["items"] if is_recent(item)]}
@app.post("/tools/inventory.snapshot")
async def inventory_snapshot():
    """Collect cluster-wide listings of five resource kinds in one response.

    The listings are requested one after another over a single client.

    Returns:
        A dict with keys ``nodes``, ``namespaces``, ``deployments``,
        ``daemonsets`` and ``statefulsets``, each holding the decoded JSON
        body returned for that listing.
    """
    # (result key, API path) pairs, in request order.
    listings = (
        ("nodes", "/api/v1/nodes"),
        ("namespaces", "/api/v1/namespaces"),
        ("deployments", "/apis/apps/v1/deployments"),
        ("daemonsets", "/apis/apps/v1/daemonsets"),
        ("statefulsets", "/apis/apps/v1/statefulsets"),
    )
    snapshot = {}
    async with httpx.AsyncClient(**_ssl_params(), headers=_sa_headers()) as client:
        for key, path in listings:
            response = await client.get(f"{K8S}{path}", timeout=30.0)
            snapshot[key] = response.json()
        return snapshot