Bootstrap initial repo state
This commit is contained in:
2
ansible/inventories/local.ini
Normal file
2
ansible/inventories/local.ini
Normal file
@@ -0,0 +1,2 @@
|
||||
[all]
|
||||
localhost ansible_connection=local
|
||||
8
ansible/playbook.yml
Normal file
8
ansible/playbook.yml
Normal file
@@ -0,0 +1,8 @@
|
||||
# Single play: prepare the host, then install the telemetry stack via roles.
- name: TeleMcp bootstrap
  hosts: all
  become: true
  vars:
    # Directory on the target host that receives the Helm values files.
    values_dir: /opt/telemcp-values
  roles:
    - k8s_host
    - telemetry_stack
|
||||
19
ansible/roles/k8s_host/tasks/main.yml
Normal file
19
ansible/roles/k8s_host/tasks/main.yml
Normal file
@@ -0,0 +1,19 @@
|
||||
# Host preparation: base packages, the Helm CLI, and root's kubeconfig directory.
- name: Ensure base packages
  apt:
    name: [curl, jq, gnupg, ca-certificates]
    state: present
    update_cache: yes

- name: Install Helm if missing
  shell: |
    # Without pipefail a failed download pipes an empty script into bash,
    # which exits 0 and hides the failure.
    set -o pipefail
    if ! command -v helm >/dev/null 2>&1; then
      curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
      echo HELM_INSTALLED
    fi
  args:
    executable: /bin/bash
  register: helm_install
  # Report "changed" only when the installer actually ran.
  changed_when: "'HELM_INSTALLED' in helm_install.stdout"

- name: Ensure kube config dir
  file:
    path: /root/.kube
    state: directory
    mode: "0700"
|
||||
4
ansible/roles/telemetry_stack/defaults/main.yml
Normal file
4
ansible/roles/telemetry_stack/defaults/main.yml
Normal file
@@ -0,0 +1,4 @@
|
||||
# Kubernetes namespaces targeted by the role's Helm releases.
telemcp_namespace_monitoring: "monitoring"
telemcp_namespace_logging: "logging"
telemcp_namespace_mcp: "mcp"

# Directory on the target host where Helm values files are copied.
values_dir: "/opt/telemcp-values"
|
||||
50
ansible/roles/telemetry_stack/tasks/main.yml
Normal file
50
ansible/roles/telemetry_stack/tasks/main.yml
Normal file
@@ -0,0 +1,50 @@
|
||||
# Installs the telemetry stack with Helm: copies values files to the host,
# registers chart repositories, then runs one `helm upgrade --install` per release.
- name: Create values directory
  file:
    path: "{{ values_dir }}"
    state: directory
    mode: "0755"

- name: Install helm repos
  shell: |
    # Fail on the first broken command; otherwise only the exit status of
    # `helm repo update` would decide whether the task succeeds.
    set -euo pipefail
    helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
    helm repo add grafana https://grafana.github.io/helm-charts
    helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
    helm repo update
  args:
    executable: /bin/bash

- name: Copy values files
  copy:
    src: "{{ item.src }}"
    dest: "{{ values_dir }}/{{ item.dest }}"
    mode: "0644"
  loop:
    - { src: "../../../helm/values/kube-prometheus-stack.values.yaml", dest: "kube-prometheus-stack.values.yaml" }
    - { src: "../../../helm/values/loki.values.yaml", dest: "loki.values.yaml" }
    - { src: "../../../helm/values/otel-collector.values.yaml", dest: "otel-collector.values.yaml" }
    - { src: "../../../helm/mcp-telemetry-bridge/values.yaml", dest: "mcp-telemetry-bridge.values.yaml" }

- name: Deploy kube-prometheus-stack
  shell: |
    helm upgrade --install monitoring prometheus-community/kube-prometheus-stack -n {{ telemcp_namespace_monitoring }} --create-namespace -f {{ values_dir }}/kube-prometheus-stack.values.yaml
  args: { executable: /bin/bash }

- name: Deploy Loki
  shell: |
    helm upgrade --install loki grafana/loki -n {{ telemcp_namespace_logging }} --create-namespace -f {{ values_dir }}/loki.values.yaml
  args: { executable: /bin/bash }

- name: Deploy Promtail
  shell: |
    helm upgrade --install promtail grafana/promtail -n {{ telemcp_namespace_logging }} --create-namespace
  args: { executable: /bin/bash }

# The namespace is overridable like the others; it falls back to the
# previously hard-coded "observability" when the variable is not defined.
- name: (Optional) Deploy OpenTelemetry Collector
  shell: |
    helm upgrade --install otel-collector open-telemetry/opentelemetry-collector -n {{ telemcp_namespace_otel | default('observability') }} --create-namespace -f {{ values_dir }}/otel-collector.values.yaml
  args: { executable: /bin/bash }

# The bridge chart is installed from the repository checkout, not a chart repo.
- name: Deploy MCP Telemetry Bridge
  shell: |
    helm upgrade --install mcp-telemetry {{ playbook_dir }}/../helm/mcp-telemetry-bridge -n {{ telemcp_namespace_mcp }} --create-namespace -f {{ values_dir }}/mcp-telemetry-bridge.values.yaml
  args: { executable: /bin/bash }
|
||||
BIN
assets/TeleMcpLogo.png
Normal file
BIN
assets/TeleMcpLogo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.8 MiB |
1
environments/dev/README.md
Normal file
1
environments/dev/README.md
Normal file
@@ -0,0 +1 @@
|
||||
You can deploy charts directly with Ansible or manage them with Helmfile/Argo CD later.
|
||||
6
helm/mcp-telemetry-bridge/Chart.yaml
Normal file
6
helm/mcp-telemetry-bridge/Chart.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
# Helm chart metadata for the MCP Telemetry Bridge.
apiVersion: v2
name: mcp-telemetry-bridge
description: MCP Telemetry Bridge for TeleMcp
type: application
# Chart version and application version are tracked separately.
version: 0.1.0
appVersion: "0.1.0"
|
||||
35
helm/mcp-telemetry-bridge/templates/deployment.yaml
Normal file
35
helm/mcp-telemetry-bridge/templates/deployment.yaml
Normal file
@@ -0,0 +1,35 @@
|
||||
# Deployment for the bridge: one container named "bridge" serving HTTP on 8000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mcp-telemetry-bridge
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: mcp-telemetry-bridge
  template:
    metadata:
      labels:
        app: mcp-telemetry-bridge
    spec:
      serviceAccountName: {{ .Values.serviceAccount.name }}
      containers:
        - name: bridge
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          ports:
            - name: http
              containerPort: 8000
          env:
            {{- range .Values.env }}
            - name: {{ .name }}
              # `quote` escapes embedded quotes/backslashes and forces a string,
              # which hand-written surrounding quotes do not.
              value: {{ .value | quote }}
            {{- end }}
          readinessProbe:
            httpGet:
              path: /healthz
              port: http
            initialDelaySeconds: 3
            periodSeconds: 5
          livenessProbe:
            httpGet:
              path: /healthz
              port: http
            initialDelaySeconds: 10
            periodSeconds: 10
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
|
||||
21
helm/mcp-telemetry-bridge/templates/networkpolicy.yaml
Normal file
21
helm/mcp-telemetry-bridge/templates/networkpolicy.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
{{- if .Values.networkPolicy.enabled }}
# Restricts traffic for the bridge pods. The resource name is kept for upgrade
# compatibility even though the policy is not a strict deny-all.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: mcp-telemetry-bridge-deny-all
spec:
  podSelector:
    matchLabels:
      app: mcp-telemetry-bridge
  policyTypes: ["Ingress", "Egress"]
  ingress:
    # An empty rule matches every source (any namespace, any peer), so ingress
    # is effectively unrestricted; add `from` selectors to tighten it.
    - {}
  egress:
    # Telemetry backends running as pods in any namespace.
    - to:
        - namespaceSelector: {}
      ports:
        - protocol: TCP
          port: 9090 # Prometheus
        - protocol: TCP
          port: 3100 # Loki
    # DNS: the backend URLs are service names, so lookups must be allowed or
    # every outbound request fails before a connection is attempted.
    - ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
    # Kubernetes API server. No `to` selector because the API server is often
    # not a pod; 6443 covers clusters where policy is evaluated after DNAT.
    - ports:
        - protocol: TCP
          port: 443
        - protocol: TCP
          port: 6443
{{- end }}
|
||||
26
helm/mcp-telemetry-bridge/templates/rbac.yaml
Normal file
26
helm/mcp-telemetry-bridge/templates/rbac.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
{{- if .Values.rbac.create }}
# Cluster-wide read-only access (get/list/watch) for the bridge's service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: mcp-telemetry-readonly
rules:
  - apiGroups:
      - ""
    resources:
      - pods
      - pods/log
      - nodes
      - events
      - namespaces
      - services
      - endpoints
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - apps
    resources:
      - deployments
      - daemonsets
      - statefulsets
      - replicasets
    verbs:
      - get
      - list
      - watch
---
# Binds the read-only role to the service account in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: mcp-telemetry-readonly-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: mcp-telemetry-readonly
subjects:
  - kind: ServiceAccount
    name: {{ .Values.serviceAccount.name }}
    namespace: {{ .Release.Namespace }}
{{- end }}
|
||||
11
helm/mcp-telemetry-bridge/templates/service.yaml
Normal file
11
helm/mcp-telemetry-bridge/templates/service.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
# Service in front of the bridge pods; forwards the configured port to 8000.
apiVersion: v1
kind: Service
metadata:
  name: mcp-telemetry-bridge
spec:
  type: {{ .Values.service.type }}
  selector:
    app: mcp-telemetry-bridge
  ports:
    - name: http
      port: {{ .Values.service.port }}
      targetPort: 8000
|
||||
6
helm/mcp-telemetry-bridge/templates/serviceaccount.yaml
Normal file
6
helm/mcp-telemetry-bridge/templates/serviceaccount.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
{{- if .Values.serviceAccount.create }}
# Service account the bridge pods run as; rendered only when creation is enabled.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ .Values.serviceAccount.name }}
{{- end }}
|
||||
37
helm/mcp-telemetry-bridge/values.yaml
Normal file
37
helm/mcp-telemetry-bridge/values.yaml
Normal file
@@ -0,0 +1,37 @@
|
||||
# Default values for the mcp-telemetry-bridge chart.

# Container image coordinates.
image:
  repository: ghcr.io/example/telemcp-bridge
  tag: "0.1.0"
  pullPolicy: IfNotPresent

replicaCount: 1

# Service exposure; the container itself listens on 8000.
service:
  type: ClusterIP
  port: 80

# Environment variables passed to the container as name/value pairs.
env:
  - name: PROM_URL
    value: "http://monitoring-kube-prometheus-prometheus.monitoring:9090"
  - name: LOKI_URL
    value: "http://loki.logging:3100"
  - name: K8S_API
    value: "https://kubernetes.default.svc"

resources:
  requests:
    cpu: 50m
    memory: 128Mi
  limits:
    cpu: 500m
    memory: 512Mi

# Toggles creation of the ClusterRole and ClusterRoleBinding.
rbac:
  create: true

serviceAccount:
  create: true
  name: mcp-telemetry

networkPolicy:
  enabled: true
  allowFromNamespaces: [] # add namespace selectors if needed
|
||||
25
helm/values/kube-prometheus-stack.values.yaml
Normal file
25
helm/values/kube-prometheus-stack.values.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
# Overrides for the prometheus-community/kube-prometheus-stack chart.

# NOTE(review): the parent chart may toggle this subchart through a different
# key (e.g. `kubeStateMetrics.enabled`) - confirm against the chart's values.yaml.
kube-state-metrics:
  enabled: true

prometheus:
  prometheusSpec:
    retention: 5d
    scrapeInterval: 15s
    enableAdminAPI: false
    resources:
      requests:
        cpu: 100m
        memory: 512Mi

alertmanager:
  alertmanagerSpec:
    replicas: 1

defaultRules:
  create: true
  # NOTE(review): verify these rule-group names exist in the pinned chart
  # version; names the chart does not recognise would presumably be ignored.
  rules:
    kubeApiserverError: true
    kubeNodeNotReady: true
    kubePodCrashLooping: true
    kubeJobFailed: true
    etcdHighNumberOfFailedGRPCRequests: true
|
||||
15
helm/values/loki.values.yaml
Normal file
15
helm/values/loki.values.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
# Overrides for the grafana/loki chart: single-replica, filesystem-backed Loki.
# NOTE(review): recent chart versions may also need `deploymentMode: SingleBinary`
# for filesystem storage - confirm against the chart version being installed.
loki:
  auth_enabled: false
  commonConfig:
    replication_factor: 1
  storage:
    type: filesystem
  schemaConfig:
    configs:
      - from: "2024-01-01"
        # Schema v13 is meant to be paired with the TSDB index; the deprecated
        # boltdb-shipper store is rejected by current Loki when structured
        # metadata is enabled (the default).
        store: tsdb
        object_store: filesystem
        schema: v13
        index:
          prefix: index_
          period: 24h
|
||||
22
helm/values/otel-collector.values.yaml
Normal file
22
helm/values/otel-collector.values.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
# Overrides for the open-telemetry/opentelemetry-collector chart.
mode: deployment

config:
  # OTLP in over HTTP and gRPC.
  receivers:
    otlp:
      protocols:
        http: {}
        grpc: {}

  processors:
    batch: {}

  # Metrics are remote-written to Prometheus; logs are pushed to Loki.
  exporters:
    prometheusremotewrite:
      endpoint: "http://monitoring-kube-prometheus-prometheus.monitoring:9090/api/v1/write"
    loki:
      endpoint: "http://loki.logging:3100/loki/api/v1/push"

  service:
    pipelines:
      metrics:
        receivers:
          - otlp
        processors:
          - batch
        exporters:
          - prometheusremotewrite
      logs:
        receivers:
          - otlp
        processors:
          - batch
        exporters:
          - loki
|
||||
12
mcp-telemetry-bridge/Dockerfile
Normal file
12
mcp-telemetry-bridge/Dockerfile
Normal file
@@ -0,0 +1,12 @@
|
||||
# Image for the MCP Telemetry Bridge (FastAPI app served by uvicorn on 8000).
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1

WORKDIR /app

# Install dependencies before copying the app so this layer is cached
# across source-only changes.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

COPY app /app/app

# Run as an unprivileged user; the service only binds the unprivileged port
# 8000 and never writes to the image filesystem.
RUN useradd --system --uid 10001 --no-create-home appuser
USER 10001

EXPOSE 8000
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
134
mcp-telemetry-bridge/app/main.py
Normal file
134
mcp-telemetry-bridge/app/main.py
Normal file
@@ -0,0 +1,134 @@
|
||||
import os, time
|
||||
from typing import Any, Dict, List, Optional
|
||||
from fastapi import FastAPI, Body
|
||||
import httpx
|
||||
|
||||
# Backend base URLs; each one can be overridden through its environment variable.
PROM = os.environ.get(
    "PROM_URL", "http://monitoring-kube-prometheus-prometheus.monitoring:9090"
)
LOKI = os.environ.get("LOKI_URL", "http://loki.logging:3100")
K8S = os.environ.get("K8S_API", "https://kubernetes.default.svc")

# Files under the pod's service-account mount.
SERVICE_TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token"
CA_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
NAMESPACE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
|
||||
|
||||
def _sa_headers() -> Dict[str, str]:
    """Return a bearer ``Authorization`` header built from the token file.

    Returns an empty dict when the token file does not exist or is blank, so
    requests are then sent without an ``Authorization`` header.
    """
    try:
        with open(SERVICE_TOKEN_PATH, "r") as fh:
            bearer = fh.read().strip()
    except FileNotFoundError:
        return {}
    if not bearer:
        return {}
    return {"Authorization": f"Bearer {bearer}"}
|
||||
|
||||
def _ssl_params() -> Dict[str, Any]:
    """Return httpx client kwargs that pin TLS verification to the CA bundle.

    Returns an empty dict when the CA file is absent, leaving the client on
    its default verification settings.
    """
    if os.path.exists(CA_PATH):
        return {"verify": CA_PATH}
    return {}
|
||||
|
||||
# ASGI application object; the @app.get / @app.post decorators in this module
# register their handlers on it.
app = FastAPI(title="MCP Telemetry Bridge", version="0.1.0")
|
||||
|
||||
# Canned queries served by /mcp/schema and /mcp/resource, keyed by URI.
RESOURCES = [
    {
        "uri": "res://dashboards/top-pods-by-cpu.promql",
        "mimeType": "text/plain",
        "content": (
            "topk(10, sum by (pod, namespace) "
            '(rate(container_cpu_usage_seconds_total{container!="",image!=""}[1m])))'
        ),
    },
    {
        "uri": "res://dashboards/pod-restarts.promql",
        "mimeType": "text/plain",
        "content": (
            "sum by (pod, namespace) "
            "(increase(kube_pod_container_status_restarts_total[10m])) > 0"
        ),
    },
    {
        "uri": "res://dashboards/warn-events.logql",
        "mimeType": "text/plain",
        "content": '{app="kube-apiserver"} |= "Warning"',
    },
]

# Tool descriptors: each name pairs with a JSON-Schema description of its input.
TOOLS = [
    {
        "name": "promql.query",
        "inputSchema": {
            "type": "object",
            "properties": {
                "expr": {"type": "string"},
                "range": {"type": "string"},
            },
        },
    },
    {
        "name": "loki.query",
        "inputSchema": {
            "type": "object",
            "properties": {
                "logql": {"type": "string"},
                "limit": {"type": "integer"},
                "since": {"type": "string"},
            },
        },
    },
    {
        "name": "k8s.get",
        "inputSchema": {
            "type": "object",
            "properties": {
                "kind": {"type": "string"},
                "namespace": {"type": "string"},
                "name": {"type": "string"},
            },
        },
    },
    {
        "name": "k8s.events",
        "inputSchema": {
            "type": "object",
            "properties": {
                "namespace": {"type": "string"},
                "since": {"type": "string"},
            },
        },
    },
    {
        "name": "inventory.snapshot",
        "inputSchema": {"type": "object", "properties": {}},
    },
]

# Prompt descriptors returned alongside resources and tools.
PROMPTS = [
    {
        "name": "Triage-Now",
        "description": "Summarize current alerts, top offenders and recent warnings.",
    },
]
|
||||
|
||||
@app.get("/healthz")
def healthz():
    """Report liveness together with the current Unix time in whole seconds."""
    now = int(time.time())
    return {"status": "ok", "ts": now}
|
||||
|
||||
@app.get("/mcp/schema")
def mcp_schema():
    """Return the static resource, tool and prompt catalogues in one document."""
    return dict(resources=RESOURCES, tools=TOOLS, prompts=PROMPTS)
|
||||
|
||||
@app.get("/mcp/resource")
def mcp_resource(uri: str):
    """Look up one catalogue resource by its exact URI.

    Returns the resource's uri, mimeType and content, or an
    ``{"error": "not found", ...}`` payload when no entry matches.
    """
    found = next((entry for entry in RESOURCES if entry["uri"] == uri), None)
    if found is None:
        return {"error": "not found", "uri": uri}
    return {"uri": uri, "mimeType": found["mimeType"], "content": found["content"]}
|
||||
|
||||
@app.post("/tools/promql.query")
async def promql_query(payload: Dict[str, Any] = Body(...)):
    """Run an instant PromQL query and return Prometheus' JSON response.

    Payload fields:
        expr:  PromQL expression (required).
        range: optional duration such as ``"5m"`` (or ``"5m:30s"`` with an
               explicit resolution). When present, the expression is wrapped
               as ``sum_over_time((expr)[range:])``.

    Returns an ``{"error": ...}`` payload when ``expr`` is missing or empty.
    """
    expr = payload.get("expr")
    if not expr:
        return {"error": "missing required field", "field": "expr"}

    rng = payload.get("range")
    if rng:
        # A range over an arbitrary expression must use subquery syntax,
        # "[<range>:<resolution>]"; a bare "[5m]" is only valid directly on a
        # vector selector. An empty resolution uses the server default.
        window = rng if ":" in str(rng) else f"{rng}:"
        query = f"sum_over_time(({expr})[{window}])"
    else:
        query = expr

    async with httpx.AsyncClient() as c:
        r = await c.get(f"{PROM}/api/v1/query", params={"query": query}, timeout=30.0)
    return r.json()
|
||||
|
||||
@app.post("/tools/loki.query")
async def loki_query(payload: Dict[str, Any] = Body(...)):
    """Run a LogQL query against Loki and return its JSON response.

    Payload fields:
        logql: LogQL query (required).
        limit: maximum number of entries to return (default 100).
        since: optional look-back duration (e.g. ``"1h"``), forwarded to Loki.

    Returns an ``{"error": ...}`` payload when ``logql`` is missing or empty.
    """
    logql = payload.get("logql")
    if not logql:
        return {"error": "missing required field", "field": "logql"}

    params = {"query": logql, "limit": str(payload.get("limit", 100))}
    since = payload.get("since")
    if since:
        params["since"] = str(since)

    # Use the range endpoint: the instant endpoint (/query) is meant for metric
    # queries, and `since` is only understood by query_range.
    async with httpx.AsyncClient() as c:
        r = await c.get(f"{LOKI}/loki/api/v1/query_range", params=params, timeout=30.0)
    return r.json()
|
||||
|
||||
@app.post("/tools/k8s.get")
async def k8s_get(payload: Dict[str, Any] = Body(...)):
    """Fetch one object or a list of objects from the Kubernetes API.

    Payload fields:
        kind:      resource kind, case-insensitive; must be a key of the
                   mapping below.
        namespace: optional namespace; ignored for cluster-scoped kinds.
        name:      optional object name; a named lookup of a namespaced kind
                   also needs ``namespace``.

    Returns the API server's JSON response, or an ``{"error": ...}`` payload
    for an unsupported kind or a named lookup without a namespace.
    """
    from urllib.parse import quote

    # `or ""` also covers an explicit null, which .get()'s default does not.
    kind = str(payload.get("kind") or "").lower()
    ns = payload.get("namespace")
    name = payload.get("name")

    # Map a few common kinds to (API prefix, resource path segment).
    mapping = {
        "pods": ("/api/v1", "pods"),
        "pod": ("/api/v1", "pods"),
        "namespaces": ("/api/v1", "namespaces"),
        "nodes": ("/api/v1", "nodes"),
        "services": ("/api/v1", "services"),
        "events": ("/api/v1", "events"),
        "deployments": ("/apis/apps/v1", "deployments"),
        "daemonsets": ("/apis/apps/v1", "daemonsets"),
        "statefulsets": ("/apis/apps/v1", "statefulsets"),
        "replicasets": ("/apis/apps/v1", "replicasets"),
    }
    # Kinds that have no per-namespace collection path.
    cluster_scoped = {"namespaces", "nodes"}

    if kind not in mapping:
        return {"error": "unsupported kind", "kind": kind}
    base, res = mapping[kind]
    namespaced = res not in cluster_scoped

    if name and namespaced and not ns:
        # A path like /api/v1/pods/<name> does not exist; fail clearly instead
        # of relaying an opaque 404 from the API server.
        return {"error": "namespace is required when name is given", "kind": kind}

    # Percent-encode caller-supplied segments so values containing "/" or ".."
    # cannot redirect the request to another API path.
    url = f"{K8S}{base}"
    if ns and namespaced:
        url += f"/namespaces/{quote(str(ns), safe='')}/{res}"
    else:
        url += f"/{res}"
    if name:
        url += f"/{quote(str(name), safe='')}"

    async with httpx.AsyncClient(**_ssl_params(), headers=_sa_headers()) as c:
        r = await c.get(url, timeout=30.0)
    return r.json()
|
||||
|
||||
@app.post("/tools/k8s.events")
async def k8s_events(payload: Dict[str, Any] = Body(...)):
    """List Kubernetes events, cluster-wide or for a single namespace.

    Payload fields:
        namespace: optional namespace; when omitted, events from all
                   namespaces are requested.

    Returns the API server's JSON response.
    """
    from urllib.parse import quote

    ns = payload.get("namespace")
    url = f"{K8S}/api/v1"
    if ns:
        # Percent-encode the caller-supplied segment so it cannot alter the
        # request path.
        url += f"/namespaces/{quote(str(ns), safe='')}/events"
    else:
        url += "/events"
    async with httpx.AsyncClient(**_ssl_params(), headers=_sa_headers()) as c:
        r = await c.get(url, timeout=30.0)
    return r.json()
|
||||
|
||||
@app.post("/tools/inventory.snapshot")
async def inventory_snapshot():
    """Collect a minimal cluster inventory from the Kubernetes API.

    Issues five list requests one after another over a single client and
    returns their JSON bodies keyed by resource type.
    """
    # (result key, API path) pairs, in the order the requests are sent.
    listings = (
        ("nodes", "/api/v1/nodes"),
        ("namespaces", "/api/v1/namespaces"),
        ("deployments", "/apis/apps/v1/deployments"),
        ("daemonsets", "/apis/apps/v1/daemonsets"),
        ("statefulsets", "/apis/apps/v1/statefulsets"),
    )
    snapshot = {}
    async with httpx.AsyncClient(**_ssl_params(), headers=_sa_headers()) as client:
        for key, path in listings:
            response = await client.get(f"{K8S}{path}", timeout=30.0)
            snapshot[key] = response.json()
    return snapshot
|
||||
3
mcp-telemetry-bridge/requirements.txt
Normal file
3
mcp-telemetry-bridge/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
fastapi==0.112.2
|
||||
uvicorn[standard]==0.30.6
|
||||
httpx==0.27.2
|
||||
Reference in New Issue
Block a user