#!/usr/bin/env bash # Cluster-owned activity-core <-> llm-connect reconcile and non-secret evidence. set -euo pipefail ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" NAMESPACE="${ACTIVITY_CORE_NAMESPACE:-activity-core}" CLUSTER_HOST="${ACTIVITY_CORE_CLUSTER_HOST:-railiance01}" STATE_HUB_URL="${STATE_HUB_URL:-http://127.0.0.1:8000}" EXPECTED_URL="${LLM_CONNECT_URL:-http://llm-connect.activity-core.svc.cluster.local:8080}" EXPECTED_TIMEOUT="${LLM_CONNECT_TIMEOUT_SECONDS:-300}" SECRET_NAME="${LLM_CONNECT_PROVIDER_SECRET_NAME:-llm-connect-provider-secrets}" DEPLOYMENT_NAME="${LLM_CONNECT_DEPLOYMENT_NAME:-llm-connect}" LLM_CONNECT_REPO="${LLM_CONNECT_REPO:-/home/worsch/llm-connect}" LLM_CONNECT_REMOTE_REPO="${LLM_CONNECT_REMOTE_REPO:-}" APPLY_LLM_CONNECT_OVERLAY="${APPLY_LLM_CONNECT_OVERLAY:-1}" REQUIRE_SMOKE="${REQUIRE_LLM_CONNECT_SMOKE:-0}" EVIDENCE_WORKSTREAM_ID="${STATE_HUB_EVIDENCE_WORKSTREAM_ID:-}" EVIDENCE_TASK_ID="${STATE_HUB_EVIDENCE_TASK_ID:-}" PATCH_JSON="$( EXPECTED_URL="$EXPECTED_URL" EXPECTED_TIMEOUT="$EXPECTED_TIMEOUT" python3 - <<'PY' import json import os print(json.dumps({ "data": { "LLM_CONNECT_URL": os.environ["EXPECTED_URL"], "LLM_CONNECT_TIMEOUT_SECONDS": os.environ["EXPECTED_TIMEOUT"], } })) PY )" LIVE_URL="" LIVE_TIMEOUT="" SECRET_STATUS="unknown" SECRET_KEY_COUNT="0" DEPLOYMENT_STATUS="unknown" SMOKE_STATUS="skipped" SMOKE_SUMMARY="" EVIDENCE_STATUS="passed" FAILING_GATE="" export NAMESPACE CLUSTER_HOST STATE_HUB_URL EXPECTED_URL EXPECTED_TIMEOUT export SECRET_NAME DEPLOYMENT_NAME LLM_CONNECT_REPO LLM_CONNECT_REMOTE_REPO export APPLY_LLM_CONNECT_OVERLAY REQUIRE_SMOKE EVIDENCE_WORKSTREAM_ID EVIDENCE_TASK_ID export LIVE_URL LIVE_TIMEOUT SECRET_STATUS SECRET_KEY_COUNT DEPLOYMENT_STATUS export SMOKE_STATUS SMOKE_SUMMARY EVIDENCE_STATUS FAILING_GATE log() { printf '[activity-core-llm-connect] %s\n' "$*" } quote() { printf '%q' "$1" } cluster_bash() { local script="$1" if [[ -n "$CLUSTER_HOST" ]]; then ssh "$CLUSTER_HOST" "bash -s" <<<"$script" else bash -s <<<"$script" fi } post_evidence() { python3 - <<'PY' import json import os import sys import urllib.request status = os.environ["EVIDENCE_STATUS"] detail = { "producer": "railiance-cluster", "verification": "activity-core llm-connect live reconcile", "status": status, "failing_gate": os.environ.get("FAILING_GATE") or None, "cluster_host": os.environ.get("CLUSTER_HOST") or "local-kubectl", "namespace": os.environ["NAMESPACE"], "expected_url": os.environ["EXPECTED_URL"], "expected_timeout_seconds": os.environ["EXPECTED_TIMEOUT"], "live_url": os.environ.get("LIVE_URL") or None, "live_timeout_seconds": os.environ.get("LIVE_TIMEOUT") or None, "provider_secret": { "name": os.environ["SECRET_NAME"], "status": os.environ.get("SECRET_STATUS"), "key_count": int(os.environ.get("SECRET_KEY_COUNT") or "0"), }, "deployment": { "name": os.environ["DEPLOYMENT_NAME"], "status": os.environ.get("DEPLOYMENT_STATUS"), }, "smoke": { "status": os.environ.get("SMOKE_STATUS"), "summary": os.environ.get("SMOKE_SUMMARY") or None, }, } if status == "passed": summary = ( "Railiance activity-core llm-connect reconcile passed: runtime config, " "provider Secret, deployment, and smoke gate are all healthy." ) elif status == "blocked": summary = ( "Railiance activity-core llm-connect reconcile is blocked: " f"{os.environ.get('FAILING_GATE') or 'operator gate'}." ) else: summary = ( "Railiance activity-core llm-connect reconcile failed" + (f" at {os.environ.get('FAILING_GATE')}" if os.environ.get("FAILING_GATE") else "") + "." ) payload = { "summary": summary, "event_type": "note", "author": "railiance-cluster", "detail": detail, } if os.environ.get("EVIDENCE_WORKSTREAM_ID"): payload["workstream_id"] = os.environ["EVIDENCE_WORKSTREAM_ID"] if os.environ.get("EVIDENCE_TASK_ID"): payload["task_id"] = os.environ["EVIDENCE_TASK_ID"] body = json.dumps(payload).encode("utf-8") req = urllib.request.Request( os.environ["STATE_HUB_URL"].rstrip("/") + "/progress/", data=body, headers={"Content-Type": "application/json"}, method="POST", ) with urllib.request.urlopen(req, timeout=20) as resp: sys.stdout.write(resp.read().decode("utf-8")) PY } if [[ -z "$LLM_CONNECT_REMOTE_REPO" ]]; then if [[ -n "$CLUSTER_HOST" ]]; then LLM_CONNECT_REMOTE_REPO="$(ssh "$CLUSTER_HOST" pwd)/llm-connect" else LLM_CONNECT_REMOTE_REPO="$LLM_CONNECT_REPO" fi fi export LLM_CONNECT_REMOTE_REPO log "using cluster executor: ${CLUSTER_HOST:-local kubectl}" cluster_bash 'set -euo pipefail; command -v kubectl >/dev/null' log "reconciling non-secret activity-core runtime config" cluster_bash "$(cat </dev/null 2>&1; then kubectl -n $(quote "$NAMESPACE") get secret $(quote "$SECRET_NAME") -o go-template='{{ len .data }}' else printf missing fi EOF )" )" if [[ "$SECRET_KEY_COUNT" == "missing" ]]; then SECRET_STATUS="missing" SECRET_KEY_COUNT="0" elif [[ "${SECRET_KEY_COUNT:-0}" == "0" ]]; then SECRET_STATUS="empty" else SECRET_STATUS="present" fi export SECRET_STATUS SECRET_KEY_COUNT if [[ "$SECRET_STATUS" != "present" ]]; then EVIDENCE_STATUS="blocked" FAILING_GATE="provider Secret ${SECRET_NAME} ${SECRET_STATUS}" DEPLOYMENT_STATUS="not checked; provider Secret gate not satisfied" SMOKE_STATUS="blocked" SMOKE_SUMMARY="provider Secret must be populated outside Git/State Hub before deployment and smoke" export EVIDENCE_STATUS FAILING_GATE DEPLOYMENT_STATUS SMOKE_STATUS SMOKE_SUMMARY post_evidence [[ "$REQUIRE_SMOKE" == "1" ]] && exit 1 exit 0 fi if [[ "$APPLY_LLM_CONNECT_OVERLAY" == "1" ]]; then if [[ -n "$CLUSTER_HOST" ]]; then log "syncing llm-connect overlay to ${CLUSTER_HOST}:${LLM_CONNECT_REMOTE_REPO}/deploy/k8s/activity-core-llm-connect" ssh "$CLUSTER_HOST" "mkdir -p $(quote "$LLM_CONNECT_REMOTE_REPO")/deploy/k8s/activity-core-llm-connect" rsync -a --delete \ "$LLM_CONNECT_REPO/deploy/k8s/activity-core-llm-connect/" \ "${CLUSTER_HOST}:${LLM_CONNECT_REMOTE_REPO}/deploy/k8s/activity-core-llm-connect/" fi log "applying llm-connect overlay" cluster_bash "$(cat </dev/null 2>&1; then kubectl -n $(quote "$NAMESPACE") get deploy $(quote "$DEPLOYMENT_NAME") -o jsonpath='{.status.readyReplicas}/{.status.replicas}' else printf missing fi EOF )" )" export DEPLOYMENT_STATUS if [[ "$DEPLOYMENT_STATUS" == "missing" || "$DEPLOYMENT_STATUS" != "1/1" ]]; then EVIDENCE_STATUS="blocked" FAILING_GATE="llm-connect deployment not ready (${DEPLOYMENT_STATUS})" SMOKE_STATUS="blocked" SMOKE_SUMMARY="deployment must be ready before smoke" export EVIDENCE_STATUS FAILING_GATE SMOKE_STATUS SMOKE_SUMMARY post_evidence [[ "$REQUIRE_SMOKE" == "1" ]] && exit 1 exit 0 fi log "running in-namespace llm-connect fixture smoke" set +e SMOKE_OUTPUT="$( cluster_bash "$(cat <&1 )" SMOKE_CODE=$? set -e if [[ "$SMOKE_CODE" == "0" ]]; then SMOKE_STATUS="passed" SMOKE_SUMMARY="$SMOKE_OUTPUT" EVIDENCE_STATUS="passed" FAILING_GATE="" else SMOKE_STATUS="failed" SMOKE_SUMMARY="$(printf '%s' "$SMOKE_OUTPUT" | tail -n 5)" EVIDENCE_STATUS="failed" FAILING_GATE="llm-connect fixture smoke failed" fi export SMOKE_STATUS SMOKE_SUMMARY EVIDENCE_STATUS FAILING_GATE post_evidence exit "$SMOKE_CODE"