From 6abf75365bc08c91a45889f07e8fb52fff3a9b68 Mon Sep 17 00:00:00 2001 From: tegwick Date: Mon, 15 Jun 2026 22:44:13 +0200 Subject: [PATCH] Harden inter-hub production deploy trigger --- .../inter-hub-production-deploy.yaml | 50 +++++++++++ Makefile | 36 ++++---- docs/inter-hub-on-railiance01.md | 72 +++++++++++---- tools/check-oci-image.sh | 80 +++++++++++++++++ tools/inter-hub-smoke.sh | 87 +++++++++++++++++++ ...-inter-hub-production-trigger-hardening.md | 87 +++++++++++++++++++ 6 files changed, 380 insertions(+), 32 deletions(-) create mode 100644 .gitea/workflows/inter-hub-production-deploy.yaml create mode 100755 tools/check-oci-image.sh create mode 100755 tools/inter-hub-smoke.sh create mode 100644 workplans/RAILIANCE-WP-0011-inter-hub-production-trigger-hardening.md diff --git a/.gitea/workflows/inter-hub-production-deploy.yaml b/.gitea/workflows/inter-hub-production-deploy.yaml new file mode 100644 index 0000000..39edf9e --- /dev/null +++ b/.gitea/workflows/inter-hub-production-deploy.yaml @@ -0,0 +1,50 @@ +name: Inter-Hub production deploy + +on: + workflow_dispatch: + inputs: + image_tag: + description: "Immutable inter-hub image tag to deploy, for example 5101eb5" + required: true + confirm: + description: "Type deploy-inter-hub-production to confirm Railiance01 production deploy" + required: true + +jobs: + deploy: + runs-on: [self-hosted, haskelseed, linux_amd64] + timeout-minutes: 20 + env: + INTER_HUB_IMAGE_TAG: ${{ inputs.image_tag }} + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Validate dispatch inputs + env: + CONFIRM: ${{ inputs.confirm }} + run: | + set -euo pipefail + if [ "$CONFIRM" != "deploy-inter-hub-production" ]; then + echo "Refusing deploy: confirmation text did not match." >&2 + exit 1 + fi + if ! [[ "$INTER_HUB_IMAGE_TAG" =~ ^[A-Za-z0-9._-]+$ ]]; then + echo "Refusing deploy: image_tag contains unsupported characters." 
>&2 + exit 1 + fi + + - name: Verify image manifest exists + run: make check-inter-hub-image + + - name: Helm server dry-run + run: make inter-hub-server-dry-run + + - name: Deploy Inter-Hub + run: make inter-hub-deploy + + - name: Show release status + run: make inter-hub-status + + - name: Run public smoke checks + run: make inter-hub-smoke diff --git a/Makefile b/Makefile index aaea0fd..8fd33a1 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,7 @@ INTER_HUB_RELEASE ?= inter-hub INTER_HUB_NAMESPACE ?= inter-hub INTER_HUB_CHART ?= charts/inter-hub INTER_HUB_VALUES ?= helm/inter-hub-values.yaml +INTER_HUB_IMAGE_REPOSITORY ?= gitea.coulomb.social/coulomb/inter-hub INTER_HUB_IMAGE_TAG ?= INTER_HUB_BASE_URL ?= https://hub.coulomb.social @@ -31,6 +32,7 @@ REUSE_CERTIFICATE ?= reuse-surface-tls RAILIANCE01_KUBECONFIG ?= $(HOME)/.kube/config-hosteurope INTER_HUB_KUBECONFIG ?= $(RAILIANCE01_KUBECONFIG) REUSE_KUBECONFIG ?= $(RAILIANCE01_KUBECONFIG) +INTER_HUB_IMAGE_REF = $(INTER_HUB_IMAGE_REPOSITORY):$(INTER_HUB_IMAGE_TAG) INTER_HUB_IMAGE_SET_ARG = $(if $(strip $(INTER_HUB_IMAGE_TAG)),--set image.tag=$(INTER_HUB_IMAGE_TAG),) SOPS_SENTINEL ?= @@ -71,6 +73,9 @@ check-inter-hub-image-tag: ## Require an explicit inter-hub image tag for produc exit 1; \ } +check-inter-hub-image: check-inter-hub-image-tag ## Verify the inter-hub OCI image tag exists before deploy + tools/check-oci-image.sh "$(INTER_HUB_IMAGE_REF)" + ##@ Vergabe Teilnahme vergabe-dry-run: ## helm template render (no apply) for inspection @@ -113,12 +118,22 @@ vergabe-db-url-secret: ## Rebuild DATABASE_URL with a URL-encoded cnpg password ##@ Inter-Hub -inter-hub-dry-run: check-railiance01-kubeconfig ## helm template render (no apply) for inter-hub +inter-hub-render-baseline: ## Render checked-in inter-hub values for chart validation only + helm template $(INTER_HUB_RELEASE) $(INTER_HUB_CHART) \ + --namespace $(INTER_HUB_NAMESPACE) \ + -f $(INTER_HUB_VALUES) + +inter-hub-dry-run: check-railiance01-kubeconfig 
check-inter-hub-image-tag ## helm template render with an explicit production image tag KUBECONFIG="$(INTER_HUB_KUBECONFIG)" helm template $(INTER_HUB_RELEASE) $(INTER_HUB_CHART) \ --namespace $(INTER_HUB_NAMESPACE) \ -f $(INTER_HUB_VALUES) $(INTER_HUB_IMAGE_SET_ARG) -inter-hub-deploy: check-railiance01-kubeconfig check-inter-hub-image-tag ## Deploy / upgrade inter-hub Helm release on Railiance01 +inter-hub-server-dry-run: check-railiance01-kubeconfig check-inter-hub-image ## Helm server dry-run inter-hub upgrade on Railiance01 + KUBECONFIG="$(INTER_HUB_KUBECONFIG)" helm upgrade --install $(INTER_HUB_RELEASE) $(INTER_HUB_CHART) \ + --namespace $(INTER_HUB_NAMESPACE) --create-namespace \ + -f $(INTER_HUB_VALUES) $(INTER_HUB_IMAGE_SET_ARG) --dry-run=server --timeout 5m + +inter-hub-deploy: check-railiance01-kubeconfig check-inter-hub-image ## Deploy / upgrade inter-hub Helm release on Railiance01 KUBECONFIG="$(INTER_HUB_KUBECONFIG)" helm upgrade --install $(INTER_HUB_RELEASE) $(INTER_HUB_CHART) \ --namespace $(INTER_HUB_NAMESPACE) --create-namespace \ -f $(INTER_HUB_VALUES) $(INTER_HUB_IMAGE_SET_ARG) --wait --timeout 5m @@ -133,20 +148,7 @@ inter-hub-release-info: check-railiance01-kubeconfig ## Show inter-hub Helm hist KUBECONFIG="$(INTER_HUB_KUBECONFIG)" kubectl get pods -n $(INTER_HUB_NAMESPACE) -o wide inter-hub-smoke: ## Verify public inter-hub v2 route and OpenAPI surface after rollout - @status="$$(curl -sS -o /tmp/inter-hub-api-v2-hubs.body -w "%{http_code}" "$(INTER_HUB_BASE_URL)/api/v2/hubs")"; \ - if [ "$$status" != "401" ]; then \ - echo "expected $(INTER_HUB_BASE_URL)/api/v2/hubs to return 401, got $$status" >&2; \ - cat /tmp/inter-hub-api-v2-hubs.body >&2; \ - exit 1; \ - fi; \ - echo "ok: /api/v2/hubs returned 401" - @tmp="$$(mktemp)"; \ - trap 'rm -f "$$tmp"' EXIT; \ - curl -fsS "$(INTER_HUB_BASE_URL)/openapi.json" > "$$tmp"; \ - for route in /hubs /hub-capability-manifests /api-consumers /policy-scopes; do \ - grep -q "$$route" "$$tmp" || { echo 
"missing OpenAPI route: $$route" >&2; exit 1; }; \ - done; \ - echo "ok: OpenAPI lists expected v2 resources" + INTER_HUB_BASE_URL="$(INTER_HUB_BASE_URL)" tools/inter-hub-smoke.sh inter-hub-logs: check-railiance01-kubeconfig ## Tail inter-hub app logs from Railiance01 KUBECONFIG="$(INTER_HUB_KUBECONFIG)" kubectl logs -n $(INTER_HUB_NAMESPACE) -l app=$(INTER_HUB_RELEASE) -f --tail=50 @@ -181,4 +183,4 @@ help: ## Show this help /^[a-zA-Z0-9_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } \ /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) }' $(MAKEFILE_LIST) -.PHONY: check-tools check-sops k8s-server-dry-run apps-pg-status check-railiance01-kubeconfig check-inter-hub-image-tag vergabe-dry-run vergabe-deploy vergabe-ingress-deploy vergabe-status vergabe-migrate vergabe-seed vergabe-superuser vergabe-logs vergabe-db-url-secret inter-hub-dry-run inter-hub-deploy inter-hub-status inter-hub-release-info inter-hub-smoke inter-hub-logs reuse-dry-run reuse-deploy reuse-status reuse-smoke reuse-logs help +.PHONY: check-tools check-sops k8s-server-dry-run apps-pg-status check-railiance01-kubeconfig check-inter-hub-image-tag check-inter-hub-image vergabe-dry-run vergabe-deploy vergabe-ingress-deploy vergabe-status vergabe-migrate vergabe-seed vergabe-superuser vergabe-logs vergabe-db-url-secret inter-hub-render-baseline inter-hub-dry-run inter-hub-server-dry-run inter-hub-deploy inter-hub-status inter-hub-release-info inter-hub-smoke inter-hub-logs reuse-dry-run reuse-deploy reuse-status reuse-smoke reuse-logs help diff --git a/docs/inter-hub-on-railiance01.md b/docs/inter-hub-on-railiance01.md index 59763a2..9f70ff7 100644 --- a/docs/inter-hub-on-railiance01.md +++ b/docs/inter-hub-on-railiance01.md @@ -35,10 +35,15 @@ Read-only checks on 2026-06-15 showed: target. - The pod is Running and `certificate/inter-hub-tls` is Ready on Railiance01. 
- `GET https://hub.coulomb.social/api/v2/hubs` returned `200` unauthenticated, - proving the route is present but not satisfying the expected `401` auth gate. -- `/openapi.json`, `/api/openapi.json`, `/swagger.json`, and - `/api/swagger.json` returned `404`; the public OpenAPI route still needs to - be confirmed by the upstream inter-hub owner. + matching the public discovery contract. +- `GET https://hub.coulomb.social/api/v2/widgets` and + `GET https://hub.coulomb.social/api/v2/hub-registry` returned `401` + unauthenticated with `invalid_api_key`. +- OpenAPI is published at `/api/v2/openapi.json`; `/openapi.json` is not the + Inter-Hub v2 OpenAPI route. +- `gitea.coulomb.social/coulomb/inter-hub:5101eb5` returned + `manifest unknown` on 2026-06-15, so production deploy must wait for the + image publication gate to pass. ## Deploy @@ -46,23 +51,54 @@ Use the Railiance01 kubeconfig. The Makefile defaults to `~/.kube/config-hosteurope` and fails fast when it is missing. ```bash -make inter-hub-dry-run -INTER_HUB_IMAGE_TAG=91037a4 make inter-hub-deploy +make inter-hub-render-baseline +INTER_HUB_IMAGE_TAG=5101eb5 make check-inter-hub-image +INTER_HUB_IMAGE_TAG=5101eb5 make inter-hub-dry-run +INTER_HUB_IMAGE_TAG=5101eb5 make inter-hub-server-dry-run +INTER_HUB_IMAGE_TAG=5101eb5 make inter-hub-deploy make inter-hub-status ``` Deploy automation should pass the image tag without rewriting `helm/inter-hub-values.yaml`. The production deploy target requires this -explicit tag: +explicit tag and refuses to continue if the registry manifest does not exist: ```bash -INTER_HUB_IMAGE_TAG=91037a4 make inter-hub-deploy +INTER_HUB_IMAGE_TAG=5101eb5 make inter-hub-deploy ``` Use a tag at or after the commit that contains the required v2 bootstrap API -routes. The upstream inter-hub workplan names `91037a4` as the first expected -tag for the IHUB-WP-0019 route surface; verify the actual registry tag before -announcing rollout. +routes and any required follow-up fix. 
The upstream inter-hub workplan names +`5101eb5` for the API count-decoding fix needed by the current ops-hub +bootstrap path; verify the actual registry tag before announcing rollout. + +`make inter-hub-render-baseline` exists only for chart validation with the +checked-in values file. Production-facing dry-runs and deploys must use an +explicit `INTER_HUB_IMAGE_TAG`. + +## Workflow Dispatch + +`.gitea/workflows/inter-hub-production-deploy.yaml` provides a manual +production trigger. It requires: + +- `image_tag`, the immutable inter-hub image tag to deploy; +- `confirm`, with the exact value `deploy-inter-hub-production`; +- an approved self-hosted runner with Railiance01 kubeconfig access at + `~/.kube/config-hosteurope` or an equivalent `RAILIANCE01_KUBECONFIG` + override; +- registry access through `skopeo`, `crane`, `docker manifest inspect`, or the + registry HTTP API. + +The workflow runs the same local targets an attended operator would run: + +1. `make check-inter-hub-image` +2. `make inter-hub-server-dry-run` +3. `make inter-hub-deploy` +4. `make inter-hub-status` +5. `make inter-hub-smoke` + +If the image manifest is missing, stop and hand back to the inter-hub/forge +image publication path. Do not run Helm against a tag that fails preflight. ## Release verification @@ -97,7 +133,8 @@ or another committed migration mechanism. ## Smoke checks -After rollout, the unauthenticated v2 API gate should be present: +After rollout, the current v2 public-read/authenticated-write contract should +be present: ```bash make inter-hub-smoke @@ -105,6 +142,11 @@ make inter-hub-smoke Expected results: -- `GET https://hub.coulomb.social/api/v2/hubs` returns `401`, not `404`. -- OpenAPI lists `/hubs`, `/hub-capability-manifests`, `/api-consumers`, and - `/policy-scopes`. +- `GET https://hub.coulomb.social/api/v2/hubs` returns `200` public discovery + JSON. +- `GET https://hub.coulomb.social/api/v2/widgets` returns `401` without a key. 
+- `GET https://hub.coulomb.social/api/v2/hub-registry` returns `401` without a + key. +- `GET https://hub.coulomb.social/api/v2/openapi.json` returns OpenAPI JSON + listing `/hubs`, `/hub-capability-manifests`, `/api-consumers`, + `/policy-scopes`, `/widgets`, and `/hub-registry`. diff --git a/tools/check-oci-image.sh b/tools/check-oci-image.sh new file mode 100755 index 0000000..090e0e3 --- /dev/null +++ b/tools/check-oci-image.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +set -euo pipefail + +image_ref="${1:-}" + +if [[ -z "$image_ref" ]]; then + echo "usage: $0 " >&2 + exit 2 +fi + +failures=() + +try_tool() { + local name="$1" + shift + + if ! command -v "$name" >/dev/null 2>&1; then + return 1 + fi + + local output + if output="$("$@" 2>&1 >/dev/null)"; then + echo "ok: found image manifest with $name: $image_ref" + exit 0 + fi + + failures+=("$name: $output") + return 1 +} + +try_registry_api() { + if ! command -v curl >/dev/null 2>&1; then + return 1 + fi + + local ref_no_digest="${image_ref%@*}" + local ref_without_tag tag registry repo url output + + if [[ "$ref_no_digest" != *:* ]]; then + failures+=("registry-api: image ref must include an explicit tag") + return 1 + fi + + tag="${ref_no_digest##*:}" + ref_without_tag="${ref_no_digest%:*}" + registry="${ref_without_tag%%/*}" + repo="${ref_without_tag#*/}" + + if [[ -z "$registry" || -z "$repo" || "$registry" == "$repo" ]]; then + failures+=("registry-api: image ref must include registry and repository") + return 1 + fi + + url="https://${registry}/v2/${repo}/manifests/${tag}" + if output="$(curl -fsSL \ + -H "Accept: application/vnd.oci.image.index.v1+json" \ + -H "Accept: application/vnd.oci.image.manifest.v1+json" \ + -H "Accept: application/vnd.docker.distribution.manifest.list.v2+json" \ + -H "Accept: application/vnd.docker.distribution.manifest.v2+json" \ + -o /dev/null "$url" 2>&1)"; then + echo "ok: found image manifest with registry API: $image_ref" + exit 0 + fi + + failures+=("registry-api: 
$output") + return 1 +} + +try_tool skopeo skopeo inspect --raw "docker://${image_ref}" || true +try_tool crane crane manifest "$image_ref" || true +try_tool docker docker manifest inspect "$image_ref" || true +try_registry_api || true + +echo "ERROR: image manifest not found or not accessible: $image_ref" >&2 +if ((${#failures[@]} > 0)); then + printf '%s\n' "${failures[@]}" >&2 +else + echo "No supported manifest inspection tool was available." >&2 +fi +exit 1 diff --git a/tools/inter-hub-smoke.sh b/tools/inter-hub-smoke.sh new file mode 100755 index 0000000..a939be0 --- /dev/null +++ b/tools/inter-hub-smoke.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +set -euo pipefail + +base_url="${INTER_HUB_BASE_URL:-https://hub.coulomb.social}" +base_url="${base_url%/}" + +tmpdir="$(mktemp -d)" +trap 'rm -rf "$tmpdir"' EXIT + +request() { + local name="$1" + local url="$2" + local expected_status="$3" + local body="$tmpdir/${name}.json" + local status + + status="$(curl -sS -o "$body" -w "%{http_code}" "$url")" + if [[ "$status" != "$expected_status" ]]; then + echo "ERROR: expected $url to return $expected_status, got $status" >&2 + cat "$body" >&2 + echo >&2 + exit 1 + fi + + echo "$body" +} + +hubs_body="$(request hubs "${base_url}/api/v2/hubs" 200)" +python3 - "$hubs_body" <<'PY' +import json +import sys + +with open(sys.argv[1], encoding="utf-8") as fh: + payload = json.load(fh) + +if not isinstance(payload, dict) or not isinstance(payload.get("data"), list): + raise SystemExit("/api/v2/hubs did not return a paginated data list") + +print("ok: /api/v2/hubs returned public discovery JSON") +PY + +widgets_body="$(request widgets "${base_url}/api/v2/widgets" 401)" +hub_registry_body="$(request hub-registry "${base_url}/api/v2/hub-registry" 401)" + +python3 - "$widgets_body" "$hub_registry_body" <<'PY' +import json +import sys + +for path, filename in ( + ("/api/v2/widgets", sys.argv[1]), + ("/api/v2/hub-registry", sys.argv[2]), +): + with open(filename, encoding="utf-8") as 
fh: + payload = json.load(fh) + code = payload.get("code") if isinstance(payload, dict) else None + if code != "invalid_api_key": + raise SystemExit(f"{path} returned 401 but not invalid_api_key JSON") + print(f"ok: {path} requires an API key") +PY + +openapi_body="$(request openapi "${base_url}/api/v2/openapi.json" 200)" +python3 - "$openapi_body" <<'PY' +import json +import sys + +required_paths = { + "/hubs", + "/hub-capability-manifests", + "/api-consumers", + "/policy-scopes", + "/widgets", + "/hub-registry", +} + +with open(sys.argv[1], encoding="utf-8") as fh: + payload = json.load(fh) + +paths = payload.get("paths") +if not isinstance(paths, dict): + raise SystemExit("/api/v2/openapi.json did not include an OpenAPI paths object") + +missing = sorted(required_paths - set(paths)) +if missing: + raise SystemExit("OpenAPI missing paths: " + ", ".join(missing)) + +print("ok: /api/v2/openapi.json lists expected v2 resources") +PY diff --git a/workplans/RAILIANCE-WP-0011-inter-hub-production-trigger-hardening.md b/workplans/RAILIANCE-WP-0011-inter-hub-production-trigger-hardening.md new file mode 100644 index 0000000..be3dfc7 --- /dev/null +++ b/workplans/RAILIANCE-WP-0011-inter-hub-production-trigger-hardening.md @@ -0,0 +1,87 @@ +--- +id: RAILIANCE-WP-0011 +type: workplan +title: "Inter-Hub production trigger hardening" +domain: railiance +repo: railiance-apps +status: finished +owner: codex +topic_slug: railiance +created: "2026-06-15" +updated: "2026-06-15" +state_hub_workstream_id: "98cf42ae-9b64-4736-97e1-bae325ded1f9" +--- + +# Inter-Hub production trigger hardening + +## Goal + +Turn the local Inter-Hub deploy surface into a safe production trigger for +Railiance01. The trigger must refuse missing images before Helm, use the +current Inter-Hub v2 API smoke contract, and expose a manual workflow path that +has the same gates as an attended local operator deploy. 
+ +## Add OCI Image Preflight + +```task +id: RAILIANCE-WP-0011-T01 +status: done +priority: high +state_hub_task_id: "10e27372-fb8b-40ac-b1f8-1c2c78fea0da" +``` + +Add a reusable image manifest preflight for +`gitea.coulomb.social/coulomb/inter-hub:<tag>` and wire production deploys to +fail before Helm when the requested tag is absent or inaccessible. + +## Split Baseline Render From Production Dry-Run + +```task +id: RAILIANCE-WP-0011-T02 +status: done +priority: high +state_hub_task_id: "c48320db-9ed7-4792-89a6-f55691919891" +``` + +Keep a baseline render target for chart validation with checked-in values, but +make production-facing Inter-Hub dry-runs require an explicit +`INTER_HUB_IMAGE_TAG`. + +## Update Inter-Hub Smoke Contract + +```task +id: RAILIANCE-WP-0011-T03 +status: done +priority: high +state_hub_task_id: "b3260f7a-6dcb-4bb4-ae53-bf81c0081e86" +``` + +Update `inter-hub-smoke` to match the current public-read/authenticated-write +contract: `/api/v2/hubs` returns public discovery, protected resources reject +anonymous access, and OpenAPI is served from `/api/v2/openapi.json`. + +## Add Manual Production Deploy Workflow + +```task +id: RAILIANCE-WP-0011-T04 +status: done +priority: high +state_hub_task_id: "32ca0b17-fb7c-4cd5-a846-ff92933daf89" +``` + +Add a `workflow_dispatch` Gitea Actions workflow that requires an immutable +image tag and confirmation text, verifies the image manifest, runs Helm +server-side dry-run, deploys, shows status, and runs smoke checks. + +## Update Runbook And Closure Evidence + +```task +id: RAILIANCE-WP-0011-T05 +status: done +priority: medium +state_hub_task_id: "0369b47a-09f0-4780-9c91-556049a0d505" +``` + +Document the local and workflow production paths, failure classification for a +missing image tag, current smoke expectations, and validation evidence for the +implemented deploy surface.