From 333fbcc237b1dc64651acf10c1de748c68f358a6 Mon Sep 17 00:00:00 2001 From: tegwick Date: Sun, 14 Jun 2026 16:47:24 +0200 Subject: [PATCH] chore(deploy): add railiance handoff guardrails [skip ci] --- .gitea/workflows/deploy.yaml | 7 + .sops.yaml | 8 ++ app.toml | 13 ++ deploy/railiance/RUNBOOK.md | 131 ++++++++++++------ deploy/railiance/secrets/.gitignore | 6 + deploy/railiance/secrets/README.md | 51 +++++++ .../secrets/inter-hub.env.example.yaml | 12 ++ .../secrets/k8s-secret-json-to-sops-input.py | 33 +++++ docs/new-hub-quickstart.md | 28 ++-- .../IHUB-WP-0018-railiance01-deployment.md | 64 ++++++--- 10 files changed, 279 insertions(+), 74 deletions(-) create mode 100644 .sops.yaml create mode 100644 app.toml create mode 100644 deploy/railiance/secrets/.gitignore create mode 100644 deploy/railiance/secrets/README.md create mode 100644 deploy/railiance/secrets/inter-hub.env.example.yaml create mode 100755 deploy/railiance/secrets/k8s-secret-json-to-sops-input.py diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml index 51d9486..d605348 100644 --- a/.gitea/workflows/deploy.yaml +++ b/.gitea/workflows/deploy.yaml @@ -3,6 +3,13 @@ name: Build and Deploy on: push: branches: [main] + paths-ignore: + - ".custodian-brief.md" + - ".sops.yaml" + - "app.toml" + - "deploy/railiance/**" + - "docs/**" + - "workplans/**" workflow_dispatch: jobs: diff --git a/.sops.yaml b/.sops.yaml new file mode 100644 index 0000000..18137f0 --- /dev/null +++ b/.sops.yaml @@ -0,0 +1,8 @@ +# SOPS encryption policy for inter-hub production handoff files. +# Encrypt any file ending in .sops.yaml with the shared Railiance age recipient. 
+ +creation_rules: + - path_regex: \.sops\.yaml$ + key_groups: + - age: + - age1aq8twfd78wvpra0had8cezcnj96tj4q0068edrz5jez8d6xwmflqdepsh4 diff --git a/app.toml b/app.toml new file mode 100644 index 0000000..d1215bf --- /dev/null +++ b/app.toml @@ -0,0 +1,13 @@ +[app] +name = "inter-hub" +slug = "inter-hub" +kind = "native" +registry = "gitea.coulomb.social/coulomb/inter-hub" + +[deploy] +release = "inter-hub" +namespace = "inter-hub" +chart = "railiance-apps/charts/inter-hub" +values = "railiance-apps/helm/inter-hub-values.yaml" +runtime_secret = "inter-hub-env" +public_url = "https://hub.coulomb.social" diff --git a/deploy/railiance/RUNBOOK.md b/deploy/railiance/RUNBOOK.md index be5be04..ba628bc 100644 --- a/deploy/railiance/RUNBOOK.md +++ b/deploy/railiance/RUNBOOK.md @@ -4,20 +4,43 @@ - **Cluster:** Railiance01 (K3s, 92.205.62.239) - **Namespace:** `inter-hub` -- **Image registry:** `92.205.130.254:32166/coulomb/inter-hub:` (Gitea on CoulombCore) +- **Image registry:** `gitea.coulomb.social/coulomb/inter-hub:` - **Database:** CloudNativePG cluster `net-kingdom-pg` in `databases` namespace - RW endpoint: `net-kingdom-pg-rw.databases.svc.cluster.local:5432` - Database: `interhub`, User: `interhub` - **Ingress:** Traefik → `hub.coulomb.social` (TLS via letsencrypt-prod) - **Secrets:** `inter-hub-env` Secret in `inter-hub` namespace +- **App handoff:** `app.toml` points Railiance operators to + `railiance-apps/charts/inter-hub` with values from + `railiance-apps/helm/inter-hub-values.yaml` ## Deployment +Normal deployment is handled by Gitea Actions on push to `main`: + +- runner labels: `self-hosted`, `haskelseed` +- build: `nix build .#docker` +- publish: `gitea.coulomb.social/coulomb/inter-hub:` and `latest` +- deploy: `helm upgrade --install inter-hub deploy/helm/inter-hub ...` +- smoke: public landing page and v2 auth gate + +Manual deployment from this repo: + ```bash -# From workstation (image already built and pushed): helm upgrade --install inter-hub 
deploy/helm/inter-hub \ --namespace inter-hub --create-namespace \ - --set image.tag= + --set image.tag= \ + --wait --timeout 5m +``` + +Manual deployment through the Railiance app handoff chart: + +```bash +helm upgrade --install inter-hub /home/worsch/railiance-apps/charts/inter-hub \ + --namespace inter-hub --create-namespace \ + -f /home/worsch/railiance-apps/helm/inter-hub-values.yaml \ + --set image.tag= \ + --wait --timeout 5m ``` ## Image Build (on haskelseed) @@ -28,42 +51,76 @@ cd /root/inter-hub # Build: nix build .#docker --log-format raw > /tmp/build.log 2>&1 -# Push — Gitea registry token realm points to gitea.coulomb.social:80 but Gitea -# only listens on port 32166; skopeo must use a pre-fetched token: +# Push: SHA=$(git rev-parse --short HEAD) -SKOPEO=/nix/store/fwdagky9lfsyrgzxiq14zijcziazfdsn-skopeo-1.22.2/bin/skopeo -TOKEN=$(curl -s \ - "http://92.205.130.254:32166/v2/token?service=container_registry&scope=repository:coulomb/inter-hub:push,pull" \ - -u 'tegwick:' | awk -F'"' '/token/{print $4}') -$SKOPEO copy --insecure-policy --dest-tls-verify=false \ +TOKEN=$(curl -fsS \ + "https://gitea.coulomb.social/v2/token?service=container_registry&scope=repository:coulomb/inter-hub:push,pull" \ + -u "tegwick:" | awk -F'"' '/token/{print $4}') +skopeo copy --insecure-policy \ --dest-registry-token "$TOKEN" \ docker-archive:result \ - docker://92.205.130.254:32166/coulomb/inter-hub:$SHA + docker://gitea.coulomb.social/coulomb/inter-hub:$SHA ``` **Notes:** -- `skopeo` is in the Nix profile but not on PATH — use the full store path above. -- The IHP Nix Docker image has NO `/bin/RunProdServer` symlink. The binary lives at - `/nix/store/-inter-hub/bin/RunProdServer` (hash changes per build). - Use `kubectl exec deploy/inter-hub -- /nix/store/*-inter-hub/bin/RunProdServer ` - if a shell is not available (the Nix image has no `/bin/sh`). +- Haskelseed is a build/deploy runner, not the production app host. +- The IHP Nix Docker image may not have `/bin/sh`. 
Prefer Kubernetes-native + checks from other pods or the database pod when possible. ## Gitea Registry Credentials -The Gitea token for registry push is stored in `~/.config/tea/config.yml` on the -workstation. If the token has expired, generate a new one: -1. Go to http://92.205.130.254:32166 → Settings → Applications → Generate new token -2. Scope: `package:write` -3. Update `~/.config/tea/config.yml` on the workstation -4. Update the `GITEA_TOKEN` in any CI/CD secrets +The deploy workflow uses the repository Actions secret `REGISTRY_TOKEN` to +request a short-lived registry bearer token from +`https://gitea.coulomb.social/v2/token`. + +If publishing starts failing with an authentication error: +1. Generate or rotate a Gitea token with package write access. +2. Update the `REGISTRY_TOKEN` Actions secret for `coulomb/inter-hub`. +3. Rerun the workflow or push a non-production test commit. + +Do not print token values in logs, State Hub, or commits. + +## Runtime Secret Source + +The live deployment currently consumes the Kubernetes Secret +`inter-hub/inter-hub-env`. The durable source file is: + +```text +deploy/railiance/secrets/inter-hub.env.sops.yaml +``` + +Create or refresh it from the live Secret using: + +```bash +tmp="$(mktemp)" +trap 'rm -f "$tmp"' EXIT + +kubectl -n inter-hub get secret inter-hub-env -o json \ + | python3 deploy/railiance/secrets/k8s-secret-json-to-sops-input.py \ + > "$tmp" + +sops --encrypt --input-type yaml --output-type yaml \ + --age age1aq8twfd78wvpra0had8cezcnj96tj4q0068edrz5jez8d6xwmflqdepsh4 \ + "$tmp" > deploy/railiance/secrets/inter-hub.env.sops.yaml +``` + +Apply the encrypted source: + +```bash +sops -d deploy/railiance/secrets/inter-hub.env.sops.yaml \ + | kubectl apply -f - +kubectl rollout restart deployment/inter-hub -n inter-hub +kubectl rollout status deployment/inter-hub -n inter-hub +``` ## Database Migration -IHP migrations run automatically on startup via the init container in the Deployment. 
-To run migrations manually: +IHP migrations can be run from the production image when needed. Because the +image is Nix-built and may not contain a shell, first inspect the binary path: ```bash -kubectl exec -n inter-hub deploy/inter-hub -- /bin/RunProdServer migrate +kubectl exec -n inter-hub deploy/inter-hub -- find /nix/store -path '*inter-hub*/bin/RunProdServer' +kubectl exec -n inter-hub deploy/inter-hub -- /nix/store/-inter-hub/bin/RunProdServer migrate ``` To check migration status: @@ -97,14 +154,8 @@ helm rollback inter-hub 1 --namespace inter-hub To rotate the session secret: ```bash -kubectl create secret generic inter-hub-env \ - --namespace inter-hub \ - --from-literal=DATABASE_URL='...' \ - --from-literal=IHP_SESSION_SECRET='' \ - --from-literal=IHP_BASEURL='https://hub.coulomb.social' \ - --from-literal=PORT='8000' \ - --from-literal=IHP_ENV='Production' \ - --dry-run=client -o yaml | kubectl apply -f - +sops deploy/railiance/secrets/inter-hub.env.sops.yaml +sops -d deploy/railiance/secrets/inter-hub.env.sops.yaml | kubectl apply -f - kubectl rollout restart deployment/inter-hub -n inter-hub ``` @@ -116,10 +167,9 @@ To rotate the database password: ## Smoke Test ```bash -curl -s https://hub.coulomb.social/ | grep "Inter-Hub" # Landing 200 -curl -s https://hub.coulomb.social/capabilities | grep "Capabilities" -curl -s https://hub.coulomb.social/api/v2/hubs # 401 expected -curl -H "Authorization: Bearer " https://hub.coulomb.social/api/v2/hubs # 200 +curl -fsS https://hub.coulomb.social/ | grep "inter-hub" +curl -fsS https://hub.coulomb.social/api/v2/openapi.json >/dev/null +curl -s -o /dev/null -w "%{http_code}" https://hub.coulomb.social/api/v2/widgets | grep 401 ``` ## Database Connection Check @@ -153,7 +203,8 @@ EOF ## haskelseed Build VM - **Host:** 192.168.178.135 -- **Access:** `ssh root@192.168.178.135` (password in team secrets) -- **Repo:** `/root/inter-hub` (git initialized locally; pull requires Gitea token) -- **Build logs:** 
`/tmp/nix-build-docker.log` +- **Access:** ops-bridge SSH path with the approved operator key +- **Role:** self-hosted Gitea Actions runner and Nix build machine only +- **Runner:** OpenRC `act_runner` service registered to `https://gitea.coulomb.social` +- **Build logs:** Gitea Actions logs and temporary runner work directories - **Nix store:** `/dev/sdb1` (100 GB, mounted at `/nix`) diff --git a/deploy/railiance/secrets/.gitignore b/deploy/railiance/secrets/.gitignore new file mode 100644 index 0000000..22af0ea --- /dev/null +++ b/deploy/railiance/secrets/.gitignore @@ -0,0 +1,6 @@ +* +!.gitignore +!README.md +!*.example.yaml +!*.sops.yaml +!*.py diff --git a/deploy/railiance/secrets/README.md b/deploy/railiance/secrets/README.md new file mode 100644 index 0000000..5150bbe --- /dev/null +++ b/deploy/railiance/secrets/README.md @@ -0,0 +1,51 @@ +# inter-hub Runtime Secret + +`inter-hub.env.sops.yaml` is the durable source for the production +`inter-hub/inter-hub-env` Kubernetes Secret. The file is encrypted with the +shared Railiance age recipient declared in the repo root `.sops.yaml`. + +Do not commit plaintext secret material. This directory ignores plaintext files +by default; only `*.sops.yaml`, examples, docs, and helper scripts are tracked. 
+ +## Create Or Refresh + +Use an attended operator shell with `kubectl`, `sops`, and access to the shared +Railiance age identity: + +```bash +tmp="$(mktemp)" +trap 'rm -f "$tmp"' EXIT + +kubectl -n inter-hub get secret inter-hub-env -o json \ + | python3 deploy/railiance/secrets/k8s-secret-json-to-sops-input.py \ + > "$tmp" + +sops --encrypt --input-type yaml --output-type yaml \ + --age age1aq8twfd78wvpra0had8cezcnj96tj4q0068edrz5jez8d6xwmflqdepsh4 \ + "$tmp" > deploy/railiance/secrets/inter-hub.env.sops.yaml +``` + +Review only non-secret metadata before committing: + +```bash +sops -d deploy/railiance/secrets/inter-hub.env.sops.yaml \ + | sed -n '1,7p' +``` + +## Apply + +```bash +sops -d deploy/railiance/secrets/inter-hub.env.sops.yaml \ + | kubectl apply -f - + +kubectl rollout restart deployment/inter-hub -n inter-hub +kubectl rollout status deployment/inter-hub -n inter-hub +``` + +## Expected Keys + +- `DATABASE_URL` +- `IHP_SESSION_SECRET` +- `IHP_BASEURL` +- `PORT` +- `IHP_ENV` diff --git a/deploy/railiance/secrets/inter-hub.env.example.yaml b/deploy/railiance/secrets/inter-hub.env.example.yaml new file mode 100644 index 0000000..d4175ba --- /dev/null +++ b/deploy/railiance/secrets/inter-hub.env.example.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Secret +metadata: + name: inter-hub-env + namespace: inter-hub +type: Opaque +stringData: + DATABASE_URL: "postgresql://interhub:@net-kingdom-pg-rw.databases.svc.cluster.local:5432/interhub?sslmode=disable" + IHP_SESSION_SECRET: "<64-char-hex>" + IHP_BASEURL: "https://hub.coulomb.social" + PORT: "8000" + IHP_ENV: "Production" diff --git a/deploy/railiance/secrets/k8s-secret-json-to-sops-input.py b/deploy/railiance/secrets/k8s-secret-json-to-sops-input.py new file mode 100755 index 0000000..19b049a --- /dev/null +++ b/deploy/railiance/secrets/k8s-secret-json-to-sops-input.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +"""Convert a Kubernetes Secret JSON document into a SOPS-ready Secret manifest. 
+ +The output contains decoded secret values under stringData and must be redirected +to a temporary file, encrypted with sops, and removed immediately. +""" + +import base64 +import json +import sys + + +def yaml_string(value: str) -> str: + return json.dumps(value) + + +source = json.load(sys.stdin) +metadata = source.get("metadata", {}) +name = metadata.get("name", "inter-hub-env") +namespace = metadata.get("namespace", "inter-hub") +data = source.get("data", {}) + +print("apiVersion: v1") +print("kind: Secret") +print("metadata:") +print(f" name: {yaml_string(name)}") +print(f" namespace: {yaml_string(namespace)}") +print("type: Opaque") +print("stringData:") + +for key in sorted(data): + decoded = base64.b64decode(data[key]).decode("utf-8") + print(f" {key}: {yaml_string(decoded)}") diff --git a/docs/new-hub-quickstart.md b/docs/new-hub-quickstart.md index f0960a7..725038c 100644 --- a/docs/new-hub-quickstart.md +++ b/docs/new-hub-quickstart.md @@ -194,10 +194,11 @@ POST events. Everything downstream is managed by inter-hub. ### Prerequisites The same build infrastructure used for inter-hub works directly: -- haskelseed VM (`192.168.178.135`) with GHC 9.10.3 in the Nix store +- haskelseed VM (`192.168.178.135`) as the CI/Nix build runner with GHC 9.10.3 + in the Nix store - `devenv` for reproducible environments - The painful one-time Nix setup is already done — a new IHP project reuses - the same Nix store + the same Nix store when built on the runner ### Bootstrap a new hub repo @@ -213,8 +214,9 @@ devenv up ``` The first `devenv up` on a fresh machine takes 20–40 min to fetch Nix -dependencies. On haskelseed, most dependencies are already in the Nix store -and the setup takes ~2 minutes. +dependencies. On haskelseed, most dependencies are already in the Nix store, +which is why it is useful as a build runner. It is not the production runtime +host for inter-hub. 
### Connect to inter-hub's API @@ -243,20 +245,10 @@ postEvent apiKey widgetId eventType metadata = do ### Shared database (optional) -If your hub needs read access to inter-hub's tables (e.g., to join against -`requirements` or `decision_records`), connect to the same PostgreSQL: - -```bash -# In your hub's .env: -DATABASE_URL=postgresql://ihp:ihp@192.168.178.135/interhub -``` - -Your IHP app can then use `query @DecisionRecord` directly without going -through the API. This is appropriate for tightly-coupled hubs that are -part of the same operational boundary. - -For loosely-coupled hubs (separate teams, separate deploy cadence), use -the API only — do not share the database. +Production inter-hub runs on Railiance01 K3s and uses PostgreSQL inside the +Railiance cluster. Do not connect new hubs to a haskelseed database. Prefer the +API boundary for extension hubs; request a governed read model or dedicated +service account if a hub truly needs database-level integration. ### How fast is the Haskell build for a new hub? 
diff --git a/workplans/IHUB-WP-0018-railiance01-deployment.md b/workplans/IHUB-WP-0018-railiance01-deployment.md index 53a0b8c..099192a 100644 --- a/workplans/IHUB-WP-0018-railiance01-deployment.md +++ b/workplans/IHUB-WP-0018-railiance01-deployment.md @@ -224,47 +224,63 @@ state_hub_task_id: "926f82d1-15cd-425d-8a41-3d6b51c07f0b" Create `deploy/railiance/secrets/inter-hub.env.sops.yaml` with: ```yaml -# sops encrypted — do not edit manually -DATABASE_URL: postgresql://interhub:@pgpool.railiance-platform.svc:5432/interhub -IHP_SESSION_SECRET: <64-char-hex> -IHP_BASEURL: https://hub.coulomb.social +apiVersion: v1 +kind: Secret +metadata: + name: inter-hub-env + namespace: inter-hub +type: Opaque +stringData: + DATABASE_URL: postgresql://interhub:@net-kingdom-pg-rw.databases.svc.cluster.local:5432/interhub?sslmode=disable + IHP_SESSION_SECRET: <64-char-hex> + IHP_BASEURL: https://hub.coulomb.social + PORT: "8000" + IHP_ENV: Production ``` Encrypt with the age key: ```bash -sops --encrypt --age $(cat ~/.config/sops/age/keys.txt | grep public | awk '{print $4}') \ - deploy/railiance/secrets/inter-hub.env.sops.yaml > deploy/railiance/secrets/inter-hub.env.sops.yaml +sops --encrypt \ + --age age1aq8twfd78wvpra0had8cezcnj96tj4q0068edrz5jez8d6xwmflqdepsh4 \ + /tmp/inter-hub-env.yaml > deploy/railiance/secrets/inter-hub.env.sops.yaml ``` -Commit the encrypted file. The Gitea Actions workflow decrypts at deploy time -using the age key from a Kubernetes Secret (bootstrapped once manually). +Commit only the encrypted file. Apply it with +`sops -d deploy/railiance/secrets/inter-hub.env.sops.yaml | kubectl apply -f -`. **Recovery note (2026-06-14):** Runtime secrets were bootstrapped manually in Kubernetes so production could deploy safely. This task remains in progress until the durable SOPS-encrypted source for `DATABASE_URL`, `IHP_SESSION_SECRET`, and related runtime env is committed and wired into the deploy path. 
+**Progress note (2026-06-14):** Added repo root `.sops.yaml`, plaintext +guardrails under `deploy/railiance/secrets/`, an example Secret manifest, and +`k8s-secret-json-to-sops-input.py` to convert the live Kubernetes Secret into a +SOPS-ready manifest without printing values. This remains in progress because +`deploy/railiance/secrets/inter-hub.env.sops.yaml` is not committed yet; local +`sops` tooling was not available during this session. + ### R6 — Helm chart in railiance-apps ```task id: IHUB-WP-0018-T06 -status: in_progress +status: done priority: high state_hub_task_id: "4c4acc98-5773-4289-ad57-03f3fd5c381c" ``` -Create `helm/inter-hub/` in the `railiance-apps` repository following the +Create `charts/inter-hub/` in the `railiance-apps` repository following the Railiance app.toml contract. Minimal chart: ``` -helm/inter-hub/ +charts/inter-hub/ Chart.yaml name: inter-hub, version: 0.1.0 values.yaml image.tag, ingress.host, resources - values.prod.yaml replicas: 1, resources.requests.memory: 1Gi +helm/inter-hub-values.yaml + production non-secret overrides templates/ deployment.yaml envFrom: secretRef inter-hub-env service.yaml ClusterIP :8000 ingress.yaml Traefik annotations, TLS - secret.yaml created by sops-operator or external-secrets ``` `app.toml` in the inter-hub repo root for railiance CLI integration: @@ -273,10 +289,10 @@ helm/inter-hub/ name = "inter-hub" slug = "inter-hub" kind = "native" -registry = "registry.coulomb.social/coulomb/inter-hub" +registry = "gitea.coulomb.social/coulomb/inter-hub" [deploy] -chart = "railiance-apps/helm/inter-hub" +chart = "railiance-apps/charts/inter-hub" namespace = "inter-hub" ``` @@ -291,6 +307,11 @@ successfully deployed the app to Railiance01. This task remains in progress because the repo-root `app.toml` and railiance-apps handoff are still not completed. 
+**Completion note (2026-06-14):** Added repo-root `app.toml` in inter-hub and +added `charts/inter-hub`, `helm/inter-hub-values.yaml`, Makefile targets, and +server-dry-run coverage in `railiance-apps`. The chart rendered successfully on +haskelseed with `helm template`. + ### R7 — Gitea Actions CI/CD pipeline ```task @@ -358,6 +379,11 @@ token from the repo `REGISTRY_TOKEN` Actions secret, deploys with Helm, and runs public smoke checks. Gitea Actions run `2913` completed successfully for commit `5663fab`. +**Load-control note (2026-06-14):** Added workflow `paths-ignore` for docs, +workplans, `.custodian-brief.md`, `app.toml`, `.sops.yaml`, and +`deploy/railiance/**` so State Hub consistency/doc-only commits do not consume a +haskelseed build/deploy cycle. + ### R8 — Staged deployment and smoke test ```task @@ -398,7 +424,7 @@ and unauthenticated `/api/v2/widgets` returns 401. ```task id: IHUB-WP-0018-T09 -status: in_progress +status: done priority: medium state_hub_task_id: "4d1e55c7-8dbb-480f-b07b-6c5e39a04218" ``` @@ -420,6 +446,12 @@ deployment evidence has been recorded here. Remaining documentation work is to capture the durable secret-management and railiance-apps handoff path once R5 and R6 are completed. +**Completion note (2026-06-14):** Updated `deploy/railiance/RUNBOOK.md` for the +current Gitea registry host, runner-based build/deploy path, SOPS secret handoff, +current smoke checks, and haskelseed's build-runner-only role. Updated +`docs/new-hub-quickstart.md` so haskelseed is no longer described as a +production/shared database runtime. + ## Exit Criteria - `https://hub.coulomb.social/` returns the Landing page (200, no auth)