From 1a5b65a338fcf8bc9c75da8ece1eb2b7cbb37335 Mon Sep 17 00:00:00 2001 From: tegwick Date: Tue, 19 May 2026 04:50:40 +0200 Subject: [PATCH] RAILIANCE-WP-0003 T02-T06: provision shared apps-pg cnpg cluster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the shared CloudNativePG cluster apps-pg for S5 application databases: - helm/apps-pg-cluster.yaml — Cluster CR, PG 16, 1 instance, 10Gi - helm/apps-pg-networkpolicies.yaml — egress-to-kube-api + ingress-from-cnpg-operator + label-based ingress opt-in (railiance.io/postgres-client=apps-pg) - helm/apps-pg-secret.sops.yaml.template — bootstrap credential template (encrypt with SOPS before committing the real .sops.yaml) - Makefile targets: apps-pg-deploy, apps-pg-status (with cnpg-plugin fallback), apps-pg-shell (apps_admin/apps_meta), apps-pg-logs - docs/apps-pg.md (codex) — consumer onboarding contract clarifying the CNPG 1.28 role/database lifecycle boundary Also fixes helm/gitea-db-cluster.yaml: spec.postgresql.version is not a valid CNPG v1 field (strict decoding rejects it). Replaced with spec.imageName matching the live cluster (postgresql:18.1-system-trixie) so make db-deploy is a no-op instead of an apply rejection. Live state at commit time: Cluster apps-pg in healthy state, primary apps-pg-1 Running, smoke-tested via psql from a labeled temp ns. 
Co-Authored-By: codex Co-Authored-By: Claude Opus 4.7 --- Makefile | 24 ++++++- docs/apps-pg.md | 97 ++++++++++++++++++++++++++ helm/apps-pg-cluster.yaml | 44 ++++++++++++ helm/apps-pg-networkpolicies.yaml | 71 +++++++++++++++++++ helm/apps-pg-secret.sops.yaml.template | 25 +++++++ helm/gitea-db-cluster.yaml | 6 +- 6 files changed, 262 insertions(+), 5 deletions(-) create mode 100644 docs/apps-pg.md create mode 100644 helm/apps-pg-cluster.yaml create mode 100644 helm/apps-pg-networkpolicies.yaml create mode 100644 helm/apps-pg-secret.sops.yaml.template diff --git a/Makefile b/Makefile index 830f564..ac2e5c9 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,9 @@ SHELL := /usr/bin/env bash .DEFAULT_GOAL := help -KUBECONFIG ?= $(HOME)/.kube/config-hosteurope -KUBECTL := kubectl --kubeconfig=$(KUBECONFIG) +KUBECONFIG ?= $(firstword $(wildcard $(HOME)/.kube/config-hosteurope) $(HOME)/.kube/config) +KUBECTL_BIN ?= $(firstword $(shell command -v kubectl 2>/dev/null) $(wildcard $(HOME)/.local/bin/kubectl) kubectl) +KUBECTL := $(KUBECTL_BIN) --kubeconfig=$(KUBECONFIG) HELM := helm --kubeconfig=$(KUBECONFIG) NAMESPACE := platform @@ -28,6 +29,23 @@ db-shell: ## Open psql shell on gitea-db primary db-logs: ## Tail gitea-db primary logs $(KUBECTL) logs -n databases -l cnpg.io/cluster=gitea-db -f --tail=50 +##@ Shared apps-pg (S5 application databases) + +apps-pg-deploy: ## Apply shared apps-pg cnpg Cluster + NetworkPolicies + $(KUBECTL) apply -f helm/apps-pg-cluster.yaml + $(KUBECTL) apply -f helm/apps-pg-networkpolicies.yaml + +apps-pg-status: ## Show apps-pg cnpg cluster health + $(KUBECTL) cnpg status apps-pg -n databases 2>/dev/null || \ + $(KUBECTL) get cluster apps-pg -n databases -o wide + +apps-pg-shell: ## Open psql shell on apps-pg primary as apps_admin / apps_meta + $(KUBECTL) cnpg psql apps-pg -n databases -- -U apps_admin apps_meta 2>/dev/null || \ + $(KUBECTL) exec -it -n databases apps-pg-1 -- psql -U apps_admin apps_meta + +apps-pg-logs: ## Tail apps-pg primary 
logs + $(KUBECTL) logs -n databases -l cnpg.io/cluster=apps-pg -f --tail=50 + ##@ PostgreSQL HA (legacy — superseded by cnpg above) pg-deploy: ## Deploy / upgrade standalone PostgreSQL HA to platform namespace @@ -103,4 +121,4 @@ help: ## Show this help /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-22s\033[0m %s\n", $$1, $$2 } \ /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) }' $(MAKEFILE_LIST) -.PHONY: db-deploy db-status db-shell db-logs pg-deploy pg-status pg-pgpool-check valkey-deploy valkey-status openbao-repo openbao-dry-run openbao-deploy openbao-status backup help +.PHONY: db-deploy db-status db-shell db-logs apps-pg-deploy apps-pg-status apps-pg-shell apps-pg-logs pg-deploy pg-status pg-pgpool-check valkey-deploy valkey-status openbao-repo openbao-dry-run openbao-deploy openbao-status backup help diff --git a/docs/apps-pg.md b/docs/apps-pg.md new file mode 100644 index 0000000..6d59f34 --- /dev/null +++ b/docs/apps-pg.md @@ -0,0 +1,97 @@ +# apps-pg Shared PostgreSQL Cluster + +`apps-pg` is the shared CloudNativePG cluster for S5 application +databases. It lives in the `databases` namespace and is owned by +`railiance-platform` as an S3 platform service. + +## Cluster Identity + +- CNPG Cluster: `apps-pg` +- Namespace: `databases` +- PostgreSQL major version: `16` +- Primary endpoint: `apps-pg-rw.databases.svc.cluster.local:5432` +- Read-only endpoint: `apps-pg-ro.databases.svc.cluster.local:5432` +- Bootstrap database: `apps_meta` +- Bootstrap role: `apps_admin` + +`apps_admin` is a platform bootstrap and smoke-test role. Do not copy it +into application namespaces, use it in S5 runtime configuration, or treat +it as a consumer credential. + +## Consumer Onboarding + +Each S5 application gets its own role, database, and runtime Secret. The +current flow is platform-administered until OpenBao or a dedicated +database onboarding automation owns the lifecycle end to end. + +1. Request an app database in the consuming repo workplan. 
Include the + app name, namespace, database name, role name, intended owners, and + any required extensions. +2. Platform reviews and approves the database/role names. Names should + be app-scoped, for example `vergabe` and `vergabe_db`. +3. Platform provisions the backing PostgreSQL role and credential for + the approved app. Until automation exists, this is a controlled + operator SQL action against `apps-pg`, not a self-service repo apply. +4. Add a CNPG `Database` manifest in the platform-managed database + manifests, with `spec.cluster.name: apps-pg` and `spec.owner` set to + the approved role. +5. Label the consuming namespace so NetworkPolicy allows access: + + ```bash + kubectl label namespace NAMESPACE \ + railiance.io/postgres-client=apps-pg + ``` + +6. Publish or mirror the runtime Secret into the consumer namespace. The + Secret should contain only that app's role credential and DSN fields. +7. Wire the DSN into the application Helm values or runtime + configuration. Prefer the RW service for migrations and writes: + `postgresql://ROLE:PASSWORD@apps-pg-rw.databases.svc.cluster.local:5432/DATABASE`. + +Example CNPG `Database` resource: + +```yaml +apiVersion: postgresql.cnpg.io/v1 +kind: Database +metadata: + name: vergabe-db + namespace: databases +spec: + cluster: + name: apps-pg + name: vergabe_db + owner: vergabe +``` + +## CNPG Boundary + +CNPG 1.28 provides a standalone `Database` CRD. It does not provide a +standalone `Role` CRD in this cluster. Role lifecycle is cluster-scoped, +through `Cluster.spec.managed.roles` or a controlled SQL workflow. + +Consumer repos must therefore not assume they can create PostgreSQL +roles themselves. They can request a database and consume a runtime +Secret after the platform role has been provisioned. + +## Network Policy + +The `databases` namespace has a default-deny posture. 
`apps-pg` accepts +client traffic on TCP/5432 only from namespaces labeled: + +```text +railiance.io/postgres-client=apps-pg +``` + +The CNPG operator in `cnpg-system` is allowed to manage the cluster on +the standard PostgreSQL, instance manager, and metrics ports. + +## Backup And Roadmap + +`apps-pg` starts as a conservative single-instance, 10Gi cluster to match +the current node capacity and existing CNPG footprint. Adding a replica, +PgBouncer/CNPG `Pooler`, resize policy, and CNPG-native backup coverage +are follow-up platform work items. + +Until backup coverage is explicitly added, consumer onboarding should +record whether app data is disposable, externally reproducible, or +requires an immediate backup follow-up before production use. diff --git a/helm/apps-pg-cluster.yaml b/helm/apps-pg-cluster.yaml new file mode 100644 index 0000000..a6f1f32 --- /dev/null +++ b/helm/apps-pg-cluster.yaml @@ -0,0 +1,44 @@ +--- +# Shared CNPG Cluster for S5 application databases (RAILIANCE-WP-0003). +# Owned by railiance-platform (S3). Operator lives in cnpg-system. +# +# Apply: kubectl apply -f helm/apps-pg-cluster.yaml +# Status: kubectl cnpg status apps-pg -n databases (requires cnpg kubectl plugin) +# or: kubectl get cluster apps-pg -n databases -o wide +# +# Pre-condition: apps-pg-credentials Secret must exist in databases ns. +# See helm/apps-pg-secret.sops.yaml.template for the bootstrap recipe. +# +# Consumer onboarding: see docs/apps-pg.md. The bootstrap role apps_admin +# and meta DB apps_meta exist only to anchor the cluster; per-app roles +# and databases are added through the documented onboarding contract. 
+apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: apps-pg + namespace: databases + labels: + app.kubernetes.io/name: apps-pg + app.kubernetes.io/component: database + app.kubernetes.io/managed-by: manual + railiance.io/layer: s3-platform + railiance.io/role: shared-apps-database +spec: + instances: 1 # bump to 3 when node RAM > 8GB + imageName: ghcr.io/cloudnative-pg/postgresql:16 + storage: + size: 10Gi + bootstrap: + initdb: + database: apps_meta + owner: apps_admin + secret: + name: apps-pg-credentials + # HA replica + connection pooler are deferred (RAILIANCE-WP-0003 Notes): + # managed: + # services: + # additional: + # - selectorType: rw + # serviceTemplate: + # metadata: + # name: apps-pg-pooler-rw diff --git a/helm/apps-pg-networkpolicies.yaml b/helm/apps-pg-networkpolicies.yaml new file mode 100644 index 0000000..c41aec4 --- /dev/null +++ b/helm/apps-pg-networkpolicies.yaml @@ -0,0 +1,71 @@ +# NetworkPolicies for the shared apps-pg cnpg cluster (RAILIANCE-WP-0003). +# The databases namespace has a default-deny-all policy; each cluster +# needs explicit egress-to-kube-api, ingress-from-cnpg-operator, and +# ingress-from-app-namespace policies. +# +# Unlike gitea-db (which hard-codes `default` as the consumer ns), this +# triplet uses a label-based opt-in: any namespace carrying the label +# `railiance.io/postgres-client=apps-pg` may connect on TCP/5432. The +# shared cluster cannot know its consumer namespaces in advance, so it +# expects each consumer to add this label as part of its onboarding. 
+--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-egress-kube-api-apps-pg + namespace: databases +spec: + podSelector: + matchLabels: + cnpg.io/cluster: apps-pg + policyTypes: + - Egress + egress: + - ports: + - port: 6443 + protocol: TCP +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-ingress-from-cnpg-operator-apps-pg + namespace: databases +spec: + podSelector: + matchLabels: + cnpg.io/cluster: apps-pg + policyTypes: + - Ingress + ingress: + - from: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: cnpg-system + ports: + - port: 5432 + protocol: TCP + - port: 8000 + protocol: TCP + - port: 9187 + protocol: TCP +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-ingress-from-app-namespaces-apps-pg + namespace: databases +spec: + podSelector: + matchLabels: + cnpg.io/cluster: apps-pg + policyTypes: + - Ingress + ingress: + - from: + - namespaceSelector: + matchLabels: + railiance.io/postgres-client: apps-pg + podSelector: {} + ports: + - port: 5432 + protocol: TCP diff --git a/helm/apps-pg-secret.sops.yaml.template b/helm/apps-pg-secret.sops.yaml.template new file mode 100644 index 0000000..fede987 --- /dev/null +++ b/helm/apps-pg-secret.sops.yaml.template @@ -0,0 +1,25 @@ +# Template for the apps-pg-credentials Secret. +# DO NOT commit this file with real credentials. +# Encrypt with: sops -e -i helm/apps-pg-secret.sops.yaml +# Apply with: kubectl apply -f <(sops -d helm/apps-pg-secret.sops.yaml) +# +# This Secret is consumed by the bootstrap.initdb stanza of +# helm/apps-pg-cluster.yaml and only exists to create the platform +# bootstrap role `apps_admin` and meta DB `apps_meta`. It is NOT a +# runtime credential for any S5 application — those are issued per +# consumer through the onboarding contract in docs/apps-pg.md. 
+--- +apiVersion: v1 +kind: Secret +metadata: + name: apps-pg-credentials + namespace: databases + labels: + app.kubernetes.io/name: apps-pg + app.kubernetes.io/component: database-bootstrap + app.kubernetes.io/managed-by: manual + railiance.io/layer: s3-platform +type: kubernetes.io/basic-auth +stringData: + username: apps_admin + password: REPLACE_WITH_PASSWORD # encrypt with SOPS before committing diff --git a/helm/gitea-db-cluster.yaml b/helm/gitea-db-cluster.yaml index ac747de..62472bd 100644 --- a/helm/gitea-db-cluster.yaml +++ b/helm/gitea-db-cluster.yaml @@ -24,8 +24,10 @@ metadata: railiance.io/layer: s3-platform spec: instances: 1 # bump to 3 when node RAM > 8GB - postgresql: - version: "16" + # spec.postgresql.version is not a real CNPG v1 field; use imageName. + # Live cluster was upgraded to PG 18.1; match the live state so + # `make db-deploy` (kubectl apply) is a no-op rather than a rejection. + imageName: ghcr.io/cloudnative-pg/postgresql:18.1-system-trixie storage: size: 10Gi bootstrap: