Files
railiance-cluster/tools/create_railiance_overlay_repo.sh
tegwick f68e1b36da
Some checks failed
railiance-tests / smoke (push) Has been cancelled
Add Railiance Stage 1 run command
2026-06-27 16:24:17 +02:00

456 lines
12 KiB
Bash
Executable File

#!/usr/bin/env bash
# tools/create_railiance_overlay_repo.sh
# Create a local Railiance overlay repo skeleton for a third-party upstream app.
set -euo pipefail
# Option defaults. Each value may be replaced by the matching command-line flag.
APP_ID=''                    # --app-id (required)
APP_NAME=''                  # --name
OWNER='platform'             # --owner
CRITICALITY='medium'         # --criticality
UPSTREAM_URL=''              # --upstream-url (required)
UPSTREAM_REVISION='main'     # --upstream-revision
UPSTREAM_TRACKING='branch'   # --upstream-tracking
OUT_DIR=''                   # --out-dir
INIT_GIT='false'             # --init-git switches this to "true"
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   help text on stdout (callers redirect to stderr on errors)
#######################################
usage() {
  printf '%s\n' \
    'Usage: tools/create_railiance_overlay_repo.sh --app-id <id> --upstream-url <url> [options]' \
    'Required:' \
    '--app-id <id> Stable lowercase app id, e.g. forgejo' \
    '--upstream-url <url> Upstream source repository or release URL' \
    'Options:' \
    '--name <name> Human-readable app name (default: app id)' \
    '--owner <owner> Owning team/domain (default: platform)' \
    '--criticality <level> low|medium|high|critical (default: medium)' \
    '--upstream-revision <rev> Upstream branch/tag/commit/release (default: main)' \
    '--upstream-tracking <kind> branch|tag|commit|release|digest (default: branch)' \
    '--out-dir <path> Output directory (default: <app-id>-railiance-overlay)' \
    '--init-git Initialize a local Git repo, without committing' \
    '-h|--help Show this help' \
    'The script writes local files only. It does not clone upstream code, call Gitea,' \
    'fetch secrets, or push a remote.'
}
#######################################
# Parse command-line options into the global configuration variables.
# Globals:   APP_ID, APP_NAME, OWNER, CRITICALITY, UPSTREAM_URL,
#            UPSTREAM_REVISION, UPSTREAM_TRACKING, OUT_DIR, INIT_GIT (written)
# Arguments: the script's command-line arguments
# Outputs:   help text on stdout for -h/--help; diagnostics on stderr
# Exits:     0 after printing help; 2 on an unknown option or when a
#            value-taking option has a missing or empty value
#######################################
parse_args() {
  local flag
  while [[ $# -gt 0 ]]; do
    flag="$1"
    case "${flag}" in
      --init-git)
        INIT_GIT=true
        shift
        continue
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      --app-id|--name|--owner|--criticality|--upstream-url|--upstream-revision|--upstream-tracking|--out-dir)
        # Value-taking options: fail with a message that names the option
        # instead of the shell's bare "parameter null or not set" diagnostic.
        if [[ -z "${2-}" ]]; then
          echo "ERROR: ${flag} requires a non-empty value" >&2
          exit 2
        fi
        ;;
      *)
        echo "Unknown arg: ${flag}" >&2
        usage >&2
        exit 2
        ;;
    esac
    # Only value-taking options reach this point; $2 is known to be non-empty.
    case "${flag}" in
      --app-id) APP_ID="$2" ;;
      --name) APP_NAME="$2" ;;
      --owner) OWNER="$2" ;;
      --criticality) CRITICALITY="$2" ;;
      --upstream-url) UPSTREAM_URL="$2" ;;
      --upstream-revision) UPSTREAM_REVISION="$2" ;;
      --upstream-tracking) UPSTREAM_TRACKING="$2" ;;
      --out-dir) OUT_DIR="$2" ;;
    esac
    shift 2
  done
}
parse_args "$@"
# Both identifying inputs must be present before anything is generated.
[[ -n "${APP_ID}" && -n "${UPSTREAM_URL}" ]] || {
  echo "ERROR: --app-id and --upstream-url are required" >&2
  usage >&2
  exit 2
}

# The app id is reused in file paths and resource names, so restrict its form.
app_id_pattern='^[a-z0-9][a-z0-9-]*$'
[[ "${APP_ID}" =~ ${app_id_pattern} ]] || {
  echo "ERROR: --app-id must match ${app_id_pattern}" >&2
  exit 2
}

# Enumerated options accept only their documented values.
if [[ ! "${CRITICALITY}" =~ ^(low|medium|high|critical)$ ]]; then
  echo "ERROR: --criticality must be low, medium, high, or critical" >&2
  exit 2
fi
if [[ ! "${UPSTREAM_TRACKING}" =~ ^(branch|tag|commit|release|digest)$ ]]; then
  echo "ERROR: --upstream-tracking must be branch, tag, commit, release, or digest" >&2
  exit 2
fi

# Derive the optional values that were not supplied.
APP_NAME="${APP_NAME:-${APP_ID}}"
OUT_DIR="${OUT_DIR:-${APP_ID}-railiance-overlay}"
#######################################
# Report whether a directory contains any entry, dotfiles included.
# Uses shell globs rather than parsing `ls`, so a path that begins with "-"
# is never mistaken for a command option.
# Arguments: $1 - directory path
# Returns:   0 if at least one entry exists, 1 otherwise
#######################################
dir_has_entries() {
  local entry
  for entry in "$1"/* "$1"/.[!.]* "$1"/..?*; do
    # An unmatched glob stays literal and fails both tests; -L also counts
    # dangling symlinks as entries.
    if [[ -e "${entry}" || -L "${entry}" ]]; then
      return 0
    fi
  done
  return 1
}

# Refuse to write into anything other than a new or empty directory.
if [[ -e "${OUT_DIR}" ]]; then
  if [[ ! -d "${OUT_DIR}" ]]; then
    echo "ERROR: output path exists and is not a directory: ${OUT_DIR}" >&2
    exit 1
  fi
  if dir_has_entries "${OUT_DIR}"; then
    echo "ERROR: output directory exists and is not empty: ${OUT_DIR}" >&2
    exit 1
  fi
fi
# Create the overlay directory skeleton with a single mkdir call.
skeleton_dirs=()
for rel_dir in \
  railiance \
  "charts/${APP_ID}/templates" \
  values \
  patches/upstream \
  tests \
  runbooks \
  docs; do
  skeleton_dirs+=("${OUT_DIR}/${rel_dir}")
done
mkdir -p "${skeleton_dirs[@]}"

# Placeholder file inside the otherwise empty patches/upstream directory.
touch "${OUT_DIR}/patches/upstream/.gitkeep"
# Write the overlay README. The heredoc delimiter is unquoted, so ${APP_NAME}
# is expanded at generation time; backticks are backslash-escaped so they reach
# the file as literal Markdown code markers instead of command substitutions.
cat > "${OUT_DIR}/README.md" <<EOF
# ${APP_NAME} Railiance Overlay
This repository wraps the upstream ${APP_NAME} application for the Railiance
staged promotion lifecycle.
Upstream source is recorded in \`railiance/upstream.toml\`. Upstream code is not
vendored here by default. Railiance deployment mechanics live in this overlay:
\`railiance/app.toml\`, Helm chart files, stage values, tests, and runbooks.
## Stage 1
Run local validation:
\`\`\`bash
./tests/stage1.sh
\`\`\`
## Stage 2 And Stage 3
Use the Railiance promotion lifecycle once the deploy/observe/promote tooling is
available. Production-critical workloads require human approval before canary
exposure and production promotion.
EOF
#######################################
# Escape a value for use inside a double-quoted TOML basic string.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
toml_escape() {
  local text="$1"
  # Backslashes first, so the escapes added below are not doubled again.
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "${text}"
}

#######################################
# Write railiance/upstream.toml describing the upstream source.
# User-supplied values are escaped so a quote or backslash in the URL or
# revision cannot produce an invalid TOML file.
# Globals:   OUT_DIR, UPSTREAM_URL, UPSTREAM_REVISION, UPSTREAM_TRACKING (read)
#######################################
write_upstream_toml() {
  local url revision
  url="$(toml_escape "${UPSTREAM_URL}")"
  revision="$(toml_escape "${UPSTREAM_REVISION}")"
  cat > "${OUT_DIR}/railiance/upstream.toml" <<EOF
[upstream]
url = "${url}"
revision = "${revision}"
tracking = "${UPSTREAM_TRACKING}"
license = "see-upstream"
notes = "Railiance overlay only; upstream code is not vendored here."
EOF
}
write_upstream_toml
#######################################
# Escape a value for use inside a double-quoted TOML basic string.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
toml_escape() {
  local text="$1"
  # Backslashes first, so the escapes added below are not doubled again.
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "${text}"
}

#######################################
# Write railiance/app.toml: app metadata, rollback contract, health endpoints,
# the three stage definitions, and the check catalogue.
# Free-text values (name, owner, revision) are escaped so a quote or backslash
# in them cannot produce an invalid TOML file. APP_ID and CRITICALITY are
# written as-is; the script validates both before files are generated.
# Globals:   OUT_DIR, APP_ID, APP_NAME, OWNER, CRITICALITY,
#            UPSTREAM_REVISION (read)
#######################################
write_app_toml() {
  local app_name owner revision
  app_name="$(toml_escape "${APP_NAME}")"
  owner="$(toml_escape "${OWNER}")"
  revision="$(toml_escape "${UPSTREAM_REVISION}")"
  cat > "${OUT_DIR}/railiance/app.toml" <<EOF
schema_version = "railiance.app.v1"
[app]
id = "${APP_ID}"
name = "${app_name}"
repo = "${APP_ID}-railiance-overlay"
owner = "${owner}"
criticality = "${CRITICALITY}"
description = "Railiance overlay for ${app_name}."
[source]
revision = "${revision}"
artifact = "image"
digest_policy = "preferred"
[rollback]
strategy = "helm-revision"
command = "bin/railiance rollback ${APP_ID}"
verification = "Stable release health check returns 200 after rollback."
[platform]
dependencies = []
[secrets]
references = []
[[observability.health_endpoints]]
name = "local-health"
url = "http://127.0.0.1:8080/health"
stage = "stage1"
expected_status = 200
[[observability.health_endpoints]]
name = "cluster-health"
url = "http://${APP_ID}.${APP_ID}.svc.cluster.local:8080/health"
stage = "stage2"
expected_status = 200
[stages.stage1]
enabled = true
namespace = "local"
release = "${APP_ID}-local"
commands = ["./tests/stage1.sh"]
checks = ["stage1-script", "local-health"]
evidence = ["Stage 1 script result", "local health check or explicit not-run note"]
requires_approval = false
[stages.stage2]
enabled = true
namespace = "${APP_ID}"
release = "${APP_ID}-canary"
commands = ["bin/railiance deploy --stage 2 ${APP_ID}", "bin/railiance observe ${APP_ID}"]
checks = ["server-dry-run", "canary-ready", "cluster-health"]
evidence = ["release name", "pod readiness", "health 200", "State Hub progress id"]
requires_approval = true
canary_mode = "isolated"
observation_minutes = 30
[stages.stage3]
enabled = true
namespace = "${APP_ID}"
release = "${APP_ID}"
commands = ["bin/railiance promote ${APP_ID}", "bin/railiance observe ${APP_ID}"]
checks = ["stage2-accepted", "rollback-target", "cluster-health"]
evidence = ["promotion command id", "new stable digest", "post-promotion smoke"]
requires_approval = true
promotion_mode = "release-replace"
previous_stable = "helm:${APP_ID}:previous"
[[checks]]
id = "stage1-script"
type = "command"
stage = "stage1"
description = "Run generated Stage 1 validation script."
required = true
run = "./tests/stage1.sh"
timeout_seconds = 300
[[checks]]
id = "helm-template"
type = "helm"
stage = "stage1"
description = "Render Helm templates locally when Helm is available."
required = false
chart = "charts/${APP_ID}"
values = "values/stage1.yaml"
mode = "template"
[[checks]]
id = "local-health"
type = "http"
stage = "stage1"
description = "Confirm local service health when a local target is running."
required = false
url = "http://127.0.0.1:8080/health"
expected_status = 200
timeout_seconds = 10
[[checks]]
id = "server-dry-run"
type = "helm"
stage = "stage2"
description = "Render and submit a server-side dry run before canary."
required = true
chart = "charts/${APP_ID}"
values = "values/stage2-canary.yaml"
mode = "server-dry-run"
[[checks]]
id = "canary-ready"
type = "kubernetes"
stage = "stage2"
description = "Canary deployment reaches Available."
required = true
namespace = "${APP_ID}"
resource = "deploy/${APP_ID}-canary"
condition = "Available"
[[checks]]
id = "cluster-health"
type = "http"
stage = "stage2"
description = "Cluster health endpoint returns 200."
required = true
url = "http://${APP_ID}.${APP_ID}.svc.cluster.local:8080/health"
expected_status = 200
timeout_seconds = 10
[[checks]]
id = "stage2-accepted"
type = "manual"
stage = "stage3"
description = "Stage 2 gates passed for the same candidate artifact."
required = true
evidence_required = "State Hub Stage 2 acceptance progress id."
[[checks]]
id = "rollback-target"
type = "manual"
stage = "stage3"
description = "Previous stable release is recorded before promotion."
required = true
evidence_required = "Previous Helm revision or image digest."
EOF
}
write_app_toml
#######################################
# Escape a value for use inside a double-quoted YAML scalar.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
yaml_escape() {
  local text="$1"
  # Backslashes first, so the escapes added below are not doubled again.
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "${text}"
}

#######################################
# Write the Helm Chart.yaml for the overlay chart.
# The description is emitted as a quoted, escaped scalar so an app name
# containing ": ", " #", or quotes cannot break the YAML; appVersion is
# escaped for the same reason.
# Globals:   OUT_DIR, APP_ID, APP_NAME, UPSTREAM_REVISION (read)
#######################################
write_chart_metadata() {
  local app_name revision
  app_name="$(yaml_escape "${APP_NAME}")"
  revision="$(yaml_escape "${UPSTREAM_REVISION}")"
  cat > "${OUT_DIR}/charts/${APP_ID}/Chart.yaml" <<EOF
apiVersion: v2
name: ${APP_ID}
description: "Railiance overlay chart for ${app_name}"
type: application
version: 0.1.0
appVersion: "${revision}"
EOF
}
write_chart_metadata
#######################################
# Write the chart's default values.yaml.
# Keys are nested the way the chart templates read them
# (.Values.image.*, .Values.service.port, .Values.resources). The tag is
# quoted so a numeric-looking revision such as 1.10 stays a string instead of
# being parsed as a YAML number.
# Globals:   OUT_DIR, APP_ID, UPSTREAM_REVISION (read)
#######################################
write_chart_values() {
  cat > "${OUT_DIR}/charts/${APP_ID}/values.yaml" <<EOF
image:
  repository: ${APP_ID}
  tag: "${UPSTREAM_REVISION}"
  digest: ""
  pullPolicy: IfNotPresent
replicaCount: 1
service:
  port: 8080
resources:
  requests:
    cpu: 50m
    memory: 128Mi
  limits:
    cpu: 500m
    memory: 512Mi
env: []
secretRefs: []
EOF
}
write_chart_values
#######################################
# Write the Helm Deployment template.
# The heredoc delimiter is quoted, so the {{ ... }} template actions are
# written literally. The manifest is nested as a Deployment requires, and the
# image reference pins by digest when .Values.image.digest is set, falling
# back to the tag otherwise.
# Globals:   OUT_DIR, APP_ID (read)
#######################################
write_deployment_template() {
  cat > "${OUT_DIR}/charts/${APP_ID}/templates/deployment.yaml" <<'EOF'
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Release.Name }}
  labels:
    app.kubernetes.io/name: {{ .Chart.Name }}
    app.kubernetes.io/instance: {{ .Release.Name }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app.kubernetes.io/name: {{ .Chart.Name }}
      app.kubernetes.io/instance: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app.kubernetes.io/name: {{ .Chart.Name }}
        app.kubernetes.io/instance: {{ .Release.Name }}
    spec:
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.repository }}{{ if .Values.image.digest }}@{{ .Values.image.digest }}{{ else }}:{{ .Values.image.tag }}{{ end }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          ports:
            - name: http
              containerPort: {{ .Values.service.port }}
          resources:
{{ toYaml .Values.resources | indent 12 }}
EOF
}
write_deployment_template
#######################################
# Write the Helm Service template.
# The heredoc delimiter is quoted, so the {{ ... }} template actions are
# written literally. Keys are nested as a Service manifest requires
# (metadata.name, spec.selector, spec.ports[]).
# Globals:   OUT_DIR, APP_ID (read)
#######################################
write_service_template() {
  cat > "${OUT_DIR}/charts/${APP_ID}/templates/service.yaml" <<'EOF'
apiVersion: v1
kind: Service
metadata:
  name: {{ .Release.Name }}
  labels:
    app.kubernetes.io/name: {{ .Chart.Name }}
    app.kubernetes.io/instance: {{ .Release.Name }}
spec:
  selector:
    app.kubernetes.io/name: {{ .Chart.Name }}
    app.kubernetes.io/instance: {{ .Release.Name }}
  ports:
    - name: http
      port: {{ .Values.service.port }}
      targetPort: http
EOF
}
write_service_template
#######################################
# Write the per-stage Helm values files (stage1, stage2-canary,
# stage3-production).
# repository and tag are nested under image: so they override the chart's
# .Values.image.* defaults, and the tag is quoted so a numeric-looking
# revision such as 1.10 stays a string.
# Globals:   OUT_DIR, APP_ID, UPSTREAM_REVISION (read)
#######################################
write_stage_values() {
  cat > "${OUT_DIR}/values/stage1.yaml" <<EOF
image:
  repository: ${APP_ID}
  tag: "${UPSTREAM_REVISION}"
EOF
  cat > "${OUT_DIR}/values/stage2-canary.yaml" <<EOF
image:
  repository: ${APP_ID}
  tag: "${UPSTREAM_REVISION}"
replicaCount: 1
EOF
  cat > "${OUT_DIR}/values/stage3-production.yaml" <<EOF
image:
  repository: ${APP_ID}
  tag: "${UPSTREAM_REVISION}"
replicaCount: 2
EOF
}
write_stage_values
#######################################
# Generate the executable tests/stage1.sh validation script.
# The outer heredoc is unquoted: ${APP_ID} is expanded now, while \$-escaped
# expansions are left for the generated script to evaluate at run time.
# The helm render is only a pass/fail check, so its output is discarded rather
# than written to a predictable file name under /tmp.
# Globals:   OUT_DIR, APP_ID (read)
#######################################
write_stage1_script() {
  cat > "${OUT_DIR}/tests/stage1.sh" <<EOF
#!/usr/bin/env bash
set -euo pipefail
cd "\$(dirname "\${BASH_SOURCE[0]}")/.."
python3 - <<'PY'
import pathlib
import tomllib
data = tomllib.loads(pathlib.Path('railiance/app.toml').read_text())
assert data['schema_version'] == 'railiance.app.v1'
assert data['app']['id'] == '${APP_ID}'
print('app.toml parse ok')
PY
if command -v helm >/dev/null 2>&1; then
  helm template "${APP_ID}-local" "charts/${APP_ID}" -f values/stage1.yaml >/dev/null
  echo 'helm template ok'
else
  echo 'helm unavailable; skipped helm template check'
fi
EOF
  chmod +x "${OUT_DIR}/tests/stage1.sh"
}
write_stage1_script
# Write the rollback runbook. The heredoc delimiter is unquoted, so ${APP_NAME}
# is expanded at generation time; the backticks around the app.toml path are
# backslash-escaped so they are written literally.
cat > "${OUT_DIR}/runbooks/rollback.md" <<EOF
# ${APP_NAME} Rollback
Rollback target: previous stable Helm release revision or image digest.
1. Confirm the current incident symptom and freeze further promotion actions.
2. Run the declared rollback command from \`railiance/app.toml\`.
3. Verify the stable health endpoint returns 200.
4. Record a State Hub progress note with non-secret evidence: release name,
previous stable target, rollback command id, health status, and follow-up.
Do not paste credentials, kubeconfigs, tokens, or private logs into evidence.
EOF
# Write the promotion notes summarising the three stages. The heredoc
# delimiter is unquoted, so ${APP_NAME} is expanded at generation time.
cat > "${OUT_DIR}/docs/promotion.md" <<EOF
# ${APP_NAME} Promotion Notes
This overlay follows the Railiance three-stage lifecycle.
- Stage 1 validates local render and non-production checks.
- Stage 2 deploys an isolated canary by default.
- Stage 3 replaces the stable release only after Stage 2 acceptance.
Before Stage 2, fill in real image repositories, platform dependencies,
observability endpoints, and rollback target details.
EOF
# Write the overlay's .gitignore. The heredoc delimiter is quoted, so the
# patterns are written verbatim with no shell expansion. Besides editor and
# Python residue, the list excludes secrets directories, *.kubeconfig files,
# and .railiance_gitea.conf.
cat > "${OUT_DIR}/.gitignore" <<'EOF'
.DS_Store
__pycache__/
*.pyc
*.log
*.tmp
*.bak
.secrets/
secrets/
*.kubeconfig
.railiance_gitea.conf
EOF
# With --init-git, initialise an empty Git repository in the skeleton;
# nothing is committed.
case "${INIT_GIT}" in
  true) git -C "${OUT_DIR}" init ;;
esac

# Closing summary and next steps for the operator.
printf '%s\n' \
  "Created Railiance overlay repo skeleton: ${OUT_DIR}" \
  "Next: edit railiance/app.toml, run tests/stage1.sh, then commit the overlay repo."