# Files
# railiance-cluster/examples/railiance/app.toml
# tegwick c38bdce6bb
# Some checks failed
# railiance-tests / smoke (push) Has been cancelled
# Define Railiance app contract
# 2026-06-27 15:33:47 +02:00

# 177 lines
# 4.7 KiB
# TOML

# Contract schema identifier. This is a root-table key, so it has to stay
# above the first table header.
schema_version = "railiance.app.v1"

# Identity and ownership metadata for the declared application.
[app]
id = "example-service"
name = "Example Service"
description = "Reference declaration for the Railiance staged promotion lifecycle."
repo = "railiance-apps/example-service"
owner = "platform"
# NOTE(review): the set of accepted criticality values is not visible in
# this file; confirm against the railiance.app.v1 schema.
criticality = "critical"
# Origin of the deployable artifact.
[source]
artifact = "image"
revision = "git:main"
# NOTE(review): presumably "required" means the image must be pinned by
# digest; confirm against the schema.
digest_policy = "required"
# Rollback contract: the strategy, the command that performs it, and the
# observation that confirms it succeeded.
[rollback]
strategy = "helm-revision"
# Operator-facing command for this app id.
command = "bin/railiance rollback example-service"
# Free-text success criterion, not a machine-readable check.
verification = "GET /health returns 200 on the restored stable release."
# Platform services the app depends on. Both entries are required and are
# tied to stage2; `evidence` is the free-text proof expected for each.
[[platform.dependencies]]
name = "state-hub"
kind = "state-hub"
stage = "stage2"
required = true
evidence = "State Hub /healthz returns ok from the cluster path."

[[platform.dependencies]]
name = "postgres"
kind = "postgres"
stage = "stage2"
required = true
evidence = "Target database reports Ready and backup posture is current."
# Secret references only: this entry names a route and a target object,
# and carries no secret value.
[[secrets.references]]
name = "runtime-api-key"
stage = "stage2"
required = true
# NOTE(review): presumably `route` selects the secret backend path and
# `target` is the Kubernetes object that receives it; confirm against the schema.
route = "openbao-api-key"
target = "ExternalSecret/example-service-runtime"
# Health endpoints, one per stage. The same URLs are repeated in the
# [[checks]] entries with ids "local-health" and "cluster-health"; TOML has
# no interpolation, so the copies have to be kept in sync by hand.
[[observability.health_endpoints]]
name = "local-health"
stage = "stage1"
url = "http://127.0.0.1:8080/health"
expected_status = 200

[[observability.health_endpoints]]
name = "cluster-health"
stage = "stage2"
url = "http://example-service.example-service.svc.cluster.local:8080/health"
expected_status = 200
# Metric reference for stage2.
[[observability.metrics]]
name = "request-errors"
stage = "stage2"
# Literal (single-quoted) string because the PromQL selector itself
# contains double quotes.
reference = 'promql:rate(http_requests_total{status=~"5.."}[5m])'
# Log reference for stage2.
[[observability.logs]]
name = "secret-leak-scan"
stage = "stage2"
# NOTE(review): this command only fetches the canary deployment's logs; how
# the leak scan itself is applied is not visible in this file.
reference = "kubectl logs -n example-service deploy/example-service-canary"
# Stage 1: runs in the "local" namespace and needs no approval.
[stages.stage1]
enabled = true
requires_approval = false
namespace = "local"
release = "example-service-local"
commands = [
    "make test",
    "helm template charts/example-service",
]
# Each entry is the id of a [[checks]] table in this file.
checks = [
    "unit-tests",
    "helm-template",
    "local-health",
]
evidence = [
    "pytest output",
    "helm template success",
    "local health 200",
]
# Stage 2: canary release in the app namespace; approval is required.
[stages.stage2]
enabled = true
requires_approval = true
namespace = "example-service"
release = "example-service-canary"
canary_mode = "isolated"
observation_minutes = 60
commands = [
    "bin/railiance deploy --stage 2 example-service",
    "bin/railiance observe example-service",
]
# Each entry is the id of a [[checks]] table in this file.
checks = [
    "server-dry-run",
    "canary-ready",
    "cluster-health",
    "operator-approval",
]
evidence = [
    "release name",
    "pod readiness",
    "health 200",
    "State Hub progress id",
]
# Stage 3: promotion to the stable release name; approval is required.
[stages.stage3]
enabled = true
requires_approval = true
namespace = "example-service"
release = "example-service"
promotion_mode = "release-replace"
previous_stable = "helm:example-service:previous"
commands = [
    "bin/railiance promote example-service",
    "bin/railiance observe example-service",
]
# Each entry is the id of a [[checks]] table in this file.
# NOTE(review): "cluster-health" and "operator-approval" are declared with
# stage = "stage2"; confirm the contract lets a later stage reuse checks
# declared for an earlier one.
checks = [
    "stage2-accepted",
    "rollback-target",
    "cluster-health",
    "operator-approval",
]
evidence = [
    "promotion command id",
    "new stable digest",
    "post-promotion smoke",
]
# Stage 1 command check; `run` matches the first entry of
# stages.stage1.commands.
[[checks]]
id = "unit-tests"
type = "command"
stage = "stage1"
required = true
description = "Run repository unit tests."
run = "make test"
timeout_seconds = 600
# Stage 1 Helm check in "template" mode.
# NOTE(review): stages.stage1.commands runs
# `helm template charts/example-service` without a values file, while this
# check names values/local.yaml; confirm both are meant to render the same thing.
[[checks]]
id = "helm-template"
type = "helm"
stage = "stage1"
required = true
description = "Render Helm templates locally."
mode = "template"
chart = "charts/example-service"
values = "values/local.yaml"
# Stage 1 HTTP check. The URL and expected status duplicate the
# "local-health" entry under observability.health_endpoints; keep them in sync.
[[checks]]
id = "local-health"
type = "http"
stage = "stage1"
required = true
description = "Confirm local service health."
url = "http://127.0.0.1:8080/health"
expected_status = 200
timeout_seconds = 10
# Stage 2 Helm check in "server-dry-run" mode, using the canary values file.
[[checks]]
id = "server-dry-run"
type = "helm"
stage = "stage2"
required = true
description = "Render and submit a server-side dry run before canary."
mode = "server-dry-run"
chart = "charts/example-service"
values = "values/canary.yaml"
# Stage 2 Kubernetes check. The namespace and deployment name line up with
# stages.stage2 (namespace "example-service", release "example-service-canary").
[[checks]]
id = "canary-ready"
type = "kubernetes"
stage = "stage2"
required = true
description = "Canary deployment reaches Available."
namespace = "example-service"
resource = "deploy/example-service-canary"
condition = "Available"
# In-cluster HTTP check. The URL and expected status duplicate the
# "cluster-health" entry under observability.health_endpoints; keep them in sync.
# NOTE(review): declared for stage2 but also listed in stages.stage3.checks;
# confirm that reuse across stages is intended.
[[checks]]
id = "cluster-health"
type = "http"
stage = "stage2"
required = true
description = "Cluster health endpoint returns 200."
url = "http://example-service.example-service.svc.cluster.local:8080/health"
expected_status = 200
timeout_seconds = 10
# Manual gate; `evidence_required` lists what has to be recorded.
# NOTE(review): declared for stage2 but also listed in stages.stage3.checks;
# confirm that reuse across stages is intended.
[[checks]]
id = "operator-approval"
type = "manual"
stage = "stage2"
required = true
description = "Human approval is recorded before production-critical traffic changes."
evidence_required = "State Hub approval note id, candidate digest, rollback target."
# Stage 3 manual gate tying promotion to a recorded Stage 2 acceptance.
[[checks]]
id = "stage2-accepted"
type = "manual"
stage = "stage3"
required = true
description = "Stage 2 gates passed for the same candidate artifact."
evidence_required = "State Hub Stage 2 acceptance progress id."
# Stage 3 manual gate: the rollback target has to be recorded before
# promotion. See stages.stage3.previous_stable for the declared target.
[[checks]]
id = "rollback-target"
type = "manual"
stage = "stage3"
required = true
description = "Previous stable release is recorded before promotion."
evidence_required = "Previous Helm revision or image digest."