diff --git a/bin/railiance b/bin/railiance index 09e2c1a..5cc2b96 100755 --- a/bin/railiance +++ b/bin/railiance @@ -17,6 +17,7 @@ Commands: cloudinit Emit minimal cloud-init user-data init-repo Idempotently furnish repo housekeeping create-overlay Scaffold a Railiance overlay repo for an upstream app + run Run Stage 1 local validation from railiance/app.toml build-spore Build a distributable "Spore" bundle seed-local Run the seed script on this machine checklist Pre-VM checklist @@ -41,6 +42,7 @@ case "$cmd" in cloudinit) cat "$ROOT/cloudinit/user-data.yaml" ;; init-repo) bash "$ROOT/tools/furnish_railiance_repo.sh" ;; create-overlay) bash "$ROOT/tools/create_railiance_overlay_repo.sh" "$@" ;; + run) exec railiance-run "$@" ;; build-spore) bash "$ROOT/tools/build_spore.sh" ;; seed-local) bash "$ROOT/tools/seed_node.sh" ;; checklist) diff --git a/docs/README.md b/docs/README.md index 9ec22d2..015fc97 100644 --- a/docs/README.md +++ b/docs/README.md @@ -76,6 +76,7 @@ From two bare Linux servers, a Git repo, and valid credentials, you can rebuild - [Deployment lifecycle](deployment-lifecycle.md) - [Railiance app.toml contract](app-toml-contract.md) - [Railiance overlay repo pattern](overlay-repo-pattern.md) +- [Railiance run command](railiance-run-command.md) ## 👥 Contributing diff --git a/docs/railiance-run-command.md b/docs/railiance-run-command.md new file mode 100644 index 0000000..7390e2a --- /dev/null +++ b/docs/railiance-run-command.md @@ -0,0 +1,52 @@ +# Railiance Run Command + +`bin/railiance run` executes Stage 1 local validation for a repository that +contains `railiance/app.toml`. + +The command is intentionally local and conservative: + +- reads `railiance/app.toml` using the `railiance.app.v1` contract; +- runs `[stages.stage1].commands` from the app directory; +- evaluates Stage 1 check ids listed in `[stages.stage1].checks` when they can + be checked locally; +- emits a machine-readable `railiance.run-result.v1` JSON result; +- records command references, exit codes, durations, and output byte counts, + but not shell text or command stdout/stderr content; +- strips credentials, query strings, and fragments from URLs before reporting HTTP + check results. + +## Usage + +```bash +bin/railiance run /path/to/app-or-overlay --pretty +bin/railiance run . --json-out .railiance/stage1-result.json +``` + +The process exits `0` only when all Stage 1 commands and required checks pass. +Optional checks may be skipped without failing the run. For example, an optional +local health endpoint can be declared before a local server command exists. + +## Supported Local Checks + +- `command`: runs the check `run` command in the app directory. +- `http`: calls the declared URL and compares the HTTP status. +- `helm`: runs `helm template` when Helm is installed. Required Helm checks fail + if Helm is unavailable; optional Helm checks are skipped. + +Other check types are reported as skipped or failed depending on whether the +check is required. Stage 2 and Stage 3 checks are never executed by +`railiance run`. + +## Result Shape + +The JSON result includes: + +- app identity and source revision; +- contract path and app directory; +- command/check status summaries using contract references instead of raw shell + commands; +- expected evidence labels from Stage 1; +- timing and exit status metadata. + +The result is suitable for later promotion gates and State Hub progress notes, +without embedding secrets or verbose logs. diff --git a/tools/README_tools.md b/tools/README_tools.md index a76455e..31538dd 100644 --- a/tools/README_tools.md +++ b/tools/README_tools.md @@ -59,6 +59,10 @@ This model emphasizes: --- +### `railiance-run` +- Executes Stage 1 local validation from `railiance/app.toml`. +- Emits a `railiance.run-result.v1` JSON result without command logs or secrets. + ### `create_railiance_overlay_repo.sh` - Scaffolds a local Railiance overlay repo for a third-party upstream app. - Records upstream identity without vendoring upstream code. diff --git a/tools/cmd/railiance-run b/tools/cmd/railiance-run new file mode 100755 index 0000000..45c15d9 --- /dev/null +++ b/tools/cmd/railiance-run @@ -0,0 +1,301 @@ +#!/usr/bin/env python3 +"""Railiance Stage 1 local validation command.""" + +from __future__ import annotations + +import argparse +import json +import shutil +import subprocess +import sys +import time +import tomllib +import urllib.error +import urllib.request +import urllib.parse +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +SUPPORTED_SCHEMA = "railiance.app.v1" + + +def utc_now() -> str: + return datetime.now(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def load_contract(app_dir: Path) -> tuple[Path, dict[str, Any]]: + path = app_dir / "railiance" / "app.toml" + if not path.exists(): + raise SystemExit(f"Missing Railiance contract: {path}") + with path.open("rb") as handle: + data = tomllib.load(handle) + if data.get("schema_version") != SUPPORTED_SCHEMA: + raise SystemExit( + f"Unsupported schema_version {data.get('schema_version')!r}; expected {SUPPORTED_SCHEMA}" + ) + return path, data + + +def command_result( + command: str, cwd: Path, timeout_seconds: int | None, command_ref: str +) -> dict[str, Any]: + started = time.monotonic() + timeout = timeout_seconds or 900 + try: + completed = subprocess.run( + command, + cwd=cwd, + shell=True, + text=True, + capture_output=True, + timeout=timeout, + check=False, + ) + status = "passed" if completed.returncode == 0 else "failed" + return { + "command_ref": command_ref, + "status": status, + "exit_code": completed.returncode, + "duration_seconds": round(time.monotonic() - started, 3), + "stdout_bytes": len(completed.stdout.encode()), + "stderr_bytes": len(completed.stderr.encode()), + } + except subprocess.TimeoutExpired as exc: + return { + "command_ref": command_ref, + "status": "failed", + "exit_code": None, + "duration_seconds": round(time.monotonic() - started, 3), + "error": f"timeout after {timeout}s", + "stdout_bytes": len((exc.stdout or "").encode()) if isinstance(exc.stdout, str) else 0, + "stderr_bytes": len((exc.stderr or "").encode()) if isinstance(exc.stderr, str) else 0, + } + + +def check_required(check: dict[str, Any]) -> bool: + return bool(check.get("required", True)) + + +def skipped(check: dict[str, Any], reason: str) -> dict[str, Any]: + required = check_required(check) + return { + "id": check.get("id"), + "type": check.get("type"), + "required": required, + "status": "failed" if required else "skipped", + "reason": reason, + } + + +def scrub_url(url: str) -> str: + try: + parts = urllib.parse.urlsplit(url) + except ValueError: + return "" + netloc = parts.netloc.rsplit("@", 1)[-1] + return urllib.parse.urlunsplit((parts.scheme, netloc, parts.path, "", "")) + + +def run_http_check(check: dict[str, Any]) -> dict[str, Any]: + started = time.monotonic() + url = str(check.get("url", "")) + timeout = int(check.get("timeout_seconds", 10)) + expected_status = int(check.get("expected_status", 200)) + required = check_required(check) + try: + with urllib.request.urlopen(url, timeout=timeout) as response: + status_code = response.getcode() + except (urllib.error.URLError, TimeoutError, ValueError) as exc: + return { + "id": check.get("id"), + "type": "http", + "required": required, + "status": "failed" if required else "skipped", + "url": scrub_url(url), + "duration_seconds": round(time.monotonic() - started, 3), + "reason": str(exc), + } + status = "passed" if status_code == expected_status else "failed" + return { + "id": check.get("id"), + "type": "http", + "required": required, + "status": status if required or status == "passed" else "skipped", + "url": scrub_url(url), + "expected_status": expected_status, + "actual_status": status_code, + "duration_seconds": round(time.monotonic() - started, 3), + } + + +def run_helm_check(check: dict[str, Any], app_dir: Path, release: str) -> dict[str, Any]: + if shutil.which("helm") is None: + return skipped(check, "helm is not installed") + chart = str(check.get("chart", "")) + values = str(check.get("values", "")) + mode = str(check.get("mode", "template")) + if mode not in {"template", "server-dry-run"}: + return skipped(check, f"unsupported helm mode for Stage 1: {mode}") + command = f"helm template {release} {chart}" + if values: + command += f" -f {values}" + result = command_result( + command, app_dir, int(check.get("timeout_seconds", 120)), f"checks.{check.get('id')}.helm" + ) + return { + "id": check.get("id"), + "type": "helm", + "required": check_required(check), + "status": result["status"], + "mode": mode, + "command_ref": result.get("command_ref"), + "exit_code": result.get("exit_code"), + "duration_seconds": result.get("duration_seconds"), + "stdout_bytes": result.get("stdout_bytes"), + "stderr_bytes": result.get("stderr_bytes"), + } + + +def run_check(check: dict[str, Any], app_dir: Path, release: str) -> dict[str, Any]: + check_type = check.get("type") + if check.get("stage") != "stage1": + return skipped(check, "not a Stage 1 check") + if check_type == "command": + command = str(check.get("run", "")) + if not command: + return skipped(check, "command check has no run field") + result = command_result( + command, app_dir, int(check.get("timeout_seconds", 900)), f"checks.{check.get('id')}.command" + ) + return { + "id": check.get("id"), + "type": "command", + "required": check_required(check), + **result, + } + if check_type == "http": + return run_http_check(check) + if check_type == "helm": + return run_helm_check(check, app_dir, release) + if check_type == "manual": + return skipped(check, "manual check cannot be satisfied by railiance run") + return skipped(check, f"unsupported local check type: {check_type}") + + +def required_failures(items: list[dict[str, Any]]) -> list[dict[str, Any]]: + return [item for item in items if item.get("required", True) and item.get("status") != "passed"] + + +def build_result(app_dir: Path, contract_path: Path, data: dict[str, Any]) -> dict[str, Any]: + stage = data.get("stages", {}).get("stage1", {}) + if not stage.get("enabled", False): + raise SystemExit("Stage 1 is disabled in railiance/app.toml") + + app = data.get("app", {}) + source = data.get("source", {}) + started_at = utc_now() + started_monotonic = time.monotonic() + + stage_commands = list(stage.get("commands", [])) + command_results = [ + command_result(command, app_dir, None, f"stages.stage1.commands[{index}]") + for index, command in enumerate(stage_commands) + ] + + check_ids = list(stage.get("checks", [])) + all_checks = {check.get("id"): check for check in data.get("checks", [])} + check_results = [] + for check_id in check_ids: + check = all_checks.get(check_id) + if check is None: + check_results.append( + { + "id": check_id, + "type": None, + "required": True, + "status": "failed", + "reason": "check id is referenced by Stage 1 but not defined", + } + ) + continue + check_results.append(run_check(check, app_dir, str(stage.get("release", app.get("id", "app"))))) + + command_failures = [item for item in command_results if item.get("status") != "passed"] + check_failures = required_failures(check_results) + status = "passed" if not command_failures and not check_failures else "failed" + + return { + "schema_version": "railiance.run-result.v1", + "status": status, + "stage": "stage1", + "started_at": started_at, + "finished_at": utc_now(), + "duration_seconds": round(time.monotonic() - started_monotonic, 3), + "app": { + "id": app.get("id"), + "name": app.get("name"), + "repo": app.get("repo"), + "owner": app.get("owner"), + "criticality": app.get("criticality"), + }, + "source": { + "revision": source.get("revision"), + "artifact": source.get("artifact"), + "digest_policy": source.get("digest_policy"), + }, + "contract": str(contract_path), + "app_dir": str(app_dir), + "release": stage.get("release"), + "namespace": stage.get("namespace"), + "requires_approval": bool(stage.get("requires_approval", False)), + "evidence_expected": list(stage.get("evidence", [])), + "commands": command_results, + "checks": check_results, + "summary": { + "commands_total": len(command_results), + "commands_failed": len(command_failures), + "checks_total": len(check_results), + "required_checks_failed": len(check_failures), + }, + } + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Run Railiance Stage 1 local validation from railiance/app.toml." + ) + parser.add_argument( + "app_dir", + nargs="?", + default=".", + help="Application or overlay repository directory (default: current directory).", + ) + parser.add_argument( + "--json-out", + help="Optional path to write the machine-readable run result.", + ) + parser.add_argument( + "--pretty", + action="store_true", + help="Pretty-print JSON output to stdout.", + ) + return parser.parse_args(argv) + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + app_dir = Path(args.app_dir).resolve() + contract_path, data = load_contract(app_dir) + result = build_result(app_dir, contract_path, data) + rendered = json.dumps(result, indent=2 if args.pretty else None, sort_keys=True) + print(rendered) + if args.json_out: + output_path = Path(args.json_out) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(rendered + "\n", encoding="utf-8") + return 0 if result["status"] == "passed" else 1 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/tools/create_railiance_overlay_repo.sh b/tools/create_railiance_overlay_repo.sh index 3c6035c..a5bd6c4 100755 --- a/tools/create_railiance_overlay_repo.sh +++ b/tools/create_railiance_overlay_repo.sh @@ -178,8 +178,8 @@ enabled = true namespace = "local" release = "${APP_ID}-local" commands = ["./tests/stage1.sh"] -checks = ["helm-template", "local-health"] -evidence = ["helm template success", "local health check or explicit not-run note"] +checks = ["stage1-script", "local-health"] +evidence = ["Stage 1 script result", "local health check or explicit not-run note"] requires_approval = false [stages.stage2] @@ -204,12 +204,21 @@ requires_approval = true promotion_mode = "release-replace" previous_stable = "helm:${APP_ID}:previous" +[[checks]] +id = "stage1-script" +type = "command" +stage = "stage1" +description = "Run generated Stage 1 validation script." +required = true +run = "./tests/stage1.sh" +timeout_seconds = 300 + [[checks]] id = "helm-template" type = "helm" stage = "stage1" -description = "Render Helm templates locally." -required = true +description = "Render Helm templates locally when Helm is available." +required = false chart = "charts/${APP_ID}" values = "values/stage1.yaml" mode = "template" diff --git a/workplans/RAIL-BS-WP-0006-staged-promotion-lifecycle.md b/workplans/RAIL-BS-WP-0006-staged-promotion-lifecycle.md index 7014156..8dabd3e 100644 --- a/workplans/RAIL-BS-WP-0006-staged-promotion-lifecycle.md +++ b/workplans/RAIL-BS-WP-0006-staged-promotion-lifecycle.md @@ -135,7 +135,7 @@ logic into the upstream repository. ```task id: RAIL-BS-WP-0006-T04 -status: todo +status: done priority: high state_hub_task_id: "95c3311b-04bb-4c83-bda3-47958217b665" ``` @@ -152,6 +152,8 @@ Expected behavior: **Done when:** at least one representative app can complete Stage 1 locally. +2026-06-27: Added `tools/cmd/railiance-run`, the `bin/railiance run` dispatcher entry, and `docs/railiance-run-command.md`. The command reads `railiance/app.toml`, runs Stage 1 commands and local checks, and emits `railiance.run-result.v1` JSON without command logs or secret values. Updated the overlay generator so a generated Forgejo overlay completes Stage 1 locally in this environment; Helm rendering is optional when Helm is unavailable. + --- ### T05 - Canary Helm chart template