diff --git a/.gitea/workflows/forge-runner-smoke.yaml b/.gitea/workflows/forge-runner-smoke.yaml
new file mode 100644
index 0000000..12582f4
--- /dev/null
+++ b/.gitea/workflows/forge-runner-smoke.yaml
@@ -0,0 +1,46 @@
+name: Forge Runner Smoke
+
+# Non-production proof that the forge-owned runner schedules jobs, reports
+# basic host context, and carries no baseline deployment or publish secrets.
+on:
+  workflow_dispatch:
+  push:
+    paths:
+      - ".gitea/workflows/forge-runner-smoke.yaml"
+      - "docs/gitea-actions-runner-substrate.md"
+      - "docs/gitea-actions-runner-evidence.md"
+      - "runner/**"
+      - "tools/gitea-runner-status.sh"
+
+jobs:
+  compatibility-smoke:
+    # Compatibility labels from the first runner label contract.
+    runs-on: [self-hosted, haskelseed]
+    steps:
+      - name: Report non-secret runner context
+        run: |
+          set -eu
+          echo "repository=${GITHUB_REPOSITORY:-unknown}"
+          echo "sha=${GITHUB_SHA:-unknown}"
+          echo "runner=${RUNNER_NAME:-unknown}"
+          uname -a
+
+      - name: Check container tooling if present
+        run: |
+          set -eu
+          # Docker is optional here: report what is found, and do not fail
+          # the smoke job when the CLI exists but the server is unreachable.
+          if command -v docker >/dev/null 2>&1; then
+            docker version --format 'docker-server={{.Server.Version}}' \
+              || echo "docker present but server not reachable"
+          else
+            echo "docker not present on smoke runner"
+          fi
+
+      - name: Verify no baseline deployment or publish secrets
+        run: |
+          set -eu
+          # Each variable must be unset or empty on the baseline runner.
+          test -z "${KUBECONFIG:-}" || { echo "KUBECONFIG unexpectedly set"; exit 1; }
+          test -z "${REGISTRY_PASSWORD:-}" || { echo "REGISTRY_PASSWORD unexpectedly set"; exit 1; }
+          test -z "${GITEA_RUNNER_REGISTRATION_TOKEN:-}" || { echo "runner registration token exposed"; exit 1; }
diff --git a/Makefile b/Makefile
index c0a7975..f12e3a8 100644
--- a/Makefile
+++ b/Makefile
@@ -11,6 +11,7 @@ GITEA_DB_CLUSTER ?= gitea-db
 GITEA_DB_NAMESPACE ?= databases
 REGISTRY_DOCS ?= docs/gitea-container-registry.md docs/gitea-package-registry.md
 EVIDENCE_DOCS ?= docs/observability-operating-evidence.md docs/ci-runner-actions-gitops-ownership.md docs/backup-restore-secret-handoff.md
+RUNNER_DOCS ?= docs/gitea-actions-runner-substrate.md docs/gitea-actions-runner-evidence.md
 SOPS_SENTINEL ?= $(GITEA_VALUES)
 
 ##@ Operator checks
@@ -47,6 +48,34 @@ evidence-docs: ## Print forge evidence and handoff contracts
 		sed -n '1,260p' "$$doc"; \
 	done
 
+runner-docs: ## Print Gitea Actions runner substrate docs and evidence
+	@for doc in $(RUNNER_DOCS); do \
+		printf '\n## %s\n\n' "$$doc"; \
+		sed -n '1,260p' "$$doc"; \
+	done
+
+runner-status: ## Read-only Actions runner, host, and inter-hub registry probes
+	bash tools/gitea-runner-status.sh
+
+check-runner-tools: ## Check local tools used by runner inspection targets
+	@missing=0; \
+	for tool in curl ssh docker; do \
+		if command -v $$tool >/dev/null 2>&1; then \
+			echo "ok: $$tool"; \
+		else \
+			echo "missing: $$tool"; \
+			missing=1; \
+		fi; \
+	done; \
+	for tool in skopeo act_runner; do \
+		if command -v $$tool >/dev/null 2>&1; then \
+			echo "ok: $$tool"; \
+		else \
+			echo "optional: $$tool not found"; \
+		fi; \
+	done; \
+	exit $$missing
+
 ##@ Current Gitea
 
 gitea-deploy: ## Deploy / upgrade current Gitea forge runtime
@@ -77,4 +106,4 @@ help: ## Show this help
 		/^[a-zA-Z0-9_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } \
 		/^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) }' $(MAKEFILE_LIST)
 
-.PHONY: check-tools check-sops registry-docs evidence-docs gitea-deploy gitea-ingress-deploy gitea-status help
+.PHONY: check-tools check-sops registry-docs evidence-docs runner-docs runner-status check-runner-tools gitea-deploy gitea-ingress-deploy gitea-status help
diff --git a/README.md b/README.md
index 7d9ebb1..026e3ef 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,8 @@ Key contracts:
 
 - `docs/initial-operating-contracts.md`
 - `docs/ci-runner-actions-gitops-ownership.md`
+- `docs/gitea-actions-runner-substrate.md`
+- `docs/gitea-actions-runner-evidence.md`
 - `docs/backup-restore-secret-handoff.md`
 - `docs/observability-operating-evidence.md`
 - `docs/gitea-container-registry.md`
@@ -33,6 +35,9 @@ Useful entry points:
 ```bash
 make registry-docs
 make evidence-docs
+make runner-docs
+make runner-status
+make check-runner-tools
 make check-tools
 make gitea-status
 make gitea-deploy
diff --git a/docs/ci-runner-actions-gitops-ownership.md
b/docs/ci-runner-actions-gitops-ownership.md index b25f229..fa7014f 100644 --- a/docs/ci-runner-actions-gitops-ownership.md +++ b/docs/ci-runner-actions-gitops-ownership.md @@ -1,6 +1,6 @@ # CI Runner, Actions, And GitOps Ownership -Last reviewed: 2026-06-05 +Last reviewed: 2026-06-07 Status: contract v1. This document defines ownership and handoffs only; it does not authorize a live runner deployment, credential change, GitOps controller @@ -161,10 +161,25 @@ should provide: - representative cluster dry-run evidence for cluster-access labels; - a rollback or disable path for a bad runner registration. +## Current Implementation Surface + +`docs/gitea-actions-runner-substrate.md` defines the first supported Gitea +Actions runner model, the initial haskelseed compatibility labels, the attended +registration path, and recovery steps. `make runner-status` provides the +read-only probe entry point, and `.gitea/workflows/forge-runner-smoke.yaml` +provides the first non-production proof workflow. + +The first compatibility labels are `self-hosted` and `haskelseed`, matching the +inter-hub blocker. New workflow consumers should prefer semantic labels such as +`linux`, `container-build`, and `registry-publish` once smoke evidence is +recorded. + ## Open Follow-Ups - `docs/observability-operating-evidence.md` defines the runner health and artifact evidence signals that consumers may cite. +- `docs/gitea-actions-runner-evidence.md` records the current runner inventory, + smoke evidence, and inter-hub unblock evidence. - WP-0006-T09 should declare runner substrate, label contracts, and evidence edges in Railiance Fabric. 
- `RAILIANCE-WP-0005-T05` should document app-side dry-run behavior once forge diff --git a/docs/gitea-actions-runner-evidence.md b/docs/gitea-actions-runner-evidence.md new file mode 100644 index 0000000..070f9f3 --- /dev/null +++ b/docs/gitea-actions-runner-evidence.md @@ -0,0 +1,181 @@ +# Gitea Actions Runner Evidence + +Last updated: 2026-06-07 + +Status: initial non-secret inventory for `FORGE-WP-0003`. Haskelseed now has a +running `act_runner` OpenRC service with labels matching the first compatibility +contract. Smoke proof is still pending. + +## Workstream + +- Workplan: `FORGE-WP-0003-actions-runner-substrate` +- State Hub workstream: `149a0316-64d1-4664-96d0-274577c32e63` +- Immediate consumer blocker: inter-hub `R7` waits on a runner matching + `self-hosted` and `haskelseed`. + +## Local Probe Results + +Collected from `/home/worsch/railiance-forge` on 2026-06-07. + +| Probe | Result | Note | +| --- | --- | --- | +| `curl` | available at `/usr/bin/curl` | Used for public endpoint checks. | +| `ssh` | available at `/usr/bin/ssh` | Direct `haskelseed` alias timed out; ops-bridge path reaches `root@192.168.178.135`. | +| `docker` | available at `/usr/bin/docker` | Local presence only; runner host Docker still pending. | +| `skopeo` | not available | Registry tag inspection pending. | +| local `act_runner` | not available | Haskelseed has `/usr/local/bin/act_runner`; local workstation does not. | +| `kubectl`, `helm`, `sops` | not available in `make check-tools` | Separate operator-tool gap for deploy-capable targets. | + +Public endpoint checks from this environment: + +| Endpoint | Result | Interpretation | +| --- | --- | --- | +| `https://gitea.coulomb.social/` | HTTP `404` | Public root route is not a useful Gitea web health signal here. | +| `https://gitea.coulomb.social/api/v1/version` | HTTP `404` | Public API version route is not exposed through the current ingress path. 
| +| `https://gitea.coulomb.social/v2/` | HTTP `401` | OCI registry route responds with an auth challenge. | +| `https://gitea.coulomb.social/api/packages/coulomb/pypi/simple/` | HTTP `404` | Public package route reachable but package/simple root did not return an index. | +| `https://hub.coulomb.social/api/v2/hubs` | HTTP `404` | Confirms inter-hub production still lacks the expected API surface. | + +Direct haskelseed alias probe: + +```text +ssh -o BatchMode=yes -o ConnectTimeout=5 haskelseed hostname +``` + +Result: + +```text +ssh: connect to host haskelseed port 22: Connection timed out +``` + +This does not prove the runner host is down; it proves this session does not +currently have the bare SSH alias path needed to inspect it. + +Ops-bridge haskelseed path: + +```bash +RUNNER_HOST=192.168.178.135 \ +RUNNER_SSH_USER=root \ +RUNNER_SSH_KEY=/home/worsch/.ssh/id_ops \ +make runner-status +``` + +Observed on 2026-06-07: + +| Field | Result | +| --- | --- | +| Hostname | `haskelseed.coulomb.social` | +| Kernel | `Linux 6.18.22-0-virt` on Alpine | +| `act_runner` | `/usr/local/bin/act_runner` | +| `act_runner --version` | `v0.6.1-1-g8e6b3be9` | +| `nix` | `/usr/local/bin/nix`, Determinate Nix `3.18.1`, Nix `2.33.4` | +| Init system | OpenRC (`/sbin/rc-service`) | +| `act_runner` OpenRC service | initially not present; installed and started on 2026-06-07 | +| `gitea-act-runner` OpenRC service | not present | +| live runner process | PID `5911` after activation | +| registration file | `/root/.runner`, mode `0644`, owner `root:root` | +| registration name | `haskelseed` | +| registration address | `http://92.205.130.254:32166` | +| registration labels before activation | `haskelseed:host`, `linux:host`, `x86_64:host` | +| registration labels after activation | `self-hosted:host`, `haskelseed:host`, `linux:host`, `linux_amd64:host`, `x86_64:host`, `container-build:host`, `registry-publish:host` | +| ephemeral | `false` | +| runner backup | 
`/root/.runner.bak-20260607225905` | + +Activation evidence: + +```text +rc-service act_runner restart +status: started +act_runner PID: 5911 +runner declared successfully with labels: +self-hosted, haskelseed, linux, linux_amd64, x86_64, container-build, registry-publish +``` + +## Runner Inventory + +Known from repo and State Hub: + +- Before `FORGE-WP-0003`, this repo had runner ownership contracts but no + runner deployment files, status script, smoke workflow, or runner evidence + file. +- Inter-hub reported that commits intended to trigger deployment did not update + production and that its workflow targets `self-hosted` and `haskelseed`. +- A local registration file exists on haskelseed and `act_runner` is running as + an OpenRC service. Gitea runner admin access has not yet been used to confirm + the runner in the UI, but the daemon log reports successful declaration. + +Pending attended checks: + +```bash +make runner-status + +RUNNER_HOST=192.168.178.135 \ +RUNNER_SSH_USER=root \ +RUNNER_SSH_KEY=/home/worsch/.ssh/id_ops \ +make runner-status + +ssh haskelseed 'hostname; command -v act_runner || true' +ssh haskelseed 'systemctl status act_runner --no-pager || systemctl status gitea-act-runner --no-pager || true' +ssh haskelseed 'journalctl -u act_runner -n 200 --no-pager || journalctl -u gitea-act-runner -n 200 --no-pager || true' +``` + +If Gitea runner admin access is available, verify the `coulomb` organization or +instance runner page for: + +- runner name `railiance-haskelseed-build-01`; +- labels `self-hosted`, `haskelseed`, `linux`, `linux_amd64`, + `container-build`, and `registry-publish`; +- online status; +- last contact time; +- workflow ids for the smoke run and later inter-hub run. + +## Registry Tag Evidence + +Tag inspection remains pending because `skopeo` is unavailable in this +environment. 
+ +Run from an operator host with registry access: + +```bash +for tag in 91037a4 ae9e497 fa96fb8 7cc3173 latest; do + skopeo inspect --tls-verify=false \ + "docker://92.205.130.254:32166/coulomb/inter-hub:${tag}" \ + --format "${tag} {{.Name}} {{.Digest}}" +done +``` + +Record only image names, tags, digests, and status. Do not record registry +tokens. + +## Smoke Evidence Slot + +No smoke job has passed yet. Current haskelseed registration labels now match +the proposed first compatibility contract. + +Expected evidence after `.gitea/workflows/forge-runner-smoke.yaml` passes: + +| Field | Value | +| --- | --- | +| Date | `TODO` | +| Runner name | `TODO` | +| Labels used | `TODO` | +| Workflow id/url | `TODO` | +| Repo commit | `TODO` | +| Docker availability | `TODO` | +| Cluster credential absent | `TODO` | +| Registry credential absent in smoke | `TODO` | + +## Inter-Hub Unblock Slot + +Do not rerun production push probes until the smoke workflow passes. + +Expected evidence after inter-hub rerun: + +| Field | Value | +| --- | --- | +| Inter-hub commit | `TODO` | +| Workflow id/url | `TODO` | +| Image tag/digest | `TODO` | +| Deployment result | `TODO` | +| `https://hub.coulomb.social/api/v2/hubs` | `TODO` | +| Remaining blocker | `TODO` | diff --git a/docs/gitea-actions-runner-substrate.md b/docs/gitea-actions-runner-substrate.md new file mode 100644 index 0000000..0854406 --- /dev/null +++ b/docs/gitea-actions-runner-substrate.md @@ -0,0 +1,245 @@ +# Gitea Actions Runner Substrate + +Last reviewed: 2026-06-07 + +Status: implementation contract v1 and attended operating runbook. This +document does not contain runner registration tokens, package credentials, +kubeconfigs, SSH keys, or decrypted secret values. + +## Purpose + +Railiance needs a forge-owned Gitea Actions runner substrate so source and app +workflows can build, publish, and verify artifacts without each repo inventing +its own runner assumptions. 
The immediate blocker is inter-hub: its deployment +workflow currently targets `self-hosted` and `haskelseed`, but no forge-owned +runner deployment or health evidence existed after the first Gitea ownership +move. + +This runbook implements the first supported runner model and keeps the +separation clear: + +- `railiance-forge` owns runner placement, registration, labels, health, and + recovery. +- Source and app repos own workflow files and app-specific build/deploy logic. +- Cluster deploy authority is not bundled into the build runner unless a + separate human-reviewed approval says so. + +Primary upstream reference: https://docs.gitea.com/usage/actions/act-runner + +## First Supported Runner + +Use one attended Gitea `act_runner` on haskelseed as the compatibility runner +for the inter-hub unblock, unless an operator explicitly chooses a separate +runner host before registration. + +Initial contract: + +| Field | Value | +| --- | --- | +| Runner name | `railiance-haskelseed-build-01` | +| Runner scope | `coulomb` organization runner, or repository runner if org scope is not approved | +| Initial host | `haskelseed` | +| Capacity | `1` | +| Runtime | Docker-backed or host-backed `act_runner`, confirmed during attended install | +| Cluster deploy authority | Not included in baseline runner approval | +| Registry publish authority | Allowed only through repo/workflow-scoped Gitea secrets | + +The first runner exists to prove scheduling, build tooling, and registry publish +readiness. It must not silently become a cluster deployment runner. If a +workflow needs live cluster access, add a separate label and approval note such +as `cluster-deploy` or shift deployment to an app-owned release path. 
+ +## Label Contract + +Register the first runner with compatibility labels for the inter-hub workflow +and semantic labels for future workflows: + +```text +self-hosted:host,haskelseed:host,linux:host,linux_amd64:host,container-build:host,registry-publish:host +``` + +Rules: + +- `self-hosted` and `haskelseed` are compatibility labels for the first unblock. +- New workflows should prefer semantic labels such as `linux`, + `container-build`, and `registry-publish`. +- `registry-publish` means the runner may execute jobs that receive + repo-scoped registry credentials from Gitea secrets. +- Do not add `cluster-read`, `cluster-dry-run`, `cluster-deploy`, or + `s5-release-check` until the cluster/platform credential path is reviewed. +- Keep capacity at `1` until haskelseed resource contention and workload safety + are understood. + +Gitea runner labels are supplied at registration with `--labels` and, on modern +Gitea/act_runner versions, can also be adjusted in the generated runner config. +Use the generated config from the installed runner version as the source of +truth for exact YAML keys. + +## Secret Boundaries + +Runner registration and publish credentials are live secrets. They must be +created and handled outside Git. + +Allowed in this repo: + +- secret names; +- operator prompts; +- paths such as `/run/secrets/railiance/gitea-act-runner-registration-token`; +- non-secret evidence such as runner name, labels, service status, workflow id, + commit SHA, image tag, and digest. + +Forbidden in this repo: + +- registration token values; +- Gitea package or registry tokens; +- tokenized URLs; +- SSH private keys; +- kubeconfigs or OpenBao tokens; +- complete environment dumps from runner jobs. + +## Attended Install + +Run these steps on the selected runner host. Adjust only the installation path +and package manager details for the host; do not change the label contract +without updating this document and State Hub. + +1. 
Prepare the host: + + ```bash + sudo install -d -o root -g root -m 0755 /etc/act_runner + sudo install -d -o act_runner -g act_runner -m 0750 /var/lib/act_runner + sudo install -d -o act_runner -g act_runner -m 0750 /var/cache/act_runner + ``` + +2. Install `act_runner` using the current Gitea-supported binary or package + source for the host. Record the version, but do not commit downloaded + binaries into this repo. + +3. Generate the runner config on the host: + + ```bash + sudo -u act_runner /usr/local/bin/act_runner generate-config \ + | sudo tee /etc/act_runner/config.yaml >/dev/null + sudo chmod 0640 /etc/act_runner/config.yaml + sudo chown root:act_runner /etc/act_runner/config.yaml + ``` + +4. Edit the generated config so capacity is `1` and labels match this document. + If the generated config for the installed version uses a different section + name, keep the generated structure and only change the corresponding values. + +5. Place the runner registration token in the approved secret path, for example: + + ```text + /run/secrets/railiance/gitea-act-runner-registration-token + ``` + +6. Register non-interactively without printing the token: + + ```bash + sudo -u act_runner sh -lc ' + set -eu + token="$(cat /run/secrets/railiance/gitea-act-runner-registration-token)" + exec /usr/local/bin/act_runner --config /etc/act_runner/config.yaml \ + register --no-interactive \ + --instance https://gitea.coulomb.social/ \ + --token "$token" \ + --name railiance-haskelseed-build-01 \ + --labels self-hosted:host,haskelseed:host,linux:host,linux_amd64:host,container-build:host,registry-publish:host + ' + ``` + +7. Install the matching service template, adjusted to the host paths if needed. 
+ For systemd hosts, use `runner/act-runner.service.example`: + + ```bash + sudo cp runner/act-runner.service.example /etc/systemd/system/act_runner.service + sudo systemctl daemon-reload + sudo systemctl enable --now act_runner + ``` + + For Alpine/OpenRC hosts such as haskelseed, use + `runner/act-runner.openrc.example`: + + ```bash + sudo install -m 0755 runner/act-runner.openrc.example /etc/init.d/act_runner + sudo rc-update add act_runner default + sudo rc-service act_runner start + ``` + + If reusing the current haskelseed root registration at `/root/.runner`, use + `runner/act-runner-haskelseed.openrc.example` instead. This is less + portable than the dedicated `act_runner` user layout, but it matches the + existing registration state without printing or replacing the token. + +8. Inspect without exposing secrets: + + ```bash + systemctl status act_runner --no-pager + journalctl -u act_runner -n 100 --no-pager + rc-service act_runner status + tail -n 100 /var/log/act_runner.log + ``` + +9. From this repo, run: + + ```bash + make runner-status + + # If using the current ops-bridge haskelseed path: + RUNNER_HOST=192.168.178.135 \ + RUNNER_SSH_USER=root \ + RUNNER_SSH_KEY=/home/worsch/.ssh/id_ops \ + make runner-status + ``` + +## Recovery + +Stop or drain: + +```bash +sudo systemctl stop act_runner +sudo systemctl disable act_runner +``` + +Replace a runner: + +1. Stop the old service. +2. Revoke or rotate the old registration token in Gitea. +3. Move the old `/var/lib/act_runner/.runner` aside for evidence, not reuse. +4. Register the replacement with the same approved label contract. +5. Run the smoke workflow before re-enabling consumer workflows. + +Disable a bad registration: + +- Stop the service immediately. +- Remove or disable the runner in Gitea admin/org settings. +- Rotate any repo secrets that were available to failed jobs if exposure is + plausible. +- Record a State Hub note with runner name, labels, time window, and affected + workflows. 
+ +## Smoke Workflow + +The repo includes `.gitea/workflows/forge-runner-smoke.yaml`. It is deliberately +small: it checks scheduling, basic host context, Docker availability if present, +and verifies that baseline cluster/registry secret environment variables are not +accidentally present. + +Do not rerun inter-hub production deploys until this smoke workflow has passed +on the approved runner and the result is recorded in +`docs/gitea-actions-runner-evidence.md`. + +## Current Blockers + +- Direct non-interactive SSH to the bare `haskelseed` alias timed out from this + environment on 2026-06-07. The current ops-bridge path reaches haskelseed at + `root@192.168.178.135` with `/home/worsch/.ssh/id_ops`. +- Haskelseed has `act_runner v0.6.1-1-g8e6b3be9` installed and `/root/.runner` + registered as `haskelseed`. On 2026-06-07 the OpenRC service was installed + from `runner/act-runner-haskelseed.openrc.example`, labels were updated to the + first compatibility contract, and the runner daemon declared successfully. +- `skopeo` is not installed in this environment, so registry tag inspection must + run from a host with `skopeo` or use an approved equivalent. +- Local `make check-tools` still lacks `kubectl`, `helm`, and `sops`; those are + separate operator-tool prerequisites for deploy-capable Gitea work. diff --git a/docs/observability-operating-evidence.md b/docs/observability-operating-evidence.md index 0b59852..1714b85 100644 --- a/docs/observability-operating-evidence.md +++ b/docs/observability-operating-evidence.md @@ -1,6 +1,6 @@ # Forge Observability And Operating Evidence -Last reviewed: 2026-06-05 +Last reviewed: 2026-06-07 Status: contract v1. This document defines checks, evidence, and future monitoring expectations. It does not authorize a live monitoring deployment, @@ -68,6 +68,9 @@ Git SSH: Actions and runners: +- Run `make runner-status` for the current read-only runner, public endpoint, + and inter-hub registry probes. 
The target degrades when optional tools such as + `skopeo` or `act_runner` are unavailable. - Record runner inventory by semantic label, trust level, and last successful sample job. - For privileged labels such as `package-publish`, `registry-publish`, @@ -75,6 +78,8 @@ Actions and runners: sample job or release job reference. - If no runner currently provides a required label, mark the dependent workflow as blocked on runner prerequisites instead of weakening the workflow. +- The current runner evidence log lives in + `docs/gitea-actions-runner-evidence.md`. ## Storage Growth Checks diff --git a/runner/README.md b/runner/README.md new file mode 100644 index 0000000..0e264f5 --- /dev/null +++ b/runner/README.md @@ -0,0 +1,24 @@ +# Runner Templates + +This directory contains non-secret templates for the first forge-owned Gitea +Actions runner. + +Use `docs/gitea-actions-runner-substrate.md` as the authoritative runbook. The +templates here are intentionally examples because the final host paths, +installer, and generated `act_runner` config must come from the attended runner +host. + +Files: + +- `act-runner.env.example` documents the non-secret environment shape and the + approved token file reference. +- `act-runner.service.example` is a systemd service skeleton for a binary + `act_runner` installation. +- `act-runner.openrc.example` is an OpenRC service skeleton for Alpine-style + hosts such as haskelseed. +- `act-runner-haskelseed.openrc.example` matches the current haskelseed + registration layout, where `/root/.runner` already exists and `act_runner` + runs from `/root`. + +Do not commit `.runner`, generated configs containing host-specific state, token +files, logs with secret material, or downloaded runner binaries. 
diff --git a/runner/act-runner-haskelseed.openrc.example b/runner/act-runner-haskelseed.openrc.example
new file mode 100644
index 0000000..f5d6c90
--- /dev/null
+++ b/runner/act-runner-haskelseed.openrc.example
@@ -0,0 +1,20 @@
+#!/sbin/openrc-run
+#
+# OpenRC service for the existing haskelseed registration: runs act_runner as
+# root with /root as the working directory, where .runner already exists.
+
+name="Railiance haskelseed Gitea Actions runner"
+description="Forge-owned haskelseed Gitea act_runner service"
+command="/usr/local/bin/act_runner"
+command_args="daemon"
+command_user="root:root"
+directory="/root"
+pidfile="/run/act_runner.pid"
+command_background="yes"
+output_log="/var/log/act_runner.log"
+error_log="/var/log/act_runner.err"
+
+depend() {
+	need net nix-daemon
+	after firewall
+}
diff --git a/runner/act-runner.env.example b/runner/act-runner.env.example
new file mode 100644
index 0000000..7cba9a5
--- /dev/null
+++ b/runner/act-runner.env.example
@@ -0,0 +1,7 @@
+# Copy to a host-local path such as /etc/act_runner/railiance.env.
+# Do not commit the copied file if it contains host-specific secret references.
+
+GITEA_INSTANCE_URL=https://gitea.coulomb.social/
+GITEA_RUNNER_NAME=railiance-haskelseed-build-01
+GITEA_RUNNER_LABELS=self-hosted:host,haskelseed:host,linux:host,linux_amd64:host,container-build:host,registry-publish:host
+GITEA_RUNNER_REGISTRATION_TOKEN_FILE=/run/secrets/railiance/gitea-act-runner-registration-token
diff --git a/runner/act-runner.openrc.example b/runner/act-runner.openrc.example
new file mode 100644
index 0000000..4e44260
--- /dev/null
+++ b/runner/act-runner.openrc.example
@@ -0,0 +1,25 @@
+#!/sbin/openrc-run
+#
+# OpenRC service for the dedicated act_runner user layout
+# (/etc/act_runner/config.yaml, /var/lib/act_runner).
+
+name="Railiance Gitea Actions runner"
+description="Forge-owned Gitea act_runner service"
+command="/usr/local/bin/act_runner"
+command_args="daemon --config /etc/act_runner/config.yaml"
+command_user="act_runner:act_runner"
+directory="/var/lib/act_runner"
+pidfile="/run/act_runner.pid"
+command_background="yes"
+output_log="/var/log/act_runner.log"
+error_log="/var/log/act_runner.err"
+
+depend() {
+	need net
+	after firewall
+}
+
+start_pre() {
+	# The daemon runs unprivileged; create the log files owned by that user.
+	checkpath -f -o "${command_user}" -m 0640 "${output_log}" "${error_log}"
+}
diff --git
a/runner/act-runner.service.example b/runner/act-runner.service.example
new file mode 100644
index 0000000..d509301
--- /dev/null
+++ b/runner/act-runner.service.example
@@ -0,0 +1,21 @@
+[Unit]
+Description=Railiance Gitea Actions runner
+Documentation=https://docs.gitea.com/usage/actions/act-runner
+After=network-online.target docker.service
+Wants=network-online.target
+
+[Service]
+Type=simple
+User=act_runner
+Group=act_runner
+WorkingDirectory=/var/lib/act_runner
+EnvironmentFile=-/etc/act_runner/railiance.env
+ExecStart=/usr/local/bin/act_runner daemon --config /etc/act_runner/config.yaml
+ExecReload=/bin/kill -s HUP $MAINPID
+TimeoutSec=0
+Restart=always
+RestartSec=10
+NoNewPrivileges=true
+
+[Install]
+WantedBy=multi-user.target
diff --git a/tools/gitea-runner-status.sh b/tools/gitea-runner-status.sh
new file mode 100644
index 0000000..ce136c3
--- /dev/null
+++ b/tools/gitea-runner-status.sh
@@ -0,0 +1,128 @@
+#!/usr/bin/env bash
+#
+# Read-only probe for the Gitea Actions runner substrate. Reports local tool
+# availability, public endpoint status codes, runner host state over SSH, and
+# inter-hub registry tags.
+#
+# Probes are best-effort: `set -e` is deliberately not used, so a failing
+# probe is reported and the remaining sections still run.
+#
+# Environment overrides: GITEA_URL, RUNNER_HOST, RUNNER_SSH_USER,
+# RUNNER_SSH_KEY, SSH_CONNECT_TIMEOUT, INTER_HUB_IMAGE, INTER_HUB_TAGS.
+
+set -u
+
+GITEA_URL="${GITEA_URL:-https://gitea.coulomb.social}"
+RUNNER_HOST="${RUNNER_HOST:-haskelseed}"
+RUNNER_SSH_USER="${RUNNER_SSH_USER:-}"
+RUNNER_SSH_KEY="${RUNNER_SSH_KEY:-}"
+SSH_CONNECT_TIMEOUT="${SSH_CONNECT_TIMEOUT:-5}"
+INTER_HUB_IMAGE="${INTER_HUB_IMAGE:-92.205.130.254:32166/coulomb/inter-hub}"
+INTER_HUB_TAGS="${INTER_HUB_TAGS:-91037a4 ae9e497 fa96fb8 7cc3173 latest}"
+
+# Print a markdown-style section heading.
+section() {
+  printf '\n## %s\n' "$1"
+}
+
+# Succeed when the named command is on PATH.
+have() {
+  command -v "$1" >/dev/null 2>&1
+}
+
+# Report one tool as ok (with its resolved path) or missing.
+tool_line() {
+  if have "$1"; then
+    printf 'ok: %s -> %s\n' "$1" "$(command -v "$1")"
+  else
+    printf 'missing: %s\n' "$1"
+  fi
+}
+
+# Print the HTTP status code for a URL, or `error` when the request fails.
+# curl still writes its -w output (000) on failure, so capture the code first
+# instead of appending `error` to it.
+http_code() {
+  local code
+  if code="$(curl -sS -m 8 -o /dev/null -w '%{http_code}' "$1" 2>/dev/null)"; then
+    printf '%s' "${code}"
+  else
+    printf 'error'
+  fi
+}
+
+section "Tool availability"
+for tool in curl ssh docker skopeo act_runner; do
+  tool_line "$tool"
+done
+
+section "Public endpoint checks"
+if have curl; then
+  printf 'gitea root: %s\n' "$(http_code "${GITEA_URL}/")"
+  printf 'gitea api version: %s\n' "$(http_code "${GITEA_URL}/api/v1/version")"
+  printf 'gitea registry /v2: %s\n' "$(http_code "${GITEA_URL}/v2/")"
+  printf 'gitea pypi simple root: %s\n' "$(http_code "${GITEA_URL}/api/packages/coulomb/pypi/simple/")"
+  printf 'inter-hub api /api/v2/hubs: %s\n' "$(http_code "https://hub.coulomb.social/api/v2/hubs")"
+else
+  echo "curl missing; skipping endpoint checks"
+fi
+
+section "Runner host probe"
+if have ssh; then
+  ssh_target="${RUNNER_HOST}"
+  if [ -n "${RUNNER_SSH_USER}" ]; then
+    ssh_target="${RUNNER_SSH_USER}@${RUNNER_HOST}"
+  fi
+  ssh_args=(-o BatchMode=yes -o ConnectTimeout="${SSH_CONNECT_TIMEOUT}")
+  if [ -n "${RUNNER_SSH_KEY}" ]; then
+    ssh_args+=(-i "${RUNNER_SSH_KEY}")
+  fi
+  # The remote script is single-quoted, so nothing in it expands locally.
+  ssh "${ssh_args[@]}" "${ssh_target}" '
+    set -u
+    echo "host=$(hostname)"
+    if command -v act_runner >/dev/null 2>&1; then
+      act_runner --version || true
+    else
+      echo "missing: act_runner"
+    fi
+    if command -v systemctl >/dev/null 2>&1; then
+      systemctl is-active act_runner 2>/dev/null || true
+      systemctl is-active gitea-act-runner 2>/dev/null || true
+    fi
+    if command -v rc-service >/dev/null 2>&1; then
+      rc-service act_runner status 2>/dev/null || true
+      rc-service gitea-act-runner status 2>/dev/null || true
+      rc-status 2>/dev/null | grep -Ei "act|runner|docker|podman|nix" || true
+    fi
+    if command -v pgrep >/dev/null 2>&1; then
+      pgrep -a act_runner || true
+      pgrep -a runner || true
+    fi
+    if [ -f /root/.runner ]; then
+      echo "runner_registration=/root/.runner"
+      # Select fields by name, never by line position, and drop any token
+      # line so the registration secret cannot reach the output.
+      grep -nE "\"(uuid|name|address|ephemeral)\"" /root/.runner || true
+      sed -n "/\"labels\"/,/]/p" /root/.runner 2>/dev/null | grep -v "\"token\"" || true
+    fi
+  ' 2>&1 || echo "runner host probe failed for ${RUNNER_HOST}"
+else
+  echo "ssh missing; skipping runner host probe"
+fi
+
+section "Inter-hub registry tags"
+if have skopeo; then
+  for tag in ${INTER_HUB_TAGS}; do
+    if out="$(skopeo inspect --tls-verify=false --format '{{.Name}} {{.Digest}}' "docker://${INTER_HUB_IMAGE}:${tag}" 2>&1)"; then
+      printf 'ok: %s %s\n' "${tag}" "${out}"
+    else
+      printf 'missing-or-error: %s %s\n' "${tag}" "${out}"
+    fi
+  done
+else
+  echo "skopeo missing; skipping registry tag inspection"
+fi
+
+section "Evidence reminder"
+echo "Record non-secret results in docs/gitea-actions-runner-evidence.md and State Hub."
diff --git a/tools/haskelseed-runner-activate.sh b/tools/haskelseed-runner-activate.sh
new file mode 100644
index 0000000..a853ff9
--- /dev/null
+++ b/tools/haskelseed-runner-activate.sh
@@ -0,0 +1,107 @@
+#!/usr/bin/env bash
+#
+# Activate the existing root-owned act_runner registration on haskelseed:
+# rewrite the labels in /root/.runner to the first compatibility contract,
+# install the OpenRC service file, and restart the service.
+#
+# This changes live runner state. /root/.runner and its backup hold the
+# runner token, so they are kept root-only and only the labels are printed.
+#
+# Environment overrides: RUNNER_HOST, RUNNER_SSH_USER, RUNNER_SSH_KEY,
+# RUNNER_SERVICE_SOURCE.
+
+set -euo pipefail
+
+RUNNER_HOST="${RUNNER_HOST:-192.168.178.135}"
+RUNNER_SSH_USER="${RUNNER_SSH_USER:-root}"
+RUNNER_SSH_KEY="${RUNNER_SSH_KEY:-/home/worsch/.ssh/id_ops}"
+RUNNER_SERVICE_SOURCE="${RUNNER_SERVICE_SOURCE:-runner/act-runner-haskelseed.openrc.example}"
+
+# Fail early when the template path does not resolve from the current directory.
+if [ ! -f "${RUNNER_SERVICE_SOURCE}" ]; then
+  echo "missing service template: ${RUNNER_SERVICE_SOURCE}" >&2
+  exit 1
+fi
+
+ssh_args=(-o BatchMode=yes -o ConnectTimeout=8)
+if [ -n "${RUNNER_SSH_KEY}" ]; then
+  ssh_args+=(-i "${RUNNER_SSH_KEY}")
+fi
+target="${RUNNER_SSH_USER}@${RUNNER_HOST}"
+
+# Let the remote host pick an unpredictable staging path instead of a
+# PID-derived name in /tmp.
+REMOTE_SERVICE_TMP="$(ssh "${ssh_args[@]}" "${target}" 'mktemp /tmp/act_runner.openrc.XXXXXX')"
+
+scp "${ssh_args[@]}" "${RUNNER_SERVICE_SOURCE}" "${target}:${REMOTE_SERVICE_TMP}"
+
+ssh "${ssh_args[@]}" "${target}" "REMOTE_SERVICE_TMP='${REMOTE_SERVICE_TMP}' sh -s" <<'REMOTE'
+set -eu
+
+if [ ! -f /root/.runner ]; then
+  echo "missing /root/.runner; register act_runner before activating service" >&2
+  exit 1
+fi
+
+# Keep the registration file, its backup, and the rewrite root-only.
+old_umask="$(umask)"
+umask 077
+
+backup="/root/.runner.bak-$(date +%Y%m%d%H%M%S)"
+cp /root/.runner "${backup}"
+
+# Replace the labels array. Handles multi-line and single-line arrays and
+# keeps the original closing line, so a trailing comma is emitted only when
+# another key follows.
+awk '
+  /"labels"[[:space:]]*:[[:space:]]*\[/ {
+    print "  \"labels\": ["
+    print "    \"self-hosted:host\","
+    print "    \"haskelseed:host\","
+    print "    \"linux:host\","
+    print "    \"linux_amd64:host\","
+    print "    \"x86_64:host\","
+    print "    \"container-build:host\","
+    print "    \"registry-publish:host\""
+    if ($0 !~ /\]/) {
+      in_labels = 1
+    } else if ($0 ~ /\],[[:space:]]*$/) {
+      print "  ],"
+    } else {
+      print "  ]"
+    }
+    next
+  }
+  in_labels && /\]/ {
+    print
+    in_labels = 0
+    next
+  }
+  !in_labels { print }
+' /root/.runner > /root/.runner.tmp
+
+# Refuse to install a rewrite that did not pick up the new labels.
+if ! grep -q '"self-hosted:host"' /root/.runner.tmp; then
+  echo "label rewrite failed; /root/.runner left unchanged" >&2
+  rm -f /root/.runner.tmp "${REMOTE_SERVICE_TMP}"
+  exit 1
+fi
+
+mv /root/.runner.tmp /root/.runner
+chmod 0600 /root/.runner
+umask "${old_umask}"
+
+install -m 0755 "${REMOTE_SERVICE_TMP}" /etc/init.d/act_runner
+rm -f "${REMOTE_SERVICE_TMP}"
+
+rc-update add act_runner default >/dev/null 2>&1 || true
+rc-service act_runner restart
+
+echo "runner_backup=${backup}"
+echo "runner_labels:"
+sed -n '/"labels"/,/]/p' /root/.runner
+echo "service_status:"
+rc-service act_runner status || true
+echo "runner_process:"
+pgrep -a act_runner || true
+REMOTE
diff --git a/workplans/FORGE-WP-0003-actions-runner-substrate.md b/workplans/FORGE-WP-0003-actions-runner-substrate.md
new file mode 100644
index 0000000..37f9c05
--- /dev/null
+++ b/workplans/FORGE-WP-0003-actions-runner-substrate.md
@@ -0,0 +1,253 @@
+---
+id: FORGE-WP-0003
+type: workplan
+title: "Gitea Actions runner substrate for Railiance workloads"
+domain: railiance
+repo: railiance-forge
+status: active
+owner: codex
+topic_slug: railiance
+planning_priority: high
+created: "2026-06-07"
+updated: "2026-06-07"
+state_hub_workstream_id: "149a0316-64d1-4664-96d0-274577c32e63"
+---
+
+# Gitea Actions runner substrate for Railiance workloads
+
+## Context
+
+Inter-Hub reported that its production deployment is blocked on a forge-owned
+Actions runner substrate.
The inter-hub workflow currently targets +`self-hosted` and `haskelseed`, but production remained on the older API surface +after deployment-trigger commits. The current forge migration notes explicitly +excluded an Actions runner deployment, while the forge operating contract says +`railiance-forge` owns runner deployment, registration, labels, credential +boundaries, and health evidence. + +This workplan turns that ownership contract into an actionable runner substrate +without weakening repo or app boundaries. It should unblock inter-hub only after +the runner is registered, visible, and has passed a non-production sample job. + +## T01 - Register blocker and dependency evidence + +```task +id: FORGE-WP-0003-T01 +status: done +priority: high +state_hub_task_id: "b5a42f74-7792-4fbc-8e1f-16c1082ea194" +``` + +Capture the immediate dependency chain: + +- inter-hub `R7` waits on a self-hosted runner for labels currently written as + `self-hosted` and `haskelseed`; +- `hub.coulomb.social` still serves the older API surface after pushed + deployment-trigger commits; +- `docs/first-migration-plan.md` made runner deployment a non-goal for the first + forge migration; +- `docs/ci-runner-actions-gitops-ownership.md` assigns runner substrate + ownership to `railiance-forge`. + +Done when this workplan is registered in State Hub and the unread forge inbox +messages that created the blocker are marked read. + +--- + +## T02 - Inventory current Gitea Actions state + +```task +id: FORGE-WP-0003-T02 +status: wait +priority: high +state_hub_task_id: "87181d63-049e-4a2b-a5e3-bf16763246d7" +``` + +Inspect the current Gitea Actions configuration without printing secrets. 
+ +Check: + +- whether Actions are enabled for the current Gitea instance; +- whether any `act_runner` service is already registered and online; +- whether a haskelseed runner exists, and which labels it advertises; +- runner logs around the inter-hub Build and Deploy attempts; +- registry tags for the blocked inter-hub commits, including the commit tag and + `latest` where applicable. + +Done when the actual current runner/registry state is recorded as non-secret +evidence in the repo and State Hub. + +**2026-06-07:** Added `docs/gitea-actions-runner-evidence.md` and +`make runner-status` to capture non-secret inventory. Current session evidence: +public inter-hub `/api/v2/hubs` still returns `404`, the direct `haskelseed` +SSH alias timed out, and `skopeo` is unavailable for registry tag inspection. +After ops-bridge was updated, haskelseed is reachable at `root@192.168.178.135` +with `/home/worsch/.ssh/id_ops`. Haskelseed has `act_runner +v0.6.1-1-g8e6b3be9` and `/root/.runner` registered as `haskelseed` with labels +`haskelseed:host`, `linux:host`, and `x86_64:host`, but no OpenRC service or +live runner process was observed. This task still waits on Gitea runner admin +visibility and registry tag inspection. + +**2026-06-07:** Activated the existing haskelseed runner registration through +ops-bridge. Backed up `/root/.runner` to +`/root/.runner.bak-20260607225905`, updated labels to include `self-hosted`, +`linux_amd64`, `container-build`, and `registry-publish`, installed the OpenRC +service from `runner/act-runner-haskelseed.openrc.example`, and started +`act_runner` as PID `5911`. The daemon log reports that runner `haskelseed` +declared successfully with labels `self-hosted`, `haskelseed`, `linux`, +`linux_amd64`, `x86_64`, `container-build`, and `registry-publish`. 
+ +--- + +## T03 - Decide runner placement, labels, and capacity rules + +```task +id: FORGE-WP-0003-T03 +status: done +priority: high +state_hub_task_id: "eecde550-43a5-4d77-8e19-c991c5456b42" +``` + +Choose the first supported runner model. + +Decisions: + +- place the runner on haskelseed or on a separate approved runner host; +- publish semantic labels such as `linux`, `container-build`, and + `registry-publish`; +- decide whether to keep compatibility labels like `self-hosted` and + `haskelseed` during the first unblock; +- use concurrency `1` or an explicit build lock if haskelseed remains shared + infrastructure; +- treat cluster-deploy or cluster-access labels as separate approvals, not as + implicit side effects of the build runner. + +Done when the label and placement contract is documented with any required +human approvals called out. + +**2026-06-07:** Documented the first supported runner model in +`docs/gitea-actions-runner-substrate.md`: one haskelseed compatibility runner +named `railiance-haskelseed-build-01`, capacity `1`, compatibility labels +`self-hosted` and `haskelseed`, semantic labels `linux`, `linux_amd64`, +`container-build`, and `registry-publish`, and no implicit cluster-deploy label. + +--- + +## T04 - Build the runner deployment and recovery runbook + +```task +id: FORGE-WP-0003-T04 +status: done +priority: high +state_hub_task_id: "a3d0adfb-d1f9-4a5f-8e05-c4a8fbb160b1" +``` + +Create the forge-owned runner operating surface. + +Include: + +- installation or service definition for the selected runner host; +- registration-token custody path, referenced by name only; +- start, stop, restart, drain, replacement, and token-rotation steps; +- log inspection commands that avoid secret output; +- health and label inspection commands; +- rollback or disable path for a bad runner registration. + +Done when an operator can register and operate the runner from the forge repo +without committing decrypted secrets or machine-local assumptions. 
+ +**2026-06-07:** Added the attended install/recovery runbook, non-secret +`runner/` templates, systemd and OpenRC service examples, `make runner-docs`, +`make runner-status`, and `make check-runner-tools`. Registration tokens are +referenced by file path only and are never committed. + +--- + +## T05 - Prove a non-production sample job + +```task +id: FORGE-WP-0003-T05 +status: wait +priority: high +state_hub_task_id: "9ada5b3e-2ddb-4a55-b9f4-5a6e00fef8b2" +``` + +Run a tiny non-production workflow against the runner before using it for +inter-hub deployment. + +The proof should show: + +- job scheduling reaches the expected runner; +- labels match the published contract; +- build tooling required by the first supported workload is present; +- no cluster deployment authority is granted unless separately approved; +- logs and State Hub evidence identify the runner and commit without exposing + tokens. + +Done when the sample job result is recorded and consumers can cite the runner +label as available. + +**2026-06-07:** Added `.gitea/workflows/forge-runner-smoke.yaml`. It cannot pass +until an approved runner is registered and visible to Gitea. + +**2026-06-07:** Haskelseed now has a running runner with matching labels. Smoke +execution is still pending until the workflow exists in the remote Gitea repo +and is dispatched or triggered. + +--- + +## T06 - Unblock the inter-hub deployment path + +```task +id: FORGE-WP-0003-T06 +status: wait +priority: high +state_hub_task_id: "53929202-40aa-4470-a249-9d0ee02d3213" +``` + +Coordinate the first real consumer unblock with inter-hub after T05 passes. 
+ +Steps: + +- confirm the inter-hub workflow can target the approved runner labels; +- rerun or inspect the Build and Deploy workflow for the blocked commits; +- verify the expected inter-hub image tag exists in the registry; +- hand off runner evidence and any workflow adjustment recommendation to + inter-hub; +- avoid repeated production push probes until the runner is visible and ready. + +Done when inter-hub has a clear deployment result or a narrower non-runner +blocker. + +**2026-06-07:** Inter-hub unblock remains gated on T05. Do not rerun production +push probes until the forge smoke workflow passes. + +--- + +## T07 - Publish runner evidence and ongoing health checks + +```task +id: FORGE-WP-0003-T07 +status: done +priority: medium +state_hub_task_id: "c959a553-ec48-4e98-a752-168a2b067a81" +``` + +Update forge evidence docs and read-only operator targets so the runner is not a +one-off fix. + +Include: + +- runner inventory by label, placement, and trust level; +- last successful sample job and any publish job evidence; +- expected logs, dashboards, or status commands; +- documented alert or escalation condition for stuck jobs and offline runners; +- Forgejo migration notes so the same semantic labels can survive the future + Gitea-to-Forgejo cutover. + +Done when forge can continuously explain whether the runner substrate is healthy +and what labels downstream workflows may depend on. + +**2026-06-07:** Published runner evidence docs and Makefile probes. Current +health is explicitly `not proven`: no runner registration has been observed from +this session, and live host/Gitea inspection requires attended access.