# infra/build-machines/Makefile
# Usage: make remote-build PROJECT=~/projects/my-haskell-app [VM=haskell-build]
#
# Thin task runner around rsync/ssh for the remote Haskell build VM. No target
# produces a local file that make could track, so every target is phony.

# SSH host alias of the build VM.
VM ?= haskell-build
# Local project directory that is synced to the VM and built there.
PROJECT ?= .
# Tunnel ports probed by bridge-status.
TUNNEL_PORTS ?= 12221 12222 12223 12224 12225 12226 12227 12228 12229 12230
# Private key offered when probing a tunnel port directly (build@localhost does
# not go through the Host alias, so no IdentityFile from ssh config applies).
BUILD_KEY ?= $(HOME)/.ssh/id_build

# Basename of the resolved project directory. $(realpath) expands to nothing
# for a path that does not exist, which leaves this empty.
project_name := $(notdir $(realpath $(PROJECT)))
RDIR := /build/$(project_name)

# Directory holding this Makefile, so ssh-config.template is found from any cwd.
makefile_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

# Recipe guard (deliberately recursive '=' so $(error) fires only when a recipe
# that uses it is about to run). With an empty project_name RDIR would be
# "/build/", and `rsync --delete` would then erase every project on the VM.
require_project = $(if $(strip $(project_name)),,$(error PROJECT '$(PROJECT)' does not resolve to an existing directory))

# Toolchain environment loaded on the VM before every ghc/cabal invocation.
remote_env := source ~/.ghcup/env

# Sync project source to VM (exclude build artefacts)
.PHONY: sync
sync:
	$(require_project)
	rsync -av --delete \
		--exclude='.git' \
		--exclude='dist-newstyle' \
		--exclude='.stack-work' \
		--exclude='*.o' --exclude='*.hi' \
		$(PROJECT)/ $(VM):$(RDIR)/

# Run cabal build on VM after sync
.PHONY: remote-build
remote-build: sync
	ssh $(VM) "cd $(RDIR) && $(remote_env) && cabal build all 2>&1"

# Run tests on VM
.PHONY: remote-test
remote-test: sync
	ssh $(VM) "cd $(RDIR) && $(remote_env) && cabal test all 2>&1"

# Open a GHCi session on the VM (-t allocates a TTY for the interactive REPL)
.PHONY: remote-ghci
remote-ghci: sync
	ssh -t $(VM) "cd $(RDIR) && $(remote_env) && cabal repl"

# Sync build artefacts back (for local IDE inspection)
.PHONY: fetch-artifacts
fetch-artifacts:
	$(require_project)
	rsync -av $(VM):$(RDIR)/dist-newstyle/ $(PROJECT)/dist-newstyle/

# Check which VMs are reachable.
# The command substitutions in the remote command are written as \$$(...):
# make turns $$ into $, and the backslash stops the LOCAL shell from expanding
# them inside the double quotes, so hostname and ghc run on the VM. $$port is
# left unescaped on purpose: it is the local loop variable.
# BatchMode=yes makes a failed key login return immediately instead of
# prompting for a password.
.PHONY: bridge-status
bridge-status:
	@echo "Scanning build-machine tunnel ports..."
	@for port in $(TUNNEL_PORTS); do \
		result=$$(ssh -q -p "$$port" -i "$(BUILD_KEY)" \
			-o BatchMode=yes -o ConnectTimeout=2 \
			-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
			build@localhost \
			"echo $$port OK: \$$(hostname) — GHC: \$$(~/.ghcup/bin/ghc --numeric-version)" \
			2>/dev/null) ; \
		if [ -n "$$result" ]; then echo "  $$result"; \
		else echo "  port $$port: no tunnel"; fi; \
	done

# Show VM system info
.PHONY: vm-info
vm-info:
	ssh $(VM) "uname -a; $(remote_env) && ghc --version && cabal --version"

# Install SSH config for the build VM (idempotent: the template's first line
# doubles as the marker that is searched for before appending).
.PHONY: install-ssh-config
install-ssh-config:
	@mkdir -p -m 700 ~/.ssh
	@if grep -qF '# Haskell Build VM — tunnel via workstation' ~/.ssh/config 2>/dev/null; then \
		echo "SSH config already present — skipping"; \
	else \
		echo "" >> ~/.ssh/config && \
		cat "$(makefile_dir)ssh-config.template" >> ~/.ssh/config && \
		echo "Appended build-machine SSH config to ~/.ssh/config"; \
	fi
Import and configure

```bash
# Import the OVA (still in infra/build-machines/haskell from step 2)
VBoxManage import haskell-build-20260420.ova

# Switch from NAT (build-time) to bridged networking
scripts/setup-vm.sh haskell-build

# Start the VM
VBoxManage startvm haskell-build --type headless
```

### 4. Inject credentials

```bash
# Prepare a directory with keys and config
mkdir -p ~/vm-keys/haskell-build
cp ~/.ssh/id_build ~/vm-keys/haskell-build/
cp ~/.ssh/id_build.pub ~/vm-keys/haskell-build/

# Edit build-agent.env from template
cp files/build-agent.env.template ~/vm-keys/haskell-build/build-agent.env
# Edit SSH_RELAY_HOST to your workstation's LAN IP

# Inject (VM must be running; uses temporary password auth).
# <vm-ip> is the VM's address on the bridged LAN; the key directory is the
# second argument.
scripts/inject-keys.sh <vm-ip> ~/vm-keys/haskell-build/
```

### 5. Install SSH config

```bash
# The Makefile lives one level up, in infra/build-machines
cd ..
make install-ssh-config
```

### 6. Verify

```bash
make bridge-status    # check tunnel is up
ssh haskell-build     # should connect via tunnel
./smoke-test.sh       # full stack validation
```

## Using the VM

```bash
# Build a Haskell project remotely
make remote-build PROJECT=~/projects/my-app

# Run tests
make remote-test PROJECT=~/projects/my-app

# Interactive GHCi
make remote-ghci PROJECT=~/projects/my-app

# Fetch build artefacts back to workstation
make fetch-artifacts PROJECT=~/projects/my-app

# Check VM info
make vm-info
```

## Architecture

```
Workstation (WSL2)
  ├── state-hub (:8000) — sees capability entries, knows tunnel ports
  └── SSH listener — accepts reverse tunnel from VM

Laptop (VirtualBox host)
  └── haskell-build VM (Ubuntu 24.04, bridged)
        ├── GHC 9.8.4 + 9.6.6 via GHCup
        ├── build-agent (systemd) — registers with state-hub on boot
        └── autossh: -R 12222→local:22, -L 18000→state-hub:8000
```

The VM connects OUT to the workstation.
Two tunnels in one SSH connection: +- **Reverse** (`-R 12222:localhost:22`): workstation can SSH into VM +- **Forward** (`-L 18000:localhost:8000`): VM can reach state-hub + +## Port Registry + +See `port-registry.yml`. Range 12221-12230 supports up to 10 concurrent VMs. +Each VM must use a unique port. + +## Adding a GHC Version Post-Deployment + +```bash +ssh haskell-build "source ~/.ghcup/env && ghcup install ghc 9.10.1" +``` + +No image rebuild required. + +## Troubleshooting + +**Tunnel not up:** +- Check `journalctl -u build-agent` on the VM +- Verify `SSH_RELAY_HOST` in `/etc/build-agent.env` is reachable from the VM +- Ensure the workstation's SSH server accepts the build key + +**Capability not in state-hub:** +- Check `curl http://127.0.0.1:8000/capability-catalog/?capability_type=haskell-build-agent` +- The agent retries 20 times on boot; check logs for registration errors +- The forward tunnel (`-L 18000:localhost:8000`) must be up before registration works + +**Build fails with missing libraries:** +- The VM includes common Haskell build deps. For additional system libraries: + `ssh haskell-build "sudo apt-get install -y libXXX-dev"` + +## Updating the Image + +Re-run Packer to build a new OVA. Import alongside the existing VM or replace it. +Build artefacts and keys live on the workstation (via rsync), not in the VM — the +image is disposable. diff --git a/infra/build-machines/haskell/files/build-agent.env.template b/infra/build-machines/haskell/files/build-agent.env.template new file mode 100644 index 0000000..fe3b288 --- /dev/null +++ b/infra/build-machines/haskell/files/build-agent.env.template @@ -0,0 +1,21 @@ +# Custodian State Hub URL — always access via forward tunnel (port 18000). +# The agent opens -L 18000:localhost:8000 alongside the reverse SSH tunnel, +# so this works regardless of network topology (LAN, VPN, different subnet). +# Matches the CoulombCore remote worker bridge pattern. 
#!/usr/bin/env python3
"""
build-agent — runs at VM boot.
1. Reads /etc/build-agent.env (falls back to the process environment)
2. Starts autossh to the workstation (reverse SSH tunnel + state-hub forward)
3. Detects GHC version
4. Registers a capability-catalog entry in the state-hub via the forward tunnel
5. Stays attached to autossh and exits with its status
"""
import os, json, socket, subprocess, time, sys
import urllib.request, urllib.error

# Settings the agent reads; used to pick values out of the process environment
# when the env file itself cannot be opened.
CONFIG_KEYS = (
    "STATE_HUB_URL", "STATE_HUB_DOMAIN", "SSH_RELAY_HOST",
    "SSH_RELAY_USER", "SSH_KEY_PATH", "REMOTE_PORT",
)
# Local end of the "-L 18000:localhost:8000" forward opened by start_tunnel().
FORWARD_PORT = 18000
REGISTER_ATTEMPTS = 20


def _clean_value(raw):
    """Normalise the right-hand side of a KEY=value line.

    A quoted value yields the text between the quotes. An unquoted value is
    cut at the first '#' that follows whitespace, so a trailing
    "  # explanation" is not taken as part of the setting, while a '#'
    embedded in a word (e.g. a URL fragment) is kept.
    """
    value = raw.strip()
    if value[:1] in ('"', "'"):
        closing = value.find(value[0], 1)
        if closing != -1:
            return value[1:closing]
    for idx in range(1, len(value)):
        if value[idx] == '#' and value[idx - 1].isspace():
            return value[:idx].rstrip()
    return value.strip('"')


def load_env(path="/etc/build-agent.env"):
    """Return the agent settings as a dict of strings.

    Reads KEY=value lines from ``path``; blank lines, lines starting with '#'
    and lines without '=' are skipped. If the file cannot be opened (missing,
    or not readable by the user the agent runs as), the settings are taken
    from the process environment instead, restricted to CONFIG_KEYS.
    """
    try:
        with open(path) as f:
            lines = f.readlines()
    except OSError as e:
        if not isinstance(e, FileNotFoundError):
            print(f"[build-agent] Cannot read {path}: {e} — using process environment",
                  flush=True)
        return {k: _clean_value(os.environ[k]) for k in CONFIG_KEYS if k in os.environ}

    env = {}
    for line in lines:
        line = line.strip()
        if line and not line.startswith('#') and '=' in line:
            k, _, v = line.partition('=')
            env[k.strip()] = _clean_value(v)
    return env


def get_ghc_version():
    """Return the last word of `ghc --version` from the first working binary,
    or "unknown" when no candidate runs successfully."""
    for path in [
        "/home/build/.ghcup/bin/ghc",
        "/usr/local/bin/ghc",
    ]:
        try:
            r = subprocess.run([path, "--version"],
                               capture_output=True, text=True, timeout=15)
            if r.returncode == 0:
                return r.stdout.strip().split()[-1]
        except Exception:
            continue
    return "unknown"


def get_local_ip():
    """Get the primary LAN IP (not loopback), or "unknown" on failure.

    connect() on a UDP socket only selects the outgoing interface; the
    socket's own address is then read back. The `with` block closes the
    socket on the error path as well.
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 80))
            return s.getsockname()[0]
    except Exception:
        return "unknown"


def register(cfg):
    """POST this VM's capability entry to the state-hub and return the decoded
    JSON response. Logs and re-raises on any failure so the caller can retry."""
    # State-hub is always accessed via the forward tunnel (port 18000), never
    # via direct LAN. This matches the CoulombCore remote worker pattern and
    # works regardless of network topology (LAN, VPN, different subnet).
    state_hub = cfg.get("STATE_HUB_URL", "http://127.0.0.1:18000")
    hostname = socket.gethostname()
    domain = cfg.get("STATE_HUB_DOMAIN", "railiance")
    remote_port = cfg.get("REMOTE_PORT", "12222")
    ghc_ver = get_ghc_version()
    local_ip = get_local_ip()

    payload = {
        "domain": domain,
        "capability_type": "haskell-build-agent",
        "title": f"Haskell Build Agent — {hostname}",
        "description": (
            f"GHC {ghc_ver} build sandbox on {hostname} ({local_ip}). "
            f"SSH tunnel port: {remote_port} on workstation."
        ),
        "keywords": [
            "haskell", "ghc", f"ghc-{ghc_ver}",
            "build-agent", "cabal", "stack",
            f"host:{hostname}", f"tunnel-port:{remote_port}",
        ],
    }

    data = json.dumps(payload).encode()
    req = urllib.request.Request(
        f"{state_hub}/capability-catalog/",
        data=data,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            result = json.loads(resp.read())
    except urllib.error.HTTPError as e:
        body = e.read().decode()
        print(f"[build-agent] Registration HTTP error {e.code}: {body}", flush=True)
        raise
    except Exception as e:
        print(f"[build-agent] Registration failed: {e}", flush=True)
        raise
    # Logged outside the try block: a response without "id" must not be
    # reported as a failed registration and re-POSTed by the retry loop.
    entry_id = result.get("id") if isinstance(result, dict) else None
    print(f"[build-agent] Registered capability: {entry_id}", flush=True)
    return result


def start_tunnel(cfg):
    """Launch autossh in the background and return its Popen handle.

    Returns None when SSH_RELAY_HOST is not configured.
    """
    relay_host = cfg.get("SSH_RELAY_HOST", "")
    relay_user = cfg.get("SSH_RELAY_USER", "worsch")
    ssh_key = cfg.get("SSH_KEY_PATH", "/home/build/.ssh/id_build")
    remote_port = cfg.get("REMOTE_PORT", "12222")

    if not relay_host:
        print("[build-agent] SSH_RELAY_HOST not set — tunnel disabled", flush=True)
        return None

    cmd = [
        "autossh",
        "-M", "0",                              # disable autossh monitoring port
        "-o", "ServerAliveInterval=30",
        "-o", "ServerAliveCountMax=3",
        "-o", "ExitOnForwardFailure=yes",
        # NOTE(review): host-key verification is switched off, so the relay
        # host is not authenticated. Consider pinning the workstation host key.
        "-o", "StrictHostKeyChecking=no",
        "-o", "UserKnownHostsFile=/dev/null",
        "-N",
        "-R", f"{remote_port}:localhost:22",    # reverse: workstation → VM SSH
        "-L", f"{FORWARD_PORT}:localhost:8000", # forward: VM → state-hub (port 18000)
        "-i", ssh_key,
        f"{relay_user}@{relay_host}",
    ]
    print(
        f"[build-agent] Opening tunnels: "
        f"-R {remote_port}→local:22, -L 18000→state-hub:8000",
        flush=True,
    )
    return subprocess.Popen(cmd)  # autossh manages reconnects internally


def _idle_forever():
    """Sleep forever so systemd considers the service active."""
    while True:
        time.sleep(3600)


def _wait_for_forward(proc, timeout=60.0):
    """Wait until the local forward port accepts connections.

    Returns True once a TCP connect to 127.0.0.1:FORWARD_PORT succeeds, False
    if ``proc`` exits first or ``timeout`` seconds pass.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if proc.poll() is not None:
            return False
        try:
            with socket.create_connection(("127.0.0.1", FORWARD_PORT), timeout=2):
                return True
        except OSError:
            time.sleep(1)
    return False


def open_tunnel(cfg):
    """Run the tunnel in the foreground and return autossh's exit status.

    Never returns when SSH_RELAY_HOST is not configured.
    """
    proc = start_tunnel(cfg)
    if proc is None:
        _idle_forever()
    return proc.wait()


def main():
    cfg = load_env()

    # The tunnel has to exist before registration: the default STATE_HUB_URL
    # points at the forward port that only autossh opens.
    tunnel = start_tunnel(cfg)
    if tunnel is not None and not _wait_for_forward(tunnel):
        print("[build-agent] Forward tunnel not ready yet — trying registration anyway",
              flush=True)

    # Retry registration until state-hub is reachable (network may not be ready)
    for attempt in range(REGISTER_ATTEMPTS):
        if tunnel is not None and tunnel.poll() is not None:
            print("[build-agent] autossh exited — giving up on registration", flush=True)
            break
        try:
            register(cfg)
            break
        except Exception:
            wait = min(10 * (attempt + 1), 60)
            print(f"[build-agent] Retrying in {wait}s ...", flush=True)
            time.sleep(wait)
    else:
        print("[build-agent] Registration permanently failed — continuing to tunnel",
              flush=True)

    if tunnel is None:
        _idle_forever()

    # Propagate autossh's status so the unit's Restart=on-failure can react
    # when the tunnel process dies with an error.
    sys.exit(tunnel.wait())


if __name__ == "__main__":
    main()
# Packer template for the Haskell build VM: installs Ubuntu from the live-server
# ISO via autoinstall, provisions the Haskell toolchain and the build agent,
# and exports a dated OVA.

packer {
  required_plugins {
    virtualbox = {
      version = ">= 1.1.0"
      source  = "github.com/hashicorp/virtualbox"
    }
  }
}

variable "vm_name" {
  type    = string
  default = "haskell-build"
}

# Disk size in MB.
variable "disk_size" {
  type    = number
  default = 40960
}

# RAM in MB.
variable "memory" {
  type    = number
  default = 8192
}

variable "cpus" {
  type    = number
  default = 4
}

variable "ghc_primary_version" {
  type    = string
  default = "9.8.4"
}

variable "ghc_secondary_version" {
  type    = string
  default = "9.6.6"
}

variable "cabal_version" {
  type    = string
  default = "3.12.1.0"
}

# NOTE(review): this URL names a specific point release; confirm it is still
# published at this location and that iso_checksum matches it.
variable "iso_url" {
  type    = string
  default = "https://releases.ubuntu.com/24.04/ubuntu-24.04.2-live-server-amd64.iso"
}

variable "iso_checksum" {
  type    = string
  default = "sha256:d6dab0c3a657988501b4bd76f1297c053df710e06e0c3aece60dead24f270b4d"
}

locals {
  # Build date (YYYYMMDD) used in the exported OVA's file name.
  timestamp = formatdate("YYYYMMDD", timestamp())
}

source "virtualbox-iso" "haskell-build" {
  vm_name              = var.vm_name
  guest_os_type        = "Ubuntu_64"
  disk_size            = var.disk_size
  hard_drive_interface = "sata"

  memory = var.memory
  cpus   = var.cpus

  iso_url      = var.iso_url
  iso_checksum = var.iso_checksum

  # NAT during build — Packer needs internet for ISO + packages.
  # Bridged networking is set post-import by setup-vm.sh (adapter names
  # are laptop-specific and cannot be baked into the image).
  vboxmanage = [
    ["modifyvm", "{{.Name}}", "--nat-localhostreachable1", "on"],
  ]

  # Served over HTTP to the installer as the cloud-init NoCloud seed.
  http_directory = "files/cloud-init"

  # Typed at the GRUB command line ("c"). Every command needs an explicit
  # <enter>; without it the text is typed but never executed.
  boot_wait = "5s"
  boot_command = [
    "c<wait>",
    "linux /casper/vmlinuz --- autoinstall ds='nocloud;s=http://{{.HTTPIP}}:{{.HTTPPort}}/'<enter><wait>",
    "initrd /casper/initrd<enter><wait>",
    "boot<enter>",
  ]

  ssh_username           = "build"
  ssh_password           = "build"
  ssh_timeout            = "30m"
  ssh_handshake_attempts = 100
  shutdown_command       = "echo 'build' | sudo -S shutdown -P now"

  # Export a single-file OVA. The builder's default format is "ovf", which
  # would leave no <vm_name>.ova for the rename step in the build block.
  format           = "ova"
  output_directory = "output-${var.vm_name}"
  output_filename  = "${var.vm_name}"
}

build {
  sources = ["source.virtualbox-iso.haskell-build"]

  # Stage agent files to /tmp (install-agent.sh moves them into place)
  provisioner "file" {
    source      = "files/build-agent.py"
    destination = "/tmp/build-agent.py"
  }

  provisioner "file" {
    source      = "files/build-agent.service"
    destination = "/tmp/build-agent.service"
  }

  provisioner "file" {
    source      = "files/build-agent.env.template"
    destination = "/tmp/build-agent.env.template"
  }

  # Install Haskell toolchain (GHCup + GHC + Cabal)
  provisioner "shell" {
    execute_command = "echo 'build' | sudo -S env {{ .Vars }} bash '{{ .Path }}'"
    script          = "scripts/install-haskell.sh"
    environment_vars = [
      "GHC_PRIMARY_VERSION=${var.ghc_primary_version}",
      "GHC_SECONDARY_VERSION=${var.ghc_secondary_version}",
      "CABAL_VERSION=${var.cabal_version}",
    ]
  }

  # Install build-agent + systemd unit
  provisioner "shell" {
    execute_command = "echo 'build' | sudo -S env {{ .Vars }} bash '{{ .Path }}'"
    script          = "scripts/install-agent.sh"
  }

  # Move the exported OVA next to the template under a dated name. No
  # "|| true": a missing OVA must fail the build instead of passing silently.
  post-processor "shell-local" {
    inline = [
      "cd output-${var.vm_name} && mv ${var.vm_name}.ova ../haskell-build-${local.timestamp}.ova",
    ]
  }
}
-f "${KEY_DIR}/${f}" ]; then + echo "ERROR: Missing ${KEY_DIR}/${f}" + exit 1 + fi +done + +# Create .ssh directory on VM +ssh -o StrictHostKeyChecking=no "${BUILD_USER}@${VM_IP}" \ + "mkdir -p ~/.ssh && chmod 700 ~/.ssh" + +# Copy SSH keys +scp -o StrictHostKeyChecking=no \ + "${KEY_DIR}/id_build" "${KEY_DIR}/id_build.pub" \ + "${BUILD_USER}@${VM_IP}:~/.ssh/" + +# Set correct permissions on private key +ssh -o StrictHostKeyChecking=no "${BUILD_USER}@${VM_IP}" \ + "chmod 600 ~/.ssh/id_build && chmod 644 ~/.ssh/id_build.pub" + +# Add the tunnel target's host key to known_hosts (optional — agent uses +# StrictHostKeyChecking=no, but this avoids warnings in manual SSH) +echo "==> Adding workstation public key to authorized_keys" +ssh -o StrictHostKeyChecking=no "${BUILD_USER}@${VM_IP}" \ + "cat ~/.ssh/id_build.pub >> ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys" + +# Copy build-agent.env to /etc (requires sudo) +echo "==> Installing build-agent.env" +scp -o StrictHostKeyChecking=no \ + "${KEY_DIR}/build-agent.env" "${BUILD_USER}@${VM_IP}:/tmp/build-agent.env" +ssh -o StrictHostKeyChecking=no "${BUILD_USER}@${VM_IP}" \ + "sudo cp /tmp/build-agent.env /etc/build-agent.env && sudo chmod 600 /etc/build-agent.env && rm /tmp/build-agent.env" + +# Disable password auth (now that keys are in place) +echo "==> Disabling password authentication" +ssh -o StrictHostKeyChecking=no "${BUILD_USER}@${VM_IP}" \ + "sudo sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config && sudo systemctl restart sshd" + +# Restart build-agent to pick up new env +echo "==> Restarting build-agent service" +ssh -o StrictHostKeyChecking=no -i "${KEY_DIR}/id_build" "${BUILD_USER}@${VM_IP}" \ + "sudo systemctl restart build-agent" + +echo "==> Done. VM is ready. 
#!/bin/bash
# install-haskell.sh — installs GHCup, two GHC versions and Cabal for the
# "build" user. Run as root by the Packer shell provisioner.
#
# Inputs (environment, all optional):
#   GHC_PRIMARY_VERSION    default GHC, set as the active version
#   GHC_SECONDARY_VERSION  additional GHC installed alongside
#   CABAL_VERSION          cabal-install version
set -euo pipefail

# Exported so that apt-get (a child process) actually sees it.
export DEBIAN_FRONTEND=noninteractive

BUILD_USER="build"
BUILD_HOME="/home/${BUILD_USER}"

# Run one command line as the build user in a login shell.
# runuser -l starts from a fresh environment: variables exported in this
# script are NOT visible inside, so anything needed must be part of "$1".
as_build() {
    runuser -l "$BUILD_USER" -c "$1"
}

# System deps (already installed via cloud-init but idempotent)
apt-get update -qq
apt-get install -y -qq build-essential curl git \
    libgmp-dev libffi-dev zlib1g-dev libncurses-dev libtinfo-dev pkg-config

# GHCup — non-interactive bootstrap
# Primary version (9.8.4) is the default; secondary (9.6.6) covers LTS 22/23.
# Skip Stack (cabal covers 95% of projects) and HLS (saves ~2 GB image size).
GHC_PRIMARY="${GHC_PRIMARY_VERSION:-9.8.4}"
GHC_SECONDARY="${GHC_SECONDARY_VERSION:-9.6.6}"
CABAL_VERSION="${CABAL_VERSION:-3.12.1.0}"

# Bootstrap settings are handed over with env(1) inside the login shell,
# because of the environment reset described at as_build().
# Stack is turned off with BOOTSTRAP_HASKELL_INSTALL_NO_STACK; HLS is simply
# not requested (BOOTSTRAP_HASKELL_INSTALL_HLS stays unset).
# NOTE(review): variable names follow the GHCup bootstrap documentation —
# confirm against the script version served at get-ghcup.haskell.org.
bootstrap_env=(
    "BOOTSTRAP_HASKELL_NONINTERACTIVE=1"
    "BOOTSTRAP_HASKELL_GHC_VERSION=${GHC_PRIMARY}"
    "BOOTSTRAP_HASKELL_CABAL_VERSION=${CABAL_VERSION}"
    "BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1"
)

curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org \
    | runuser -l "$BUILD_USER" -c "env ${bootstrap_env[*]} sh -s -- --no-modify-path"

# Add ghcup env to build user profile (only once, so re-runs do not pile up
# duplicate lines)
ghcup_env_line='. "$HOME/.ghcup/env"'
for rc in "${BUILD_HOME}/.bashrc" "${BUILD_HOME}/.profile"; do
    if ! grep -qxF "$ghcup_env_line" "$rc" 2>/dev/null; then
        echo "$ghcup_env_line" >> "$rc"
    fi
    # A file created by the append above would otherwise belong to root.
    chown "${BUILD_USER}:${BUILD_USER}" "$rc"
done

# Install secondary GHC version (~500 MB, shared GHCup base — worth it)
as_build "source ~/.ghcup/env && ghcup install ghc $GHC_SECONDARY"

# Ensure primary is the default
as_build "source ~/.ghcup/env && ghcup set ghc $GHC_PRIMARY"

# Pre-warm cabal package db (saves 2-3 min on first real build)
as_build 'source ~/.ghcup/env && cabal update'

# Verify both versions present
as_build "source ~/.ghcup/env && ghc --version && cabal --version"
as_build "source ~/.ghcup/env && ghcup run --ghc $GHC_SECONDARY -- ghc --version"
#!/bin/bash
# smoke-test.sh — Validates the full Haskell build machine stack
#
# Prerequisites:
#   - VM is booted and tunnel is established
#   - State-hub is running on workstation (port 8000)
#
# Usage: ./smoke-test.sh [vm-ssh-host] [state-hub-url]
#
# Exit status: number of failed checks (0 = everything passed).
set -euo pipefail

VM="${1:-haskell-build}"
STATE_HUB="${2:-http://127.0.0.1:8000}"
REMOTE_DIR="/build/hello-smoke"

# Applied to every ssh call: BatchMode makes a failed key login fail at once
# instead of waiting at a password prompt, ConnectTimeout bounds each check
# when the tunnel is down.
SSH_OPTS=(-o BatchMode=yes -o ConnectTimeout=5 -o StrictHostKeyChecking=no)

PASS=0
FAIL=0

# Run a command on the VM with the shared ssh options.
vm() {
    ssh "${SSH_OPTS[@]}" "$VM" "$@"
}

# check DESCRIPTION COMMAND [ARGS...]
# Runs the command with output discarded and records PASS/FAIL by exit status.
check() {
    local desc="$1"
    shift
    if "$@" >/dev/null 2>&1; then
        echo "  PASS: $desc"
        PASS=$((PASS + 1))
    else
        echo "  FAIL: $desc"
        FAIL=$((FAIL + 1))
    fi
}

# Scratch directory for the sample project. Deliberately NOT named TMPDIR:
# that variable is the standard temp-dir setting and must stay untouched.
WORKDIR=$(mktemp -d)

# Runs on every exit path, including an abort caused by `set -e`.
cleanup() {
    rm -rf "$WORKDIR"
    vm "rm -rf $REMOTE_DIR" >/dev/null 2>&1 || true
}
trap cleanup EXIT

echo "=== Haskell Build Machine Smoke Test ==="
echo "VM: $VM | State Hub: $STATE_HUB"
echo ""

# 1. Check tunnel is up
echo "[1/5] Tunnel connectivity"
check "SSH to VM via tunnel" vm "true"

# 2. Check GHC is available
echo "[2/5] Haskell toolchain"
check "GHC is installed" vm "source ~/.ghcup/env && ghc --version"
check "Cabal is installed" vm "source ~/.ghcup/env && cabal --version"
check "GHCup is installed" vm "source ~/.ghcup/env && ghcup --version"

# 3. Check state-hub capability registration
echo "[3/5] State-hub capability"
check "State-hub is reachable" curl -sf "${STATE_HUB}/state/health"
CAPS=$(curl -sf "${STATE_HUB}/capability-catalog/?capability_type=haskell-build-agent" 2>/dev/null || echo "[]")
# NOTE(review): passes for any non-empty JSON container — confirm the endpoint
# returns a plain list of entries.
if echo "$CAPS" | python3 -c "import sys,json; entries=json.load(sys.stdin); sys.exit(0 if len(entries)>0 else 1)" 2>/dev/null; then
    echo "  PASS: haskell-build-agent capability registered"
    PASS=$((PASS + 1))
else
    echo "  FAIL: haskell-build-agent capability not found in catalog"
    FAIL=$((FAIL + 1))
fi

# 4. Build a minimal Haskell project
echo "[4/5] Remote build"
mkdir -p "$WORKDIR/hello"
cat > "$WORKDIR/hello/Main.hs" << 'HASKELL'
module Main where
main :: IO ()
main = putStrLn "Hello from Haskell build machine!"
HASKELL
cat > "$WORKDIR/hello/hello.cabal" << 'CABAL'
cabal-version: 2.4
name:          hello
version:       0.1.0.0
build-type:    Simple

executable hello
  main-is:          Main.hs
  build-depends:    base >=4.14
  default-language: Haskell2010
CABAL

# Sync and build. The sync goes through check() so that a failure is counted
# and reported instead of aborting the script under `set -e`.
check "project synced to VM" \
    rsync -a --delete -e "ssh ${SSH_OPTS[*]}" "$WORKDIR/hello/" "$VM:$REMOTE_DIR/"
check "cabal build succeeds" vm "cd $REMOTE_DIR && source ~/.ghcup/env && cabal build all"

# 5. Run the built executable
echo "[5/5] Execution"
check "built executable runs" vm "cd $REMOTE_DIR && source ~/.ghcup/env && cabal run hello"

echo ""
echo "=== Results: $PASS passed, $FAIL failed ==="
if [ "$FAIL" -eq 0 ]; then
    echo "All checks passed."
else
    echo "Some checks failed — review output above."
fi
exit "$FAIL"
"2900ae95-828b-4ced-8821-ded6b4a52e61" ``` @@ -206,7 +206,7 @@ runuser -l build -c "source ~/.ghcup/env && ghcup run --ghc $GHC_SECONDARY -- gh ```task id: CUST-WP-0032-T04 -status: todo +status: done priority: high state_hub_task_id: "5267d2f3-f8fb-4072-a9fa-40b18cf888bd" ``` @@ -246,7 +246,7 @@ dpkg-reconfigure openssh-server ```task id: CUST-WP-0032-T05 -status: todo +status: done priority: high state_hub_task_id: "18ee959d-30b7-4a06-9a84-02c4e5d7ba83" ``` @@ -406,7 +406,7 @@ if __name__ == "__main__": ```task id: CUST-WP-0032-T06 -status: todo +status: done priority: high state_hub_task_id: "1a6bf2a2-91e8-46f9-a82c-de08ccfda729" ``` @@ -465,7 +465,7 @@ REMOTE_PORT=12222 ```task id: CUST-WP-0032-T07 -status: todo +status: done priority: medium state_hub_task_id: "6bb36de9-df03-452d-bb1c-3dfc5a695265" ``` @@ -504,7 +504,7 @@ Create `scripts/inject-keys.sh` implementing Option B. ```task id: CUST-WP-0032-T08 -status: todo +status: done priority: high state_hub_task_id: "a42342cb-41ef-4915-8ce5-923a36bd2918" ``` @@ -539,7 +539,7 @@ bridge-status: ```task id: CUST-WP-0032-T09 -status: todo +status: done priority: medium state_hub_task_id: "5ea059a0-94c3-4b6e-ae99-cf20a6c4af1c" ``` @@ -566,7 +566,7 @@ Add `make install-ssh-config` target that appends this to `~/.ssh/config` ```task id: CUST-WP-0032-T10 -status: todo +status: done priority: low state_hub_task_id: "6bfd43de-b1e2-4114-b509-d1c78d066756" ``` @@ -601,7 +601,7 @@ ports: ```task id: CUST-WP-0032-T11 -status: todo +status: done priority: high state_hub_task_id: "f7efd28e-0ae4-41c9-bd76-649bd17bec16" ``` @@ -632,7 +632,7 @@ capability_type_entry_id: "" ```task id: CUST-WP-0032-T12 -status: todo +status: done priority: low state_hub_task_id: "55f30877-7fe7-4aaa-b74a-f9ab435f1d9a" ``` @@ -656,7 +656,7 @@ If filtering by `capability_type` is not supported, document the workaround ```task id: CUST-WP-0032-T13 -status: todo +status: done priority: high state_hub_task_id: "4c27f5db-a0c1-4f43-97a7-87472ce3a1cc" ``` 
@@ -723,7 +723,7 @@ vm-info: ```task id: CUST-WP-0032-T14 -status: todo +status: done priority: high state_hub_task_id: "b3a9613d-0d08-4f08-9361-d7e42c07069a" ``` @@ -743,7 +743,7 @@ Create `infra/build-machines/smoke-test.sh` that automates steps 2-5. ```task id: CUST-WP-0032-T15 -status: todo +status: done priority: medium state_hub_task_id: "4d858d77-4b9d-4f75-820a-b8f9d2dd3f19" ```