diff --git a/Makefile b/Makefile
index 408542a..a01c5b7 100644
--- a/Makefile
+++ b/Makefile
@@ -195,6 +195,14 @@ status: ## Show live security state of all hosts (UFW, fail2ban, SSH hardening)
 	cd $(ANS_DIR) && ansible all -u $(SSH_USER) -m shell -a "systemctl is-active fail2ban"
 	@echo "=== SSH hardening ==="
 	cd $(ANS_DIR) && ansible all -u $(SSH_USER) -m shell -a "grep -iE '^(PermitRootLogin|PasswordAuthentication)' /etc/ssh/sshd_config" --become
+	@echo ""
+	@echo "--- Hint: run 'make verify' for a structured pass/fail report ---"
+
+verify: ## Run Goss test suite against all hosts — exits non-zero on failure
+	@echo "Running Goss baseline assertions..."
+	@cd $(ANS_DIR) && ansible-playbook playbooks/verify.yaml -u $(SSH_USER) && \
+		echo "All assertions passed." || \
+		(echo "One or more assertions FAILED — see reports/ for TAP output." && exit 1)
 
 converge: ## Converge all hosts to the baseline (idempotent)
 	cd $(ANS_DIR) && ansible-playbook $(PLAY) -u $(SSH_USER)
diff --git a/ansible/playbooks/verify.yaml b/ansible/playbooks/verify.yaml
new file mode 100644
index 0000000..00fbdb5
--- /dev/null
+++ b/ansible/playbooks/verify.yaml
@@ -0,0 +1,13 @@
+---
+# verify.yaml — Deploy Goss, run baseline assertions, fetch TAP results.
+# Exit code mirrors Goss: 0 = all pass, non-zero = failures.
+#
+# Usage:
+#   ansible-playbook ansible/playbooks/verify.yaml -u admin
+#   make verify
+
+- hosts: all
+  become: true
+  gather_facts: true
+  roles:
+    - role: goss
diff --git a/ansible/roles/goss/tasks/main.yml b/ansible/roles/goss/tasks/main.yml
new file mode 100644
index 0000000..bfebdff
--- /dev/null
+++ b/ansible/roles/goss/tasks/main.yml
@@ -0,0 +1,62 @@
+---
+# Role: goss
+# Installs the Goss binary, deploys test files, runs assertions, fetches results.
+
+- name: Set Goss version and paths
+  ansible.builtin.set_fact:
+    goss_version: "0.4.9"
+    goss_bin: /usr/local/bin/goss
+    goss_dir: /etc/goss
+
+- name: Create Goss config directory
+  ansible.builtin.file:
+    path: "{{ goss_dir }}"
+    state: directory
+    owner: root
+    group: root
+    mode: "0755"
+
+- name: Download Goss binary
+  ansible.builtin.get_url:
+    url: "https://github.com/goss-org/goss/releases/download/v{{ goss_version }}/goss-linux-amd64"
+    dest: "{{ goss_bin }}"
+    mode: "0755"
+    checksum: "sha256:https://github.com/goss-org/goss/releases/download/v{{ goss_version }}/goss-linux-amd64.sha256"
+  register: goss_download
+
+# The test file lives in <repo>/goss/, two levels above ansible/playbooks/.
+- name: Copy baseline test file
+  ansible.builtin.copy:
+    src: "{{ playbook_dir }}/../../goss/baseline.yaml"
+    dest: "{{ goss_dir }}/baseline.yaml"
+    owner: root
+    group: root
+    mode: "0644"
+
+# block/always: the TAP report must be written even when assertions fail;
+# the host still ends up failed because there is no rescue section.
+- name: Run Goss and persist the TAP report
+  block:
+    - name: Run Goss assertions (TAP output)
+      ansible.builtin.command:
+        cmd: "{{ goss_bin }} -g {{ goss_dir }}/baseline.yaml validate --format tap"
+      register: goss_result
+      failed_when: goss_result.rc != 0
+      changed_when: false
+  always:
+    - name: Ensure local reports directory exists
+      ansible.builtin.file:
+        path: "{{ playbook_dir }}/../reports"
+        state: directory
+        mode: "0755"
+      delegate_to: localhost
+      become: false
+
+    - name: Write TAP report locally
+      ansible.builtin.copy:
+        # stdout is absent if the binary could not be started at all.
+        content: "{{ goss_result.stdout | default('') }}"
+        dest: "{{ playbook_dir }}/../reports/goss-{{ inventory_hostname }}-{{ ansible_date_time.date }}.tap"
+        mode: "0644"
+      delegate_to: localhost
+      become: false
diff --git a/docs/adr/ADR-002-repo-boundary-hosts-vs-bootstrap.md b/docs/adr/ADR-002-repo-boundary-hosts-vs-bootstrap.md
new file mode 100644
index 0000000..4b1e740
--- /dev/null
+++ b/docs/adr/ADR-002-repo-boundary-hosts-vs-bootstrap.md
@@ -0,0 +1,73 @@
+# ADR-002 — Repository Boundary: railiance-hosts vs railiance-bootstrap
+
+**Status:** Accepted
+**Date:** 2026-03-09
+**Deciders:** Bernd Worsch
+
+---
+
+## Context
+
+Two repositories exist in the Railiance domain that both touch server
+configuration: + +- **`railiance-hosts`** — manages the OS baseline, security hardening, + inventory, secrets, and test suite for every managed node. +- **`railiance-bootstrap`** — installs Kubernetes (k3s), Helm, GitOps + tooling, and platform services on top of an already-converged base node. + +Prior to this ADR, `railiance-bootstrap` contained Ansible playbooks +(`harden.yml`, `bootstrap.yml`) that overlapped with OS-level tasks now +owned by `railiance-hosts`. This created a split responsibility that could +cause drift and conflicting configuration. + +--- + +## Decision + +### Ownership table + +| Concern | Owner | Notes | +|---------|-------|-------| +| SSH hardening (PermitRootLogin, PasswordAuthentication) | `railiance-hosts` | Defined in `spec/server-baseline.yaml` | +| UFW firewall rules (including k3s/Flannel ports) | `railiance-hosts` | Spec section: `firewall.rules` | +| fail2ban installation and SSH jail | `railiance-hosts` | Spec section: `security.fail2ban_jails` | +| Required OS packages (ufw, fail2ban, git, curl, age, sops) | `railiance-hosts` | Spec section: `packages.installed` | +| Admin user + sudo config | `railiance-hosts` | Spec section: `users` | +| HISTCONTROL and shell security defaults | `railiance-hosts` | Spec section: `security` | +| SOPS/age key agent | `railiance-hosts` | `roles/sops_agent` | +| k3s installation | `railiance-bootstrap` | Consumes a converged base node | +| Helm + GitOps tooling | `railiance-bootstrap` | | +| Application-layer Kubernetes resources | `railiance-bootstrap` | | + +### Rule + +> **Any item present in `spec/server-baseline.yaml` MUST NOT be managed +> by `railiance-bootstrap`.** + +`railiance-bootstrap` may add UFW rules for Kubernetes components (e.g. +NodePort ranges, cluster-internal ports) but must not remove or override +the base rules defined in this repo's spec. 
+ +### Superseded files in `railiance-bootstrap` + +The following files in `railiance-bootstrap` are superseded by the roles +and spec in `railiance-hosts` and should not be used for new work: + +- `ansible/harden.yml` +- `ansible/bootstrap.yml` (the OS-hardening portions) + +An ecosystem todo (`[repo:railiance-bootstrap]`) should be filed to +formally retire these files or scope them down to k3s-only tasks. + +--- + +## Consequences + +- `railiance-hosts` converge step (`make converge`) must run and pass + before `railiance-bootstrap` deploys anything. +- Changes to the OS security baseline (new packages, firewall rules, + SSH settings) go into `spec/server-baseline.yaml` → update the Ansible + role → update `goss/baseline.yaml` — all in this repo. +- `make verify` provides a machine-readable assertion that the converge + step produced the expected state, suitable for CI gating. diff --git a/goss/baseline.yaml b/goss/baseline.yaml new file mode 100644 index 0000000..1bef9a1 --- /dev/null +++ b/goss/baseline.yaml @@ -0,0 +1,68 @@ +# Goss baseline assertions for railiance managed nodes +# Derived from spec/server-baseline.yaml — keep in sync. 
+# Run: goss -g /etc/goss/baseline.yaml validate
+
+package:
+  ufw:
+    installed: true
+  fail2ban:
+    installed: true
+  git:
+    installed: true
+  curl:
+    installed: true
+  vim:
+    installed: true
+  htop:
+    installed: true
+  age:
+    installed: true
+  sops:
+    installed: true
+
+service:
+  ufw:
+    enabled: true
+    running: true
+  fail2ban:
+    enabled: true
+    running: true
+  ssh:
+    enabled: true
+    running: true
+
+file:
+  /etc/ssh/sshd_config:
+    exists: true
+    contents:  # Goss 0.4 key; regexes must end in "/", so use (?i) rather than a /i suffix
+      - '/(?i)^PermitRootLogin no/'
+      - '/(?i)^PasswordAuthentication no/'
+      - '/(?i)^PubkeyAuthentication yes/'
+
+user:
+  admin:
+    exists: true
+    groups:
+      - sudo
+    shell: /bin/bash
+
+command:
+  "ufw status":
+    exit-status: 0
+    stdout:
+      - "Status: active"
+      - '/22\/tcp.*ALLOW/'
+      - '/6443\/tcp.*ALLOW/'
+      - '/8472\/udp.*ALLOW/'
+  "grep NOPASSWD /etc/sudoers.d/admin":
+    exit-status: 0
+    stdout:
+      - "NOPASSWD"
+  "grep -r HISTCONTROL /etc/profile.d/":
+    exit-status: 0
+    stdout:
+      - "ignorespace"
+  "fail2ban-client status sshd":
+    exit-status: 0
+    stdout:
+      - "Status for the jail: sshd"
diff --git a/goss/vars/baseline-vars.yaml b/goss/vars/baseline-vars.yaml
new file mode 100644
index 0000000..7743320
--- /dev/null
+++ b/goss/vars/baseline-vars.yaml
@@ -0,0 +1,11 @@
+# Parameterised values for goss/baseline.yaml. Override per host group if defaults differ.
+# NOTE(review): baseline.yaml currently hard-codes these values — keep both in sync.
+
+firewall_ports:
+  ssh: "22/tcp"  # matches the 22/tcp ALLOW check in goss/baseline.yaml
+  k3s_api: "6443/tcp"  # matches the 6443/tcp ALLOW check in goss/baseline.yaml
+  flannel_vxlan: "8472/udp"  # matches the 8472/udp ALLOW check in goss/baseline.yaml
+
+admin_user: admin  # same user that goss/baseline.yaml asserts under "user:"
+goss_binary: /usr/local/bin/goss  # same path as goss_bin in the goss role
+goss_tests_dir: /etc/goss  # same path as goss_dir in the goss role
diff --git a/workplans/RAIL-HO-WP-0002-server-spec-and-test-suite.md b/workplans/RAIL-HO-WP-0002-server-spec-and-test-suite.md
index 1b74808..aa66447 100644
--- a/workplans/RAIL-HO-WP-0002-server-spec-and-test-suite.md
+++ b/workplans/RAIL-HO-WP-0002-server-spec-and-test-suite.md
@@ -7,7 +7,7 @@ repo: railiance-hosts
 status: active
 owner: railiance
 topic_slug: railiance
-state_hub_workstream_id: "" # register after creating workstream in hub
+state_hub_workstream_id: "8fed53c2-4c39-4471-8bb9-61f58771fe0c"
 created: "2026-03-09"
 updated: "2026-03-09"
 ---
@@ -123,8 +123,10 @@ and tests must satisfy.
 
 ```task
 id: T03
-status: todo
+status: done
+completed: "2026-03-09"
 priority: high
+state_hub_task_id: "a34a1626-ff38-4925-a957-d94036fbded6"
 ```
 
 Create `goss/baseline.yaml` with Goss assertions that implement every item in
@@ -189,8 +191,10 @@ user:
 
 ```task
 id: T04
-status: todo
+status: done
+completed: "2026-03-09"
 priority: high
+state_hub_task_id: "c072c45b-f18d-45be-b747-6d219c3f1439"
 ```
 
 Create `ansible/roles/goss/` with tasks that:
@@ -217,8 +221,10 @@ clean node, non-zero on a deliberately broken one (test with a manual config cha
 
 ```task
 id: T05
-status: todo
+status: done
+completed: "2026-03-09"
 priority: medium
+state_hub_task_id: "a8100b8e-aed0-4bb4-a0dc-a6bdf3938b8d"
 ```
 
 Add to Makefile:
@@ -239,8 +245,10 @@ Also update `make status` to print a summary line ("All assertions passed" /
 
 ```task
 id: T06
-status: todo
+status: done
+completed: "2026-03-09"
 priority: medium
+state_hub_task_id: "c3d98022-638d-4dcb-bdc7-a9501e1b6cd9"
 ```
 
 Create `docs/adr/ADR-002-repo-boundary-hosts-vs-bootstrap.md` documenting: