fix(custodian-agent): dedicated playbook, correct working dir

- ansible/playbooks/custodian-agent.yaml: minimal playbook with only
  the custodian_agent role — avoids loading base/sops_agent/etc when
  all we need is key injection
- Makefile: use custodian-agent.yaml in the provision targets; remove the
  --tags custodian_agent / --extra-vars workaround (it was fragile; a
  standalone playbook is the correct approach)

Manual invocation (from CoulombCore):
  cd ~/railiance-infra/ansible
  ansible-playbook playbooks/custodian-agent.yaml -u tegwick --limit Railiance01

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-27 02:20:33 +01:00
parent 087f5da57b
commit 8c08b4b806
2 changed files with 53 additions and 9 deletions

View File

@@ -136,20 +136,48 @@ ansible-bootstrap: ## Run base bootstrap play (users, ssh, ufw, sops-agent, cust
provision-custodian-agent: ## Deploy custodian agent SSH key to all managed hosts provision-custodian-agent: ## Deploy custodian agent SSH key to all managed hosts
@python3 -c "import yaml; d=yaml.safe_load(open('ansible/inventory/group_vars/all.yaml')); k=d.get('custodian_agent_pubkey',''); exit(0 if k else 1)" \ @python3 -c "import yaml; d=yaml.safe_load(open('ansible/inventory/group_vars/all.yaml')); k=d.get('custodian_agent_pubkey',''); exit(0 if k else 1)" \
|| (echo "ERROR: custodian_agent_pubkey is empty. Run: cd ~/the-custodian && make custodian-keygen"; exit 1) || (echo "ERROR: custodian_agent_pubkey is empty. Run: cd ~/the-custodian && make custodian-keygen"; exit 1)
cd ansible && ansible-playbook playbooks/bootstrap.yaml -u $(SSH_USER) \ cd ansible && ansible-playbook playbooks/custodian-agent.yaml -u $(SSH_USER)
--tags custodian_agent \
--extra-vars "@inventory/group_vars/all.yaml"
provision-custodian-agent-host: ## Deploy custodian agent key to one host: make provision-custodian-agent-host HOST=railiance01 provision-custodian-agent-host: ## Deploy custodian agent key to one host: make provision-custodian-agent-host HOST=Railiance01
@test -n "$(HOST)" || (echo "Usage: make provision-custodian-agent-host HOST=<name>"; exit 1) @test -n "$(HOST)" || (echo "Usage: make provision-custodian-agent-host HOST=Railiance01"; exit 1)
cd ansible && ansible-playbook playbooks/bootstrap.yaml -u $(SSH_USER) \ cd ansible && ansible-playbook playbooks/custodian-agent.yaml -u $(SSH_USER) \
--limit "$(HOST)" \ --limit "$(HOST)"
--tags custodian_agent \
--extra-vars "@inventory/group_vars/all.yaml"
# ---- Orchestration ---- # ---- Orchestration ----
apply: tf-fmt tf-apply ansible-bootstrap ## Provision via Terraform then converge via Ansible apply: tf-fmt tf-apply ansible-bootstrap ## Provision via Terraform then converge via Ansible
# deploy-stack is informational only: every recipe line below is an `@echo`,
# so running it prints the ordered S1-S5 checklist and executes none of the
# listed commands. The operator runs each printed step by hand.
# The printed ssh commands hard-code the login tegwick@92.205.130.254; edit
# the strings here if that user or address changes.
# S4 intentionally prints "(no steps required)".
deploy-stack: ## Print the full S1→S5 ordered deploy sequence (operator follows each step)
@echo ""
@echo "╔══════════════════════════════════════════════════════════════╗"
@echo "║ Railiance Stack — Full Deploy Sequence ║"
@echo "║ See docs/deploy-stack.md for full runbook ║"
@echo "╚══════════════════════════════════════════════════════════════╝"
@echo ""
@echo "PRE-CONDITIONS"
@echo " [ ] SSH key: ~/.ssh/id_ops"
@echo " [ ] SOPS key: ~/.config/sops/age/keys.txt (or SOPS_AGE_KEY)"
@echo " [ ] ops-bridge: bridge up state-hub-coulombcore k3s-api-coulombcore"
@echo ""
@echo "S1 — Infrastructure Substrate (this repo)"
@echo " make tf-plan && make tf-apply # provision server (skip if exists)"
@echo " ssh tegwick@92.205.130.254 'cd ~/railiance-infra/ansible && ansible-playbook playbooks/bootstrap.yaml -c local --become -l CoulombCore'"
@echo " make verify"
@echo ""
@echo "S2 — Cluster Runtime (railiance-cluster)"
@echo " ssh tegwick@92.205.130.254 'cd ~/railiance-cluster && make converge && make smoke'"
@echo ""
@echo "S3 — Platform Services (railiance-platform)"
@echo " ssh tegwick@92.205.130.254 'cd ~/railiance-platform && make db-deploy && make valkey-deploy'"
@echo ""
@echo "S4 — Developer Enablement (no workplan yet — ArgoCD already at cluster)"
@echo " (no steps required)"
@echo ""
@echo "S5 — Workloads (railiance-apps)"
@echo " ssh tegwick@92.205.130.254 'cd ~/railiance-apps && make gitea-deploy'"
@echo " ssh tegwick@92.205.130.254 'cd ~/railiance-apps && make state-hub-deploy' # T09"
@echo ""
@echo " Full runbook: docs/deploy-stack.md"
# ---- Utilities ---- # ---- Utilities ----
doctor: ## Check tools and basic repo setup doctor: ## Check tools and basic repo setup
@bash -ceu ' \ @bash -ceu ' \

View File

@@ -0,0 +1,16 @@
---
# Minimal playbook: applies only the custodian_agent role, which deploys the
# Custodian automation SSH key. Use it instead of bootstrap.yaml when key
# injection is all that is needed.
#
# Direct invocation:
#   cd ~/railiance-infra/ansible
#   ansible-playbook playbooks/custodian-agent.yaml -u tegwick --limit Railiance01
#
# Or via the Makefile from the repo root:
#   make provision-custodian-agent-host HOST=Railiance01
- name: Deploy Custodian agent SSH key
  hosts: all
  become: true
  # Load the shared group vars explicitly so the play does not depend on
  # --extra-vars being passed on the command line. The path is relative to
  # this playbook's directory.
  # NOTE(review): presumably the role reads custodian_agent_pubkey from this
  # file; confirm against roles/custodian_agent.
  vars_files:
    - ../inventory/group_vars/all.yaml
  roles:
    - role: custodian_agent