Files
the-custodian/Makefile
tegwick 63474845f8 fix(e2e-cron-install): rsync repo before installing cron
If the repo doesn't exist on the sandbox host, the chmod fails.
Now e2e-cron-install rsyncs the repo first (same mechanism as make e2e),
then installs the cron entry. run-on-host.sh uses git pull for
subsequent updates.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 02:26:34 +01:00

174 lines
7.0 KiB
Makefile

# the-custodian top-level Makefile
#
# Custodian agent SSH identity
# ----------------------------
# make custodian-keygen — generate ~/.ssh/id_custodian_agent (one-time)
# and write the public key into railiance-infra
# ansible/inventory/group_vars/all.yaml
# make custodian-key-show — print the current public key (for manual ops)
#
# After keygen, run in railiance-infra:
# make provision-custodian-agent (deploys the key to all managed hosts)
#
# The private key is NEVER committed. The public key is committed via railiance-infra.
# Path of the custodian agent private key; the public key is the same path plus ".pub".
CUSTODIAN_KEY   := ${HOME}/.ssh/id_custodian_agent
# Location where the railiance-infra repository is expected on this machine.
RAILIANCE_INFRA := ${HOME}/railiance-infra
# Ansible vars file whose custodian_agent_pubkey entry is rewritten by custodian-keygen.
AGENT_VARS_FILE := ${RAILIANCE_INFRA}/ansible/inventory/group_vars/all.yaml
# custodian-keygen
#   1. Refuses to run when the private key file already exists, so an existing
#      key is never overwritten.
#   2. Generates an ed25519 keypair with an empty passphrase and the comment
#      "custodian-agent".
#   3. Rewrites every "custodian_agent_pubkey:" entry in AGENT_VARS_FILE so it
#      carries the new public key. When the file has no such entry the step
#      now fails with an error instead of printing a success message without
#      having changed anything.
#   4. Prints the follow-up commands to run in railiance-infra.
#
# The Python snippet is passed to `python3 -c` as one logical line: every
# continuation line must start with exactly one TAB (make strips it) and no
# further indentation, otherwise Python reports an IndentationError.
.PHONY: custodian-keygen
custodian-keygen: ## Generate custodian agent SSH keypair (one-time setup)
	@if [ -f "$(CUSTODIAN_KEY)" ]; then \
		echo "Key already exists at $(CUSTODIAN_KEY). Remove it first to regenerate."; \
		exit 1; \
	fi
	ssh-keygen -t ed25519 -f "$(CUSTODIAN_KEY)" -C "custodian-agent" -N ""
	@echo ""
	@echo "Public key:"
	@cat "$(CUSTODIAN_KEY).pub"
	@echo ""
	@PUBKEY=$$(cat "$(CUSTODIAN_KEY).pub") && \
	python3 -c "\
	import sys, re; \
	path = '$(AGENT_VARS_FILE)'; \
	entry = 'custodian_agent_pubkey: \"' + sys.argv[1] + '\"'; \
	content = open(path).read(); \
	updated, hits = re.subn(r'custodian_agent_pubkey:.*', lambda m: entry, content); \
	hits or sys.exit('ERROR: no custodian_agent_pubkey: entry in ' + path + ' - add one, then paste the key shown by: make custodian-key-show'); \
	open(path, 'w').write(updated); \
	print('Public key written to ' + path)" "$$PUBKEY"
	@echo ""
	@echo "Next steps:"
	@echo " 1. cd $(RAILIANCE_INFRA) && git add ansible/inventory/group_vars/all.yaml && git commit -m 'feat: add custodian agent public key'"
	@echo " 2. cd $(RAILIANCE_INFRA) && make provision-custodian-agent"
# Prints the public half of the custodian agent key, or fails with a hint
# to run custodian-keygen when the .pub file is missing.
.PHONY: custodian-key-show
custodian-key-show: ## Print the custodian agent public key
	@if [ ! -f "$(CUSTODIAN_KEY).pub" ]; then \
		echo "No key found at $(CUSTODIAN_KEY). Run: make custodian-keygen"; \
		exit 1; \
	fi
	@cat "$(CUSTODIAN_KEY).pub"
## Deploy the custodian agent key directly via SSH (no Ansible required).
## Use when ansible is unavailable. Requires an existing admin key with SSH access.
## Usage: make custodian-key-deploy ADMIN_KEY=~/.ssh/id_ops
##        make custodian-key-deploy ADMIN_KEY=~/.ssh/id_ops HOST=192.168.1.50 SSHUSER=admin
#
# Steps performed on the remote host (as RAILIANCE_USER on RAILIANCE_HOST):
#   - create ~/.ssh with mode 700
#   - append the public key to ~/.ssh/authorized_keys unless that exact key
#     text is already present
#   - set authorized_keys to mode 600 and report how many lines mention
#     'custodian-agent'
#
# The presence check passes the key as the grep pattern (`grep -qF -- KEY`).
# The earlier form `echo KEY | grep -qF - FILE` searched for a literal "-",
# which matches any file that already holds an ssh-* key, so the append was
# skipped on every host with a non-empty authorized_keys.
# The check/append pair is grouped in { ...; } so a failed mkdir/chmod can no
# longer fall through to the append via the `||` branch.
# The public key is read by the recipe shell (after the guards have run)
# rather than with $(shell ...) at recipe-expansion time.
# NOTE: -o StrictHostKeyChecking=no disables host key verification for this
# connection.
.PHONY: custodian-key-deploy
custodian-key-deploy:
	@test -n "$(ADMIN_KEY)" || (echo "ERROR: ADMIN_KEY is required. Usage: make custodian-key-deploy ADMIN_KEY=~/.ssh/id_ops"; exit 1)
	@test -f "$(CUSTODIAN_KEY).pub" || (echo "ERROR: No custodian key found. Run: make custodian-keygen"; exit 1)
	@echo "Deploying custodian agent key to $(RAILIANCE_USER)@$(RAILIANCE_HOST)..."
	@pubkey=$$(cat "$(CUSTODIAN_KEY).pub") && \
	ssh -i "$(ADMIN_KEY)" -o StrictHostKeyChecking=no \
		$(RAILIANCE_USER)@$(RAILIANCE_HOST) \
		"mkdir -p ~/.ssh && chmod 700 ~/.ssh && \
		{ grep -qF -- '$$pubkey' ~/.ssh/authorized_keys 2>/dev/null || \
		echo '$$pubkey' >> ~/.ssh/authorized_keys; } && \
		chmod 600 ~/.ssh/authorized_keys && \
		echo 'Key deployed. Verifying...' && \
		grep -c 'custodian-agent' ~/.ssh/authorized_keys | xargs -I{} echo '{} custodian-agent key(s) in authorized_keys'"
	@echo "Done. Test with: make e2e-cron-list"
## Run e2e tests for a repo in a remote sandbox
## Usage: make e2e REPO=activity-core
## Requires: RAILIANCE01_HOST env var (or pass HOST=<ip>)
##
## Options:
##   REPO=<slug>      repository name under ~/ (required)
##   HOST=<host>      override RAILIANCE01_HOST
##   USER=root        SSH user (default: root); honored only when given on the
##                    make command line, never taken from the environment
##   KEY=             path to SSH key (optional; the custodian agent key is
##                    used when KEY is unset and that key file exists)
##   KEEP=            set to 1 (any non-empty value) to keep sandbox after run
##   WORKSTREAM_ID=   state-hub workstream ID for progress event

# Local checkout of the repository under test.
REPO_PATH := $(HOME)/$(REPO)

# Each E2E_*_FLAG below is either empty or one ready-to-append CLI option
# for `python3 -m e2e_framework`.

ifdef HOST
E2E_HOST_FLAG := --host $(HOST)
else
E2E_HOST_FLAG :=
endif

# Make imports every environment variable, and USER is normally set to the
# local login name, so a plain `ifdef USER` is always true and would always
# pass `--user <local login>`. Only an explicit `make ... USER=<name>` counts.
ifeq ($(origin USER),command line)
E2E_USER_FLAG := --user $(USER)
else
E2E_USER_FLAG :=
endif

# An explicit KEY wins; otherwise use the custodian agent key if present.
ifdef KEY
E2E_KEY_FLAG := --key $(KEY)
else ifneq ($(wildcard $(CUSTODIAN_KEY)),)
E2E_KEY_FLAG := --key $(CUSTODIAN_KEY)
else
E2E_KEY_FLAG :=
endif

ifdef KEEP
E2E_KEEP_FLAG := --keep
else
E2E_KEEP_FLAG :=
endif

ifdef WORKSTREAM_ID
E2E_WS_FLAG := --workstream-id $(WORKSTREAM_ID)
else
E2E_WS_FLAG :=
endif
## Install e2e cron job on railiance01 for a repo.
## Usage: make e2e-cron-install REPO=activity-core
## Requires: RAILIANCE01_HOST / RAILIANCE01_USER set, or pass HOST= SSHUSER=
## The cron runs e2e/run-on-host.sh weekly (Sunday 03:13) on railiance01.
## Idempotent: safe to re-run (replaces existing entry for the same repo).
# Remote host: HOST, else RAILIANCE01_HOST, else the built-in address.
RAILIANCE_HOST := $(if $(HOST),$(HOST),$(if $(RAILIANCE01_HOST),$(RAILIANCE01_HOST),92.205.62.239))
# Remote login: SSHUSER, else RAILIANCE01_USER, else the built-in account name.
RAILIANCE_USER := $(if $(SSHUSER),$(SSHUSER),$(if $(RAILIANCE01_USER),$(RAILIANCE01_USER),tegwick))
# SSH identity: the custodian agent key (see make custodian-keygen) unless
# KEY=~/.ssh/other_key is given.
RAILIANCE_KEY := $(if $(KEY),$(KEY),$(CUSTODIAN_KEY))
# Command prefix shared by the recipes that run a command on the remote host.
RAILIANCE_SSH := ssh -i "${RAILIANCE_KEY}" -o StrictHostKeyChecking=no ${RAILIANCE_USER}@${RAILIANCE_HOST}
# e2e-cron-install
#   1. Checks that REPO is set and that the local checkout contains
#      e2e/run-on-host.sh.
#   2. rsyncs the checkout to the remote host (excluding .git, caches, .venv
#      and node_modules; --delete removes remote files that no longer exist
#      locally).
#   3. Makes run-on-host.sh executable and installs the crontab line,
#      replacing any previous line for the same repo.
#
# Per-target settings are target-specific variables, so they are evaluated
# when the recipe runs and do not leak into other targets.
#   REMOTE_REPO assumes the remote home directory is /home/<user>.
#   CRON_TAG is the fixed string that identifies this repo's crontab line.
#   Matching it with grep -F (instead of the regex '<repo>-e2e') keeps
#   REPO=core from also removing the entry of a repo such as activity-core,
#   and stops '.' in a repo name from acting as a wildcard.
# NOTE(review): .git is excluded from the sync; if run-on-host.sh relies on
# `git pull` on the remote copy, confirm a git checkout exists there.
.PHONY: e2e-cron-install
e2e-cron-install: REPO_PATH = $(HOME)/$(REPO)
e2e-cron-install: REMOTE_REPO = /home/$(RAILIANCE_USER)/$(REPO)
e2e-cron-install: CRON_CMD = $(REMOTE_REPO)/e2e/run-on-host.sh >> /var/log/$(REPO)-e2e.log 2>&1
e2e-cron-install: CRON_LINE = 13 3 * * 0 $(CRON_CMD)
e2e-cron-install: CRON_TAG = /$(REPO)-e2e.log
e2e-cron-install:
	@test -n "$(REPO)" || (echo "ERROR: REPO is required."; exit 1)
	@test -d "$(REPO_PATH)" || (echo "ERROR: local repo not found: $(REPO_PATH)"; exit 1)
	@test -f "$(REPO_PATH)/e2e/run-on-host.sh" || (echo "ERROR: no e2e/run-on-host.sh in $(REPO_PATH)"; exit 1)
	@echo "--- syncing $(REPO) to $(RAILIANCE_USER)@$(RAILIANCE_HOST):$(REMOTE_REPO)"
	@rsync -az --delete \
		--exclude=.git --exclude=__pycache__ --exclude='*.pyc' \
		--exclude=.venv --exclude=node_modules \
		-e "ssh -i '$(RAILIANCE_KEY)' -o StrictHostKeyChecking=no" \
		"$(REPO_PATH)/" \
		"$(RAILIANCE_USER)@$(RAILIANCE_HOST):$(REMOTE_REPO)/"
	@echo "--- installing cron on $(RAILIANCE_USER)@$(RAILIANCE_HOST) for $(REPO)"
	@$(RAILIANCE_SSH) "chmod +x $(REMOTE_REPO)/e2e/run-on-host.sh && \
		( crontab -l 2>/dev/null | grep -vF -- '$(CRON_TAG)' ; echo '$(CRON_LINE)' ) | crontab - && \
		echo 'Cron installed:' && crontab -l | grep -F -- '$(CRON_TAG)'"
## Remove e2e cron job from railiance01 for a repo.
## Usage: make e2e-cron-remove REPO=activity-core
#
# Rewrites the remote crontab without the line whose log redirection ends in
# /<REPO>-e2e.log. The match is a fixed string (grep -F) that includes the
# leading "/", so REPO=core does not also remove the entry of a repo such as
# activity-core, and "." in a repo name is not treated as a regex wildcard.
.PHONY: e2e-cron-remove
e2e-cron-remove:
	@test -n "$(REPO)" || (echo "ERROR: REPO is required."; exit 1)
	@$(RAILIANCE_SSH) "( crontab -l 2>/dev/null | grep -vF -- '/$(REPO)-e2e.log' ) | crontab - && echo 'Cron entry removed'"
## List e2e cron jobs on railiance01.
# Prints every remote crontab line containing "e2e", or a placeholder line
# when there is none (including when the remote user has no crontab).
.PHONY: e2e-cron-list
e2e-cron-list:
	@$(RAILIANCE_SSH) "if crontab -l 2>/dev/null | grep 'e2e'; then :; else echo '(no e2e cron entries)'; fi"
# e2e
#   Validates that REPO is set and that the local checkout contains
#   e2e/e2e.yml, then runs `python3 -m e2e_framework` from the directory of
#   this Makefile with the repo path followed by the optional E2E_*_FLAG
#   options (host, user, key, keep, workstream id).
#   The repo path is quoted when handed to python, matching the quoting used
#   in the guard lines, so a path containing spaces stays a single argument.
.PHONY: e2e
e2e:
	@test -n "$(REPO)" || (echo "ERROR: REPO is required. Usage: make e2e REPO=activity-core"; exit 1)
	@test -d "$(REPO_PATH)" || (echo "ERROR: repo path does not exist: $(REPO_PATH)"; exit 1)
	@test -f "$(REPO_PATH)/e2e/e2e.yml" || (echo "ERROR: no e2e/e2e.yml in $(REPO_PATH)"; exit 1)
	cd "$(CURDIR)" && python3 -m e2e_framework \
		"$(REPO_PATH)" \
		$(E2E_HOST_FLAG) \
		$(E2E_USER_FLAG) \
		$(E2E_KEY_FLAG) \
		$(E2E_KEEP_FLAG) \
		$(E2E_WS_FLAG)