Harden activity-core verifier evidence
Some checks are pending
railiance-tests / smoke (push) Waiting to run
Some checks are pending
railiance-tests / smoke (push) Waiting to run
This commit is contained in:
@@ -16,9 +16,17 @@ STATE_HUB_PROGRESS_POLL_SECONDS="${STATE_HUB_PROGRESS_POLL_SECONDS:-5}"
|
|||||||
ACTIVITY_CORE_REPO="${ACTIVITY_CORE_REPO:-/home/worsch/activity-core}"
|
ACTIVITY_CORE_REPO="${ACTIVITY_CORE_REPO:-/home/worsch/activity-core}"
|
||||||
ACTIVITY_CORE_REMOTE_REPO="${ACTIVITY_CORE_REMOTE_REPO:-}"
|
ACTIVITY_CORE_REMOTE_REPO="${ACTIVITY_CORE_REMOTE_REPO:-}"
|
||||||
ACTIVITY_CORE_CLUSTER_HOST="${ACTIVITY_CORE_CLUSTER_HOST:-railiance01}"
|
ACTIVITY_CORE_CLUSTER_HOST="${ACTIVITY_CORE_CLUSTER_HOST:-railiance01}"
|
||||||
|
ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL="${ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL:-0}"
|
||||||
ACTIVITY_CORE_SYNC_RUNTIME_BUNDLE="${ACTIVITY_CORE_SYNC_RUNTIME_BUNDLE:-auto}"
|
ACTIVITY_CORE_SYNC_RUNTIME_BUNDLE="${ACTIVITY_CORE_SYNC_RUNTIME_BUNDLE:-auto}"
|
||||||
ACTIVITY_CORE_RESTART_DEPLOYMENTS="${ACTIVITY_CORE_RESTART_DEPLOYMENTS:-0}"
|
ACTIVITY_CORE_RESTART_DEPLOYMENTS="${ACTIVITY_CORE_RESTART_DEPLOYMENTS:-0}"
|
||||||
if [[ "$ACTIVITY_CORE_CLUSTER_HOST" == "local" ]]; then
|
if [[ "$ACTIVITY_CORE_CLUSTER_HOST" == "local" ]]; then
|
||||||
|
if [[ "$ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL" != "1" ]]; then
|
||||||
|
{
|
||||||
|
echo "ACTIVITY_CORE_CLUSTER_HOST=local requires ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL=1"
|
||||||
|
echo "Default verifier execution is cluster-owned via railiance01/SSH."
|
||||||
|
} >&2
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
ACTIVITY_CORE_CLUSTER_HOST=""
|
ACTIVITY_CORE_CLUSTER_HOST=""
|
||||||
fi
|
fi
|
||||||
if [[ -z "$ACTIVITY_CORE_REMOTE_REPO" ]]; then
|
if [[ -z "$ACTIVITY_CORE_REMOTE_REPO" ]]; then
|
||||||
@@ -38,9 +46,12 @@ STARTED_AT="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
|
|||||||
CURRENT_GATE="startup"
|
CURRENT_GATE="startup"
|
||||||
REMOTE_REVISION=""
|
REMOTE_REVISION=""
|
||||||
API_IMAGE=""
|
API_IMAGE=""
|
||||||
|
API_IMAGE_ID=""
|
||||||
SYNC_STATUS_JSON=""
|
SYNC_STATUS_JSON=""
|
||||||
DEFINITION_JSON=""
|
DEFINITION_JSON=""
|
||||||
TRIGGER_JSON=""
|
TRIGGER_JSON=""
|
||||||
|
TRIGGER_KEY=""
|
||||||
|
EXPECTED_RUN_ID=""
|
||||||
PROGRESS_JSON=""
|
PROGRESS_JSON=""
|
||||||
EVIDENCE_NOTE_JSON=""
|
EVIDENCE_NOTE_JSON=""
|
||||||
|
|
||||||
@@ -49,8 +60,8 @@ export STATE_HUB_URL EVIDENCE_WORKSTREAM_ID EVIDENCE_TASK_ID
|
|||||||
export STATE_HUB_PROGRESS_TIMEOUT_SECONDS STATE_HUB_PROGRESS_POLL_SECONDS
|
export STATE_HUB_PROGRESS_TIMEOUT_SECONDS STATE_HUB_PROGRESS_POLL_SECONDS
|
||||||
export INTER_HUB_SUBMISSION_STATUS INTER_HUB_DEFER_REASON STARTED_AT
|
export INTER_HUB_SUBMISSION_STATUS INTER_HUB_DEFER_REASON STARTED_AT
|
||||||
export ACTIVITY_CORE_CLUSTER_HOST ACTIVITY_CORE_REMOTE_REPO
|
export ACTIVITY_CORE_CLUSTER_HOST ACTIVITY_CORE_REMOTE_REPO
|
||||||
export ACTIVITY_CORE_SYNC_RUNTIME_BUNDLE ACTIVITY_CORE_RESTART_DEPLOYMENTS
|
export ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL ACTIVITY_CORE_SYNC_RUNTIME_BUNDLE ACTIVITY_CORE_RESTART_DEPLOYMENTS
|
||||||
export REMOTE_REVISION API_IMAGE SYNC_STATUS_JSON DEFINITION_JSON TRIGGER_JSON PROGRESS_JSON
|
export REMOTE_REVISION API_IMAGE API_IMAGE_ID SYNC_STATUS_JSON DEFINITION_JSON TRIGGER_JSON TRIGGER_KEY EXPECTED_RUN_ID PROGRESS_JSON
|
||||||
|
|
||||||
log() {
|
log() {
|
||||||
printf '[activity-core-verify] %s\n' "$*"
|
printf '[activity-core-verify] %s\n' "$*"
|
||||||
@@ -121,10 +132,12 @@ detail = {
|
|||||||
"activity_core_repo": os.environ.get("ACTIVITY_CORE_REMOTE_REPO"),
|
"activity_core_repo": os.environ.get("ACTIVITY_CORE_REMOTE_REPO"),
|
||||||
"activity_core_revision": os.environ.get("REMOTE_REVISION") or None,
|
"activity_core_revision": os.environ.get("REMOTE_REVISION") or None,
|
||||||
"api_image": os.environ.get("API_IMAGE") or None,
|
"api_image": os.environ.get("API_IMAGE") or None,
|
||||||
|
"api_image_id": os.environ.get("API_IMAGE_ID") or None,
|
||||||
"runtime_bundle": "k8s/railiance/20-runtime.yaml",
|
"runtime_bundle": "k8s/railiance/20-runtime.yaml",
|
||||||
"sync_job": sync_status,
|
"sync_job": sync_status,
|
||||||
"definition": definition,
|
"definition": definition,
|
||||||
"manual_trigger": trigger,
|
"manual_trigger": trigger,
|
||||||
|
"expected_activity_core_run_id": os.environ.get("EXPECTED_RUN_ID") or None,
|
||||||
"state_hub_progress": progress,
|
"state_hub_progress": progress,
|
||||||
"inter_hub_submission": {
|
"inter_hub_submission": {
|
||||||
"status": os.environ.get("INTER_HUB_SUBMISSION_STATUS"),
|
"status": os.environ.get("INTER_HUB_SUBMISSION_STATUS"),
|
||||||
@@ -137,8 +150,9 @@ if status == "passed":
|
|||||||
summary = (
|
summary = (
|
||||||
"Railiance activity-core deploy/verify passed: runtime reconciled, "
|
"Railiance activity-core deploy/verify passed: runtime reconciled, "
|
||||||
"actcore-sync completed, ops-service-inventory-probes remains disabled, "
|
"actcore-sync completed, ops-service-inventory-probes remains disabled, "
|
||||||
f"manual trigger {trigger.get('workflow_id') if isinstance(trigger, dict) else 'unknown'} ran, "
|
f"manual trigger {trigger.get('workflow_id') if isinstance(trigger, dict) else 'unknown'} ran as "
|
||||||
f"and State Hub ops_inventory_probe progress {progress.get('id') if isinstance(progress, dict) else 'unknown'} exists."
|
f"{os.environ.get('EXPECTED_RUN_ID') or 'unknown run'}, and State Hub ops_inventory_probe progress "
|
||||||
|
f"{progress.get('id') if isinstance(progress, dict) else 'unknown'} matched that run."
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
summary = (
|
summary = (
|
||||||
@@ -189,6 +203,16 @@ set -euo pipefail
|
|||||||
command -v kubectl >/dev/null
|
command -v kubectl >/dev/null
|
||||||
EOF
|
EOF
|
||||||
)"
|
)"
|
||||||
|
# An empty ACTIVITY_CORE_CLUSTER_HOST is what the startup guard leaves behind
# when local mode was selected, so report which kubectl context is about to be
# used. A failing or missing context lookup is tolerated and logged as "unknown".
if [[ -z "$ACTIVITY_CORE_CLUSTER_HOST" ]]; then
  LOCAL_CONTEXT="$(cluster_bash $'set -euo pipefail\nkubectl config current-context 2>/dev/null || true')"
  log "local kubectl context: ${LOCAL_CONTEXT:-unknown}"
fi
|
||||||
|
|
||||||
CURRENT_GATE="runtime bundle sync"
|
CURRENT_GATE="runtime bundle sync"
|
||||||
if should_sync_runtime_bundle; then
|
if should_sync_runtime_bundle; then
|
||||||
@@ -255,6 +279,17 @@ kubectl -n $(quote "$NAMESPACE") get deploy actcore-api -o jsonpath='{.spec.temp
|
|||||||
EOF
|
EOF
|
||||||
)"
|
)"
|
||||||
)"
|
)"
|
||||||
|
# Capture the image ID reported by the live actcore-api pod so the evidence
# carries an immutable runtime identity even when REMOTE_REVISION is empty.
#
# The jsonpath uses `.items[*]` rather than `.items[0]`: indexing element 0 of
# an empty pod list makes kubectl exit non-zero ("array index out of bounds"),
# which would fail the remote script before the explicit guard below can decide
# whether the missing image ID actually matters. With the wildcard an empty
# match simply yields an empty string, while real kubectl failures still fail.
API_IMAGE_ID="$(
  cluster_bash "$(cat <<EOF
set -euo pipefail
kubectl -n $(quote "$NAMESPACE") get pod -l app.kubernetes.io/name=actcore-api -o jsonpath='{.items[*].status.containerStatuses[0].imageID}'
EOF
)"
)"
# Several matching pods produce a whitespace-separated list; keep only the
# first entry so the evidence holds a single image ID.
API_IMAGE_ID="${API_IMAGE_ID%%[[:space:]]*}"

# Passed evidence must identify the runtime by at least one immutable handle:
# the remote repo revision or the container image ID.
if [[ -z "$REMOTE_REVISION" && -z "$API_IMAGE_ID" ]]; then
  printf 'could not determine activity-core revision or actcore-api imageID\n' >&2
  exit 1
fi
|
||||||
SYNC_STATUS_JSON="$(
|
SYNC_STATUS_JSON="$(
|
||||||
cluster_bash "$(cat <<EOF
|
cluster_bash "$(cat <<EOF
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
@@ -262,7 +297,7 @@ kubectl -n $(quote "$NAMESPACE") get job actcore-sync -o json
|
|||||||
EOF
|
EOF
|
||||||
)" | python3 -c 'import json,sys; j=json.load(sys.stdin); s=j.get("status",{}); print(json.dumps({"name": j["metadata"]["name"], "succeeded": s.get("succeeded", 0), "failed": s.get("failed", 0), "completion_time": s.get("completionTime")}))'
|
)" | python3 -c 'import json,sys; j=json.load(sys.stdin); s=j.get("status",{}); print(json.dumps({"name": j["metadata"]["name"], "succeeded": s.get("succeeded", 0), "failed": s.get("failed", 0), "completion_time": s.get("completionTime")}))'
|
||||||
)"
|
)"
|
||||||
export API_IMAGE SYNC_STATUS_JSON
|
export API_IMAGE API_IMAGE_ID SYNC_STATUS_JSON
|
||||||
|
|
||||||
CURRENT_GATE="disabled definition check"
|
CURRENT_GATE="disabled definition check"
|
||||||
log "checking ${DEFINITION_SLUG} is present and disabled"
|
log "checking ${DEFINITION_SLUG} is present and disabled"
|
||||||
@@ -331,7 +366,31 @@ if [[ -z "$TRIGGER_JSON" ]]; then
|
|||||||
printf 'manual trigger produced no output\n' >&2
|
printf 'manual trigger produced no output\n' >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
export TRIGGER_JSON
|
# Pull trigger_key out of the manual-trigger response held in TRIGGER_JSON.
# The expected run id below is derived from it, so a response without a key is
# a hard failure.
TRIGGER_KEY="$(
  python3 - <<'PY'
import json
import os

trigger = json.loads(os.environ["TRIGGER_JSON"])
# A response that is valid JSON but not an object (list, string, null) cannot
# carry a trigger_key; report it with the same diagnostic instead of letting
# `.get` raise an AttributeError traceback.
trigger_key = trigger.get("trigger_key") if isinstance(trigger, dict) else None
if not trigger_key:
    raise SystemExit("manual trigger response did not include trigger_key")
print(trigger_key)
PY
)"
export TRIGGER_KEY

# Derive the run id the manual trigger is expected to produce: a UUIDv5 in the
# URL namespace over "<DEFINITION_ID>:<trigger_key>".
# NOTE(review): this has to mirror how activity-core computes manual run ids;
# that code is not visible here — confirm the two stay in sync.
EXPECTED_RUN_ID="$(
  python3 - <<'PY'
import os
import uuid

definition_id = os.environ["DEFINITION_ID"]
trigger_key = os.environ["TRIGGER_KEY"]
print(uuid.uuid5(uuid.NAMESPACE_URL, f"{definition_id}:{trigger_key}"))
PY
)"
export TRIGGER_JSON EXPECTED_RUN_ID
log "manual trigger run id: ${EXPECTED_RUN_ID}"
|
||||||
|
|
||||||
CURRENT_GATE="State Hub ops_inventory_probe evidence"
|
CURRENT_GATE="State Hub ops_inventory_probe evidence"
|
||||||
log "polling State Hub for ops_inventory_probe progress"
|
log "polling State Hub for ops_inventory_probe progress"
|
||||||
@@ -348,6 +407,8 @@ base = os.environ["STATE_HUB_URL"].rstrip("/")
|
|||||||
started = datetime.fromisoformat(os.environ["STARTED_AT"].replace("Z", "+00:00"))
|
started = datetime.fromisoformat(os.environ["STARTED_AT"].replace("Z", "+00:00"))
|
||||||
timeout = int(os.environ["STATE_HUB_PROGRESS_TIMEOUT_SECONDS"])
|
timeout = int(os.environ["STATE_HUB_PROGRESS_TIMEOUT_SECONDS"])
|
||||||
interval = int(os.environ["STATE_HUB_PROGRESS_POLL_SECONDS"])
|
interval = int(os.environ["STATE_HUB_PROGRESS_POLL_SECONDS"])
|
||||||
|
definition_id = os.environ["DEFINITION_ID"]
|
||||||
|
expected_run_id = os.environ["EXPECTED_RUN_ID"]
|
||||||
deadline = time.monotonic() + timeout
|
deadline = time.monotonic() + timeout
|
||||||
url = base + "/progress/?" + urllib.parse.urlencode({"event_type": "ops_inventory_probe"})
|
url = base + "/progress/?" + urllib.parse.urlencode({"event_type": "ops_inventory_probe"})
|
||||||
|
|
||||||
@@ -358,18 +419,27 @@ while time.monotonic() < deadline:
|
|||||||
created_at = datetime.fromisoformat(event["created_at"].replace("Z", "+00:00"))
|
created_at = datetime.fromisoformat(event["created_at"].replace("Z", "+00:00"))
|
||||||
if created_at >= started:
|
if created_at >= started:
|
||||||
detail = event.get("detail") or {}
|
detail = event.get("detail") or {}
|
||||||
print(json.dumps({
|
if (
|
||||||
"id": event["id"],
|
isinstance(detail, dict)
|
||||||
"event_type": event.get("event_type"),
|
and detail.get("activity_id") == definition_id
|
||||||
"summary": event.get("summary"),
|
and detail.get("activity_core_run_id") == expected_run_id
|
||||||
"author": event.get("author"),
|
):
|
||||||
"created_at": event.get("created_at"),
|
print(json.dumps({
|
||||||
"detail_keys": sorted(detail.keys()) if isinstance(detail, dict) else [],
|
"id": event["id"],
|
||||||
}))
|
"event_type": event.get("event_type"),
|
||||||
raise SystemExit(0)
|
"summary": event.get("summary"),
|
||||||
|
"author": event.get("author"),
|
||||||
|
"created_at": event.get("created_at"),
|
||||||
|
"activity_id": detail.get("activity_id"),
|
||||||
|
"activity_core_run_id": detail.get("activity_core_run_id"),
|
||||||
|
"expected_activity_core_run_id": expected_run_id,
|
||||||
|
"idempotency_key": detail.get("idempotency_key"),
|
||||||
|
"detail_keys": sorted(detail.keys()),
|
||||||
|
}))
|
||||||
|
raise SystemExit(0)
|
||||||
time.sleep(interval)
|
time.sleep(interval)
|
||||||
|
|
||||||
raise SystemExit(f"no ops_inventory_probe progress found after {timeout}s")
|
raise SystemExit(f"no ops_inventory_probe progress for manual run {expected_run_id} found after {timeout}s")
|
||||||
PY
|
PY
|
||||||
)"
|
)"
|
||||||
export PROGRESS_JSON
|
export PROGRESS_JSON
|
||||||
|
|||||||
@@ -0,0 +1,120 @@
|
|||||||
|
---
|
||||||
|
id: RAILIANCE-WP-0013
|
||||||
|
type: workplan
|
||||||
|
title: "activity-core verifier evidence hardening"
|
||||||
|
domain: railiance
|
||||||
|
repo: railiance-cluster
|
||||||
|
status: finished
|
||||||
|
owner: codex
|
||||||
|
topic_slug: railiance
|
||||||
|
created: "2026-06-16"
|
||||||
|
updated: "2026-06-16"
|
||||||
|
state_hub_workstream_id: "a3abb83a-2d42-40f9-a5f6-1dbc36903436"
|
||||||
|
---
|
||||||
|
|
||||||
|
# activity-core verifier evidence hardening
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
`RAILIANCE-WP-0012` moved activity-core live deploy/verify ownership into
|
||||||
|
`railiance-cluster` and produced State Hub evidence
|
||||||
|
`baeeaeac-aa6d-4406-ae64-e54577f21386`, with `ops_inventory_probe` progress
|
||||||
|
`4c82360d-33e7-455b-8ab4-33facd4a3f8e`.
|
||||||
|
|
||||||
|
A follow-up review found hardening work that matters for routine verifier use:
|
||||||
|
the verifier should prove the State Hub progress event belongs to the specific
|
||||||
|
manual trigger it launched, evidence should include an immutable runtime
|
||||||
|
identity, and local `kubectl` mode should require an explicit double opt-in.
|
||||||
|
|
||||||
|
This is a hardening follow-up only; it does not reopen activity-core
|
||||||
|
`ACTIVITY-WP-0007-T06`.
|
||||||
|
|
||||||
|
## Correlate State Hub progress to the manual trigger
|
||||||
|
|
||||||
|
```task
|
||||||
|
id: RAILIANCE-WP-0013-T01
|
||||||
|
status: done
|
||||||
|
priority: high
|
||||||
|
state_hub_task_id: "d013a4a9-77fc-4cf0-babf-528d71acc0a1"
|
||||||
|
```
|
||||||
|
|
||||||
|
Update `tools/cmd/railiance-verify-activity-core` so after
|
||||||
|
`POST /activity-definitions/<id>/trigger` it parses `trigger_key`, derives the
|
||||||
|
expected activity-core manual `run_id`, and polls State Hub until it finds
|
||||||
|
`ops_inventory_probe` where:
|
||||||
|
|
||||||
|
- `detail.activity_id == DEFINITION_ID`;
|
||||||
|
- `detail.activity_core_run_id == expected_run_id`.
|
||||||
|
|
||||||
|
The verifier must not pass on merely any event created after `STARTED_AT`.
|
||||||
|
Include the expected run id and matched progress id in the evidence note.
|
||||||
|
|
||||||
|
2026-06-16: Implemented exact correlation. The verifier now derives the
|
||||||
|
expected UUIDv5 (URL namespace) `activity_core_run_id` from `<DEFINITION_ID>:<trigger_key>` and
|
||||||
|
requires State Hub `ops_inventory_probe` detail to match both `activity_id` and
|
||||||
|
`activity_core_run_id`.
|
||||||
|
|
||||||
|
## Record immutable runtime evidence
|
||||||
|
|
||||||
|
```task
|
||||||
|
id: RAILIANCE-WP-0013-T02
|
||||||
|
status: done
|
||||||
|
priority: medium
|
||||||
|
state_hub_task_id: "c5780ec1-9a74-401e-b60e-a0fdf2b7e5d2"
|
||||||
|
```
|
||||||
|
|
||||||
|
Ensure successful evidence includes either `activity_core_revision` or an
|
||||||
|
immutable Kubernetes image ID/digest. When the remote repo revision is
|
||||||
|
unavailable, fall back to the live `actcore-api` pod container `imageID`.
|
||||||
|
|
||||||
|
2026-06-16: Implemented `api_image_id` capture from the live `actcore-api` pod
|
||||||
|
container status and added a guard so passed evidence must include either the
|
||||||
|
remote repo revision or the immutable image ID.
|
||||||
|
|
||||||
|
## Guard explicit local kubectl override
|
||||||
|
|
||||||
|
```task
|
||||||
|
id: RAILIANCE-WP-0013-T03
|
||||||
|
status: done
|
||||||
|
priority: medium
|
||||||
|
state_hub_task_id: "0d60809f-3f1d-4ea9-a96f-af074911acc0"
|
||||||
|
```
|
||||||
|
|
||||||
|
Keep `railiance01`/SSH as the default executor. If
|
||||||
|
`ACTIVITY_CORE_CLUSTER_HOST=local` is selected, require an additional explicit
|
||||||
|
opt-in such as `ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL=1` and print the current
|
||||||
|
`kubectl` context before continuing.
|
||||||
|
|
||||||
|
2026-06-16: Implemented the double opt-in. `ACTIVITY_CORE_CLUSTER_HOST=local`
|
||||||
|
now exits before cluster access unless `ACTIVITY_CORE_ALLOW_LOCAL_KUBECTL=1` is
|
||||||
|
also set, and accepted local mode prints the current `kubectl` context.
|
||||||
|
|
||||||
|
## Verify and publish hardening evidence
|
||||||
|
|
||||||
|
```task
|
||||||
|
id: RAILIANCE-WP-0013-T04
|
||||||
|
status: done
|
||||||
|
priority: medium
|
||||||
|
state_hub_task_id: "150e4fa3-800c-4997-baaa-da696f5a0fc0"
|
||||||
|
```
|
||||||
|
|
||||||
|
Run `bash -n tools/cmd/railiance-verify-activity-core`, run
|
||||||
|
`make verify-activity-core` against Railiance01, confirm the evidence note
|
||||||
|
matched the manual trigger run id, and post a non-secret State Hub note citing
|
||||||
|
the new evidence.
|
||||||
|
|
||||||
|
2026-06-16: Verified with `bash -n tools/cmd/railiance-verify-activity-core`
|
||||||
|
and a live Railiance01 `make verify-activity-core` run. The verifier posted
|
||||||
|
State Hub evidence note `60256e9a-9d1b-44db-8999-738cf03bca2e`, matched manual
|
||||||
|
run id `90e3b112-d1e3-51af-8fb2-cb61f26add17`, matched
|
||||||
|
`ops_inventory_probe` progress `db408146-0310-4ac3-ac77-f73c5a41e070`, and
|
||||||
|
included `api_image_id`
|
||||||
|
`sha256:5ff92a8217c450ae06075d00862b6e2a92a83ca09eea18b5a5e96b5d2d728b35`.
|
||||||
|
|
||||||
|
Done when:
|
||||||
|
|
||||||
|
- the verifier rejects unrelated fresh `ops_inventory_probe` events;
|
||||||
|
- evidence includes a non-null revision or image digest;
|
||||||
|
- local `kubectl` mode requires explicit double opt-in;
|
||||||
|
- the Railiance01 verifier run posts a passed evidence note with matched run id;
|
||||||
|
- `make fix-consistency REPO=railiance-cluster` has synced the workplan.
|
||||||
Reference in New Issue
Block a user