Add an allocation health check to the Enos test_cluster_health module and
share an error_exit helper across the client, job and server checks.

Review notes for this revision:
  * allocs.sh: the allocation to stop is picked with an index in
    0..allocs_length-1. The previous "% (length + 1)" could select an element
    past the end of the list and hand "null" to "nomad alloc stop".
  * allocs.sh: the second wait loop counts running allocations with the jq
    "length" filter instead of a nested 'jq "length"' call inside the jq
    program, and its messages describe what that loop is waiting for.
  * servers.sh: the last_log_index test quotes its command substitution, so an
    empty jq result fails the check instead of breaking the "[" expression.
  * jobs.sh: the failure message lists the first column of "nomad job status"
    (the job ID) for jobs that are not running, and the command is run once.
  * clients.sh: the unused "status" variable and the stray blank line before
    "fi" are gone.
  * All scripts: error_exit writes to stderr and expansions are quoted.

Caveat: this patch was rebuilt from a copy in which line breaks and runs of
whitespace had been collapsed. Indentation and blank context lines were
reconstructed from the original hunk line counts (one blank context line in
clients.sh could not be placed with certainty), so run "git apply --check"
against the target tree before applying. The post-image blob ids on the
"index" lines are the ones carried by the original patch.

diff --git a/enos/modules/test_cluster_health/scripts/allocs.sh b/enos/modules/test_cluster_health/scripts/allocs.sh
new file mode 100755
index 00000000000..1dc7aa23fac
--- /dev/null
+++ b/enos/modules/test_cluster_health/scripts/allocs.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# Copyright (c) HashiCorp, Inc.
+# SPDX-License-Identifier: BUSL-1.1
+
+# Checks that the expected number of allocations is running, then stops one
+# of them and waits until the number of running allocations is back to ALLOCS.
+#
+# Required environment:
+#   ALLOCS - expected number of running allocations.
+
+set -euo pipefail
+
+# Prints the given message to stderr and exits with status 1.
+error_exit() {
+    echo "Error: $1" >&2
+    exit 1
+}
+
+# Quality: nomad_allocs_status: A GET call to /v1/allocs returns the correct number of allocations and they are all running
+allocs=$(nomad alloc status -json)
+running_allocs=$(echo "$allocs" | jq '[.[] | select(.ClientStatus == "running")]')
+allocs_length=$(echo "$running_allocs" | jq 'length')
+
+if [ -z "$allocs_length" ]; then
+    error_exit "No allocs found"
+fi
+
+if [ "$allocs_length" -ne "$ALLOCS" ]; then
+    error_exit "Some allocs are not running $(echo "$allocs" | jq -r '.[] | select(.ClientStatus != "running") | .ID')"
+fi
+
+echo "All allocs are running."
+
+# Quality: nomad_reschedule_alloc: A POST / PUT call to /v1/allocation/:alloc_id/stop results in the stopped allocation being rescheduled
+
+MAX_WAIT_TIME=30  # Maximum wait time in seconds
+POLL_INTERVAL=2   # Interval between status checks
+
+# RANDOM % 0 is a division by zero, so stop here when nothing is running.
+if [ "$allocs_length" -eq 0 ]; then
+    error_exit "No running allocs available to stop"
+fi
+
+# Valid jq array indexes are 0..allocs_length-1.
+random_alloc_id=$(echo "$running_allocs" | jq -r ".[$((RANDOM % allocs_length))].ID")
+nomad alloc stop -detach "$random_alloc_id" || error_exit "Failed to stop allocation $random_alloc_id."
+
+echo "Waiting for allocation $random_alloc_id to reach 'complete' status..."
+elapsed_time=0
+while alloc_status=$(nomad alloc status -json "$random_alloc_id" | jq -r '.ClientStatus'); [ "$alloc_status" != "complete" ]; do
+    if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
+        error_exit "Allocation $random_alloc_id did not reach 'complete' status within $MAX_WAIT_TIME seconds."
+    fi
+
+    echo "Current status: $alloc_status. Retrying in $POLL_INTERVAL seconds..."
+    sleep "$POLL_INTERVAL"
+    elapsed_time=$((elapsed_time + POLL_INTERVAL))
+done
+
+echo "Waiting for all the allocations to be running again"
+elapsed_time=0
+while new_allocs=$(nomad alloc status -json | jq '[.[] | select(.ClientStatus == "running")] | length'); [ "$new_allocs" != "$ALLOCS" ]; do
+    if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
+        error_exit "Only $new_allocs of $ALLOCS allocations are running after $MAX_WAIT_TIME seconds."
+    fi
+
+    echo "Running allocations: $new_allocs of $ALLOCS. Retrying in $POLL_INTERVAL seconds..."
+    sleep "$POLL_INTERVAL"
+    elapsed_time=$((elapsed_time + POLL_INTERVAL))
+done
diff --git a/enos/modules/test_cluster_health/scripts/clients.sh b/enos/modules/test_cluster_health/scripts/clients.sh
index d8f5e102b21..24bac121781 100755
--- a/enos/modules/test_cluster_health/scripts/clients.sh
+++ b/enos/modules/test_cluster_health/scripts/clients.sh
@@ -4,34 +4,31 @@
 
 set -euo pipefail
 
+# Prints the given message to stderr and exits with status 1.
+error_exit() {
+    echo "Error: $1" >&2
+    exit 1
+}
+
 # Quality: "nomad_CLIENTS_status: A GET call to /v1/CLIENTS returns the correct number of clients and they are all eligible and ready"
-RUNNING_CLIENTS=$(nomad node status -json)
-CLIENTS_LENGTH=$(echo "$RUNNING_CLIENTS" | jq 'length' )
+clients=$(nomad node status -json)
+running_clients=$(echo "$clients" | jq '[.[] | select(.Status == "ready")]')
+clients_length=$(echo "$running_clients" | jq 'length')
 
-if [ -z "$CLIENTS_LENGTH" ]; then
-    echo "Error: No clients found"
-    exit 1
+if [ -z "$clients_length" ]; then
+    error_exit "No clients found"
 fi
 
-if [ "$CLIENTS_LENGTH" -ne "$CLIENTS" ]; then
-    echo "Error: The number of clients does not match the expected count"
-    exit 1
+if [ "$clients_length" -ne "$CLIENTS" ]; then
+    error_exit "Unexpected number of clients are ready $(echo "$clients" | jq -r '.[] | select(.Status != "ready") | .Name')"
 fi
 
-echo "$RUNNING_CLIENTS" | jq -c '.[]' | while read -r node; do
-    STATUS=$(echo "$node" | jq -r '.Status')
-
-    if [ "$STATUS" != "ready" ]; then
-        echo "Error: Client not alive"
-        exit 1
-    fi
-
-    ELIGIBILITY=$(echo "$node" | jq -r '.SchedulingEligibility')
+echo "$running_clients" | jq -c '.[]' | while read -r node; do
+    eligibility=$(echo "$node" | jq -r '.SchedulingEligibility')
 
-    if [ "$ELIGIBILITY" != "eligible" ]; then
-        echo "Error: Client not eligible"
-        exit 1
+    if [ "$eligibility" != "eligible" ]; then
+        error_exit "Client not eligible $(echo "$node" | jq -r '.Name')"
     fi
 done
 
-echo "All CLIENTS are eligible and running."
\ No newline at end of file
+echo "All CLIENTS are eligible and running."
diff --git a/enos/modules/test_cluster_health/scripts/jobs.sh b/enos/modules/test_cluster_health/scripts/jobs.sh
index 6c11609c789..c54db9d5ec1 100755
--- a/enos/modules/test_cluster_health/scripts/jobs.sh
+++ b/enos/modules/test_cluster_health/scripts/jobs.sh
@@ -4,35 +4,23 @@
 
 set -euo pipefail
 
+# Prints the given message to stderr and exits with status 1.
+error_exit() {
+    echo "Error: $1" >&2
+    exit 1
+}
+
 # Quality: nomad_job_status: A GET call to /v1/jobs returns the correct number of jobs and they are all running.
 
-RUNNING_JOBS=$(nomad job status)
-JOBS_LENGTH=$(echo "$RUNNING_JOBS" | awk 'NR > 1 {count++} END {print count}')
+job_list=$(nomad job status)
+jobs_length=$(echo "$job_list" | awk '$4 == "running" {count++} END {print count+0}')
 
-if [ -z "$JOBS_LENGTH" ]; then
-    echo "Error: No jobs found"
-    exit 1
+if [ -z "$jobs_length" ]; then
+    error_exit "No jobs found"
 fi
 
-if [ "$JOBS_LENGTH" -ne "$JOBS" ]; then
-    echo "Error: The number of jobs does not match the expected count"
-    exit 1
-fi
-
-if [ -n "$(echo "$RUNNING_JOBS" | awk '{if ($2 != "running") print $1}')" ]; then
-    echo "Error: Job not running"
-    exit 1
+if [ "$jobs_length" -ne "$JOBS" ]; then
+    error_exit "The number of jobs does not match the expected count $(echo "$job_list" | awk 'NR > 1 && $4 != "running" {print $1}')"
 fi
 
 echo "All JOBS are running."
-
-#if [ $(echo "$RUNNING_JOBS" | jq '[.[] | .Allocations | length] | add') nq "$ALLOCS"]; then
-#    exit 1
-#fi
-
-#if [jq '[.[] | .Allocations | all(.State == "running")] | all' input.json
-#]
-
-# Quality: nomad_allocs_status: A GET call to /v1/allocs returns the correct number of allocations and they are all running.
-
-echo "All allocs are running."
\ No newline at end of file
diff --git a/enos/modules/test_cluster_health/scripts/servers.sh b/enos/modules/test_cluster_health/scripts/servers.sh
index b4dba7de2ac..bef74153922 100755
--- a/enos/modules/test_cluster_health/scripts/servers.sh
+++ b/enos/modules/test_cluster_health/scripts/servers.sh
@@ -4,34 +4,27 @@
 
 set -euo pipefail
 
-# Quality: nomad_agent_info: A GET call to /v1/agent/members returns the correct number of running servers and they are all aliv
+# Prints the given message to stderr and exits with status 1.
+error_exit() {
+    echo "Error: $1" >&2
+    exit 1
+}
 
-RUNNING_SERVERS=$(nomad server members -json)
-SERVERS_LENGTH=$(echo "$RUNNING_SERVERS" | jq 'length' )
+# Quality: nomad_agent_info: A GET call to /v1/agent/members returns the correct number of running servers and they are all alive
+servers=$(nomad server members -json)
+running_servers=$(echo "$servers" | jq '[.[] | select(.Status == "alive")]')
+servers_length=$(echo "$running_servers" | jq 'length')
 
-if [ -z "$SERVERS_LENGTH" ]; then
-    echo "Error: No servers found"
-    exit 1
+if [ -z "$servers_length" ]; then
+    error_exit "No servers found"
 fi
 
-if [ "$SERVERS_LENGTH" -ne "$SERVERS" ]; then
-    echo "Error: The number of servers does not match the expected count"
-    exit 1
+if [ "$servers_length" -ne "$SERVERS" ]; then
+    error_exit "Unexpected number of servers are alive $(echo "$servers" | jq -r '.[] | select(.Status != "alive") | .Name')"
 fi
 
-echo "$RUNNING_SERVERS" | jq -c '.[]' | while read -r node; do
-    STATUS=$(echo "$node" | jq -r '.Status')
-
-    if [ "$STATUS" != "alive" ]; then
-        echo "Error: Server not alive"
-        exit 1
-    fi
-done
-
-RESULT=$(echo "$RUNNING_SERVERS" | jq -r "map(.last_log_index ) | unique | length == 1")
-if [ "$RESULT" != "true" ]; then
-    echo "Error: Server not up to date"
-    exit 1
+if [ "$(echo "$running_servers" | jq -r "map(.last_log_index ) | unique | length == 1")" != "true" ]; then
+    error_exit "Servers not up to date"
 fi
 
-echo "All SERVERS are alive and up to date."
\ No newline at end of file
+echo "All SERVERS are alive and up to date."