Skip to content

Commit

Permalink
func: add tests for allocs
Browse files Browse the repository at this point in the history
  • Loading branch information
Juanadelacuesta committed Jan 9, 2025
1 parent 8ebc0f0 commit 8778011
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 67 deletions.
59 changes: 59 additions & 0 deletions enos/modules/test_cluster_health/scripts/allocs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

set -euo pipefail

#######################################
# Report a fatal error and abort the script.
# Arguments: $1 - human-readable description of the failure
# Outputs:   "Error: <message>" on stderr, so diagnostics never mix with
#            regular stdout output
# Returns:   does not return; exits the shell with status 1
#######################################
error_exit() {
  printf 'Error: %s\n' "$1" >&2
  exit 1
}

# Quality: nomad_allocs_status: A GET call to /v1/allocs returns the correct number of allocations and they are all running
# ALLOCS is the expected number of running allocations; it is never assigned
# in this script, so it has to be supplied by the caller.

# Take one snapshot of every allocation so the count and any failure report
# below describe the same state.
allocs=$(nomad alloc status -json)
# The expansion is quoted so the JSON reaches jq without word splitting or
# pathname expansion.
running_allocs=$(echo "$allocs" | jq '[.[] | select(.ClientStatus == "running")]')
allocs_length=$(echo "$running_allocs" | jq 'length')

# Guards against jq producing no output at all.
if [ -z "$allocs_length" ]; then
  error_exit "No allocs found"
fi

if [ "$allocs_length" -ne "$ALLOCS" ]; then
  # List the IDs of the allocations that are not running, from the snapshot.
  error_exit "Some allocs are not running $(echo "$allocs" | jq -r '.[] | select(.ClientStatus != "running") | .ID')"
fi

echo "All allocs are running."

# Quality: nomad_reschedule_alloc: A POST / PUT call to /v1/allocation/:alloc_id/stop results in the stopped allocation being rescheduled

MAX_WAIT_TIME=30 # Maximum wait time in seconds
POLL_INTERVAL=2  # Interval between status checks

# A modulus of zero is an arithmetic error, so bail out with a clear message
# when there is nothing to stop.
if [ "$allocs_length" -le 0 ]; then
  error_exit "No running allocations available to stop."
fi

# Pick one running allocation at random. Valid indices are
# 0..allocs_length-1, so the modulus is allocs_length itself; a modulus of
# allocs_length + 1 can select one past the end, where jq yields null.
random_alloc_id=$(echo "$running_allocs" | jq -r ".[$((RANDOM % allocs_length))].ID")
nomad alloc stop -detach "$random_alloc_id" || error_exit "Failed to stop allocation $random_alloc_id."

echo "Waiting for allocation $random_alloc_id to reach 'complete' status..."
elapsed_time=0
# The status is re-read at the top of every iteration; the loop ends as soon
# as it equals "complete" or the time budget is used up.
while alloc_status=$(nomad alloc status -json "$random_alloc_id" | jq -r '.ClientStatus'); [ "$alloc_status" != "complete" ]; do
  if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
    error_exit "Allocation $random_alloc_id did not reach 'complete' status within $MAX_WAIT_TIME seconds."
  fi

  echo "Current status: $alloc_status. Retrying in $POLL_INTERVAL seconds..."
  sleep "$POLL_INTERVAL"
  elapsed_time=$((elapsed_time + POLL_INTERVAL))
done

echo "Waiting for all the allocations to be running again"
elapsed_time=0
# Count the running allocations inside a single jq program: "length" is a jq
# builtin and belongs in the filter itself. Nesting a jq invocation inside the
# filter is a jq syntax error, which leaves the count empty on every poll.
while new_allocs=$(nomad alloc status -json | jq '[.[] | select(.ClientStatus == "running")] | length'); [ "$new_allocs" != "$ALLOCS" ]; do
  if [ "$elapsed_time" -ge "$MAX_WAIT_TIME" ]; then
    error_exit "Expected $ALLOCS running allocations but found $new_allocs after $MAX_WAIT_TIME seconds."
  fi

  echo "Running allocations: $new_allocs of $ALLOCS. Retrying in $POLL_INTERVAL seconds..."
  sleep "$POLL_INTERVAL"
  elapsed_time=$((elapsed_time + POLL_INTERVAL))
done

echo "All allocations are running again."
39 changes: 19 additions & 20 deletions enos/modules/test_cluster_health/scripts/clients.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,33 @@

set -euo pipefail

#######################################
# Report a fatal error and abort the script.
# Arguments: $1 - human-readable description of the failure
# Outputs:   "Error: <message>" on stderr, so diagnostics never mix with
#            regular stdout output
# Returns:   does not return; exits the shell with status 1
#######################################
error_exit() {
  printf 'Error: %s\n' "$1" >&2
  exit 1
}

# Quality: "nomad_CLIENTS_status: A GET call to /v1/nodes returns the correct number of clients and they are all eligible and ready"
RUNNING_CLIENTS=$(nomad node status -json)
CLIENTS_LENGTH=$(echo "$RUNNING_CLIENTS" | jq 'length' )
# Fetch every node once, then keep only those whose Status is "ready".
clients=$(nomad node status -json)
# The expansion is quoted so the JSON reaches jq without word splitting or
# pathname expansion.
running_clients=$(echo "$clients" | jq '[.[] | select(.Status == "ready")]')
clients_length=$(echo "$running_clients" | jq 'length')

if [ -z "$CLIENTS_LENGTH" ]; then
echo "Error: No clients found"
exit 1
if [ -z "$clients_length" ]; then
error_exit "No clients found"
fi

if [ "$CLIENTS_LENGTH" -ne "$CLIENTS" ]; then
echo "Error: The number of clients does not match the expected count"
exit 1
fi
if [ "$clients_length" -ne "$CLIENTS" ]; then
error_exit "Unexpected number of clients are ready $(echo $clients | jq '.[] | select(.Status != "ready") | .Name')"

echo "$RUNNING_CLIENTS" | jq -c '.[]' | while read -r node; do
STATUS=$(echo "$node" | jq -r '.Status')
fi

if [ "$STATUS" != "ready" ]; then
echo "Error: Client not alive"
exit 1
fi
echo "$running_clients" | jq -c '.[]' | while read -r node; do
status=$(echo "$node" | jq -r '.Status')

ELIGIBILITY=$(echo "$node" | jq -r '.SchedulingEligibility')
eligibility=$(echo "$node" | jq -r '.SchedulingEligibility')

if [ "$ELIGIBILITY" != "eligible" ]; then
echo "Error: Client not eligible"
exit 1
if [ "$eligibility" != "eligible" ]; then
error_exit "Client not eligible $(echo "$node" | jq -r '.Name')"
fi
done

echo "All CLIENTS are eligible and running."
echo "All CLIENTS are eligible and running."
34 changes: 10 additions & 24 deletions enos/modules/test_cluster_health/scripts/jobs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,21 @@

set -euo pipefail

#######################################
# Report a fatal error and abort the script.
# Arguments: $1 - human-readable description of the failure
# Outputs:   "Error: <message>" on stderr, so diagnostics never mix with
#            regular stdout output
# Returns:   does not return; exits the shell with status 1
#######################################
error_exit() {
  printf 'Error: %s\n' "$1" >&2
  exit 1
}

# Quality: nomad_job_status: A GET call to /v1/jobs returns the correct number of jobs and they are all running.

RUNNING_JOBS=$(nomad job status)
JOBS_LENGTH=$(echo "$RUNNING_JOBS" | awk 'NR > 1 {count++} END {print count}')
jobs_length=$(nomad job status| awk '$4 == "running" {count++} END {print count+0}')

if [ -z "$JOBS_LENGTH" ]; then
echo "Error: No jobs found"
exit 1
if [ -z "$jobs_length" ]; then
error_exit "No jobs found"
fi

if [ "$JOBS_LENGTH" -ne "$JOBS" ]; then
echo "Error: The number of jobs does not match the expected count"
exit 1
fi

if [ -n "$(echo "$RUNNING_JOBS" | awk '{if ($2 != "running") print $1}')" ]; then
echo "Error: Job not running"
exit 1
# Fail when the number of running jobs differs from the expected JOBS count,
# listing the jobs that are not running. In the `nomad job status` table the
# first column is the job ID and the fourth is the Status; NR > 1 skips the
# header row.
if [ "$jobs_length" -ne "$JOBS" ]; then
  error_exit "The number of jobs does not match the expected count $(nomad job status | awk 'NR > 1 && $4 != "running" {print $1}')"
fi

echo "All JOBS are running."

#if [ $(echo "$RUNNING_JOBS" | jq '[.[] | .Allocations | length] | add') nq "$ALLOCS"]; then
# exit 1
#fi

#if [jq '[.[] | .Allocations | all(.State == "running")] | all' input.json
#]

# Quality: nomad_allocs_status: A GET call to /v1/allocs returns the correct number of allocations and they are all running.

echo "All allocs are running."
38 changes: 15 additions & 23 deletions enos/modules/test_cluster_health/scripts/servers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,26 @@

set -euo pipefail

# Quality: nomad_agent_info: A GET call to /v1/agent/members returns the correct number of running servers and they are all alive
#######################################
# Report a fatal error and abort the script.
# Arguments: $1 - human-readable description of the failure
# Outputs:   "Error: <message>" on stderr, so diagnostics never mix with
#            regular stdout output
# Returns:   does not return; exits the shell with status 1
#######################################
error_exit() {
  printf 'Error: %s\n' "$1" >&2
  exit 1
}

RUNNING_SERVERS=$(nomad server members -json)
SERVERS_LENGTH=$(echo "$RUNNING_SERVERS" | jq 'length' )
# Quality: nomad_agent_info: A GET call to /v1/agent/members returns the correct number of running servers and they are all alive
# Fetch every server member once, then keep only those whose Status is "alive".
servers=$(nomad server members -json)
# The expansion is quoted so the JSON reaches jq without word splitting or
# pathname expansion.
running_servers=$(echo "$servers" | jq '[.[] | select(.Status == "alive")]')
servers_length=$(echo "$running_servers" | jq 'length')

if [ -z "$SERVERS_LENGTH" ]; then
echo "Error: No servers found"
exit 1
if [ -z "$servers_length" ]; then
error_exit "No servers found"
fi

if [ "$SERVERS_LENGTH" -ne "$SERVERS" ]; then
echo "Error: The number of servers does not match the expected count"
exit 1
# Fail when the number of alive servers differs from the expected SERVERS
# count, listing the names of the members that are not alive. The snapshot is
# quoted so jq receives the JSON exactly as nomad printed it.
if [ "$servers_length" -ne "$SERVERS" ]; then
  error_exit "Unexpected number of servers are alive $(echo "$servers" | jq '.[] | select(.Status != "alive") | .Name')"
fi

echo "$RUNNING_SERVERS" | jq -c '.[]' | while read -r node; do
STATUS=$(echo "$node" | jq -r '.Status')

if [ "$STATUS" != "alive" ]; then
echo "Error: Server not alive"
exit 1
fi
done

RESULT=$(echo "$RUNNING_SERVERS" | jq -r "map(.last_log_index ) | unique | length == 1")
if [ "$RESULT" != "true" ]; then
echo "Error: Server not up to date"
exit 1
# All alive servers must report the same last_log_index: the jq program
# collapses the values to their unique set and prints "true" only when exactly
# one distinct value remains. The command substitution is quoted so that an
# empty jq result compares as a string instead of breaking the test syntax.
# NOTE(review): confirm that each member object exposes a top-level
# last_log_index key; if it is absent every entry maps to null and this check
# passes unconditionally.
if [ "$(echo "$running_servers" | jq -r "map(.last_log_index ) | unique | length == 1")" != "true" ]; then
  error_exit "Servers not up to date"
fi

echo "All SERVERS are alive and up to date."
echo "All SERVERS are alive and up to date."

0 comments on commit 8778011

Please sign in to comment.