diff --git a/enos/enos-scenario-upgrade.hcl b/enos/enos-scenario-upgrade.hcl index b8bac3c054f..69226815e68 100644 --- a/enos/enos-scenario-upgrade.hcl +++ b/enos/enos-scenario-upgrade.hcl @@ -9,23 +9,26 @@ scenario "upgrade" { matrix { arch = ["amd64"] - #arch = ["amd64", "arm64"] - //service_discovery = ["consul", "nomad"] - #edition = ["ce", "ent"] edition = ["ce"] os = ["linux"] - #os = ["linux", "windows"] - - /* exclude { + //service_discovery = ["consul", "nomad"] + //arch = ["amd64", "arm64"] + //edition = ["ce", "ent"] + //os = ["linux", "windows"] + exclude { os = ["windows"] arch = ["arm64"] - } */ + } } + providers = [ + provider.aws.default, + ] + locals { - cluster_name = "upgrade-testing-cluster-${matrix.os}-${matrix.arch}-${matrix.edition}-${var.product_version}" - linux_count = matrix.os == "linux" ? 4 : 0 - windows_count = matrix.os == "windows" ? 4 : 0 + cluster_name = "mcj-${matrix.os}-${matrix.arch}-${matrix.edition}-${var.product_version}" + linux_count = matrix.os == "linux" ? "4" : "0" + windows_count = matrix.os == "windows" ? 
"4" : "0" arch = matrix.arch } @@ -44,11 +47,11 @@ scenario "upgrade" { edition = matrix.edition product_version = var.product_version os = matrix.os - binary_path = "${var.nomad_local_binary}/${var.product_version}/${matrix.os}-${matrix.arch}-${matrix.edition}" + binary_path = "${var.nomad_local_binary}/${matrix.os}-${matrix.arch}-${matrix.edition}-${var.product_version}" } } - step "provision_cluster" { + step "provision_cluster" { depends_on = [step.copy_initial_binary] description = <<-EOF Using the binary from the previous step, provision a Nomad cluster using the e2e @@ -56,16 +59,16 @@ scenario "upgrade" { module = module.provision_cluster variables { - name = local.cluster_name - nomad_local_binary = step.copy_initial_binary.nomad_local_binary - server_count = var.server_count - client_count_linux = local.linux_count - client_count_windows_2016_amd64 = local.windows_count - nomad_license = var.nomad_license - consul_license = var.consul_license - volumes = false - nomad_region = var.nomad_region - instance_architecture = matrix.arch + name = local.cluster_name + nomad_local_binary = step.copy_initial_binary.nomad_local_binary + server_count = var.server_count + client_count_linux = local.linux_count + client_count_windows_2016 = local.windows_count + nomad_license = var.nomad_license + consul_license = var.consul_license + volumes = false + region = var.aws_region + instance_arch = matrix.arch } } @@ -77,13 +80,12 @@ scenario "upgrade" { module = module.run_workloads variables { - nomad_addr = step.provision_cluster.nomad_addr - ca_file = step.provision_cluster.ca_file - cert_file = step.provision_cluster.cert_file - key_file = step.provision_cluster.key_file - nomad_token = step.provision_cluster.nomad_token + nomad_addr = step.provision_cluster.nomad_addr + ca_file = step.provision_cluster.ca_file + cert_file = step.provision_cluster.cert_file + key_file = step.provision_cluster.key_file + nomad_token = step.provision_cluster.nomad_token } - verifies = [
quality.nomad_register_job, ] @@ -104,13 +106,13 @@ scenario "upgrade" { nomad_token = step.provision_cluster.nomad_token server_count = var.server_count client_count = local.linux_count + local.windows_count - jobs = step.run_initial_workloads.jobs_count + jobs_count = step.run_initial_workloads.jobs_count alloc_count = step.run_initial_workloads.allocs_count } verifies = [ quality.nomad_agent_info, - quality.nomad_agent_info_sel, + quality.nomad_agent_info_self, quality.nomad_nodes_status, quality.nomad_job_status, quality.nomad_allocs_status, @@ -119,6 +121,7 @@ scenario "upgrade" { } step "copy_upgrade_binary" { + depends_on = [step.provision_cluster] description = <<-EOF Bring the new upgraded binary from the artifactory EOF @@ -132,10 +135,10 @@ scenario "upgrade" { edition = matrix.edition product_version = var.upgrade_version os = matrix.os - binary_path = "${var.nomad_local_binary}/${var.upgrade_version}/${matrix.os}-${matrix.arch}-${matrix.edition}" + binary_path = "${var.nomad_local_binary}/${matrix.os}-${matrix.arch}-${matrix.edition}-${var.upgrade_version}" } } - /* +/* step "upgrade_servers" { description = <<-EOF Upgrade the cluster's servers by invoking nomad-cc ... @@ -160,6 +163,35 @@ scenario "upgrade" { // ...
} + step "server_upgrade_test_cluster_health" { + depends_on = [step.run_initial_workloads] + description = <<-EOF + Verify the health of the cluster by checking the status of all servers, nodes, jobs and allocs and stopping random allocs to check for correct reschedules" + EOF + + module = module.test_cluster_health + variables { + nomad_addr = step.provision_cluster.nomad_addr + ca_file = step.provision_cluster.ca_file + cert_file = step.provision_cluster.cert_file + key_file = step.provision_cluster.key_file + nomad_token = step.provision_cluster.nomad_token + server_count = var.server_count + client_count = local.linux_count + local.windows_count + jobs_count = step.run_initial_workloads.jobs_count + alloc_count = step.run_initial_workloads.allocs_count + } + + verifies = [ + quality.nomad_agent_info, + quality.nomad_agent_info_self, + quality.nomad_nodes_status, + quality.nomad_job_status, + quality.nomad_allocs_status, + quality.nomad_reschedule_alloc, + ] + } + step "upgrade_client" { description = <<-EOF Upgrade the cluster's clients by invoking nomad-cc ... @@ -181,5 +213,72 @@ scenario "upgrade" { step "run_clients_workloads" { // ...
- } */ + } + + step "client_upgrade_test_cluster_health" { + depends_on = [step.run_initial_workloads] + description = <<-EOF + Verify the health of the cluster by checking the status of all servers, nodes, jobs and allocs and stopping random allocs to check for correct reschedules" + EOF + + module = module.test_cluster_health + variables { + nomad_addr = step.provision_cluster.nomad_addr + ca_file = step.provision_cluster.ca_file + cert_file = step.provision_cluster.cert_file + key_file = step.provision_cluster.key_file + nomad_token = step.provision_cluster.nomad_token + server_count = var.server_count + client_count = local.linux_count + local.windows_count + jobs_count = step.run_initial_workloads.jobs_count + alloc_count = step.run_initial_workloads.allocs_count + } + + verifies = [ + quality.nomad_agent_info, + quality.nomad_agent_info_self, + quality.nomad_nodes_status, + quality.nomad_job_status, + quality.nomad_allocs_status, + quality.nomad_reschedule_alloc, + ] + } + */ + output "servers" { + value = step.provision_cluster.servers + } + + output "linux_clients" { + value = step.provision_cluster.linux_clients + } + + output "windows_clients" { + value = step.provision_cluster.windows_clients + } + + output "message" { + value = step.provision_cluster.message + } + + output "nomad_addr" { + value = step.provision_cluster.nomad_addr + } + + output "ca_file" { + value = step.provision_cluster.ca_file + } + + output "cert_file" { + value = step.provision_cluster.cert_file + } + + output "key_file" { + value = step.provision_cluster.key_file + } + + output "nomad_token" { + value = step.provision_cluster.nomad_token + sensitive = true + } + } diff --git a/enos/enos-terraform.hcl b/enos/enos-terraform.hcl index 63fa4dbc94c..618630eefa7 100644 --- a/enos/enos-terraform.hcl +++ b/enos/enos-terraform.hcl @@ -2,11 +2,16 @@ # SPDX-License-Identifier: BUSL-1.1 terraform "default" { + required_version = ">= 1.2.0" + required_providers { + aws = { + source =
"hashicorp/aws" + } + enos = { - source = "registry.terraform.io/hashicorp-forge/enos" + source = "registry.terraform.io/hashicorp-forge/enos" + version = ">= 0.4.0" } } - - required_version = ">= 1.2.0" -} \ No newline at end of file +} diff --git a/enos/enos-vars.hcl b/enos/enos-vars.hcl index f72ca765fa5..5bb6b147132 100644 --- a/enos/enos-vars.hcl +++ b/enos/enos-vars.hcl @@ -58,3 +58,8 @@ variable "server_count" { description = "The number of servers to provision." default = "3" } + +variable "aws_region" { + description = "The AWS region to deploy to." + default = "us-east-1" +} diff --git a/enos/modules/fetch_artifactory/outputs.tf b/enos/modules/fetch_artifactory/outputs.tf index 8e6ed5de00a..f31854edd2f 100644 --- a/enos/modules/fetch_artifactory/outputs.tf +++ b/enos/modules/fetch_artifactory/outputs.tf @@ -1,11 +1,6 @@ # Copyright (c) HashiCorp, Inc. # SPDX-License-Identifier: BUSL-1.1 -output "nomad_local_binary" { - value = "${var.binary_path}/nomad" - description = "Path where the binary will be placed" -} - output "vault_artifactory_release" { description = "Binary information returned from the artifactory" value = { @@ -14,12 +9,7 @@ output "vault_artifactory_release" { } } -output "nomad_local_binary_ubuntu_jammy" { - value = "${var.binary_path}/nomad" +output "nomad_local_binary" { + value = var.os == "windows" ?
"${var.binary_path}/nomad.exe" : "${var.binary_path}/nomad" description = "Path where the binary will be placed" } - -output "nomad_local_binary_windows_2016" { - value = "${var.binary_path}/nomad.exe" - description = "Path where the binary will be placed" -} diff --git a/enos/modules/fetch_artifactory/scripts/install.sh b/enos/modules/fetch_artifactory/scripts/install.sh index e29e4433831..42acbb6ffef 100755 --- a/enos/modules/fetch_artifactory/scripts/install.sh +++ b/enos/modules/fetch_artifactory/scripts/install.sh @@ -4,13 +4,10 @@ set -xeuo pipefail -# Variables -LOCAL_ZIP="nomad.zip" # Name for the downloaded file +LOCAL_ZIP="nomad.zip" -# Download the file wget --header="X-JFrog-Art-Api:$TOKEN" -O "$LOCAL_ZIP" "$URL" -#Check if the file was downloaded if [ $? -eq 0 ]; then echo "File downloaded successfully: $LOCAL_ZIP" else @@ -18,13 +15,9 @@ else exit 1 fi -# Create the BINARY_PATH directory mkdir -p "$BINARY_PATH" - -# Unzip the file unzip -o "$LOCAL_ZIP" -d "$BINARY_PATH" -# Check if the file was unzipped if [ $? -eq 0 ]; then echo "File unzipped successfully to $BINARY_PATH" else @@ -32,5 +25,4 @@ else exit 1 fi -# Remove the zipped file -rm "$LOCAL_ZIP" \ No newline at end of file +rm "$LOCAL_ZIP" diff --git a/enos/modules/run_workloads/jobs/.gitignore b/enos/modules/run_workloads/jobs/.gitignore deleted file mode 100644 index bb77286ba6a..00000000000 --- a/enos/modules/run_workloads/jobs/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.hcl diff --git a/enos/modules/run_workloads/main.tf b/enos/modules/run_workloads/main.tf index 049ad8a2de0..51075bf9664 100644 --- a/enos/modules/run_workloads/main.tf +++ b/enos/modules/run_workloads/main.tf @@ -9,23 +9,27 @@ terraform { } } +locals { + clean_token = trimspace(var.nomad_token) # Somewhere in the process, a newline is added to the token; trimspace removes it.
+} + resource "enos_local_exec" "wait_for_nomad_api" { environment = { NOMAD_ADDR = var.nomad_addr NOMAD_CACERT = var.ca_file NOMAD_CLIENT_CERT = var.cert_file NOMAD_CLIENT_KEY = var.key_file - NOMAD_TOKEN = var.nomad_token + NOMAD_TOKEN = local.clean_token } - inline = ["while ! nomad server members > /dev/null 2>&1; do echo 'waiting for nomad api...'; sleep 10; done"] + scripts = [abspath("${path.module}/scripts/wait_for_nomad_api.sh")] } resource "local_file" "nomad_job_files" { for_each = var.workloads - filename = "${path.module}/jobs/${each.key}.hcl" - content = templatefile(each.value.path, { alloc_count = each.value.alloc_count }) + filename = "${path.module}/jobs/${each.key}.nomad.hcl" + content = templatefile("${path.module}/${each.value.template}", { alloc_count = each.value.alloc_count }) } resource "enos_local_exec" "workloads" { @@ -36,8 +40,8 @@ resource "enos_local_exec" "workloads" { NOMAD_CACERT = var.ca_file NOMAD_CLIENT_CERT = var.cert_file NOMAD_CLIENT_KEY = var.key_file - NOMAD_TOKEN = var.nomad_token + NOMAD_TOKEN = local.clean_token } - inline = ["nomad job run ${path.module}/jobs/${each.key}.nomadhcl"] + inline = ["nomad job run ${path.module}/jobs/${each.key}.nomad.hcl"] } diff --git a/enos/modules/run_workloads/variables.tf b/enos/modules/run_workloads/variables.tf index a1342143822..4bbfbd021c0 100644 --- a/enos/modules/run_workloads/variables.tf +++ b/enos/modules/run_workloads/variables.tf @@ -30,11 +30,11 @@ variable "nomad_token" { variable "workloads" { description = "A map of workloads to provision" type = map(object({ - path = string + template = string alloc_count = number })) default = { - service_raw_exec = { path = "./templates/raw-exec-service.nomad.hcl", alloc_count = 3 } - service_docker = { path = "./templates/docker-service.nomad.hcl", alloc_count = 3 } + service_raw_exec = { template = "templates/raw-exec-service.nomad.hcl", alloc_count = 3 } + service_docker = { template = "templates/docker-service.nomad.hcl",
alloc_count = 3 } } } diff --git a/enos/modules/test_cluster_health/main.tf b/enos/modules/test_cluster_health/main.tf index 9a22c2a685b..b0ed86b21f8 100644 --- a/enos/modules/test_cluster_health/main.tf +++ b/enos/modules/test_cluster_health/main.tf @@ -9,13 +9,17 @@ terraform { } } +locals { + clean_token = trimspace(var.nomad_token) # Somewhere in the process, a newline is added to the token; trimspace removes it. +} + resource "enos_local_exec" "run_tests" { environment = { NOMAD_ADDR = var.nomad_addr NOMAD_CACERT = var.ca_file NOMAD_CLIENT_CERT = var.cert_file NOMAD_CLIENT_KEY = var.key_file - NOMAD_TOKEN = var.nomad_token + NOMAD_TOKEN = local.clean_token SERVERS = var.server_count CLIENTS = var.client_count JOBS = var.jobs_count