From e5315f49d41fe06dfe7bc379c68f4281fad34b4d Mon Sep 17 00:00:00 2001 From: Mingfei Shao <2475897+mfshao@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:46:51 -0500 Subject: [PATCH 01/48] HP-1521 Feat/refresh ctgov metadata (#2570) * feat: fetch ct.gov metadata * check exception * fix excption * clean up old clinicaltrials_gov * fix check existence * debug * rate limit * debug * debug * fix request * remove unused imports * trigger gh action --- .../healdata/heal-cedar-data-ingest.py | 111 ++++++++++++++++-- 1 file changed, 103 insertions(+), 8 deletions(-) diff --git a/files/scripts/healdata/heal-cedar-data-ingest.py b/files/scripts/healdata/heal-cedar-data-ingest.py index bd59076c1..0e7cf8ef3 100644 --- a/files/scripts/healdata/heal-cedar-data-ingest.py +++ b/files/scripts/healdata/heal-cedar-data-ingest.py @@ -1,6 +1,5 @@ import argparse import copy -import json import sys import requests import pydash @@ -50,6 +49,50 @@ "BioSystics-AP": "https://biosystics-ap.com/assays/assaystudy//", } +CLINICAL_TRIALS_GOV_FIELDS = [ + "NCTId", + "OfficialTitle", + "BriefTitle", + "Acronym", + "StudyType", + "OverallStatus", + "StartDate", + "StartDateType", + "CompletionDate", + "CompletionDateType", + "IsFDARegulatedDrug", + "IsFDARegulatedDevice", + "IsPPSD", + "BriefSummary", + "DetailedDescription", + "Condition", + "DesignAllocation", + "DesignPrimaryPurpose", + "Phase", + "DesignInterventionModel", + "EnrollmentCount", + "EnrollmentType", + "DesignObservationalModel", + "InterventionType", + "PrimaryOutcomeMeasure", + "SecondaryOutcomeMeasure", + "OtherOutcomeMeasure", + "Gender", + "GenderBased", + "MaximumAge", + "MinimumAge", + "IPDSharing", + "IPDSharingTimeFrame", + "IPDSharingAccessCriteria", + "IPDSharingURL", + "SeeAlsoLinkURL", + "AvailIPDURL", + "AvailIPDId", + "AvailIPDComment", + "PatientRegistry", + "DesignTimePerspective", +] + def is_valid_uuid(uuid_to_test, version=4): """ @@ -76,7 +119,11 @@ def is_valid_uuid(uuid_to_test, version=4): def 
update_filter_metadata(metadata_to_update): # Retain these from existing filters save_filters = ["Common Data Elements"] - filter_metadata = [filter for filter in metadata_to_update["advSearchFilters"] if filter["key"] in save_filters] + filter_metadata = [ + filter + for filter in metadata_to_update["advSearchFilters"] + if filter["key"] in save_filters + ] for metadata_field_key, filter_field_key in FILTER_FIELD_MAPPINGS.items(): filter_field_values = pydash.get(metadata_to_update, metadata_field_key) if filter_field_values: @@ -99,7 +146,12 @@ def update_filter_metadata(metadata_to_update): filter_metadata = pydash.uniq(filter_metadata) metadata_to_update["advSearchFilters"] = filter_metadata # Retain these from existing tags - save_tags = ["Data Repository", "Common Data Elements", "RequiredIDP", "Additional Acknowledgement"] + save_tags = [ + "Data Repository", + "Common Data Elements", + "RequiredIDP", + "Additional Acknowledgement", + ] tags = [tag for tag in metadata_to_update["tags"] if tag["category"] in save_tags] # Add any new tags from advSearchFilters for f in metadata_to_update["advSearchFilters"]: @@ -166,6 +218,21 @@ def get_related_studies(serial_num, guid, hostname): return related_study_result +def get_clinical_trials_gov_metadata(nct_id): + if not nct_id: + return None + ct_metadata = {} + try: + ct_metadata_result = requests.get(f"https://clinicaltrials.gov/api/v2/studies/{nct_id}?fields={'|'.join(CLINICAL_TRIALS_GOV_FIELDS)}") + if ct_metadata_result.status_code != 200: + raise Exception(f"Could not get clinicaltrials.gov metadata, error code {ct_metadata_result.status_code}") + else: + ct_metadata = ct_metadata_result.json() + except Exception as exc: + raise Exception(f"Could not get clinicaltrials.gov metadata: {exc}") from exc + return ct_metadata + + parser = argparse.ArgumentParser() parser.add_argument("--directory", help="CEDAR Directory ID for registering ") @@ -231,7 +298,8 @@ def get_related_studies(serial_num, guid, hostname): for 
cedar_record in metadata_return["metadata"]["records"]: # get the CEDAR instance id from cedar for querying in our MDS cedar_instance_id = pydash.get( - cedar_record, "metadata_location.cedar_study_level_metadata_template_instance_ID" + cedar_record, + "metadata_location.cedar_study_level_metadata_template_instance_ID", ) if cedar_instance_id is None: print("This record doesn't have CEDAR instance id, skipping...") @@ -246,7 +314,9 @@ def get_related_studies(serial_num, guid, hostname): # the query result key is the record of the metadata. If it doesn't return anything then our query failed. if len(list(mds_res.keys())) == 0 or len(list(mds_res.keys())) > 1: - print(f"Query returned nothing for template_instance_ID={cedar_instance_id}&data=true") + print( + f"Query returned nothing for template_instance_ID={cedar_instance_id}&data=true" + ) continue # get the key for our mds record @@ -273,8 +343,10 @@ def get_related_studies(serial_num, guid, hostname): ).get("other_study_websites", []) # this ensures the nih_application_id, cedar_study_level_metadata_template_instance_ID and study_name are not alterable from CEDAR side del cedar_record["metadata_location"] - cedar_record["minimal_info"]["study_name"] = mds_res["gen3_discovery"]["study_metadata"].get("minimal_info", {}).get( - "study_name", "" + cedar_record["minimal_info"]["study_name"] = ( + mds_res["gen3_discovery"]["study_metadata"] + .get("minimal_info", {}) + .get("study_name", "") ) mds_res["gen3_discovery"]["study_metadata"].update(cedar_record) @@ -342,7 +414,9 @@ def get_related_studies(serial_num, guid, hostname): related_study_result = get_related_studies( serial_num, mds_record_guid, hostname ) - mds_res["gen3_discovery"]["related_studies"] = copy.deepcopy(related_study_result) + mds_res["gen3_discovery"]["related_studies"] = copy.deepcopy( + related_study_result + ) # merge data from cedar that is not study level metadata into a level higher deleted_keys = [] @@ -357,6 +431,27 @@ def 
get_related_studies(serial_num, guid, hostname): mds_res["gen3_discovery"] ) + clinical_trials_id = None + try: + clinical_trials_id = ( + mds_res["gen3_discovery"]["study_metadata"] + .get("metadata_location", {}) + .get("clinical_trials_study_ID", "") + ) + except Exception: + print("Unable to get clinical_trials_study_ID for study") + if clinical_trials_id: + try: + ct_gov_metadata = get_clinical_trials_gov_metadata(clinical_trials_id) + if ct_gov_metadata: + print(f"Got clinicaltrials.gov metadata for {mds_record_guid} with NCT ID {clinical_trials_id}") + mds_cedar_register_data_body["clinicaltrials_gov"] = copy.deepcopy(ct_gov_metadata) + except Exception as ex: + print(f'{ex}') + # This means the old clinicaltrials_gov section is actually from CEDAR not clinicaltrials.gov, so remove it + elif "clinicaltrials_gov" in mds_cedar_register_data_body: + del mds_cedar_register_data_body["clinicaltrials_gov"] + mds_cedar_register_data_body["gen3_discovery"] = mds_discovery_data_body mds_cedar_register_data_body["_guid_type"] = "discovery_metadata" From fcc4535b666b2d514cbf238b07cfc7446bad0b6d Mon Sep 17 00:00:00 2001 From: EliseCastle23 <109446148+EliseCastle23@users.noreply.github.com> Date: Tue, 25 Jun 2024 10:34:42 -0600 Subject: [PATCH 02/48] Argo to use IRSA for Artifact Upload (#2324) * changing argo to use a service account instead of s3 creds as it is more secure * changing the location of when to annotate the service accounts. * moving annotation commands again for testing * adding a sleep command to see if the sa gets annotated * forgot to add namespace to the command * removing argo-s3-creds from values.yaml * reverting my changes * testing out commenting the "artifactRespository" section entirely. * commenting out only the access and secret access keys * testing the "iamEnbpointResolution" value. * testing the "useStaticCredentials" key in the values.yaml * cleaning up files and adding the "gen3 awsrole create" command to allows SA's to assume role. 
Also, fixing the setup argo db to use environment variables instead of the argo s3 secret as it no longer exists. * getting rid of create-role command as the "awsrole create" will automatically do that. * changing the setup script to allow workflows to be run from any namespace! * testing creating a service account in each namespace so workflows can be run from anywhere. * Changes that allow for the following: Create ONE sa in the argo namespace. Create ONE role in the argo namespace. Create multiple inline bucket policies for every namespace so any namespace can access separate buckets. * Update values.yaml * changing the kube-setup script to use the "default" sa in the role Trust policy and removing the argo sa as there is no need for it anymore. * fixing typo * revertting kube-setup-argo changes as PR 2360 was opened separately * Update values.yaml --------- Co-authored-by: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> --- kube/services/argo/values.yaml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/kube/services/argo/values.yaml b/kube/services/argo/values.yaml index 23dda4a5a..011253e1e 100644 --- a/kube/services/argo/values.yaml +++ b/kube/services/argo/values.yaml @@ -92,18 +92,13 @@ server: # -- Influences the creation of the ConfigMap for the workflow-controller itself. useDefaultArtifactRepo: true +# -- Use static credentials for S3 (eg. when not using AWS IRSA) +useStaticCredentials: false artifactRepository: # -- Archive the main container logs as an artifact archiveLogs: true # -- Store artifact in a S3-compliant object store s3: - # Note the `key` attribute is not the actual secret, it's the PATH to - # the contents in the associated secret, as defined by the `name` attribute. 
- accessKeySecret: - name: argo-s3-creds - key: AccessKeyId - secretKeySecret: - name: argo-s3-creds - key: SecretAccessKey bucket: GEN3_ARGO_BUCKET endpoint: s3.amazonaws.com + useSDKCreds: true From 105ba4846bb7445cc02a71eaa55564dc740d215e Mon Sep 17 00:00:00 2001 From: Michael Lukowski Date: Tue, 25 Jun 2024 15:23:08 -0500 Subject: [PATCH 03/48] adding qdr production to squid whitelist (#2578) --- files/squid_whitelist/web_whitelist | 1 + 1 file changed, 1 insertion(+) diff --git a/files/squid_whitelist/web_whitelist b/files/squid_whitelist/web_whitelist index 1f7de95ec..e32c7f483 100644 --- a/files/squid_whitelist/web_whitelist +++ b/files/squid_whitelist/web_whitelist @@ -169,3 +169,4 @@ vpodc.org yahoo.com idp.stage.qdr.org stage.qdr.org +data.qdr.syr.edu From 0c84a51fdff7e905f986381a9242cde3a7792fa1 Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Mon, 1 Jul 2024 10:21:32 -0400 Subject: [PATCH 04/48] Update values.yaml (#2579) --- kube/services/argo/values.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kube/services/argo/values.yaml b/kube/services/argo/values.yaml index 011253e1e..c8178dd2a 100644 --- a/kube/services/argo/values.yaml +++ b/kube/services/argo/values.yaml @@ -1,6 +1,5 @@ controller: - parallelism: 10 - namespaceParallelism: 5 + parallelism: 7 metricsConfig: # -- Enables prometheus metrics server enabled: true From f050dc8dc9bff7d1a681bb1072d46d5d2c03b60e Mon Sep 17 00:00:00 2001 From: EliseCastle23 <109446148+EliseCastle23@users.noreply.github.com> Date: Tue, 2 Jul 2024 12:22:49 -0600 Subject: [PATCH 05/48] adding changes to give argo UI access to buckets so artifacts can be viewed in the UI (#2581) --- gen3/bin/awsrole.sh | 4 ++-- gen3/bin/kube-setup-argo.sh | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/gen3/bin/awsrole.sh b/gen3/bin/awsrole.sh index dd19ea7a4..b9e9f9514 100644 --- a/gen3/bin/awsrole.sh +++ b/gen3/bin/awsrole.sh @@ -14,7 +14,6 @@ 
gen3_load "gen3/gen3setup" gen3_awsrole_help() { gen3 help awsrole } - # # Assume-role policy - allows SA's to assume role. # NOTE: service-account to role is 1 to 1 @@ -71,7 +70,8 @@ function gen3_awsrole_ar_policy() { "${issuer_url}:aud": "sts.amazonaws.com", "${issuer_url}:sub": [ "system:serviceaccount:*:${serviceAccount}", - "system:serviceaccount:argo:default" + "system:serviceaccount:argo:default", + "system:serviceaccount:argo:argo-argo-workflows-server" ] } } diff --git a/gen3/bin/kube-setup-argo.sh b/gen3/bin/kube-setup-argo.sh index 88af5f328..677f62257 100644 --- a/gen3/bin/kube-setup-argo.sh +++ b/gen3/bin/kube-setup-argo.sh @@ -188,11 +188,13 @@ EOF roleArn=$(aws iam get-role --role-name "${roleName}" --query 'Role.Arn' --output text) gen3_log_info "Role annotate" g3kubectl annotate serviceaccount default eks.amazonaws.com/role-arn=${roleArn} --overwrite -n $argo_namespace + g3kubectl annotate serviceaccount argo-argo-workflows-server eks.amazonaws.com/role-arn=${roleArn} --overwrite -n $argo_namespace g3kubectl annotate serviceaccount argo eks.amazonaws.com/role-arn=${roleArn} --overwrite -n $nameSpace else gen3 awsrole create $roleName argo $nameSpace -all_namespaces roleArn=$(aws iam get-role --role-name "${roleName}" --query 'Role.Arn' --output text) g3kubectl annotate serviceaccount default eks.amazonaws.com/role-arn=${roleArn} -n $argo_namespace + g3kubectl annotate serviceaccount argo-argo-workflows-server eks.amazonaws.com/role-arn=${roleArn} -n $argo_namespace fi # Grant access within the current namespace to the argo SA in the current namespace From 83d38c2cb6fce532f001a93ef24387ce32b33862 Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Wed, 3 Jul 2024 14:35:58 -0400 Subject: [PATCH 06/48] Feat/alert for argo workflows (#2582) * Adding a monitor for long-pending pods in the argo namespace * Better folder structure * Changing the path name * Fix this * Removing variable * Syntax * Printing debug 
info * Removing debug statements --- .../argo-pod-pending-monitor/application.yaml | 22 ++++++++++ .../argo-pod-pending.yaml | 42 +++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 kube/services/argo-pod-pending-monitor/application.yaml create mode 100644 kube/services/argo-pod-pending-monitor/argo-pod-pending.yaml diff --git a/kube/services/argo-pod-pending-monitor/application.yaml b/kube/services/argo-pod-pending-monitor/application.yaml new file mode 100644 index 000000000..9bfc1a7e6 --- /dev/null +++ b/kube/services/argo-pod-pending-monitor/application.yaml @@ -0,0 +1,22 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: argo-pod-pending-monitor-application + namespace: argocd +spec: + destination: + namespace: default + server: https://kubernetes.default.svc + project: default + source: + repoURL: https://github.com/uc-cdis/cloud-automation.git + targetRevision: master + path: kube/services/argo-pod-pending-monitor + directory: + exclude: "application.yaml" + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/kube/services/argo-pod-pending-monitor/argo-pod-pending.yaml b/kube/services/argo-pod-pending-monitor/argo-pod-pending.yaml new file mode 100644 index 000000000..9486d06c2 --- /dev/null +++ b/kube/services/argo-pod-pending-monitor/argo-pod-pending.yaml @@ -0,0 +1,42 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: argo-pod-pending-monitor + namespace: default +spec: + schedule: "*/5 * * * *" + jobTemplate: + spec: + template: + metadata: + labels: + app: gen3job + spec: + serviceAccountName: node-monitor + containers: + - name: kubectl + image: quay.io/cdis/awshelper + env: + # This is the label we want to monitor, probably will never need to change + - name: NODE_LABEL + value: purpose=workflow + # This is in minutes + - name: SLACK_WEBHOOK_URL + valueFrom: + configMapKeyRef: + name: global + key: slack_webhook + + command: ["/bin/bash"] + 
args: + - "-c" + - | + #!/bin/bash + pending_pods=$(kubectl get pods -n argo -o json | jq -r '.items[] | select(.status.phase == "Pending") | {name: .metadata.name, creationTimestamp: .metadata.creationTimestamp} | select(((now - (.creationTimestamp | fromdateiso8601)) / 60) > 15) | .name') + if [[ ! -z $pending_pods ]]; then + echo "Pods $pending_pods has been around too long, sending an alert" + curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"WARNING: Pods \`${pending_pods}\` are older than 15 minutes!\"}" $SLACK_WEBHOOK_URL + else + echo "All good here!" + fi + restartPolicy: OnFailure From d387d57c213b2d34eb4829f44965f69e551857c4 Mon Sep 17 00:00:00 2001 From: Hara Prasad Date: Mon, 8 Jul 2024 07:39:24 -0700 Subject: [PATCH 07/48] Chore/update jenkins version (#2585) * Update jenkins controller version * Use LTS version * remove selenium container from jenkins controller --- .secrets.baseline | 10 +++++----- Docker/jenkins/Jenkins/Dockerfile | 2 +- Docker/jenkins/Jenkins2/Dockerfile | 2 +- kube/services/jenkins/jenkins-deploy.yaml | 13 ------------- kube/services/jenkins2/jenkins2-deploy.yaml | 13 ------------- 5 files changed, 7 insertions(+), 33 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 0c4eba0a8..4a300c57c 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1506,14 +1506,14 @@ "filename": "kube/services/jenkins/jenkins-deploy.yaml", "hashed_secret": "c937b6fbb346a51ef679dd02ac5c4863e02bfdbf", "is_verified": false, - "line_number": 157 + "line_number": 144 }, { "type": "Secret Keyword", "filename": "kube/services/jenkins/jenkins-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 160 + "line_number": 147 } ], "kube/services/jenkins2-ci-worker/jenkins2-ci-worker-deploy.yaml": [ @@ -1554,14 +1554,14 @@ "filename": "kube/services/jenkins2/jenkins2-deploy.yaml", "hashed_secret": "c937b6fbb346a51ef679dd02ac5c4863e02bfdbf", "is_verified": false, - 
"line_number": 153 + "line_number": 140 }, { "type": "Secret Keyword", "filename": "kube/services/jenkins2/jenkins2-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 156 + "line_number": 143 } ], "kube/services/jobs/arborist-rm-expired-access-cronjob.yaml": [ @@ -3737,5 +3737,5 @@ } ] }, - "generated_at": "2024-03-07T21:26:14Z" + "generated_at": "2024-07-05T21:37:59Z" } diff --git a/Docker/jenkins/Jenkins/Dockerfile b/Docker/jenkins/Jenkins/Dockerfile index 04ebe5864..535fdebc1 100644 --- a/Docker/jenkins/Jenkins/Dockerfile +++ b/Docker/jenkins/Jenkins/Dockerfile @@ -1,4 +1,4 @@ -FROM jenkins/jenkins:2.426.3-lts-jdk21 +FROM jenkins/jenkins:2.452.2-lts-jdk21 USER root diff --git a/Docker/jenkins/Jenkins2/Dockerfile b/Docker/jenkins/Jenkins2/Dockerfile index e6b73bc76..cd470268b 100644 --- a/Docker/jenkins/Jenkins2/Dockerfile +++ b/Docker/jenkins/Jenkins2/Dockerfile @@ -1,4 +1,4 @@ -FROM jenkins/jenkins:2.426.3-lts-jdk21 +FROM jenkins/jenkins:2.452.2-lts-jdk21 USER root diff --git a/kube/services/jenkins/jenkins-deploy.yaml b/kube/services/jenkins/jenkins-deploy.yaml index 954e996f2..596c726a0 100644 --- a/kube/services/jenkins/jenkins-deploy.yaml +++ b/kube/services/jenkins/jenkins-deploy.yaml @@ -135,19 +135,6 @@ spec: subPath: "ca.pem" - name: dockersock mountPath: "/var/run/docker.sock" - - name: selenium - image: selenium/standalone-chrome:3.14 - ports: - - containerPort: 4444 - readinessProbe: - httpGet: - path: /wd/hub/sessions - port: 4444 - readinessProbe: - httpGet: - path: /wd/hub/sessions - port: 4444 - imagePullPolicy: Always volumes: - name: datadir persistentVolumeClaim: diff --git a/kube/services/jenkins2/jenkins2-deploy.yaml b/kube/services/jenkins2/jenkins2-deploy.yaml index 08365f811..c54464b00 100644 --- a/kube/services/jenkins2/jenkins2-deploy.yaml +++ b/kube/services/jenkins2/jenkins2-deploy.yaml @@ -131,19 +131,6 @@ spec: subPath: "ca.pem" - name: dockersock mountPath: 
"/var/run/docker.sock" - - name: selenium - image: selenium/standalone-chrome:3.14 - ports: - - containerPort: 4444 - readinessProbe: - httpGet: - path: /wd/hub/sessions - port: 4444 - readinessProbe: - httpGet: - path: /wd/hub/sessions - port: 4444 - imagePullPolicy: Always volumes: - name: datadir persistentVolumeClaim: From e2a375b4c2dd62a4ee5fe953cdc9b73c93e1d093 Mon Sep 17 00:00:00 2001 From: Ajo Augustine Date: Wed, 10 Jul 2024 15:14:17 -0500 Subject: [PATCH 08/48] update squid whitelist (#2588) --- files/squid_whitelist/web_whitelist | 1 + 1 file changed, 1 insertion(+) diff --git a/files/squid_whitelist/web_whitelist b/files/squid_whitelist/web_whitelist index e32c7f483..b0759ba32 100644 --- a/files/squid_whitelist/web_whitelist +++ b/files/squid_whitelist/web_whitelist @@ -14,6 +14,7 @@ clinicaltrials.gov charts.bitnami.com ctds-planx.atlassian.net data.cityofchicago.org +data.stage.qdr.org dataguids.org api.login.yahoo.com apt.kubernetes.io From d04fad6c5d8d575521d25ad6b4ece9892053359d Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Thu, 11 Jul 2024 12:01:12 -0400 Subject: [PATCH 09/48] Raising workflow provisioner limits (#2590) --- kube/services/argo-events/workflows/configmap.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/argo-events/workflows/configmap.yaml b/kube/services/argo-events/workflows/configmap.yaml index c754c3694..4ebb90f19 100644 --- a/kube/services/argo-events/workflows/configmap.yaml +++ b/kube/services/argo-events/workflows/configmap.yaml @@ -84,7 +84,7 @@ data: purpose: workflow limits: resources: - cpu: 2000 + cpu: 4000 providerRef: name: workflow-WORKFLOW_NAME # Kill nodes after 30 days to ensure they stay up to date From 350c83d28cea676baf428219aee1439bdb1116f1 Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Mon, 15 Jul 2024 11:00:26 -0400 Subject: [PATCH 10/48] Chore/fixing argo workflow age monitor 
(#2591) * Updating the argo workflow monitor to only alert on workflows that have a started time * Fixing syntax * Quotations * I'm confused * Fixing some issues * Using the alarm webhook, instead of the regular one --- .../argo-pod-pending.yaml | 2 +- .../argo-monitors/argo-node-age.yaml | 4 +-- .../node-monitors/node-not-ready.yaml | 2 +- .../argo-workflow-age.yaml | 34 +++++++++++-------- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/kube/services/argo-pod-pending-monitor/argo-pod-pending.yaml b/kube/services/argo-pod-pending-monitor/argo-pod-pending.yaml index 9486d06c2..d3d75a84e 100644 --- a/kube/services/argo-pod-pending-monitor/argo-pod-pending.yaml +++ b/kube/services/argo-pod-pending-monitor/argo-pod-pending.yaml @@ -25,7 +25,7 @@ spec: valueFrom: configMapKeyRef: name: global - key: slack_webhook + key: slack_alarm_webhook command: ["/bin/bash"] args: diff --git a/kube/services/node-monitors/argo-monitors/argo-node-age.yaml b/kube/services/node-monitors/argo-monitors/argo-node-age.yaml index 890495ee0..b389c072c 100644 --- a/kube/services/node-monitors/argo-monitors/argo-node-age.yaml +++ b/kube/services/node-monitors/argo-monitors/argo-node-age.yaml @@ -27,7 +27,7 @@ spec: valueFrom: configMapKeyRef: name: global - key: slack_webhook + key: slack_alarm_webhook command: ["/bin/bash"] args: @@ -55,4 +55,4 @@ spec: curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"WARNING: Node \`${NODE_NAME}\` is older than 3 hours!\"}" $SLACK_WEBHOOK_URL fi done - restartPolicy: OnFailure \ No newline at end of file + restartPolicy: OnFailure diff --git a/kube/services/node-monitors/node-not-ready.yaml b/kube/services/node-monitors/node-not-ready.yaml index 500832fc3..15ed616e6 100644 --- a/kube/services/node-monitors/node-not-ready.yaml +++ b/kube/services/node-monitors/node-not-ready.yaml @@ -21,7 +21,7 @@ spec: valueFrom: configMapKeyRef: name: global - key: slack_webhook + key: slack_alarm_webhook - name: ENVIRONMENT valueFrom: 
configMapKeyRef: diff --git a/kube/services/workflow-age-monitor/argo-workflow-age.yaml b/kube/services/workflow-age-monitor/argo-workflow-age.yaml index 0d0c29115..52910ad4a 100644 --- a/kube/services/workflow-age-monitor/argo-workflow-age.yaml +++ b/kube/services/workflow-age-monitor/argo-workflow-age.yaml @@ -24,7 +24,7 @@ spec: valueFrom: configMapKeyRef: name: global - key: slack_webhook + key: slack_alarm_webhook command: ["/bin/bash"] args: @@ -32,24 +32,30 @@ spec: - | #!/bin/bash # Get all workflows with specific label and check their age - kubectl get workflows --all-namespaces -o json | jq -c '.items[] | {name: .metadata.name, creationTimestamp: .metadata.creationTimestamp}' | while read workflow_info; do + kubectl get workflows --all-namespaces -o json | jq -c '.items[] | {name: .metadata.name, startedTimestamp: .status.startedAt}' | while read workflow_info; do WORKFLOW_NAME=$(echo $workflow_info | jq -r '.name') - CREATION_TIMESTAMP=$(echo $workflow_info | jq -r '.creationTimestamp') + STARTED_TIMESTAMP=$(echo $workflow_info | jq -r '.startedTimestamp') - # Convert creation timestamp to Unix Epoch time - CREATION_EPOCH=$(date -d "$CREATION_TIMESTAMP" +%s) + echo "Checking workflow $WORKFLOW_NAME" + echo "$STARTED_TIMESTAMP" - # Get current Unix Epoch time - CURRENT_EPOCH=$(date +%s) + if [ "$STARTED_TIMESTAMP" != "null" ]; then + echo "Workflow $WORKFLOW_NAME started at $STARTED_TIMESTAMP" + # Convert creation timestamp to Unix Epoch time + CREATION_EPOCH=$(date -d "$STARTED_TIMESTAMP" +%s) - # Calculate workflow age in seconds - WORKFLOW_AGE=$(($CURRENT_EPOCH - $CREATION_EPOCH)) + # Get current Unix Epoch time + CURRENT_EPOCH=$(date +%s) - # Check if workflow age is greater than threshold - if [ "$WORKFLOW_AGE" -gt "$THRESHOLD_TIME" ]; then - echo "Workflow $WORKFLOW_NAME has been running for over $THRESHOLD_TIME seconds, sending an alert" - # Send alert to Slack - curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"WARNING: Workflow 
\`${WORKFLOW_NAME}\` has been running longer than $THRESHOLD_TIME seconds\"}" $SLACK_WEBHOOK_URL + # Calculate workflow age in seconds + WORKFLOW_AGE=$(($CURRENT_EPOCH - $CREATION_EPOCH)) + + # Check if workflow age is greater than threshold + if [ "$WORKFLOW_AGE" -gt "$THRESHOLD_TIME" ]; then + echo "Workflow $WORKFLOW_NAME has been running for over $THRESHOLD_TIME seconds, sending an alert" + # Send alert to Slack + curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"WARNING: Workflow \`${WORKFLOW_NAME}\` has been running longer than $THRESHOLD_TIME seconds\"}" $SLACK_WEBHOOK_URL + fi fi done restartPolicy: OnFailure From 2f7e8aab2b7d92ffea0f8d82b4be10959cf15ae1 Mon Sep 17 00:00:00 2001 From: Pauline Ribeyre <4224001+paulineribeyre@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:33:23 -0500 Subject: [PATCH 11/48] Add hatchery access (#2592) --- files/scripts/ecr-access-job.md | 2 +- gen3/bin/kube-setup-hatchery.sh | 22 +++++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/files/scripts/ecr-access-job.md b/files/scripts/ecr-access-job.md index 9659b186b..5f8dff767 100644 --- a/files/scripts/ecr-access-job.md +++ b/files/scripts/ecr-access-job.md @@ -59,7 +59,7 @@ Trust policy (allows Acct2): } ``` -- Policy in the account (Acct2) that contains the DynamoDB table (created automatically by `kube-setup-ecr-access-job.sh`): +- Policy in the account (Acct2) that contains the DynamoDB table (created automatically by `kube-setup-ecr-access-cronjob.sh`): ``` { "Version": "2012-10-17", diff --git a/gen3/bin/kube-setup-hatchery.sh b/gen3/bin/kube-setup-hatchery.sh index dadbbd930..97365677d 100644 --- a/gen3/bin/kube-setup-hatchery.sh +++ b/gen3/bin/kube-setup-hatchery.sh @@ -175,6 +175,8 @@ $assumeImageBuilderRolePolicyBlock "Action": [ "batch:DescribeComputeEnvironments", "batch:CreateComputeEnvironment", + "batch:UpdateComputeEnvironment", + "batch:ListJobs", "batch:CreateJobQueue", "batch:TagResource", 
"iam:ListPolicies", @@ -197,10 +199,28 @@ $assumeImageBuilderRolePolicyBlock "iam:CreateInstanceProfile", "iam:AddRoleToInstanceProfile", "iam:PassRole", - "s3:CreateBucket" + "kms:CreateKey", + "kms:CreateAlias", + "kms:DescribeKey", + "kms:TagResource", + "s3:CreateBucket", + "s3:PutEncryptionConfiguration", + "s3:PutBucketPolicy", + "s3:PutLifecycleConfiguration" ], "Resource": "*" }, + { + "Sid": "CreateSlrForNextflowBatchWorkspaces", + "Effect": "Allow", + "Action": "iam:CreateServiceLinkedRole", + "Resource": "arn:aws:iam::*:role/aws-service-role/batch.amazonaws.com/*", + "Condition": { + "StringLike": { + "iam:AWSServiceName": "batch.amazonaws.com" + } + } + }, { "Sid": "PassRoleForNextflowBatchWorkspaces", "Effect": "Allow", From e7fb972628f3b3a977ec6b3576743a6de4fe8976 Mon Sep 17 00:00:00 2001 From: Ajo Augustine Date: Wed, 17 Jul 2024 08:54:52 -0500 Subject: [PATCH 12/48] copy gen3 dataabses in Aurora (#2356) * copy gen3 dataabses in Aurora * Update psql-db-copy-aurora-job.yaml * Update psql-db-copy-aurora-job.yaml * add service account * Add documentation for dbbackup.sh * Add psql-db-aurora-migration-job, updating dbbackup.sh * Add psql-db-aurora-migration-job, updating dbbackup.sh * Update dbbackup.sh * Update psql-db-copy-aurora-job.yaml * Update dbbackup.md --- doc/dbbackup.md | 52 +++++ gen3/bin/dbbackup.sh | 102 ++++++-- .../jobs/psql-db-aurora-migration-job.yaml | 219 ++++++++++++++++++ .../jobs/psql-db-copy-aurora-job.yaml | 193 +++++++++++++++ .../services/jobs/psql-db-copy-aurora-sa.yaml | 30 +++ 5 files changed, 575 insertions(+), 21 deletions(-) create mode 100644 doc/dbbackup.md create mode 100644 kube/services/jobs/psql-db-aurora-migration-job.yaml create mode 100644 kube/services/jobs/psql-db-copy-aurora-job.yaml create mode 100644 kube/services/jobs/psql-db-copy-aurora-sa.yaml diff --git a/doc/dbbackup.md b/doc/dbbackup.md new file mode 100644 index 000000000..9e21f2bde --- /dev/null +++ b/doc/dbbackup.md @@ -0,0 +1,52 @@ +# TL;DR + +This 
script facilitates the management of database backup and restore within the Gen3 environment. It can establish policies, service accounts, roles, and S3 buckets. Depending on the command provided, it can initiate a database dump, perform a restore, migrate databases to a new RDS instance on Aurora, or clone databases to an RDS Aurora instance. + +## Usage + +```sh +gen3 dbbackup [dump|restore|va-dump|create-sa|migrate-to-aurora|copy-to-aurora] +``` + +### Commands + +#### dump + +Initiates a database dump and pushes it to an S3 bucket, creating the essential AWS resources if they are absent. The dump operation is intended to be executed from the namespace/commons that requires the backup. + +```sh +gen3 dbbackup dump +``` + +#### restore + +Initiates a database restore from an S3 bucket, creating the essential AWS resources if they are absent. The restore operation is meant to be executed in the target namespace where the backup needs to be restored. + +```sh +gen3 dbbackup restore +``` + +#### create-sa + +Creates the necessary service account and roles for DB copy. + +```sh +gen3 dbbackup create-sa +``` + +#### migrate-to-aurora + +Triggers a service account creation and a job to migrate a Gen3 commons to an AWS RDS Aurora instance. + +```sh +gen3 dbbackup migrate-to-aurora +``` + +#### copy-to-aurora + +Triggers a service account creation and a job to copy the databases Indexd, Sheepdog & Metadata to new databases within an RDS Aurora cluster from another namespace in same RDS cluster. + +```sh +gen3 dbbackup copy-to-aurora +``` + diff --git a/gen3/bin/dbbackup.sh b/gen3/bin/dbbackup.sh index eb9611a90..eeb569519 100644 --- a/gen3/bin/dbbackup.sh +++ b/gen3/bin/dbbackup.sh @@ -1,26 +1,28 @@ #!/bin/bash #################################################################################################### -# Script: dbdump.sh +# Script: dbbackup.sh # # Description: # This script facilitates the management of database backups within the gen3 environment. 
It is -# equipped to establish policies, service accounts, roles, and S3 buckets. Depending on the -# command provided, it will either initiate a database dump or perform a restore. +# equipped to establish policies, service accounts, roles, and S3 buckets. Depending on the +# command provided, it will either initiate a database dump, perform a restore, migrate to Aurora, +# or copy to Aurora. # # Usage: -# gen3 dbbackup [dump|restore] +# gen3 dbbackup [dump|restore|va-dump|create-sa|migrate-to-aurora|copy-to-aurora ] # -# dump - Initiates a database dump, creating the essential AWS resources if they are absent. -# The dump operation is intended to be executed from the namespace/commons that requires -# the backup. -# restore - Initiates a database restore, creating the essential AWS resources if they are absent. -# The restore operation is meant to be executed in the target namespace, where the backup -# needs to be restored. +# dump - Initiates a database dump, creating the essential AWS resources if they are absent. +# The dump operation is intended to be executed from the namespace/commons that requires +# the backup. +# restore - Initiates a database restore, creating the essential AWS resources if they are absent. +# The restore operation is meant to be executed in the target namespace, where the backup +# needs to be restored. +# va-dump - Runs a va-testing DB dump. +# create-sa - Creates the necessary service account and roles for DB copy. +# migrate-to-aurora - Triggers a service account creation and a job to migrate a Gen3 commons to an AWS RDS Aurora instance. +# copy-to-aurora - Triggers a service account creation and a job to copy the databases Indexd, Sheepdog & Metadata to new databases within an RDS Aurora cluster. # -# Notes: -# This script extensively utilizes the AWS CLI and the gen3 CLI. Proper functioning demands a -# configured gen3 environment and the availability of the necessary CLI tools. 
# #################################################################################################### @@ -49,7 +51,6 @@ gen3_log_info "namespace: $namespace" gen3_log_info "sa_name: $sa_name" gen3_log_info "bucket_name: $bucket_name" - # Create an S3 access policy if it doesn't exist create_policy() { # Check if policy exists @@ -87,7 +88,6 @@ EOM fi } - # Create or update the Service Account and its corresponding IAM Role create_service_account_and_role() { cluster_arn=$(kubectl config current-context) @@ -101,7 +101,6 @@ create_service_account_and_role() { gen3_log_info "oidc_url: $oidc_url" gen3_log_info "role_name: $role_name" - cat > ${trust_policy} <" + exit 1 + fi + gen3_log_info "Copying databases within Aurora..." + copy_to_aurora "$2" + ;; *) - echo "Invalid command. Usage: gen3 dbbackup [dump|restore|va-dump]" + echo "Invalid command. Usage: gen3 dbbackup [dump|restore|va-dump|create-sa|migrate-to-aurora|copy-to-aurora ]" return 1 ;; esac } -main "$1" +main "$@" diff --git a/kube/services/jobs/psql-db-aurora-migration-job.yaml b/kube/services/jobs/psql-db-aurora-migration-job.yaml new file mode 100644 index 000000000..dc6f40c11 --- /dev/null +++ b/kube/services/jobs/psql-db-aurora-migration-job.yaml @@ -0,0 +1,219 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: psql-db-aurora-migration +spec: + template: + metadata: + labels: + app: gen3job + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + - weight: 99 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType + operator: In + values: + - ONDEMAND + serviceAccountName: psql-db-copy-sa + containers: + - name: pgdump + image: quay.io/cdis/awshelper:master + imagePullPolicy: Always + env: + - name: gen3Env + valueFrom: + configMapKeyRef: + name: global + key: environment + - name: JENKINS_HOME + value: "devterm" + - 
name: GEN3_HOME + value: /home/ubuntu/cloud-automation + command: [ "/bin/bash" ] + args: + - "-c" + - | + # This job migrates (takes backup and restores) the databases in a Gen3 instance to an Aurora RDS cluster. + # Requirements: + # 1. Aurora server credentials should be present in the Gen3Secrets/creds.json with name 'aurora'. + # 2. Ensure that `gen3 psql aurora` and `gen3 secrets decode aurora-creds` work as expected. + # 3. The job needs the "psql-db-copy-sa" service account with the necessary permissions to read secrets from all relevant namespaces. + + source "${GEN3_HOME}/gen3/lib/utils.sh" + gen3_load "gen3/gen3setup" + namespace=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace) + default_databases=($(echo -e "$(gen3 db services)" | sort -r)) + date_str=$(date -u +%y%m%d_%H%M%S) + databases=("${default_databases[@]}") + gen3_log_info "databases: ${databases[@]}" + + # Initialize sheepdog_db_name and failed_migrations variables + sheepdog_db_name="" + failed_migrations="" + + # find Aurora Server credentials + aurora_host_name=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_host') + aurora_master_username=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_username') + aurora_master_password=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_password') + aurora_master_database=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_database') + + gen3_log_info "Aurora Creds: \n aurora_host_name: $aurora_host_name \n aurora_master_username: $aurora_master_username \n aurora_master_database: $aurora_master_database" + + # Verify important variables are present + if [ -z "$aurora_host_name" ] || [ -z "$aurora_master_username" ] || [ -z "$aurora_master_password" ] || [ -z "$aurora_master_database" ]; then + gen3_log_err "Aurora credentials are missing. Exiting." 
+ exit 1 + fi + + new_resources="" + + # Function to truncate to 63 characters + function truncate_identifier() { + local identifier=$1 + if [ ${#identifier} -gt 63 ]; then + echo "${identifier:0:63}" + else + echo "$identifier" + fi + } + + # Function to create a database with retry logic + function create_database_with_retry() { + local db_name=$1 + local retries=5 + local wait_time=10 + for i in $(seq 1 $retries); do + PGPASSWORD=${db_password} psql -h $aurora_host_name -U "$db_user" -d postgres -c "CREATE DATABASE $db_name" + if [ $? -eq 0 ]; then + return 0 + fi + gen3_log_err "Failed to create database $db_name. Retrying in $wait_time seconds..." + sleep $wait_time + done + return 1 + } + + # Looping through each service to: + # - Extract the database credentials. + # - Check if the user already exists, if not, create the user. + # - Grant required privileges. + # - Create the database (except for peregrine). + # - Backup and restore the database on the Aurora Cluster. + for database in "${databases[@]}"; do + for secret_name in "${database}-creds creds.json" "$database-g3auto dbcreds.json"; do + creds=$(gen3 secrets decode $secret_name 2>/dev/null) + if [ $? -eq 0 ] && [ ! 
-z "$creds" ]; then + db_hostname=$(echo $creds | jq -r .db_host) + db_username=$(echo $creds | jq -r .db_username) + db_password=$(echo $creds | jq -r .db_password) + db_database=$(echo $creds | jq -r .db_database) + gen3_log_info "Extracting service credentials for $database from $secret_name: \n db_hostname: $db_hostname \n db_username: $db_username \n db_database: $db_database \n" + break + fi + done + + if [ -z "$db_hostname" ] || [ -z "$db_username" ] || [ -z "$db_password" ] || [ -z "$db_database" ]; then + gen3_log_err "Failed to extract database credentials for $database" + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Failed to extract credentials" + continue + fi + + # Check source database accessibility + PGPASSWORD=${db_password} pg_isready -h $db_hostname -U "$db_username" -d "$db_database" + if [ $? -ne 0 ]; then + gen3_log_err "Cannot connect to source database $db_database at $db_hostname. Skipping database $database." + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Cannot connect to source database at $db_hostname" + continue + fi + + # Define db_user and db_name variables with replaced hyphens + db_user="$(echo $database | tr '-' '_')_user_$(echo $namespace | tr '-' '_')" + db_name="$(echo $database | tr '-' '_')_$(echo $namespace | tr '-' '_')_${date_str}" + + # Truncate identifiers if necessary + db_user=$(truncate_identifier $db_user) + db_name=$(truncate_identifier $db_name) + + # Try to connect to the Aurora database with the extracted credentials. + # If the connection is successful, it means the user already exists. + # If not, create the user. + + PGPASSWORD=${db_password} psql -h $aurora_host_name -U "$db_user" -d postgres -c "\q" + if [ $? -eq 0 ]; then + gen3_log_info "User $db_user, password already exists" + else + gen3 psql aurora -c "CREATE USER \"$db_user\" WITH PASSWORD '$db_password' CREATEDB" + if [ $? 
-ne 0 ]; then + gen3_log_err "Failed to create user for $database" + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Failed to create user" + continue + else + gen3_log_info "Database user $db_user created successfully" + fi + fi + + if [ "$database" != "peregrine" ]; then + # Create the database with a unique name by appending namespace and date. + create_database_with_retry $db_name + if [ $? -ne 0 ]; then + gen3_log_err "Failed to create database for $database" + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Failed to create database" + continue + else + gen3_log_info "Database $db_name created successfully" + if [ "$database" == "sheepdog" ]; then + sheepdog_db_name=$db_name + fi + fi + + # Backup the current database and restore it to the newly created database. + if gen3 db backup $database | PGPASSWORD=${db_password} psql -h $aurora_host_name -U "$db_user" -d "$db_name"; then + gen3_log_info "Database $database restored successfully to $db_name" + new_resources="${new_resources}\nSource_Database: $db_database Source_Host: $db_hostname Source_User: $db_username Restored_Database: $db_name User: $db_user" + else + gen3_log_err "Failed to backup and restore database for $database" + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Failed to backup and restore database" + fi + fi + + if [ "$database" == "peregrine" ]; then + if [ -n "$sheepdog_db_name" ]; then + gen3 psql aurora -d "$sheepdog_db_name" -c "GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO \"$db_user\"" + if [ $? 
-ne 0 ]; then + gen3_log_err "Failed to grant access to sheepdog tables for peregrine user" + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Failed to grant access to sheepdog tables for peregrine user" + continue + else + gen3_log_info "Access to sheepdog tables granted successfully for peregrine user" + new_resources="${new_resources}\nUser: $db_user with access to sheepdog database $sheepdog_db_name" + fi + else + gen3_log_err "Sheepdog database not found for granting permissions to peregrine user" + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Sheepdog database not found for granting permissions" + fi + fi + done + + # Logging the newly created resources + gen3_log_info "New resources created on $aurora_host_name\n$new_resources" + + # Logging the failed migrations + if [ -n "$failed_migrations" ]; then + gen3_log_info "Failed migrations:\n$failed_migrations" + fi + + # Sleep for 600 seconds to allow the user to check the logs + sleep 600 + restartPolicy: Never diff --git a/kube/services/jobs/psql-db-copy-aurora-job.yaml b/kube/services/jobs/psql-db-copy-aurora-job.yaml new file mode 100644 index 000000000..8fd6e899a --- /dev/null +++ b/kube/services/jobs/psql-db-copy-aurora-job.yaml @@ -0,0 +1,193 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: psql-db-copy-aurora +spec: + template: + metadata: + labels: + app: gen3job + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + - weight: 99 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType + operator: In + values: + - ONDEMAND + serviceAccountName: psql-db-copy-sa + containers: + - name: pgdump + image: quay.io/cdis/awshelper:master + imagePullPolicy: Always + env: + - name: gen3Env + valueFrom: + configMapKeyRef: + name: global + key: environment + - name: JENKINS_HOME + value: 
"devterm" + - name: GEN3_HOME + value: /home/ubuntu/cloud-automation + - name: SOURCE_NAMESPACE + GEN3_SOURCE_NAMESPACE|-value: "staging"-| # Default value, should be overwritten by the environment variable + command: [ "/bin/bash" ] + args: + - "-c" + - | + # This script copies specified databases from a source namespace to the current namespace on the same Aurora RDS instance. + # + # This script requires the following to work properly: + # + # 1. Aurora server credentials must be present in the Gen3Secrets/creds.json file. + # These credentials should be present as a Kubernetes secret named "aurora-creds". + # This secret should contain the keys: db_host, db_username, db_password, and db_database. + # + # 2. The "gen3 psql aurora" command should be available to connect to the Aurora server. + # + # 3. The "gen3 secrets decode aurora-creds creds.json" command should work, allowing the script to decode the necessary secrets. + # + # 4. The source and the destination databases should be on the same Aurora instance. + # + # 5. The ServiceAccount, roles, and role binding must be set up using the script psql-db-copy-aurora-sa.yaml. + # The psql-db-copy-aurora-sa.yaml script is configured for the default namespace. + # Modify the namespace as needed before applying it where the script will run. 
+ # These can be created by executing the command: + # kubectl apply -f ${GEN3_HOME}/kube/services/jobs/psql-db-copy-aurora-sa.yaml + # + # How to run the script: + # gen3 job run psql-db-copy-aurora -v SOURCE_NAMESPACE + # + + source "${GEN3_HOME}/gen3/lib/utils.sh" + gen3_load "gen3/gen3setup" + namespace=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace) + date_str=$(date -u +%y%m%d_%H%M%S) + # Define the default databases to be copied + databases=( "indexd" "sheepdog" "metadata") + gen3_log_info "databases to be processed: ${databases[@]}" + source_namespace=$SOURCE_NAMESPACE + gen3_log_info "Source Namespace: $source_namespace" + + # find Aurora Server credentials + aurora_host_name=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_host') + aurora_master_username=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_username') + aurora_master_password=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_password') + aurora_database=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_database') + + # Verify important variables are present + if [ -z "$aurora_host_name" ] || [ -z "$aurora_master_username" ] || [ -z "$aurora_master_password" ] || [ -z "$aurora_database" ]; then + gen3_log_err "Aurora credentials are missing. Exiting." + exit 1 + fi + + # Function to truncate to 63 characters + function truncate_identifier() { + local identifier=$1 + if [ ${#identifier} -gt 63 ]; then + echo "${identifier:0:63}" + else + echo "$identifier" + fi + } + + # Function to decode Kubernetes secrets + function secrets_decode() { + local namespace=$1 + local secret=$2 + local key=$3 + local secrets_value + + secrets_value=$(kubectl get secret -n $namespace $secret -o json 2>/dev/null | jq -r --arg key "$key" '.data[$key]' | base64 --decode --ignore-garbage 2>/dev/null) + if [ $? 
-ne 0 ] || [ -z "$secrets_value" ]; then + echo "Secret $secret in namespace $namespace not found or failed to decode" >&2 + return 1 + else + echo "$secrets_value" + fi + } + + # Array to hold the names of newly created databases + new_databases=() + + # Looping through each database + for database in "${databases[@]}"; do + source_creds="" + creds="" + + # Try to get the source and destination credentials with the "-g3auto" suffix and key "dbcreds.json" + source_creds=$(secrets_decode $source_namespace ${database}-g3auto dbcreds.json) + if [ $? -ne 0 ]; then + source_creds="" + fi + creds=$(secrets_decode $namespace ${database}-g3auto dbcreds.json) + if [ $? -ne 0 ]; then + creds="" + fi + + # If the "-g3auto" suffix didn't work for both source_creds and creds, try with the suffix "creds" and key "creds.json" + if [ -z "$source_creds" ] && [ -z "$creds" ]; then + source_creds=$(secrets_decode $source_namespace ${database}-creds creds.json) + if [ $? -ne 0 ]; then + source_creds="" + fi + creds=$(secrets_decode $namespace ${database}-creds creds.json) + if [ $? -ne 0 ]; then + creds="" + fi + fi + + # If we still couldn't get the credentials, log an error and continue to the next database + if [ -z "$source_creds" ] || [ -z "$creds" ]; then + gen3_log_err "Failed to extract database credentials for $database" + continue + fi + + source_db_database=$(echo $source_creds | jq -r .db_database) + db_username=$(echo $creds | jq -r .db_username) + db_database=$(echo $creds | jq -r .db_database) + + if [ -z "$source_db_database" ] || [ -z "$db_username" ] || [ -z "$db_database" ]; then + gen3_log_err "One or more required credentials are missing for $database. Skipping." 
+ continue + fi + target_db=$(truncate_identifier $(echo "${database}_${namespace}_${date_str}" | tr '-' '_')) + gen3_log_info "Processing database: $database" + gen3_log_info "Source DB: $source_db_database, Username: $db_username, Current DB: $db_database, Target DB: $target_db" + + # DB commands + gen3 psql aurora -c "GRANT $db_username TO $aurora_master_username" + gen3 psql aurora -c "SELECT pg_terminate_backend(pg_stat_activity.pid) FROM pg_stat_activity WHERE pg_stat_activity.datname = '$source_db_database' AND pid <> pg_backend_pid()" + gen3 psql aurora -c "CREATE DATABASE $target_db WITH TEMPLATE $source_db_database OWNER $db_username" + pg_command="DO \$\$ DECLARE tbl record; BEGIN FOR tbl IN (SELECT table_schema || '.' || table_name AS full_table_name FROM information_schema.tables WHERE table_schema = 'public') LOOP EXECUTE 'ALTER TABLE ' || tbl.full_table_name || ' OWNER TO $db_username;'; END LOOP; END \$\$;" + PGPASSWORD=${aurora_master_password} psql -h $aurora_host_name -U $aurora_master_username -d "$target_db" -c "$pg_command" + if [ $? 
-eq 0 ]; then + gen3_log_info "Successfully processed $database" + new_databases+=("$target_db") + else + gen3_log_err "Failed to process $database" + fi + done + + gen3_log_info "Job Completed" + + # Print the list of newly created databases + gen3_log_info "Newly created Database Names::" + for new_db in "${new_databases[@]}"; do + gen3_log_info "$new_db" + done + + sleep 600 + restartPolicy: Never diff --git a/kube/services/jobs/psql-db-copy-aurora-sa.yaml b/kube/services/jobs/psql-db-copy-aurora-sa.yaml new file mode 100644 index 000000000..e6977a187 --- /dev/null +++ b/kube/services/jobs/psql-db-copy-aurora-sa.yaml @@ -0,0 +1,30 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: psql-db-copy-sa + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: psql-db-copy-role +rules: +- apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "watch", "list"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: psql-db-copy-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: psql-db-copy-role +subjects: +- kind: ServiceAccount + name: psql-db-copy-sa + namespace: default # Ensure this references the correct namespace + From d4e265183fa78277b5c77eb775cc39f300bfd762 Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Wed, 17 Jul 2024 10:54:20 -0400 Subject: [PATCH 13/48] Adding an alert to all workflows if they get timed out (#2593) * Adding an alert to all workflows if they get timed out * Let's add some logic to create the secret * Let's just delete and recreate at all times * Changing image * Removing APK command --- gen3/bin/kube-setup-argo.sh | 12 ++++++++++++ kube/services/argo/values.yaml | 14 ++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/gen3/bin/kube-setup-argo.sh b/gen3/bin/kube-setup-argo.sh index 677f62257..1a25a98c8 100644 --- a/gen3/bin/kube-setup-argo.sh +++ 
b/gen3/bin/kube-setup-argo.sh @@ -204,6 +204,18 @@ EOF aws iam put-role-policy --role-name ${roleName} --policy-name ${internalBucketPolicy} --policy-document file://$internalBucketPolicyFile || true fi + # Create a secret for the slack webhook + alarm_webhook=$(g3kubectl get cm global -o yaml | yq .data.slack_alarm_webhook | tr -d '"') + + if [ -z "$alarm_webhook" ]; then + gen3_log_err "Please set a slack_alarm_webhook in the 'global' configmap. This is needed to alert for failed workflows." + exit 1 + fi + + g3kubectl -n argo delete secret slack-webhook-secret + g3kubectl -n argo create secret generic "slack-webhook-secret" --from-literal=SLACK_WEBHOOK_URL=$alarm_webhook + + ## if new bucket then do the following # Get the aws keys from secret # Create and attach lifecycle policy diff --git a/kube/services/argo/values.yaml b/kube/services/argo/values.yaml index c8178dd2a..eeb2e9e01 100644 --- a/kube/services/argo/values.yaml +++ b/kube/services/argo/values.yaml @@ -61,6 +61,20 @@ controller: workflowDefaults: spec: archiveLogs: true + onExit: alert-on-timeout + templates: + - name: alert-on-timeout + script: + image: quay.io/cdis/amazonlinux-debug:master + command: [sh] + envFrom: + - secretRef: + name: slack-webhook-secret + source: | + failure_reason=$(echo {{workflow.failures}} | jq 'any(.[]; .message == "Step exceeded its deadline")' ) + if [ "$failure_reason" ]; then + curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"ALERT: Workflow {{workflow.name}} has been killed due to timeout\"}" "$SLACK_WEBHOOK_URL" + fi # -- [Node selector] nodeSelector: From 9dff5360fdbef7dbcc01f57ecec4888145c252c6 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Fri, 19 Jul 2024 03:20:24 -0500 Subject: [PATCH 14/48] fix: bash comparison for true values in argo slack webhook notify (#2596) --- kube/services/argo/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/argo/values.yaml b/kube/services/argo/values.yaml 
index eeb2e9e01..c1e951773 100644 --- a/kube/services/argo/values.yaml +++ b/kube/services/argo/values.yaml @@ -72,7 +72,7 @@ controller: name: slack-webhook-secret source: | failure_reason=$(echo {{workflow.failures}} | jq 'any(.[]; .message == "Step exceeded its deadline")' ) - if [ "$failure_reason" ]; then + if [ "$failure_reason" = "true" ]; then curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"ALERT: Workflow {{workflow.name}} has been killed due to timeout\"}" "$SLACK_WEBHOOK_URL" fi From 63ecfc9f30466507110c61953c2683387b030b9b Mon Sep 17 00:00:00 2001 From: Hara Prasad Date: Fri, 19 Jul 2024 08:57:09 -0700 Subject: [PATCH 15/48] Update python to 3.9 in jenkins pods (#2597) * Update python to 3.9 in jenkins pods * fix version --- .pre-commit-config.yaml | 4 +- .secrets.baseline | 104 +++++------------- Docker/jenkins/Jenkins-CI-Worker/Dockerfile | 18 +-- .../Jenkins-CI-Worker/install-python3.8.sh | 8 -- .../Jenkins-CI-Worker/install-python3.9.sh | 8 ++ Docker/jenkins/Jenkins/Dockerfile | 18 +-- Docker/jenkins/Jenkins/install-python3.8.sh | 7 -- Docker/jenkins/Jenkins/install-python3.9.sh | 7 ++ Docker/jenkins/Jenkins2/Dockerfile | 18 +-- Docker/jenkins/Jenkins2/install-python3.8.sh | 7 -- Docker/jenkins/Jenkins2/install-python3.9.sh | 7 ++ 11 files changed, 77 insertions(+), 129 deletions(-) delete mode 100755 Docker/jenkins/Jenkins-CI-Worker/install-python3.8.sh create mode 100755 Docker/jenkins/Jenkins-CI-Worker/install-python3.9.sh delete mode 100755 Docker/jenkins/Jenkins/install-python3.8.sh create mode 100755 Docker/jenkins/Jenkins/install-python3.9.sh delete mode 100755 Docker/jenkins/Jenkins2/install-python3.8.sh create mode 100755 Docker/jenkins/Jenkins2/install-python3.9.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 82034495d..c3a384baa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,11 +1,11 @@ repos: - repo: git@github.com:Yelp/detect-secrets - rev: v1.4.0 + rev: v1.5.0 
hooks: - id: detect-secrets args: ['--baseline', '.secrets.baseline'] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.5.0 + rev: v4.6.0 hooks: - id: no-commit-to-branch args: [--branch, develop, --branch, master, --pattern, release/.*] diff --git a/.secrets.baseline b/.secrets.baseline index 4a300c57c..0cc95d0da 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1,5 +1,5 @@ { - "version": "1.4.0", + "version": "1.5.0", "plugins_used": [ { "name": "ArtifactoryDetector" @@ -26,6 +26,9 @@ { "name": "GitHubTokenDetector" }, + { + "name": "GitLabTokenDetector" + }, { "name": "HexHighEntropyString", "limit": 3.0 @@ -36,6 +39,9 @@ { "name": "IbmCosHmacDetector" }, + { + "name": "IPPublicDetector" + }, { "name": "JwtTokenDetector" }, @@ -49,9 +55,15 @@ { "name": "NpmDetector" }, + { + "name": "OpenAIDetector" + }, { "name": "PrivateKeyDetector" }, + { + "name": "PypiTokenDetector" + }, { "name": "SendGridDetector" }, @@ -67,6 +79,9 @@ { "name": "StripeDetector" }, + { + "name": "TelegramBotTokenDetector" + }, { "name": "TwilioKeyDetector" } @@ -75,10 +90,6 @@ { "path": "detect_secrets.filters.allowlist.is_line_allowlisted" }, - { - "path": "detect_secrets.filters.common.is_baseline_file", - "filename": ".secrets.baseline" - }, { "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", "min_level": 2 @@ -246,6 +257,15 @@ "line_number": 154 } ], + "files/lambda/test-security_alerts.py": [ + { + "type": "AWS Access Key", + "filename": "files/lambda/test-security_alerts.py", + "hashed_secret": "4e041fbfd5dd5918d3d5e968f5f739f815ae92da", + "is_verified": false, + "line_number": 5 + } + ], "files/scripts/psql-fips-fix.sh": [ { "type": "Secret Keyword", @@ -640,78 +660,6 @@ "line_number": 25 } ], - "gen3/test/terraformTest.sh": [ - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "6b44a330b450ee550c081410c6b705dfeaa105ce", - "is_verified": false, - "line_number": 156 - }, - { - "type": 
"Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "d869db7fe62fb07c25a0403ecaea55031744b5fb", - "is_verified": false, - "line_number": 163 - }, - { - "type": "Base64 High Entropy String", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "1cc07dccfdf640eb0e403e490a873a5536759009", - "is_verified": false, - "line_number": 172 - }, - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "1cc07dccfdf640eb0e403e490a873a5536759009", - "is_verified": false, - "line_number": 172 - }, - { - "type": "Base64 High Entropy String", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "185a71a740ef6b9b21c84e6eaa47b89c7de181ef", - "is_verified": false, - "line_number": 175 - }, - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "185a71a740ef6b9b21c84e6eaa47b89c7de181ef", - "is_verified": false, - "line_number": 175 - }, - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "212e1d3823c8c9af9e4c0c172164ee292b9a6768", - "is_verified": false, - "line_number": 311 - }, - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "cb80dbb67a1a5bdf4957eea1473789f1c65357c6", - "is_verified": false, - "line_number": 312 - }, - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "5f35c25f4bf588b5fad46e249fcd9221f5257ce4", - "is_verified": false, - "line_number": 313 - }, - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "5308421b43dde5775f1993bd25a8163070d65598", - "is_verified": false, - "line_number": 314 - } - ], "kube/services/access-backend/access-backend-deploy.yaml": [ { "type": "Secret Keyword", @@ -3737,5 +3685,5 @@ } ] }, - "generated_at": "2024-07-05T21:37:59Z" + "generated_at": "2024-07-19T04:34:31Z" } diff --git a/Docker/jenkins/Jenkins-CI-Worker/Dockerfile 
b/Docker/jenkins/Jenkins-CI-Worker/Dockerfile index 6eeb8f4fd..9401e6a4b 100644 --- a/Docker/jenkins/Jenkins-CI-Worker/Dockerfile +++ b/Docker/jenkins/Jenkins-CI-Worker/Dockerfile @@ -83,21 +83,21 @@ RUN curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc| gpg --dearmor apt-get install -y postgresql-client-13 # Copy sh script responsible for installing Python -COPY install-python3.8.sh /root/tmp/install-python3.8.sh +COPY install-python3.9.sh /root/tmp/install-python3.9.sh -# Run the script responsible for installing Python 3.8.0 and link it to /usr/bin/python -RUN chmod +x /root/tmp/install-python3.8.sh; sync && \ - bash /root/tmp/install-python3.8.sh && \ - rm -rf /root/tmp/install-python3.8.sh && \ +# Run the script responsible for installing Python 3.9.19 and link it to /usr/bin/python +RUN chmod +x /root/tmp/install-python3.9.sh; sync && \ + bash /root/tmp/install-python3.9.sh && \ + rm -rf /root/tmp/install-python3.9.sh && \ unlink /usr/bin/python3 && \ - ln -s /usr/local/bin/python3.8 /usr/bin/python3 + ln -s /usr/local/bin/python3.9 /usr/bin/python3 # Fix shebang for lsb_release -RUN sed -i 's/python3/python3.8/' /usr/bin/lsb_release && \ - sed -i 's/python3/python3.8/' /usr/bin/add-apt-repository +RUN sed -i 's/python3/python3.9/' /usr/bin/lsb_release && \ + sed -i 's/python3/python3.9/' /usr/bin/add-apt-repository # install aws cli, poetry, pytest, etc. 
-RUN set -xe && python3.8 -m pip install --upgrade pip setuptools && python3.8 -m pip install awscli --upgrade && python3.8 -m pip install pytest --upgrade && python3.8 -m pip install poetry && python3.8 -m pip install PyYAML --upgrade && python3.8 -m pip install lxml --upgrade && python3.8 -m pip install yq --upgrade && python3.8 -m pip install datadog --upgrade +RUN set -xe && python3.9 -m pip install --upgrade pip setuptools && python3.9 -m pip install awscli --upgrade && python3.9 -m pip install pytest --upgrade && python3.9 -m pip install poetry && python3.9 -m pip install PyYAML --upgrade && python3.9 -m pip install lxml --upgrade && python3.9 -m pip install yq --upgrade && python3.9 -m pip install datadog --upgrade # install terraform RUN curl -o /tmp/terraform.zip https://releases.hashicorp.com/terraform/0.11.15/terraform_0.11.15_linux_amd64.zip \ diff --git a/Docker/jenkins/Jenkins-CI-Worker/install-python3.8.sh b/Docker/jenkins/Jenkins-CI-Worker/install-python3.8.sh deleted file mode 100755 index a01d59420..000000000 --- a/Docker/jenkins/Jenkins-CI-Worker/install-python3.8.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -wget https://www.python.org/ftp/python/3.8.0/Python-3.8.0.tar.xz -tar xf Python-3.8.0.tar.xz -rm Python-3.8.0.tar.xz -cd Python-3.8.0 -./configure -make -make altinstall diff --git a/Docker/jenkins/Jenkins-CI-Worker/install-python3.9.sh b/Docker/jenkins/Jenkins-CI-Worker/install-python3.9.sh new file mode 100755 index 000000000..88b7596ae --- /dev/null +++ b/Docker/jenkins/Jenkins-CI-Worker/install-python3.9.sh @@ -0,0 +1,8 @@ +#!/bin/bash +wget https://www.python.org/ftp/python/3.9.19/Python-3.9.19.tar.xz +tar xf Python-3.9.19.tar.xz +rm Python-3.9.19.tar.xz +cd Python-3.9.19 +./configure +make +make altinstall diff --git a/Docker/jenkins/Jenkins/Dockerfile b/Docker/jenkins/Jenkins/Dockerfile index 535fdebc1..49c0f82b5 100644 --- a/Docker/jenkins/Jenkins/Dockerfile +++ b/Docker/jenkins/Jenkins/Dockerfile @@ -68,21 +68,21 @@ RUN 
DISTRO="$(lsb_release -c -s)" \ && rm -rf /var/lib/apt/lists/* # Copy sh script responsible for installing Python -COPY install-python3.8.sh /root/tmp/install-python3.8.sh +COPY install-python3.9.sh /root/tmp/install-python3.9.sh -# Run the script responsible for installing Python 3.8.0 and link it to /usr/bin/python -RUN chmod +x /root/tmp/install-python3.8.sh; sync && \ - ./root/tmp/install-python3.8.sh && \ - rm -rf /root/tmp/install-python3.8.sh && \ +# Run the script responsible for installing Python 3.9.19 and link it to /usr/bin/python +RUN chmod +x /root/tmp/install-python3.9.sh; sync && \ + ./root/tmp/install-python3.9.sh && \ + rm -rf /root/tmp/install-python3.9.sh && \ unlink /usr/bin/python3 && \ - ln -s /Python-3.8.0/python /usr/bin/python3 + ln -s /Python-3.9.0/python /usr/bin/python3 # Fix shebang for lsb_release -RUN sed -i 's/python3/python3.8/' /usr/bin/lsb_release && \ - sed -i 's/python3/python3.8/' /usr/bin/add-apt-repository +RUN sed -i 's/python3/python3.9/' /usr/bin/lsb_release && \ + sed -i 's/python3/python3.9/' /usr/bin/add-apt-repository # install aws cli, poetry, pytest, etc. 
-RUN set -xe && python3 -m pip install --upgrade pip && python3 -m pip install awscli --upgrade && python3 -m pip install pytest --upgrade && python3 -m pip install poetry && python3 -m pip install PyYAML --upgrade && python3 -m pip install lxml --upgrade && python3 -m pip install yq --upgrade +RUN set -xe && python3.9 -m pip install --upgrade pip && python3.9 -m pip install awscli --upgrade && python3.9 -m pip install pytest --upgrade && python3.9 -m pip install poetry && python3.9 -m pip install PyYAML --upgrade && python3.9 -m pip install lxml --upgrade && python3.9 -m pip install yq --upgrade # install chrome (supports headless mode) RUN set -xe \ diff --git a/Docker/jenkins/Jenkins/install-python3.8.sh b/Docker/jenkins/Jenkins/install-python3.8.sh deleted file mode 100755 index df21c66e5..000000000 --- a/Docker/jenkins/Jenkins/install-python3.8.sh +++ /dev/null @@ -1,7 +0,0 @@ -wget https://www.python.org/ftp/python/3.8.0/Python-3.8.0.tar.xz -tar xf Python-3.8.0.tar.xz -rm Python-3.8.0.tar.xz -cd Python-3.8.0 -./configure -make -make altinstall diff --git a/Docker/jenkins/Jenkins/install-python3.9.sh b/Docker/jenkins/Jenkins/install-python3.9.sh new file mode 100755 index 000000000..83d7f17cd --- /dev/null +++ b/Docker/jenkins/Jenkins/install-python3.9.sh @@ -0,0 +1,7 @@ +wget https://www.python.org/ftp/python/3.9.19/Python-3.9.19.tar.xz +tar xf Python-3.9.19.tar.xz +rm Python-3.9.19.tar.xz +cd Python-3.9.19 +./configure +make +make altinstall diff --git a/Docker/jenkins/Jenkins2/Dockerfile b/Docker/jenkins/Jenkins2/Dockerfile index cd470268b..9e585ca0e 100644 --- a/Docker/jenkins/Jenkins2/Dockerfile +++ b/Docker/jenkins/Jenkins2/Dockerfile @@ -69,21 +69,21 @@ RUN DISTRO="$(lsb_release -c -s)" \ && rm -rf /var/lib/apt/lists/* # Copy sh script responsible for installing Python -COPY install-python3.8.sh /root/tmp/install-python3.8.sh +COPY install-python3.9.sh /root/tmp/install-python3.9.sh -# Run the script responsible for installing Python 3.8.0 and link it 
to /usr/bin/python -RUN chmod +x /root/tmp/install-python3.8.sh; sync && \ - ./root/tmp/install-python3.8.sh && \ - rm -rf /root/tmp/install-python3.8.sh && \ +# Run the script responsible for installing Python 3.9.19 and link it to /usr/bin/python +RUN chmod +x /root/tmp/install-python3.9.sh; sync && \ + ./root/tmp/install-python3.9.sh && \ + rm -rf /root/tmp/install-python3.9.sh && \ unlink /usr/bin/python3 && \ - ln -s /Python-3.8.0/python /usr/bin/python3 + ln -s /Python-3.9.19/python /usr/bin/python3 # Fix shebang for lsb_release -RUN sed -i 's/python3/python3.5/' /usr/bin/lsb_release && \ - sed -i 's/python3/python3.5/' /usr/bin/add-apt-repository +RUN sed -i 's/python3/python3.9/' /usr/bin/lsb_release && \ + sed -i 's/python3/python3.9/' /usr/bin/add-apt-repository # install aws cli, poetry, pytest, etc. -RUN set -xe && python3 -m pip install --upgrade pip && python3 -m pip install awscli --upgrade && python3 -m pip install pytest --upgrade && python3 -m pip install poetry && python3 -m pip install PyYAML --upgrade && python3 -m pip install lxml --upgrade && python3 -m pip install yq --upgrade +RUN set -xe && python3.9 -m pip install --upgrade pip && python3.9 -m pip install awscli --upgrade && python3.9 -m pip install pytest --upgrade && python3.9 -m pip install poetry && python3.9 -m pip install PyYAML --upgrade && python3.9 -m pip install lxml --upgrade && python3.9 -m pip install yq --upgrade # install chrome (supports headless mode) RUN set -xe \ diff --git a/Docker/jenkins/Jenkins2/install-python3.8.sh b/Docker/jenkins/Jenkins2/install-python3.8.sh deleted file mode 100755 index df21c66e5..000000000 --- a/Docker/jenkins/Jenkins2/install-python3.8.sh +++ /dev/null @@ -1,7 +0,0 @@ -wget https://www.python.org/ftp/python/3.8.0/Python-3.8.0.tar.xz -tar xf Python-3.8.0.tar.xz -rm Python-3.8.0.tar.xz -cd Python-3.8.0 -./configure -make -make altinstall diff --git a/Docker/jenkins/Jenkins2/install-python3.9.sh b/Docker/jenkins/Jenkins2/install-python3.9.sh 
new file mode 100755 index 000000000..83d7f17cd --- /dev/null +++ b/Docker/jenkins/Jenkins2/install-python3.9.sh @@ -0,0 +1,7 @@ +wget https://www.python.org/ftp/python/3.9.19/Python-3.9.19.tar.xz +tar xf Python-3.9.19.tar.xz +rm Python-3.9.19.tar.xz +cd Python-3.9.19 +./configure +make +make altinstall From 522b2bb1f5ede3b47ec67765e6490234d75d5ee5 Mon Sep 17 00:00:00 2001 From: EliseCastle23 <109446148+EliseCastle23@users.noreply.github.com> Date: Fri, 19 Jul 2024 14:09:45 -0600 Subject: [PATCH 16/48] adding a backoff limit and job history limit to monitor cronjobs (#2598) --- kube/services/node-monitors/argo-monitors/argo-node-age.yaml | 3 +++ kube/services/node-monitors/fenceshib-jenkins-test.yaml | 3 +++ kube/services/node-monitors/node-not-ready.yaml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/kube/services/node-monitors/argo-monitors/argo-node-age.yaml b/kube/services/node-monitors/argo-monitors/argo-node-age.yaml index b389c072c..7a60a32ce 100644 --- a/kube/services/node-monitors/argo-monitors/argo-node-age.yaml +++ b/kube/services/node-monitors/argo-monitors/argo-node-age.yaml @@ -5,8 +5,11 @@ metadata: namespace: default spec: schedule: "*/5 * * * *" + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 1 jobTemplate: spec: + backoffLimit: 4 template: metadata: labels: diff --git a/kube/services/node-monitors/fenceshib-jenkins-test.yaml b/kube/services/node-monitors/fenceshib-jenkins-test.yaml index e9e27af98..deaf26b3e 100644 --- a/kube/services/node-monitors/fenceshib-jenkins-test.yaml +++ b/kube/services/node-monitors/fenceshib-jenkins-test.yaml @@ -5,8 +5,11 @@ metadata: namespace: default spec: schedule: "0 */4 * * *" + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 1 jobTemplate: spec: + backoffLimit: 4 template: metadata: labels: diff --git a/kube/services/node-monitors/node-not-ready.yaml b/kube/services/node-monitors/node-not-ready.yaml index 15ed616e6..709dfc79e 100644 --- a/kube/services/node-monitors/node-not-ready.yaml 
+++ b/kube/services/node-monitors/node-not-ready.yaml @@ -5,8 +5,11 @@ metadata: namespace: default spec: schedule: "*/30 * * * *" + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 1 jobTemplate: spec: + backoffLimit: 4 template: metadata: labels: From 62629a32c027debcc446d0ad6516b0c4f26a738d Mon Sep 17 00:00:00 2001 From: Ajo Augustine Date: Tue, 23 Jul 2024 10:21:30 -0500 Subject: [PATCH 17/48] script to update the gen3 secrets after migration (#2594) * script to update the gen3 secrets after migration * Update config-update.sh * Update config-update.sh --- files/scripts/config-update.sh | 298 +++++++++++++++++++++++++++++++++ 1 file changed, 298 insertions(+) create mode 100644 files/scripts/config-update.sh diff --git a/files/scripts/config-update.sh b/files/scripts/config-update.sh new file mode 100644 index 000000000..55938d492 --- /dev/null +++ b/files/scripts/config-update.sh @@ -0,0 +1,298 @@ +#!/bin/bash + +# Script Name: config-update.sh +# Description: This script updates the gen3 config files for various services based on information +# provided in a migration file migration.txt. It updates JSON configuration files and other related files +# with new database host, username, and database name. The script also verifies the updates +# to ensure they are applied correctly. + +# Ensure the GEN3_HOME variable is set to the correct path +if [[ -z "$GEN3_HOME" ]]; then + echo "GEN3_HOME is not set. Please set it to the path of your Gen3 installation." + exit 1 +fi + +# Check if jq is installed +if ! command -v jq &> /dev/null; then + echo "jq could not be found. Please install jq to run this script." 
+ exit 1 +fi + +source "${GEN3_HOME}/gen3/lib/utils.sh" +gen3_load "gen3/lib/kube-setup-init" + +# Backup the $HOME/Gen3Secrets directory +backup_dir="$HOME/Gen3Secrets-$(date +%Y%m%d%H%M%S)" +cp -r "$HOME/Gen3Secrets" "$backup_dir" +echo "Backup of Gen3Secrets created at $backup_dir" + +# Function to update JSON file +update_json_config() { + local file_path=$1 + local service=$2 + local db_host=$3 + local db_username=$4 + local db_database=$5 + + echo "Updating JSON config for service: $service" + echo "File path: $file_path" + echo "db_host: $db_host" + echo "db_username: $db_username" + echo "db_database: $db_database" + + if [[ -f $file_path ]]; then + local tmp_file + tmp_file=$(mktemp) + + if [[ $service == "fence" || $service == "userapi" ]]; then + jq --arg db_host "$db_host" --arg db_username "$db_username" --arg db_database "$db_database" \ + '(.fence.db_host = $db_host) | (.fence.db_username = $db_username) | (.fence.db_database = $db_database) | + (.fence.fence_database = $db_database) | + (.userapi.db_host = $db_host) | (.userapi.db_username = $db_username) | (.userapi.db_database = $db_database) | + (.userapi.fence_database = $db_database) | + (.sheepdog.fence_host = $db_host) | (.sheepdog.fence_username = $db_username) | (.sheepdog.fence_database = $db_database) | + (.gdcapi.fence_host = $db_host) | (.gdcapi.fence_username = $db_username) | (.gdcapi.fence_database = $db_database) | + (.peregrine.fence_host = $db_host) | (.peregrine.fence_username = $db_username) | (.peregrine.fence_database = $db_database)' \ + "$file_path" > "$tmp_file" && mv "$tmp_file" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(jq -r '.fence.db_host' "$file_path") + updated_username=$(jq -r '.fence.db_username' "$file_path") + updated_database=$(jq -r '.fence.db_database' "$file_path") + if [[ "$updated_host" == "$db_host" && "$updated_username" == "$db_username" && "$updated_database" == "$db_database" ]]; then + 
gen3_log_info "Updated JSON config for service: $service successfully." + else + gen3_log_err "Failed to update JSON config for service: $service." + fi + + elif [[ $service == "sheepdog" || $service == "gdcapi" ]]; then + jq --arg db_host "$db_host" --arg db_username "$db_username" --arg db_database "$db_database" \ + '(.sheepdog.db_host = $db_host) | (.sheepdog.db_username = $db_username) | (.sheepdog.db_database = $db_database) | + (.gdcapi.db_host = $db_host) | (.gdcapi.db_username = $db_username) | (.gdcapi.db_database = $db_database)' \ + "$file_path" > "$tmp_file" && mv "$tmp_file" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(jq -r '.sheepdog.db_host' "$file_path") + updated_username=$(jq -r '.sheepdog.db_username' "$file_path") + updated_database=$(jq -r '.sheepdog.db_database' "$file_path") + if [[ "$updated_host" == "$db_host" && "$updated_username" == "$db_username" && "$updated_database" == "$db_database" ]]; then + gen3_log_info "Updated JSON config for service: $service successfully." + else + gen3_log_err "Failed to update JSON config for service: $service." + fi + + elif [[ $service == "indexd" ]]; then + jq --arg db_host "$db_host" --arg db_username "$db_username" --arg db_database "$db_database" \ + '(.indexd.db_host = $db_host) | (.indexd.db_username = $db_username) | (.indexd.db_database = $db_database)' \ + "$file_path" > "$tmp_file" && mv "$tmp_file" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(jq -r '.indexd.db_host' "$file_path") + updated_username=$(jq -r '.indexd.db_username' "$file_path") + updated_database=$(jq -r '.indexd.db_database' "$file_path") + if [[ "$updated_host" == "$db_host" && "$updated_username" == "$db_username" && "$updated_database" == "$db_database" ]]; then + gen3_log_info "Updated JSON config for service: $service successfully." 
+ else + gen3_log_err "Failed to update JSON config for service: $service." + fi + + elif [[ $service == "peregrine" ]]; then + jq --arg db_host "$db_host" --arg db_username "$db_username" --arg db_database "$db_database" \ + '(.peregrine.db_host = $db_host) | (.peregrine.db_username = $db_username) | (.peregrine.db_database = $db_database)' \ + "$file_path" > "$tmp_file" && mv "$tmp_file" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(jq -r '.peregrine.db_host' "$file_path") + updated_username=$(jq -r '.peregrine.db_username' "$file_path") + updated_database=$(jq -r '.peregrine.db_database' "$file_path") + if [[ "$updated_host" == "$db_host" && "$updated_username" == "$db_username" && "$updated_database" == "$db_database" ]]; then + gen3_log_info "Updated JSON config for service: $service successfully." + else + gen3_log_err "Failed to update JSON config for service: $service." + fi + + else + jq --arg db_host "$db_host" --arg db_username "$db_username" --arg db_database "$db_database" \ + '(.db_host = $db_host) | (.db_username = $db_username) | (.db_database = $db_database)' \ + "$file_path" > "$tmp_file" && mv "$tmp_file" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(jq -r '.db_host' "$file_path") + updated_username=$(jq -r '.db_username' "$file_path") + updated_database=$(jq -r '.db_database' "$file_path") + if [[ "$updated_host" == "$db_host" && "$updated_username" == "$db_username" && "$updated_database" == "$db_database" ]]; then + gen3_log_info "Updated JSON config for service: $service successfully." + else + gen3_log_err "Failed to update JSON config for service: $service." + fi + fi + else + echo "File $file_path does not exist." 
+ fi +} + +# Function to update other files +update_other_files() { + local file_path=$1 + local db_host=$2 + local db_username=$3 + local db_database=$4 + + echo "Updating other files at $file_path" + echo "db_host: $db_host" + echo "db_username: $db_username" + echo "db_database: $db_database" + + if [[ -f $file_path ]]; then + if [[ "$file_path" == *".env" ]]; then + sed -i "s|DB_HOST=.*|DB_HOST=$db_host|" "$file_path" + sed -i "s|DB_USER=.*|DB_USER=$db_username|" "$file_path" + sed -i "s|DB_DATABASE=.*|DB_DATABASE=$db_database|" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(grep 'DB_HOST=' "$file_path" | cut -d'=' -f2) + updated_username=$(grep 'DB_USER=' "$file_path" | cut -d'=' -f2) + updated_database=$(grep 'DB_DATABASE=' "$file_path" | cut -d'=' -f2) + else + sed -i "s|DB_HOST:.*|DB_HOST: $db_host|" "$file_path" + sed -i "s|DB_USER:.*|DB_USER: $db_username|" "$file_path" + sed -i "s|DB_DATABASE:.*|DB_DATABASE: $db_database|" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(grep 'DB_HOST:' "$file_path" | cut -d':' -f2 | xargs) + updated_username=$(grep 'DB_USER:' "$file_path" | cut -d':' -f2 | xargs) + updated_database=$(grep 'DB_DATABASE:' "$file_path" | cut -d':' -f2 | xargs) + fi + + if [[ "$updated_host" == "$db_host" && "$updated_username" == "$db_username" && "$updated_database" == "$db_database" ]]; then + gen3_log_info "Updated file at $file_path successfully." + else + gen3_log_err "Failed to update file at $file_path." + fi + else + echo "File $file_path does not exist." 
+ fi +} + +# Function to update fence-config.yaml +update_fence_config() { + local creds_json_path="$HOME/Gen3Secrets/creds.json" + local file_path=$1 + local db_host=$2 + local db_username=$3 + local db_database=$4 + + echo "Updating fence-config.yaml at $file_path" + echo "db_host: $db_host" + echo "db_username: $db_username" + echo "db_database: $db_database" + + if [[ -f $file_path ]]; then + local current_password + current_password=$(jq -r '.fence.db_password' "$creds_json_path") + + sed -i "s|DB: postgresql://.*:.*@.*:5432/.*|DB: postgresql://$db_username:$current_password@$db_host:5432/$db_database|" "$file_path" + + # Verify the update + local updated_entry + updated_entry=$(grep 'DB: postgresql://' "$file_path") + if [[ "$updated_entry" == *"$db_host"* && "$updated_entry" == *"$db_username"* && "$updated_entry" == *"$db_database"* ]]; then + gen3_log_info "Updated fence-config.yaml at $file_path successfully." + else + gen3_log_err "Failed to update fence-config.yaml at $file_path." + fi + else + echo "File $file_path does not exist." 
+ fi +} + +# Function to parse the migration file and apply updates +parse_and_update() { + local migration_file=$1 + local creds_json_path="$HOME/Gen3Secrets/creds.json" + local namespace + namespace=$(gen3 db namespace) + local new_db_host + new_db_host=$(grep "INFO" "$migration_file" | awk '{print $8}') + + gen3_log_info "New db_host identified: $new_db_host" + while read -r line; do + if [[ $line == Source_Database* || $line == User* ]]; then + echo "Processing line: $line" + + IFS=' ' read -r -a parts <<< "$line" + local db_host="$new_db_host" + local db_username + local db_database + + if [[ $line == Source_Database* ]]; then + db_username="${parts[9]}" + echo "db_username='${parts[9]}'" + db_database="${parts[7]}" + echo "db_database='${parts[7]}'" + elif [[ $line == User* ]]; then + db_username="${parts[1]}" + echo "db_username='${parts[1]}'" + db_database="${parts[7]}" + echo "db_database='${parts[7]}'" + else + continue + fi + + # Extract the service name from db_username + if [[ $db_username =~ ^([a-zA-Z]+)_user_ ]]; then + local service="${BASH_REMATCH[1]}" + else + echo "Skipping line: $line due to improper db_username format" + continue + fi + + gen3_log_info "Updating service: $service with db_username: $db_username and db_database: $db_database" + + # Update specific config files for each service + case $service in + arborist) + update_json_config "$HOME/Gen3Secrets/g3auto/arborist/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + ;; + audit) + update_json_config "$HOME/Gen3Secrets/g3auto/audit/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + update_other_files "$HOME/Gen3Secrets/g3auto/audit/audit-service-config.yaml" "$db_host" "$db_username" "$db_database" + ;; + metadata) + update_json_config "$HOME/Gen3Secrets/g3auto/metadata/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + update_other_files "$HOME/Gen3Secrets/g3auto/metadata/metadata.env" "$db_host" "$db_username" "$db_database" + ;; + 
ohdsi) + update_json_config "$HOME/Gen3Secrets/g3auto/ohdsi/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + ;; + orthanc) + update_json_config "$HOME/Gen3Secrets/g3auto/orthanc/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + ;; + requestor) + update_json_config "$HOME/Gen3Secrets/g3auto/requestor/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + update_other_files "$HOME/Gen3Secrets/g3auto/requestor/requestor-config.yaml" "$db_host" "$db_username" "$db_database" + ;; + wts) + update_json_config "$HOME/Gen3Secrets/g3auto/wts/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + ;; + fence) + update_fence_config "$HOME/Gen3Secrets/apis_configs/fence-config.yaml" "$db_host" "$db_username" "$db_database" + update_json_config "$creds_json_path" "$service" "$db_host" "$db_username" "$db_database" + ;; + sheepdog | peregrine | indexd) + update_json_config "$creds_json_path" "$service" "$db_host" "$db_username" "$db_database" + ;; + esac + fi + done < "$migration_file" +} + +# Run the script +parse_and_update "migration.txt" From 0b9eadca56a91238f27fca7360cd9373246befe0 Mon Sep 17 00:00:00 2001 From: Ajo Augustine Date: Thu, 25 Jul 2024 14:41:18 -0500 Subject: [PATCH 18/48] Update psql-fips-fix.sh (#2605) add requestor db --- files/scripts/psql-fips-fix.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/scripts/psql-fips-fix.sh b/files/scripts/psql-fips-fix.sh index fcbb6e20c..82ac5c59f 100644 --- a/files/scripts/psql-fips-fix.sh +++ b/files/scripts/psql-fips-fix.sh @@ -16,7 +16,7 @@ for name in indexd fence sheepdog peregrine; do update_pass $name $username $password done -for name in wts metadata gearbox audit arborist access-backend argo_db atlas argo thor; do +for name in wts metadata gearbox audit arborist access-backend argo_db requestor atlas argo thor; do if [[ ! 
-z $(gen3 secrets decode $name-g3auto dbcreds.json) ]]; then username=$(gen3 secrets decode $name-g3auto dbcreds.json | jq -r .db_username) password=$(gen3 secrets decode $name-g3auto dbcreds.json | jq -r .db_password) From 2c0dac9ed52948c48a701e5220c72c9648c0c895 Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Tue, 30 Jul 2024 14:41:17 -0400 Subject: [PATCH 19/48] Up-to-date setup for cluster-level resources (#2610) --- gen3/bin/kube-setup-cluster-level-resources | 41 +++++++++++++++++++ .../services/cluster-level-resources/app.yaml | 21 ++++++++++ 2 files changed, 62 insertions(+) create mode 100644 gen3/bin/kube-setup-cluster-level-resources create mode 100644 kube/services/cluster-level-resources/app.yaml diff --git a/gen3/bin/kube-setup-cluster-level-resources b/gen3/bin/kube-setup-cluster-level-resources new file mode 100644 index 000000000..f4349398f --- /dev/null +++ b/gen3/bin/kube-setup-cluster-level-resources @@ -0,0 +1,41 @@ +#!/bin/bash +source "${GEN3_HOME}/gen3/lib/utils.sh" +gen3_load "gen3/gen3setup" + +# Set default value for TARGET_REVISION +TARGET_REVISION="master" + +# Ask for TARGET_REVISION +read -r -p "Please provide a target revision for the cluster resources chart (default is master): " user_target_revision +# If user input is not empty, use it; otherwise, keep the default +TARGET_REVISION=${user_target_revision:-$TARGET_REVISION} + +# Ask for CLUSTER_NAME (no default value) +read -r -p "Enter the name of the cluster: " CLUSTER_NAME + +# Check if CLUSTER_NAME is provided +if [ -z "$CLUSTER_NAME" ]; then + echo "Error: CLUSTER_NAME cannot be empty." 
+ exit 1 +fi + +# Create a temporary file +temp_file=$(mktemp) + +# Use sed to replace placeholders in the original file +sed -e "s|TARGET_REVISION|$TARGET_REVISION|g" \ + -e "s|CLUSTER_NAME|$CLUSTER_NAME|g" \ + "$GEN3_HOME/kube/services/cluster-level-resources/app.yaml" > "$temp_file" + +echo "WARNING: Do you have a folder already set up for this environment in gen3-gitops, in the form of ${CLUSTER_NAME}/cluster-values/cluster-values.yaml? If not, this will not work." +echo "" +read -n 1 -s -r -p "Press any key to confirm and continue, or Ctrl+C to cancel..." +echo "" + +# Apply the templated file with kubectl +kubectl apply -f "$temp_file" || { rm -f "$temp_file"; echo "Error: kubectl apply failed." >&2; exit 1; } + +# Clean up the temporary file +rm "$temp_file" + +echo "Application has been applied to the cluster." \ No newline at end of file diff --git a/kube/services/cluster-level-resources/app.yaml b/kube/services/cluster-level-resources/app.yaml new file mode 100644 index 000000000..95a2ed4c4 --- /dev/null +++ b/kube/services/cluster-level-resources/app.yaml @@ -0,0 +1,21 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: cluster-level-resources + namespace: argocd +spec: + project: default + destination: + namespace: argocd + server: https://kubernetes.default.svc + source: + repoURL: https://github.com/uc-cdis/gen3-gitops.git + targetRevision: TARGET_REVISION + path: cluster-level-resources + helm: + valueFiles: + - ../CLUSTER_NAME/cluster-values/cluster-values.yaml + releaseName: cluster-level-resources + syncPolicy: + automated: + selfHeal: true From 4e6964e029db6cb8c012da3bf423358332c19b43 Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Tue, 30 Jul 2024 15:10:56 -0400 Subject: [PATCH 20/48] Feat/setup cluster level resources (#2611) * Up-to-date setup for cluster-level resources * Need the extension --- .../bin/kube-setup-cluster-level-resources.sh | 41 +++++++++++++++++++ 1 file changed, 
41 insertions(+) create mode 100644 gen3/bin/kube-setup-cluster-level-resources.sh diff 
--git a/gen3/bin/kube-setup-cluster-level-resources.sh b/gen3/bin/kube-setup-cluster-level-resources.sh new file mode 100644 index 000000000..f4349398f --- /dev/null +++ b/gen3/bin/kube-setup-cluster-level-resources.sh @@ -0,0 +1,41 @@ +#!/bin/bash +source "${GEN3_HOME}/gen3/lib/utils.sh" +gen3_load "gen3/gen3setup" + +# Set default value for TARGET_REVISION +TARGET_REVISION="master" + +# Ask for TARGET_REVISION +read -r -p "Please provide a target revision for the cluster resources chart (default is master): " user_target_revision +# If user input is not empty, use it; otherwise, keep the default +TARGET_REVISION=${user_target_revision:-$TARGET_REVISION} + +# Ask for CLUSTER_NAME (no default value) +read -r -p "Enter the name of the cluster: " CLUSTER_NAME + +# Check if CLUSTER_NAME is provided +if [ -z "$CLUSTER_NAME" ]; then + echo "Error: CLUSTER_NAME cannot be empty." + exit 1 +fi + +# Create a temporary file +temp_file=$(mktemp) + +# Use sed to replace placeholders in the original file +sed -e "s|TARGET_REVISION|$TARGET_REVISION|g" \ + -e "s|CLUSTER_NAME|$CLUSTER_NAME|g" \ + "$GEN3_HOME/kube/services/cluster-level-resources/app.yaml" > "$temp_file" + +echo "WARNING: Do you have a folder already set up for this environment in gen3-gitops, in the form of ${CLUSTER_NAME}/cluster-values/cluster-values.yaml? If not, this will not work." +echo "" +read -n 1 -s -r -p "Press any key to confirm and continue, or Ctrl+C to cancel..." +echo "" + +# Apply the templated file with kubectl +kubectl apply -f "$temp_file" || { rm -f "$temp_file"; echo "Error: kubectl apply failed." >&2; exit 1; } + +# Clean up the temporary file +rm "$temp_file" + +echo "Application has been applied to the cluster." 
\ No newline at end of file From 5a510742c30de90831aded523d25b2ddb160fc8e Mon Sep 17 00:00:00 2001 From: Ajo Augustine Date: Thu, 1 Aug 2024 13:57:53 -0500 Subject: [PATCH 21/48] Update psql-fips-fix.sh (#2612) add ohdsi to psql-fips-fix.sh --- files/scripts/psql-fips-fix.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/scripts/psql-fips-fix.sh b/files/scripts/psql-fips-fix.sh index 82ac5c59f..8cb0ed049 100644 --- a/files/scripts/psql-fips-fix.sh +++ b/files/scripts/psql-fips-fix.sh @@ -16,7 +16,7 @@ for name in indexd fence sheepdog peregrine; do update_pass $name $username $password done -for name in wts metadata gearbox audit arborist access-backend argo_db requestor atlas argo thor; do +for name in wts metadata gearbox audit arborist access-backend argo_db requestor atlas ohdsi argo thor; do if [[ ! -z $(gen3 secrets decode $name-g3auto dbcreds.json) ]]; then username=$(gen3 secrets decode $name-g3auto dbcreds.json | jq -r .db_username) password=$(gen3 secrets decode $name-g3auto dbcreds.json | jq -r .db_password) From b5ce29c88585eff92854688f2da697ba10d9e72f Mon Sep 17 00:00:00 2001 From: emalinowski Date: Tue, 6 Aug 2024 16:01:32 -0500 Subject: [PATCH 22/48] feat(set-es-domain): Updated es-proxy script to allow for setting es domain (#2614) --- gen3/bin/kube-setup-aws-es-proxy.sh | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/gen3/bin/kube-setup-aws-es-proxy.sh b/gen3/bin/kube-setup-aws-es-proxy.sh index 986c5bf05..3feee143a 100644 --- a/gen3/bin/kube-setup-aws-es-proxy.sh +++ b/gen3/bin/kube-setup-aws-es-proxy.sh @@ -2,7 +2,7 @@ # # Deploy aws-es-proxy into existing commons # https://github.com/abutaha/aws-es-proxy -# +# source "${GEN3_HOME}/gen3/lib/utils.sh" @@ -11,17 +11,33 @@ gen3_load "gen3/lib/kube-setup-init" # Deploy Datadog with argocd if flag is set in the manifest path manifestPath=$(g3k_manifest_path) es7="$(jq -r ".[\"global\"][\"es7\"]" < "$manifestPath" | tr '[:upper:]' 
'[:lower:]')" +esDomain="$(jq -r ".[\"global\"][\"esDomain\"]" < "$manifestPath" | tr '[:upper:]' '[:lower:]')" [[ -z "$GEN3_ROLL_ALL" ]] && gen3 kube-setup-secrets if g3kubectl get secrets/aws-es-proxy > /dev/null 2>&1; then envname="$(gen3 api environment)" - - if [ "$es7" = true ]; then + if [ "$esDomain" != "null" ]; then + if ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names ${esDomain} --query "DomainStatusList[*].Endpoints" --output text)" \ + && [[ -n "${ES_ENDPOINT}" && -n "${esDomain}" ]]; then + gen3 roll aws-es-proxy GEN3_ES_ENDPOINT "${ES_ENDPOINT}" + g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-priority-class.yaml" + g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-service.yaml" + gen3_log_info "kube-setup-aws-es-proxy" "The aws-es-proxy service has been deployed onto the k8s cluster." + else + # + # probably running in jenkins or job environment + # try to make sure network policy labels are up to date + # + gen3_log_info "kube-setup-aws-es-proxy" "Not deploying aws-es-proxy, no endpoint to hook it up." + gen3 kube-setup-networkpolicy service aws-es-proxy + g3kubectl patch deployment "aws-es-proxy-deployment" -p '{"spec":{"template":{"metadata":{"labels":{"netvpc":"yes"}}}}}' || true + fi + elif [ "$es7" = true ]; then if ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names ${envname}-gen3-metadata-2 --query "DomainStatusList[*].Endpoints" --output text)" \ && [[ -n "${ES_ENDPOINT}" && -n "${envname}" ]]; then gen3 roll aws-es-proxy GEN3_ES_ENDPOINT "${ES_ENDPOINT}" - g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-priority-class.yaml" + g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-priority-class.yaml" g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-service.yaml" gen3_log_info "kube-setup-aws-es-proxy" "The aws-es-proxy service has been deployed onto the k8s cluster." 
else From 099d50fd38d5cf7f9c6a144c020c32aefbdfa3ce Mon Sep 17 00:00:00 2001 From: George Thomas <98996322+george42-ctds@users.noreply.github.com> Date: Wed, 7 Aug 2024 09:42:12 -0700 Subject: [PATCH 23/48] (HP-1598): enable datadog in cedar-wrapper deployment (#2608) * (HP-1598): enable datadog in deployment * (HP-1598): set GEN3_DEBUG=True * (HP-1598): set GEN3_DEBUG=False --- .../cedar-wrapper/cedar-wrapper-deploy.yaml | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/kube/services/cedar-wrapper/cedar-wrapper-deploy.yaml b/kube/services/cedar-wrapper/cedar-wrapper-deploy.yaml index fa6b741a2..740e18c91 100644 --- a/kube/services/cedar-wrapper/cedar-wrapper-deploy.yaml +++ b/kube/services/cedar-wrapper/cedar-wrapper-deploy.yaml @@ -97,6 +97,36 @@ spec: secretKeyRef: name: cedar-g3auto key: "cedar_api_key.txt" + - name: GEN3_DEBUG + GEN3_DEBUG_FLAG|-value: "False"-| + - name: DD_ENABLED + valueFrom: + configMapKeyRef: + name: manifest-global + key: dd_enabled + optional: true + - name: DD_ENV + valueFrom: + fieldRef: + fieldPath: metadata.labels['tags.datadoghq.com/env'] + - name: DD_SERVICE + valueFrom: + fieldRef: + fieldPath: metadata.labels['tags.datadoghq.com/service'] + - name: DD_VERSION + valueFrom: + fieldRef: + fieldPath: metadata.labels['tags.datadoghq.com/version'] + - name: DD_LOGS_INJECTION + value: "true" + - name: DD_PROFILING_ENABLED + value: "true" + - name: DD_TRACE_SAMPLE_RATE + value: "1" + - name: DD_AGENT_HOST + valueFrom: + fieldRef: + fieldPath: status.hostIP volumeMounts: - name: "ca-volume" readOnly: true From 1d953310e26f728748545abd65917b6b363dca7b Mon Sep 17 00:00:00 2001 From: Ajo Augustine Date: Thu, 8 Aug 2024 10:21:35 -0500 Subject: [PATCH 24/48] Update psql-db-aurora-migration-job.yaml (#2616) set password_encryption to scram-sha-256 --- kube/services/jobs/psql-db-aurora-migration-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/jobs/psql-db-aurora-migration-job.yaml 
b/kube/services/jobs/psql-db-aurora-migration-job.yaml index dc6f40c11..ca81c37e8 100644 --- a/kube/services/jobs/psql-db-aurora-migration-job.yaml +++ b/kube/services/jobs/psql-db-aurora-migration-job.yaml @@ -154,7 +154,7 @@ spec: if [ $? -eq 0 ]; then gen3_log_info "User $db_user, password already exists" else - gen3 psql aurora -c "CREATE USER \"$db_user\" WITH PASSWORD '$db_password' CREATEDB" + gen3 psql aurora -c "SET password_encryption = 'scram-sha-256';CREATE USER \"$db_user\" WITH PASSWORD '$db_password' CREATEDB" if [ $? -ne 0 ]; then gen3_log_err "Failed to create user for $database" failed_migrations="${failed_migrations}\nDatabase: $database, Error: Failed to create user" From 2cbd1ca6f28aab03c68899d1c03270dca23db638 Mon Sep 17 00:00:00 2001 From: pieterlukasse Date: Mon, 12 Aug 2024 19:51:43 +0200 Subject: [PATCH 25/48] Feat: set webapi token exp. to 15 min and refresh when 4 min left (#2589) * feat: set webapi token exp. to 15 min and refresh when 4 min left ...to simulate a session timeout (inactivity timeout) of 15 min * Use python 3.9 * revert change to lambda.tf --------- Co-authored-by: Hara Prasad Juvvala --- gen3/test/bootstrapTest.sh | 2 +- gen3/test/fenceStuffTest.sh | 2 +- kube/services/ohdsi-atlas/ohdsi-atlas-config-local.yaml | 1 + kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gen3/test/bootstrapTest.sh b/gen3/test/bootstrapTest.sh index be3241f31..d07512d8b 100644 --- a/gen3/test/bootstrapTest.sh +++ b/gen3/test/bootstrapTest.sh @@ -12,7 +12,7 @@ test_bootstrap_fenceconfig() { because $? "secret template exists and is valid yaml: $secretConf" [[ -f "$publicConf" ]] && yq -r . < "$secretConf" > /dev/null; because $? "public template exists and is valid yaml: $secretConf" - python3.8 "$GEN3_HOME/apis_configs/yaml_merge.py" "$publicConf" "$secretConf" | yq -r . > /dev/null; + python3.9 "$GEN3_HOME/apis_configs/yaml_merge.py" "$publicConf" "$secretConf" | yq -r . 
> /dev/null; because $? "yaml_perge public private should yield valid yaml" } diff --git a/gen3/test/fenceStuffTest.sh b/gen3/test/fenceStuffTest.sh index 09a0eb125..df250a1ad 100644 --- a/gen3/test/fenceStuffTest.sh +++ b/gen3/test/fenceStuffTest.sh @@ -17,7 +17,7 @@ EOM C: 4 B: 3 EOM - json3="$(python3.8 "$GEN3_HOME/apis_configs/yaml_merge.py" "$yaml1" "$yaml2")"; because $? "yaml_merge should succeed" + json3="$(python3.9 "$GEN3_HOME/apis_configs/yaml_merge.py" "$yaml1" "$yaml2")"; because $? "yaml_merge should succeed" [[ "1" == "$(jq -r .A <<<"$json3")" ]]; because $? ".A should be 1" /bin/rm "$yaml1" /bin/rm "$yaml2" diff --git a/kube/services/ohdsi-atlas/ohdsi-atlas-config-local.yaml b/kube/services/ohdsi-atlas/ohdsi-atlas-config-local.yaml index 6b724eb2d..7c686df91 100644 --- a/kube/services/ohdsi-atlas/ohdsi-atlas-config-local.yaml +++ b/kube/services/ohdsi-atlas/ohdsi-atlas-config-local.yaml @@ -29,6 +29,7 @@ data: configLocal.cohortComparisonResultsEnabled = false; configLocal.userAuthenticationEnabled = true; configLocal.plpResultsEnabled = false; + configLocal.refreshTokenThreshold = 1000 * 60 * 4; // refresh auth token if it will expire within 4 minutes return configLocal; }); diff --git a/kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml b/kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml index 8eb01ec08..a5d0972eb 100644 --- a/kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml +++ b/kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml @@ -22,7 +22,7 @@ stringData: security_cors_enabled: "true" security_origin: "*" - security_token_expiration: "43200" + security_token_expiration: "900" security_ssl_enabled: "false" security_provider: AtlasRegularSecurity From d7ffd046b423eaf0a14528d757e2140e07717a7e Mon Sep 17 00:00:00 2001 From: Ajo Augustine Date: Wed, 14 Aug 2024 16:21:59 -0500 Subject: [PATCH 26/48] Feat/backup script (#2604) * install mount-s3 * install mount-s3 * install mount-s3 * install mount-s3 * Update Dockerfile * Update Dockerfile * 
Update Dockerfile * Update Dockerfile * Update Dockerfile * Create psql-db-backup-encrypt-job.yaml * Update psql-db-backup-encrypt-job.yaml * Update dbbackup.sh * Update Dockerfile * Update dbbackup.sh * Update dbbackup.sh * Update dbbackup.sh * Update psql-db-backup-encrypt-job.yaml * Update dbbackup.sh * Update dbbackup.sh * Update dbbackup.sh * Update dbbackup.sh * Update dbbackup.sh * Update dbbackup.sh updated cron time * add kube-setup-s3-csi-driver * update dbbackup.sh * Update kube-setup-s3-csi-driver fix tmp file name fix tmp file name * Update dbbackup.sh added service account creation * Update kube-setup-s3-csi-driver.sh Add all oidc_url's to the trust policy * Update kube-setup-s3-csi-driver.sh separate policies, roles per cluster --- gen3/bin/dbbackup.sh | 208 ++++++++++++---- gen3/bin/kube-setup-s3-csi-driver.sh | 202 ++++++++++++++++ .../jobs/psql-db-backup-encrypt-job.yaml | 224 ++++++++++++++++++ 3 files changed, 588 insertions(+), 46 deletions(-) create mode 100644 gen3/bin/kube-setup-s3-csi-driver.sh create mode 100644 kube/services/jobs/psql-db-backup-encrypt-job.yaml diff --git a/gen3/bin/dbbackup.sh b/gen3/bin/dbbackup.sh index eeb569519..df0139d3b 100644 --- a/gen3/bin/dbbackup.sh +++ b/gen3/bin/dbbackup.sh @@ -10,7 +10,7 @@ # or copy to Aurora. # # Usage: -# gen3 dbbackup [dump|restore|va-dump|create-sa|migrate-to-aurora|copy-to-aurora ] +# gen3 dbbackup [dump|restore|va-dump|create-sa|migrate-to-aurora|copy-to-aurora|encrypt|setup-cron ] # # dump - Initiates a database dump, creating the essential AWS resources if they are absent. # The dump operation is intended to be executed from the namespace/commons that requires @@ -21,19 +21,12 @@ # va-dump - Runs a va-testing DB dump. # create-sa - Creates the necessary service account and roles for DB copy. # migrate-to-aurora - Triggers a service account creation and a job to migrate a Gen3 commons to an AWS RDS Aurora instance.
-# copy-to-aurora - Triggers a service account creation and a job to copy the databases Indexd, Sheepdog & Metadata to new databases within an RDS Aurora cluster. -# +# copy-to-aurora - Triggers a service account creation and a job to copy the databases Indexd, Sheepdog & Metadata to new databases within an RDS Aurora cluster. The source_namespace must be provided. The job should be run at the destination, not at the source. +# encrypt - Perform encrypted backup. +# setup-cron - Set up a cronjob for encrypted backup. # #################################################################################################### -# Exit on error -#set -e - -# Print commands before executing -#set -x - -#trap 'echo "Error at Line $LINENO"' ERR - source "${GEN3_HOME}/gen3/lib/utils.sh" gen3_load "gen3/lib/kube-setup-init" @@ -42,20 +35,36 @@ account_id=$(aws sts get-caller-identity --query "Account" --output text) vpc_name="$(gen3 api environment)" namespace="$(gen3 db namespace)" sa_name="dbbackup-sa" -bucket_name="gen3-db-backups-${account_id}" +bucket_name_encrypted="gen3-db-backups-encrypted-${account_id}" +kms_key_alias="alias/gen3-db-backups-kms-key" + +cluster_arn=$(kubectl config current-context) +eks_cluster=$(echo "$cluster_arn" | awk -F'/' '{print $2}') -gen3_log_info "policy_name: $policy_name" gen3_log_info "account_id: $account_id" gen3_log_info "vpc_name: $vpc_name" gen3_log_info "namespace: $namespace" gen3_log_info "sa_name: $sa_name" -gen3_log_info "bucket_name: $bucket_name" +gen3_log_info "bucket_name_encrypted: $bucket_name_encrypted" +gen3_log_info "kms_key_alias: $kms_key_alias" +gen3_log_info "eks_cluster: $eks_cluster" + +# Create or get the KMS key +create_or_get_kms_key() { + kms_key_id=$(aws kms list-aliases --query "Aliases[?AliasName=='$kms_key_alias'].TargetKeyId" --output text) + if [ -z "$kms_key_id" ]; then + gen3_log_info "Creating new KMS key with alias $kms_key_alias" + kms_key_id=$(aws kms create-key --query "KeyMetadata.KeyId" --output 
text) + aws kms create-alias --alias-name $kms_key_alias --target-key-id $kms_key_id + else + gen3_log_info "KMS key with alias $kms_key_alias already exists" + fi + kms_key_arn=$(aws kms describe-key --key-id $kms_key_id --query "KeyMetadata.Arn" --output text) +} # Create an S3 access policy if it doesn't exist create_policy() { - # Check if policy exists if ! aws iam list-policies --query "Policies[?PolicyName == '$policy_name'] | [0].Arn" --output text | grep -q "arn:aws:iam"; then - # Create the S3 access policy - policy document access_policy=$(cat <<-EOM { "Version": "2012-10-17", @@ -70,15 +79,14 @@ create_policy() { "s3:DeleteObject" ], "Resource": [ - "arn:aws:s3:::gen3-db-backups-*" + "arn:aws:s3:::gen3-db-backups-*", + "arn:aws:s3:::gen3-db-backups-encrypted-*" ] } ] } EOM ) - - # Create the S3 access policy from the policy document policy_arn=$(aws iam create-policy --policy-name "$policy_name" --policy-document "$access_policy" --query "Policy.Arn" --output text) gen3_log_info "policy_arn: $policy_arn" else @@ -90,16 +98,10 @@ EOM # Create or update the Service Account and its corresponding IAM Role create_service_account_and_role() { - cluster_arn=$(kubectl config current-context) - eks_cluster=$(echo "$cluster_arn" | awk -F'/' '{print $2}') oidc_url=$(aws eks describe-cluster --name $eks_cluster --query 'cluster.identity.oidc.issuer' --output text | sed -e 's/^https:\/\///') role_name="${vpc_name}-${namespace}-${sa_name}-role" role_arn="arn:aws:iam::${account_id}:role/${role_name}" local trust_policy=$(mktemp -p "$XDG_RUNTIME_DIR" "tmp_policy.XXXXXX") - gen3_log_info "trust_policy: $trust_policy" - gen3_log_info "eks_cluster: $eks_cluster" - gen3_log_info "oidc_url: $oidc_url" - gen3_log_info "role_name: $role_name" cat > ${trust_policy} <&1; then - gen3_log_info "Updating existing role: $role_name" aws iam update-assume-role-policy --role-name $role_name --policy-document "file://$trust_policy" else - gen3_log_info "Creating new role: $role_name" 
aws iam create-role --role-name $role_name --assume-role-policy-document "file://$trust_policy" fi @@ -143,20 +138,26 @@ EOF if ! kubectl get serviceaccount -n $namespace $sa_name 2>&1; then kubectl create serviceaccount -n $namespace $sa_name fi - # Annotate the KSA with the IAM role ARN - gen3_log_info "Annotating Service Account with IAM role ARN" + # Annotate the KSA with the IAM role ARN kubectl annotate serviceaccount -n ${namespace} ${sa_name} eks.amazonaws.com/role-arn=${role_arn} --overwrite - } -# Create an S3 bucket if it doesn't exist +# Create an S3 bucket with SSE-KMS if it doesn't exist create_s3_bucket() { + local bucket_name=$1 + local kms_key_arn=$2 # Check if bucket already exists if aws s3 ls "s3://$bucket_name" 2>&1 | grep -q 'NoSuchBucket'; then - gen3_log_info "Bucket does not exist, creating..." aws s3 mb "s3://$bucket_name" - else - gen3_log_info "Bucket $bucket_name already exists, skipping bucket creation." + # Enable SSE-KMS encryption on the bucket + aws s3api put-bucket-encryption --bucket $bucket_name --server-side-encryption-configuration '{ + "Rules": [{ + "ApplyServerSideEncryptionByDefault": { + "SSEAlgorithm": "aws:kms", + "KMSMasterKeyID": "'"$kms_key_arn"'" + } + }] + }' fi } @@ -181,7 +182,7 @@ apiVersion: v1 kind: ServiceAccount metadata: name: psql-db-copy-sa - + namespace: ${namespace} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -191,7 +192,6 @@ rules: - apiGroups: [""] resources: ["secrets"] verbs: ["get", "watch", "list"] - --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding @@ -208,6 +208,90 @@ subjects: EOF } +# Function to create the persistent volume and persistent volume claim +create_pv_pvc() { + if ! kubectl get pv s3-pv-db-backups 2>&1; then + cat <&1; then + cat <]" + echo "Invalid command. 
Usage: gen3 dbbackup [dump|restore|va-dump|create-sa|migrate-to-aurora|copy-to-aurora|encrypt|setup-cron ]" return 1 ;; esac diff --git a/gen3/bin/kube-setup-s3-csi-driver.sh b/gen3/bin/kube-setup-s3-csi-driver.sh new file mode 100644 index 000000000..c93ccf8dd --- /dev/null +++ b/gen3/bin/kube-setup-s3-csi-driver.sh @@ -0,0 +1,202 @@ +#!/bin/bash + +#################################################################################################### +# Script: kube-setup-s3-csi-driver.sh +# +# Description: +# This script sets up the Mountpoint for Amazon S3 CSI driver in an EKS cluster. +# It creates necessary IAM policies and roles. +# +# Usage: +# gen3 kube-setup-s3-csi-driver [bucket_name] +# +#################################################################################################### + +source "${GEN3_HOME}/gen3/lib/utils.sh" +gen3_load "gen3/lib/kube-setup-init" + +account_id=$(aws sts get-caller-identity --query "Account" --output text) +vpc_name="$(gen3 api environment)" +namespace="$(gen3 db namespace)" +default_bucket_name_encrypted="gen3-db-backups-encrypted-${account_id}" +bucket_name=${1:-$default_bucket_name_encrypted} + +cluster_arn=$(kubectl config current-context) +eks_cluster=$(echo "$cluster_arn" | awk -F'/' '{print $2}') + +gen3_log_info "account_id: $account_id" +gen3_log_info "vpc_name: $vpc_name" +gen3_log_info "namespace: $namespace" +gen3_log_info "bucket_name: $bucket_name" +gen3_log_info "eks_cluster: $eks_cluster" + +# Create policy for Mountpoint for Amazon S3 CSI driver +create_s3_csi_policy() { + policy_name="AmazonS3CSIDriverPolicy-${eks_cluster}" + policy_arn=$(aws iam list-policies --query "Policies[?PolicyName == '$policy_name'].[Arn]" --output text) + if [ -z "$policy_arn" ]; then + cat < /tmp/s3-csi-policy-$$.json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "MountpointFullBucketAccess", + "Effect": "Allow", + "Action": [ + "s3:ListBucket" + ], + "Resource": [ + "arn:aws:s3:::${bucket_name}" + ] + }, + { + 
"Sid": "MountpointFullObjectAccess", + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:PutObject", + "s3:AbortMultipartUpload", + "s3:DeleteObject" + ], + "Resource": [ + "arn:aws:s3:::${bucket_name}/*" + ] + } + ] +} +EOF + policy_arn=$(aws iam create-policy --policy-name "$policy_name" --policy-document file:///tmp/s3-csi-policy-$$.json --query "Policy.Arn" --output text) + rm -f /tmp/s3-csi-policy-$$.json + fi + gen3_log_info "Created or found policy with ARN: $policy_arn" + echo $policy_arn +} + +# Create the trust policy for Mountpoint for Amazon S3 CSI driver +create_s3_csi_trust_policy() { + oidc_url=$(aws eks describe-cluster --name $eks_cluster --query 'cluster.identity.oidc.issuer' --output text | sed -e 's/^https:\/\///') + trust_policy_file="/tmp/aws-s3-csi-driver-trust-policy-$$.json" + cat < ${trust_policy_file} +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Federated": "arn:aws:iam::${account_id}:oidc-provider/${oidc_url}" + }, + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringLike": { + "${oidc_url}:aud": "sts.amazonaws.com", + "${oidc_url}:sub": "system:serviceaccount:*:s3-csi-*" + } + } + } + ] +} +EOF +} + +# Create the IAM role for Mountpoint for Amazon S3 CSI driver +create_s3_csi_role() { + role_name="AmazonEKS_S3_CSI_DriverRole-${eks_cluster}" + if ! 
aws iam get-role --role-name $role_name 2>/dev/null; then + aws iam create-role --role-name $role_name --assume-role-policy-document file:///tmp/aws-s3-csi-driver-trust-policy-$$.json + rm -f /tmp/aws-s3-csi-driver-trust-policy-$$.json + fi + gen3_log_info "Created or found role: $role_name" + echo $role_name +} + +# Attach the policies to the IAM role +attach_s3_csi_policies() { + role_name=$1 + policy_arn=$2 + eks_policy_name="eks-s3-csi-policy-${eks_cluster}" + gen3_log_info "Attaching S3 CSI policy with ARN: $policy_arn to role: $role_name" + eks_policy_arn=$(aws iam list-policies --query "Policies[?PolicyName == '$eks_policy_name'].Arn" --output text) + if [ -z "$eks_policy_arn" ]; then + cat < /tmp/eks-s3-csi-policy-$$.json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:ListBucket", + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject" + ], + "Resource": [ + "arn:aws:s3:::${bucket_name}", + "arn:aws:s3:::${bucket_name}/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:Decrypt", + "kms:Encrypt", + "kms:GenerateDataKey" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "eks:DescribeCluster" + ], + "Resource": "*" + } + ] +} +EOF + eks_policy_arn=$(aws iam create-policy --policy-name "$eks_policy_name" --policy-document file:///tmp/eks-s3-csi-policy-$$.json --query "Policy.Arn" --output text) + rm -f /tmp/eks-s3-csi-policy-$$.json + fi + aws iam attach-role-policy --role-name $role_name --policy-arn $policy_arn + aws iam attach-role-policy --role-name $role_name --policy-arn $eks_policy_arn +} + +# Create or update the CSI driver and its resources +setup_csi_driver() { + create_s3_csi_policy + policy_arn=$(aws iam list-policies --query "Policies[?PolicyName == 'AmazonS3CSIDriverPolicy-${eks_cluster}'].[Arn]" --output text) + create_s3_csi_trust_policy + create_s3_csi_role + role_name="AmazonEKS_S3_CSI_DriverRole-${eks_cluster}" + attach_s3_csi_policies $role_name $policy_arn + + # Install 
CSI driver + gen3_log_info "eks cluster name: $eks_cluster" + + # Capture the output of the command and prevent it from exiting the script + csi_driver_check=$(aws eks describe-addon --cluster-name $eks_cluster --addon-name aws-mountpoint-s3-csi-driver --query 'addon.addonName' --output text 2>&1 || true) + + if echo "$csi_driver_check" | grep -q "ResourceNotFoundException"; then + gen3_log_info "CSI driver not found, installing..." + aws eks create-addon --cluster-name $eks_cluster --addon-name aws-mountpoint-s3-csi-driver --service-account-role-arn arn:aws:iam::${account_id}:role/AmazonEKS_S3_CSI_DriverRole-${eks_cluster} + csi_status="CREATING" + retries=0 + while [ "$csi_status" != "ACTIVE" ] && [ $retries -lt 12 ]; do + gen3_log_info "Waiting for CSI driver to become active... (attempt $((retries+1)))" + sleep 10 + csi_status=$(aws eks describe-addon --cluster-name $eks_cluster --addon-name aws-mountpoint-s3-csi-driver --query 'addon.status' --output text || echo "CREATING") + retries=$((retries+1)) + done + if [ "$csi_status" == "ACTIVE" ]; then + gen3_log_info "CSI driver successfully installed and active." + else + gen3_log_error "CSI driver installation failed or not active. Current status: $csi_status" + fi + elif echo "$csi_driver_check" | grep -q "aws-mountpoint-s3-csi-driver"; then + gen3_log_info "CSI driver already exists, skipping installation." 
+ else + gen3_log_info "Unexpected error occurred: $csi_driver_check" + exit 1 + fi +} + +setup_csi_driver diff --git a/kube/services/jobs/psql-db-backup-encrypt-job.yaml b/kube/services/jobs/psql-db-backup-encrypt-job.yaml new file mode 100644 index 000000000..914b81ffa --- /dev/null +++ b/kube/services/jobs/psql-db-backup-encrypt-job.yaml @@ -0,0 +1,224 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: psql-db-backup-encrypt +spec: + template: + metadata: + labels: + app: gen3job + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + - weight: 99 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType + operator: In + values: + - ONDEMAND + serviceAccountName: dbencrypt-sa + containers: + - name: pgdump + image: quay.io/cdis/awshelper:master + imagePullPolicy: Always + env: + - name: gen3Env + valueFrom: + configMapKeyRef: + name: global + key: environment + - name: JENKINS_HOME + value: "devterm" + - name: GEN3_HOME + value: /home/ubuntu/cloud-automation + command: [ "/bin/bash" ] + args: + - "-c" + - | + #!/bin/bash + + # This script takes backup of Gen3 Service databases, encrypts it, and moves it to an encrypted S3 bucket. + # Requirements: + # 1. PGP public key must be available as a variable in the script. + # 2. The job needs the necessary permissions to read secrets, config maps from the target namespace. 
+ + source "${GEN3_HOME}/gen3/lib/utils.sh" + gen3_load "gen3/gen3setup" + + # Fetch necessary information + namespace=$(gen3 api namespace) + environment=$(gen3 api environment) + hostname=$(gen3 api hostname) + default_databases=($(gen3 db services)) + date_str=$(date -u +%y%m%d_%H%M%S) + databases=("${default_databases[@]}") + gen3_log_info "Databases to be backed up: ${databases[@]}" + + # Define backup directory structure + BACKUP_DIR="/home/ubuntu/backup" + HOST_DIR="${BACKUP_DIR}/${hostname}" + ENV_NS_DIR="${HOST_DIR}/${environment}-${namespace}" + DATE_DIR="${ENV_NS_DIR}/${date_str}" + mkdir -p "${DATE_DIR}" + + # PGP public key + PUBLIC_KEY="-----BEGIN PGP PUBLIC KEY BLOCK----- + + mQINBGar5esBEADFHTpT8IzB5Vn77Ied9O1MlsEkn+k/Qbn1giEZia+BiGSGfJqD + ebJn3B/6NeUqyfq55ADw9oNNXw+LcTZrRtZeOv8kq+mfdJ64e1Qnv9i0l6e+LXbq + An3wUvQy+djtTIpQDIdtk0UyYQcNsxkdaqjbYzbNr33mbEjD4JfsOt7qkFJRLG26 + Mc8GEJxYfK0PYX8P54LuU+jc2bq/O9VK13YJ7WYYhrRBsoAej2aRr+3KELrACCeE + RZ8G0XPBhAI96FE6dcohoVo1+m3mXTR6BBtqAIslc0tWyqk5S5YPrGsYeogOl+yq + HyVildf25/ZLFHEnfnyOYAx5ghKRisKRx8bJ2esbSVSryvnpeOMtA57Wba3y+cFn + 5W5YG+MqLG+tqWFIRMs+zLeYnZtP/F2Qdc+5CgT0rEtPI8OpilaB+GiPlRjgDM4m + mbv1XABJvho7uWco1yASrBDsaDQKgkWpVnyIETZOP+FWpK7LJvUz9l/aoSMUK9iQ + Ko1SggewM4fCyBeoSso7aZ75xQK+XkRyFeyd2DqotT/e2ZgIt//TzQ9LF61SVq+Q + hYKJsTxFedAK6Q1C5sHzzG+fFbOTrQ71vgOtKh7eT8quM9sAsCXw4YMGS2v0mSyB + kiJllrz6I54pKiXs2iXYQZLs6hDNDHH0/uEjOVGsG9y/vAdVuRr39VbVGQARAQAB + tCtQbGF0Zm9ybSBFbmdpbmVlcmluZyA8cGVAY3Rkcy51Y2hpY2Fnby5lZHU+iQJO + BBMBCgA4FiEEkqaslDgj+ReG0CykPBvbSP+i50gFAmar5esCGy8FCwkIBwIGFQoJ + CAsCBBYCAwECHgECF4AACgkQPBvbSP+i50gm7xAAwCvhBeEESHUbwd27r8YyOY1r + ZEELagJSaCMUA5W7C780y2RTxVHJ7XmVVEwMCCXnZ0u7G+GZH3k6jHuIRrYwPGgY + ehjAwmLU3uRTQDnbGtrWpLlgFZtqHSQO09sstiuuYYEniIGTt3/yGDMPsuqgguPN + pCT4IARoke3RdHAu5LQRZKaN1U0a/8qWIckCCOWLY8vkzjo/5IKoJQhel4eN3Zwn + 4qokIbDU9En+9De//rniIPMJFn26mQc9NIBW0uy6J2tNG/y3wJba3MNWL+WdCznE + yaFsTTGVzfdyCI3+HWq+fjCnrTQeYcsfPTbifpyaVdb6+FDj1yhY+hlJzLmDCMgP + 
OT3MD9NyWgBxuB2XqPOjo5RtA8uh3avNljRYC9k0bvuawNpGSZu7LKd2w79jAknm + Vh6TJ4+WHWx9vAozrwQ+O+8RmD0CPojsj4OQHb9lVTDd++6D7pq9o8yrBaZNCv9l + /gXk+f/3D19v0iYTlJF4OlGJyiTRfpJ27lq5Z0AuSm0SO/sc5O2tOV4fYDKUHnn9 + G+kw9+ZAdRpNS4x3W6j3sC3/Y5kKhD2fpyycHUfm2n0j2mGmXN1kQ28NU0mhJttB + OZazdgeijPXqN7+DM64iwKz9fSamc09FK7JTDgb64oAA0Py29bT9WLAMdYTNhFrE + agGOzCqb4TEjHoDIa9u5Ag0EZqvl6wEQAN1eAl7+ttfCd3NicrzwUHczsCyRyqde + HCjWRPhQ5rQ8kAmvt/b1SD/cTZM8nhLGOUBNEq9cK9ZuOS71AYvxKG30yYz0VakX + VDcHO0iAxSXqSKK1jrr9x4qqU1jw7Phy80WsVv7yA/vAsOug5ikqwAFVIEkSAltu + wk48xLqSeP0+g9fJGzEi78p+itjkhz9n+vgQEZN8OuI6yShlA2mB5Sq9ekvs4mHC + BvAFNBhTgK2szl6GUOBmoREnqf7hId7IhmhirzZxdI3v7yMrGMB0aH3++JdNHA7x + SeYN8B41RAH61kwz7JEoh4sVdfppYF7xx94numfX4YTftQGYvLIgbW4WzoE3BKAl + LSV3+1mERp06QM5zdH8zBwGRiM/ob/x+g2htyqYMG+6M1ZjMgrrNjsP5Zy80k//F + LBok3inKLNalM28WwtYdoXNnsYTOo3+UzIjtl1hfZoYgbn6LuiL0Oewga7QrOZ/P + UCZOwPdL2TgKDOqt7usdHso5i4139BOu6quBBp7ouqFSKFbWoOdffik/g0f+5UPw + +nEBN0JfpN6ACA1P6p/GzHkfYcOflumFjkpFFhB4PvHxpdBSH7T90ec+a/9XGImL + EIoeKMpCl3+yayd9u8JzLCZVYo2rgTnp/DoqoGPzv5W7DR709sAtSbxcuA4Klbzu + t9Xc9DKc6in/ABEBAAGJBGwEGAEKACAWIQSSpqyUOCP5F4bQLKQ8G9tI/6LnSAUC + Zqvl6wIbLgJACRA8G9tI/6LnSMF0IAQZAQoAHRYhBEubwQz2su3GAKUEIgZh6MFg + Klj0BQJmq+XrAAoJEAZh6MFgKlj0iHoP/0vEZkRVCkNnWQPUuq9c1tqc+9dX9VJh + Mx6EYh8tcuAbilEZpAsWT0qasM6Y87BO56X/suG+2agZfLkz9o60uBPlcHkCuttW + vrAmXaVSXWv8EEvDaaGTymSM0cEDEd+4hIaFobbeOm6FQfdp57yAI4QGDmK1bzkE + fG6bejHkI0DubR4kumHXlMiDgSLeOdUh0IbsDWl8+3wcpocNtIy8Q2V+hCuRW5l1 + Ag4I6P2qadpPlbbV4mxQzOCfn/Y2vHmpXL7FJBaCTgiYgT+uyFj91b9tbYcsVFW5 + 2vuXWpVFrDNhMzRS8Fa3GXoM3SQn9cKMDgSp9X0lyDrj8DnGjG/0o+zHB4VnC3jz + Lms56al3t0lBuG9unz0e3sFCwvwUeYIjnqU1ViosZvz3u7TrpsMdsuKHISs7ck2j + rLNbi97/vdRjTARpQCNAN5V6YIjvx53OWSMJscGvGpOGlM9GbSy1a4eZ2vKbNelN + TQDWWao6nfInvbewG2OhZrx9MzajJvF1YD85O6LpDkIFCyZLb3rjKUWtEduQrJMe + ztj/hHhl+702EXWPxHFaYySfatcAutrB+n9Z7l96gzLqt8trrsopEYNLH9rmNesL + DrDwRjN5C0W3hGIhq03kR0tq/hQeZfhvREKDzGCITi1wef41ZUSG7dkGWT7n+WCw + 
1IQ6DzzALDAyzH4QAKrQ4NCM+39sV+NPi+eyAIJ94P+cerhMPZh0LEdzHlX+DSUv + UoRAHuIml2VBe9cnwgD0tHXdxvjg3XLDwufvCfOu06jEmnEHpsokd/3qYj5dJ3Nd + Q4HvLQVKCnEvtM5uOoUZYxkGxobhH8ah18eC5/YmA95V3fiYF/Jg96I//Zbq/BZY + lTO5NjQzutNrrnEsr5BDbHDbURLZ58iixWLtYIVI04FRuu2UDZa9bNvjEQuwZos3 + nzHxmJeluo91HbW+FdRFByehrAOfUhkb04xJKEBXjhOqdUeSezIGhp88pb+yhV+w + WNSsxK+uOJ9Pr1Sjz3/pr9nopVFF1kqY8iE3GYgiYpu3p2A1zGUxlaoHQCZ/aT08 + whGzEsGkgQGOGX3pseKaYIVbxMNbfRGsJCKjdukQbuy5Gz/ffAm8vvf7JfPWmVUO + G+zU9L9ZIHZKlQ76PQTA1mEWa3akU6vVScDbNUiObCNZPQJdj6V6HpVAlo/sOXOt + 1RaIB2Oz5ViwAOJFYxO/PomcXiMOThnkF7op8R2I4cVoYlKnxK0VUoluNX9fiH5D + aI9PgmA2NVbQ/LqP+rP3hLbFSlh0nXjt4NxCbE14ApSslsoEaqilNgtL9UcIzkBE + 3lNYclZLeQk5SLPsohmsXoYJ6W8G1XopvZ/cG417GQ4N7FOr9VRBXimRX71O + =/4uP + -----END PGP PUBLIC KEY BLOCK-----" + + # Import the public key + echo "$PUBLIC_KEY" | gpg --import + + # Function to encrypt a database with PGP public key + function encrypt_database() { + local db_name=$1 + gpg --yes --trust-model always --output "${DATE_DIR}/${db_name}.sql.gpg" --encrypt --recipient pe@ctds.uchicago.edu "${DATE_DIR}/${db_name}.sql" + + if [ $? -eq 0 ]; then + rm "${DATE_DIR}/${db_name}.sql" + gen3_log_info "Successfully encrypted and removed the original file for database $db_name. \n" + return 0 + fi + gen3_log_err "Failed to encrypt database $db_name.\n" + return 1 + } + + # Loop through each service to back up and encrypt the database + for database in "${databases[@]}"; do + for secret_name in "${database}-creds creds.json" "$database-g3auto dbcreds.json"; do + creds=$(gen3 secrets decode $secret_name 2>/dev/null) + # Extracting service credentials + if [ $? -eq 0 ] && [ ! 
-z "$creds" ]; then + db_hostname=$(echo $creds | jq -r .db_host) + db_username=$(echo $creds | jq -r .db_username) + db_password=$(echo $creds | jq -r .db_password) + db_database=$(echo $creds | jq -r .db_database) + gen3_log_info "Extracting service credentials for $database from $secret_name:\n db_hostname: $db_hostname\n db_username: $db_username\n db_database: $db_database\n" + break + fi + done + + # Verify credentials are extracted + if [ -z "$db_hostname" ] || [ -z "$db_username" ] || [ -z "$db_password" ] || [ -z "$db_database" ]; then + gen3_log_err "Failed to extract database credentials for $database" + failed_backups="${failed_backups}\nDatabase: $database, Error: Failed to extract credentials" + continue + fi + + # Check database accessibility + PGPASSWORD=${db_password} pg_isready -h $db_hostname -U "$db_username" -d "$db_database" + if [ $? -ne 0 ]; then + gen3_log_err "Cannot connect to source database $db_database at $db_hostname. Skipping database $database." + failed_backups="${failed_backups}\nDatabase: $database, Error: Cannot connect to source database at $db_hostname" + continue + fi + + if [ "$database" != "peregrine" ]; then + # Backup the current database + if PGPASSWORD=${db_password} pg_dump -h $db_hostname -U "$db_username" -d "$db_database" > "${DATE_DIR}/${db_database}.sql"; then + gen3_log_info "Database $database backed up to ${DATE_DIR}/${db_database}.sql" + if encrypt_database "$db_database"; then + backedup_databases="${backedup_databases}\nDatabase: $db_database" + else + failed_backups="${failed_backups}\nDatabase: $database, Error: Failed to encrypt database" + fi + else + gen3_log_err "Failed to backup $database" + failed_backups="${failed_backups}\nDatabase: $database, Error: Failed to backup database" + fi + fi + done + + # Logging the successful backups + if [ -n "$backedup_databases" ]; then + gen3_log_info "Successfully backed up and encrypted databases:\n$backedup_databases" + fi + + # Logging the failed backups + if [ 
-n "$failed_backups" ]; then + gen3_log_info "Failed backups:\n$failed_backups" + fi + + # Sleep for 600 seconds to allow the user to check the logs + sleep 600 + volumeMounts: + - mountPath: "/home/ubuntu/backup" + name: s3-volume + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + volumes: + - name: s3-volume + persistentVolumeClaim: + claimName: s3-pvc-db-backups + restartPolicy: Never + From 6a7366740d1de6387e333ed21b88c5cb28222538 Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Tue, 20 Aug 2024 13:55:23 -0400 Subject: [PATCH 27/48] Changing karpenter config to rely on pre-FIPS'd images (#2619) --- .../karpenter/nodeTemplateDefault.yaml | 21 +++---------------- .../karpenter/provisionerDefault.yaml | 14 ++++++------- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/kube/services/karpenter/nodeTemplateDefault.yaml b/kube/services/karpenter/nodeTemplateDefault.yaml index 6ba8b3a0f..fbb783135 100644 --- a/kube/services/karpenter/nodeTemplateDefault.yaml +++ b/kube/services/karpenter/nodeTemplateDefault.yaml @@ -3,6 +3,9 @@ kind: AWSNodeTemplate metadata: name: default spec: + amiSelector: + aws::name: EKS-FIPS* + aws::owners: "143731057154" subnetSelector: karpenter.sh/discovery: VPC_NAME securityGroupSelector: @@ -32,30 +35,12 @@ spec: sysctl -w fs.inotify.max_user_watches=12000 - sudo yum update -y - sudo yum install -y dracut-fips openssl >> /opt/fips-install.log - sudo dracut -f - # configure grub - sudo /sbin/grubby --update-kernel=ALL --args="fips=1" - # --BOUNDARY # Content-Type: text/cloud-config; charset="us-ascii" # mounts: # - ['fstype': 'bpf', 'mountpoint': '/sys/fs/bpf', 'opts': 'rw,relatime'] - --BOUNDARY - - Content-Type: text/cloud-config; charset="us-ascii" - - power_state: - delay: now - mode: reboot - message: Powering off - timeout: 2 - condition: true - - --BOUNDARY-- blockDeviceMappings: - deviceName: /dev/xvda diff --git a/kube/services/karpenter/provisionerDefault.yaml 
b/kube/services/karpenter/provisionerDefault.yaml index ac08284ce..f92a5e383 100644 --- a/kube/services/karpenter/provisionerDefault.yaml +++ b/kube/services/karpenter/provisionerDefault.yaml @@ -11,14 +11,14 @@ spec: - key: kubernetes.io/arch operator: In values: - - amd64 + - amd64 - key: karpenter.k8s.aws/instance-category operator: In values: - - c - - m - - r - - t + - c + - m + - r + - t # Set a limit of 1000 vcpus limits: resources: @@ -30,6 +30,4 @@ spec: consolidation: enabled: true # Kill nodes after 30 days to ensure they stay up to date - ttlSecondsUntilExpired: 2592000 - - + ttlSecondsUntilExpired: 604800 From 4dfcee206aeb1f0b03ad95e34ab3181c6f481076 Mon Sep 17 00:00:00 2001 From: smvgarcia <111767892+smvgarcia@users.noreply.github.com> Date: Thu, 22 Aug 2024 11:15:21 -0500 Subject: [PATCH 28/48] Update support emails from @datacommons.io to @gen3.org (#2620) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update vpnvm_new.sh - email update all support emails from @datacommons.io to @gen3.org do not merge until gen3.org emails are active. * Update vpnvm.sh - email update all support emails from @datacommons.io to @gen3.org do not merge until gen3.org emails are active. * Update install_ovpn.sh - email update all support emails from @datacommons.io to @gen3.org do not merge until gen3.org emails are active. * Update vpnvm_ubuntu18.sh - email update all support emails from @datacommons.io to @gen3.org do not merge until gen3.org emails are active. 
* retrigger checks --------- Co-authored-by: George Thomas <98996322+george42-ctds@users.noreply.github.com> Co-authored-by: jacob50231 --- files/openvpn_management_scripts/install_ovpn.sh | 2 +- flavors/vpn_nlb_central/vpnvm.sh | 4 ++-- flavors/vpn_nlb_central/vpnvm_new.sh | 2 +- flavors/vpn_nlb_central/vpnvm_ubuntu18.sh | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/files/openvpn_management_scripts/install_ovpn.sh b/files/openvpn_management_scripts/install_ovpn.sh index 4250d2ca2..180d0274c 100644 --- a/files/openvpn_management_scripts/install_ovpn.sh +++ b/files/openvpn_management_scripts/install_ovpn.sh @@ -17,7 +17,7 @@ COUNTRY="US" STATE="IL" CITY="Chicago" ORG="CDIS" -EMAIL='support\@datacommons.io' +EMAIL='support\@gen3.org' KEY_EXPIRE=365 diff --git a/flavors/vpn_nlb_central/vpnvm.sh b/flavors/vpn_nlb_central/vpnvm.sh index 879488eab..548962231 100644 --- a/flavors/vpn_nlb_central/vpnvm.sh +++ b/flavors/vpn_nlb_central/vpnvm.sh @@ -102,7 +102,7 @@ export FQDN="$SERVERNAME.planx-pla.net"; export cloud="$CLOUDNAME"; export SERVE #export FQDN="raryatestvpnv1.planx-pla.net"; export cloud="planxvpn1"; export SERVER_PEM="/root/server.pem"; bash /root/openvpn_management_scripts/install_ovpn.sh -#export FQDN="raryatestvpnv1.planx-pla.net"; export cloud="planxvpn"; export EMAIL="support@datacommons.io"; export SERVER_PEM="/root/server.pem"; export VPN_SUBNET="192.168.192.0/20"; export VM_SUBNET="10.128.0.0/20"; bash install_ovpn.sh +#export FQDN="raryatestvpnv1.planx-pla.net"; export cloud="planxvpn"; export EMAIL="support@gen3.org"; export SERVER_PEM="/root/server.pem"; export VPN_SUBNET="192.168.192.0/20"; export VM_SUBNET="10.128.0.0/20"; bash install_ovpn.sh ### need to install lighttpd @@ -174,4 +174,4 @@ sudo chmod 755 /etc/init.d/awslogs sudo systemctl enable awslogs sudo systemctl restart awslogs -echo "Install is completed" \ No newline at end of file +echo "Install is completed" diff --git a/flavors/vpn_nlb_central/vpnvm_new.sh 
b/flavors/vpn_nlb_central/vpnvm_new.sh index 00f8306fc..627672694 100644 --- a/flavors/vpn_nlb_central/vpnvm_new.sh +++ b/flavors/vpn_nlb_central/vpnvm_new.sh @@ -32,7 +32,7 @@ COUNTRY="US" STATE="IL" CITY="Chicago" ORG="CTDS" -EMAIL='support\@datacommons.io' +EMAIL='support\@gen3.org' KEY_EXPIRE=365 #OpenVPN diff --git a/flavors/vpn_nlb_central/vpnvm_ubuntu18.sh b/flavors/vpn_nlb_central/vpnvm_ubuntu18.sh index af5efdfaf..e2f8210ea 100644 --- a/flavors/vpn_nlb_central/vpnvm_ubuntu18.sh +++ b/flavors/vpn_nlb_central/vpnvm_ubuntu18.sh @@ -28,7 +28,7 @@ COUNTRY="US" STATE="IL" CITY="Chicago" ORG="CTDS" -EMAIL='support\@datacommons.io' +EMAIL='support\@gen3.org' KEY_EXPIRE=365 #OpenVPN From 41cadbb439b36ea838b4a661a5adda9fc221353e Mon Sep 17 00:00:00 2001 From: EliseCastle23 <109446148+EliseCastle23@users.noreply.github.com> Date: Fri, 23 Aug 2024 13:45:58 -0600 Subject: [PATCH 29/48] adding "hostname" label to our services (#2624) --- .secrets.baseline | 110 +++++++++--------- gen3/lib/g3k_manifest.sh | 3 + .../ambassador/ambassador-deploy.yaml | 1 + kube/services/arborist/arborist-deploy.yaml | 1 + .../argo-wrapper/argo-wrapper-deploy.yaml | 1 + .../audit-service/audit-service-deploy.yaml | 1 + .../aws-es-proxy/aws-es-proxy-deploy.yaml | 1 + .../dicom-server/dicom-server-deploy.yaml | 1 + .../dicom-viewer/dicom-viewer-deploy.yaml | 1 + kube/services/fence/fence-deploy.yaml | 1 + .../frontend-framework-deploy.yaml | 1 + kube/services/guppy/guppy-deploy.yaml | 1 + kube/services/hatchery/hatchery-deploy.yaml | 1 + kube/services/indexd/indexd-deploy.yaml | 1 + .../manifestservice-deploy.yaml | 1 + kube/services/metadata/metadata-deploy.yaml | 1 + kube/services/peregrine/peregrine-deploy.yaml | 1 + kube/services/portal/portal-deploy.yaml | 1 + kube/services/requestor/requestor-deploy.yaml | 1 + kube/services/revproxy/revproxy-deploy.yaml | 1 + kube/services/sheepdog/sheepdog-deploy.yaml | 1 + kube/services/sower/sower-deploy.yaml | 1 + 
.../ssjdispatcher/ssjdispatcher-deploy.yaml | 1 + kube/services/wts/wts-deploy.yaml | 1 + 24 files changed, 82 insertions(+), 53 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 0cc95d0da..d8f28a079 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -90,6 +90,10 @@ { "path": "detect_secrets.filters.allowlist.is_line_allowlisted" }, + { + "path": "detect_secrets.filters.common.is_baseline_file", + "filename": ".secrets.baseline" + }, { "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", "min_level": 2 @@ -693,49 +697,49 @@ "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "dbd5f43594a152b52261c8e21520a3989823fe55", "is_verified": false, - "line_number": 64 + "line_number": 65 }, { "type": "Secret Keyword", "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "1c062eaac9e6fa0766377d3cfc3e4a88982fecdb", "is_verified": false, - "line_number": 67 + "line_number": 68 }, { "type": "Secret Keyword", "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "694cfd0a009a42055e975de9111b2f3c6e8a3634", "is_verified": false, - "line_number": 70 + "line_number": 71 }, { "type": "Secret Keyword", "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "4b09a441cef18c75560f6c3caeafc96f2163c3fd", "is_verified": false, - "line_number": 77 + "line_number": 78 }, { "type": "Secret Keyword", "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "7e7478a28dcc3695a083b66b47243b050c813e2d", "is_verified": false, - "line_number": 80 + "line_number": 81 }, { "type": "Secret Keyword", "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "2f57bb00fcb93481c2be444e3e9f322b6cb5fadb", "is_verified": false, - "line_number": 83 + "line_number": 84 }, { "type": "Secret Keyword", "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "ea73fcfdaa415890d5fde24d3b2245671be32f73", "is_verified": 
false, - "line_number": 86 + "line_number": 87 } ], "kube/services/argo/workflows/fence-usersync-wf.yaml": [ @@ -806,7 +810,7 @@ "filename": "kube/services/audit-service/audit-service-deploy.yaml", "hashed_secret": "42cde1c58c36d8bb5804a076e55ac6ec07ef99fc", "is_verified": false, - "line_number": 64 + "line_number": 65 } ], "kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml": [ @@ -815,7 +819,7 @@ "filename": "kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml", "hashed_secret": "7f834ccb442433fc12ec9532f75c3a4b6a748d4c", "is_verified": false, - "line_number": 46 + "line_number": 47 } ], "kube/services/cedar-wrapper/cedar-wrapper-deploy.yaml": [ @@ -913,7 +917,7 @@ "filename": "kube/services/dicom-server/dicom-server-deploy.yaml", "hashed_secret": "706168ac2565a93cceffe2202ac45d3d31c075fb", "is_verified": false, - "line_number": 40 + "line_number": 41 } ], "kube/services/fence/fence-canary-deploy.yaml": [ @@ -987,63 +991,63 @@ "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "dbd5f43594a152b52261c8e21520a3989823fe55", "is_verified": false, - "line_number": 71 + "line_number": 72 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "1c062eaac9e6fa0766377d3cfc3e4a88982fecdb", "is_verified": false, - "line_number": 74 + "line_number": 75 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "694cfd0a009a42055e975de9111b2f3c6e8a3634", "is_verified": false, - "line_number": 77 + "line_number": 78 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "4b09a441cef18c75560f6c3caeafc96f2163c3fd", "is_verified": false, - "line_number": 87 + "line_number": 88 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "7e7478a28dcc3695a083b66b47243b050c813e2d", "is_verified": false, - "line_number": 90 + "line_number": 91 }, { "type": "Secret Keyword", "filename": 
"kube/services/fence/fence-deploy.yaml", "hashed_secret": "2f57bb00fcb93481c2be444e3e9f322b6cb5fadb", "is_verified": false, - "line_number": 93 + "line_number": 94 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "98f5a68541a6d981bf5825f23dffe6a0b150e457", "is_verified": false, - "line_number": 96 + "line_number": 97 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "0849046cdafcdb17f5a4bf5c528430d5e04ad295", "is_verified": false, - "line_number": 99 + "line_number": 100 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 102 + "line_number": 103 } ], "kube/services/fenceshib/fenceshib-canary-deploy.yaml": [ @@ -1189,28 +1193,28 @@ "filename": "kube/services/frontend-framework/frontend-framework-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 54 + "line_number": 55 }, { "type": "Secret Keyword", "filename": "kube/services/frontend-framework/frontend-framework-deploy.yaml", "hashed_secret": "6607b403f74e62246fc6a3c938feffc5a34a7e49", "is_verified": false, - "line_number": 57 + "line_number": 58 }, { "type": "Secret Keyword", "filename": "kube/services/frontend-framework/frontend-framework-deploy.yaml", "hashed_secret": "4b0bb3e58651fe56ee23e59aa6a3cb96dc61ddd2", "is_verified": false, - "line_number": 60 + "line_number": 61 }, { "type": "Secret Keyword", "filename": "kube/services/frontend-framework/frontend-framework-deploy.yaml", "hashed_secret": "e3c7565314f404e3883929f003c65a02a80366e9", "is_verified": false, - "line_number": 66 + "line_number": 67 } ], "kube/services/frontend-framework/frontend-framework-root-deploy.yaml": [ @@ -1346,14 +1350,14 @@ "filename": "kube/services/guppy/guppy-deploy.yaml", "hashed_secret": "0db22b31c9add2d3c76743c0ac6fbc99bb8b4761", 
"is_verified": false, - "line_number": 65 + "line_number": 66 }, { "type": "Secret Keyword", "filename": "kube/services/guppy/guppy-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 68 + "line_number": 69 } ], "kube/services/indexd/indexd-canary-deploy.yaml": [ @@ -1392,28 +1396,28 @@ "filename": "kube/services/indexd/indexd-deploy.yaml", "hashed_secret": "0b701c1fabb6ba47a7d47d455e3696d207014bd3", "is_verified": false, - "line_number": 63 + "line_number": 64 }, { "type": "Secret Keyword", "filename": "kube/services/indexd/indexd-deploy.yaml", "hashed_secret": "aee98a99696237d70b6854ee4c2d9e42bc696039", "is_verified": false, - "line_number": 66 + "line_number": 67 }, { "type": "Secret Keyword", "filename": "kube/services/indexd/indexd-deploy.yaml", "hashed_secret": "bdecca54d39013d43d3b7f05f2927eaa7df375dc", "is_verified": false, - "line_number": 72 + "line_number": 73 }, { "type": "Secret Keyword", "filename": "kube/services/indexd/indexd-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 75 + "line_number": 76 } ], "kube/services/jenkins-ci-worker/jenkins-ci-worker-deploy.yaml": [ @@ -2818,21 +2822,21 @@ "filename": "kube/services/manifestservice/manifestservice-deploy.yaml", "hashed_secret": "3da2c49c267b6c58401bbf05e379b38d20434f78", "is_verified": false, - "line_number": 61 + "line_number": 62 }, { "type": "Secret Keyword", "filename": "kube/services/manifestservice/manifestservice-deploy.yaml", "hashed_secret": "469e0c2b1a67aa94955bae023ddc727be31581a7", "is_verified": false, - "line_number": 64 + "line_number": 65 }, { "type": "Secret Keyword", "filename": "kube/services/manifestservice/manifestservice-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 67 + "line_number": 68 } ], "kube/services/metadata/metadata-deploy.yaml": [ @@ -2841,14 +2845,14 @@ "filename": 
"kube/services/metadata/metadata-deploy.yaml", "hashed_secret": "e14f65c8ca7f3b27a0f0f5463569954841e162c9", "is_verified": false, - "line_number": 61 + "line_number": 62 }, { "type": "Secret Keyword", "filename": "kube/services/metadata/metadata-deploy.yaml", "hashed_secret": "c27babf45eb0ed87329e69c7d47dba611e859c5d", "is_verified": false, - "line_number": 66 + "line_number": 67 } ], "kube/services/monitoring/grafana-values.yaml": [ @@ -2930,28 +2934,28 @@ "filename": "kube/services/peregrine/peregrine-deploy.yaml", "hashed_secret": "6131c35d7eebdbc17a314bef8aac75b87323cff3", "is_verified": false, - "line_number": 67 + "line_number": 68 }, { "type": "Secret Keyword", "filename": "kube/services/peregrine/peregrine-deploy.yaml", "hashed_secret": "ca253d1c9dece2da0d6fb24ded7bdb849a475966", "is_verified": false, - "line_number": 70 + "line_number": 71 }, { "type": "Secret Keyword", "filename": "kube/services/peregrine/peregrine-deploy.yaml", "hashed_secret": "990a3202b5c94aa5e5997e7dc1a218e457f8b8ec", "is_verified": false, - "line_number": 76 + "line_number": 77 }, { "type": "Secret Keyword", "filename": "kube/services/peregrine/peregrine-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 79 + "line_number": 80 } ], "kube/services/pidgin/pidgin-deploy.yaml": [ @@ -2976,28 +2980,28 @@ "filename": "kube/services/portal/portal-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 55 + "line_number": 56 }, { "type": "Secret Keyword", "filename": "kube/services/portal/portal-deploy.yaml", "hashed_secret": "5c5a8e158ad2d8544f73cd5422072d414f497faa", "is_verified": false, - "line_number": 58 + "line_number": 59 }, { "type": "Secret Keyword", "filename": "kube/services/portal/portal-deploy.yaml", "hashed_secret": "619551216e129bbc5322678abf9c9210c0327cfb", "is_verified": false, - "line_number": 61 + "line_number": 62 }, { "type": "Secret Keyword", 
"filename": "kube/services/portal/portal-deploy.yaml", "hashed_secret": "e3c7565314f404e3883929f003c65a02a80366e9", "is_verified": false, - "line_number": 67 + "line_number": 68 } ], "kube/services/portal/portal-root-deploy.yaml": [ @@ -3119,7 +3123,7 @@ "filename": "kube/services/requestor/requestor-deploy.yaml", "hashed_secret": "15debe4170aa5b89858d939f4c0644307ae7789b", "is_verified": false, - "line_number": 61 + "line_number": 62 } ], "kube/services/revproxy/gen3.nginx.conf/indexd-service.conf": [ @@ -3171,21 +3175,21 @@ "filename": "kube/services/revproxy/revproxy-deploy.yaml", "hashed_secret": "c7a87a61893a647e29289845cb51e61afb06800b", "is_verified": false, - "line_number": 74 + "line_number": 75 }, { "type": "Secret Keyword", "filename": "kube/services/revproxy/revproxy-deploy.yaml", "hashed_secret": "b3a4e2dea4c1fae8c58a07a84065b73b3a2d831c", "is_verified": false, - "line_number": 77 + "line_number": 78 }, { "type": "Secret Keyword", "filename": "kube/services/revproxy/revproxy-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 80 + "line_number": 81 } ], "kube/services/sftp/sftp-deploy.yaml": [ @@ -3233,28 +3237,28 @@ "filename": "kube/services/sheepdog/sheepdog-deploy.yaml", "hashed_secret": "ec9c944c51e87322de8d22e3ca9e2be1ad8fee0d", "is_verified": false, - "line_number": 63 + "line_number": 64 }, { "type": "Secret Keyword", "filename": "kube/services/sheepdog/sheepdog-deploy.yaml", "hashed_secret": "79496491225eda4a7be9fcddee2825c85b1535cc", "is_verified": false, - "line_number": 66 + "line_number": 67 }, { "type": "Secret Keyword", "filename": "kube/services/sheepdog/sheepdog-deploy.yaml", "hashed_secret": "e43756046ad1763d6946575fed0e05130a154bd2", "is_verified": false, - "line_number": 72 + "line_number": 73 }, { "type": "Secret Keyword", "filename": "kube/services/sheepdog/sheepdog-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - 
"line_number": 75 + "line_number": 76 } ], "kube/services/shiny/shiny-deploy.yaml": [ @@ -3272,7 +3276,7 @@ "filename": "kube/services/ssjdispatcher/ssjdispatcher-deploy.yaml", "hashed_secret": "7f932449df74fc78573fea502df8a484aef3f69d", "is_verified": false, - "line_number": 61 + "line_number": 62 } ], "kube/services/superset/superset-deploy.yaml": [ @@ -3363,7 +3367,7 @@ "filename": "kube/services/wts/wts-deploy.yaml", "hashed_secret": "5de687ae886f19c3cb68d4980e3f2e77cca3db9e", "is_verified": false, - "line_number": 65 + "line_number": 66 } ], "packer/buildAll.sh": [ @@ -3685,5 +3689,5 @@ } ] }, - "generated_at": "2024-07-19T04:34:31Z" + "generated_at": "2024-08-23T16:35:21Z" } diff --git a/gen3/lib/g3k_manifest.sh b/gen3/lib/g3k_manifest.sh index ae42e84ba..d69ef5b99 100644 --- a/gen3/lib/g3k_manifest.sh +++ b/gen3/lib/g3k_manifest.sh @@ -253,8 +253,11 @@ g3k_manifest_filter() { kvList+=("$kvLabelKey" "tags.datadoghq.com/version: '$version'") done environment="$(g3k_config_lookup ".global.environment" "$manifestPath")" + hostname="$(g3k_config_lookup ".global.hostname" "$manifestPath")" kvEnvKey=$(echo "GEN3_ENV_LABEL" | tr '[:lower:]' '[:upper:]') + kvHostKey=$(echo "GEN3_HOSTNAME_LABEL" | tr '[:lower:]' '[:upper:]') kvList+=("$kvEnvKey" "tags.datadoghq.com/env: $environment") + kvList+=("$kvHostKey" "hostname: $hostname") for key in $(g3k_config_lookup '. 
| keys[]' "$manifestPath"); do gen3_log_debug "harvesting key $key" for key2 in $(g3k_config_lookup ".[\"${key}\"] "' | to_entries | map(select((.value|type != "array") and (.value|type != "object"))) | map(.key)[]' "$manifestPath" | grep '^[a-zA-Z]'); do diff --git a/kube/services/ambassador/ambassador-deploy.yaml b/kube/services/ambassador/ambassador-deploy.yaml index 8788cef13..28e6a41fd 100644 --- a/kube/services/ambassador/ambassador-deploy.yaml +++ b/kube/services/ambassador/ambassador-deploy.yaml @@ -24,6 +24,7 @@ spec: netnolimit: "yes" userhelper: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/arborist/arborist-deploy.yaml b/kube/services/arborist/arborist-deploy.yaml index 5deef6ac7..360c5c04a 100644 --- a/kube/services/arborist/arborist-deploy.yaml +++ b/kube/services/arborist/arborist-deploy.yaml @@ -24,6 +24,7 @@ spec: # for revproxy authz public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/argo-wrapper/argo-wrapper-deploy.yaml b/kube/services/argo-wrapper/argo-wrapper-deploy.yaml index 89ec29ecc..3b9d1b6a2 100644 --- a/kube/services/argo-wrapper/argo-wrapper-deploy.yaml +++ b/kube/services/argo-wrapper/argo-wrapper-deploy.yaml @@ -24,6 +24,7 @@ spec: GEN3_ENV_LABEL GEN3_ARGO-WRAPPER_VERSION GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/audit-service/audit-service-deploy.yaml b/kube/services/audit-service/audit-service-deploy.yaml index 935cab408..b7081a7f5 100644 --- a/kube/services/audit-service/audit-service-deploy.yaml +++ b/kube/services/audit-service/audit-service-deploy.yaml @@ -27,6 +27,7 @@ spec: # for network policy netnolimit: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: serviceAccountName: audit-service-sa affinity: diff --git a/kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml b/kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml index 34f18d973..c7f72b4d8 100644 --- 
a/kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml +++ b/kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml @@ -21,6 +21,7 @@ spec: app: esproxy netvpc: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: nodeAffinity: diff --git a/kube/services/dicom-server/dicom-server-deploy.yaml b/kube/services/dicom-server/dicom-server-deploy.yaml index 43bd90e5d..58040e6d4 100644 --- a/kube/services/dicom-server/dicom-server-deploy.yaml +++ b/kube/services/dicom-server/dicom-server-deploy.yaml @@ -16,6 +16,7 @@ spec: release: production public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: nodeAffinity: diff --git a/kube/services/dicom-viewer/dicom-viewer-deploy.yaml b/kube/services/dicom-viewer/dicom-viewer-deploy.yaml index 9df6fbc93..7cd9b6bbe 100644 --- a/kube/services/dicom-viewer/dicom-viewer-deploy.yaml +++ b/kube/services/dicom-viewer/dicom-viewer-deploy.yaml @@ -16,6 +16,7 @@ spec: release: production public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: nodeAffinity: diff --git a/kube/services/fence/fence-deploy.yaml b/kube/services/fence/fence-deploy.yaml index 9524315d9..35048424b 100644 --- a/kube/services/fence/fence-deploy.yaml +++ b/kube/services/fence/fence-deploy.yaml @@ -28,6 +28,7 @@ spec: userhelper: "yes" tags.datadoghq.com/service: "fence" GEN3_ENV_LABEL + GEN3_HOSTNAME_LABEL GEN3_FENCE_VERSION GEN3_DATE_LABEL spec: diff --git a/kube/services/frontend-framework/frontend-framework-deploy.yaml b/kube/services/frontend-framework/frontend-framework-deploy.yaml index f0da277dc..7acc9f745 100644 --- a/kube/services/frontend-framework/frontend-framework-deploy.yaml +++ b/kube/services/frontend-framework/frontend-framework-deploy.yaml @@ -18,6 +18,7 @@ spec: app: frontend-framework public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/guppy/guppy-deploy.yaml b/kube/services/guppy/guppy-deploy.yaml index c3e8d121c..1dc6c7da0 100644 --- 
a/kube/services/guppy/guppy-deploy.yaml +++ b/kube/services/guppy/guppy-deploy.yaml @@ -23,6 +23,7 @@ spec: GEN3_GUPPY_VERSION GEN3_ENV_LABEL GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/hatchery/hatchery-deploy.yaml b/kube/services/hatchery/hatchery-deploy.yaml index f7de81d79..80e64a582 100644 --- a/kube/services/hatchery/hatchery-deploy.yaml +++ b/kube/services/hatchery/hatchery-deploy.yaml @@ -24,6 +24,7 @@ spec: GEN3_HATCHERY_VERSION GEN3_ENV_LABEL GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/indexd/indexd-deploy.yaml b/kube/services/indexd/indexd-deploy.yaml index 239079058..af60e9b4a 100644 --- a/kube/services/indexd/indexd-deploy.yaml +++ b/kube/services/indexd/indexd-deploy.yaml @@ -27,6 +27,7 @@ spec: GEN3_ENV_LABEL GEN3_INDEXD_VERSION GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/manifestservice/manifestservice-deploy.yaml b/kube/services/manifestservice/manifestservice-deploy.yaml index 0966f2480..3551cfa66 100644 --- a/kube/services/manifestservice/manifestservice-deploy.yaml +++ b/kube/services/manifestservice/manifestservice-deploy.yaml @@ -24,6 +24,7 @@ spec: userhelper: "yes" netvpc: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: serviceAccountName: manifestservice-sa affinity: diff --git a/kube/services/metadata/metadata-deploy.yaml b/kube/services/metadata/metadata-deploy.yaml index 72986e795..68a83078e 100644 --- a/kube/services/metadata/metadata-deploy.yaml +++ b/kube/services/metadata/metadata-deploy.yaml @@ -25,6 +25,7 @@ spec: # for network policy netnolimit: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/peregrine/peregrine-deploy.yaml b/kube/services/peregrine/peregrine-deploy.yaml index 20bba64ad..6467fe325 100644 --- a/kube/services/peregrine/peregrine-deploy.yaml +++ b/kube/services/peregrine/peregrine-deploy.yaml @@ -29,6 
+29,7 @@ spec: GEN3_ENV_LABEL GEN3_PEREGRINE_VERSION GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/portal/portal-deploy.yaml b/kube/services/portal/portal-deploy.yaml index 742f1b71c..fb1aa2d6f 100644 --- a/kube/services/portal/portal-deploy.yaml +++ b/kube/services/portal/portal-deploy.yaml @@ -19,6 +19,7 @@ spec: app: portal public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/requestor/requestor-deploy.yaml b/kube/services/requestor/requestor-deploy.yaml index fb5ce173f..2ed886638 100644 --- a/kube/services/requestor/requestor-deploy.yaml +++ b/kube/services/requestor/requestor-deploy.yaml @@ -25,6 +25,7 @@ spec: # for network policy netnolimit: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/revproxy/revproxy-deploy.yaml b/kube/services/revproxy/revproxy-deploy.yaml index 7ea798b77..80fd582e0 100644 --- a/kube/services/revproxy/revproxy-deploy.yaml +++ b/kube/services/revproxy/revproxy-deploy.yaml @@ -23,6 +23,7 @@ spec: userhelper: "yes" internet: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/sheepdog/sheepdog-deploy.yaml b/kube/services/sheepdog/sheepdog-deploy.yaml index a260c8741..2f476d0f0 100644 --- a/kube/services/sheepdog/sheepdog-deploy.yaml +++ b/kube/services/sheepdog/sheepdog-deploy.yaml @@ -27,6 +27,7 @@ spec: GEN3_ENV_LABEL GEN3_SHEEPDOG_VERSION GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/sower/sower-deploy.yaml b/kube/services/sower/sower-deploy.yaml index b66739d06..2c4e5f610 100644 --- a/kube/services/sower/sower-deploy.yaml +++ b/kube/services/sower/sower-deploy.yaml @@ -22,6 +22,7 @@ spec: public: "yes" netnolimit: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/ssjdispatcher/ssjdispatcher-deploy.yaml 
b/kube/services/ssjdispatcher/ssjdispatcher-deploy.yaml index 554c60cb5..990f583cb 100644 --- a/kube/services/ssjdispatcher/ssjdispatcher-deploy.yaml +++ b/kube/services/ssjdispatcher/ssjdispatcher-deploy.yaml @@ -21,6 +21,7 @@ spec: netnolimit: "yes" public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: serviceAccountName: ssjdispatcher-service-account securityContext: diff --git a/kube/services/wts/wts-deploy.yaml b/kube/services/wts/wts-deploy.yaml index e54a9cfc4..06f43fe01 100644 --- a/kube/services/wts/wts-deploy.yaml +++ b/kube/services/wts/wts-deploy.yaml @@ -29,6 +29,7 @@ spec: GEN3_DATE_LABEL GEN3_WTS_VERSION GEN3_ENV_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: From e7bc9de2b7bb4fad094c6cc1f268a580ffd982bc Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Fri, 23 Aug 2024 16:45:03 -0400 Subject: [PATCH 30/48] Using the FIPS-compliant AMI (#2623) --- gen3/bin/kube-setup-cluster-level-resources | 41 ------------------- .../argo-events/workflows/configmap.yaml | 23 +++-------- 2 files changed, 5 insertions(+), 59 deletions(-) delete mode 100644 gen3/bin/kube-setup-cluster-level-resources diff --git a/gen3/bin/kube-setup-cluster-level-resources b/gen3/bin/kube-setup-cluster-level-resources deleted file mode 100644 index f4349398f..000000000 --- a/gen3/bin/kube-setup-cluster-level-resources +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -source "${GEN3_HOME}/gen3/lib/utils.sh" -gen3_load "gen3/gen3setup" - -# Set default value for TARGET_REVISION -TARGET_REVISION="master" - -# Ask for TARGET_REVISION -read -p "Please provide a target revision for the cluster resources chart (default is master): " user_target_revision -# If user input is not empty, use it; otherwise, keep the default -TARGET_REVISION=${user_target_revision:-$TARGET_REVISION} - -# Ask for CLUSTER_NAME (no default value) -read -p "Enter the name of the cluster: " CLUSTER_NAME - -# Check if CLUSTER_NAME is provided -if [ -z 
"$CLUSTER_NAME" ]; then - echo "Error: CLUSTER_NAME cannot be empty." - exit 1 -fi - -# Create a temporary file -temp_file=$(mktemp) - -# Use sed to replace placeholders in the original file -sed -e "s|TARGET_REVISION|$TARGET_REVISION|g" \ - -e "s|CLUSTER_NAME|$CLUSTER_NAME|g" \ - $GEN3_HOME/kube/services/cluster-level-resources/app.yaml > "$temp_file" - -echo "WARNING: Do you have a folder already set up for this environment in gen3-gitops, in the form of /cluster-values/cluster-values.yaml? If not, this will not work." -echo "" -read -n 1 -s -r -p "Press any key to confirm and continue, or Ctrl+C to cancel..." -echo "" - -# Apply the templated file with kubectl -kubectl apply -f "$temp_file" - -# Clean up the temporary file -rm "$temp_file" - -echo "Application has been applied to the cluster." \ No newline at end of file diff --git a/kube/services/argo-events/workflows/configmap.yaml b/kube/services/argo-events/workflows/configmap.yaml index 4ebb90f19..f57ae07d0 100644 --- a/kube/services/argo-events/workflows/configmap.yaml +++ b/kube/services/argo-events/workflows/configmap.yaml @@ -87,8 +87,8 @@ data: cpu: 4000 providerRef: name: workflow-WORKFLOW_NAME - # Kill nodes after 30 days to ensure they stay up to date - ttlSecondsUntilExpired: 2592000 + # Kill nodes after 2 days to ensure they stay up to date + ttlSecondsUntilExpired: 172800 ttlSecondsAfterEmpty: 10 nodetemplate.yaml: | @@ -97,6 +97,9 @@ data: metadata: name: workflow-WORKFLOW_NAME spec: + amiSelector: + aws::name: EKS-FIPS* + aws::owners: "143731057154" subnetSelector: karpenter.sh/discovery: ENVIRONMENT securityGroupSelector: @@ -129,22 +132,6 @@ data: sysctl -w fs.inotify.max_user_watches=12000 - sudo yum update -y - sudo yum install -y dracut-fips openssl >> /opt/fips-install.log - sudo dracut -f - # configure grub - sudo /sbin/grubby --update-kernel=ALL --args="fips=1" - - --BOUNDARY - Content-Type: text/cloud-config; charset="us-ascii" - - power_state: - delay: now - mode: reboot - message: 
Powering off - timeout: 2 - condition: true - --BOUNDARY-- blockDeviceMappings: - deviceName: /dev/xvda From 5df2c56d93bcd634989550a8a5c66795df030d64 Mon Sep 17 00:00:00 2001 From: Hara Prasad Date: Mon, 26 Aug 2024 10:52:11 -0700 Subject: [PATCH 31/48] Update jenkins version to latest lts 2.462.1 (#2625) --- Docker/jenkins/Jenkins/Dockerfile | 2 +- Docker/jenkins/Jenkins2/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Docker/jenkins/Jenkins/Dockerfile b/Docker/jenkins/Jenkins/Dockerfile index 49c0f82b5..aae48e7b7 100644 --- a/Docker/jenkins/Jenkins/Dockerfile +++ b/Docker/jenkins/Jenkins/Dockerfile @@ -1,4 +1,4 @@ -FROM jenkins/jenkins:2.452.2-lts-jdk21 +FROM jenkins/jenkins:2.462.1-lts-jdk21 USER root diff --git a/Docker/jenkins/Jenkins2/Dockerfile b/Docker/jenkins/Jenkins2/Dockerfile index 9e585ca0e..c4bf93dfa 100644 --- a/Docker/jenkins/Jenkins2/Dockerfile +++ b/Docker/jenkins/Jenkins2/Dockerfile @@ -1,4 +1,4 @@ -FROM jenkins/jenkins:2.452.2-lts-jdk21 +FROM jenkins/jenkins:2.462.1-lts-jdk21 USER root From a9354fd3b49b6d22aefd821e0e8ab18a35ee937a Mon Sep 17 00:00:00 2001 From: EliseCastle23 <109446148+EliseCastle23@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:05:50 -0600 Subject: [PATCH 32/48] adding prometheus annotations to Fence (#2626) --- kube/services/fence/fence-deploy.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kube/services/fence/fence-deploy.yaml b/kube/services/fence/fence-deploy.yaml index 35048424b..cf03036df 100644 --- a/kube/services/fence/fence-deploy.yaml +++ b/kube/services/fence/fence-deploy.yaml @@ -17,6 +17,9 @@ spec: maxUnavailable: 0 template: metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/path: /metrics labels: app: fence release: production From 5b508f21347eda07c2c1735e15a9e4fef117ad47 Mon Sep 17 00:00:00 2001 From: EliseCastle23 <109446148+EliseCastle23@users.noreply.github.com> Date: Tue, 27 Aug 2024 15:38:56 -0600 Subject: [PATCH 33/48] adding hostname 
label to portal and ff (#2627) --- .secrets.baseline | 18 +++++++++--------- .../frontend-framework-root-deploy.yaml | 1 + kube/services/portal/portal-root-deploy.yaml | 1 + 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index d8f28a079..ededd2dff 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1223,28 +1223,28 @@ "filename": "kube/services/frontend-framework/frontend-framework-root-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 54 + "line_number": 55 }, { "type": "Secret Keyword", "filename": "kube/services/frontend-framework/frontend-framework-root-deploy.yaml", "hashed_secret": "6607b403f74e62246fc6a3c938feffc5a34a7e49", "is_verified": false, - "line_number": 57 + "line_number": 58 }, { "type": "Secret Keyword", "filename": "kube/services/frontend-framework/frontend-framework-root-deploy.yaml", "hashed_secret": "4b0bb3e58651fe56ee23e59aa6a3cb96dc61ddd2", "is_verified": false, - "line_number": 60 + "line_number": 61 }, { "type": "Secret Keyword", "filename": "kube/services/frontend-framework/frontend-framework-root-deploy.yaml", "hashed_secret": "e3c7565314f404e3883929f003c65a02a80366e9", "is_verified": false, - "line_number": 66 + "line_number": 67 } ], "kube/services/gdcapi/gdcapi-deploy.yaml": [ @@ -3010,28 +3010,28 @@ "filename": "kube/services/portal/portal-root-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 55 + "line_number": 56 }, { "type": "Secret Keyword", "filename": "kube/services/portal/portal-root-deploy.yaml", "hashed_secret": "5c5a8e158ad2d8544f73cd5422072d414f497faa", "is_verified": false, - "line_number": 58 + "line_number": 59 }, { "type": "Secret Keyword", "filename": "kube/services/portal/portal-root-deploy.yaml", "hashed_secret": "619551216e129bbc5322678abf9c9210c0327cfb", "is_verified": false, - "line_number": 61 + "line_number": 62 }, { "type": 
"Secret Keyword", "filename": "kube/services/portal/portal-root-deploy.yaml", "hashed_secret": "e3c7565314f404e3883929f003c65a02a80366e9", "is_verified": false, - "line_number": 67 + "line_number": 68 } ], "kube/services/presigned-url-fence/presigned-url-fence-deploy.yaml": [ @@ -3689,5 +3689,5 @@ } ] }, - "generated_at": "2024-08-23T16:35:21Z" + "generated_at": "2024-08-27T21:36:15Z" } diff --git a/kube/services/frontend-framework/frontend-framework-root-deploy.yaml b/kube/services/frontend-framework/frontend-framework-root-deploy.yaml index 8cad981c8..15ca3d6a1 100644 --- a/kube/services/frontend-framework/frontend-framework-root-deploy.yaml +++ b/kube/services/frontend-framework/frontend-framework-root-deploy.yaml @@ -18,6 +18,7 @@ spec: app: frontend-framework public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/portal/portal-root-deploy.yaml b/kube/services/portal/portal-root-deploy.yaml index f639a1e15..28ac8fc31 100644 --- a/kube/services/portal/portal-root-deploy.yaml +++ b/kube/services/portal/portal-root-deploy.yaml @@ -19,6 +19,7 @@ spec: app: portal public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: From e4d01a1d58ffc60c27a5547dcfa0d0453804d09c Mon Sep 17 00:00:00 2001 From: EliseCastle23 <109446148+EliseCastle23@users.noreply.github.com> Date: Wed, 28 Aug 2024 10:41:31 -0600 Subject: [PATCH 34/48] Update presigned-url-fence-deploy.yaml (#2628) --- .../presigned-url-fence/presigned-url-fence-deploy.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kube/services/presigned-url-fence/presigned-url-fence-deploy.yaml b/kube/services/presigned-url-fence/presigned-url-fence-deploy.yaml index 45e6daaea..375f424ed 100644 --- a/kube/services/presigned-url-fence/presigned-url-fence-deploy.yaml +++ b/kube/services/presigned-url-fence/presigned-url-fence-deploy.yaml @@ -30,6 +30,10 @@ spec: GEN3_ENV_LABEL GEN3_FENCE_VERSION GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL + 
annotations: + prometheus.io/scrape: "true" + prometheus.io/path: /metrics spec: serviceAccountName: fence-sa affinity: From d00e436a4dff34e9cd73f103affd8d02736badbd Mon Sep 17 00:00:00 2001 From: EliseCastle23 <109446148+EliseCastle23@users.noreply.github.com> Date: Fri, 30 Aug 2024 11:39:52 -0600 Subject: [PATCH 35/48] Using Mimir for Hatchery Reaper Job (#2613) * making changes so hatchery reaper job relies on mimir instead of Prometheus. Also, removing Prometheus from "kube-setup-ambassador" * changing wording for legacy reaper job * uploading jupyter change for testing * removin extra "/" * changing the label name from "kube-namespace" to "namespace" to match mimir labels * changing ttl for test * removing the rate for testing * revertting changes used for testing --- gen3/bin/jupyter.sh | 4 ++-- gen3/bin/kube-setup-ambassador.sh | 2 -- gen3/bin/prometheus.sh | 8 +++----- kube/services/jobs/hatchery-reaper-job.yaml | 2 +- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/gen3/bin/jupyter.sh b/gen3/bin/jupyter.sh index 169ec59dc..b5c1c5390 100644 --- a/gen3/bin/jupyter.sh +++ b/gen3/bin/jupyter.sh @@ -210,7 +210,7 @@ gen3_jupyter_idle_pods() { fi # Get the list of idle ambassador clusters from prometheus - local promQuery="sum by (envoy_cluster_name) (rate(envoy_cluster_upstream_rq_total{kubernetes_namespace=\"${namespace}\"}[${ttl}]))" + local promQuery="sum by (envoy_cluster_name) (rate(envoy_cluster_upstream_rq_total{namespace=\"${namespace}\"}[${ttl}]))" local tempClusterFile="$(mktemp "$XDG_RUNTIME_DIR/idle_apps.json_XXXXXX")" gen3 prometheus query "$promQuery" "${tokenKey#none}" | jq -e -r '.data.result[] | { "cluster": .metric.envoy_cluster_name, "rate": .value[1] } | select(.rate == "0")' | tee "$tempClusterFile" 1>&2 if [[ $? 
!= 0 ]]; then @@ -245,7 +245,7 @@ gen3_jupyter_idle_pods() { current_time=$(date +%s) age=$((current_time - pod_creation)) - # potential workspaces to be reaped for inactivity must be at least 60 minutes old + # potential workspaces to be reaped for inactivity must be at least 60 minutes old if ((age >= 3600)); then gen3_log_info "try to kill pod $name in $jnamespace" g3kubectl delete pod --namespace "$jnamespace" "$name" 1>&2 diff --git a/gen3/bin/kube-setup-ambassador.sh b/gen3/bin/kube-setup-ambassador.sh index 5f92af5cc..06ae1ee56 100644 --- a/gen3/bin/kube-setup-ambassador.sh +++ b/gen3/bin/kube-setup-ambassador.sh @@ -68,11 +68,9 @@ case "$command" in ;; "hatchery") deploy_hatchery_proxy "$@" - gen3 kube-setup-prometheus prometheus ;; *) deploy_hatchery_proxy "$@" deploy_api_gateway "$@" - gen3 kube-setup-prometheus prometheus ;; esac \ No newline at end of file diff --git a/gen3/bin/prometheus.sh b/gen3/bin/prometheus.sh index 1d71c6a7a..d7290451c 100644 --- a/gen3/bin/prometheus.sh +++ b/gen3/bin/prometheus.sh @@ -4,9 +4,7 @@ source "${GEN3_HOME}/gen3/lib/utils.sh" gen3_load "gen3/gen3setup" - -#export GEN3_PROMHOST="${GEN3_PROMHOST:-"http://prometheus-server.prometheus.svc.cluster.local"}" -export GEN3_PROMHOST="${GEN3_PROMHOST:-"http://prometheus-operated.monitoring.svc.cluster.local:9090"}" +export GEN3_PROMHOST="${GEN3_PROMHOST:-"https://mimir.planx-pla.net"}" gen3_prom_help() { gen3 help prometheus @@ -16,11 +14,11 @@ function gen3_prom_curl() { local urlBase="$1" shift || return 1 local hostOrKey="${1:-${GEN3_PROMHOST}}" - local urlPath="api/v1/$urlBase" + local urlPath="prometheus/api/v1/$urlBase" if [[ "$hostOrKey" =~ ^http ]]; then gen3_log_info "fetching $hostOrKey/$urlPath" - curl -s -H 'Accept: application/json' "$hostOrKey/$urlPath" + curl -s -H 'Accept: application/json' -H "X-Scope-OrgID: anonymous" "$hostOrKey/$urlPath" else gen3 api curl "$urlPath" "$hostOrKey" fi diff --git a/kube/services/jobs/hatchery-reaper-job.yaml 
b/kube/services/jobs/hatchery-reaper-job.yaml index 77d249e37..4f54752c9 100644 --- a/kube/services/jobs/hatchery-reaper-job.yaml +++ b/kube/services/jobs/hatchery-reaper-job.yaml @@ -110,7 +110,7 @@ spec: done # legacy reaper code - gen3_log_info "Running legacy reaper job (based on local cluster/ prometheus)" + gen3_log_info "Running legacy reaper job (based on Mimir)" if appList="$(gen3 jupyter idle none "$(gen3 db namespace)" kill)" && [[ -n "$appList" && -n "$slackWebHook" && "$slackWebHook" != "None" ]]; then curl -X POST --data-urlencode "payload={\"text\": \"hatchery-reaper in $gen3Hostname: \n\`\`\`\n${appList}\n\`\`\`\"}" "${slackWebHook}" fi From 0e02d83bdcf8e95400323f58456fec07ec5a63ae Mon Sep 17 00:00:00 2001 From: Binam Bajracharya <44302895+BinamB@users.noreply.github.com> Date: Tue, 10 Sep 2024 12:22:41 -0500 Subject: [PATCH 36/48] (PPS-819): Indexd Migration Script (#2599) * Indexd Migration Script * Fix file * extra info * change creds name --- .../indexd-single-table-migration-job.yaml | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 kube/services/jobs/indexd-single-table-migration-job.yaml diff --git a/kube/services/jobs/indexd-single-table-migration-job.yaml b/kube/services/jobs/indexd-single-table-migration-job.yaml new file mode 100644 index 000000000..e1e6a81a3 --- /dev/null +++ b/kube/services/jobs/indexd-single-table-migration-job.yaml @@ -0,0 +1,83 @@ +# Setup for running this migration https://github.com/uc-cdis/indexd/blob/master/docs/migration_to_single_table_indexd.md +apiVersion: batch/v1 +kind: Job +metadata: + name: indexd-single-table-migration +spec: + template: + metadata: + labels: + app: gen3job + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + - weight: 99 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType + 
operator: In + values: + - ONDEMAND + automountServiceAccountToken: false + volumes: + - name: config-volume + secret: + secretName: "indexd-secret" # pragma: allowlist secret + - name: "indexd-creds-volume" + secret: + secretName: "indexd-creds" # pragma: allowlist secret + - name: config-helper + configMap: + name: config-helper + - name: creds-volume-new + secret: + secretName: "indexd-new-creds" # pragma: allowlist secret + containers: + - name: indexd + GEN3_INDEXD_IMAGE + env: + - name: START_DID + GEN3_START_DID|-value: ""-| + volumeMounts: + - name: "config-volume" + readOnly: true + mountPath: "/var/www/indexd/local_settings.py" + subPath: "local_settings.py" + - name: "indexd-creds-volume" + readOnly: true + mountPath: "/var/www/indexd/creds.json" + subPath: creds.json + - name: "config-helper" + readOnly: true + mountPath: "/var/www/indexd/config_helper.py" + subPath: config_helper.py + - name: "creds-volume-new" + readOnly: true + mountPath: "/var/www/indexd/creds_new.json" + subPath: creds.json + resources: + requests: + cpu: 1000m + memory: 1Gi + imagePullPolicy: Always + command: ["/bin/bash"] + args: + - "-c" + - | + flags="--creds-path /var/www/indexd/creds_new.json" + if [[ -n "$START_DID" ]]; then + flags="$flags --start-did $START_DID" + fi + time python /indexd/bin/migrate_to_single_table.py $flags + echo "Exit code: $?" 
+ restartPolicy: Never + + From 03f987f6a92395cecee7b72db24cb4b9623bbd81 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Mon, 16 Sep 2024 14:09:53 -0500 Subject: [PATCH 37/48] fix: gen3_s3_info check for existing bucket (#2637) --- gen3/bin/s3.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gen3/bin/s3.sh b/gen3/bin/s3.sh index e89d3ca7b..0625bdf83 100644 --- a/gen3/bin/s3.sh +++ b/gen3/bin/s3.sh @@ -103,7 +103,8 @@ _add_bucket_to_cloudtrail() { # _bucket_exists() { local bucketName=$1 - if [[ -z "$(gen3_aws_run aws s3api head-bucket --bucket $bucketName 2>&1)" ]]; then + gen3_aws_run aws s3api head-bucket --bucket $bucketName > /dev/null 2>&1 + if [[ $? -eq 0 ]]; then echo 0 else echo 1 @@ -173,10 +174,12 @@ gen3_s3_info() { gen3_log_err "Unable to fetch AWS account ID." return 1 fi - if [[ ! -z "$(gen3_aws_run aws s3api head-bucket --bucket $1 2>&1)" ]]; then + + if [[ $(_bucket_exists $bucketName) -ne 0 ]]; then gen3_log_err "Bucket does not exist" return 1 fi + local rootPolicyArn="arn:aws:iam::${AWS_ACCOUNT_ID}:policy" if gen3_aws_run aws iam get-policy --policy-arn ${rootPolicyArn}/${writerName} >/dev/null 2>&1; then writerPolicy="{ \"name\": \"$writerName\", \"policy_arn\": \"${rootPolicyArn}/${writerName}\" } " From 6a38be4b2a7c7b6abdf788837fe3a3ad2705f53d Mon Sep 17 00:00:00 2001 From: Ajo Augustine Date: Tue, 17 Sep 2024 13:44:38 -0500 Subject: [PATCH 38/48] update web whitelist (#2639) --- files/squid_whitelist/web_whitelist | 1 + 1 file changed, 1 insertion(+) diff --git a/files/squid_whitelist/web_whitelist b/files/squid_whitelist/web_whitelist index b0759ba32..0507652f3 100644 --- a/files/squid_whitelist/web_whitelist +++ b/files/squid_whitelist/web_whitelist @@ -7,6 +7,7 @@ achecker.ca apache.github.io api.epigraphdb.org api.monqcle.com +app.getambassador.io awslabs.github.io biodata-integration-tests.net marketing.biorender.com From 025d65a58d9955e4fb254a6159415627bd723ea0 Mon Sep 17 00:00:00 2001 From: Mingfei 
Shao <2475897+mfshao@users.noreply.github.com> Date: Tue, 17 Sep 2024 13:55:05 -0500 Subject: [PATCH 39/48] disable dd rum (#2632) --- kube/services/portal/portal-deploy.yaml | 14 -------------- kube/services/portal/portal-root-deploy.yaml | 14 -------------- 2 files changed, 28 deletions(-) diff --git a/kube/services/portal/portal-deploy.yaml b/kube/services/portal/portal-deploy.yaml index fb1aa2d6f..20347a3be 100644 --- a/kube/services/portal/portal-deploy.yaml +++ b/kube/services/portal/portal-deploy.yaml @@ -183,20 +183,6 @@ spec: name: global key: mapbox_token optional: true - - name: DATADOG_APPLICATION_ID - # Optional application ID for Datadog - valueFrom: - secretKeyRef: - name: portal-datadog-config - key: datadog_application_id - optional: true - - name: DATADOG_CLIENT_TOKEN - # Optional client token for Datadog - valueFrom: - secretKeyRef: - name: portal-datadog-config - key: datadog_client_token - optional: true - name: DATA_UPLOAD_BUCKET # S3 bucket name for data upload, for setting up CSP GEN3_DATA_UPLOAD_BUCKET|-value: ""-| diff --git a/kube/services/portal/portal-root-deploy.yaml b/kube/services/portal/portal-root-deploy.yaml index 28ac8fc31..b65d58982 100644 --- a/kube/services/portal/portal-root-deploy.yaml +++ b/kube/services/portal/portal-root-deploy.yaml @@ -183,20 +183,6 @@ spec: name: global key: mapbox_token optional: true - - name: DATADOG_APPLICATION_ID - # Optional application ID for Datadog - valueFrom: - secretKeyRef: - name: portal-datadog-config - key: datadog_application_id - optional: true - - name: DATADOG_CLIENT_TOKEN - # Optional client token for Datadog - valueFrom: - secretKeyRef: - name: portal-datadog-config - key: datadog_client_token - optional: true - name: DATA_UPLOAD_BUCKET # S3 bucket name for data upload, for setting up CSP GEN3_DATA_UPLOAD_BUCKET|-value: ""-| From ef9f921d93b49dafb1005ff8eda1226bd21fede4 Mon Sep 17 00:00:00 2001 From: EliseCastle23 <109446148+EliseCastle23@users.noreply.github.com> Date: Fri, 20 Sep 
2024 15:34:32 -0600 Subject: [PATCH 40/48] Update squid_authorized_keys_user (#2643) --- files/authorized_keys/squid_authorized_keys_user | 1 + 1 file changed, 1 insertion(+) diff --git a/files/authorized_keys/squid_authorized_keys_user b/files/authorized_keys/squid_authorized_keys_user index 4b35fecd9..fa891e516 100644 --- a/files/authorized_keys/squid_authorized_keys_user +++ b/files/authorized_keys/squid_authorized_keys_user @@ -17,6 +17,7 @@ ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDT5VxB1A2JOc3MurPSVH9U6x49PCZfaHgJD1FbKXgP ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDKJR5N5VIU9qdSfCtlskzuQ7A5kNn8YPeXsoKq0HhYZSd4Aq+7gZ0tY0dFUKtXLpJsQVDTflINc7sLDDXNp3icuSMmxOeNgvBfi8WnzBxcATh3uqidPqE0hcnhVQbpsza1zk8jkOB2o8FfBdDTOSbgPESv/1dnGApfkZj96axERUCMzyyUSEmif2moWJaVv2Iv7O+xjQqIZcMXiAo5BCnTCFFKGVOphy65cOsbcE02tEloiZ3lMAPMamZGV7SMQiD3BusncnVctn/E1vDqeozItgDrTdajKqtW0Mt6JFONVFobzxS8AsqFwaHiikOZhKq2LoqgvbXZvNWH2zRELezP jawadq@Jawads-MacBook-Air.local ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC3vyd6a7tsANi149ylPQYS8Gsp/SxJyhdK/j6arv77KbM0EIzzUiclFLnMKcqUQ263FrPyx3a3UP80R77ayCnwcEHrxlJrYfyFUva8vtmI9mu8VE7oXvuR/jcOyXM9NosxyYacL/p6W5X4r8tqo/gJFjmls1YRfu3JPlTgTT0VzGJu+B6rLEsw53c37VVzSaCtu/jBOjyxI1/UaNg1cd+hcfoQxJ9zSDqqE7ZUNOc3zHP+1AGYCQ/CJsNrDl2OkppIdC9He5jgjLhyD7yvyarI+oF05oHknol/K1hXK+yxIkF2Ou5krfjw7TMBvD+JbQVb35vL9acXFF20+lHLRLbobPU/6ZZTup3q7IRm5OWaL2CJtYZbJvicKW0Ep+vTzaiQjK71L6UxcIvnzvbP9Dnatv1GBMMDaQxAa4Lood8NG2ty1yfLN972akGqBlwJASXMRd/ogzxv2KSH9w6HHYoc2WpDhUtNHmjwX1FSLYPW3qx5ICMW6j9gR2u1tG4Ohzp1CmYVElnRHbnBrTkLde65Vqedk2tQy8fcopH59ZASIuR4GbhCb2SiNkr1PHEvfhLMzg/UCSnnhX9vUNhkPjJRy/bdL3pOt/77lpIQUqQBArOiZmfG8OD0q4+3Nr+c9v5bSSvynjirlKk+wb8sKyOoSAXdFeovL/A0BUKUjCtsXQ== dev@test.com ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQChK/8JjpUeWcF/1Ea2M4mSbLz1tOfpq74xD2USxE54kx7VoN1G7ylV76yqSIeRq1e7PPBEg5ZD1aXUJnlI32RwLJ5kaHnoB82Ta+Fv1B/vVoHCObcALfiHPpwPf1kM2liWEB0EhYcz1OUv3YQriPqjiRoWfnbw60GIyzhpWZhKRq0zlISOaTYdV9kafX+N7M6/gSU0632TgUwwsStYrffEleyrC/Lh+4UaESozWoPFiZLl2eMCKfZNFBB99HTFifImW2yC6Ag1QhCd1i3NpfiYuaSDH7WR3slPRSd8DiUAwGC2DkIuWPp3bhaAv2V4mtLIBAaTZsINIACB2+w7yf9yvCGtdobCmp4AA7ik9rEkRLk/Jff0YBHd6Z4qyIuRht3ZeWXIYSK1zOlPfs4lPUgvbjlPgMVFV2CrvOTnS+YZdW+8AklwRC3HDPD8wv3H/eGxl3K0vHWTBbTb774nVNfRDw81wcezCXFNUn4p2he7fgKcxs/rnMsYUcY8JJNR7Iz+NNIGUCom6HFwCMQdangFMHUW5TxxrlJcwVRaAns1M6g3ilYO+uvN/XsgCpZWYWnv5rBk8qz6dBM7gpc8tSr6Hvr7/vlghF3jpL+mQiW+7vUL+UZrUFNyoacUcQ+NuxKacHtHQKuRDyWofp+CB2b2a744F3mpkxx74HIkiZ72mQ== dev@test.com +ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC/lrIPuGPfQzkm2FRMFn/+8MAY5q9godnJxbLJCQ1aKwenXlENHqHDmq+lrP+4S5KGARpTxnIC+i4jR995pDFJmeHsgS6O2GdBBTNi7DznIlqEGd7K4GwsNsLTi06ueuLLIy4tdbRtCYTIlSte5VbyQ1/KfUjTBvj5qXChY/wOG1O766GR681UFN0qk5BPLtEUWIfJCgKuHicxd6eWVoLrYbSj9e1Wug6aJVjngm+ufqAH+yH5PImHo+r0jaj9TiGXzDACAVrW8WipKZ6YlTRs+RCkVmUWgf0+aWfSEcFtrSCM+UzeID5E3T7dTSeWXMDYJSF9rZqCKqh8AIbtt2lH6Ukz19u+nr3zhznOA5AhdgrXSAYQqtss1lptQRn4It0wTq/dmRytbIXOnu4osNmyCs1xAv+b2YHbS0R8SiSPzqkUd1Z8/qNBWrXiBmITh86xFHJy4Nj70n9ZBkSQvPEgEevGtO7BTgH4ziyMYKeunF8IoA8mR9s3iHSSzSsNaWP6ICkTj4CRJsLfx6R600s1Fukwo2CieBs0gV1x4wvsesRtpZY2aTDHTLjrSXD8ZnOCqOtUHCsto+S9gGKgWONcrb7ofe7u1R/F6er67nVnjnfeSCaRYU49GHB94A9UmxBJssExIjpaKaO5ZSPKzG4OrvXaagMyYxWxKHCedj9otw== dev@test.com ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQDDTX+pQvGrQVXmHGDhBP+632tgbb1j+BQWkrsUkDJGzwFiGs4dgqDs2eC+aDVq2LFz4xj0SgussFAKciB45OgmSZKX5yUE3Oo/lqov0Bb5f85iBHGv/X/JiuIYaq8GJklVyyo1sfKLUK1SOal6bE1WofezyTyDsdrHjIU50quzW7nB1CmL6rekIv/+df/seut4b3De1d2uX5WGGtcvQ5yTSgBW5aabMAJ2V9WlP/6Dw040Kq0MyKV01cIJ1HAjFhP58gbf3Eytz3AqqJVT6u0QroxhesCgKTyGcAyYy3airI/N0FHdC5oABVEJ6dKyy1rYvOchuxYeVMVVWn0vS7mZ+vP7dqaDmgEUU2qmTPBQZV2xBWCdpfyUYYARW2JzlEaySbmA+yoxFBsquunVbIgUGNEUbxefsFdM3k5pS6I1uuEM0ATYH5iNz84nKKCcksGlib0i/pEtra6N/mFF7yjHYBRb/E/VCZig0gKezDJWu/DO0emJA+kdQpqp48U+qFrSWkuiO0dCQYl3VCVo8vedgMGPjr8MbUjU7o8W1+DYyjFM8HYMknRNdVAqAoK+cedw9mAWVGpKFrl61caGTFck0634nAVFUmfGTh9XRaZeFdDnivxnqP837gcsdKnEGYnkrxWap97XeXzK0P0Svy1zBfUQyzU5vrHfHt2H7ILDMw== prodv1-usersync-sftp ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDaO/doqHANcTZFEqZOoy9kKgbxu0d/cS1nEINlFcoQ/jnCG7huznWnWiYgnlkS6/Op9VrDp6qG/UBDye2mTvAh2FHPsOzSGvgml3dPYB5fy6G/xoXd7NJnIxttwFUvk4GuLZ40s24WCcXoFGJ2vaSAVYr0q6lmqOqk6jp1/lNj4+QFD4mcH2//jTscSFNseRII2NECu+PnnWAuYFOIHH1IODOvInEivUvN6VBX410D7iD7cEdhgiYitFZH6Cp6ubWG7OUKdZYv0067eO6HDDzl7y+BBUf3DF6Lr8gqtGXVqmAB9UqeBJ8pP3pNWKbgAa8sHvS8JxElCIc+4EM5dTI2OrDYKiuCTPZEC14WEFZLKqH7tjQFuZe0jfVRtoFNmKWClCgkJDWpyIkdR+qHcnOwlYkUVN3B02WVu4kTfox2ZUz65tLspJNAxAjYVrI7+c6LTQHSJwMcAMYcehR3vuqAfKE7xM6ReNxRQXsWaasdJgT2IJKj7vHu/G9GVycjiheg3zakJ9rr+63I68XlHNnTtfjIl/jgIHgcU18ggbwkwjL3xk39YttutlAaNAGUYCsopn/HdK8A86KvTCwHGEKtubgEHmv1oRAOooVaNes1oko2y9Saaqee52bsvwfeTLgxXB43d9GOWLoyBlgprDiufssFHoiJKQlgrqEwtg+vYQ== giangbui0816@gmail.com ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQDTpJ2l8nfOvhJ4Y3pjadFU69nfJBRuE0BaHE22LK9qflFWdhGW+T/x8Qy9406DFXCh6KED+q9lC+N4nR92AfgFNaBmkXZkzWLoXcqO1IWRexHwTqAUcrtLjpx5wNdCr3+vv9hWhXtvYg8ewnrZc+WxYde4EUmXbhzPXbg0SkBXTr6bpYhs6inyttfBeJNxbeydrW7cmhFiAdOkm03o3AXdH86PNlWVfVHy8OHHzf4fbvlJlOx7OeB+wOyQUr3DW+IWBLQFJk4uyagn/ECV9OIQpxoJFTQjcSrJ6v/GqlY5PImM6YxL8NlZu46CDIxagaZkum+iJ8dtPYr6tJuLiP5Ny0Gsl1X5DoKlstgyqqPNYTnZVS4GSS5Hyxm6HmodZ78OR5+vAoyWKZ3unXU5Dbkz0Qxq9VtrGo2xd0M+dDi/7YazRpLL0tc39w48Wl7KD3jFzoesZp1JHeEGLdGXlGCw8AM1FT0WDf28ShTRds6uWPGvMtM3XkVDPMLFwroKv1RCErmqLYod4HOMuwlmdRvtDGYb3NYsliOnHPiT9nhu2J6KmT1jj8uFOLyTaJCArtBqIsXscP3R4o0wBlQl3FniMdiK7ESkv8DUaOr1Co+/3wX9n/p/BW5bxuq1R9HpNyKsrALyNJUkquVT+5aPcNKXvmAeHAw/D0TYzy6ZKBpnDw== kyuleburton@Kyules-MacBook-Pro.local From 6a06e826d5d245a614ff19cedd8669d12b26ce5d Mon Sep 17 00:00:00 2001 From: Ajo Augustine Date: Tue, 24 Sep 2024 10:15:22 -0500 Subject: [PATCH 41/48] add alerts to psql encrypted backup job (#2645) * add alerts to psql encrypted backup job * Update psql-db-backup-encrypt-job.yaml * updating slackWebHook check validation --- .../jobs/psql-db-backup-encrypt-job.yaml | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/kube/services/jobs/psql-db-backup-encrypt-job.yaml b/kube/services/jobs/psql-db-backup-encrypt-job.yaml index 914b81ffa..c7a257490 100644 --- a/kube/services/jobs/psql-db-backup-encrypt-job.yaml +++ b/kube/services/jobs/psql-db-backup-encrypt-job.yaml @@ -38,6 +38,12 @@ spec: key: environment - name: JENKINS_HOME value: "devterm" + - name: slackWebHook + valueFrom: + configMapKeyRef: + name: global + key: slack_alarm_webhook + optional: true - name: GEN3_HOME value: /home/ubuntu/cloud-automation command: [ "/bin/bash" ] @@ -69,7 +75,6 @@ spec: ENV_NS_DIR="${HOST_DIR}/${environment}-${namespace}" DATE_DIR="${ENV_NS_DIR}/${date_str}" mkdir -p "${DATE_DIR}" - # PGP public key PUBLIC_KEY="-----BEGIN PGP PUBLIC KEY BLOCK----- @@ -138,6 +143,14 @@ spec: # Import the public key echo 
"$PUBLIC_KEY" | gpg --import + # Validate Slack webhook + if [[ -n "${slackWebHook}" && "${slackWebHook}" == https* ]]; then + slack=true + else + echo "WARNING: slackWebHook is not set or invalid; not sending alerts to Slack" + slack=false + fi + # Function to encrypt a database with PGP public key function encrypt_database() { local db_name=$1 @@ -166,7 +179,7 @@ spec: break fi done - + # Verify credentials are extracted if [ -z "$db_hostname" ] || [ -z "$db_username" ] || [ -z "$db_password" ] || [ -z "$db_database" ]; then gen3_log_err "Failed to extract database credentials for $database" @@ -206,6 +219,9 @@ spec: # Logging the failed backups if [ -n "$failed_backups" ]; then gen3_log_info "Failed backups:\n$failed_backups" + if [ "$slack" = true ]; then + curl -X POST --data-urlencode "payload={\"text\": \"*Backup failed* for psql-db-backup-encrypt on ${hostname} Cluster: ${environment} Namespace: ${namespace} at $(date).\nFailed backups: ${failed_backups}\"}" $slackWebHook + fi fi # Sleep for 600 seconds to allow the user to check the logs @@ -219,6 +235,5 @@ spec: volumes: - name: s3-volume persistentVolumeClaim: - claimName: s3-pvc-db-backups + claimName: s3-pvc-db-backups restartPolicy: Never - From 95f3cff3a080999dd533667b84eb2185e0c55595 Mon Sep 17 00:00:00 2001 From: Aidan Hilt <11202897+AidanHilt@users.noreply.github.com> Date: Tue, 24 Sep 2024 17:17:54 -0400 Subject: [PATCH 42/48] Feat/es proxy irsa no terragrunt (#2644) * New setup for irsa for es proxy, that removes the need for integration with Terragrunt * Let's try to fix this up * Scoping issue * We need to actually apply the right deployment * Suppressing errors * Let's just check if the policy exists * Maybe we do need a wildcard policy? 
* I think this will fix the access * Final deploy, with limited policy --- gen3/bin/kube-setup-aws-es-proxy.sh | 86 +++++++++++++++++-- .../aws-es-proxy-deploy-irsa.yaml | 83 ++++++++++++++++++ 2 files changed, 161 insertions(+), 8 deletions(-) create mode 100644 kube/services/aws-es-proxy/aws-es-proxy-deploy-irsa.yaml diff --git a/gen3/bin/kube-setup-aws-es-proxy.sh b/gen3/bin/kube-setup-aws-es-proxy.sh index 3feee143a..1ce80fd8e 100644 --- a/gen3/bin/kube-setup-aws-es-proxy.sh +++ b/gen3/bin/kube-setup-aws-es-proxy.sh @@ -12,13 +12,13 @@ gen3_load "gen3/lib/kube-setup-init" manifestPath=$(g3k_manifest_path) es7="$(jq -r ".[\"global\"][\"es7\"]" < "$manifestPath" | tr '[:upper:]' '[:lower:]')" esDomain="$(jq -r ".[\"global\"][\"esDomain\"]" < "$manifestPath" | tr '[:upper:]' '[:lower:]')" +envname="$(gen3 api environment)" [[ -z "$GEN3_ROLL_ALL" ]] && gen3 kube-setup-secrets if g3kubectl get secrets/aws-es-proxy > /dev/null 2>&1; then - envname="$(gen3 api environment)" if [ "$esDomain" != "null" ]; then - if ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names ${esDomain} --query "DomainStatusList[*].Endpoints" --output text)" \ + if ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names "${esDomain}" --query "DomainStatusList[*].Endpoints" --output text)" \ && [[ -n "${ES_ENDPOINT}" && -n "${esDomain}" ]]; then gen3 roll aws-es-proxy GEN3_ES_ENDPOINT "${ES_ENDPOINT}" g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-priority-class.yaml" @@ -34,10 +34,10 @@ if g3kubectl get secrets/aws-es-proxy > /dev/null 2>&1; then g3kubectl patch deployment "aws-es-proxy-deployment" -p '{"spec":{"template":{"metadata":{"labels":{"netvpc":"yes"}}}}}' || true fi elif [ "$es7" = true ]; then - if ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names ${envname}-gen3-metadata-2 --query "DomainStatusList[*].Endpoints" --output text)" \ + if ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names 
"${envname}"-gen3-metadata-2 --query "DomainStatusList[*].Endpoints" --output text)" \ && [[ -n "${ES_ENDPOINT}" && -n "${envname}" ]]; then gen3 roll aws-es-proxy GEN3_ES_ENDPOINT "${ES_ENDPOINT}" - g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-priority-class.yaml" + g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-priority-class.yaml" g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-service.yaml" gen3_log_info "kube-setup-aws-es-proxy" "The aws-es-proxy service has been deployed onto the k8s cluster." else @@ -50,7 +50,7 @@ if g3kubectl get secrets/aws-es-proxy > /dev/null 2>&1; then g3kubectl patch deployment "aws-es-proxy-deployment" -p '{"spec":{"template":{"metadata":{"labels":{"netvpc":"yes"}}}}}' || true fi else - if ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names ${envname}-gen3-metadata --query "DomainStatusList[*].Endpoints" --output text)" \ + if ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names "${envname}"-gen3-metadata --query "DomainStatusList[*].Endpoints" --output text)" \ && [[ -n "${ES_ENDPOINT}" && -n "${envname}" ]]; then gen3 roll aws-es-proxy GEN3_ES_ENDPOINT "${ES_ENDPOINT}" g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-service.yaml" @@ -67,6 +67,76 @@ if g3kubectl get secrets/aws-es-proxy > /dev/null 2>&1; then fi gen3 job cron es-garbage '@daily' else - gen3_log_info "kube-setup-aws-es-proxy" "Not deploying aws-es-proxy - secret is not configured" - exit 1 -fi + gen3_log_info "kube-setup-aws-es-proxy" "No secret detected, attempting IRSA setup" + deploy=true + + # Let's pre-calculate all the info we need about the cluster, so we can just pass it on later + if [ "$esDomain" != "null" ] && [ -n "$esDomain" ]; then + ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names "${esDomain}" --query "DomainStatusList[*].Endpoints" --output text)" + ES_ARN="$(aws es describe-elasticsearch-domains 
--domain-names "${esDomain}" --query "DomainStatusList[*].ARN" --output text)" + elif [ "$es7" = true ]; then + if [ -n "$envname" ]; then + ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names "${envname}"-gen3-metadata-2 --query "DomainStatusList[*].Endpoints" --output text)" + ES_ARN="$(aws es describe-elasticsearch-domains --domain-names "${envname}"-gen3-metadata-2 --query "DomainStatusList[*].ARN" --output text)" + else + deploy=false + fi + else + if [ -n "$envname" ]; then + ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names "${envname}"-gen3-metadata --query "DomainStatusList[*].Endpoints" --output text)" + ES_ARN="$(aws es describe-elasticsearch-domains --domain-names "${envname}"-gen3-metadata --query "DomainStatusList[*].ARN" --output text)" + else + deploy=false + fi + fi + # Let's only do setup stuff if we're going to want to deploy... otherwise, we take the CI env actions + if [ "$deploy" = "true" ]; then + # Put that ARN into a template we get from terraform + policyjson=$(cat < Date: Thu, 26 Sep 2024 13:04:43 -0600 Subject: [PATCH 43/48] Update spark-deploy.yaml (#2647) lowering requests for cpu and memory based on metric data. 
--- kube/services/spark/spark-deploy.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kube/services/spark/spark-deploy.yaml b/kube/services/spark/spark-deploy.yaml index b280cecf0..2a17b5fba 100644 --- a/kube/services/spark/spark-deploy.yaml +++ b/kube/services/spark/spark-deploy.yaml @@ -81,8 +81,8 @@ spec: imagePullPolicy: Always resources: requests: - cpu: 3 - memory: 4Gi + cpu: 2 + memory: 2Gi command: ["/bin/bash" ] args: - "-c" From 64462a6df45089c1c695683acf6e5b92df599bab Mon Sep 17 00:00:00 2001 From: Ajo Augustine Date: Mon, 30 Sep 2024 12:57:24 -0500 Subject: [PATCH 44/48] add skip database option (#2646) * add skip database --- .../jobs/psql-db-backup-encrypt-job.yaml | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/kube/services/jobs/psql-db-backup-encrypt-job.yaml b/kube/services/jobs/psql-db-backup-encrypt-job.yaml index c7a257490..26e32dfee 100644 --- a/kube/services/jobs/psql-db-backup-encrypt-job.yaml +++ b/kube/services/jobs/psql-db-backup-encrypt-job.yaml @@ -46,6 +46,12 @@ spec: optional: true - name: GEN3_HOME value: /home/ubuntu/cloud-automation + - name: SKIP_DBS + valueFrom: + configMapKeyRef: + name: dbbackup-exclude-list + key: skip_dbs + optional: true command: [ "/bin/bash" ] args: - "-c" @@ -56,6 +62,9 @@ spec: # Requirements: # 1. PGP public key must be available as a variable in the script. # 2. The job needs the necessary permissions to read secrets, config maps from the target namespace. + # 3. Databases to skip can be provided using a ConfigMap called `dbbackup-exclude-list` containing a comma-separated list of databases under the key `skip_dbs`. 
+ # Example command to create the ConfigMap: + # kubectl create configmap dbbackup-exclude-list --from-literal=skip_dbs="arborist-bak,wts-bak" source "${GEN3_HOME}/gen3/lib/utils.sh" gen3_load "gen3/gen3setup" @@ -64,10 +73,17 @@ spec: namespace=$(gen3 api namespace) environment=$(gen3 api environment) hostname=$(gen3 api hostname) - default_databases=($(gen3 db services)) + + # Fetch and filter databases + databases=$(gen3 db services | grep -v -E "$(echo "${SKIP_DBS//,/|}")") + + # Log skipped databases + for db in ${SKIP_DBS//,/ }; do + gen3_log_info "Skipping backup for database: $db" + done + date_str=$(date -u +%y%m%d_%H%M%S) - databases=("${default_databases[@]}") - gen3_log_info "Databases to be backed up: ${databases[@]}" + gen3_log_info "Databases to be backed up: ${databases}" # Define backup directory structure BACKUP_DIR="/home/ubuntu/backup" @@ -166,7 +182,7 @@ spec: } # Loop through each service to back up and encrypt the database - for database in "${databases[@]}"; do + for database in $databases; do for secret_name in "${database}-creds creds.json" "$database-g3auto dbcreds.json"; do creds=$(gen3 secrets decode $secret_name 2>/dev/null) # Extracting service credentials From c68be9e9671e0547a05e23d43e8108d3a0432450 Mon Sep 17 00:00:00 2001 From: Ajo Augustine Date: Wed, 2 Oct 2024 07:28:29 -0500 Subject: [PATCH 45/48] Update psql-db-backup-encrypt-job.yaml (#2648) update regular expression to allow spaces in comma separated values --- kube/services/jobs/psql-db-backup-encrypt-job.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kube/services/jobs/psql-db-backup-encrypt-job.yaml b/kube/services/jobs/psql-db-backup-encrypt-job.yaml index 26e32dfee..f0d1a9587 100644 --- a/kube/services/jobs/psql-db-backup-encrypt-job.yaml +++ b/kube/services/jobs/psql-db-backup-encrypt-job.yaml @@ -75,7 +75,8 @@ spec: hostname=$(gen3 api hostname) # Fetch and filter databases - databases=$(gen3 db services | grep -v -E "$(echo 
"${SKIP_DBS//,/|}")") + databases=$(gen3 db services | grep -v -E "$(echo "${SKIP_DBS}" | sed 's/, */|/g' )") + # Log skipped databases for db in ${SKIP_DBS//,/ }; do From a5627754db22920faed432a5e7384bc0bffec374 Mon Sep 17 00:00:00 2001 From: Binam Bajracharya <44302895+BinamB@users.noreply.github.com> Date: Wed, 9 Oct 2024 10:05:15 -0500 Subject: [PATCH 46/48] Isolate Jenkins-new for Single Table IndexD Temporarily (#2651) --- files/scripts/ci-env-pool-reset.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/files/scripts/ci-env-pool-reset.sh b/files/scripts/ci-env-pool-reset.sh index 362cfbfd5..5c0936867 100644 --- a/files/scripts/ci-env-pool-reset.sh +++ b/files/scripts/ci-env-pool-reset.sh @@ -35,7 +35,6 @@ jenkins-niaid EOF cat - > jenkins-envs-releases.txt < Date: Mon, 14 Oct 2024 15:12:47 -0600 Subject: [PATCH 47/48] removing WAF creation from cloud-auto as it is now in Terraform (#2650) --- gen3/bin/kube-setup-ingress.sh | 17 ---- gen3/bin/waf-rules-GPE-312.json | 153 -------------------------------- 2 files changed, 170 deletions(-) delete mode 100644 gen3/bin/waf-rules-GPE-312.json diff --git a/gen3/bin/kube-setup-ingress.sh b/gen3/bin/kube-setup-ingress.sh index b75470f73..df5731cf1 100644 --- a/gen3/bin/kube-setup-ingress.sh +++ b/gen3/bin/kube-setup-ingress.sh @@ -15,23 +15,6 @@ ctxNamespace="$(g3kubectl config view -ojson | jq -r ".contexts | map(select(.na scriptDir="${GEN3_HOME}/kube/services/ingress" gen3_ingress_setup_waf() { - gen3_log_info "Starting GPE-312 waf setup" - #variable to see if WAF already exists - export waf=`aws wafv2 list-web-acls --scope REGIONAL | jq -r '.WebACLs[]|select(.Name| contains(env.vpc_name)).Name'` -if [[ -z $waf ]]; then - gen3_log_info "Creating Web ACL. This may take a few minutes." 
- aws wafv2 create-web-acl\ - --name $vpc_name-waf \ - --scope REGIONAL \ - --default-action Allow={} \ - --visibility-config SampledRequestsEnabled=true,CloudWatchMetricsEnabled=true,MetricName=GPE-312WebAclMetrics \ - --rules file://${GEN3_HOME}/gen3/bin/waf-rules-GPE-312.json \ - --region us-east-1 - #Need to sleep to avoid "WAFUnavailableEntityException" error since the waf takes a bit to spin up - sleep 300 -else - gen3_log_info "WAF already exists. Skipping..." -fi gen3_log_info "Attaching ACL to ALB." export acl_arn=`aws wafv2 list-web-acls --scope REGIONAL | jq -r '.WebACLs[]|select(.Name| contains(env.vpc_name)).ARN'` export alb_name=`kubectl get ingress gen3-ingress | awk '{print $4}' | tail +2 | sed 's/^\([A-Za-z0-9]*-[A-Za-z0-9]*-[A-Za-z0-9]*\).*/\1/;q'` diff --git a/gen3/bin/waf-rules-GPE-312.json b/gen3/bin/waf-rules-GPE-312.json deleted file mode 100644 index b8cdccabe..000000000 --- a/gen3/bin/waf-rules-GPE-312.json +++ /dev/null @@ -1,153 +0,0 @@ -[ - { - "Name": "AWS-AWSManagedRulesAdminProtectionRuleSet", - "Priority": 0, - "Statement": { - "ManagedRuleGroupStatement": { - "VendorName": "AWS", - "Name": "AWSManagedRulesAdminProtectionRuleSet", - "RuleActionOverrides": [ - { - "Name": "AdminProtection_URIPATH", - "ActionToUse": { - "Challenge": {} - } - } - ] - } - }, - "OverrideAction": { - "None": {} - }, - "VisibilityConfig": { - "SampledRequestsEnabled": true, - "CloudWatchMetricsEnabled": true, - "MetricName": "AWS-AWSManagedRulesAdminProtectionRuleSet" - } - }, - { - "Name": "AWS-AWSManagedRulesAmazonIpReputationList", - "Priority": 1, - "Statement": { - "ManagedRuleGroupStatement": { - "VendorName": "AWS", - "Name": "AWSManagedRulesAmazonIpReputationList", - "RuleActionOverrides": [ - { - "Name": "AWSManagedReconnaissanceList", - "ActionToUse": { - "Count": {} - } - } - ] - } - }, - "OverrideAction": { - "None": {} - }, - "VisibilityConfig": { - "SampledRequestsEnabled": true, - "CloudWatchMetricsEnabled": true, - "MetricName": 
"AWS-AWSManagedRulesAmazonIpReputationList" - } - }, - { - "Name": "AWS-AWSManagedRulesCommonRuleSet", - "Priority": 2, - "Statement": { - "ManagedRuleGroupStatement": { - "VendorName": "AWS", - "Name": "AWSManagedRulesCommonRuleSet", - "Version": "Version_1.4", - "RuleActionOverrides": [ - { - "Name": "EC2MetaDataSSRF_BODY", - "ActionToUse": { - "Count": {} - } - }, - { - "Name": "GenericLFI_BODY", - "ActionToUse": { - "Allow": {} - } - }, - { - "Name": "SizeRestrictions_QUERYSTRING", - "ActionToUse": { - "Count": {} - } - }, - { - "Name": "SizeRestrictions_BODY", - "ActionToUse": { - "Allow": {} - } - }, - { - "Name": "CrossSiteScripting_BODY", - "ActionToUse": { - "Count": {} - } - }, - { - "Name": "SizeRestrictions_URIPATH", - "ActionToUse": { - "Allow": {} - } - }, - { - "Name": "SizeRestrictions_Cookie_HEADER", - "ActionToUse": { - "Allow": {} - } - } - ] - } - }, - "OverrideAction": { - "None": {} - }, - "VisibilityConfig": { - "SampledRequestsEnabled": true, - "CloudWatchMetricsEnabled": true, - "MetricName": "AWS-AWSManagedRulesCommonRuleSet" - } - }, - { - "Name": "AWS-AWSManagedRulesKnownBadInputsRuleSet", - "Priority": 3, - "Statement": { - "ManagedRuleGroupStatement": { - "VendorName": "AWS", - "Name": "AWSManagedRulesKnownBadInputsRuleSet" - } - }, - "OverrideAction": { - "None": {} - }, - "VisibilityConfig": { - "SampledRequestsEnabled": true, - "CloudWatchMetricsEnabled": true, - "MetricName": "AWS-AWSManagedRulesKnownBadInputsRuleSet" - } - }, - { - "Name": "AWS-AWSManagedRulesLinuxRuleSet", - "Priority": 4, - "Statement": { - "ManagedRuleGroupStatement": { - "VendorName": "AWS", - "Name": "AWSManagedRulesLinuxRuleSet" - } - }, - "OverrideAction": { - "None": {} - }, - "VisibilityConfig": { - "SampledRequestsEnabled": true, - "CloudWatchMetricsEnabled": true, - "MetricName": "AWS-AWSManagedRulesLinuxRuleSet" - } - } -] \ No newline at end of file From 5d5742be899042ded03e8615a33b4da1926af1b2 Mon Sep 17 00:00:00 2001 From: EliseCastle23 
<109446148+EliseCastle23@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:38:52 -0600 Subject: [PATCH 48/48] Update image_build_push_squid.yaml (#2654) --- .github/workflows/image_build_push_squid.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/image_build_push_squid.yaml b/.github/workflows/image_build_push_squid.yaml index ce1761d3c..0645fb8b2 100644 --- a/.github/workflows/image_build_push_squid.yaml +++ b/.github/workflows/image_build_push_squid.yaml @@ -1,6 +1,7 @@ name: Build Squid images on: + workflow_dispatch: push: paths: - .github/workflows/image_build_push_squid.yaml