From 6f9bb48e43ff7f8d75e8fe152b09c91cd65eeed3 Mon Sep 17 00:00:00 2001 From: Terence Tuhinanshu Date: Thu, 5 Oct 2023 08:26:38 -0400 Subject: [PATCH 1/5] Update PostGIS version 3.3 is no longer available --- deployment/ansible/group_vars/all | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/ansible/group_vars/all b/deployment/ansible/group_vars/all index 474a00072..d04a18b60 100644 --- a/deployment/ansible/group_vars/all +++ b/deployment/ansible/group_vars/all @@ -20,7 +20,7 @@ postgresql_support_repository_channel: "main" postgresql_support_libpq_version: "13.*.pgdg20.04+1" postgresql_support_psycopg2_version: "2.8.*" postgis_version: "3" -postgis_package_version: "3.3*pgdg20.04+1" +postgis_package_version: "3.4*pgdg20.04+1" daemontools_version: "1:0.76-7" From f3457955855f57c85e8084d458549f5a3569f0c3 Mon Sep 17 00:00:00 2001 From: Terence Tuhinanshu Date: Thu, 5 Oct 2023 08:26:56 -0400 Subject: [PATCH 2/5] Make purging caches optional This is only required when doing so in AWS; for local development this always fails. By making it optional, we ensure that local development environments can be set up smoothly. 
--- scripts/aws/setupdb.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/aws/setupdb.sh b/scripts/aws/setupdb.sh index 60c2e26f2..c74014a53 100755 --- a/scripts/aws/setupdb.sh +++ b/scripts/aws/setupdb.sh @@ -15,8 +15,9 @@ where options are one or more of: \n -d load/reload DRB stream data\n -m load/reload mapshed data\n -p load/reload DEP data\n - -c load/reload nhdplus catchment data + -c load/reload nhdplus catchment data\n -q load/reload water quality data\n + -X purge s3 cache as well\n -x purge s3 cache for given path\n " @@ -29,6 +30,7 @@ load_hires_stream=false load_mapshed=false load_water_quality=false load_catchment=false +should_purge_cache=false while getopts ":hbsSdpmqcf:x:" opt; do case $opt in @@ -55,6 +57,8 @@ while getopts ":hbsSdpmqcf:x:" opt; do file_to_load=$OPTARG ;; x) path_to_purge=$OPTARG ;; + X) + should_purge_cache=true ;; \?) echo "invalid option: -$OPTARG" exit ;; @@ -85,9 +89,11 @@ function download_and_load { } function purge_tile_cache { - for path in "${PATHS[@]}"; do - aws s3 rm --recursive "s3://tile-cache.${PUBLIC_HOSTED_ZONE_NAME}/${path}/" - done + if [ "$should_purge_cache" = "true" ] ; then + for path in "${PATHS[@]}"; do + aws s3 rm --recursive "s3://tile-cache.${PUBLIC_HOSTED_ZONE_NAME}/${path}/" + done + fi } function create_trgm_indexes { From f9ab871c7001edbc9d4beda0e941be383a533720 Mon Sep 17 00:00:00 2001 From: Terence Tuhinanshu Date: Wed, 11 Oct 2023 16:49:58 +0000 Subject: [PATCH 3/5] Explicitly specify raster source bucket To make it more apparent where to specify an alternative bucket. 
--- deployment/ansible/group_vars/all | 1 + .../ansible/roles/model-my-watershed.base/defaults/main.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/deployment/ansible/group_vars/all b/deployment/ansible/group_vars/all index d04a18b60..b789fa8d7 100644 --- a/deployment/ansible/group_vars/all +++ b/deployment/ansible/group_vars/all @@ -47,6 +47,7 @@ geop_port: 8090 geop_version: "5.3.0" geop_cache_enabled: 1 geop_timeout: 200 +geop_bucket: datahub-catalogs-us-east-1 nginx_cache_dir: "/var/cache/nginx" diff --git a/deployment/ansible/roles/model-my-watershed.base/defaults/main.yml b/deployment/ansible/roles/model-my-watershed.base/defaults/main.yml index 57a72e8fa..d56ade259 100644 --- a/deployment/ansible/roles/model-my-watershed.base/defaults/main.yml +++ b/deployment/ansible/roles/model-my-watershed.base/defaults/main.yml @@ -14,6 +14,7 @@ envdir_config: MMW_GEOPROCESSING_PORT: "{{ geop_port }}" MMW_GEOPROCESSING_VERSION: "{{ geop_version }}" MMW_GEOPROCESSING_TIMEOUT: "{{ geop_timeout }}" + MMW_GEOPROCESSING_BUCKET: "{{ geop_bucket }}" MMW_ITSI_CLIENT_ID: "{{ itsi_client_id }}" MMW_ITSI_SECRET_KEY: "{{ itsi_secret_key }}" MMW_ITSI_BASE_URL: "{{ itsi_base_url }}" From 9e6b83083bffa2d969e9406f6d4a20f563a69e89 Mon Sep 17 00:00:00 2001 From: Terence Tuhinanshu Date: Wed, 18 Oct 2023 01:46:09 +0000 Subject: [PATCH 4/5] Use all environment variables for mmw-geoprocessing Previously, only a handful of environment variables were supplied to the mmw-geoprocessing service. This was confusing, as more relevant variables were added to mmw.d/env but were not taking effect. By loading all the environment variables for mmw-geoprocessing, just as we do for celery, we make the environment variables more universal and consistently applied. 
--- deployment/ansible/group_vars/all | 2 +- .../templates/systemd-geoprocessing.service.j2 | 7 +++---- src/mmw/mmw/settings/base.py | 8 +++++++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/deployment/ansible/group_vars/all b/deployment/ansible/group_vars/all index b789fa8d7..845ad1bb9 100644 --- a/deployment/ansible/group_vars/all +++ b/deployment/ansible/group_vars/all @@ -46,7 +46,7 @@ geop_port: 8090 geop_version: "5.3.0" geop_cache_enabled: 1 -geop_timeout: 200 +geop_timeout: 200s geop_bucket: datahub-catalogs-us-east-1 nginx_cache_dir: "/var/cache/nginx" diff --git a/deployment/ansible/roles/model-my-watershed.geoprocessing/templates/systemd-geoprocessing.service.j2 b/deployment/ansible/roles/model-my-watershed.geoprocessing/templates/systemd-geoprocessing.service.j2 index dbd6ec82b..2a93bcab4 100644 --- a/deployment/ansible/roles/model-my-watershed.geoprocessing/templates/systemd-geoprocessing.service.j2 +++ b/deployment/ansible/roles/model-my-watershed.geoprocessing/templates/systemd-geoprocessing.service.j2 @@ -4,13 +4,12 @@ After=network.target [Service] {% if ['development', 'test'] | some_are_in(group_names) -%} -Environment=MMW_GEOPROCESSING_TIMEOUT={{ geop_timeout }}s AWS_PROFILE={{ aws_profile }} -{% else %} -Environment=MMW_GEOPROCESSING_TIMEOUT={{ geop_timeout }}s +Environment=AWS_PROFILE={{ aws_profile }} {% endif %} +Environment="JAVA_WITH_ENV=/usr/bin/envdir /etc/mmw.d/env /usr/bin/java" User=mmw WorkingDirectory={{ geop_home }} -ExecStart=/usr/bin/java -jar mmw-geoprocessing-{{ geop_version }}.jar +ExecStart=/bin/sh -c '${JAVA_WITH_ENV} -jar mmw-geoprocessing-{{ geop_version }}.jar' StandardOutput=syslog StandardError=syslog SyslogIdentifier=geoprocessing diff --git a/src/mmw/mmw/settings/base.py b/src/mmw/mmw/settings/base.py index 384965627..3dae62d6f 100644 --- a/src/mmw/mmw/settings/base.py +++ b/src/mmw/mmw/settings/base.py @@ -9,6 +9,8 @@ https://docs.djangoproject.com/en/1.7/ref/settings/ """ +import re + from os 
import environ from os.path import abspath, basename, dirname, join, normpath from sys import path @@ -128,7 +130,11 @@ def get_env_setting(setting): CELERY_TASK_DEFAULT_EXCHANGE = 'tasks' CELERY_TASK_DEFAULT_ROUTING_KEY = "task.%s" % STACK_COLOR -CELERY_TASK_TIME_LIMIT = int(environ.get('MMW_GEOPROCESSING_TIMEOUT', 120)) +# MMW_GEOPROCESSING_TIMEOUT specified with "s" suffix for mmw-geoprocessing +MMW_GEOPROCESSING_TIMEOUT = environ.get('MMW_GEOPROCESSING_TIMEOUT', '120s') + +CELERY_TASK_TIME_LIMIT = int( + re.search(r'\d+', MMW_GEOPROCESSING_TIMEOUT).group()) TASK_REQUEST_TIMEOUT = CELERY_TASK_TIME_LIMIT - 10 # END CELERY CONFIGURATION From b4ac0140fb29637092c334a447e81905d01ac0d9 Mon Sep 17 00:00:00 2001 From: Terence Tuhinanshu Date: Tue, 5 Dec 2023 15:35:14 +0000 Subject: [PATCH 5/5] Update mmw-geoprocessing to 5.4.0 This version has observability improvements that allow inspection of the environment variables used at runtime. This can help debug issues with misconfiguration of the geoprocessing service. See https://github.com/WikiWatershed/mmw-geoprocessing/pull/104 --- deployment/ansible/group_vars/all | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/ansible/group_vars/all b/deployment/ansible/group_vars/all index 845ad1bb9..c00780e2e 100644 --- a/deployment/ansible/group_vars/all +++ b/deployment/ansible/group_vars/all @@ -44,7 +44,7 @@ docker_compose_version: "1.26.*" geop_host: "localhost" geop_port: 8090 -geop_version: "5.3.0" +geop_version: "5.4.0" geop_cache_enabled: 1 geop_timeout: 200s geop_bucket: datahub-catalogs-us-east-1