From 6395bde0463870e34f0ff12bbfb04a59b2a2cd88 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Wed, 1 Nov 2023 09:51:46 +0100 Subject: [PATCH] Minor updates to release tools (#3216) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Rémi Kazeroni Co-authored-by: Valeriu Predoi --- doc/sphinx/source/utils.rst | 7 ++- esmvaltool/utils/batch-jobs/generate.py | 63 ++++++++++++------- .../utils/batch-jobs/parse_recipes_output.py | 10 +-- esmvaltool/utils/draft_release_notes.py | 26 ++++---- .../utils/testing/regression/compare.py | 15 +++-- 5 files changed, 74 insertions(+), 47 deletions(-) diff --git a/doc/sphinx/source/utils.rst b/doc/sphinx/source/utils.rst index 5e46d06d81..e5e7b00553 100644 --- a/doc/sphinx/source/utils.rst +++ b/doc/sphinx/source/utils.rst @@ -246,7 +246,8 @@ Optionally, the following parameters can be edited: * ``partition``, *str*: Name of the DKRZ partition used to run jobs. Default is ``interactive`` to minimize computing cost compared to ``compute`` for which nodes cannot be shared. * ``memory``, *str*: Amount of memory requested for each run. Default is ``64G`` to allow to run 4 recipes on the same node in parallel. * ``time``, *str*: Time limit. Default is ``04:00:00`` to increase the job priority. Jobs can run for up to 8 hours and 12 hours on the compute and interactive partitions, respectively. - +* ``default_max_parallel_tasks``, *int*: Default is ``8`` which works for most recipes. For other cases, an entry needs to be made to the ``MAX_PARALLEL_TASKS`` dictionary (see below). + The script will generate a submission script for all recipes using by default the ``interactive`` queue and with a time limit of 4h. In case a recipe may require of additional resources, they can be defined in the ``SPECIAL_RECIPES`` dictionary. The recipe name has to be given as a ``key`` in which the values are another dictionary. 
@@ -263,8 +264,8 @@ given by the slurm flags ``--mem``, ``--constraint`` or ``--ntasks``. In general }, } -Some recipes can only be run with ``--max_parallel_tasks=1`` for various reasons (memory issues, diagnostic issues, CMIP3 data used). -These recipes need to be added to the ``ONE_TASK_RECIPES`` list. +Some recipes can only be run with a number of tasks less than ``default_max_parallel_tasks`` for various reasons (memory issues, diagnostic issues, CMIP3 data used). +These recipes need to be added to the ``MAX_PARALLEL_TASKS`` dictionary with a specific ``max_parallel_tasks`` value. Note that the script has been optimized to use standard SLURM settings to run most recipes while minimizing the computational cost of the jobs and tailored runtime settings for resource-intensive recipes. It is only necessary to edit this script for recipes that have been added since the last release and cannot be run with the default settings. diff --git a/esmvaltool/utils/batch-jobs/generate.py b/esmvaltool/utils/batch-jobs/generate.py index dd7a46d83f..fc9deb8339 100644 --- a/esmvaltool/utils/batch-jobs/generate.py +++ b/esmvaltool/utils/batch-jobs/generate.py @@ -51,6 +51,9 @@ # Full path to config_file # If none, ~/.esmvaltool/config-user.yml is used config_file = '' +# Set max_parallel_tasks +# If none, read from config_file +default_max_parallel_tasks = 8 # List of recipes that require non-default SLURM options set above SPECIAL_RECIPES = { @@ -74,12 +77,17 @@ }, 'recipe_climate_change_hotspot': { 'partition': '#SBATCH --partition=compute \n', + 'memory': '#SBATCH --constraint=512G \n', }, 'recipe_collins13ipcc': { 'partition': '#SBATCH --partition=compute \n', 'time': '#SBATCH --time=08:00:00 \n', 'memory': '#SBATCH --constraint=512G \n', }, + 'recipe_daily_era5': { + 'partition': '#SBATCH --partition=compute \n', + 'memory': '#SBATCH --constraint=512G \n', + }, 'recipe_eady_growth_rate': { 'partition': '#SBATCH --partition=compute \n', }, @@ -125,6 +133,7 @@ }, 
'recipe_ipccwg1ar6ch3_fig_3_43': { 'partition': '#SBATCH --partition=compute \n', + 'time': '#SBATCH --time=08:00:00 \n', }, 'recipe_lauer22jclim_fig3-4_zonal': { 'partition': '#SBATCH --partition=compute \n', @@ -137,6 +146,7 @@ }, 'recipe_mpqb_xch4': { 'partition': '#SBATCH --partition=compute \n', + 'memory': '#SBATCH --constraint=512G \n', }, 'recipe_perfmetrics_CMIP5': { 'partition': '#SBATCH --partition=compute \n', @@ -164,6 +174,9 @@ 'recipe_schlund20jgr_gpp_change_rcp85': { 'partition': '#SBATCH --partition=compute \n', }, + 'recipe_sea_surface_salinity': { + 'partition': '#SBATCH --partition=compute \n', + }, 'recipe_smpi': { 'partition': '#SBATCH --partition=compute \n', }, @@ -174,6 +187,9 @@ 'partition': '#SBATCH --partition=compute \n', 'time': '#SBATCH --time=08:00:00 \n', }, + 'recipe_thermodyn_diagtool': { + 'partition': '#SBATCH --partition=compute \n', + }, 'recipe_wenzel16jclim': { 'partition': '#SBATCH --partition=compute \n', }, @@ -182,26 +198,28 @@ }, } +# These recipes cannot be run with the default number of parallel +# tasks (max_parallel_tasks=8). # These recipes either use CMIP3 input data # (see https://github.com/ESMValGroup/ESMValCore/issues/430) -# and recipes where tasks require the full compute node memory. -ONE_TASK_RECIPES = [ - 'recipe_bock20jgr_fig_1-4', - 'recipe_bock20jgr_fig_6-7', - 'recipe_bock20jgr_fig_8-10', - 'recipe_flato13ipcc_figure_96', - 'recipe_flato13ipcc_figures_938_941_cmip3', - 'recipe_ipccwg1ar6ch3_fig_3_9', - 'recipe_ipccwg1ar6ch3_fig_3_42_a', - 'recipe_ipccwg1ar6ch3_fig_3_43', - 'recipe_check_obs', - 'recipe_collins13ipcc', - 'recipe_lauer22jclim_fig3-4_zonal', - 'recipe_lauer22jclim_fig5_lifrac', - 'recipe_smpi', - 'recipe_smpi_4cds', - 'recipe_wenzel14jgr', - ] +# or require a large fraction of the compute node memory. 
+MAX_PARALLEL_TASKS = { + 'recipe_bock20jgr_fig_1-4': 1, + 'recipe_bock20jgr_fig_6-7': 1, + 'recipe_bock20jgr_fig_8-10': 1, + 'recipe_flato13ipcc_figure_96': 1, + 'recipe_flato13ipcc_figures_938_941_cmip3': 1, + 'recipe_ipccwg1ar6ch3_fig_3_9': 1, + 'recipe_ipccwg1ar6ch3_fig_3_42_a': 1, + 'recipe_ipccwg1ar6ch3_fig_3_43': 1, + 'recipe_check_obs': 1, + 'recipe_collins13ipcc': 1, + 'recipe_lauer22jclim_fig3-4_zonal': 3, + 'recipe_lauer22jclim_fig5_lifrac': 3, + 'recipe_smpi': 1, + 'recipe_smpi_4cds': 1, + 'recipe_wenzel14jgr': 1, +} def generate_submit(): @@ -272,9 +290,12 @@ def generate_submit(): else: file.write(f'esmvaltool run --config_file ' f'{str(config_file)} {str(recipe)}') - if recipe.stem in ONE_TASK_RECIPES: - file.write(' --max_parallel_tasks=1') - + # set max_parallel_tasks + max_parallel_tasks = MAX_PARALLEL_TASKS.get( + recipe.stem, + default_max_parallel_tasks, + ) + file.write(f' --max_parallel_tasks={max_parallel_tasks}\n') if submit: subprocess.check_call(['sbatch', filename]) diff --git a/esmvaltool/utils/batch-jobs/parse_recipes_output.py b/esmvaltool/utils/batch-jobs/parse_recipes_output.py index a8436d3c31..9df80c6cc0 100644 --- a/esmvaltool/utils/batch-jobs/parse_recipes_output.py +++ b/esmvaltool/utils/batch-jobs/parse_recipes_output.py @@ -7,10 +7,8 @@ a GitHub issue or any other such documentation. 
""" import datetime -import os - import glob - +import os # User change needed # directory where SLURM output files (.out and .err) are @@ -81,11 +79,13 @@ def display_in_md(): todaynow = datetime.datetime.now() print(f"## Recipe running session {todaynow}\n") with open("all_recipes.txt", "r", encoding='utf-8') as allrecs: - all_recs = [rec.strip() for rec in allrecs.readlines()] + all_recs = [ + os.path.basename(rec.strip()) for rec in allrecs.readlines() + ] # parse different types of recipe outcomes recipe_list, failed, missing_dat = parse_output_file() - print("### Succesfully run recipes\n\n") + print("### Successfully ran recipes\n\n") print(f"{len(recipe_list)} out of {len(all_recs)}\n") for rec in recipe_list: print("- " + rec) diff --git a/esmvaltool/utils/draft_release_notes.py b/esmvaltool/utils/draft_release_notes.py index d00e752455..7b1c5f61b1 100644 --- a/esmvaltool/utils/draft_release_notes.py +++ b/esmvaltool/utils/draft_release_notes.py @@ -41,23 +41,22 @@ } PREVIOUS_RELEASE = { - 'esmvalcore': datetime.datetime(2022, 10, 13, 18), + 'esmvalcore': datetime.datetime(2023, 5, 19, 16), 'esmvaltool': datetime.datetime(2022, 10, 28, 18), } - LABELS = { 'esmvalcore': ( + 'backwards incompatible change', # important, keep at the top + 'deprecated feature', # important, keep at the top + 'bug', # important, keep at the top 'api', - 'backwards incompatible change', - 'bug', 'cmor', 'containerization', 'community', + 'dask', 'deployment', - 'deprecated feature', 'documentation', - 'enhancement', 'fix for dataset', 'installation', 'iris', @@ -65,20 +64,21 @@ 'release', 'testing', 'UX', - 'variable derivation' + 'variable derivation', + 'enhancement', # uncategorized, keep at the bottom ), 'esmvaltool': ( - 'backwards incompatible change', - 'bug', + 'backwards incompatible change', # important, keep at the top + 'deprecated feature', # important, keep at the top + 'bug', # important, keep at the top 'community', - 'deprecated feature', 'documentation', 
'diagnostic', 'preprocessor', 'observations', 'testing', 'installation', - 'enhancement', + 'enhancement', # uncategorized, keep at the bottom ) } @@ -87,6 +87,7 @@ 'deprecated feature': 'Deprecations', 'bug': 'Bug fixes', 'cmor': 'CMOR standard', + 'dask': 'Computational performance improvements', 'diagnostic': 'Diagnostics', 'fix for dataset': 'Fixes for datasets', 'observations': 'Observational and re-analysis dataset support', @@ -210,7 +211,8 @@ def _compose_note(pull): def main(): - """Entry point for the scrip.""" + """Entry point for the script.""" + def display(lines, out): text = "\n".join(lines) + "\n" out.write(text) diff --git a/esmvaltool/utils/testing/regression/compare.py b/esmvaltool/utils/testing/regression/compare.py index ca96b65a68..a4ee33c5d3 100644 --- a/esmvaltool/utils/testing/regression/compare.py +++ b/esmvaltool/utils/testing/regression/compare.py @@ -80,7 +80,7 @@ def diff_attrs(ref: dict, cur: dict) -> str: msg.append(f"missing attribute '{key}'") elif not np.array_equal(ref[key], cur[key]): msg.append(f"value of attribute '{key}' is different: " - f"expected '{cur[key]}' but found '{ref[key]}'") + f"expected '{ref[key]}' but found '{cur[key]}'") for key in cur: if key not in ref: msg.append(f"extra attribute '{key}' with value '{cur[key]}'") @@ -96,12 +96,15 @@ def diff_array(ref: np.ndarray, cur: np.ndarray) -> str: msg = [] if cur.shape != ref.shape: msg.append("data has different shape") + elif np.issubdtype(ref.dtype, np.inexact) and np.issubdtype( + cur.dtype, np.inexact): + if not np.array_equal(ref, cur, equal_nan=True): + if np.allclose(ref, cur, equal_nan=True): + msg.append("data is almost but not quite the same") + else: + msg.append("data is different") elif not np.array_equal(ref, cur): - if np.issubdtype(ref.dtype, np.inexact) and np.issubdtype( - cur.dtype, np.inexact) and np.allclose(ref, cur): - msg.append("data is almost but not quite the same") - else: - msg.append("data is different") + msg.append("data is 
different") return as_txt(msg)