Minor updates to release tools (#3216)
Co-authored-by: Rémi Kazeroni <[email protected]>
Co-authored-by: Valeriu Predoi <[email protected]>
3 people authored Nov 1, 2023
1 parent 1d05511 commit 6395bde
Showing 5 changed files with 74 additions and 47 deletions.
7 changes: 4 additions & 3 deletions doc/sphinx/source/utils.rst
@@ -246,7 +246,8 @@ Optionally, the following parameters can be edited:
* ``partition``, *str*: Name of the DKRZ partition used to run jobs. Default is ``interactive``, which minimizes computing cost compared to ``compute``, for which nodes cannot be shared.
* ``memory``, *str*: Amount of memory requested for each run. Default is ``64G``, to allow running 4 recipes in parallel on the same node.
* ``time``, *str*: Time limit. Default is ``04:00:00`` to increase the job priority. Jobs can run for up to 8 hours and 12 hours on the ``compute`` and ``interactive`` partitions, respectively.

* ``default_max_parallel_tasks``, *int*: Default is ``8``, which works for most recipes. For other cases, an entry needs to be added to the ``MAX_PARALLEL_TASKS`` dictionary (see below and the sketch after this list).
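For orientation, a minimal sketch of the corresponding settings block near the top of ``generate.py`` (only ``config_file`` and ``default_max_parallel_tasks`` appear verbatim in the diff below; the other variable names are illustrative assumptions)::

    # Hypothetical settings block; defaults mirror the documented values
    partition = 'interactive'        # DKRZ SLURM partition
    memory = '64G'                   # memory requested per run
    time = '04:00:00'                # wall-time limit per job
    config_file = ''                 # if empty, ~/.esmvaltool/config-user.yml is used
    default_max_parallel_tasks = 8   # per-recipe overrides go in MAX_PARALLEL_TASKS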

The script will generate a submission script for each recipe, using by default the ``interactive`` queue and a time limit of 4h. In case a recipe
requires additional resources, they can be defined in the ``SPECIAL_RECIPES`` dictionary. The recipe name has to be given as a ``key`` whose
value is another dictionary.
@@ -263,8 +264,8 @@ given by the SLURM flags ``--mem``, ``--constraint`` or ``--ntasks``. In general
},
}
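For illustration, a complete entry might look as follows (``recipe_example`` is a hypothetical name; the ``#SBATCH`` lines are written verbatim into the generated submission script, matching the entries in ``generate.py`` shown below)::

    SPECIAL_RECIPES = {
        'recipe_example': {
            'partition': '#SBATCH --partition=compute \n',
            'time': '#SBATCH --time=08:00:00 \n',
            'memory': '#SBATCH --constraint=512G \n',
        },
    }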
Some recipes can only be run with ``--max_parallel_tasks=1`` for various reasons (memory issues, diagnostic issues, CMIP3 data used).
These recipes need to be added to the ``ONE_TASK_RECIPES`` list.
Some recipes can only be run with fewer parallel tasks than ``default_max_parallel_tasks`` for various reasons (memory issues, diagnostic issues, use of CMIP3 data).
These recipes need to be added to the ``MAX_PARALLEL_TASKS`` dictionary with a specific ``max_parallel_tasks`` value (see the sketch below).
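For instance (hypothetical entry; the actual dictionary lives in ``generate.py``, see below)::

    MAX_PARALLEL_TASKS = {
        'recipe_example': 2,  # limit a memory-hungry recipe to 2 parallel tasks
    }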

Note that the script has been optimized to run most recipes with standard SLURM settings, minimizing the computational cost of the jobs, while using tailored runtime settings for resource-intensive recipes.
It is only necessary to edit this script for recipes that have been added since the last release and cannot be run with the default settings.
Expand Down
63 changes: 42 additions & 21 deletions esmvaltool/utils/batch-jobs/generate.py
@@ -51,6 +51,9 @@
# Full path to config_file
# If none, ~/.esmvaltool/config-user.yml is used
config_file = ''
# Set max_parallel_tasks
# If none, read from config_file
default_max_parallel_tasks = 8

# List of recipes that require non-default SLURM options set above
SPECIAL_RECIPES = {
@@ -74,12 +77,17 @@
},
'recipe_climate_change_hotspot': {
'partition': '#SBATCH --partition=compute \n',
'memory': '#SBATCH --constraint=512G \n',
},
'recipe_collins13ipcc': {
'partition': '#SBATCH --partition=compute \n',
'time': '#SBATCH --time=08:00:00 \n',
'memory': '#SBATCH --constraint=512G \n',
},
'recipe_daily_era5': {
'partition': '#SBATCH --partition=compute \n',
'memory': '#SBATCH --constraint=512G \n',
},
'recipe_eady_growth_rate': {
'partition': '#SBATCH --partition=compute \n',
},
@@ -125,6 +133,7 @@
},
'recipe_ipccwg1ar6ch3_fig_3_43': {
'partition': '#SBATCH --partition=compute \n',
'time': '#SBATCH --time=08:00:00 \n',
},
'recipe_lauer22jclim_fig3-4_zonal': {
'partition': '#SBATCH --partition=compute \n',
@@ -137,6 +146,7 @@
},
'recipe_mpqb_xch4': {
'partition': '#SBATCH --partition=compute \n',
'memory': '#SBATCH --constraint=512G \n',
},
'recipe_perfmetrics_CMIP5': {
'partition': '#SBATCH --partition=compute \n',
@@ -164,6 +174,9 @@
'recipe_schlund20jgr_gpp_change_rcp85': {
'partition': '#SBATCH --partition=compute \n',
},
'recipe_sea_surface_salinity': {
'partition': '#SBATCH --partition=compute \n',
},
'recipe_smpi': {
'partition': '#SBATCH --partition=compute \n',
},
@@ -174,6 +187,9 @@
'partition': '#SBATCH --partition=compute \n',
'time': '#SBATCH --time=08:00:00 \n',
},
'recipe_thermodyn_diagtool': {
'partition': '#SBATCH --partition=compute \n',
},
'recipe_wenzel16jclim': {
'partition': '#SBATCH --partition=compute \n',
},
@@ -182,26 +198,28 @@
},
}

# These recipes cannot be run with the default number of parallel
# tasks (max_parallel_tasks=8).
# These recipes either use CMIP3 input data
# (see https://github.com/ESMValGroup/ESMValCore/issues/430)
# and recipes where tasks require the full compute node memory.
ONE_TASK_RECIPES = [
'recipe_bock20jgr_fig_1-4',
'recipe_bock20jgr_fig_6-7',
'recipe_bock20jgr_fig_8-10',
'recipe_flato13ipcc_figure_96',
'recipe_flato13ipcc_figures_938_941_cmip3',
'recipe_ipccwg1ar6ch3_fig_3_9',
'recipe_ipccwg1ar6ch3_fig_3_42_a',
'recipe_ipccwg1ar6ch3_fig_3_43',
'recipe_check_obs',
'recipe_collins13ipcc',
'recipe_lauer22jclim_fig3-4_zonal',
'recipe_lauer22jclim_fig5_lifrac',
'recipe_smpi',
'recipe_smpi_4cds',
'recipe_wenzel14jgr',
]
# or require a large fraction of the compute node memory.
MAX_PARALLEL_TASKS = {
'recipe_bock20jgr_fig_1-4': 1,
'recipe_bock20jgr_fig_6-7': 1,
'recipe_bock20jgr_fig_8-10': 1,
'recipe_flato13ipcc_figure_96': 1,
'recipe_flato13ipcc_figures_938_941_cmip3': 1,
'recipe_ipccwg1ar6ch3_fig_3_9': 1,
'recipe_ipccwg1ar6ch3_fig_3_42_a': 1,
'recipe_ipccwg1ar6ch3_fig_3_43': 1,
'recipe_check_obs': 1,
'recipe_collins13ipcc': 1,
'recipe_lauer22jclim_fig3-4_zonal': 3,
'recipe_lauer22jclim_fig5_lifrac': 3,
'recipe_smpi': 1,
'recipe_smpi_4cds': 1,
'recipe_wenzel14jgr': 1,
}


def generate_submit():
@@ -272,9 +290,12 @@ def generate_submit():
else:
file.write(f'esmvaltool run --config_file '
f'{str(config_file)} {str(recipe)}')
if recipe.stem in ONE_TASK_RECIPES:
file.write(' --max_parallel_tasks=1')

# set max_parallel_tasks
max_parallel_tasks = MAX_PARALLEL_TASKS.get(
recipe.stem,
default_max_parallel_tasks,
)
file.write(f' --max_parallel_tasks={max_parallel_tasks}\n')
if submit:
subprocess.check_call(['sbatch', filename])

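The per-recipe limit falls back to the default via dict.get; a minimal sketch of the behaviour, reusing entries from the dictionary above:

    MAX_PARALLEL_TASKS = {'recipe_smpi': 1}
    default_max_parallel_tasks = 8

    # Known recipes get their specific limit, all others the default.
    assert MAX_PARALLEL_TASKS.get('recipe_smpi', default_max_parallel_tasks) == 1
    assert MAX_PARALLEL_TASKS.get('recipe_other', default_max_parallel_tasks) == 8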
10 changes: 5 additions & 5 deletions esmvaltool/utils/batch-jobs/parse_recipes_output.py
@@ -7,10 +7,8 @@
a GitHub issue or any other such documentation.
"""
import datetime
import os

import glob

import os

# User change needed
# directory where SLURM output files (.out and .err) are
@@ -81,11 +79,13 @@ def display_in_md():
todaynow = datetime.datetime.now()
print(f"## Recipe running session {todaynow}\n")
with open("all_recipes.txt", "r", encoding='utf-8') as allrecs:
all_recs = [rec.strip() for rec in allrecs.readlines()]
all_recs = [
os.path.basename(rec.strip()) for rec in allrecs.readlines()
]

# parse different types of recipe outcomes
recipe_list, failed, missing_dat = parse_output_file()
print("### Succesfully run recipes\n\n")
print("### Successfully ran recipes\n\n")
print(f"{len(recipe_list)} out of {len(all_recs)}\n")
for rec in recipe_list:
print("- " + rec)
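The added os.path.basename call strips directory components, so recipes listed with full paths in all_recipes.txt match the bare file names used elsewhere in the report; for example (hypothetical path):

    import os

    os.path.basename('/work/recipes/recipe_smpi.yml'.strip())
    # -> 'recipe_smpi.yml'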
26 changes: 14 additions & 12 deletions esmvaltool/utils/draft_release_notes.py
@@ -41,44 +41,44 @@
}

PREVIOUS_RELEASE = {
'esmvalcore': datetime.datetime(2022, 10, 13, 18),
'esmvalcore': datetime.datetime(2023, 5, 19, 16),
'esmvaltool': datetime.datetime(2022, 10, 28, 18),
}


LABELS = {
'esmvalcore': (
'backwards incompatible change', # important, keep at the top
'deprecated feature', # important, keep at the top
'bug', # important, keep at the top
'api',
'backwards incompatible change',
'bug',
'cmor',
'containerization',
'community',
'dask',
'deployment',
'deprecated feature',
'documentation',
'enhancement',
'fix for dataset',
'installation',
'iris',
'preprocessor',
'release',
'testing',
'UX',
'variable derivation'
'variable derivation',
'enhancement', # uncategorized, keep at the bottom
),
'esmvaltool': (
'backwards incompatible change',
'bug',
'backwards incompatible change', # important, keep at the top
'deprecated feature', # important, keep at the top
'bug', # important, keep at the top
'community',
'deprecated feature',
'documentation',
'diagnostic',
'preprocessor',
'observations',
'testing',
'installation',
'enhancement',
'enhancement', # uncategorized, keep at the bottom
)
}

@@ -87,6 +87,7 @@
'deprecated feature': 'Deprecations',
'bug': 'Bug fixes',
'cmor': 'CMOR standard',
'dask': 'Computational performance improvements',
'diagnostic': 'Diagnostics',
'fix for dataset': 'Fixes for datasets',
'observations': 'Observational and re-analysis dataset support',
@@ -210,7 +211,8 @@ def _compose_note(pull):


def main():
"""Entry point for the scrip."""
"""Entry point for the script."""

def display(lines, out):
text = "\n".join(lines) + "\n"
out.write(text)
15 changes: 9 additions & 6 deletions esmvaltool/utils/testing/regression/compare.py
@@ -80,7 +80,7 @@ def diff_attrs(ref: dict, cur: dict) -> str:
msg.append(f"missing attribute '{key}'")
elif not np.array_equal(ref[key], cur[key]):
msg.append(f"value of attribute '{key}' is different: "
f"expected '{cur[key]}' but found '{ref[key]}'")
f"expected '{ref[key]}' but found '{cur[key]}'")
for key in cur:
if key not in ref:
msg.append(f"extra attribute '{key}' with value '{cur[key]}'")
@@ -96,12 +96,15 @@ def diff_array(ref: np.ndarray, cur: np.ndarray) -> str:
msg = []
if cur.shape != ref.shape:
msg.append("data has different shape")
elif np.issubdtype(ref.dtype, np.inexact) and np.issubdtype(
cur.dtype, np.inexact):
if not np.array_equal(ref, cur, equal_nan=True):
if np.allclose(ref, cur, equal_nan=True):
msg.append("data is almost but not quite the same")
else:
msg.append("data is different")
elif not np.array_equal(ref, cur):
if np.issubdtype(ref.dtype, np.inexact) and np.issubdtype(
cur.dtype, np.inexact) and np.allclose(ref, cur):
msg.append("data is almost but not quite the same")
else:
msg.append("data is different")
msg.append("data is different")
return as_txt(msg)


Expand Down
