Skip to content

Commit

Permalink
Added new operators for statistics preprocessor (e.g., percentile) a…
Browse files Browse the repository at this point in the history
…nd allowed arbitrary kwargs (#2191)

Co-authored-by: Valeriu Predoi <[email protected]>
Co-authored-by: Klaus Zimmermann <[email protected]>
  • Loading branch information
3 people authored Oct 10, 2023
1 parent 0cea725 commit dde0755
Show file tree
Hide file tree
Showing 20 changed files with 1,822 additions and 484 deletions.
371 changes: 285 additions & 86 deletions doc/recipe/preprocessor.rst

Large diffs are not rendered by default.

115 changes: 66 additions & 49 deletions esmvalcore/_recipe/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,21 @@

import logging
import os
import re
import subprocess
from inspect import getfullargspec
from pprint import pformat
from shutil import which
from typing import Any, Iterable

import isodate
import yamale

import esmvalcore.preprocessor
from esmvalcore.exceptions import InputFilesNotFound, RecipeError
from esmvalcore.local import _get_start_end_year, _parse_period
from esmvalcore.preprocessor import TIME_PREPROCESSORS, PreprocessingTask
from esmvalcore.preprocessor._multimodel import STATISTIC_MAPPING
from esmvalcore.preprocessor._multimodel import _get_operator_and_kwargs
from esmvalcore.preprocessor._shared import get_iris_aggregator
from esmvalcore.preprocessor._supplementary_vars import (
PREPROCESSOR_SUPPLEMENTARIES,
)
Expand Down Expand Up @@ -256,20 +258,6 @@ def extract_shape(settings):
"{}".format(', '.join(f"'{k}'".lower() for k in valid[key])))


def _verify_statistics(statistics, step):
    """Validate the names of the requested multi-model statistics.

    An entry is accepted when it equals ``'std'``, is a key of
    ``STATISTIC_MAPPING``, or matches the percentile-style pattern
    (``p`` plus one or two digits, optionally followed by a dot and
    further digits).

    Parameters
    ----------
    statistics:
        Iterable of statistic names to validate.
    step:
        Name of the preprocessor step; only used in the error message.

    Raises
    ------
    RecipeError
        If an entry is neither a valid name nor matches a valid pattern.
    """
    valid_names = ['std'] + list(STATISTIC_MAPPING.keys())
    valid_patterns = [r"^(p\d{1,2})(\.\d*)?$"]
    # The alternation of all patterns does not depend on the entry checked.
    combined_pattern = r'|'.join(valid_patterns)

    for statistic in statistics:
        if statistic in valid_names:
            continue
        if re.match(combined_pattern, statistic):
            continue
        raise RecipeError(
            "Invalid value encountered for `statistic` in preprocessor "
            f"{step}. Valid values are {valid_names} "
            f"or patterns matching {valid_patterns}. Got '{statistic}'.")


def _verify_span_value(span):
"""Raise error if span argument cannot be verified."""
valid_names = ('overlap', 'full')
Expand Down Expand Up @@ -305,26 +293,8 @@ def _verify_ignore_scalar_coords(ignore_scalar_coords):
f"{ignore_scalar_coords}.")


def _verify_arguments(given, expected):
"""Raise error if arguments cannot be verified."""
for key in given:
if key not in expected:
raise RecipeError(
f"Unexpected keyword argument encountered: {key}. Valid "
f"keywords are: {expected}.")


def multimodel_statistics_preproc(settings):
"""Check that the multi-model settings are valid."""
valid_keys = [
'groupby',
'ignore_scalar_coords',
'keep_input_datasets',
'span',
'statistics',
]
_verify_arguments(settings.keys(), valid_keys)

span = settings.get('span', None) # optional, default: overlap
if span:
_verify_span_value(span)
Expand All @@ -333,10 +303,6 @@ def multimodel_statistics_preproc(settings):
if groupby:
_verify_groupby(groupby)

statistics = settings.get('statistics', None) # required
if statistics:
_verify_statistics(statistics, 'multi_model_statistics')

keep_input_datasets = settings.get('keep_input_datasets', True)
_verify_keep_input_datasets(keep_input_datasets)

Expand All @@ -346,21 +312,10 @@ def multimodel_statistics_preproc(settings):

def ensemble_statistics_preproc(settings):
"""Check that the ensemble settings are valid."""
valid_keys = [
'ignore_scalar_coords',
'span',
'statistics',
]
_verify_arguments(settings.keys(), valid_keys)

span = settings.get('span', 'overlap') # optional, default: overlap
if span:
_verify_span_value(span)

statistics = settings.get('statistics', None)
if statistics:
_verify_statistics(statistics, 'ensemble_statistics')

ignore_scalar_coords = settings.get('ignore_scalar_coords', False)
_verify_ignore_scalar_coords(ignore_scalar_coords)

Expand Down Expand Up @@ -456,3 +411,65 @@ def reference_for_bias_preproc(products):
f"{len(reference_products):d}{ref_products_str}Please also "
f"ensure that the reference dataset is not excluded with the "
f"'exclude' option")


def statistics_preprocessors(settings: dict) -> None:
    """Check the options of all statistics preprocessors in ``settings``.

    Parameters
    ----------
    settings:
        Mapping of preprocessor step name to the settings of that step.
        Steps whose name does not contain ``'_statistics'`` are ignored.
    """
    multi_dataset_steps = (
        'multi_model_statistics',
        'ensemble_statistics',
    )
    for step_name in settings:
        options = settings[step_name]

        # Multi-model-like steps: every entry of `statistics` is checked.
        if step_name in multi_dataset_steps:
            _check_mm_stat(step_name, options)
            continue

        # Remaining statistics steps: check optional kwargs for the operator.
        if '_statistics' in step_name:
            _check_regular_stat(step_name, options)


def _check_regular_stat(step, step_settings):
    """Check regular statistics (non-multi-model statistics) step.

    Parameters
    ----------
    step:
        Name of the preprocessor step; looked up as an attribute of
        ``esmvalcore.preprocessor``.
    step_settings:
        Settings of that step. The mapping is copied, so the caller's
        object is not modified.

    Raises
    ------
    RecipeError
        If ``get_iris_aggregator`` rejects the operator or its keyword
        arguments with a ``ValueError``. The original exception is
        attached as the cause.
    """
    # Work on a copy so that popping 'operator' does not alter the caller's
    # settings.
    step_settings = dict(step_settings)

    # Some preprocessors like climate_statistics use default 'mean' for
    # operator. If 'operator' is missing for those preprocessors with no
    # default, this will be detected in PreprocessorFile.check() later.
    operator = step_settings.pop('operator', 'mean')

    # If preprocessor does not exist, do nothing here; this will be detected in
    # PreprocessorFile.check() later.
    try:
        preproc_func = getattr(esmvalcore.preprocessor, step)
    except AttributeError:
        return

    # Ignore other preprocessor arguments, e.g., 'hours' for hourly_statistics.
    # The first positional parameter is skipped (presumably the input cube --
    # TODO confirm). Keyword-only parameters are named preprocessor arguments
    # as well, so they are excluded from the operator kwargs too.
    argspec = getfullargspec(preproc_func)
    other_args = set(argspec.args[1:]) | set(argspec.kwonlyargs)
    operator_kwargs = {
        k: v for (k, v) in step_settings.items() if k not in other_args
    }
    try:
        get_iris_aggregator(operator, **operator_kwargs)
    except ValueError as exc:
        # Chain explicitly so the traceback shows the ValueError as the direct
        # cause instead of "during handling ... another exception occurred".
        raise RecipeError(
            f"Invalid options for {step}: {exc}"
        ) from exc


def _check_mm_stat(step, step_settings):
"""Check multi-model statistic step."""
statistics = step_settings.get('statistics', [])
for stat in statistics:
try:
(operator, kwargs) = _get_operator_and_kwargs(stat)
except ValueError as exc:
raise RecipeError(str(exc))
try:
get_iris_aggregator(operator, **kwargs)
except ValueError as exc:
raise RecipeError(
f"Invalid options for {step}: {exc}"
)
15 changes: 8 additions & 7 deletions esmvalcore/_recipe/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@
from esmvalcore.config._config import TASKSEP
from esmvalcore.config._diagnostics import TAGS
from esmvalcore.dataset import Dataset
from esmvalcore.exceptions import (
InputFilesNotFound,
RecipeError,
)
from esmvalcore.exceptions import InputFilesNotFound, RecipeError
from esmvalcore.local import (
_dates_to_timerange,
_get_multiproduct_filename,
Expand All @@ -39,6 +36,7 @@
PreprocessorFile,
)
from esmvalcore.preprocessor._area import _update_shapefile_path
from esmvalcore.preprocessor._multimodel import _get_stat_identifier
from esmvalcore.preprocessor._other import _group_products
from esmvalcore.preprocessor._regrid import (
_spec_to_latlonvals,
Expand Down Expand Up @@ -416,9 +414,11 @@ def _update_multiproduct(input_products, order, preproc_dir, step):
for identifier, products in _group_products(products, by_key=grouping):
common_attributes = _get_common_attributes(products, settings)

for statistic in settings.get('statistics', []):
statistics = settings.get('statistics', [])
for statistic in statistics:
statistic_attributes = dict(common_attributes)
statistic_attributes[step] = _get_tag(step, identifier, statistic)
stat_id = _get_stat_identifier(statistic)
statistic_attributes[step] = _get_tag(step, identifier, stat_id)
statistic_attributes.setdefault('alias',
statistic_attributes[step])
statistic_attributes.setdefault('dataset',
Expand All @@ -432,7 +432,7 @@ def _update_multiproduct(input_products, order, preproc_dir, step):
) # Note that ancestors is set when running the preprocessor func.
output_products.add(statistic_product)
relevant_settings['output_products'][identifier][
statistic] = statistic_product
stat_id] = statistic_product

return output_products, relevant_settings

Expand Down Expand Up @@ -637,6 +637,7 @@ def _update_preproc_functions(settings, dataset, datasets, missing_vars):
_update_regrid_time(dataset, settings)
if dataset.facets.get('frequency') == 'fx':
check.check_for_temporal_preprocs(settings)
check.statistics_preprocessors(settings)


def _get_preprocessor_task(datasets, profiles, task_name):
Expand Down
4 changes: 4 additions & 0 deletions esmvalcore/cmor/_fixes/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ def add_plev_from_altitude(cube):
)
pressure_coord = iris.coords.AuxCoord(pressure_points,
bounds=pressure_bounds,
var_name='plev',
standard_name='air_pressure',
long_name='pressure',
units='Pa')
cube.add_aux_coord(pressure_coord, cube.coord_dims(height_coord))
return
Expand Down Expand Up @@ -145,7 +147,9 @@ def add_altitude_from_plev(cube):
)
altitude_coord = iris.coords.AuxCoord(altitude_points,
bounds=altitude_bounds,
var_name='alt',
standard_name='altitude',
long_name='altitude',
units='m')
cube.add_aux_coord(altitude_coord, cube.coord_dims(plev_coord))
return
Expand Down
Loading

0 comments on commit dde0755

Please sign in to comment.