Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added new operators for statistics preprocessor (e.g., percentile) and allowed arbitrary kwargs #2191

Merged
merged 42 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
64416c3
Cleaned and extended statistical preprocs
schlunma Sep 1, 2023
df2be18
Added further tests
schlunma Sep 5, 2023
ab14a70
Fixed existing test and added new ones
schlunma Sep 6, 2023
01ff248
Simplified weights handling
schlunma Sep 6, 2023
74149a8
Fixed axis_statistics
schlunma Sep 6, 2023
9c2ded3
Removed prints
schlunma Sep 6, 2023
a68dbab
Expanded tests
schlunma Sep 6, 2023
1055a27
Expanded MM stats tests
schlunma Sep 6, 2023
38f31af
Ignored irrelevant warnings when using weights and fixed bug in check…
schlunma Sep 7, 2023
6723c3c
Fixed tests
schlunma Sep 7, 2023
a00adc5
Improved test coverage
schlunma Sep 7, 2023
c9b2165
Make sure to drop weights coordinates
schlunma Sep 7, 2023
0c5f9b6
Fixed doc build
schlunma Sep 7, 2023
fef4949
Optimized doc
schlunma Sep 7, 2023
8fa58d1
Tried another doc optimization
schlunma Sep 7, 2023
ba00590
Fixed multi_model_stats
schlunma Sep 7, 2023
1ef7a50
Fixed test
schlunma Sep 7, 2023
0a116e4
Do not automatically use iris.analysis.PERCENTILE instead of MEDIAN, …
schlunma Sep 7, 2023
d6165a7
Fixed tests
schlunma Sep 7, 2023
ee59631
Merge branch 'main' into stat_preprocs
schlunma Sep 27, 2023
d6535ca
Changed multi_model_statistics API
schlunma Sep 28, 2023
ed175c3
Merge remote-tracking branch 'origin/main' into stat_preprocs
schlunma Sep 28, 2023
9cd9498
Changed API of statistics preprocs
schlunma Sep 28, 2023
561f2f4
Merge branch 'main' into stat_preprocs
schlunma Oct 2, 2023
b0568e5
Merge remote-tracking branch 'origin/main' into stat_preprocs
schlunma Oct 6, 2023
db46e61
Merge branch 'main' into stat_preprocs
valeriupredoi Oct 9, 2023
9258b04
Update doc/recipe/preprocessor.rst
schlunma Oct 9, 2023
68ce7c6
Update doc/recipe/preprocessor.rst
schlunma Oct 9, 2023
dcf67fc
Update doc/recipe/preprocessor.rst
schlunma Oct 9, 2023
60785a2
Update esmvalcore/preprocessor/_multimodel.py
schlunma Oct 9, 2023
d4a4414
Adjust overlong line
Oct 9, 2023
9f54bab
Update esmvalcore/preprocessor/_shared.py
schlunma Oct 9, 2023
89cb06d
Update esmvalcore/preprocessor/_shared.py
schlunma Oct 9, 2023
8415631
Update esmvalcore/preprocessor/_shared.py
schlunma Oct 9, 2023
4c45916
Final review comments and cleanup
schlunma Oct 9, 2023
548f61f
Fixed tests
schlunma Oct 9, 2023
7b4f1d6
'Simplify' multicube_statistics to please Codacy
schlunma Oct 9, 2023
dbf1951
Simplify check.statistics_preprocessors to please Codacy
schlunma Oct 9, 2023
97884c3
Make sure climate_statistics and axis_statistics do not change input …
schlunma Oct 9, 2023
42f244e
Make sure area_stats and axis_stats do not change input cube
schlunma Oct 9, 2023
41820ac
Make time_weights coordinate used by esmvaltool "private"
schlunma Oct 9, 2023
6481245
Merge branch 'main' into stat_preprocs
schlunma Oct 9, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
371 changes: 285 additions & 86 deletions doc/recipe/preprocessor.rst

Large diffs are not rendered by default.

115 changes: 66 additions & 49 deletions esmvalcore/_recipe/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,21 @@

import logging
import os
import re
import subprocess
from inspect import getfullargspec
from pprint import pformat
from shutil import which
from typing import Any, Iterable

import isodate
import yamale

import esmvalcore.preprocessor
from esmvalcore.exceptions import InputFilesNotFound, RecipeError
from esmvalcore.local import _get_start_end_year, _parse_period
from esmvalcore.preprocessor import TIME_PREPROCESSORS, PreprocessingTask
from esmvalcore.preprocessor._multimodel import STATISTIC_MAPPING
from esmvalcore.preprocessor._multimodel import _get_operator_and_kwargs
from esmvalcore.preprocessor._shared import get_iris_aggregator
from esmvalcore.preprocessor._supplementary_vars import (
PREPROCESSOR_SUPPLEMENTARIES,
)
Expand Down Expand Up @@ -256,20 +258,6 @@ def extract_shape(settings):
"{}".format(', '.join(f"'{k}'".lower() for k in valid[key])))


def _verify_statistics(statistics, step):
    """Validate the names given in ``statistics`` for preprocessor ``step``.

    A statistic is accepted if it is ``'std'``, a key of
    ``STATISTIC_MAPPING``, or matches one of the percentile-like patterns
    listed below; anything else raises a :class:`RecipeError`.
    """
    valid_names = ['std', *STATISTIC_MAPPING.keys()]
    valid_patterns = [r"^(p\d{1,2})(\.\d*)?$"]
    # The alternation does not depend on the loop variable; build it once.
    any_valid_pattern = r'|'.join(valid_patterns)

    for statistic in statistics:
        if statistic in valid_names:
            continue
        if re.match(any_valid_pattern, statistic):
            continue
        raise RecipeError(
            "Invalid value encountered for `statistic` in preprocessor "
            f"{step}. Valid values are {valid_names} "
            f"or patterns matching {valid_patterns}. Got '{statistic}'.")


def _verify_span_value(span):
"""Raise error if span argument cannot be verified."""
valid_names = ('overlap', 'full')
Expand Down Expand Up @@ -305,26 +293,8 @@ def _verify_ignore_scalar_coords(ignore_scalar_coords):
f"{ignore_scalar_coords}.")


def _verify_arguments(given, expected):
"""Raise error if arguments cannot be verified."""
for key in given:
if key not in expected:
raise RecipeError(
f"Unexpected keyword argument encountered: {key}. Valid "
f"keywords are: {expected}.")


def multimodel_statistics_preproc(settings):
"""Check that the multi-model settings are valid."""
valid_keys = [
'groupby',
'ignore_scalar_coords',
'keep_input_datasets',
'span',
'statistics',
]
_verify_arguments(settings.keys(), valid_keys)

span = settings.get('span', None) # optional, default: overlap
if span:
_verify_span_value(span)
Expand All @@ -333,10 +303,6 @@ def multimodel_statistics_preproc(settings):
if groupby:
_verify_groupby(groupby)

statistics = settings.get('statistics', None) # required
if statistics:
_verify_statistics(statistics, 'multi_model_statistics')

keep_input_datasets = settings.get('keep_input_datasets', True)
_verify_keep_input_datasets(keep_input_datasets)

Expand All @@ -346,21 +312,10 @@ def multimodel_statistics_preproc(settings):

def ensemble_statistics_preproc(settings):
"""Check that the ensemble settings are valid."""
valid_keys = [
'ignore_scalar_coords',
'span',
'statistics',
]
_verify_arguments(settings.keys(), valid_keys)

span = settings.get('span', 'overlap') # optional, default: overlap
if span:
_verify_span_value(span)

statistics = settings.get('statistics', None)
if statistics:
_verify_statistics(statistics, 'ensemble_statistics')

ignore_scalar_coords = settings.get('ignore_scalar_coords', False)
_verify_ignore_scalar_coords(ignore_scalar_coords)

Expand Down Expand Up @@ -456,3 +411,65 @@ def reference_for_bias_preproc(products):
f"{len(reference_products):d}{ref_products_str}Please also "
f"ensure that the reference dataset is not excluded with the "
f"'exclude' option")


def statistics_preprocessors(settings: dict) -> None:
    """Check options of statistics preprocessors.

    Parameters
    ----------
    settings:
        Mapping of preprocessor step names to the settings of that step.
        Steps that are not statistics preprocessors are ignored.

    Raises
    ------
    RecipeError
        Propagated from the per-step checks if a statistics step has
        invalid options.
    """
    mm_stats = (
        'multi_model_statistics',
        'ensemble_statistics',
    )
    for (step, step_settings) in settings.items():

        # For multi-model statistics, we need to check each entry of statistics
        if step in mm_stats:
            _check_mm_stat(step, step_settings)

        # For other statistics, check optional kwargs for operator
        elif '_statistics' in step:
            _check_regular_stat(step, step_settings)


def _check_regular_stat(step, step_settings):
    """Check regular statistics (non-multi-model statistics) step.

    Parameters
    ----------
    step:
        Name of the preprocessor step; looked up as an attribute of
        ``esmvalcore.preprocessor``.
    step_settings:
        Settings of that step. They are copied first, so the mapping
        passed in is not modified.

    Raises
    ------
    RecipeError
        If ``get_iris_aggregator`` rejects the operator or its keyword
        arguments with a ``ValueError``. The original error is attached as
        the cause.
    """
    # Work on a copy so that pop() below does not alter the caller's dict
    step_settings = dict(step_settings)

    # Some preprocessors like climate_statistics use default 'mean' for
    # operator. If 'operator' is missing for those preprocessors with no
    # default, this will be detected in PreprocessorFile.check() later.
    operator = step_settings.pop('operator', 'mean')

    # If preprocessor does not exist, do nothing here; this will be detected in
    # PreprocessorFile.check() later.
    try:
        preproc_func = getattr(esmvalcore.preprocessor, step)
    except AttributeError:
        return

    # Ignore other preprocessor arguments, e.g., 'hours' for hourly_statistics.
    # The first positional argument is skipped (presumably the input cube --
    # TODO confirm); only the remaining settings are passed on as operator
    # kwargs.
    other_args = getfullargspec(preproc_func).args[1:]
    operator_kwargs = {
        k: v for (k, v) in step_settings.items() if k not in other_args
    }
    try:
        get_iris_aggregator(operator, **operator_kwargs)
    except ValueError as exc:
        # Chain explicitly so the traceback marks the ValueError as the cause
        raise RecipeError(
            f"Invalid options for {step}: {exc}"
        ) from exc


def _check_mm_stat(step, step_settings):
"""Check multi-model statistic step."""
statistics = step_settings.get('statistics', [])
for stat in statistics:
try:
(operator, kwargs) = _get_operator_and_kwargs(stat)
except ValueError as exc:
raise RecipeError(str(exc))
try:
get_iris_aggregator(operator, **kwargs)
except ValueError as exc:
raise RecipeError(
f"Invalid options for {step}: {exc}"
)
15 changes: 8 additions & 7 deletions esmvalcore/_recipe/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@
from esmvalcore.config._config import TASKSEP
from esmvalcore.config._diagnostics import TAGS
from esmvalcore.dataset import Dataset
from esmvalcore.exceptions import (
InputFilesNotFound,
RecipeError,
)
from esmvalcore.exceptions import InputFilesNotFound, RecipeError
from esmvalcore.local import (
_dates_to_timerange,
_get_multiproduct_filename,
Expand All @@ -39,6 +36,7 @@
PreprocessorFile,
)
from esmvalcore.preprocessor._area import _update_shapefile_path
from esmvalcore.preprocessor._multimodel import _get_stat_identifier
from esmvalcore.preprocessor._other import _group_products
from esmvalcore.preprocessor._regrid import (
_spec_to_latlonvals,
Expand Down Expand Up @@ -416,9 +414,11 @@ def _update_multiproduct(input_products, order, preproc_dir, step):
for identifier, products in _group_products(products, by_key=grouping):
common_attributes = _get_common_attributes(products, settings)

for statistic in settings.get('statistics', []):
statistics = settings.get('statistics', [])
for statistic in statistics:
statistic_attributes = dict(common_attributes)
statistic_attributes[step] = _get_tag(step, identifier, statistic)
stat_id = _get_stat_identifier(statistic)
statistic_attributes[step] = _get_tag(step, identifier, stat_id)
statistic_attributes.setdefault('alias',
statistic_attributes[step])
statistic_attributes.setdefault('dataset',
Expand All @@ -432,7 +432,7 @@ def _update_multiproduct(input_products, order, preproc_dir, step):
) # Note that ancestors is set when running the preprocessor func.
output_products.add(statistic_product)
relevant_settings['output_products'][identifier][
statistic] = statistic_product
stat_id] = statistic_product

return output_products, relevant_settings

Expand Down Expand Up @@ -637,6 +637,7 @@ def _update_preproc_functions(settings, dataset, datasets, missing_vars):
_update_regrid_time(dataset, settings)
if dataset.facets.get('frequency') == 'fx':
check.check_for_temporal_preprocs(settings)
check.statistics_preprocessors(settings)


def _get_preprocessor_task(datasets, profiles, task_name):
Expand Down
4 changes: 4 additions & 0 deletions esmvalcore/cmor/_fixes/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ def add_plev_from_altitude(cube):
)
pressure_coord = iris.coords.AuxCoord(pressure_points,
bounds=pressure_bounds,
var_name='plev',
standard_name='air_pressure',
long_name='pressure',
units='Pa')
cube.add_aux_coord(pressure_coord, cube.coord_dims(height_coord))
return
Expand Down Expand Up @@ -145,7 +147,9 @@ def add_altitude_from_plev(cube):
)
altitude_coord = iris.coords.AuxCoord(altitude_points,
bounds=altitude_bounds,
var_name='alt',
standard_name='altitude',
long_name='altitude',
units='m')
cube.add_aux_coord(altitude_coord, cube.coord_dims(plev_coord))
return
Expand Down
Loading