From 3dd527e3ed8e23eaa5fe666af775dd7f2c9a08e2 Mon Sep 17 00:00:00 2001 From: nlensse1 Date: Wed, 29 Nov 2023 14:33:29 -0500 Subject: [PATCH] Check if time variables are being added a second time --- CHANGELOG.md | 1 + podaac/subsetter/subset.py | 23 +++++++++++------------ tests/test_subset.py | 28 ++++++++++++++-------------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c55c66a..119c321f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [issue/209](https://github.com/podaac/l2ss-py/issues/209): Update code so dims are created if they don't already exists. - Update way we modify shapefile from 180 to 360 lon lats. - [issue/205](https://github.com/podaac/l2ss-py/issues/205): Add coding capability for when groups have same dimension names but different values. Xarray rename dims is utilized +- [issue/220](https://github.com/podaac/l2ss-py/issues/220): Check if the time variables being found haven't already been found. Add total_time_vars as an extra argument to compute_time_variable_name ### Security ## [2.6.0] diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py index 6d1f5210..12bb86f0 100644 --- a/podaac/subsetter/subset.py +++ b/podaac/subsetter/subset.py @@ -487,7 +487,7 @@ def get_spatial_bounds(dataset: xr.Dataset, lat_var_names: str, lon_var_names: s return np.array([[min_lon, max_lon], [min_lat, max_lat]]) -def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable) -> str: +def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_time_vars: list) -> str: """ Try to determine the name of the 'time' variable. 
This is done as follows: @@ -512,7 +512,6 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable) -> str ValueError If the time variable could not be determined """ - time_vars = find_matching_coords(dataset, ['time']) if time_vars: # There should only be one time var match (this is called once @@ -523,10 +522,10 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable) -> str time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.dims.keys())) for var_name in time_vars: - if "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims: + if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims: return var_name for var_name in list(dataset.data_vars.keys()): - if "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims: + if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims: return var_name # first check if any variables are named 'time' @@ -534,7 +533,7 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable) -> str var_name_time = var_name.strip(GROUP_DELIM).split(GROUP_DELIM)[-1] if len(dataset[var_name].squeeze().dims) == 0: continue - if ('time' == var_name_time.lower() or 'timeMidScan' == var_name_time) and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims: + if var_name not in total_time_vars and ('time' == var_name_time.lower() or 'timeMidScan' == var_name_time) and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims: return var_name # then check if any variables have 'time' in the string if the above loop doesn't return anything @@ -542,7 +541,7 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable) -> str var_name_time = var_name.strip(GROUP_DELIM).split(GROUP_DELIM)[-1] if len(dataset[var_name].squeeze().dims) == 0: continue - if 'time' in var_name_time.lower() 
and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims: + if var_name not in total_time_vars and 'time' in var_name_time.lower() and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims: return var_name raise ValueError('Unable to determine time variable') @@ -942,7 +941,7 @@ def subset_with_bbox(dataset: xr.Dataset, # pylint: disable=too-many-branches group_vars = list(dataset.keys()) group_dataset = dataset[group_vars] - + print (time_var_name) # Calculate temporal conditions temporal_cond = build_temporal_cond(min_time, max_time, group_dataset, time_var_name) @@ -1060,11 +1059,11 @@ def get_coordinate_variable_names(dataset: xr.Dataset, if not lat_var_names or not lon_var_names: lat_var_names, lon_var_names = compute_coordinate_variable_names(dataset) if not time_var_names: - time_var_names = [ - compute_time_variable_name( - dataset, dataset[lat_var_name] - ) for lat_var_name in lat_var_names - ] + time_var_names = [] + for lat_var_name in lat_var_names: + time_var_names.append(compute_time_variable_name(dataset, + dataset[lat_var_name], + time_var_names)) #for lat_var_name in lat_var_names time_var_names.append(compute_utc_name(dataset)) time_var_names = [x for x in time_var_names if x is not None] # remove Nones and any duplicates diff --git a/tests/test_subset.py b/tests/test_subset.py index 117b45af..a9a78679 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -165,7 +165,7 @@ def test_subset_variables(test_file, data_dir, subset_output_dir, request): time_var_name = None try: lat_var_name = subset.compute_coordinate_variable_names(in_ds)[0][0] - time_var_name = subset.compute_time_variable_name(in_ds, in_ds[lat_var_name]) + time_var_name = subset.compute_time_variable_name(in_ds, in_ds[lat_var_name], []) except ValueError: # unable to determine lon lat vars pass @@ -1263,7 +1263,7 @@ def test_get_time_variable_name(test_file, data_dir): ds = xr.open_dataset(xr.backends.NetCDF4DataStore(ds), **args) lat_var_name = 
subset.compute_coordinate_variable_names(ds)[0][0] - time_var_name = subset.compute_time_variable_name(ds, ds[lat_var_name]) + time_var_name = subset.compute_time_variable_name(ds, ds[lat_var_name], []) assert time_var_name is not None assert 'time' in time_var_name @@ -1506,7 +1506,7 @@ def test_get_time_squeeze(data_dir, subset_output_dir): **args ) as dataset: lat_var_name = subset.compute_coordinate_variable_names(dataset)[0][0] - time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name]) + time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name], []) lat_dims = dataset[lat_var_name].squeeze().dims time_dims = dataset[time_var_name].squeeze().dims assert lat_dims == time_dims @@ -1534,7 +1534,7 @@ def test_get_indexers_nd(data_dir, subset_output_dir): ) as dataset: lat_var_name = subset.compute_coordinate_variable_names(dataset)[0][0] lon_var_name = subset.compute_coordinate_variable_names(dataset)[1][0] - time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name]) + time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name], []) oper = operator.and_ cond = oper( @@ -1725,11 +1725,11 @@ def test_get_time_epoch_var(data_dir, subset_output_dir): **args ) as dataset: lat_var_names, _ = subset.compute_coordinate_variable_names(dataset) - time_var_names = [ - subset.compute_time_variable_name( - dataset, dataset[lat_var_name] - ) for lat_var_name in lat_var_names - ] + time_var_names = [] + for lat_var_name in lat_var_names: + time_var_names.append(subset.compute_time_variable_name( + dataset, dataset[lat_var_name], time_var_names + )) epoch_time_var = subset.get_time_epoch_var(dataset, time_var_names[0]) assert epoch_time_var.split('__')[-1] == 'time' @@ -2019,11 +2019,11 @@ def test_get_time_OMI(data_dir, subset_output_dir): **args ) as dataset: lat_var_names, _ = subset.compute_coordinate_variable_names(dataset) - time_var_names = [ - subset.compute_time_variable_name( - 
dataset, dataset[lat_var_name] - ) for lat_var_name in lat_var_names - ] + time_var_names = [] + for lat_var_name in lat_var_names: + time_var_names.append(subset.compute_time_variable_name( + dataset, dataset[lat_var_name], time_var_names + )) assert "Time" in time_var_names[0] assert "Latitude" in lat_var_names[0]