Skip to content

Commit

Permalink
Check if time variables are being added a second time
Browse files Browse the repository at this point in the history
  • Loading branch information
nlensse1 committed Nov 29, 2023
1 parent 50c55b7 commit 3dd527e
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 26 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [issue/209](https://github.com/podaac/l2ss-py/issues/209): Update code so dims are created if they don't already exist.
- Update way we modify shapefile from 180 to 360 lon lats.
- [issue/205](https://github.com/podaac/l2ss-py/issues/205): Add coding capability for when groups have same dimension names but different values. Xarray rename dims is utilized
- [issue/220](https://github.com/podaac/l2ss-py/issues/220): Check that the time variables being found haven't already been found. Add total_time_vars as an extra argument in compute_time_variable_name
### Security

## [2.6.0]
Expand Down
23 changes: 11 additions & 12 deletions podaac/subsetter/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def get_spatial_bounds(dataset: xr.Dataset, lat_var_names: str, lon_var_names: s
return np.array([[min_lon, max_lon], [min_lat, max_lat]])


def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable) -> str:
def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_time_vars: list) -> str:
"""
Try to determine the name of the 'time' variable. This is done as
follows:
Expand All @@ -512,7 +512,6 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable) -> str
ValueError
If the time variable could not be determined
"""

time_vars = find_matching_coords(dataset, ['time'])
if time_vars:
# There should only be one time var match (this is called once
Expand All @@ -523,26 +522,26 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable) -> str
time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.dims.keys()))

for var_name in time_vars:
if "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
return var_name
for var_name in list(dataset.data_vars.keys()):
if "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
return var_name

# first check if any variables are named 'time'
for var_name in list(dataset.data_vars.keys()):
var_name_time = var_name.strip(GROUP_DELIM).split(GROUP_DELIM)[-1]
if len(dataset[var_name].squeeze().dims) == 0:
continue
if ('time' == var_name_time.lower() or 'timeMidScan' == var_name_time) and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
if var_name not in total_time_vars and ('time' == var_name_time.lower() or 'timeMidScan' == var_name_time) and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
return var_name

# then check if any variables have 'time' in the string if the above loop doesn't return anything
for var_name in list(dataset.data_vars.keys()):
var_name_time = var_name.strip(GROUP_DELIM).split(GROUP_DELIM)[-1]
if len(dataset[var_name].squeeze().dims) == 0:
continue
if 'time' in var_name_time.lower() and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
if var_name not in total_time_vars and 'time' in var_name_time.lower() and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
return var_name

raise ValueError('Unable to determine time variable')
Expand Down Expand Up @@ -942,7 +941,7 @@ def subset_with_bbox(dataset: xr.Dataset, # pylint: disable=too-many-branches
group_vars = list(dataset.keys())

group_dataset = dataset[group_vars]

print (time_var_name)
# Calculate temporal conditions
temporal_cond = build_temporal_cond(min_time, max_time, group_dataset, time_var_name)

Expand Down Expand Up @@ -1060,11 +1059,11 @@ def get_coordinate_variable_names(dataset: xr.Dataset,
if not lat_var_names or not lon_var_names:
lat_var_names, lon_var_names = compute_coordinate_variable_names(dataset)
if not time_var_names:
time_var_names = [
compute_time_variable_name(
dataset, dataset[lat_var_name]
) for lat_var_name in lat_var_names
]
time_var_names = []
for lat_var_name in lat_var_names:
time_var_names.append(compute_time_variable_name(dataset,
dataset[lat_var_name],
time_var_names)) #for lat_var_name in lat_var_names
time_var_names.append(compute_utc_name(dataset))
time_var_names = [x for x in time_var_names if x is not None] # remove Nones and any duplicates

Expand Down
28 changes: 14 additions & 14 deletions tests/test_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def test_subset_variables(test_file, data_dir, subset_output_dir, request):
time_var_name = None
try:
lat_var_name = subset.compute_coordinate_variable_names(in_ds)[0][0]
time_var_name = subset.compute_time_variable_name(in_ds, in_ds[lat_var_name])
time_var_name = subset.compute_time_variable_name(in_ds, in_ds[lat_var_name], [])
except ValueError:
# unable to determine lon lat vars
pass
Expand Down Expand Up @@ -1263,7 +1263,7 @@ def test_get_time_variable_name(test_file, data_dir):
ds = xr.open_dataset(xr.backends.NetCDF4DataStore(ds), **args)

lat_var_name = subset.compute_coordinate_variable_names(ds)[0][0]
time_var_name = subset.compute_time_variable_name(ds, ds[lat_var_name])
time_var_name = subset.compute_time_variable_name(ds, ds[lat_var_name], [])

assert time_var_name is not None
assert 'time' in time_var_name
Expand Down Expand Up @@ -1506,7 +1506,7 @@ def test_get_time_squeeze(data_dir, subset_output_dir):
**args
) as dataset:
lat_var_name = subset.compute_coordinate_variable_names(dataset)[0][0]
time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name])
time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name], [])
lat_dims = dataset[lat_var_name].squeeze().dims
time_dims = dataset[time_var_name].squeeze().dims
assert lat_dims == time_dims
Expand Down Expand Up @@ -1534,7 +1534,7 @@ def test_get_indexers_nd(data_dir, subset_output_dir):
) as dataset:
lat_var_name = subset.compute_coordinate_variable_names(dataset)[0][0]
lon_var_name = subset.compute_coordinate_variable_names(dataset)[1][0]
time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name])
time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name], [])
oper = operator.and_

cond = oper(
Expand Down Expand Up @@ -1725,11 +1725,11 @@ def test_get_time_epoch_var(data_dir, subset_output_dir):
**args
) as dataset:
lat_var_names, _ = subset.compute_coordinate_variable_names(dataset)
time_var_names = [
subset.compute_time_variable_name(
dataset, dataset[lat_var_name]
) for lat_var_name in lat_var_names
]
time_var_names = []
for lat_var_name in lat_var_names:
time_var_names.append(subset.compute_time_variable_name(
dataset, dataset[lat_var_name], time_var_names
))
epoch_time_var = subset.get_time_epoch_var(dataset, time_var_names[0])

assert epoch_time_var.split('__')[-1] == 'time'
Expand Down Expand Up @@ -2019,11 +2019,11 @@ def test_get_time_OMI(data_dir, subset_output_dir):
**args
) as dataset:
lat_var_names, _ = subset.compute_coordinate_variable_names(dataset)
time_var_names = [
subset.compute_time_variable_name(
dataset, dataset[lat_var_name]
) for lat_var_name in lat_var_names
]
time_var_names = []
for lat_var_name in lat_var_names:
time_var_names.append(subset.compute_time_variable_name(
dataset, dataset[lat_var_name], time_var_names
))
assert "Time" in time_var_names[0]
assert "Latitude" in lat_var_names[0]

Expand Down

0 comments on commit 3dd527e

Please sign in to comment.