Skip to content

Commit

Permalink
Merge branch 'develop' into release/2.10.0
Browse files Browse the repository at this point in the history
# Conflicts:
#	pyproject.toml
  • Loading branch information
James Wood authored and James Wood committed May 2, 2024
2 parents 53adc2e + b9742d3 commit e497a68
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]
### Added
- [issue/260](https://github.com/podaac/l2ss-py/pull/261): Add GPM cleanup function to add a timeMidScan variable if one isn't already present. The function takes the Year, Month, DayOfMonth, etc. ScanTime variables and creates a single time variable using the datetime.datetime library.
### Changed
### Deprecated
### Removed
Expand Down
42 changes: 41 additions & 1 deletion podaac/subsetter/gpm_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,34 @@
to nscan, nbin, nfreq by using the DimensionNames variable attribute
"""

import datetime
from netCDF4 import date2num # pylint: disable=no-name-in-module

dim_dict = {}


def change_var_dims(nc_dataset, variables=None):
def compute_new_time_data(time_group, nc_dataset):
    """
    Create the data for a time variable, timeMidScan, that is present in
    other GPM collections but not the ENV collections.

    Parameters
    ----------
    time_group : str
        Flattened group prefix (components joined with '__') of the
        ScanTime group whose Year/Month/DayOfMonth/Hour/Minute/Second/
        MilliSecond variables are combined into one time value per scan.
    nc_dataset : netCDF4.Dataset
        Dataset containing the flattened ScanTime component variables.

    Returns
    -------
    tuple
        (list of float seconds since the GPM epoch, the time-unit string).
    """
    # set the time unit for GPM (the GPS epoch, 1980-01-06)
    time_unit_out = "seconds since 1980-01-06 00:00:00"

    # Read each component array from the dataset exactly once; the original
    # re-read every whole variable ([:]) on each loop iteration.
    years = nc_dataset[time_group + '__Year'][:]
    months = nc_dataset[time_group + '__Month'][:]
    days = nc_dataset[time_group + '__DayOfMonth'][:]
    hours = nc_dataset[time_group + '__Hour'][:]
    minutes = nc_dataset[time_group + '__Minute'][:]
    seconds = nc_dataset[time_group + '__Second'][:]
    millisecs = nc_dataset[time_group + '__MilliSecond'][:]

    # convert to a float, seconds variable (milliseconds -> microseconds
    # for the datetime constructor)
    new_time_list = [
        date2num(
            datetime.datetime(year, month, day,
                              hour=hour, minute=minute, second=second,
                              microsecond=millisec * 1000),
            time_unit_out)
        for year, month, day, hour, minute, second, millisec
        in zip(years, months, days, hours, minutes, seconds, millisecs)
    ]

    return new_time_list, time_unit_out


def change_var_dims(nc_dataset, variables=None, time_name="_timeMidScan"):
"""
Go through each variable and get the dimension names from attribute "DimensionNames
If the name is unique, add it as a dimension to the netCDF4 dataset. Then change the
Expand Down Expand Up @@ -62,4 +86,20 @@ def change_var_dims(nc_dataset, variables=None):
# copy the data to the new variable with dimension names
new_mapped_var[var_name][:] = var[:]

if not any(time_name in var for var in var_list):
# if there isn't any timeMidScan variables, create one
scan_time_groups = ["__".join(i.split('__')[:-1]) for i in var_list if 'ScanTime' in i]
for time_group in list(set(scan_time_groups)):
# get the seconds since Jan 6, 1980
time_data, time_unit = compute_new_time_data(time_group, nc_dataset)
# make a new variable for each ScanTime group
new_time_var_name = time_group+time_name
# copy dimensions from the Year variable
var_dims = nc_dataset.variables[time_group+'__Year'].dimensions
comp_args = {"zlib": True, "complevel": 1}
nc_dataset.createVariable(new_time_var_name, 'f8', var_dims, **comp_args)
nc_dataset.variables[new_time_var_name].setncattr('unit', time_unit)
# copy the data in
nc_dataset.variables[new_time_var_name][:] = time_data

return nc_dataset
3 changes: 3 additions & 0 deletions podaac/subsetter/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1226,6 +1226,9 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str,
except AttributeError:
pass

if hdf_type == 'GPM':
args['decode_times'] = False

with xr.open_dataset(
xr.backends.NetCDF4DataStore(nc_dataset),
**args
Expand Down
Binary file removed tests/data/GPM/GPM_test_file.HDF5
Binary file not shown.
Binary file added tests/data/GPM/GPM_test_file_2.HDF5
Binary file not shown.
13 changes: 7 additions & 6 deletions tests/test_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2288,21 +2288,22 @@ def test_get_unique_groups():
assert expected_groups_single == unique_groups_single
assert expected_diff_counts_single == diff_counts_single

def test_gpm_dimension_map(data_dir, subset_output_dir, request):
"""Test GPM files for dimension mapping and returns the expected netCDF
dataset without the phony dimensions"""

def test_gpm_compute_new_var_data(data_dir, subset_output_dir, request):
"""Test GPM files that have scantime variable to compute the time for seconds
since 1980-01-06"""

gpm_dir = join(data_dir, 'GPM')
gpm_file = 'GPM_test_file.HDF5'
bbox = np.array(((-180, 180), (-90, 90)))
gpm_file = 'GPM_test_file_2.HDF5'
shutil.copyfile(
os.path.join(gpm_dir, gpm_file),
os.path.join(subset_output_dir, gpm_file)
)

nc_dataset, has_groups, file_extension = subset.open_as_nc_dataset(join(subset_output_dir, gpm_file))

nc_dataset = gc.change_var_dims(nc_dataset)
nc_dataset_new = gc.change_var_dims(nc_dataset, variables=None, time_name='__test_time')
assert int(nc_dataset_new.variables["__FS__ScanTime__test_time"][:][0]) == 1306403820

for var_name, var in nc_dataset.variables.items():
dims = list(var.dimensions)
Expand Down

0 comments on commit e497a68

Please sign in to comment.