diff --git a/xee/ext.py b/xee/ext.py index 763fe84..78663ab 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -67,10 +67,8 @@ 'double': np.float64, } -# While this documentation says that the limit is 10 MB... -# https://developers.google.com/earth-engine/guides/usage#request_payload_size -# actual byte limit seems to depend on other factors. This has been found via -# trial & error. +# Earth Engine image:computePixels request is limited to 48 MB +# https://developers.google.com/earth-engine/reference/rest/v1/projects.image/computePixels REQUEST_BYTE_LIMIT = 2**20 * 48 # 48 MBs # Xee uses the ee.ImageCollection.toList function for slicing into an @@ -80,10 +78,12 @@ _TO_LIST_WARNING_LIMIT = 10000 +# Used in ext_test.py. def _check_request_limit(chunks: Dict[str, int], dtype_size: int, limit: int): """Checks that the actual number of bytes exceeds the limit.""" index, width, height = chunks['index'], chunks['width'], chunks['height'] - actual_bytes = index * width * height * dtype_size + # Add one for the mask byte (Earth Engine bytes-per-pixel accounting). + actual_bytes = index * width * height * (dtype_size + 1) if actual_bytes > limit: raise ValueError( f'`chunks="auto"` failed! Actual bytes {actual_bytes!r} exceeds limit' @@ -105,7 +105,7 @@ class EarthEngineStore(common.AbstractDataStore): # "Safe" default chunks that won't exceed the request limit. PREFERRED_CHUNKS: Dict[str, int] = { 'index': 48, - 'width': 512, + 'width': 256, 'height': 256, } @@ -352,20 +352,22 @@ def _auto_chunks( # height and width follow round numbers (powers of two) and allocate the # remaining bytes available for the index length. To illustrate this logic, # let's follow through with an example where: - # request_byte_limit = 2 ** 20 * 10 # = 10 MBs + # request_byte_limit = 2 ** 20 * 48 # = 48 MBs # dtype_bytes = 8 - log_total = np.log2(request_byte_limit) # e.g.=23.32... - log_dtype = np.log2(dtype_bytes) # e.g.=3 + + log_total = np.log2(request_byte_limit) # e.g.=25.58... + # Add one for the mask byte (Earth Engine bytes-per-pixel accounting). + log_dtype = np.log2(dtype_bytes + 1) # e.g.=3.16... log_limit = 10 * (log_total // 10) # e.g.=20 - log_index = log_total - log_limit # e.g.=3.32... + log_index = log_total - log_limit # e.g.=5.58... # Motivation: How do we divide a number N into the closest sum of two ints? - d = (log_limit - np.ceil(log_dtype)) / 2 # e.g.=17/2=8.5 - wd, ht = np.ceil(d), np.floor(d) # e.g. wd=9, ht=8 + d = (log_limit - np.ceil(log_dtype)) / 2 # e.g.=16/2=8.0 + wd, ht = np.ceil(d), np.floor(d) # e.g. wd=8, ht=8 # Put back to byte space, then round to the nearst integer number of bytes. - index = int(np.rint(2**log_index)) # e.g.=10 - width = int(np.rint(2**wd)) # e.g.=512 + index = int(np.rint(2**log_index)) # e.g.=48 + width = int(np.rint(2**wd)) # e.g.=256 height = int(np.rint(2**ht)) # e.g.=256 return {'index': index, 'width': width, 'height': height} diff --git a/xee/ext_test.py b/xee/ext_test.py index ae732c8..6ebd852 100644 --- a/xee/ext_test.py +++ b/xee/ext_test.py @@ -13,32 +13,32 @@ class EEStoreStandardDatatypesTest(parameterized.TestCase): dict( testcase_name='int8', dtype=np.dtype('int8'), - expected_chunks={'index': 48, 'width': 1024, 'height': 1024}, + expected_chunks={'index': 48, 'width': 1024, 'height': 512}, ), dict( testcase_name='int32', dtype=np.dtype('int32'), - expected_chunks={'index': 48, 'width': 512, 'height': 512}, + expected_chunks={'index': 48, 'width': 512, 'height': 256}, ), dict( testcase_name='int64', dtype=np.dtype('int64'), - expected_chunks={'index': 48, 'width': 512, 'height': 256}, + expected_chunks={'index': 48, 'width': 256, 'height': 256}, ), dict( testcase_name='float32', dtype=np.dtype('float32'), - expected_chunks={'index': 48, 'width': 512, 'height': 512}, + expected_chunks={'index': 48, 'width': 512, 'height': 256}, ), dict( testcase_name='float64', dtype=np.dtype('float64'), - expected_chunks={'index': 48, 'width': 512, 'height': 256}, + expected_chunks={'index': 48, 'width': 256, 'height': 256}, ), dict( testcase_name='complex64', dtype=np.dtype('complex64'), - expected_chunks={'index': 48, 'width': 512, 'height': 256}, + expected_chunks={'index': 48, 'width': 256, 'height': 256}, ), ) def test_auto_chunks__handles_standard_dtypes(self, dtype, expected_chunks): @@ -49,7 +49,7 @@ def test_auto_chunks__handles_standard_dtypes(self, dtype, expected_chunks): ) -class EEStoreTest(absltest.TestCase): +class EEStoreTest(parameterized.TestCase): def test_auto_chunks__handles_range_of_dtype_sizes(self): dt = 0 @@ -59,18 +59,36 @@ def test_auto_chunks__handles_range_of_dtype_sizes(self): except ValueError: self.fail(f'Could not handle data type size {dt}.') - def test_auto_chunks__is_optimal_for_powers_of_two(self): - for p in range(10): - dt = 2**p - chunks = xee.EarthEngineStore._auto_chunks(dt) + def test_auto_chunks__matches_observed_values(self): + observed_results = { + 1: 50331648, + 2: 37748736, + 4: 31457280, + 8: 28311552, + 16: 26738688, + 32: 25952256, + 64: 25559040, + 128: 25362432, + 256: 25264128, + 512: 25214976, + } + + for dtype_bytes, expected_bytes in observed_results.items(): + chunks = xee.EarthEngineStore._auto_chunks(dtype_bytes) + actual_bytes = np.prod(list(chunks.values())) * ( + dtype_bytes + 1 + ) # added +1 to account for the mask byte self.assertEqual( - xee.REQUEST_BYTE_LIMIT, np.prod(list(chunks.values())) * dt + expected_bytes, + actual_bytes, + f'dtype_bytes: {dtype_bytes}, Expected: {expected_bytes}, ' + f'Actual: {actual_bytes}, Chunks: {chunks}', ) def test_exceeding_byte_limit__raises_error(self): dtype_size = 8 # does not fail - chunks = {'index': 48, 'width': 512, 'height': 256} + chunks = {'index': 48, 'width': 256, 'height': 256} ext._check_request_limit(chunks, dtype_size, xee.REQUEST_BYTE_LIMIT) # fails