Skip to content

Commit

Permalink
Account for mask byte in chunk size calculation
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 712580830
  • Loading branch information
jdbcode authored and Xee authors committed Jan 7, 2025
1 parent 065db79 commit a972e94
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 27 deletions.
30 changes: 16 additions & 14 deletions xee/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,8 @@
'double': np.float64,
}

# While this documentation says that the limit is 10 MB...
# https://developers.google.com/earth-engine/guides/usage#request_payload_size
# actual byte limit seems to depend on other factors. This has been found via
# trial & error.
# Earth Engine image:computePixels request is limited to 48 MB
# https://developers.google.com/earth-engine/reference/rest/v1/projects.image/computePixels
REQUEST_BYTE_LIMIT = 2**20 * 48 # 48 MBs

# Xee uses the ee.ImageCollection.toList function for slicing into an
Expand All @@ -80,10 +78,12 @@
_TO_LIST_WARNING_LIMIT = 10000


# Used in ext_test.py.
def _check_request_limit(chunks: Dict[str, int], dtype_size: int, limit: int):
"""Checks that the actual number of bytes exceeds the limit."""
index, width, height = chunks['index'], chunks['width'], chunks['height']
actual_bytes = index * width * height * dtype_size
# Add one for the mask byte (Earth Engine bytes-per-pixel accounting).
actual_bytes = index * width * height * (dtype_size + 1)
if actual_bytes > limit:
raise ValueError(
f'`chunks="auto"` failed! Actual bytes {actual_bytes!r} exceeds limit'
Expand All @@ -105,7 +105,7 @@ class EarthEngineStore(common.AbstractDataStore):
# "Safe" default chunks that won't exceed the request limit.
PREFERRED_CHUNKS: Dict[str, int] = {
'index': 48,
'width': 512,
'width': 256,
'height': 256,
}

Expand Down Expand Up @@ -352,20 +352,22 @@ def _auto_chunks(
# height and width follow round numbers (powers of two) and allocate the
# remaining bytes available for the index length. To illustrate this logic,
# let's follow through with an example where:
# request_byte_limit = 2 ** 20 * 10 # = 10 MBs
# request_byte_limit = 2 ** 20 * 48 # = 48 MBs
# dtype_bytes = 8
log_total = np.log2(request_byte_limit) # e.g.=23.32...
log_dtype = np.log2(dtype_bytes) # e.g.=3

log_total = np.log2(request_byte_limit) # e.g.=25.58...
# Add one for the mask byte (Earth Engine bytes-per-pixel accounting).
log_dtype = np.log2(dtype_bytes + 1) # e.g.=3.16...
log_limit = 10 * (log_total // 10) # e.g.=20
log_index = log_total - log_limit # e.g.=3.32...
log_index = log_total - log_limit # e.g.=5.58...

# Motivation: How do we divide a number N into the closest sum of two ints?
d = (log_limit - np.ceil(log_dtype)) / 2 # e.g.=17/2=8.5
wd, ht = np.ceil(d), np.floor(d) # e.g. wd=9, ht=8
d = (log_limit - np.ceil(log_dtype)) / 2 # e.g.=16/2=8.0
wd, ht = np.ceil(d), np.floor(d) # e.g. wd=8, ht=8

# Put back to byte space, then round to the nearest integer number of bytes.
index = int(np.rint(2**log_index)) # e.g.=10
width = int(np.rint(2**wd)) # e.g.=512
index = int(np.rint(2**log_index)) # e.g.=48
width = int(np.rint(2**wd)) # e.g.=256
height = int(np.rint(2**ht)) # e.g.=256

return {'index': index, 'width': width, 'height': height}
Expand Down
44 changes: 31 additions & 13 deletions xee/ext_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,32 @@ class EEStoreStandardDatatypesTest(parameterized.TestCase):
dict(
testcase_name='int8',
dtype=np.dtype('int8'),
expected_chunks={'index': 48, 'width': 1024, 'height': 1024},
expected_chunks={'index': 48, 'width': 1024, 'height': 512},
),
dict(
testcase_name='int32',
dtype=np.dtype('int32'),
expected_chunks={'index': 48, 'width': 512, 'height': 512},
expected_chunks={'index': 48, 'width': 512, 'height': 256},
),
dict(
testcase_name='int64',
dtype=np.dtype('int64'),
expected_chunks={'index': 48, 'width': 512, 'height': 256},
expected_chunks={'index': 48, 'width': 256, 'height': 256},
),
dict(
testcase_name='float32',
dtype=np.dtype('float32'),
expected_chunks={'index': 48, 'width': 512, 'height': 512},
expected_chunks={'index': 48, 'width': 512, 'height': 256},
),
dict(
testcase_name='float64',
dtype=np.dtype('float64'),
expected_chunks={'index': 48, 'width': 512, 'height': 256},
expected_chunks={'index': 48, 'width': 256, 'height': 256},
),
dict(
testcase_name='complex64',
dtype=np.dtype('complex64'),
expected_chunks={'index': 48, 'width': 512, 'height': 256},
expected_chunks={'index': 48, 'width': 256, 'height': 256},
),
)
def test_auto_chunks__handles_standard_dtypes(self, dtype, expected_chunks):
Expand All @@ -49,7 +49,7 @@ def test_auto_chunks__handles_standard_dtypes(self, dtype, expected_chunks):
)


class EEStoreTest(absltest.TestCase):
class EEStoreTest(parameterized.TestCase):

def test_auto_chunks__handles_range_of_dtype_sizes(self):
dt = 0
Expand All @@ -59,18 +59,36 @@ def test_auto_chunks__handles_range_of_dtype_sizes(self):
except ValueError:
self.fail(f'Could not handle data type size {dt}.')

def test_auto_chunks__is_optimal_for_powers_of_two(self):
for p in range(10):
dt = 2**p
chunks = xee.EarthEngineStore._auto_chunks(dt)
def test_auto_chunks__matches_observed_values(self):
observed_results = {
1: 50331648,
2: 37748736,
4: 31457280,
8: 28311552,
16: 26738688,
32: 25952256,
64: 25559040,
128: 25362432,
256: 25264128,
512: 25214976,
}

for dtype_bytes, expected_bytes in observed_results.items():
chunks = xee.EarthEngineStore._auto_chunks(dtype_bytes)
actual_bytes = np.prod(list(chunks.values())) * (
dtype_bytes + 1
) # added +1 to account for the mask byte
self.assertEqual(
xee.REQUEST_BYTE_LIMIT, np.prod(list(chunks.values())) * dt
expected_bytes,
actual_bytes,
f'dtype_bytes: {dtype_bytes}, Expected: {expected_bytes}, '
f'Actual: {actual_bytes}, Chunks: {chunks}',
)

def test_exceeding_byte_limit__raises_error(self):
dtype_size = 8
# does not fail
chunks = {'index': 48, 'width': 512, 'height': 256}
chunks = {'index': 48, 'width': 256, 'height': 256}
ext._check_request_limit(chunks, dtype_size, xee.REQUEST_BYTE_LIMIT)

# fails
Expand Down

0 comments on commit a972e94

Please sign in to comment.