Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Account for mask byte in chunk size calculation #210

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 16 additions & 13 deletions xee/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,8 @@
'double': np.float64,
}

# While this documentation says that the limit is 10 MB...
# https://developers.google.com/earth-engine/guides/usage#request_payload_size
# actual byte limit seems to depend on other factors. This has been found via
# trial & error.
# Earth Engine image:computePixels request is limited to 48 MB
# https://developers.google.com/earth-engine/reference/rest/v1/projects.image/computePixels
REQUEST_BYTE_LIMIT = 2**20 * 48 # 48 MBs

# Xee uses the ee.ImageCollection.toList function for slicing into an
Expand All @@ -80,10 +78,13 @@
_TO_LIST_WARNING_LIMIT = 10000


# TODO: This isn't used. We could implement a check on the chunk size or wait
# until Earth Engine fails with EEException: Total request size too large.
def _check_request_limit(chunks: Dict[str, int], dtype_size: int, limit: int):
"""Checks that the actual number of bytes does not exceed the limit."""
index, width, height = chunks['index'], chunks['width'], chunks['height']
actual_bytes = index * width * height * dtype_size
# Add one for the mask byte (Earth Engine bytes-per-pixel accounting).
actual_bytes = index * width * height * (dtype_size + 1)
if actual_bytes > limit:
raise ValueError(
f'`chunks="auto"` failed! Actual bytes {actual_bytes!r} exceeds limit'
Expand Down Expand Up @@ -352,20 +353,22 @@ def _auto_chunks(
# height and width follow round numbers (powers of two) and allocate the
# remaining bytes available for the index length. To illustrate this logic,
# let's follow through with an example where:
# request_byte_limit = 2 ** 20 * 10 # = 10 MBs
# request_byte_limit = 2 ** 20 * 48 # = 48 MBs
# dtype_bytes = 8
log_total = np.log2(request_byte_limit) # e.g.=23.32...
log_dtype = np.log2(dtype_bytes) # e.g.=3

log_total = np.log2(request_byte_limit) # e.g.=25.58...
# Add one for the mask byte (Earth Engine bytes-per-pixel accounting).
log_dtype = np.log2(dtype_bytes + 1) # e.g.=3.16...
log_limit = 10 * (log_total // 10) # e.g.=20
log_index = log_total - log_limit # e.g.=3.32...
log_index = log_total - log_limit # e.g.=5.58...

# Motivation: How do we split a number N into two ints that sum to N and are as
# close to each other as possible?
d = (log_limit - np.ceil(log_dtype)) / 2 # e.g.=17/2=8.5
wd, ht = np.ceil(d), np.floor(d) # e.g. wd=9, ht=8
d = (log_limit - np.ceil(log_dtype)) / 2 # e.g.=16/2=8.0
wd, ht = np.ceil(d), np.floor(d) # e.g. wd=8, ht=8

# Put back to byte space, then round to the nearest integer number of bytes.
index = int(np.rint(2**log_index)) # e.g.=10
width = int(np.rint(2**wd)) # e.g.=512
index = int(np.rint(2**log_index)) # e.g.=48
width = int(np.rint(2**wd)) # e.g.=256
height = int(np.rint(2**ht)) # e.g.=256

return {'index': index, 'width': width, 'height': height}
Expand Down
Loading