diff --git a/pex/common.py b/pex/common.py index fee3e76fc..908d85086 100644 --- a/pex/common.py +++ b/pex/common.py @@ -39,6 +39,8 @@ Union, ) + _DateTime = Tuple[int, int, int, int, int, int] + # We use the start of MS-DOS time, which is what zipfiles use (see section 4.4.6 of # https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT). @@ -138,10 +140,7 @@ def do_copy(): do_copy() -_COPY_BUFSIZE = 64 * 1024 - - -def copy_file_range(source, destination, length, buffer_size=_COPY_BUFSIZE): +def copy_file_range(source, destination, length, buffer_size=io.DEFAULT_BUFFER_SIZE): # type: (io.BufferedIOBase, io.BufferedIOBase, int, int) -> None """Implementation of shutil.copyfileobj() that only copies exactly `length` bytes.""" # We require a BufferedIOBase in order to avoid handling short reads or writes. @@ -194,7 +193,14 @@ class ZipEntry(namedtuple("ZipEntry", ["info", "data"])): pass @classmethod - def zip_entry_from_file(cls, filename, arcname=None, date_time=None): + def zip_entry_from_file( + cls, + filename, # type: str + arcname=None, # type: Optional[str] + date_time=None, # type: Optional[Tuple[int, ...]] + compression=zipfile.ZIP_STORED, # type: int + ): + # type: (...) -> PermPreservingZipFile.ZipEntry """Construct a ZipEntry for a file on the filesystem. Usually a similar `zip_info_from_file` method is provided by `ZipInfo`, but it is not @@ -213,16 +219,20 @@ def zip_entry_from_file(cls, filename, arcname=None, date_time=None): arcname += "/" if date_time is None: date_time = time.localtime(st.st_mtime) - zinfo = zipfile.ZipInfo(filename=arcname, date_time=date_time[:6]) + zinfo = zipfile.ZipInfo(filename=arcname, date_time=cast(_DateTime, date_time[:6])) zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes if isdir: zinfo.file_size = 0 zinfo.external_attr |= 0x10 # MS-DOS directory flag + # Always store directories decompressed, because they are empty but take up 2 bytes when + # compressed. zinfo.compress_type = zipfile.ZIP_STORED data = b"" else: zinfo.file_size = st.st_size - zinfo.compress_type = zipfile.ZIP_DEFLATED + # File contents may be compressed or decompressed. Decompressed is significantly faster + # to write, but caching makes up for that. + zinfo.compress_type = compression with open(filename, "rb") as fp: data = fp.read() return cls.ZipEntry(info=zinfo, data=data) @@ -677,12 +687,12 @@ def write_entry( zip_entry = zf.zip_entry_from_file( filename=filename, arcname=os.path.relpath(arcname, strip_prefix) if strip_prefix else arcname, - date_time=DETERMINISTIC_DATETIME.timetuple() - if deterministic_timestamp - else None, + date_time=( + DETERMINISTIC_DATETIME.timetuple() if deterministic_timestamp else None + ), + compression=compression, ) - # FIXME: this ignores the zinfo.compress_type value from zip_entry_from_file()! - zf.writestr(zip_entry.info, zip_entry.data, compression) + zf.writestr(zip_entry.info, zip_entry.data) def get_parent_dir(path): # type: (str) -> Optional[str] diff --git a/pex/pex_builder.py b/pex/pex_builder.py index de72592cc..33b3d7ddc 100644 --- a/pex/pex_builder.py +++ b/pex/pex_builder.py @@ -869,10 +869,10 @@ def _build_zipapp( # computation, because the compressed entries are simply copied over as-is. # # Note that these cached zips are created in the --layout packed format, without - # the .bootstrap/ or .deps/ prefixes we need to form a proper zipapp. Our zip - # file merging solution edits each entry's .filename with the appropriate - # prefix, but we will still need to generate intermediate directory entries - # before adding the prefixed files in order to unzip correctly. + # the .bootstrap/ or .deps/ prefixes we need to form a proper Pex zipapp + # layout. Our zip file merging solution edits each entry's .filename with the + # appropriate prefix, but we will still need to generate intermediate directory + # entries before adding the prefixed files in order to unzip correctly. # Reuse the file handle to zip into. This isn't necessary (we could close and reopen # it), but it avoids unnecessarily flushing to disk. The ZipFile class will re-parse @@ -928,5 +928,4 @@ def _build_zipapp( labels=self._DIRECT_SOURCE_LABELS, ) - chmod_plus_x(filename)