Skip to content

Commit

Permalink
squashed commit
Browse files Browse the repository at this point in the history
add change to do parallel zipping only, no crawling

modify cli arg format for medusa-zip tool

update cli arg format

fix non-exhaustive CopyMode usage

[BROKEN] add first run of complete medusa zip with cli arg!

the resulting zip cannot be zipimported yet....

medusa zipping works great now, let's revert .zip() changes

bump medusa options

bump more medusa options

use the merged medusa command lines now

manage a cache of parallel intermediate zip generation jobs!

small fix

much closer to mergeable now

working much more complex control flow between the medusa-zip cli

move medusa zip to medusa.py

medusa works for packed apps now too

works for everything, but kind of crazy

close stdin after writing to the child process

factor out a ridiculous amount of boilerplate

add back the non-medusa impl for packed layouts

implement a "normal" version which uses atomic directories

revert unintentional whitespace changes

separate the serial and parallel pex creations

remove the attempts at parallelism

add --medusa-path

remove unused code

make the medusa hook work when not provided

add back a tracer

revert some changes that make things harder to read

revert some changes i shouldn't need

make medusa work with the medusa-zip package and not subprocesses!

update after adding defaults in medusa-zip python package

remove -f arg for resolving medusa-zip

[BROKEN] possibly obsolete!

fix cli arg

add stitched layout

create stitch copymode

no

initial stitch impl

add merge_zip_file() method

move packed wheel caching into common methods

initial impl of merging zips with entry renaming

make MergeableZipFile into a subclass of ZipFile

fix header offset calculation

tmp

fix mypy

remove unused imports

fix buffering for file handles in py2

Revert "tmp"

This reverts commit 8ad12de71a455c3918434cd81dcaf319fa43d9b4.
  • Loading branch information
cosmicexplorer committed Aug 1, 2023
1 parent a938cb9 commit 05ac0ad
Show file tree
Hide file tree
Showing 3 changed files with 348 additions and 87 deletions.
50 changes: 45 additions & 5 deletions pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import atexit
import contextlib
import errno
import io
import itertools
import os
import re
Expand Down Expand Up @@ -137,6 +138,26 @@ def do_copy():
do_copy()


_COPY_BUFSIZE = 64 * 1024


def copy_file_range(source, destination, length, buffer_size=_COPY_BUFSIZE):
    # type: (io.BufferedIOBase, io.BufferedIOBase, int, int) -> None
    """Copy exactly `length` bytes from `source` to `destination`.

    Like ``shutil.copyfileobj()``, but stops after exactly `length` bytes instead of copying
    until EOF. We require ``io.BufferedIOBase`` handles in order to avoid handling short reads
    or writes except at EOF.

    :raises IOError: If `source` is exhausted before `length` bytes could be read, or if
                     `destination` does not accept a complete write.
    """
    remaining_length = length
    # Avoid allocating a copy buffer any larger than the total amount to copy.
    if buffer_size > length:
        buffer_size = length
    cur_buf = bytearray(buffer_size)
    while remaining_length > buffer_size:
        # NB: these checks were previously `assert`s, which are silently stripped under
        # `python -O`; explicit checks keep truncation detection in optimized runs.
        if source.readinto(cur_buf) != buffer_size:
            raise IOError(
                "Short read from source: expected {} bytes.".format(buffer_size)
            )
        if destination.write(cur_buf) != buffer_size:
            raise IOError(
                "Short write to destination: expected {} bytes.".format(buffer_size)
            )
        remaining_length -= buffer_size
    remainder = source.read(remaining_length)
    if len(remainder) != remaining_length:
        raise IOError(
            "Short read from source: expected {} bytes, got {}.".format(
                remaining_length, len(remainder)
            )
        )
    if destination.write(remainder) != remaining_length:
        raise IOError(
            "Short write to destination: expected {} bytes.".format(remaining_length)
        )


# See http://stackoverflow.com/questions/2572172/referencing-other-modules-in-atexit
class MktempTeardownRegistry(object):
def __init__(self):
Expand Down Expand Up @@ -281,18 +302,32 @@ def safe_mkdir(directory, clean=False):
return directory


def _ensure_parent(filename):
    # type: (str) -> None
    """Create the directory containing `filename` if it does not already exist.

    A bare filename with no directory component requires no action.
    """
    containing_dir = os.path.dirname(filename)
    if not containing_dir:
        return
    safe_mkdir(containing_dir)


def safe_open(filename, *args, **kwargs):
    """Open `filename`, first creating any missing directories leading up to it.

    Accepts the same arguments as the builtin ``open``.
    """
    _ensure_parent(filename)
    return open(filename, *args, **kwargs)  # noqa: T802


def safe_io_open(filename, *args, **kwargs):
    # type: (str, Any, Any) -> io.IOBase
    """Like ``safe_open()``, but opens the file via ``io.open()``.

    With the right arguments this yields a buffered file handle on py2, where the builtin
    ``open`` would not.
    """
    _ensure_parent(filename)
    handle = io.open(filename, *args, **kwargs)
    return cast("io.IOBase", handle)


def safe_delete(filename):
# type: (str) -> None
"""Delete a file safely.
Expand Down Expand Up @@ -606,9 +641,13 @@ def delete(self):
# type: () -> None
shutil.rmtree(self.chroot)

# This directory traversal, file I/O, and compression can be made faster with complex
# parallelism and pipelining in a compiled language, but the result is much harder to package,
# and is still less performant than effective caching. See investigation in
# https://github.com/pantsbuild/pex/issues/2158 and https://github.com/pantsbuild/pex/pull/2175.
def zip(
self,
filename, # type: str
output_file, # type: Union[str, io.IOBase]
mode="w", # type: str
deterministic_timestamp=False, # type: bool
exclude_file=lambda _: False, # type: Callable[[str], bool]
Expand All @@ -626,7 +665,7 @@ def zip(
selected_files = self.files()

compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED
with open_zip(filename, mode, compression) as zf:
with open_zip(output_file, mode, compression) as zf:

def write_entry(
filename, # type: str
Expand All @@ -640,6 +679,7 @@ def write_entry(
if deterministic_timestamp
else None,
)
# FIXME: this ignores the zinfo.compress_type value from zip_entry_from_file()!
zf.writestr(zip_entry.info, zip_entry.data, compression)

def get_parent_dir(path):
Expand Down
Loading

0 comments on commit 05ac0ad

Please sign in to comment.