From 6974449d01caffd590293d28557d95ccc205877a Mon Sep 17 00:00:00 2001 From: Peter Ebden Date: Tue, 19 Nov 2024 13:48:44 +0000 Subject: [PATCH 1/3] move a bunch of stuff to plz.py --- tools/please_pex/pex/pex_main.py | 156 ---------------------------- tools/please_pex/pex/plz.py | 171 +++++++++++++++++++++++++++++++ 2 files changed, 171 insertions(+), 156 deletions(-) create mode 100644 tools/please_pex/pex/plz.py diff --git a/tools/please_pex/pex/pex_main.py b/tools/please_pex/pex/pex_main.py index 302862c..61ee0cc 100644 --- a/tools/please_pex/pex/pex_main.py +++ b/tools/please_pex/pex/pex_main.py @@ -26,162 +26,6 @@ ZIP_SAFE = __ZIP_SAFE__ PEX_STAMP = '__PEX_STAMP__' -# Workaround for https://bugs.python.org/issue15795 -class ZipFileWithPermissions(zipfile.ZipFile): - """ Custom ZipFile class handling file permissions. """ - - def _extract_member(self, member, targetpath, pwd): - if not isinstance(member, zipfile.ZipInfo): - member = self.getinfo(member) - - targetpath = super(ZipFileWithPermissions, self)._extract_member( - member, targetpath, pwd - ) - - attr = member.external_attr >> 16 - if attr != 0: - os.chmod(targetpath, attr) - return targetpath - -class SoImport(MetaPathFinder): - """So import. Much binary. Such dynamic. Wow.""" - - def __init__(self): - self.suffixes = machinery.EXTENSION_SUFFIXES # list, as importlib will not be using the file description - self.suffixes_by_length = sorted(self.suffixes, key=lambda x: -len(x)) - # Identify all the possible modules we could handle. - self.modules = {} - if zipfile.is_zipfile(sys.argv[0]): - zf = ZipFileWithPermissions(sys.argv[0]) - for name in zf.namelist(): - path, _ = self.splitext(name) - if path: - if path.startswith('.bootstrap/'): - path = path[len('.bootstrap/'):] - importpath = path.replace('/', '.') - self.modules.setdefault(importpath, name) - if path.startswith(MODULE_DIR): - self.modules.setdefault(importpath[len(MODULE_DIR)+1:], name) - if self.modules: - self.zf = zf - - def find_spec(self, name, path, target=None): - """Implements abc.MetaPathFinder.""" - if name in self.modules: - return spec_from_loader(name, self) - - def create_module(self, spec): - """Create a module object that we're going to load.""" - filename = self.modules[spec.name] - prefix, ext = self.splitext(filename) - with tempfile.NamedTemporaryFile(suffix=ext, prefix=os.path.basename(prefix)) as f: - f.write(self.zf.read(filename)) - f.flush() - spec.origin = f.name - loader = machinery.ExtensionFileLoader(spec.name, f.name) - spec.loader = loader - mod = loader.create_module(spec) - # Make it look like module came from the original location for nicer tracebacks. - mod.__file__ = filename - return mod - - def exec_module(self, mod): - """Because we set spec.loader above, the ExtensionFileLoader's exec_module is called.""" - raise NotImplementedError("SoImport.exec_module isn't used") - - def splitext(self, path): - """Similar to os.path.splitext, but splits our longest known suffix preferentially.""" - for suffix in self.suffixes_by_length: - if path.endswith(suffix): - return path[:-len(suffix)], suffix - return None, None - - -class PexDistribution(Distribution): - """Represents a distribution package that exists within a pex file (which is, ultimately, a zip - file). Distribution packages are identified by the presence of a suitable dist-info or egg-info - directory member inside the pex file, which need not necessarily exist at the top level if a - directory prefix is specified in the constructor. - """ - def __init__(self, name, pex_file, zip_file, files, prefix): - self._name = name - self._zf = zip_file - self._pex_file = pex_file - self._prefix = prefix - # Mapping of -> - self._files = files - - def read_text(self, filename): - full_name = self._files.get(filename) - if full_name: - return self._zf.read(full_name).decode(encoding="utf-8") - - def locate_file(self, path): - return zipfile.Path( - self._pex_file, - at=os.path.join(self._prefix, path) if self._prefix else path, - ) - - read_text.__doc__ = Distribution.read_text.__doc__ - - -class ModuleDirImport(MetaPathFinder): - """Handles imports to a directory equivalently to them being at the top level. - - This means that if one writes `import third_party.python.six`, it's imported like `import six`, - but becomes accessible under both names. This handles both the fully-qualified import names - and packages importing as their expected top-level names internally. - """ - def __init__(self, module_dir=MODULE_DIR): - self.prefix = module_dir.replace("/", ".") + "." - self._distributions = self._find_all_distributions(module_dir) - - def _find_all_distributions(self, module_dir): - pex_file = sys.argv[0] - if zipfile.is_zipfile(pex_file): - zf = ZipFileWithPermissions(pex_file) - r = re.compile(r"{module_dir}{sep}([^/]+)-[^/-]+?\.(?:dist|egg)-info/(.*)".format( - module_dir=module_dir, - sep = os.sep, - )) - filenames = defaultdict(dict) - for name in zf.namelist(): - match = r.match(name) - if match: - filenames[match.group(1)][match.group(2)] = name - return {mod: [PexDistribution(mod, pex_file, zf, files, prefix=module_dir)] - for mod, files in filenames.items()} - return {} - - def find_spec(self, name, path, target=None): - """Implements abc.MetaPathFinder.""" - if name.startswith(self.prefix): - return spec_from_loader(name, self) - - def create_module(self, spec): - """Actually load a module that we said we'd handle in find_module.""" - module = import_module(spec.name.removeprefix(self.prefix)) - sys.modules[spec.name] = module - return module - - def exec_module(self, mod): - """Nothing to do, create_module already did the work.""" - - def find_distributions(self, context): - """Return an iterable of all Distribution instances capable of - loading the metadata for packages for the indicated ``context``. - """ - if context.name: - # The installed directories have underscores in the place of what might be a hyphen - # in the package name (e.g. the package opentelemetry-sdk installs opentelemetry_sdk). - return self._distributions.get(context.name.replace("-", "_"), []) - else: - return itertools.chain(*self._distributions.values()) - - def get_code(self, fullname): - module = import_module(fullname.removeprefix(self.prefix)) - return module.__loader__.get_code(fullname) - def add_module_dir_to_sys_path(dirname): """Adds the given dirname to sys.path if it's nonempty.""" diff --git a/tools/please_pex/pex/plz.py b/tools/please_pex/pex/plz.py new file mode 100644 index 0000000..25eda0d --- /dev/null +++ b/tools/please_pex/pex/plz.py @@ -0,0 +1,171 @@ +"""Internal module for Please builtins.""" + +from collections import defaultdict +from importlib import import_module, machinery +from importlib.abc import MetaPathFinder +from importlib.metadata import Distribution +from importlib.util import spec_from_loader +import itertools +import os +import re +import sys +import tempfile +import zipfile + + +# Workaround for https://bugs.python.org/issue15795 +class ZipFileWithPermissions(zipfile.ZipFile): + """ Custom ZipFile class handling file permissions. """ + + def _extract_member(self, member, targetpath, pwd): + if not isinstance(member, zipfile.ZipInfo): + member = self.getinfo(member) + + targetpath = super(ZipFileWithPermissions, self)._extract_member( + member, targetpath, pwd + ) + + attr = member.external_attr >> 16 + if attr != 0: + os.chmod(targetpath, attr) + return targetpath + + +class SoImport(MetaPathFinder): + """So import. Much binary. Such dynamic. Wow.""" + + def __init__(self, module_dir): + self.suffixes = machinery.EXTENSION_SUFFIXES # list, as importlib will not be using the file description + self.suffixes_by_length = sorted(self.suffixes, key=lambda x: -len(x)) + # Identify all the possible modules we could handle. + self.modules = {} + if zipfile.is_zipfile(sys.argv[0]): + zf = ZipFileWithPermissions(sys.argv[0]) + for name in zf.namelist(): + path, _ = self.splitext(name) + if path: + if path.startswith('.bootstrap/'): + path = path[len('.bootstrap/'):] + importpath = path.replace('/', '.') + self.modules.setdefault(importpath, name) + if path.startswith(module_dir): + self.modules.setdefault(importpath[len(module_dir) + 1:], name) + if self.modules: + self.zf = zf + + def find_spec(self, name, path, target=None): + """Implements abc.MetaPathFinder.""" + if name in self.modules: + return spec_from_loader(name, self) + + def create_module(self, spec): + """Create a module object that we're going to load.""" + filename = self.modules[spec.name] + prefix, ext = self.splitext(filename) + with tempfile.NamedTemporaryFile(suffix=ext, prefix=os.path.basename(prefix)) as f: + f.write(self.zf.read(filename)) + f.flush() + spec.origin = f.name + loader = machinery.ExtensionFileLoader(spec.name, f.name) + spec.loader = loader + mod = loader.create_module(spec) + # Make it look like module came from the original location for nicer tracebacks. + mod.__file__ = filename + return mod + + def exec_module(self, mod): + """Because we set spec.loader above, the ExtensionFileLoader's exec_module is called.""" + raise NotImplementedError("SoImport.exec_module isn't used") + + def splitext(self, path): + """Similar to os.path.splitext, but splits our longest known suffix preferentially.""" + for suffix in self.suffixes_by_length: + if path.endswith(suffix): + return path[:-len(suffix)], suffix + return None, None + + +class PexDistribution(Distribution): + """Represents a distribution package that exists within a pex file (which is, ultimately, a zip + file). Distribution packages are identified by the presence of a suitable dist-info or egg-info + directory member inside the pex file, which need not necessarily exist at the top level if a + directory prefix is specified in the constructor. + """ + def __init__(self, name, pex_file, zip_file, files, prefix): + self._name = name + self._zf = zip_file + self._pex_file = pex_file + self._prefix = prefix + # Mapping of -> + self._files = files + + def read_text(self, filename): + full_name = self._files.get(filename) + if full_name: + return self._zf.read(full_name).decode(encoding="utf-8") + + def locate_file(self, path): + return zipfile.Path( + self._pex_file, + at=os.path.join(self._prefix, path) if self._prefix else path, + ) + + read_text.__doc__ = Distribution.read_text.__doc__ + + +class ModuleDirImport(MetaPathFinder): + """Handles imports to a directory equivalently to them being at the top level. + + This means that if one writes `import third_party.python.six`, it's imported like `import six`, + but becomes accessible under both names. This handles both the fully-qualified import names + and packages importing as their expected top-level names internally. + """ + def __init__(self, module_dir): + self.prefix = module_dir.replace("/", ".") + "." + self._distributions = self._find_all_distributions(module_dir) + + def _find_all_distributions(self, module_dir): + pex_file = sys.argv[0] + if zipfile.is_zipfile(pex_file): + zf = ZipFileWithPermissions(pex_file) + r = re.compile(r"{module_dir}{sep}([^/]+)-[^/-]+?\.(?:dist|egg)-info/(.*)".format( + module_dir=module_dir, + sep=os.sep, + )) + filenames = defaultdict(dict) + for name in zf.namelist(): + match = r.match(name) + if match: + filenames[match.group(1)][match.group(2)] = name + return {mod: [PexDistribution(mod, pex_file, zf, files, prefix=module_dir)] + for mod, files in filenames.items()} + return {} + + def find_spec(self, name, path, target=None): + """Implements abc.MetaPathFinder.""" + if name.startswith(self.prefix): + return spec_from_loader(name, self) + + def create_module(self, spec): + """Actually load a module that we said we'd handle in find_module.""" + module = import_module(spec.name.removeprefix(self.prefix)) + sys.modules[spec.name] = module + return module + + def exec_module(self, mod): + """Nothing to do, create_module already did the work.""" + + def find_distributions(self, context): + """Return an iterable of all Distribution instances capable of + loading the metadata for packages for the indicated ``context``. + """ + if context.name: + # The installed directories have underscores in the place of what might be a hyphen + # in the package name (e.g. the package opentelemetry-sdk installs opentelemetry_sdk). + return self._distributions.get(context.name.replace("-", "_"), []) + else: + return itertools.chain(*self._distributions.values()) + + def get_code(self, fullname): + module = import_module(fullname.removeprefix(self.prefix)) + return module.__loader__.get_code(fullname) From 7941974125aac2da7c858177e721e8a560552667 Mon Sep 17 00:00:00 2001 From: Peter Ebden Date: Tue, 19 Nov 2024 13:57:58 +0000 Subject: [PATCH 2/3] Add it to the zipfile --- tools/please_pex/pex/pex.go | 8 +++++++- tools/please_pex/pex/pex_main.py | 22 ++++++++-------------- tools/please_pex/pex/pex_run.py | 3 +-- tools/please_pex/pex/pex_test_main.py | 2 -- 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/tools/please_pex/pex/pex.go b/tools/please_pex/pex/pex.go index e7459f9..8c65918 100644 --- a/tools/please_pex/pex/pex.go +++ b/tools/please_pex/pex/pex.go @@ -185,8 +185,14 @@ func (pw *Writer) Write(out, moduleDir string) error { } } + // Write plz.py which contains much of our import hooks etc + b := mustRead("plz.py") + if err := f.WriteFile(".bootstrap/plz.py", b, 0644); err != nil { + return err + } + // Always write pex_main.py, with some templating. - b := mustRead("pex_main.py") + b = mustRead("pex_main.py") b = bytes.Replace(b, []byte("__MODULE_DIR__"), []byte(strings.ReplaceAll(moduleDir, ".", "/")), 1) b = bytes.Replace(b, []byte("__ENTRY_POINT__"), []byte(pw.realEntryPoint), 1) b = bytes.Replace(b, []byte("__ZIP_SAFE__"), []byte(pythonBool(pw.zipSafe)), 1) diff --git a/tools/please_pex/pex/pex_main.py b/tools/please_pex/pex/pex_main.py index 61ee0cc..fbaf3ef 100644 --- a/tools/please_pex/pex/pex_main.py +++ b/tools/please_pex/pex/pex_main.py @@ -1,18 +1,8 @@ """Zipfile entry point which supports auto-extracting itself based on zip-safety.""" -from collections import defaultdict -from importlib import import_module, machinery -from importlib.abc import MetaPathFinder -from importlib.metadata import Distribution -from importlib.util import spec_from_loader -from site import getsitepackages -import itertools import os -import re import runpy import sys -import tempfile -import zipfile # Put this pex on the path before anything else. PEX = os.path.abspath(sys.argv[0]) @@ -27,11 +17,17 @@ PEX_STAMP = '__PEX_STAMP__' -def add_module_dir_to_sys_path(dirname): +def add_module_dir_to_sys_path(dirname, zip_safe=True): """Adds the given dirname to sys.path if it's nonempty.""" + # Add .bootstrap dir to path, after the initial pex entry + sys.path = sys.path[:1] + [os.path.join(sys.path[0], '.bootstrap')] + sys.path[1:] + # Now we have .bootstrap on the path, we can import our own hooks. + import plz if dirname: sys.path = sys.path[:1] + [os.path.join(sys.path[0], dirname)] + sys.path[1:] - sys.meta_path.insert(0, ModuleDirImport(dirname)) + sys.meta_path.insert(0, plz.ModuleDirImport(dirname)) + if zip_safe: + sys.meta_path.append(plz.SoImport(MODULE_DIR)) def pex_basepath(temp=False): @@ -141,8 +137,6 @@ def main(): N.B. This gets redefined by pex_test_main to run tests instead. """ - # Add .bootstrap dir to path, after the initial pex entry - sys.path = sys.path[:1] + [os.path.join(sys.path[0], '.bootstrap')] + sys.path[1:] # Starts a debugging session, if defined, before running the entry point. if os.getenv("PLZ_DEBUG") is not None: start_debugger() diff --git a/tools/please_pex/pex/pex_run.py b/tools/please_pex/pex/pex_run.py index 1eb6d49..2a0f20d 100644 --- a/tools/please_pex/pex/pex_run.py +++ b/tools/please_pex/pex/pex_run.py @@ -1,11 +1,10 @@ def run(explode=False): if explode or not ZIP_SAFE: with explode_zip()(): - add_module_dir_to_sys_path(MODULE_DIR) + add_module_dir_to_sys_path(MODULE_DIR, zip_safe=False) return main() else: add_module_dir_to_sys_path(MODULE_DIR) - sys.meta_path.append(SoImport()) return main() diff --git a/tools/please_pex/pex/pex_test_main.py b/tools/please_pex/pex/pex_test_main.py index e43cf78..1041530 100644 --- a/tools/please_pex/pex/pex_test_main.py +++ b/tools/please_pex/pex/pex_test_main.py @@ -31,8 +31,6 @@ def _xml_file(self, fr, analysis, *args, **kvargs): def main(): """Runs the tests. Returns an appropriate exit code.""" args = [arg for arg in sys.argv[1:]] - # Add .bootstrap dir to path, after the initial pex entry - sys.path = sys.path[:1] + [os.path.join(sys.path[0], '.bootstrap')] + sys.path[1:] if os.getenv('COVERAGE'): # It's important that we run coverage while we load the tests otherwise # we get no coverage for import statements etc. From e8b150c47b60998fff4f12f5da55f349808626c1 Mon Sep 17 00:00:00 2001 From: Peter Ebden Date: Tue, 19 Nov 2024 14:01:08 +0000 Subject: [PATCH 3/3] version --- tools/ChangeLog | 4 ++++ tools/VERSION | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/ChangeLog b/tools/ChangeLog index bf885ea..4758730 100644 --- a/tools/ChangeLog +++ b/tools/ChangeLog @@ -1,3 +1,7 @@ +Version 1.6.0 +------------- + * Import hooks are now added in the `plz` module and are hence more usefully importable (#229) + Version 1.5.5 ------------- * Fix get_code on ModuleDirImport (#226) diff --git a/tools/VERSION b/tools/VERSION index 9075be4..dc1e644 100644 --- a/tools/VERSION +++ b/tools/VERSION @@ -1 +1 @@ -1.5.5 +1.6.0