|
7 | 7 | import logging |
8 | 8 | import os |
9 | 9 | import shutil |
| 10 | +import subprocess |
| 11 | +from contextlib import contextmanager |
10 | 12 |
|
11 | 13 | from pex import pex_warnings |
12 | 14 | from pex.atomic_directory import atomic_directory |
|
20 | 22 | safe_mkdir, |
21 | 23 | safe_mkdtemp, |
22 | 24 | safe_open, |
| 25 | + temporary_dir, |
23 | 26 | ) |
24 | 27 | from pex.compatibility import commonpath, to_bytes |
25 | 28 | from pex.compiler import Compiler |
|
28 | 31 | from pex.environment import PEXEnvironment |
29 | 32 | from pex.finders import get_entry_point_from_console_script, get_script_from_distributions |
30 | 33 | from pex.interpreter import PythonInterpreter |
| 34 | +from pex.jobs import Job |
31 | 35 | from pex.layout import Layout |
32 | 36 | from pex.orderedset import OrderedSet |
33 | 37 | from pex.pex import PEX |
|
37 | 41 | from pex.tracer import TRACER |
38 | 42 | from pex.typing import TYPE_CHECKING |
39 | 43 | from pex.util import CacheHelper |
| 44 | +from pex.ziputils import ZipCommand |
40 | 45 |
|
41 | 46 | if TYPE_CHECKING: |
42 | | - from typing import Dict, Optional |
| 47 | + from typing import BinaryIO, Callable, Dict, Iterator, Optional |
43 | 48 |
|
44 | 49 |
|
45 | 50 | class CopyMode(Enum["CopyMode.Value"]): |
@@ -88,14 +93,14 @@ def __maybe_run_venv__(pex, pex_root, pex_path): |
88 | 93 |
|
89 | 94 | venv_dir = venv_dir( |
90 | 95 | pex_file=pex, |
91 | | - pex_root=pex_root, |
| 96 | + pex_root=pex_root, |
92 | 97 | pex_hash={pex_hash!r}, |
93 | 98 | has_interpreter_constraints={has_interpreter_constraints!r}, |
94 | 99 | pex_path=pex_path, |
95 | 100 | ) |
96 | 101 | venv_pex = os.path.join(venv_dir, 'pex') |
97 | 102 | if not __execute__ or not is_exe(venv_pex): |
98 | | - # Code in bootstrap_pex will (re)create the venv after selecting the correct interpreter. |
| 103 | + # Code in bootstrap_pex will (re)create the venv after selecting the correct interpreter. |
99 | 104 | return venv_dir |
100 | 105 |
|
101 | 106 | TRACER.log('Executing venv PEX for {{}} at {{}}'.format(pex, venv_pex)) |
@@ -434,6 +439,7 @@ def set_header(self, header): |
434 | 439 | self._header = header |
435 | 440 |
|
436 | 441 | def _add_dist_dir(self, path, dist_name, fingerprint=None): |
| 442 | + # type: (str, str, Optional[str]) -> str |
437 | 443 | target_dir = os.path.join(self._pex_info.internal_cache, dist_name) |
438 | 444 | if self._copy_mode == CopyMode.SYMLINK: |
439 | 445 | self._copy_or_link(path, target_dir, label=dist_name) |
@@ -550,6 +556,7 @@ def _copy_or_link(self, src, dst, label=None): |
550 | 556 | elif self._copy_mode == CopyMode.SYMLINK: |
551 | 557 | self._chroot.symlink(src, dst, label) |
552 | 558 | else: |
| 559 | + assert self._copy_mode == CopyMode.LINK |
553 | 560 | self._chroot.link(src, dst, label) |
554 | 561 |
|
555 | 562 | def _prepare_bootstrap(self): |
@@ -769,31 +776,144 @@ def zip_cache_dir(path): |
769 | 776 | os.path.join(internal_cache, location), |
770 | 777 | ) |
771 | 778 |
|
772 | | - def _build_zipapp( |
773 | | - self, |
774 | | - filename, # type: str |
775 | | - deterministic_timestamp=False, # type: bool |
776 | | - compress=True, # type: bool |
777 | | - ): |
778 | | - # type: (...) -> None |
def _cache_dists_for_stitching(self, compress):
    # type: (bool) -> Dict[str, str]
    """Zip every distribution into a reusable cache entry and report where each one lives.

    For each `(label, fingerprint)` pair in `self._pex_info.distributions`, a zip holding only
    the chroot entries under that label is written to
    `<pex_root>/stitched_dists/<fingerprint>-<compressed|uncompressed>/<label>`, unless that
    directory was already finalized by an earlier run.

    :param compress: Whether the cached zips are compressed; compressed and uncompressed
                     variants are cached under separate keys.
    :return: A mapping from distribution label to the path of its cached zip.
    """
    compression_tag = "compressed" if compress else "uncompressed"
    zips_by_label = {}  # type: Dict[str, str]
    with TRACER.timed("caching dists for stitched output", V=3):
        for dist_label, fingerprint in self._pex_info.distributions.items():
            cached_zip = os.path.join(
                self._pex_info.pex_root,
                "stitched_dists",
                "{}-{}".format(fingerprint, compression_tag),
                dist_label,
            )
            zip_dir, zip_name = os.path.split(cached_zip)
            with atomic_directory(zip_dir) as atomic_zip_dir:
                if not atomic_zip_dir.is_finalized():
                    # Build inside the work dir; the entry only appears at its final
                    # location once the atomic directory block exits.
                    work_zip = os.path.join(atomic_zip_dir.work_dir, zip_name)
                    with TRACER.timed("caching single dist {}".format(dist_label), V=3):
                        self._chroot.zip(
                            work_zip,
                            labels=(dist_label,),
                            # Always deterministic, regardless of how the final PEX is built:
                            # the cache key does not encode a timestamp mode.
                            deterministic_timestamp=True,
                            compress=compress,
                            exclude_file=is_pyc_temporary_file,
                        )
            assert os.path.isfile(cached_zip)
            zips_by_label[dist_label] = cached_zip

    return zips_by_label
| 810 | + |
@contextmanager
def _concatenate_cached_entries(self, zip_cmd, deterministic_timestamp, compress):
    # type: (ZipCommand, bool, bool) -> Iterator[BinaryIO]
    """Stitch a zip together from freshly zipped non-dist entries plus cached dist zips.

    The chroot labels that have no cached dist zip are zipped directly; the cached dist zips
    are then appended byte-for-byte in label order, and `zip_cmd.fix_concatenated_zips` is
    asked to turn that concatenation into `fixed.zip`.

    :param zip_cmd: The `zip` command wrapper used to repair the concatenated file.
    :param deterministic_timestamp: Passed through when zipping the uncached labels. (The
                                    cached dist zips are always built deterministically.)
    :param compress: Whether entries are compressed; also selects which cached zips are used.
    :return: A context manager yielding a binary read handle on the repaired zip. The handle
             and its backing temporary directory are only valid inside the `with` block.
    """
    merge_deps = self._cache_dists_for_stitching(compress=compress)
    uncached_labels = sorted(frozenset(self._chroot.labels()) - frozenset(merge_deps.keys()))

    with TRACER.timed("synthesize zipapp", V=6), temporary_dir() as td:
        # NOTE(review): the name suggests the raw concatenation is not itself a valid zip
        # until `fix_concatenated_zips` has processed it -- confirm against ZipCommand.
        concatenated_nonzip = os.path.join(td, "concatenated.broken-zip")
        with open(concatenated_nonzip, "w+b") as concat_f:
            with TRACER.timed("zipping up uncached sources", V=3):
                self._chroot.zip(
                    concat_f,
                    deterministic_timestamp=deterministic_timestamp,
                    compress=compress,
                    labels=uncached_labels,
                    # Keep the stitched output consistent with the plain zip path and with
                    # the cached dist zips, both of which skip ephemeral pyc temporaries.
                    exclude_file=is_pyc_temporary_file,
                )

            with TRACER.timed("concatenating cached dist zips", V=3):
                # Append the cached zips in label order so the merged result is
                # deterministic. Labels are unique dict keys, so sorting the keys gives the
                # same order as sorting the items by key.
                for dist_label in sorted(merge_deps):
                    with open(merge_deps[dist_label], "rb") as cached_f:
                        shutil.copyfileobj(cached_f, concat_f)  # type: ignore[misc]

        # The concatenated file is closed (and so fully flushed) before the external command
        # reads it from disk.
        fixed_zip = os.path.join(td, "fixed.zip")
        zip_cmd.fix_concatenated_zips(concatenated_nonzip, fixed_zip)

        with open(fixed_zip, "rb") as read_handle:
            yield read_handle
| 840 | + |
@contextmanager
def _prepare_executable_zipapp(self, filename):
    # type: (str) -> Iterator[BinaryIO]
    """Open `filename` for writing, emit the executable preamble and hand the file over.

    The shebang line, followed by the optional header, is written first. The open handle is
    then yielded so the caller can append the zip payload. Once the caller's block completes
    without raising, the file is marked executable.

    :param filename: Path of the PEX zipapp to create.
    :return: A context manager yielding the open binary output file.
    """
    with safe_open(filename, "wb") as pexfile:
        # The preamble must be the very first bytes of the file.
        assert os.path.getsize(pexfile.name) == 0
        shebang_line = "{}\n".format(self._shebang)
        pexfile.write(to_bytes(shebang_line))
        header = self._header
        if header:
            pexfile.write(to_bytes(header))

        yield pexfile

        chmod_plus_x(pexfile.name)
| 853 | + |
def _uncached_zipapp(
    self,
    filename,  # type: str
    deterministic_timestamp,  # type: bool
    compress,  # type: bool
):
    # type: (...) -> None
    """Write the PEX zipapp by zipping the whole chroot directly, with no dist caching.

    :param filename: Path of the PEX zipapp to create.
    :param deterministic_timestamp: Passed through to the chroot zip operation.
    :param compress: Whether zip entries are compressed.
    """
    with TRACER.timed("Zipping PEX file."):
        with self._prepare_executable_zipapp(filename) as pexfile:
            self._chroot.zip(
                pexfile,
                deterministic_timestamp=deterministic_timestamp,
                compress=compress,
                # Under `CopyMode.SYMLINK`, distributions are symlinks into the shared
                # ~/.pex/installed_wheels/... cache. Another process may be bytecode
                # compiling those wheels while we zip, which produces short-lived temporary
                # pyc files. They are useless and racy to copy, so leave them out.
                exclude_file=is_pyc_temporary_file,
            )
799 | | - chmod_plus_x(filename) |
| 879 | + |
def _build_zipapp(
    self,
    filename,  # type: str
    deterministic_timestamp=False,  # type: bool
    compress=True,  # type: bool
):
    # type: (...) -> None
    """Write the PEX zipapp to `filename`, stitching in cached dist zips when possible.

    The cached strategy is attempted only when the PEX has distributions, compression is
    requested and a `zip` command is found; otherwise the chroot is zipped directly.

    :param filename: Path of the PEX zipapp to create.
    :param deterministic_timestamp: Passed through to whichever zip strategy is used.
    :param compress: Whether zip entries are compressed.
    """
    # Naively creating a compressed zipapp with many downloaded distributions would perform
    # a lot of I/O on each pex invocation and spend a lot of CPU on compression. While
    # `--no-compress` runs significantly faster, the result may also be over twice as large.
    # With access to the `zip` command we can employ a caching strategy instead.
    zip_cmd = None  # type: Optional[ZipCommand]
    if self._pex_info.distributions and compress:
        zip_cmd = ZipCommand.find()
        if zip_cmd is None:
            TRACER.log(
                "`zip` command was not found, so compressed dist caches could not be used",
                V=1,
            )

    if zip_cmd is None:
        # Nothing to cache, compression disabled, or no `zip` available: zip directly.
        self._uncached_zipapp(
            filename, deterministic_timestamp=deterministic_timestamp, compress=compress
        )
        return

    stitched = self._concatenate_cached_entries(
        zip_cmd,
        deterministic_timestamp=deterministic_timestamp,
        compress=compress,
    )
    with TRACER.timed("cache dists and synthesize zipapp", V=9), stitched as concatenated_zip_f:
        with TRACER.timed(
            "copying synthesized concatenated zip to output file", V=9
        ), self._prepare_executable_zipapp(filename) as pexfile:
            shutil.copyfileobj(concatenated_zip_f, pexfile)  # type: ignore[misc]
0 commit comments