From 809848ef77feff05d7446b9512e07a19d9cb193e Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 1 Apr 2020 16:25:33 +0200 Subject: [PATCH 01/20] use a full-blown test package when testing --- dev-requirements.txt | 2 ++ tests/cloudpickle_test.py | 25 +++++-------- .../_cloudpickle_testpkg/__init__.py | 35 +++++++++++++++++++ .../_cloudpickle_testpkg}/mod.py | 0 tests/cloudpickle_testpkg/setup.py | 13 +++++++ tests/mypkg/__init__.py | 17 --------- 6 files changed, 58 insertions(+), 34 deletions(-) create mode 100644 tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py rename tests/{mypkg => cloudpickle_testpkg/_cloudpickle_testpkg}/mod.py (100%) create mode 100644 tests/cloudpickle_testpkg/setup.py delete mode 100644 tests/mypkg/__init__.py diff --git a/dev-requirements.txt b/dev-requirements.txt index 8dc4512d4..4e26b2106 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -11,3 +11,5 @@ numpy; python_version <= '3.8' # Code coverage uploader for Travis: codecov coverage +# Utility package used when running the cloudpickle test suite +./tests/cloudpickle_testpkg diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 2250193f3..3793d9e52 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -52,6 +52,8 @@ from .testutils import assert_run_python_script from .testutils import subprocess_worker +from _cloudpickle_testpkg import relative_imports_factory + _TEST_GLOBAL_VARIABLE = "default_value" @@ -1968,18 +1970,7 @@ def test_relative_import_inside_function(self): # Make sure relative imports inside round-tripped functions is not # broken.This was a bug in cloudpickle versions <= 0.5.3 and was # re-introduced in 0.8.0. - - # Both functions living inside modules and packages are tested. - def f(): - # module_function belongs to mypkg.mod1, which is a module - from .mypkg import module_function - return module_function() - - def g(): - # package_function belongs to mypkg, which is a package - from .mypkg import package_function - return package_function() - + f, g = relative_imports_factory() for func, source in zip([f, g], ["module", "package"]): # Make sure relative imports are initially working assert func() == "hello from a {}!".format(source) @@ -2028,7 +2019,7 @@ def f(a, /, b=1): def test___reduce___returns_string(self): # Non regression test for objects with a __reduce__ method returning a # string, meaning "save by attribute using save_global" - from .mypkg import some_singleton + from _cloudpickle_testpkg import some_singleton assert some_singleton.__reduce__() == "some_singleton" depickled_singleton = pickle_depickle( some_singleton, protocol=self.protocol) @@ -2100,7 +2091,7 @@ def test_pickle_dynamic_typevar_memoization(self): assert depickled_T1 is depickled_T2 def test_pickle_importable_typevar(self): - from .mypkg import T + from _cloudpickle_testpkg import T T1 = pickle_depickle(T, protocol=self.protocol) assert T1 is T @@ -2230,12 +2221,12 @@ def test_lookup_module_and_qualname_dynamic_typevar(): def test_lookup_module_and_qualname_importable_typevar(): - from . import mypkg - T = mypkg.T + import _cloudpickle_testpkg + T = _cloudpickle_testpkg.T module_and_name = _lookup_module_and_qualname(T, name=T.__name__) assert module_and_name is not None module, name = module_and_name - assert module is mypkg + assert module is _cloudpickle_testpkg assert name == 'T' diff --git a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py new file mode 100644 index 000000000..3d3c29b76 --- /dev/null +++ b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py @@ -0,0 +1,35 @@ +import typing + + +def package_function(): + """Function living inside a package, not a simple module""" + return "hello from a package!" + + +class _SingletonClass(object): + def __reduce__(self): + # This reducer is only valid for the top level "some_singleton" object. + return "some_singleton" + + +def relative_imports_factory(): + """Factory creating dynamically-defined functions using relative imports + + Relative import of functions living both inside modules and packages are + tested. + """ + def f(): + # module_function belongs to mypkg.mod, which is a module + from .mod import module_function + return module_function() + + def g(): + # package_function belongs to mypkg, which is a package + from . import package_function + return package_function() + + return f, g + + +some_singleton = _SingletonClass() +T = typing.TypeVar('T') diff --git a/tests/mypkg/mod.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py similarity index 100% rename from tests/mypkg/mod.py rename to tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py diff --git a/tests/cloudpickle_testpkg/setup.py b/tests/cloudpickle_testpkg/setup.py new file mode 100644 index 000000000..d80870032 --- /dev/null +++ b/tests/cloudpickle_testpkg/setup.py @@ -0,0 +1,13 @@ +from distutils.core import setup + + +dist = setup( + name='cloudpickle_testpkg', + version='0.0.0', + description='Package used only for cloudpickle testing purposes', + author='Cloudpipe', + author_email='cloudpipe@googlegroups.com', + license='BSD 3-Clause License', + packages=['_cloudpickle_testpkg'], + python_requires='>=3.5', +) diff --git a/tests/mypkg/__init__.py b/tests/mypkg/__init__.py deleted file mode 100644 index 5ef5ab486..000000000 --- a/tests/mypkg/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -import typing -from .mod import module_function - - -def package_function(): - """Function living inside a package, not a simple module""" - return "hello from a package!" - - -class _SingletonClass(object): - def __reduce__(self): - # This reducer is only valid for the top level "some_singleton" object. - return "some_singleton" - - -some_singleton = _SingletonClass() -T = typing.TypeVar('T') From 081133d53ffca0a83a84cb00d442b733fe29880c Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 2 Apr 2020 10:59:54 +0200 Subject: [PATCH 02/20] rename _is_importable_by_name -> _is_importable --- cloudpickle/cloudpickle.py | 34 ++++++++++++++++++++++++++++----- cloudpickle/cloudpickle_fast.py | 11 ++++++----- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index c639daab1..b86dae21f 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -163,9 +163,33 @@ def _whichmodule(obj, name): return None -def _is_importable_by_name(obj, name=None): - """Determine if obj can be pickled as attribute of a file-backed module""" - return _lookup_module_and_qualname(obj, name=name) is not None +if sys.version_info[:2] < (3, 7): # pragma: no branch + # Workaround bug in old Python versions: prior to Python 3.7, T.__module__ + # would always be set to "typing" even when the TypeVar T would be defined + # in a different module. + # + # For such older Python versions, we ignore the __module__ attribute of + # TypeVar instances and instead exhaustively lookup those instances in all + # currently imported modules via the _whichmodule function. + def _get_module_attr(obj): + if isinstance(obj, typing.TypeVar): + return None + return getattr(obj, '__module__', None) +else: + def _get_module_attr(obj): + return getattr(obj, '__module__', None) + + +def _is_importable(obj, name=None): + """Dispatcher utility to test the importability of various constructs.""" + if isinstance(obj, types.FunctionType): + return _lookup_module_and_qualname(obj, name=name) is not None + elif issubclass(type(obj), type): + return _lookup_module_and_qualname(obj, name=name) is not None + else: + raise ValueError( + "cannot check importability for type {}.".format(type(obj)) + ) def _lookup_module_and_qualname(obj, name=None): @@ -536,7 +560,7 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. """ - if _is_importable_by_name(obj, name=name): + if _is_importable(obj, name=name): return Pickler.save_global(self, obj, name=name) elif PYPY and isinstance(obj.__code__, builtin_code_type): return self.save_pypy_builtin_func(obj) @@ -840,7 +864,7 @@ def save_global(self, obj, name=None, pack=struct.pack): self._save_parametrized_type_hint(obj) elif name is not None: Pickler.save_global(self, obj, name=name) - elif not _is_importable_by_name(obj, name=name): + elif not _is_importable(obj, name=name): self.save_dynamic_class(obj) else: Pickler.save_global(self, obj, name=name) diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index 7df95f8dc..926c2d2c3 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -26,9 +26,10 @@ from .cloudpickle import ( _is_dynamic, _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, - _find_imported_submodules, _get_cell_contents, _is_importable_by_name, _builtin_type, - Enum, _get_or_create_tracker_id, _make_skeleton_class, _make_skeleton_enum, - _extract_class_dict, dynamic_subimport, subimport, _typevar_reduce, _get_bases, + _find_imported_submodules, _get_cell_contents, _is_importable, + _builtin_type, Enum, _get_or_create_tracker_id, _make_skeleton_class, + _make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport, + _typevar_reduce, _get_bases, ) load, loads = _pickle.load, _pickle.loads @@ -333,7 +334,7 @@ def _class_reduce(obj): return type, (NotImplemented,) elif obj in _BUILTIN_TYPE_NAMES: return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) - elif not _is_importable_by_name(obj): + elif not _is_importable(obj): return _dynamic_class_reduce(obj) return NotImplemented @@ -505,7 +506,7 @@ def _function_reduce(self, obj): As opposed to cloudpickle.py, There no special handling for builtin pypy functions because cloudpickle_fast is CPython-specific. """ - if _is_importable_by_name(obj): + if _is_importable(obj): return NotImplemented else: return self._dynamic_function_reduce(obj) From 0e47f3860ab421c119143aa53ba17d6ae070fb70 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 2 Apr 2020 11:09:07 +0200 Subject: [PATCH 03/20] factor out a tiny _get_parent(module) utility --- cloudpickle/cloudpickle.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index b86dae21f..39ca07f92 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1331,6 +1331,19 @@ class id will also reuse this enum definition. return _lookup_class_or_track(class_tracker_id, enum_class) +def _get_parent(module): + """(Try to) access the parent module (or package) of a submodule""" + parent_name, _, module_name = module.__name__.rpartition('.') + if parent_name: # pragma: no cover + try: + return sys.modules[parent_name] + except KeyError: + msg = "parent {!r} not in sys.modules" + raise ImportError(msg.format(parent_name)) + else: + return None + + def _is_dynamic(module): """ Return True if the module is special module that cannot be imported by its @@ -1343,24 +1356,13 @@ def _is_dynamic(module): if module.__spec__ is not None: return False - # In PyPy, Some built-in modules such as _codecs can have their - # __spec__ attribute set to None despite being imported. For such - # modules, the ``_find_spec`` utility of the standard library is used. - parent_name = module.__name__.rpartition('.')[0] - if parent_name: # pragma: no cover - # This code handles the case where an imported package (and not - # module) remains with __spec__ set to None. It is however untested - # as no package in the PyPy stdlib has __spec__ set to None after - # it is imported. - try: - parent = sys.modules[parent_name] - except KeyError: - msg = "parent {!r} not in sys.modules" - raise ImportError(msg.format(parent_name)) - else: - pkgpath = parent.__path__ - else: - pkgpath = None + # It *may be* that in PyPy, some built-in modules have their __spec__ + # attribute set to None despite being imported. For such modules, we try a + # little bit harder to see if they can be loaded by using importlib's + # ``_find_spec``. This case is however untested as no package in the PyPy + # stdlib has __spec__ set to None after it is imported. + parent = _get_parent(module) + pkgpath = getattr(parent, '__path__', None) return _find_spec(module.__name__, pkgpath, module) is None From f4d9aa7c610eb36b94dc542a623ddff3e549bcf1 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 2 Apr 2020 11:13:04 +0200 Subject: [PATCH 04/20] check for module importability rather than dynamism --- cloudpickle/cloudpickle.py | 16 +++++++++++----- cloudpickle/cloudpickle_fast.py | 6 +++--- tests/cloudpickle_test.py | 10 ++++++++-- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 39ca07f92..39ee54baf 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -186,6 +186,8 @@ def _is_importable(obj, name=None): return _lookup_module_and_qualname(obj, name=name) is not None elif issubclass(type(obj), type): return _lookup_module_and_qualname(obj, name=name) is not None + elif isinstance(obj, types.ModuleType): + return _module_is_importable(obj) else: raise ValueError( "cannot check importability for type {}.".format(type(obj)) @@ -220,8 +222,9 @@ def _lookup_module_and_qualname(obj, name=None): # supported, as the standard pickle does not support it either. return None - # module has been added to sys.modules, but it can still be dynamic. - if _is_dynamic(module): + # module has been added to sys.modules, but it is not enough to be + # considered importable + if not _is_importable(module): return None try: @@ -520,12 +523,12 @@ def save_module(self, obj): """ Save a module as an import """ - if _is_dynamic(obj): + if _is_importable(obj): + self.save_reduce(subimport, (obj.__name__,), obj=obj) + else: obj.__dict__.pop('__builtins__', None) self.save_reduce(dynamic_subimport, (obj.__name__, vars(obj)), obj=obj) - else: - self.save_reduce(subimport, (obj.__name__,), obj=obj) dispatch[types.ModuleType] = save_module @@ -1366,6 +1369,9 @@ def _is_dynamic(module): return _find_spec(module.__name__, pkgpath, module) is None +def _module_is_importable(obj): + if not _is_dynamic(obj): + return True def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id): tv = typing.TypeVar( diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index 926c2d2c3..5518e780d 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -277,11 +277,11 @@ def _memoryview_reduce(obj): def _module_reduce(obj): - if _is_dynamic(obj): + if _is_importable(obj): + return subimport, (obj.__name__,) + else: obj.__dict__.pop('__builtins__', None) return dynamic_subimport, (obj.__name__, vars(obj)) - else: - return subimport, (obj.__name__,) def _method_reduce(obj): diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 3793d9e52..feb42d8d2 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -43,7 +43,7 @@ tornado = None import cloudpickle -from cloudpickle.cloudpickle import _is_dynamic +from cloudpickle.cloudpickle import _is_dynamic, _is_importable from cloudpickle.cloudpickle import _make_empty_cell, cell_set from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from cloudpickle.cloudpickle import _lookup_module_and_qualname @@ -680,7 +680,7 @@ def my_small_function(x, y): assert b'unwanted_function' not in b assert b'math' not in b - def test_is_dynamic_module(self): + def test_module_importability(self): import pickle # decouple this test from global imports import os.path import distutils @@ -691,10 +691,16 @@ def test_is_dynamic_module(self): assert not _is_dynamic(distutils) # package assert not _is_dynamic(distutils.ccompiler) # module in package + assert _is_importable(pickle) + assert _is_importable(os.path) # fake (aliased) module + assert _is_importable(distutils) # package + assert _is_importable(distutils.ccompiler) # module in package + # user-created module without using the import machinery are also # dynamic dynamic_module = types.ModuleType('dynamic_module') assert _is_dynamic(dynamic_module) + assert not _is_importable(dynamic_module) if platform.python_implementation() == 'PyPy': import _codecs From 57c2d9767449efe694d97bc26dbfca674cceafdf Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 2 Apr 2020 11:31:49 +0200 Subject: [PATCH 05/20] flag submodules of extension modules as importable --- cloudpickle/cloudpickle.py | 11 ++++++++ tests/cloudpickle_test.py | 9 +++++++ .../_cloudpickle_testpkg/__init__.py | 1 + .../_cloudpickle_testpkg/mod.py | 27 +++++++++++++++++++ 4 files changed, 48 insertions(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 39ee54baf..4262a940c 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1372,6 +1372,17 @@ def _is_dynamic(module): def _module_is_importable(obj): if not _is_dynamic(obj): return True + + # #354: the module is dynamic, but can still be importable if it is created + # and added to sys.modules during the initialization of its parent + # (provided that its parent is file-backed) + module_relative_name = obj.__name__.rpartition('.')[-1] + parent = _get_parent(obj) + if hasattr(parent, module_relative_name) and _is_importable(parent): + return True + return False + + def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id): tv = typing.TypeVar( diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index feb42d8d2..cf4a0e87b 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -706,6 +706,15 @@ def test_module_importability(self): import _codecs assert not _is_dynamic(_codecs) + # #354: Check that modules created dynamically during the import of + # their parent modules are flagged by cloudpickle as dynamic, but + # importable. See the mod_with_dynamic_submodule documentation for + # more details of this use case. + import _cloudpickle_testpkg.mod.dynamic_submodule as m + assert _is_dynamic(m) + assert _is_importable(m) + assert pickle_depickle(m, protocol=self.protocol) is m + def test_Ellipsis(self): self.assertEqual(Ellipsis, pickle_depickle(Ellipsis, protocol=self.protocol)) diff --git a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py index 3d3c29b76..2da8a6de1 100644 --- a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py +++ b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py @@ -1,4 +1,5 @@ import typing +from . import mod # noqa def package_function(): diff --git a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py index a703a6aa5..cb564a536 100644 --- a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py +++ b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py @@ -1,2 +1,29 @@ +import sys +import types + + +# #354: To emulate package capabilities while being a single file, an extension +# module (for instance a mod.so file) can dynamically create a module object +# (most likely using the *package_name*.*parent_module_name*.*submodule_name* +# naming convention to be compatible with ``import`` semantics). Internally, +# it will use the Python/C API ``PyImport_AddModule(submodule_qualified_name)`` +# utility, which creates a module object and adds it inside ``sys.modules``. A +# module created this way IS a dynamic module. However, because the ``import`` +# machinery automatically imports the parent package/module of a submodule +# before importing the submodule itself, and importing the parent +# package/module creates and append the submodule to sys.modules (sys.modules +# acts as a cache for the import machinery), this submodule, albeit dynamic, is +# importable. To detect this, we need to recursively check the parent modules +# of this submodule to see if the parent module is importable. If yes, we +# reasonably assume that the submodule named using the aforementioned +# hierarchised convention has been created during the import of its parent +# module. The following lines emulate such a behavior without being a compiled +# extension module. + +submodule_name = '_cloudpickle_testpkg.mod.dynamic_submodule' +dynamic_submodule = types.ModuleType(submodule_name) +sys.modules[submodule_name] = dynamic_submodule + + def module_function(): return "hello from a module!" From 2f3c738fafe80b49eb755ccdc52333e89379c658 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 2 Apr 2020 11:44:30 +0200 Subject: [PATCH 06/20] fixup! check for module importability rather than dynamism --- cloudpickle/cloudpickle.py | 3 ++- tests/cloudpickle_test.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 4262a940c..e4331cdf3 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -190,7 +190,8 @@ def _is_importable(obj, name=None): return _module_is_importable(obj) else: raise ValueError( - "cannot check importability for type {}.".format(type(obj)) + "cannot check importability of {} instances".format( + type(obj).__name__) ) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index cf4a0e87b..3eb1dd9d2 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -715,6 +715,10 @@ def test_module_importability(self): assert _is_importable(m) assert pickle_depickle(m, protocol=self.protocol) is m + expected = "cannot check importability of object instances" + with pytest.raises(ValueError, match=expected): + _is_importable(object()) + def test_Ellipsis(self): self.assertEqual(Ellipsis, pickle_depickle(Ellipsis, protocol=self.protocol)) From 53998c198f270af666d4452f7e310aed69fee4bd Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 22 Apr 2020 12:19:02 +0200 Subject: [PATCH 07/20] rename _get_parent ->_get_parent_module_or_package --- cloudpickle/cloudpickle.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index e4331cdf3..2a70959de 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1335,7 +1335,7 @@ class id will also reuse this enum definition. return _lookup_class_or_track(class_tracker_id, enum_class) -def _get_parent(module): +def _get_parent_module_or_package(module): """(Try to) access the parent module (or package) of a submodule""" parent_name, _, module_name = module.__name__.rpartition('.') if parent_name: # pragma: no cover @@ -1365,7 +1365,7 @@ def _is_dynamic(module): # little bit harder to see if they can be loaded by using importlib's # ``_find_spec``. This case is however untested as no package in the PyPy # stdlib has __spec__ set to None after it is imported. - parent = _get_parent(module) + parent = _get_parent_module_or_package(module) pkgpath = getattr(parent, '__path__', None) return _find_spec(module.__name__, pkgpath, module) is None @@ -1378,7 +1378,7 @@ def _module_is_importable(obj): # and added to sys.modules during the initialization of its parent # (provided that its parent is file-backed) module_relative_name = obj.__name__.rpartition('.')[-1] - parent = _get_parent(obj) + parent = _get_parent_module_or_package(obj) if hasattr(parent, module_relative_name) and _is_importable(parent): return True return False From c29408ce3780c0d6d2c8a7609f4e599365d07b5d Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 22 Apr 2020 12:19:57 +0200 Subject: [PATCH 08/20] remove stale pytest pragma --- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 2a70959de..33fa1b8e7 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1338,7 +1338,7 @@ class id will also reuse this enum definition. def _get_parent_module_or_package(module): """(Try to) access the parent module (or package) of a submodule""" parent_name, _, module_name = module.__name__.rpartition('.') - if parent_name: # pragma: no cover + if parent_name: try: return sys.modules[parent_name] except KeyError: From 48288749956a2f5c93c228eb04c24364956e15dd Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 22 Apr 2020 12:22:11 +0200 Subject: [PATCH 09/20] raise a TypeError when a type mismatch happens --- cloudpickle/cloudpickle.py | 2 +- tests/cloudpickle_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 33fa1b8e7..fd6eb1eb4 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -189,7 +189,7 @@ def _is_importable(obj, name=None): elif isinstance(obj, types.ModuleType): return _module_is_importable(obj) else: - raise ValueError( + raise TypeError( "cannot check importability of {} instances".format( type(obj).__name__) ) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 3eb1dd9d2..e9eb3db0b 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -716,7 +716,7 @@ def test_module_importability(self): assert pickle_depickle(m, protocol=self.protocol) is m expected = "cannot check importability of object instances" - with pytest.raises(ValueError, match=expected): + with pytest.raises(TypeError, match=expected): _is_importable(object()) def test_Ellipsis(self): From da60d2d8ed06f68da022cc7aa3013b84b5ef1c69 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 22 Apr 2020 12:25:52 +0200 Subject: [PATCH 10/20] test nested importable dynamic submodules. --- tests/cloudpickle_test.py | 7 +++++++ tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index e9eb3db0b..6f89ec139 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -715,6 +715,13 @@ def test_module_importability(self): assert _is_importable(m) assert pickle_depickle(m, protocol=self.protocol) is m + # Do the same for an importable dynamic submodule inside a dynamic + # module inside a file-backed module. + import _cloudpickle_testpkg.mod.dynamic_submodule.dynamic_subsubmodule as sm # noqa + assert _is_dynamic(sm) + assert _is_importable(sm) + assert pickle_depickle(sm, protocol=self.protocol) is sm + expected = "cannot check importability of object instances" with pytest.raises(TypeError, match=expected): _is_importable(object()) diff --git a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py index cb564a536..629847344 100644 --- a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py +++ b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py @@ -24,6 +24,15 @@ dynamic_submodule = types.ModuleType(submodule_name) sys.modules[submodule_name] = dynamic_submodule +# What about a dynamic submodule inside a dynamic submodule inside an +# importable module? +subsubmodule_name = ( + '_cloudpickle_testpkg.mod.dynamic_submodule.dynamic_subsubmodule' +) +dynamic_subsubmodule = types.ModuleType(subsubmodule_name) +dynamic_submodule.dynamic_subsubmodule = dynamic_subsubmodule +sys.modules[subsubmodule_name] = dynamic_subsubmodule + def module_function(): return "hello from a module!" From 504b0936866ece1775f31ceb3ee4bd473cd140a3 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 21 May 2020 00:07:29 +0200 Subject: [PATCH 11/20] add more test cases --- tests/cloudpickle_test.py | 23 ++++++++++++++++ .../_cloudpickle_testpkg/mod.py | 27 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 6f89ec139..088e48685 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -715,6 +715,29 @@ def test_module_importability(self): assert _is_importable(m) assert pickle_depickle(m, protocol=self.protocol) is m + # Check for similar behavior for a module that cannot be imported by + # attribute lookup. + from _cloudpickle_testpkg.mod import dynamic_submodule_two as m2 + # Note: import _cloudpickle_testpkg.mod.dynamic_submodule_two as m2 + # works only for Python 3.7+ + assert _is_dynamic(m2) + assert _is_importable(m2) + assert pickle_depickle(m2, protocol=self.protocol) is m2 + + # Submodule_three is a dynamic module only importable via module lookup + with pytest.raises(ImportError): + import _cloudpickle_testpkg.mod.submodule_three # noqa + from _cloudpickle_testpkg.mod import submodule_three as m3 + assert _is_dynamic(m3) + assert not _is_importable(m3) + + # This module cannot be pickled using attribute lookup (as it does not + # have a `__module__` attribute like classes and functions. + assert not hasattr(m3, '__module__') + depickled_m3 = pickle_depickle(m3, protocol=self.protocol) + assert depickled_m3 is not m3 + assert m3.f(1) == depickled_m3.f(1) + # Do the same for an importable dynamic submodule inside a dynamic # module inside a file-backed module. import _cloudpickle_testpkg.mod.dynamic_submodule.dynamic_subsubmodule as sm # noqa diff --git a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py index 629847344..02b144c30 100644 --- a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py +++ b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py @@ -22,7 +22,34 @@ submodule_name = '_cloudpickle_testpkg.mod.dynamic_submodule' dynamic_submodule = types.ModuleType(submodule_name) + +# This line allows the dynamic_module to be imported using either one of: +# - ``from _cloudpickle_testpkg.mod import dynamic_submodule`` +# - ``import _cloudpickle_testpkg.mod.dynamic_submodule`` sys.modules[submodule_name] = dynamic_submodule +# Both lines will make importlib try to get the module from sys.modules after +# importing the parent module, before trying getattr(mod, 'dynamic_submodule'), +# so this dynamic module could be binded to another name. This behavior is +# demonstrated with `dynamic_submodule_two` + +submodule_name_two = '_cloudpickle_testpkg.mod.dynamic_submodule_two' +# Notice the inconsistent name binding, breaking attribute lookup-based import +# attempts. +another_submodule = types.ModuleType(submodule_name_two) +sys.modules[submodule_name_two] = another_submodule + + +# In this third case, the module is not added to sys.modules, and can only be +# imported using attribute lookup-based imports. +submodule_three = types.ModuleType( + '_cloudpickle_testpkg.mod.dynamic_submodule_three' +) +code = """ +def f(x): + return x +""" + +exec(code, vars(submodule_three)) # What about a dynamic submodule inside a dynamic submodule inside an # importable module? From f9dd9156ee16412522084ae452d512c90a96a2d3 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 21 May 2020 00:35:13 +0200 Subject: [PATCH 12/20] [WIP] simplify importability detection --- cloudpickle/cloudpickle.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index fd6eb1eb4..7fdf90159 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1371,17 +1371,7 @@ def _is_dynamic(module): def _module_is_importable(obj): - if not _is_dynamic(obj): - return True - - # #354: the module is dynamic, but can still be importable if it is created - # and added to sys.modules during the initialization of its parent - # (provided that its parent is file-backed) - module_relative_name = obj.__name__.rpartition('.')[-1] - parent = _get_parent_module_or_package(obj) - if hasattr(parent, module_relative_name) and _is_importable(parent): - return True - return False + return obj.__name__ in sys.modules def _make_typevar(name, bound, constraints, covariant, contravariant, From 5c86635de2413ccf2acd86719693c23ce036f358 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 23 May 2020 16:50:47 +0200 Subject: [PATCH 13/20] remove stale code --- cloudpickle/cloudpickle.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 7fdf90159..cf7f674b3 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -163,23 +163,6 @@ def _whichmodule(obj, name): return None -if sys.version_info[:2] < (3, 7): # pragma: no branch - # Workaround bug in old Python versions: prior to Python 3.7, T.__module__ - # would always be set to "typing" even when the TypeVar T would be defined - # in a different module. - # - # For such older Python versions, we ignore the __module__ attribute of - # TypeVar instances and instead exhaustively lookup those instances in all - # currently imported modules via the _whichmodule function. - def _get_module_attr(obj): - if isinstance(obj, typing.TypeVar): - return None - return getattr(obj, '__module__', None) -else: - def _get_module_attr(obj): - return getattr(obj, '__module__', None) - - def _is_importable(obj, name=None): """Dispatcher utility to test the importability of various constructs.""" if isinstance(obj, types.FunctionType): From e31d415e7885a975edc413eb4ce68ef526cdf92b Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 23 May 2020 17:01:59 +0200 Subject: [PATCH 14/20] simplify module importability detection --- cloudpickle/cloudpickle.py | 28 +--------------------------- cloudpickle/cloudpickle_fast.py | 2 +- tests/cloudpickle_test.py | 29 ++++++++++------------------- 3 files changed, 12 insertions(+), 47 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index cf7f674b3..07ead433d 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -170,7 +170,7 @@ def _is_importable(obj, name=None): elif issubclass(type(obj), type): return _lookup_module_and_qualname(obj, name=name) is not None elif isinstance(obj, types.ModuleType): - return _module_is_importable(obj) + return obj.__name__ in sys.modules else: raise TypeError( "cannot check importability of {} instances".format( @@ -1331,32 +1331,6 @@ def _get_parent_module_or_package(module): return None -def _is_dynamic(module): - """ - Return True if the module is special module that cannot be imported by its - name. - """ - # Quick check: module that have __file__ attribute are not dynamic modules. - if hasattr(module, '__file__'): - return False - - if module.__spec__ is not None: - return False - - # It *may be* that in PyPy, some built-in modules have their __spec__ - # attribute set to None despite being imported. For such modules, we try a - # little bit harder to see if they can be loaded by using importlib's - # ``_find_spec``. This case is however untested as no package in the PyPy - # stdlib has __spec__ set to None after it is imported. - parent = _get_parent_module_or_package(module) - pkgpath = getattr(parent, '__path__', None) - return _find_spec(module.__name__, pkgpath, module) is None - - -def _module_is_importable(obj): - return obj.__name__ in sys.modules - - def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id): tv = typing.TypeVar( diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index 5518e780d..b285482ec 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -25,7 +25,7 @@ from _pickle import Pickler from .cloudpickle import ( - _is_dynamic, _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, + _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, _find_imported_submodules, _get_cell_contents, _is_importable, _builtin_type, Enum, _get_or_create_tracker_id, _make_skeleton_class, _make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport, diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 088e48685..89ba840c8 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -43,7 +43,7 @@ tornado = None import cloudpickle -from cloudpickle.cloudpickle import _is_dynamic, _is_importable +from cloudpickle.cloudpickle import _is_importable from cloudpickle.cloudpickle import _make_empty_cell, cell_set from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from cloudpickle.cloudpickle import _lookup_module_and_qualname @@ -686,32 +686,23 @@ def test_module_importability(self): import distutils import distutils.ccompiler - assert not _is_dynamic(pickle) - assert not _is_dynamic(os.path) # fake (aliased) module - assert not _is_dynamic(distutils) # package - assert not _is_dynamic(distutils.ccompiler) # module in package - assert _is_importable(pickle) assert _is_importable(os.path) # fake (aliased) module assert _is_importable(distutils) # package assert _is_importable(distutils.ccompiler) # module in package - # user-created module without using the import machinery are also - # dynamic dynamic_module = types.ModuleType('dynamic_module') - assert _is_dynamic(dynamic_module) assert not _is_importable(dynamic_module) if platform.python_implementation() == 'PyPy': import _codecs - assert not _is_dynamic(_codecs) + assert _is_importable(_codecs) # #354: Check that modules created dynamically during the import of # their parent modules are flagged by cloudpickle as dynamic, but # importable. See the mod_with_dynamic_submodule documentation for # more details of this use case. import _cloudpickle_testpkg.mod.dynamic_submodule as m - assert _is_dynamic(m) assert _is_importable(m) assert pickle_depickle(m, protocol=self.protocol) is m @@ -720,7 +711,6 @@ def test_module_importability(self): from _cloudpickle_testpkg.mod import dynamic_submodule_two as m2 # Note: import _cloudpickle_testpkg.mod.dynamic_submodule_two as m2 # works only for Python 3.7+ - assert _is_dynamic(m2) assert _is_importable(m2) assert pickle_depickle(m2, protocol=self.protocol) is m2 @@ -728,7 +718,6 @@ def test_module_importability(self): with pytest.raises(ImportError): import _cloudpickle_testpkg.mod.submodule_three # noqa from _cloudpickle_testpkg.mod import submodule_three as m3 - assert _is_dynamic(m3) assert not _is_importable(m3) # This module cannot be pickled using attribute lookup (as it does not @@ -741,7 +730,6 @@ def test_module_importability(self): # Do the same for an importable dynamic submodule inside a dynamic # module inside a file-backed module. import _cloudpickle_testpkg.mod.dynamic_submodule.dynamic_subsubmodule as sm # noqa - assert _is_dynamic(sm) assert _is_importable(sm) assert pickle_depickle(sm, protocol=self.protocol) is sm @@ -1232,11 +1220,14 @@ def func(x): func.__module__ = None class NonModuleObject(object): + def __ini__(self): + self.some_attr = None + def __getattr__(self, name): - # We whitelist func so that a _whichmodule(func, None) call returns - # the NonModuleObject instance if a type check on the entries - # of sys.modules is not carried out, but manipulating this - # instance thinking it really is a module later on in the + # We whitelist func so that a _whichmodule(func, None) call + # returns the NonModuleObject instance if a type check on the + # entries of sys.modules is not carried out, but manipulating + # this instance thinking it really is a module later on in the # pickling process of func errors out if name == 'func': return func @@ -1251,7 +1242,7 @@ def __getattr__(self, name): # Any manipulation of non_module_object relying on attribute access # will raise an Exception with pytest.raises(AttributeError): - _is_dynamic(non_module_object) + _ = non_module_object.some_attr try: sys.modules['NonModuleObject'] = non_module_object From 420c4adc0a7986deb15f595b6a28da3999f7d24b Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 23 May 2020 17:05:58 +0200 Subject: [PATCH 15/20] fixup! simplify module importability detection --- cloudpickle/cloudpickle.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 07ead433d..b0fd075ed 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -197,6 +197,8 @@ def _lookup_module_and_qualname(obj, name=None): if module_name == "__main__": return None + # Note: if module_name is in sys.modules, the corresponding module is + # assumed importable at unpickling time. See #357 module = sys.modules.get(module_name, None) if module is None: # The main reason why obj's module would not be imported is that this @@ -206,11 +208,6 @@ def _lookup_module_and_qualname(obj, name=None): # supported, as the standard pickle does not support it either. return None - # module has been added to sys.modules, but it is not enough to be - # considered importable - if not _is_importable(module): - return None - try: obj2, parent = _getattribute(module, name) except AttributeError: From a74bd9e4b7b1cfb44482fa55b596053bcd28eb65 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 23 May 2020 17:17:58 +0200 Subject: [PATCH 16/20] remove unused function --- cloudpickle/cloudpickle.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index b0fd075ed..737d8ae48 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1315,19 +1315,6 @@ class id will also reuse this enum definition. return _lookup_class_or_track(class_tracker_id, enum_class) -def _get_parent_module_or_package(module): - """(Try to) access the parent module (or package) of a submodule""" - parent_name, _, module_name = module.__name__.rpartition('.') - if parent_name: - try: - return sys.modules[parent_name] - except KeyError: - msg = "parent {!r} not in sys.modules" - raise ImportError(msg.format(parent_name)) - else: - return None - - def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id): tv = typing.TypeVar( From 4d0f7cb84c9819efaab6b9c1d0363321aeae2f63 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sun, 24 May 2020 18:26:17 +0200 Subject: [PATCH 17/20] CI trigger From 6f899b65b0194618f75bd7d270fef7b257b73e8a Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sun, 24 May 2020 18:36:23 +0200 Subject: [PATCH 18/20] CI trigger From 988e4ab0e6892e9cb7ad12f8fed773b12d2475a7 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 5 Jun 2020 15:56:28 +0200 Subject: [PATCH 19/20] Apply suggestions from code review nitpicks --- cloudpickle/cloudpickle.py | 5 +++++ tests/cloudpickle_test.py | 8 ++++---- .../cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py | 5 +++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 737d8ae48..95a9f8273 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -170,6 +170,11 @@ def _is_importable(obj, name=None): elif issubclass(type(obj), type): return _lookup_module_and_qualname(obj, name=name) is not None elif isinstance(obj, types.ModuleType): + # We assume that sys.modules is primarily used as a cache mechanism for + # the Python import machinery. Checking if a module has been added in + # is sys.modules therefore a cheap and simple heuristic to tell us whether + # we can assume that a given module could be imported by name in + # another Python process. return obj.__name__ in sys.modules else: raise TypeError( diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 89ba840c8..b1821dba6 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -699,9 +699,9 @@ def test_module_importability(self): assert _is_importable(_codecs) # #354: Check that modules created dynamically during the import of - # their parent modules are flagged by cloudpickle as dynamic, but - # importable. See the mod_with_dynamic_submodule documentation for - # more details of this use case. + # their parent modules are considered importable by cloudpickle. + # See the mod_with_dynamic_submodule documentation for more + # details of this use case. import _cloudpickle_testpkg.mod.dynamic_submodule as m assert _is_importable(m) assert pickle_depickle(m, protocol=self.protocol) is m @@ -2008,7 +2008,7 @@ def check_positive(x): def test_relative_import_inside_function(self): # Make sure relative imports inside round-tripped functions is not - # broken.This was a bug in cloudpickle versions <= 0.5.3 and was + # broken. This was a bug in cloudpickle versions <= 0.5.3 and was # re-introduced in 0.8.0. f, g = relative_imports_factory() for func, source in zip([f, g], ["module", "package"]): diff --git a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py index 2da8a6de1..595243c26 100644 --- a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py +++ b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py @@ -20,12 +20,13 @@ def relative_imports_factory(): tested. """ def f(): - # module_function belongs to mypkg.mod, which is a module + # module_function belongs to _cloudpickle_testpkg.mod, which is a + # module from .mod import module_function return module_function() def g(): - # package_function belongs to mypkg, which is a package + # package_function belongs to _cloudpickle_testpkg, which is a package from . import package_function return package_function() From a080dd9344d1950834da704b5915d4a03e7c7aca Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sun, 7 Jun 2020 14:27:06 +0200 Subject: [PATCH 20/20] fill changelog --- CHANGES.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 006ca2525..df02e6a69 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,10 @@ 1.5.0 (in development) ====================== +- Fix a bug causing cloudpickle to crash when pickling dynamically created, + importable modules. + ([issue #360](https://github.com/cloudpipe/cloudpickle/issues/354)) + 1.4.1 =====