diff --git a/CHANGES.md b/CHANGES.md index 006ca2525..df02e6a69 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,10 @@ 1.5.0 (in development) ====================== +- Fix a bug causing cloudpickle to crash when pickling dynamically created, + importable modules. + ([issue #360](https://github.com/cloudpipe/cloudpickle/issues/354)) + 1.4.1 ===== diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index c639daab1..95a9f8273 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -163,9 +163,24 @@ def _whichmodule(obj, name): return None -def _is_importable_by_name(obj, name=None): - """Determine if obj can be pickled as attribute of a file-backed module""" - return _lookup_module_and_qualname(obj, name=name) is not None +def _is_importable(obj, name=None): + """Dispatcher utility to test the importability of various constructs.""" + if isinstance(obj, types.FunctionType): + return _lookup_module_and_qualname(obj, name=name) is not None + elif issubclass(type(obj), type): + return _lookup_module_and_qualname(obj, name=name) is not None + elif isinstance(obj, types.ModuleType): + # We assume that sys.modules is primarily used as a cache mechanism for + # the Python import machinery. Checking if a module has been added in + # is sys.modules therefore a cheap and simple heuristic to tell us whether + # we can assume that a given module could be imported by name in + # another Python process. + return obj.__name__ in sys.modules + else: + raise TypeError( + "cannot check importability of {} instances".format( + type(obj).__name__) + ) def _lookup_module_and_qualname(obj, name=None): @@ -187,6 +202,8 @@ def _lookup_module_and_qualname(obj, name=None): if module_name == "__main__": return None + # Note: if module_name is in sys.modules, the corresponding module is + # assumed importable at unpickling time. See #357 module = sys.modules.get(module_name, None) if module is None: # The main reason why obj's module would not be imported is that this @@ -196,10 +213,6 @@ def _lookup_module_and_qualname(obj, name=None): # supported, as the standard pickle does not support it either. return None - # module has been added to sys.modules, but it can still be dynamic. - if _is_dynamic(module): - return None - try: obj2, parent = _getattribute(module, name) except AttributeError: @@ -496,12 +509,12 @@ def save_module(self, obj): """ Save a module as an import """ - if _is_dynamic(obj): + if _is_importable(obj): + self.save_reduce(subimport, (obj.__name__,), obj=obj) + else: obj.__dict__.pop('__builtins__', None) self.save_reduce(dynamic_subimport, (obj.__name__, vars(obj)), obj=obj) - else: - self.save_reduce(subimport, (obj.__name__,), obj=obj) dispatch[types.ModuleType] = save_module @@ -536,7 +549,7 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. """ - if _is_importable_by_name(obj, name=name): + if _is_importable(obj, name=name): return Pickler.save_global(self, obj, name=name) elif PYPY and isinstance(obj.__code__, builtin_code_type): return self.save_pypy_builtin_func(obj) @@ -840,7 +853,7 @@ def save_global(self, obj, name=None, pack=struct.pack): self._save_parametrized_type_hint(obj) elif name is not None: Pickler.save_global(self, obj, name=name) - elif not _is_importable_by_name(obj, name=name): + elif not _is_importable(obj, name=name): self.save_dynamic_class(obj) else: Pickler.save_global(self, obj, name=name) @@ -1307,39 +1320,6 @@ class id will also reuse this enum definition. return _lookup_class_or_track(class_tracker_id, enum_class) -def _is_dynamic(module): - """ - Return True if the module is special module that cannot be imported by its - name. - """ - # Quick check: module that have __file__ attribute are not dynamic modules. - if hasattr(module, '__file__'): - return False - - if module.__spec__ is not None: - return False - - # In PyPy, Some built-in modules such as _codecs can have their - # __spec__ attribute set to None despite being imported. For such - # modules, the ``_find_spec`` utility of the standard library is used. - parent_name = module.__name__.rpartition('.')[0] - if parent_name: # pragma: no cover - # This code handles the case where an imported package (and not - # module) remains with __spec__ set to None. It is however untested - # as no package in the PyPy stdlib has __spec__ set to None after - # it is imported. - try: - parent = sys.modules[parent_name] - except KeyError: - msg = "parent {!r} not in sys.modules" - raise ImportError(msg.format(parent_name)) - else: - pkgpath = parent.__path__ - else: - pkgpath = None - return _find_spec(module.__name__, pkgpath, module) is None - - def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id): tv = typing.TypeVar( diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index 7df95f8dc..b285482ec 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -25,10 +25,11 @@ from _pickle import Pickler from .cloudpickle import ( - _is_dynamic, _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, - _find_imported_submodules, _get_cell_contents, _is_importable_by_name, _builtin_type, - Enum, _get_or_create_tracker_id, _make_skeleton_class, _make_skeleton_enum, - _extract_class_dict, dynamic_subimport, subimport, _typevar_reduce, _get_bases, + _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, + _find_imported_submodules, _get_cell_contents, _is_importable, + _builtin_type, Enum, _get_or_create_tracker_id, _make_skeleton_class, + _make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport, + _typevar_reduce, _get_bases, ) load, loads = _pickle.load, _pickle.loads @@ -276,11 +277,11 @@ def _memoryview_reduce(obj): def _module_reduce(obj): - if _is_dynamic(obj): + if _is_importable(obj): + return subimport, (obj.__name__,) + else: obj.__dict__.pop('__builtins__', None) return dynamic_subimport, (obj.__name__, vars(obj)) - else: - return subimport, (obj.__name__,) def _method_reduce(obj): @@ -333,7 +334,7 @@ def _class_reduce(obj): return type, (NotImplemented,) elif obj in _BUILTIN_TYPE_NAMES: return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) - elif not _is_importable_by_name(obj): + elif not _is_importable(obj): return _dynamic_class_reduce(obj) return NotImplemented @@ -505,7 +506,7 @@ def _function_reduce(self, obj): As opposed to cloudpickle.py, There no special handling for builtin pypy functions because cloudpickle_fast is CPython-specific. """ - if _is_importable_by_name(obj): + if _is_importable(obj): return NotImplemented else: return self._dynamic_function_reduce(obj) diff --git a/dev-requirements.txt b/dev-requirements.txt index 8dc4512d4..4e26b2106 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -11,3 +11,5 @@ numpy; python_version <= '3.8' # Code coverage uploader for Travis: codecov coverage +# Utility package used when running the cloudpickle test suite +./tests/cloudpickle_testpkg diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 2250193f3..b1821dba6 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -43,7 +43,7 @@ tornado = None import cloudpickle -from cloudpickle.cloudpickle import _is_dynamic +from cloudpickle.cloudpickle import _is_importable from cloudpickle.cloudpickle import _make_empty_cell, cell_set from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from cloudpickle.cloudpickle import _lookup_module_and_qualname @@ -52,6 +52,8 @@ from .testutils import assert_run_python_script from .testutils import subprocess_worker +from _cloudpickle_testpkg import relative_imports_factory + _TEST_GLOBAL_VARIABLE = "default_value" @@ -678,25 +680,62 @@ def my_small_function(x, y): assert b'unwanted_function' not in b assert b'math' not in b - def test_is_dynamic_module(self): + def test_module_importability(self): import pickle # decouple this test from global imports import os.path import distutils import distutils.ccompiler - assert not _is_dynamic(pickle) - assert not _is_dynamic(os.path) # fake (aliased) module - assert not _is_dynamic(distutils) # package - assert not _is_dynamic(distutils.ccompiler) # module in package + assert _is_importable(pickle) + assert _is_importable(os.path) # fake (aliased) module + assert _is_importable(distutils) # package + assert _is_importable(distutils.ccompiler) # module in package - # user-created module without using the import machinery are also - # dynamic dynamic_module = types.ModuleType('dynamic_module') - assert _is_dynamic(dynamic_module) + assert not _is_importable(dynamic_module) if platform.python_implementation() == 'PyPy': import _codecs - assert not _is_dynamic(_codecs) + assert _is_importable(_codecs) + + # #354: Check that modules created dynamically during the import of + # their parent modules are considered importable by cloudpickle. + # See the mod_with_dynamic_submodule documentation for more + # details of this use case. + import _cloudpickle_testpkg.mod.dynamic_submodule as m + assert _is_importable(m) + assert pickle_depickle(m, protocol=self.protocol) is m + + # Check for similar behavior for a module that cannot be imported by + # attribute lookup. + from _cloudpickle_testpkg.mod import dynamic_submodule_two as m2 + # Note: import _cloudpickle_testpkg.mod.dynamic_submodule_two as m2 + # works only for Python 3.7+ + assert _is_importable(m2) + assert pickle_depickle(m2, protocol=self.protocol) is m2 + + # Submodule_three is a dynamic module only importable via module lookup + with pytest.raises(ImportError): + import _cloudpickle_testpkg.mod.submodule_three # noqa + from _cloudpickle_testpkg.mod import submodule_three as m3 + assert not _is_importable(m3) + + # This module cannot be pickled using attribute lookup (as it does not + # have a `__module__` attribute like classes and functions. + assert not hasattr(m3, '__module__') + depickled_m3 = pickle_depickle(m3, protocol=self.protocol) + assert depickled_m3 is not m3 + assert m3.f(1) == depickled_m3.f(1) + + # Do the same for an importable dynamic submodule inside a dynamic + # module inside a file-backed module. + import _cloudpickle_testpkg.mod.dynamic_submodule.dynamic_subsubmodule as sm # noqa + assert _is_importable(sm) + assert pickle_depickle(sm, protocol=self.protocol) is sm + + expected = "cannot check importability of object instances" + with pytest.raises(TypeError, match=expected): + _is_importable(object()) def test_Ellipsis(self): self.assertEqual(Ellipsis, @@ -1181,11 +1220,14 @@ def func(x): func.__module__ = None class NonModuleObject(object): + def __ini__(self): + self.some_attr = None + def __getattr__(self, name): - # We whitelist func so that a _whichmodule(func, None) call returns - # the NonModuleObject instance if a type check on the entries - # of sys.modules is not carried out, but manipulating this - # instance thinking it really is a module later on in the + # We whitelist func so that a _whichmodule(func, None) call + # returns the NonModuleObject instance if a type check on the + # entries of sys.modules is not carried out, but manipulating + # this instance thinking it really is a module later on in the # pickling process of func errors out if name == 'func': return func @@ -1200,7 +1242,7 @@ def __getattr__(self, name): # Any manipulation of non_module_object relying on attribute access # will raise an Exception with pytest.raises(AttributeError): - _is_dynamic(non_module_object) + _ = non_module_object.some_attr try: sys.modules['NonModuleObject'] = non_module_object @@ -1966,20 +2008,9 @@ def check_positive(x): def test_relative_import_inside_function(self): # Make sure relative imports inside round-tripped functions is not - # broken.This was a bug in cloudpickle versions <= 0.5.3 and was + # broken. This was a bug in cloudpickle versions <= 0.5.3 and was # re-introduced in 0.8.0. - - # Both functions living inside modules and packages are tested. - def f(): - # module_function belongs to mypkg.mod1, which is a module - from .mypkg import module_function - return module_function() - - def g(): - # package_function belongs to mypkg, which is a package - from .mypkg import package_function - return package_function() - + f, g = relative_imports_factory() for func, source in zip([f, g], ["module", "package"]): # Make sure relative imports are initially working assert func() == "hello from a {}!".format(source) @@ -2028,7 +2059,7 @@ def f(a, /, b=1): def test___reduce___returns_string(self): # Non regression test for objects with a __reduce__ method returning a # string, meaning "save by attribute using save_global" - from .mypkg import some_singleton + from _cloudpickle_testpkg import some_singleton assert some_singleton.__reduce__() == "some_singleton" depickled_singleton = pickle_depickle( some_singleton, protocol=self.protocol) @@ -2100,7 +2131,7 @@ def test_pickle_dynamic_typevar_memoization(self): assert depickled_T1 is depickled_T2 def test_pickle_importable_typevar(self): - from .mypkg import T + from _cloudpickle_testpkg import T T1 = pickle_depickle(T, protocol=self.protocol) assert T1 is T @@ -2230,12 +2261,12 @@ def test_lookup_module_and_qualname_dynamic_typevar(): def test_lookup_module_and_qualname_importable_typevar(): - from . import mypkg - T = mypkg.T + import _cloudpickle_testpkg + T = _cloudpickle_testpkg.T module_and_name = _lookup_module_and_qualname(T, name=T.__name__) assert module_and_name is not None module, name = module_and_name - assert module is mypkg + assert module is _cloudpickle_testpkg assert name == 'T' diff --git a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py new file mode 100644 index 000000000..595243c26 --- /dev/null +++ b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py @@ -0,0 +1,37 @@ +import typing +from . import mod # noqa + + +def package_function(): + """Function living inside a package, not a simple module""" + return "hello from a package!" + + +class _SingletonClass(object): + def __reduce__(self): + # This reducer is only valid for the top level "some_singleton" object. + return "some_singleton" + + +def relative_imports_factory(): + """Factory creating dynamically-defined functions using relative imports + + Relative import of functions living both inside modules and packages are + tested. + """ + def f(): + # module_function belongs to _cloudpickle_testpkg.mod, which is a + # module + from .mod import module_function + return module_function() + + def g(): + # package_function belongs to _cloudpickle_testpkg, which is a package + from . import package_function + return package_function() + + return f, g + + +some_singleton = _SingletonClass() +T = typing.TypeVar('T') diff --git a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py new file mode 100644 index 000000000..02b144c30 --- /dev/null +++ b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py @@ -0,0 +1,65 @@ +import sys +import types + + +# #354: To emulate package capabilities while being a single file, an extension +# module (for instance a mod.so file) can dynamically create a module object +# (most likely using the *package_name*.*parent_module_name*.*submodule_name* +# naming convention to be compatible with ``import`` semantics). Internally, +# it will use the Python/C API ``PyImport_AddModule(submodule_qualified_name)`` +# utility, which creates a module object and adds it inside ``sys.modules``. A +# module created this way IS a dynamic module. However, because the ``import`` +# machinery automatically imports the parent package/module of a submodule +# before importing the submodule itself, and importing the parent +# package/module creates and append the submodule to sys.modules (sys.modules +# acts as a cache for the import machinery), this submodule, albeit dynamic, is +# importable. To detect this, we need to recursively check the parent modules +# of this submodule to see if the parent module is importable. If yes, we +# reasonably assume that the submodule named using the aforementioned +# hierarchised convention has been created during the import of its parent +# module. The following lines emulate such a behavior without being a compiled +# extension module. + +submodule_name = '_cloudpickle_testpkg.mod.dynamic_submodule' +dynamic_submodule = types.ModuleType(submodule_name) + +# This line allows the dynamic_module to be imported using either one of: +# - ``from _cloudpickle_testpkg.mod import dynamic_submodule`` +# - ``import _cloudpickle_testpkg.mod.dynamic_submodule`` +sys.modules[submodule_name] = dynamic_submodule +# Both lines will make importlib try to get the module from sys.modules after +# importing the parent module, before trying getattr(mod, 'dynamic_submodule'), +# so this dynamic module could be binded to another name. This behavior is +# demonstrated with `dynamic_submodule_two` + +submodule_name_two = '_cloudpickle_testpkg.mod.dynamic_submodule_two' +# Notice the inconsistent name binding, breaking attribute lookup-based import +# attempts. +another_submodule = types.ModuleType(submodule_name_two) +sys.modules[submodule_name_two] = another_submodule + + +# In this third case, the module is not added to sys.modules, and can only be +# imported using attribute lookup-based imports. +submodule_three = types.ModuleType( + '_cloudpickle_testpkg.mod.dynamic_submodule_three' +) +code = """ +def f(x): + return x +""" + +exec(code, vars(submodule_three)) + +# What about a dynamic submodule inside a dynamic submodule inside an +# importable module? +subsubmodule_name = ( + '_cloudpickle_testpkg.mod.dynamic_submodule.dynamic_subsubmodule' +) +dynamic_subsubmodule = types.ModuleType(subsubmodule_name) +dynamic_submodule.dynamic_subsubmodule = dynamic_subsubmodule +sys.modules[subsubmodule_name] = dynamic_subsubmodule + + +def module_function(): + return "hello from a module!" diff --git a/tests/cloudpickle_testpkg/setup.py b/tests/cloudpickle_testpkg/setup.py new file mode 100644 index 000000000..d80870032 --- /dev/null +++ b/tests/cloudpickle_testpkg/setup.py @@ -0,0 +1,13 @@ +from distutils.core import setup + + +dist = setup( + name='cloudpickle_testpkg', + version='0.0.0', + description='Package used only for cloudpickle testing purposes', + author='Cloudpipe', + author_email='cloudpipe@googlegroups.com', + license='BSD 3-Clause License', + packages=['_cloudpickle_testpkg'], + python_requires='>=3.5', +) diff --git a/tests/mypkg/__init__.py b/tests/mypkg/__init__.py deleted file mode 100644 index 5ef5ab486..000000000 --- a/tests/mypkg/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -import typing -from .mod import module_function - - -def package_function(): - """Function living inside a package, not a simple module""" - return "hello from a package!" - - -class _SingletonClass(object): - def __reduce__(self): - # This reducer is only valid for the top level "some_singleton" object. - return "some_singleton" - - -some_singleton = _SingletonClass() -T = typing.TypeVar('T') diff --git a/tests/mypkg/mod.py b/tests/mypkg/mod.py deleted file mode 100644 index a703a6aa5..000000000 --- a/tests/mypkg/mod.py +++ /dev/null @@ -1,2 +0,0 @@ -def module_function(): - return "hello from a module!"