Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
1.5.0 (in development)
======================

- Fix a bug causing cloudpickle to crash when pickling dynamically created,
importable modules.
([issue #360](https://github.com/cloudpipe/cloudpickle/issues/354))


1.4.1
=====
Expand Down
70 changes: 25 additions & 45 deletions cloudpickle/cloudpickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,24 @@ def _whichmodule(obj, name):
return None


def _is_importable_by_name(obj, name=None):
"""Determine if obj can be pickled as attribute of a file-backed module"""
return _lookup_module_and_qualname(obj, name=name) is not None
def _is_importable(obj, name=None):
"""Dispatcher utility to test the importability of various constructs."""
if isinstance(obj, types.FunctionType):
return _lookup_module_and_qualname(obj, name=name) is not None
elif issubclass(type(obj), type):
return _lookup_module_and_qualname(obj, name=name) is not None
elif isinstance(obj, types.ModuleType):
# We assume that sys.modules is primarily used as a cache mechanism for
# the Python import machinery. Checking if a module has been added in
# is sys.modules therefore a cheap and simple heuristic to tell us whether
# we can assume that a given module could be imported by name in
# another Python process.
return obj.__name__ in sys.modules
else:
raise TypeError(
"cannot check importability of {} instances".format(
type(obj).__name__)
)


def _lookup_module_and_qualname(obj, name=None):
Expand All @@ -187,6 +202,8 @@ def _lookup_module_and_qualname(obj, name=None):
if module_name == "__main__":
return None

# Note: if module_name is in sys.modules, the corresponding module is
# assumed importable at unpickling time. See #357
module = sys.modules.get(module_name, None)
if module is None:
# The main reason why obj's module would not be imported is that this
Expand All @@ -196,10 +213,6 @@ def _lookup_module_and_qualname(obj, name=None):
# supported, as the standard pickle does not support it either.
return None

# module has been added to sys.modules, but it can still be dynamic.
if _is_dynamic(module):
return None

try:
obj2, parent = _getattribute(module, name)
except AttributeError:
Expand Down Expand Up @@ -496,12 +509,12 @@ def save_module(self, obj):
"""
Save a module as an import
"""
if _is_dynamic(obj):
if _is_importable(obj):
self.save_reduce(subimport, (obj.__name__,), obj=obj)
else:
obj.__dict__.pop('__builtins__', None)
self.save_reduce(dynamic_subimport, (obj.__name__, vars(obj)),
obj=obj)
else:
self.save_reduce(subimport, (obj.__name__,), obj=obj)

dispatch[types.ModuleType] = save_module

Expand Down Expand Up @@ -536,7 +549,7 @@ def save_function(self, obj, name=None):
Determines what kind of function obj is (e.g. lambda, defined at
interactive prompt, etc) and handles the pickling appropriately.
"""
if _is_importable_by_name(obj, name=name):
if _is_importable(obj, name=name):
return Pickler.save_global(self, obj, name=name)
elif PYPY and isinstance(obj.__code__, builtin_code_type):
return self.save_pypy_builtin_func(obj)
Expand Down Expand Up @@ -840,7 +853,7 @@ def save_global(self, obj, name=None, pack=struct.pack):
self._save_parametrized_type_hint(obj)
elif name is not None:
Pickler.save_global(self, obj, name=name)
elif not _is_importable_by_name(obj, name=name):
elif not _is_importable(obj, name=name):
self.save_dynamic_class(obj)
else:
Pickler.save_global(self, obj, name=name)
Expand Down Expand Up @@ -1307,39 +1320,6 @@ class id will also reuse this enum definition.
return _lookup_class_or_track(class_tracker_id, enum_class)


def _is_dynamic(module):
"""
Return True if the module is special module that cannot be imported by its
name.
"""
# Quick check: module that have __file__ attribute are not dynamic modules.
if hasattr(module, '__file__'):
return False

if module.__spec__ is not None:
return False

# In PyPy, Some built-in modules such as _codecs can have their
# __spec__ attribute set to None despite being imported. For such
# modules, the ``_find_spec`` utility of the standard library is used.
parent_name = module.__name__.rpartition('.')[0]
if parent_name: # pragma: no cover
# This code handles the case where an imported package (and not
# module) remains with __spec__ set to None. It is however untested
# as no package in the PyPy stdlib has __spec__ set to None after
# it is imported.
try:
parent = sys.modules[parent_name]
except KeyError:
msg = "parent {!r} not in sys.modules"
raise ImportError(msg.format(parent_name))
else:
pkgpath = parent.__path__
else:
pkgpath = None
return _find_spec(module.__name__, pkgpath, module) is None


def _make_typevar(name, bound, constraints, covariant, contravariant,
class_tracker_id):
tv = typing.TypeVar(
Expand Down
19 changes: 10 additions & 9 deletions cloudpickle/cloudpickle_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@
from _pickle import Pickler

from .cloudpickle import (
_is_dynamic, _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL,
_find_imported_submodules, _get_cell_contents, _is_importable_by_name, _builtin_type,
Enum, _get_or_create_tracker_id, _make_skeleton_class, _make_skeleton_enum,
_extract_class_dict, dynamic_subimport, subimport, _typevar_reduce, _get_bases,
_extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL,
_find_imported_submodules, _get_cell_contents, _is_importable,
_builtin_type, Enum, _get_or_create_tracker_id, _make_skeleton_class,
_make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport,
_typevar_reduce, _get_bases,
)

load, loads = _pickle.load, _pickle.loads
Expand Down Expand Up @@ -276,11 +277,11 @@ def _memoryview_reduce(obj):


def _module_reduce(obj):
if _is_dynamic(obj):
if _is_importable(obj):
return subimport, (obj.__name__,)
else:
obj.__dict__.pop('__builtins__', None)
return dynamic_subimport, (obj.__name__, vars(obj))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could probably be named to _make_dynamic_module as we actually do not import the module at all.

We should still keep dynamic_subimport as an alias for _make_dynamic_module to preserve backward compat with old pickle files.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The subimport function could also be renamed to _import_module to be more explicit. But we would also need to keep an alias named subimport pointing to _import_module for backward compat as well.

else:
return subimport, (obj.__name__,)


def _method_reduce(obj):
Expand Down Expand Up @@ -333,7 +334,7 @@ def _class_reduce(obj):
return type, (NotImplemented,)
elif obj in _BUILTIN_TYPE_NAMES:
return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],)
elif not _is_importable_by_name(obj):
elif not _is_importable(obj):
return _dynamic_class_reduce(obj)
return NotImplemented

Expand Down Expand Up @@ -505,7 +506,7 @@ def _function_reduce(self, obj):
As opposed to cloudpickle.py, There no special handling for builtin
pypy functions because cloudpickle_fast is CPython-specific.
"""
if _is_importable_by_name(obj):
if _is_importable(obj):
return NotImplemented
else:
return self._dynamic_function_reduce(obj)
Expand Down
2 changes: 2 additions & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ numpy; python_version <= '3.8'
# Code coverage uploader for Travis:
codecov
coverage
# Utility package used when running the cloudpickle test suite
./tests/cloudpickle_testpkg
97 changes: 64 additions & 33 deletions tests/cloudpickle_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
tornado = None

import cloudpickle
from cloudpickle.cloudpickle import _is_dynamic
from cloudpickle.cloudpickle import _is_importable
from cloudpickle.cloudpickle import _make_empty_cell, cell_set
from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule
from cloudpickle.cloudpickle import _lookup_module_and_qualname
Expand All @@ -52,6 +52,8 @@
from .testutils import assert_run_python_script
from .testutils import subprocess_worker

from _cloudpickle_testpkg import relative_imports_factory


_TEST_GLOBAL_VARIABLE = "default_value"

Expand Down Expand Up @@ -678,25 +680,62 @@ def my_small_function(x, y):
assert b'unwanted_function' not in b
assert b'math' not in b

def test_is_dynamic_module(self):
def test_module_importability(self):
import pickle # decouple this test from global imports
import os.path
import distutils
import distutils.ccompiler

assert not _is_dynamic(pickle)
assert not _is_dynamic(os.path) # fake (aliased) module
assert not _is_dynamic(distutils) # package
assert not _is_dynamic(distutils.ccompiler) # module in package
assert _is_importable(pickle)
assert _is_importable(os.path) # fake (aliased) module
assert _is_importable(distutils) # package
assert _is_importable(distutils.ccompiler) # module in package

# user-created module without using the import machinery are also
# dynamic
dynamic_module = types.ModuleType('dynamic_module')
assert _is_dynamic(dynamic_module)
assert not _is_importable(dynamic_module)

if platform.python_implementation() == 'PyPy':
import _codecs
assert not _is_dynamic(_codecs)
assert _is_importable(_codecs)

# #354: Check that modules created dynamically during the import of
# their parent modules are considered importable by cloudpickle.
# See the mod_with_dynamic_submodule documentation for more
# details of this use case.
import _cloudpickle_testpkg.mod.dynamic_submodule as m
assert _is_importable(m)
assert pickle_depickle(m, protocol=self.protocol) is m

# Check for similar behavior for a module that cannot be imported by
# attribute lookup.
from _cloudpickle_testpkg.mod import dynamic_submodule_two as m2
# Note: import _cloudpickle_testpkg.mod.dynamic_submodule_two as m2
# works only for Python 3.7+
assert _is_importable(m2)
assert pickle_depickle(m2, protocol=self.protocol) is m2

# Submodule_three is a dynamic module only importable via module lookup
with pytest.raises(ImportError):
import _cloudpickle_testpkg.mod.submodule_three # noqa
from _cloudpickle_testpkg.mod import submodule_three as m3
assert not _is_importable(m3)

# This module cannot be pickled using attribute lookup (as it does not
# have a `__module__` attribute like classes and functions.
assert not hasattr(m3, '__module__')
depickled_m3 = pickle_depickle(m3, protocol=self.protocol)
assert depickled_m3 is not m3
assert m3.f(1) == depickled_m3.f(1)

# Do the same for an importable dynamic submodule inside a dynamic
# module inside a file-backed module.
import _cloudpickle_testpkg.mod.dynamic_submodule.dynamic_subsubmodule as sm # noqa
assert _is_importable(sm)
assert pickle_depickle(sm, protocol=self.protocol) is sm

expected = "cannot check importability of object instances"
with pytest.raises(TypeError, match=expected):
_is_importable(object())

def test_Ellipsis(self):
self.assertEqual(Ellipsis,
Expand Down Expand Up @@ -1181,11 +1220,14 @@ def func(x):
func.__module__ = None

class NonModuleObject(object):
def __ini__(self):
self.some_attr = None

def __getattr__(self, name):
# We whitelist func so that a _whichmodule(func, None) call returns
# the NonModuleObject instance if a type check on the entries
# of sys.modules is not carried out, but manipulating this
# instance thinking it really is a module later on in the
# We whitelist func so that a _whichmodule(func, None) call
# returns the NonModuleObject instance if a type check on the
# entries of sys.modules is not carried out, but manipulating
# this instance thinking it really is a module later on in the
# pickling process of func errors out
if name == 'func':
return func
Expand All @@ -1200,7 +1242,7 @@ def __getattr__(self, name):
# Any manipulation of non_module_object relying on attribute access
# will raise an Exception
with pytest.raises(AttributeError):
_is_dynamic(non_module_object)
_ = non_module_object.some_attr

try:
sys.modules['NonModuleObject'] = non_module_object
Expand Down Expand Up @@ -1966,20 +2008,9 @@ def check_positive(x):

def test_relative_import_inside_function(self):
# Make sure relative imports inside round-tripped functions is not
# broken.This was a bug in cloudpickle versions <= 0.5.3 and was
# broken. This was a bug in cloudpickle versions <= 0.5.3 and was
# re-introduced in 0.8.0.

# Both functions living inside modules and packages are tested.
def f():
# module_function belongs to mypkg.mod1, which is a module
from .mypkg import module_function
return module_function()

def g():
# package_function belongs to mypkg, which is a package
from .mypkg import package_function
return package_function()

f, g = relative_imports_factory()
for func, source in zip([f, g], ["module", "package"]):
# Make sure relative imports are initially working
assert func() == "hello from a {}!".format(source)
Expand Down Expand Up @@ -2028,7 +2059,7 @@ def f(a, /, b=1):
def test___reduce___returns_string(self):
# Non regression test for objects with a __reduce__ method returning a
# string, meaning "save by attribute using save_global"
from .mypkg import some_singleton
from _cloudpickle_testpkg import some_singleton
assert some_singleton.__reduce__() == "some_singleton"
depickled_singleton = pickle_depickle(
some_singleton, protocol=self.protocol)
Expand Down Expand Up @@ -2100,7 +2131,7 @@ def test_pickle_dynamic_typevar_memoization(self):
assert depickled_T1 is depickled_T2

def test_pickle_importable_typevar(self):
from .mypkg import T
from _cloudpickle_testpkg import T
T1 = pickle_depickle(T, protocol=self.protocol)
assert T1 is T

Expand Down Expand Up @@ -2230,12 +2261,12 @@ def test_lookup_module_and_qualname_dynamic_typevar():


def test_lookup_module_and_qualname_importable_typevar():
from . import mypkg
T = mypkg.T
import _cloudpickle_testpkg
T = _cloudpickle_testpkg.T
module_and_name = _lookup_module_and_qualname(T, name=T.__name__)
assert module_and_name is not None
module, name = module_and_name
assert module is mypkg
assert module is _cloudpickle_testpkg
assert name == 'T'


Expand Down
Loading