Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions cloudpickle/cloudpickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,27 @@
PY3 = True


# caches dynamic modules that are not referenced in sys.modules
_dynamic_modules_globals = weakref.WeakValueDictionary()

# _dynamic_modules_globals will store base_globals variables. Or, base_globals
# end up being dicts, and built-in dict instances cannot be weakly referenced.
# Therefore, we create a mirror of the dict type, and at the mutation of
# base globals from string to dict, we use the _DynamicModuleFuncGlobals
# constructor instead of the builtin dict one.


class _DynamicModuleFuncGlobals(dict):
"""Global variables referenced by a function defined in a dynamic module

To avoid leaking references we store such context in a WeakValueDictionary
instance. However instances of python builtin types such as dict cannot
be used directly as values in such a construct, hence the need for a
derived class.
"""
pass


def _make_cell_set_template_code():
"""Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF

Expand Down Expand Up @@ -1090,12 +1111,17 @@ def _make_skel_func(code, cell_count, base_globals=None):
if base_globals is None:
base_globals = {}
elif isinstance(base_globals, str):
base_globals_name = base_globals
if sys.modules.get(base_globals, None) is not None:
# this checks if we can import the previous environment the object
# This checks if we can import the previous environment the object
# lived in
base_globals = vars(sys.modules[base_globals])
else:
base_globals = {}
base_globals = _dynamic_modules_globals.get(
base_globals_name, None)
if base_globals is None:
base_globals = _DynamicModuleFuncGlobals()
_dynamic_modules_globals[base_globals_name] = base_globals

base_globals['__builtins__'] = __builtins__

Expand Down
144 changes: 144 additions & 0 deletions tests/cloudpickle_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import collections
import base64
import functools
import gc
import imp
from io import BytesIO
import itertools
Expand Down Expand Up @@ -43,6 +44,7 @@

import cloudpickle
from cloudpickle.cloudpickle import _find_module, _make_empty_cell, cell_set
from cloudpickle.cloudpickle import _dynamic_modules_globals

from .testutils import subprocess_pickle_echo
from .testutils import assert_run_python_script
Expand Down Expand Up @@ -440,6 +442,59 @@ def method(self, x):
mod1, mod2 = pickle_depickle([mod, mod])
self.assertEqual(id(mod1), id(mod2))

def test_dynamic_modules_globals(self):
# _dynamic_modules_globals is a WeakValueDictionary, so if a value
# in this dict (containing a set of global variables from a dynamic
# module created in the parent process) has no other reference than in
# this dict in the child process, it will be garbage collected.

# We first create a module
mod = imp.new_module('mod')
code = '''
x = 1
def func():
return
'''
exec(textwrap.dedent(code), mod.__dict__)

pickled_module_path = 'mod_f.pkl'

child_process_script = '''
import pickle
from cloudpickle.cloudpickle import _dynamic_modules_globals
import gc
with open("{pickled_module_path}", 'rb') as f:
func = pickle.load(f)

# A dictionnary storing the globals of the newly unpickled function
# should have been created
assert list(_dynamic_modules_globals.keys()) == ['mod']

# func.__globals__ is the only non-weak reference to
# _dynamic_modules_globals['mod']. By deleting func, we delete also
# _dynamic_modules_globals['mod']
del func
gc.collect()

# There is no reference to the globals of func since func has been
# deleted and _dynamic_modules_globals is a WeakValueDictionary,
# so _dynamic_modules_globals should now be empty
assert list(_dynamic_modules_globals.keys()) == []
'''

child_process_script = child_process_script.format(
pickled_module_path=pickled_module_path)

try:
with open(pickled_module_path, 'wb') as f:
cloudpickle.dump(mod.func, f)

assert_run_python_script(textwrap.dedent(child_process_script))

finally:
os.unlink(pickled_module_path)


def test_load_dynamic_module_in_grandchild_process(self):
# Make sure that when loaded, a dynamic module preserves its dynamic
# property. Otherwise, this will lead to an ImportError if pickled in
Expand Down Expand Up @@ -993,6 +1048,95 @@ def f1():
finally:
_TEST_GLOBAL_VARIABLE = orig_value

def test_function_from_dynamic_module_with_globals_modifications(self):
"""
this test verifies that:
- any modification in the global variables of a dynamic
module living in a child process won't get overridden
when new objects are unpickled in the child's interpreter

- vice versa, e.g a modification in the parent process does not
override the value of the variables in the child process

The two cases are equivalent, and here, the second case is tested.
"""

# first, we create a dynamic module in the parent process
mod = imp.new_module('mod')
code = '''
x = 1
def func_that_relies_on_dynamic_module(v=None):
global x
if v is not None:
x = v
return x
'''
exec(textwrap.dedent(code), mod.__dict__)

try:
# simple sanity check on the function's output
assert mod.func_that_relies_on_dynamic_module() == 1

# the function of mod is pickled two times, with two different
# values for the global variable x.

# a child process that sequentially unpickles the
# two functions is then launched

# once the _first_ function gets unpickled, mod is created and
# tracked in the child environment. Whatever the global variable
# x's value in the second function, it will be overriden by the
# initial value of x in the child environment

with open('function_with_initial_globals.pk', 'wb') as fid:
cloudpickle.dump(mod.func_that_relies_on_dynamic_module, fid)

# change the mod's global variable x
mod.x = 2

# at this point, mod.func_that_relies_on_dynamic_module()
# returns 2
assert mod.func_that_relies_on_dynamic_module() == 2
with open('function_with_modified_globals.pk', 'wb') as fid:
cloudpickle.dump(mod.func_that_relies_on_dynamic_module, fid)

child_process_code = """
import pickle

with open('function_with_initial_globals.pk','rb') as fid:
function_with_initial_globals = pickle.load(fid)

# at this point, a module called 'mod' should exist in
# _dynamic_modules_globals. further function loading
# will use the globals living in mod

assert function_with_initial_globals() == 1

# load a function with initial global variable x set to 2
with open('function_with_modified_globals.pk','rb') as fid:
function_with_modified_globals = pickle.load(fid)

# assert the initial global got overridden by
# _dynamic_modules_globals
assert function_with_modified_globals()==1

# both function's global x should point to the
# same variable.
# calling function_with_initial_globals('test_value')
# will change this variable: function_with_modified_globals()
# should return the changed variable
assert function_with_initial_globals('test_value') == 'test_value'
assert function_with_modified_globals() == 'test_value'
"""

# finally, we execute the code
assert_run_python_script(textwrap.dedent(child_process_code))

finally:
# remove the created files
os.unlink('function_with_initial_globals.pk')
os.unlink('function_with_modified_globals.pk')

@pytest.mark.skipif(sys.version_info >= (3, 0),
reason="hardcoded pickle bytes for 2.7")
def test_function_pickle_compat_0_4_0(self):
Expand Down