Skip to content
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 47 additions & 9 deletions cloudpickle/cloudpickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,16 +266,46 @@ def _walk_global_ops(code):


class CloudPickler(Pickler):
"""Pickler with extended pickling abilities.

Parameters
----------
file :

protocol : int

override_globals : bool
Functions belonging to the same module (the __main__ or a dynamic
module) share the same global variables. Using cloudpickle, they will
share those global variables in the process they are unpickled in as
well. However, the state of those global variables may differ in the
pickled functions, for example if one global variable was mutated
between two calls to pickle.dump. override_globals is a
switch that allows two different behaviors:

* If override_globals=True, the global variables of the
module containing this function will be overridden in the process
which load the pickled object with the values of these variables at
the pickling time.
* If override_globals=False, the global variables of the
module containing the definition of the nested or dynamically defined
function will not be overridden in the process loading the pickled
object. If the global variable is not declared, it will be
initialized with the value at pickling time.

"""

dispatch = Pickler.dispatch.copy()

def __init__(self, file, protocol=None):
def __init__(self, file, protocol=None, override_globals=True):
if protocol is None:
protocol = DEFAULT_PROTOCOL
Pickler.__init__(self, file, protocol=protocol)
# map ids to dictionary. used to ensure that functions can share global env
self.globals_ref = {}

self.override_globals = override_globals

def dump(self, obj):
self.inject_addons()
try:
Expand Down Expand Up @@ -590,6 +620,7 @@ def save_function_tuple(self, func):
'module': func.__module__,
'name': func.__name__,
'doc': func.__doc__,
'override_globals': self.override_globals
}
if hasattr(func, '__annotations__') and sys.version_info >= (3, 7):
state['annotations'] = func.__annotations__
Expand Down Expand Up @@ -921,7 +952,7 @@ def _rebuild_tornado_coroutine(func):

# Shorthands for legacy support

def dump(obj, file, protocol=None):
def dump(obj, file, protocol=None, override_globals=True):
"""Serialize obj as bytes streamed into file

protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
Expand All @@ -931,10 +962,11 @@ def dump(obj, file, protocol=None):
Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
compatibility with older versions of Python.
"""
CloudPickler(file, protocol=protocol).dump(obj)
CloudPickler(file, protocol=protocol,
override_globals=override_globals).dump(obj)


def dumps(obj, protocol=None):
def dumps(obj, protocol=None, override_globals=True):
"""Serialize obj as a string of bytes allocated in memory

protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
Expand All @@ -946,7 +978,8 @@ def dumps(obj, protocol=None):
"""
file = StringIO()
try:
cp = CloudPickler(file, protocol=protocol)
cp = CloudPickler(file, protocol=protocol,
override_globals=override_globals)
cp.dump(obj)
return file.getvalue()
finally:
Expand Down Expand Up @@ -1078,10 +1111,15 @@ def _fill_function(*args):
else:
raise ValueError('Unexpected _fill_value arguments: %r' % (args,))

# Only set global variables that do not exist.
for k, v in state['globals'].items():
if k not in func.__globals__:
func.__globals__[k] = v
# This updates the variables of func's module using func's globals from the
# time at which it was pickled.
if state.get('override_globals', True):
func.__globals__.update(state['globals'])
else:
for k, v in state['globals'].items():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would re-add the original comment for this block of the condition:

# Only set global variables that do not exist.

# Only set global variables that do not exist.
if k not in func.__globals__:
func.__globals__[k] = v

func.__defaults__ = state['defaults']
func.__dict__ = state['dict']
Expand Down
262 changes: 151 additions & 111 deletions tests/cloudpickle_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1075,7 +1075,9 @@ def f1():

cloned_f0 = {clone_func}(f0, protocol={protocol})
cloned_f1 = {clone_func}(f1, protocol={protocol})
pickled_f1 = dumps(f1, protocol={protocol})
pickled_f1 = dumps(
f1, protocol={protocol},
override_globals={override_globals})

# Change the value of the global variable
cloned_f0()
Expand All @@ -1084,130 +1086,168 @@ def f1():
result_f1 = cloned_f1()
assert result_f1 == "changed_by_f0", result_f1

# Ensure that unpickling the global variable does not change its value
# Ensure that unpickling the global variable overrides (resp. does not
# change) the global variable if override_globals was True
# (resp. False) at pickling time
result_pickled_f1 = loads(pickled_f1)()
assert result_pickled_f1 == "changed_by_f0", result_pickled_f1
if {override_globals}:
assert result_pickled_f1 == "default_value", result_pickled_f1
else:
assert result_pickled_f1 == "changed_by_f0", result_pickled_f1
"""
for clone_func in ['local_clone', 'subprocess_pickle_echo']:
code = code_template.format(protocol=self.protocol,
clone_func=clone_func)
assert_run_python_script(textwrap.dedent(code))
for override_globals in [True, False]:
code = code_template.format(
protocol=self.protocol, clone_func=clone_func,
override_globals=override_globals)
assert_run_python_script(textwrap.dedent(code))

def test_closure_interacting_with_a_global_variable(self):
global _TEST_GLOBAL_VARIABLE
assert _TEST_GLOBAL_VARIABLE == "default_value"
orig_value = _TEST_GLOBAL_VARIABLE
try:
def f0():
global _TEST_GLOBAL_VARIABLE
_TEST_GLOBAL_VARIABLE = "changed_by_f0"

def f1():
return _TEST_GLOBAL_VARIABLE

cloned_f0 = cloudpickle.loads(cloudpickle.dumps(
f0, protocol=self.protocol))
cloned_f1 = cloudpickle.loads(cloudpickle.dumps(
f1, protocol=self.protocol))
pickled_f1 = cloudpickle.dumps(f1, protocol=self.protocol)

# Change the value of the global variable
cloned_f0()
assert _TEST_GLOBAL_VARIABLE == "changed_by_f0"

# Ensure that the global variable is the same for another function
result_cloned_f1 = cloned_f1()
assert result_cloned_f1 == "changed_by_f0", result_cloned_f1
assert f1() == result_cloned_f1

# Ensure that unpickling the global variable does not change its
# value
result_pickled_f1 = cloudpickle.loads(pickled_f1)()
assert result_pickled_f1 == "changed_by_f0", result_pickled_f1
finally:
_TEST_GLOBAL_VARIABLE = orig_value
for override_globals in [True, False]:
try:
def f0():
global _TEST_GLOBAL_VARIABLE
_TEST_GLOBAL_VARIABLE = "changed_by_f0"

def f1():
return _TEST_GLOBAL_VARIABLE

cloned_f0 = cloudpickle.loads(cloudpickle.dumps(
f0, protocol=self.protocol,
override_globals=override_globals))

cloned_f1 = cloudpickle.loads(cloudpickle.dumps(
f1, protocol=self.protocol,
override_globals=override_globals))

pickled_f1 = cloudpickle.dumps(
f1, protocol=self.protocol,
override_globals=override_globals)

# Change the value of the global variable
cloned_f0()
assert _TEST_GLOBAL_VARIABLE == "changed_by_f0"

# Ensure that the global variable is the same for another
# function
result_cloned_f1 = cloned_f1()
assert result_cloned_f1 == "changed_by_f0", result_cloned_f1
assert f1() == result_cloned_f1

# Ensure that unpickling the global variable overrides (resp.
# does not change) its value if override_globals was
# True (resp False) at pickling time
res_pickled_f1 = cloudpickle.loads(pickled_f1)()
if override_globals:
assert res_pickled_f1 == "default_value", res_pickled_f1
else:
assert res_pickled_f1 == "changed_by_f0", res_pickled_f1
finally:
_TEST_GLOBAL_VARIABLE = orig_value

def test_function_from_dynamic_module_with_globals_modifications(self):
# This test verifies that the global variable state of a function
# defined in a dynamic module in a child process are not reset by
# subsequent uplickling.
# subsequent unpickling.

# first, we create a dynamic module in the parent process
mod = types.ModuleType('mod')
code = '''
GLOBAL_STATE = "initial value"

def func_defined_in_dynamic_module(v=None):
global GLOBAL_STATE
if v is not None:
GLOBAL_STATE = v
return GLOBAL_STATE
'''
exec(textwrap.dedent(code), mod.__dict__)

try:
# Simple sanity check on the function's output
assert mod.func_defined_in_dynamic_module() == "initial value"

# The function of mod is pickled two times, with two different
# values for the global variable GLOBAL_STATE.
# Then we launch a child process that sequentially unpickles the
# two functions. Those unpickle functions should share the same
# global variables in the child process:
# Once the first function gets unpickled, mod is created and
# tracked in the child environment. This is state is preserved
# when unpickling the second function whatever the global variable
# GLOBAL_STATE's value at the time of pickling.

with open('function_with_initial_globals.pkl', 'wb') as f:
cloudpickle.dump(mod.func_defined_in_dynamic_module, f)

# Change the mod's global variable
mod.GLOBAL_STATE = 'changed value'

# At this point, mod.func_defined_in_dynamic_module()
# returns the updated value. Let's pickle it again.
assert mod.func_defined_in_dynamic_module() == 'changed value'
with open('function_with_modified_globals.pkl', 'wb') as f:
cloudpickle.dump(mod.func_defined_in_dynamic_module, f,
protocol=self.protocol)

child_process_code = """
import pickle

with open('function_with_initial_globals.pkl','rb') as f:
func_with_initial_globals = pickle.load(f)

# At this point, a module called 'mod' should exist in
# _dynamic_modules_globals. Further function loading
# will use the globals living in mod.

assert func_with_initial_globals() == 'initial value'

# Load a function with initial global variable that was
# pickled after a change in the global variable
with open('function_with_modified_globals.pkl','rb') as f:
func_with_modified_globals = pickle.load(f)

# assert the this unpickling did not modify the value of
# the local
assert func_with_modified_globals() == 'initial value'

# Update the value from the child process and check that
# unpickling again does not reset our change.
assert func_with_initial_globals('new value') == 'new value'
assert func_with_modified_globals() == 'new value'

with open('function_with_initial_globals.pkl','rb') as f:
func_with_initial_globals = pickle.load(f)
assert func_with_initial_globals() == 'new value'
assert func_with_modified_globals() == 'new value'
"""
assert_run_python_script(textwrap.dedent(child_process_code))
for override_globals in [True, False]:
try:
# first, we create a dynamic module in the parent process
mod = types.ModuleType('mod')
code = '''
GLOBAL_STATE = "initial value"

def func_defined_in_dynamic_module(v=None):
global GLOBAL_STATE
if v is not None:
GLOBAL_STATE = v
return GLOBAL_STATE
'''
exec(textwrap.dedent(code), mod.__dict__)
# Simple sanity check on the function's output
assert mod.func_defined_in_dynamic_module() == "initial value"

# The function of mod is pickled two times, with two different
# values for the global variable GLOBAL_STATE. Then we launch
# a child process that sequentially unpickles the two
# functions. Those unpickle functions should share the same
# global variables in the child process: Once the first
# function gets unpickled, mod is created and tracked in the
# child environment. If override_globals is False, then the
# module state is preserved when unpickling the second function
# whatever the global variable GLOBAL_STATE's value at the time
# of pickling.

with open('function_with_initial_globals.pkl', 'wb') as f:
cloudpickle.dump(
mod.func_defined_in_dynamic_module, f,
override_globals=override_globals)

# Change the mod's global variable
mod.GLOBAL_STATE = 'changed value'

# At this point, mod.func_defined_in_dynamic_module()
# returns the updated value. Let's pickle it again.
assert mod.func_defined_in_dynamic_module() == 'changed value'
with open('function_with_modified_globals.pkl', 'wb') as f:
cloudpickle.dump(
mod.func_defined_in_dynamic_module, f,
override_globals=override_globals)

child_process_code = """
import pickle

with open('function_with_initial_globals.pkl','rb') as f:
func_with_initial_globals = pickle.load(f)

# At this point, a module called 'mod' should exist in
# _dynamic_modules_globals. Further function loading
# will use the globals living in mod.

assert func_with_initial_globals() == 'initial value'

# Load a function with initial global variable that was
# pickled after a change in the global variable
with open('function_with_modified_globals.pkl','rb') as f:
func_with_modified_globals = pickle.load(f)

# Verify that loading the function overrides (resp. does
# not change) the global variable GLOBAL_STATE of mod, if
# override_globals was True (resp. False) at
# pickling time

if {override_globals}:
assert func_with_modified_globals() == 'changed value'
else:
assert func_with_modified_globals() == 'initial value'

# Update the value from the child process and check that
# unpickling resets (resp. does not reset) our change if
# override_globals was True (resp. False) at
# pickling time
assert (
func_with_initial_globals('new value') == 'new value')
assert func_with_modified_globals() == 'new value'

with open('function_with_initial_globals.pkl','rb') as f:
func_with_initial_globals = pickle.load(f)

if {override_globals}:
assert func_with_initial_globals() == 'initial value'
assert func_with_modified_globals() == 'initial value'
else:
assert func_with_initial_globals() == 'new value'
assert func_with_modified_globals() == 'new value'

""".format(override_globals=override_globals)
assert_run_python_script(textwrap.dedent(child_process_code))

finally:
os.unlink('function_with_initial_globals.pkl')
os.unlink('function_with_modified_globals.pkl')
finally:
os.unlink('function_with_initial_globals.pkl')
os.unlink('function_with_modified_globals.pkl')

@pytest.mark.skipif(sys.version_info >= (3, 0),
reason="hardcoded pickle bytes for 2.7")
Expand Down