diff --git a/CHANGES.md b/CHANGES.md index b7499530f..4c5a1e3c0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +0.7.0 +===== + +- Add a switch in `cloudpickle.dump` speficying if variables present in the + written pickle string should override colluding values present in their new + namespace at unpickling time. + ([issue #214](https://github.com/cloudpipe/cloudpickle/issues/214)) + + 0.6.1 ===== diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 41638be9c..0cbd00215 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -266,16 +266,46 @@ def _walk_global_ops(code): class CloudPickler(Pickler): + """Pickler with extended pickling abilities. + + Parameters + ---------- + file : + + protocol : int + + override_globals : bool + Functions belonging to the same module (the __main__ or a dynamic + module) share the same global variables. Using cloudpickle, they will + share those global variables in the process they are unpickled in as + well. However, the state of those global variables may differ in the + pickled functions, for example if one global variable was mutated + between two calls to pickle.dump. override_globals is a + switch that allows two different behaviors: + + * If override_globals=True, the global variables of the + module containing this function will be overridden in the process + which load the pickled object with the values of these variables at + the pickling time. + * If override_globals=False, the global variables of the + module containing the definition of the nested or dynamically defined + function will not be overridden in the process loading the pickled + object. If the global variable is not declared, it will be + initialized with the value at pickling time. + + """ dispatch = Pickler.dispatch.copy() - def __init__(self, file, protocol=None): + def __init__(self, file, protocol=None, override_globals=True): if protocol is None: protocol = DEFAULT_PROTOCOL Pickler.__init__(self, file, protocol=protocol) # map ids to dictionary. used to ensure that functions can share global env self.globals_ref = {} + self.override_globals = override_globals + def dump(self, obj): self.inject_addons() try: @@ -590,6 +620,7 @@ def save_function_tuple(self, func): 'module': func.__module__, 'name': func.__name__, 'doc': func.__doc__, + 'override_globals': self.override_globals } if hasattr(func, '__annotations__') and sys.version_info >= (3, 7): state['annotations'] = func.__annotations__ @@ -921,7 +952,7 @@ def _rebuild_tornado_coroutine(func): # Shorthands for legacy support -def dump(obj, file, protocol=None): +def dump(obj, file, protocol=None, override_globals=True): """Serialize obj as bytes streamed into file protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to @@ -931,10 +962,11 @@ def dump(obj, file, protocol=None): Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure compatibility with older versions of Python. """ - CloudPickler(file, protocol=protocol).dump(obj) + CloudPickler(file, protocol=protocol, + override_globals=override_globals).dump(obj) -def dumps(obj, protocol=None): +def dumps(obj, protocol=None, override_globals=True): """Serialize obj as a string of bytes allocated in memory protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to @@ -946,7 +978,8 @@ def dumps(obj, protocol=None): """ file = StringIO() try: - cp = CloudPickler(file, protocol=protocol) + cp = CloudPickler(file, protocol=protocol, + override_globals=override_globals) cp.dump(obj) return file.getvalue() finally: @@ -1078,10 +1111,15 @@ def _fill_function(*args): else: raise ValueError('Unexpected _fill_value arguments: %r' % (args,)) - # Only set global variables that do not exist. - for k, v in state['globals'].items(): - if k not in func.__globals__: - func.__globals__[k] = v + # This updates the variables of func's module using func's globals from the + # time at which it was pickled. + if state.get('override_globals', True): + func.__globals__.update(state['globals']) + else: + for k, v in state['globals'].items(): + # Only set global variables that do not exist. + if k not in func.__globals__: + func.__globals__[k] = v func.__defaults__ = state['defaults'] func.__dict__ = state['dict'] diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index d318da7e1..d88662b2f 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1075,7 +1075,9 @@ def f1(): cloned_f0 = {clone_func}(f0, protocol={protocol}) cloned_f1 = {clone_func}(f1, protocol={protocol}) - pickled_f1 = dumps(f1, protocol={protocol}) + pickled_f1 = dumps( + f1, protocol={protocol}, + override_globals={override_globals}) # Change the value of the global variable cloned_f0() @@ -1084,130 +1086,168 @@ def f1(): result_f1 = cloned_f1() assert result_f1 == "changed_by_f0", result_f1 - # Ensure that unpickling the global variable does not change its value + # Ensure that unpickling the global variable overrides (resp. does not + # change) the global variable if override_globals was True + # (resp. False) at pickling time result_pickled_f1 = loads(pickled_f1)() - assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 + if {override_globals}: + assert result_pickled_f1 == "default_value", result_pickled_f1 + else: + assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 """ for clone_func in ['local_clone', 'subprocess_pickle_echo']: - code = code_template.format(protocol=self.protocol, - clone_func=clone_func) - assert_run_python_script(textwrap.dedent(code)) + for override_globals in [True, False]: + code = code_template.format( + protocol=self.protocol, clone_func=clone_func, + override_globals=override_globals) + assert_run_python_script(textwrap.dedent(code)) def test_closure_interacting_with_a_global_variable(self): global _TEST_GLOBAL_VARIABLE assert _TEST_GLOBAL_VARIABLE == "default_value" orig_value = _TEST_GLOBAL_VARIABLE - try: - def f0(): - global _TEST_GLOBAL_VARIABLE - _TEST_GLOBAL_VARIABLE = "changed_by_f0" - - def f1(): - return _TEST_GLOBAL_VARIABLE - - cloned_f0 = cloudpickle.loads(cloudpickle.dumps( - f0, protocol=self.protocol)) - cloned_f1 = cloudpickle.loads(cloudpickle.dumps( - f1, protocol=self.protocol)) - pickled_f1 = cloudpickle.dumps(f1, protocol=self.protocol) - - # Change the value of the global variable - cloned_f0() - assert _TEST_GLOBAL_VARIABLE == "changed_by_f0" - - # Ensure that the global variable is the same for another function - result_cloned_f1 = cloned_f1() - assert result_cloned_f1 == "changed_by_f0", result_cloned_f1 - assert f1() == result_cloned_f1 - - # Ensure that unpickling the global variable does not change its - # value - result_pickled_f1 = cloudpickle.loads(pickled_f1)() - assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 - finally: - _TEST_GLOBAL_VARIABLE = orig_value + for override_globals in [True, False]: + try: + def f0(): + global _TEST_GLOBAL_VARIABLE + _TEST_GLOBAL_VARIABLE = "changed_by_f0" + + def f1(): + return _TEST_GLOBAL_VARIABLE + + cloned_f0 = cloudpickle.loads(cloudpickle.dumps( + f0, protocol=self.protocol, + override_globals=override_globals)) + + cloned_f1 = cloudpickle.loads(cloudpickle.dumps( + f1, protocol=self.protocol, + override_globals=override_globals)) + + pickled_f1 = cloudpickle.dumps( + f1, protocol=self.protocol, + override_globals=override_globals) + + # Change the value of the global variable + cloned_f0() + assert _TEST_GLOBAL_VARIABLE == "changed_by_f0" + + # Ensure that the global variable is the same for another + # function + result_cloned_f1 = cloned_f1() + assert result_cloned_f1 == "changed_by_f0", result_cloned_f1 + assert f1() == result_cloned_f1 + + # Ensure that unpickling the global variable overrides (resp. + # does not change) its value if override_globals was + # True (resp False) at pickling time + res_pickled_f1 = cloudpickle.loads(pickled_f1)() + if override_globals: + assert res_pickled_f1 == "default_value", res_pickled_f1 + else: + assert res_pickled_f1 == "changed_by_f0", res_pickled_f1 + finally: + _TEST_GLOBAL_VARIABLE = orig_value def test_function_from_dynamic_module_with_globals_modifications(self): # This test verifies that the global variable state of a function # defined in a dynamic module in a child process are not reset by - # subsequent uplickling. + # subsequent unpickling. - # first, we create a dynamic module in the parent process - mod = types.ModuleType('mod') - code = ''' - GLOBAL_STATE = "initial value" - - def func_defined_in_dynamic_module(v=None): - global GLOBAL_STATE - if v is not None: - GLOBAL_STATE = v - return GLOBAL_STATE - ''' - exec(textwrap.dedent(code), mod.__dict__) - - try: - # Simple sanity check on the function's output - assert mod.func_defined_in_dynamic_module() == "initial value" - - # The function of mod is pickled two times, with two different - # values for the global variable GLOBAL_STATE. - # Then we launch a child process that sequentially unpickles the - # two functions. Those unpickle functions should share the same - # global variables in the child process: - # Once the first function gets unpickled, mod is created and - # tracked in the child environment. This is state is preserved - # when unpickling the second function whatever the global variable - # GLOBAL_STATE's value at the time of pickling. - - with open('function_with_initial_globals.pkl', 'wb') as f: - cloudpickle.dump(mod.func_defined_in_dynamic_module, f) - - # Change the mod's global variable - mod.GLOBAL_STATE = 'changed value' - - # At this point, mod.func_defined_in_dynamic_module() - # returns the updated value. Let's pickle it again. - assert mod.func_defined_in_dynamic_module() == 'changed value' - with open('function_with_modified_globals.pkl', 'wb') as f: - cloudpickle.dump(mod.func_defined_in_dynamic_module, f, - protocol=self.protocol) - - child_process_code = """ - import pickle - - with open('function_with_initial_globals.pkl','rb') as f: - func_with_initial_globals = pickle.load(f) - - # At this point, a module called 'mod' should exist in - # _dynamic_modules_globals. Further function loading - # will use the globals living in mod. - - assert func_with_initial_globals() == 'initial value' - - # Load a function with initial global variable that was - # pickled after a change in the global variable - with open('function_with_modified_globals.pkl','rb') as f: - func_with_modified_globals = pickle.load(f) - - # assert the this unpickling did not modify the value of - # the local - assert func_with_modified_globals() == 'initial value' - - # Update the value from the child process and check that - # unpickling again does not reset our change. - assert func_with_initial_globals('new value') == 'new value' - assert func_with_modified_globals() == 'new value' - - with open('function_with_initial_globals.pkl','rb') as f: - func_with_initial_globals = pickle.load(f) - assert func_with_initial_globals() == 'new value' - assert func_with_modified_globals() == 'new value' - """ - assert_run_python_script(textwrap.dedent(child_process_code)) + for override_globals in [True, False]: + try: + # first, we create a dynamic module in the parent process + mod = types.ModuleType('mod') + code = ''' + GLOBAL_STATE = "initial value" + + def func_defined_in_dynamic_module(v=None): + global GLOBAL_STATE + if v is not None: + GLOBAL_STATE = v + return GLOBAL_STATE + ''' + exec(textwrap.dedent(code), mod.__dict__) + # Simple sanity check on the function's output + assert mod.func_defined_in_dynamic_module() == "initial value" + + # The function of mod is pickled two times, with two different + # values for the global variable GLOBAL_STATE. Then we launch + # a child process that sequentially unpickles the two + # functions. Those unpickle functions should share the same + # global variables in the child process: Once the first + # function gets unpickled, mod is created and tracked in the + # child environment. If override_globals is False, then the + # module state is preserved when unpickling the second function + # whatever the global variable GLOBAL_STATE's value at the time + # of pickling. + + with open('function_with_initial_globals.pkl', 'wb') as f: + cloudpickle.dump( + mod.func_defined_in_dynamic_module, f, + override_globals=override_globals) + + # Change the mod's global variable + mod.GLOBAL_STATE = 'changed value' + + # At this point, mod.func_defined_in_dynamic_module() + # returns the updated value. Let's pickle it again. + assert mod.func_defined_in_dynamic_module() == 'changed value' + with open('function_with_modified_globals.pkl', 'wb') as f: + cloudpickle.dump( + mod.func_defined_in_dynamic_module, f, + override_globals=override_globals) + + child_process_code = """ + import pickle + + with open('function_with_initial_globals.pkl','rb') as f: + func_with_initial_globals = pickle.load(f) + + # At this point, a module called 'mod' should exist in + # _dynamic_modules_globals. Further function loading + # will use the globals living in mod. + + assert func_with_initial_globals() == 'initial value' + + # Load a function with initial global variable that was + # pickled after a change in the global variable + with open('function_with_modified_globals.pkl','rb') as f: + func_with_modified_globals = pickle.load(f) + + # Verify that loading the function overrides (resp. does + # not change) the global variable GLOBAL_STATE of mod, if + # override_globals was True (resp. False) at + # pickling time + + if {override_globals}: + assert func_with_modified_globals() == 'changed value' + else: + assert func_with_modified_globals() == 'initial value' + + # Update the value from the child process and check that + # unpickling resets (resp. does not reset) our change if + # override_globals was True (resp. False) at + # pickling time + assert ( + func_with_initial_globals('new value') == 'new value') + assert func_with_modified_globals() == 'new value' + + with open('function_with_initial_globals.pkl','rb') as f: + func_with_initial_globals = pickle.load(f) + + if {override_globals}: + assert func_with_initial_globals() == 'initial value' + assert func_with_modified_globals() == 'initial value' + else: + assert func_with_initial_globals() == 'new value' + assert func_with_modified_globals() == 'new value' + + """.format(override_globals=override_globals) + assert_run_python_script(textwrap.dedent(child_process_code)) - finally: - os.unlink('function_with_initial_globals.pkl') - os.unlink('function_with_modified_globals.pkl') + finally: + os.unlink('function_with_initial_globals.pkl') + os.unlink('function_with_modified_globals.pkl') @pytest.mark.skipif(sys.version_info >= (3, 0), reason="hardcoded pickle bytes for 2.7")