diff --git a/CHANGES.md b/CHANGES.md index 76d380915..160e0d68f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,11 @@ - Add support for pickling interactively defined dataclasses. ([issue #245](https://github.com/cloudpipe/cloudpickle/pull/245)) +- Global variables referenced by functions pickled by cloudpickle are now + unpickled in a new and isolated namespace scoped by the CloudPickler + instance. This restores the (previously untested) behavior of cloudpickle + prior to changes done in 0.5.4 for functions defined in the `__main__` + module, and 0.6.0/1 for other dynamic functions. 0.7.0 ===== diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index f94e4c109..54d745cbc 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -79,22 +79,6 @@ PY3 = True -# Container for the global namespace to ensure consistent unpickling of -# functions defined in dynamic modules (modules not registed in sys.modules). -_dynamic_modules_globals = weakref.WeakValueDictionary() - - -class _DynamicModuleFuncGlobals(dict): - """Global variables referenced by a function defined in a dynamic module - - To avoid leaking references we store such context in a WeakValueDictionary - instance. However instances of python builtin types such as dict cannot - be used directly as values in such a construct, hence the need for a - derived class. - """ - pass - - def _make_cell_set_template_code(): """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF @@ -670,17 +654,17 @@ def extract_func_data(self, func): # save the dict dct = func.__dict__ - base_globals = self.globals_ref.get(id(func.__globals__), None) - if base_globals is None: - # For functions defined in a well behaved module use - # vars(func.__module__) for base_globals. This is necessary to - # share the global variables across multiple pickled functions from - # this module. - if func.__module__ is not None: - base_globals = func.__module__ - else: - base_globals = {} - self.globals_ref[id(func.__globals__)] = base_globals + # base_globals represents the future global namespace of func at + # unpickling time. Looking it up and storing it in globals_ref allow + # functions sharing the same globals at pickling time to also + # share them once unpickled, at one condition: since globals_ref is + # an attribute of a Cloudpickler instance, and that a new CloudPickler is + # created each time pickle.dump or pickle.dumps is called, functions + # also need to be saved within the same invokation of + # cloudpickle.dump/cloudpickle.dumps (for example: cloudpickle.dumps([f1, f2])). There + # is no such limitation when using Cloudpickler.dump, as long as the + # multiple invokations are bound to the same Cloudpickler. + base_globals = self.globals_ref.setdefault(id(func.__globals__), {}) return (code, f_globals, defaults, closure, dct, base_globals) @@ -1096,10 +1080,16 @@ def _fill_function(*args): else: raise ValueError('Unexpected _fill_value arguments: %r' % (args,)) - # Only set global variables that do not exist. - for k, v in state['globals'].items(): - if k not in func.__globals__: - func.__globals__[k] = v + # - At pickling time, any dynamic global variable used by func is + # serialized by value (in state['globals']). + # - At unpickling time, func's __globals__ attribute is initialized by + # first retrieving an empty isolated namespace that will be shared + # with other functions pickled from the same original module + # by the same CloudPickler instance and then updated with the + # content of state['globals'] to populate the shared isolated + # namespace with all the global variables that are specifically + # referenced for this function. + func.__globals__.update(state['globals']) func.__defaults__ = state['defaults'] func.__dict__ = state['dict'] @@ -1137,21 +1127,11 @@ def _make_skel_func(code, cell_count, base_globals=None): code and the correct number of cells in func_closure. All other func attributes (e.g. func_globals) are empty. """ - if base_globals is None: + # This is backward-compatibility code: for cloudpickle versions between + # 0.5.4 and 0.7, base_globals could be a string or None. base_globals + # should now always be a dictionary. + if base_globals is None or isinstance(base_globals, str): base_globals = {} - elif isinstance(base_globals, str): - base_globals_name = base_globals - try: - # First try to reuse the globals from the module containing the - # function. If it is not possible to retrieve it, fallback to an - # empty dictionary. - base_globals = vars(importlib.import_module(base_globals)) - except ImportError: - base_globals = _dynamic_modules_globals.get( - base_globals_name, None) - if base_globals is None: - base_globals = _DynamicModuleFuncGlobals() - _dynamic_modules_globals[base_globals_name] = base_globals base_globals['__builtins__'] = __builtins__ diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 6091fb8bf..4cf62ba82 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -451,57 +451,6 @@ def method(self, x): mod1, mod2 = pickle_depickle([mod, mod]) self.assertEqual(id(mod1), id(mod2)) - def test_dynamic_modules_globals(self): - # _dynamic_modules_globals is a WeakValueDictionary, so if a value - # in this dict (containing a set of global variables from a dynamic - # module created in the parent process) has no other reference than in - # this dict in the child process, it will be garbage collected. - - # We first create a module - mod = types.ModuleType('mod') - code = ''' - x = 1 - def func(): - return - ''' - exec(textwrap.dedent(code), mod.__dict__) - - pickled_module_path = os.path.join(self.tmpdir, 'mod_f.pkl') - child_process_script = ''' - import pickle - from cloudpickle.cloudpickle import _dynamic_modules_globals - import gc - with open("{pickled_module_path}", 'rb') as f: - func = pickle.load(f) - - # A dictionnary storing the globals of the newly unpickled function - # should have been created - assert list(_dynamic_modules_globals.keys()) == ['mod'] - - # func.__globals__ is the only non-weak reference to - # _dynamic_modules_globals['mod']. By deleting func, we delete also - # _dynamic_modules_globals['mod'] - del func - gc.collect() - - # There is no reference to the globals of func since func has been - # deleted and _dynamic_modules_globals is a WeakValueDictionary, - # so _dynamic_modules_globals should now be empty - assert list(_dynamic_modules_globals.keys()) == [] - ''' - - child_process_script = child_process_script.format( - pickled_module_path=_escape(pickled_module_path)) - - try: - with open(pickled_module_path, 'wb') as f: - cloudpickle.dump(mod.func, f, protocol=self.protocol) - - assert_run_python_script(textwrap.dedent(child_process_script)) - - finally: - os.unlink(pickled_module_path) - def test_module_locals_behavior(self): # Makes sure that a local function defined in another module is # correctly serialized. This notably checks that the globals are @@ -1083,20 +1032,42 @@ def f0(): def f1(): return VARIABLE - cloned_f0 = {clone_func}(f0, protocol={protocol}) - cloned_f1 = {clone_func}(f1, protocol={protocol}) + assert f0.__globals__ is f1.__globals__ + + # pickle f0 and f1 inside the same pickle_string + cloned_f0, cloned_f1 = {clone_func}([f0, f1], protocol={protocol}) + + # cloned_f0 and cloned_f1 now share a global namespace that is isolated + # from any previously existing namespace + assert cloned_f0.__globals__ is cloned_f1.__globals__ + assert cloned_f0.__globals__ is not f0.__globals__ + + # pickle f1 another time, but in a new pickle string pickled_f1 = dumps(f1, protocol={protocol}) - # Change the value of the global variable + # Change the value of the global variable in f0's new global namespace cloned_f0() - # Ensure that the global variable is the same for another function - result_f1 = cloned_f1() - assert result_f1 == "changed_by_f0", result_f1 - - # Ensure that unpickling the global variable does not change its value - result_pickled_f1 = loads(pickled_f1)() - assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 + # thanks to cloudpickle isolation, depickling and calling f0 and f1 + # should not affect the globals of already existing modules + assert VARIABLE == "default_value", VARIABLE + + # Ensure that cloned_f1 and cloned_f0 share the same globals, as f1 and + # f0 shared the same globals at pickling time, and cloned_f1 was + # depickled from the same pickle string as cloned_f0 + shared_global_var = cloned_f1() + assert shared_global_var == "changed_by_f0", shared_global_var + + # f1 is unpickled another time, but because it comes from another + # pickle string than pickled_f1 and pickled_f0, it will not share the + # same globals as the latter two. + new_cloned_f1 = loads(pickled_f1) + assert new_cloned_f1.__globals__ is not cloned_f1.__globals__ + assert new_cloned_f1.__globals__ is not f1.__globals__ + + # get the value of new_cloned_f1's VARIABLE + new_global_var = new_cloned_f1() + assert new_global_var == "default_value", new_global_var """ for clone_func in ['local_clone', 'subprocess_pickle_echo']: code = code_template.format(protocol=self.protocol, @@ -1115,117 +1086,44 @@ def f0(): def f1(): return _TEST_GLOBAL_VARIABLE - cloned_f0 = cloudpickle.loads(cloudpickle.dumps( - f0, protocol=self.protocol)) - cloned_f1 = cloudpickle.loads(cloudpickle.dumps( - f1, protocol=self.protocol)) + # pickle f0 and f1 inside the same pickle_string + cloned_f0, cloned_f1 = pickle_depickle([f0, f1], + protocol=self.protocol) + + # cloned_f0 and cloned_f1 now share a global namespace that is + # isolated from any previously existing namespace + assert cloned_f0.__globals__ is cloned_f1.__globals__ + assert cloned_f0.__globals__ is not f0.__globals__ + + # pickle f1 another time, but in a new pickle string pickled_f1 = cloudpickle.dumps(f1, protocol=self.protocol) - # Change the value of the global variable + # Change the global variable's value in f0's new global namespace cloned_f0() - assert _TEST_GLOBAL_VARIABLE == "changed_by_f0" - # Ensure that the global variable is the same for another function - result_cloned_f1 = cloned_f1() - assert result_cloned_f1 == "changed_by_f0", result_cloned_f1 - assert f1() == result_cloned_f1 - - # Ensure that unpickling the global variable does not change its - # value - result_pickled_f1 = cloudpickle.loads(pickled_f1)() - assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 + # depickling f0 and f1 should not affect the globals of already + # existing modules + assert _TEST_GLOBAL_VARIABLE == "default_value" + + # Ensure that cloned_f1 and cloned_f0 share the same globals, as f1 + # and f0 shared the same globals at pickling time, and cloned_f1 + # was depickled from the same pickle string as cloned_f0 + shared_global_var = cloned_f1() + assert shared_global_var == "changed_by_f0", shared_global_var + + # f1 is unpickled another time, but because it comes from another + # pickle string than pickled_f1 and pickled_f0, it will not share + # the same globals as the latter two. + new_cloned_f1 = pickle.loads(pickled_f1) + assert new_cloned_f1.__globals__ is not cloned_f1.__globals__ + assert new_cloned_f1.__globals__ is not f1.__globals__ + + # get the value of new_cloned_f1's VARIABLE + new_global_var = new_cloned_f1() + assert new_global_var == "default_value", new_global_var finally: _TEST_GLOBAL_VARIABLE = orig_value - def test_function_from_dynamic_module_with_globals_modifications(self): - # This test verifies that the global variable state of a function - # defined in a dynamic module in a child process are not reset by - # subsequent uplickling. - - # first, we create a dynamic module in the parent process - mod = types.ModuleType('mod') - code = ''' - GLOBAL_STATE = "initial value" - - def func_defined_in_dynamic_module(v=None): - global GLOBAL_STATE - if v is not None: - GLOBAL_STATE = v - return GLOBAL_STATE - ''' - exec(textwrap.dedent(code), mod.__dict__) - - with_initial_globals_file = os.path.join( - self.tmpdir, 'function_with_initial_globals.pkl') - with_modified_globals_file = os.path.join( - self.tmpdir, 'function_with_modified_globals.pkl') - - try: - # Simple sanity check on the function's output - assert mod.func_defined_in_dynamic_module() == "initial value" - - # The function of mod is pickled two times, with two different - # values for the global variable GLOBAL_STATE. - # Then we launch a child process that sequentially unpickles the - # two functions. Those unpickle functions should share the same - # global variables in the child process: - # Once the first function gets unpickled, mod is created and - # tracked in the child environment. This is state is preserved - # when unpickling the second function whatever the global variable - # GLOBAL_STATE's value at the time of pickling. - - with open(with_initial_globals_file, 'wb') as f: - cloudpickle.dump(mod.func_defined_in_dynamic_module, f) - - # Change the mod's global variable - mod.GLOBAL_STATE = 'changed value' - - # At this point, mod.func_defined_in_dynamic_module() - # returns the updated value. Let's pickle it again. - assert mod.func_defined_in_dynamic_module() == 'changed value' - with open(with_modified_globals_file, 'wb') as f: - cloudpickle.dump(mod.func_defined_in_dynamic_module, f, - protocol=self.protocol) - - child_process_code = """ - import pickle - - with open({with_initial_globals_file!r},'rb') as f: - func_with_initial_globals = pickle.load(f) - - # At this point, a module called 'mod' should exist in - # _dynamic_modules_globals. Further function loading - # will use the globals living in mod. - - assert func_with_initial_globals() == 'initial value' - - # Load a function with initial global variable that was - # pickled after a change in the global variable - with open({with_modified_globals_file!r},'rb') as f: - func_with_modified_globals = pickle.load(f) - - # assert the this unpickling did not modify the value of - # the local - assert func_with_modified_globals() == 'initial value' - - # Update the value from the child process and check that - # unpickling again does not reset our change. - assert func_with_initial_globals('new value') == 'new value' - assert func_with_modified_globals() == 'new value' - - with open({with_initial_globals_file!r},'rb') as f: - func_with_initial_globals = pickle.load(f) - assert func_with_initial_globals() == 'new value' - assert func_with_modified_globals() == 'new value' - """.format( - with_initial_globals_file=_escape(with_initial_globals_file), - with_modified_globals_file=_escape(with_modified_globals_file)) - assert_run_python_script(textwrap.dedent(child_process_code)) - - finally: - os.unlink(with_initial_globals_file) - os.unlink(with_modified_globals_file) - @pytest.mark.skipif(sys.version_info >= (3, 0), reason="hardcoded pickle bytes for 2.7") def test_function_pickle_compat_0_4_0(self):