From f022c41879502123f773c47dee88e16bcbfec346 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 23 Aug 2018 17:25:13 +0200 Subject: [PATCH 1/8] added dynamic module test where expected behavior is ambiguous --- cloudpickle/cloudpickle.py | 12 ++++- tests/cloudpickle_test.py | 89 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 842723539..e5ffb0fa9 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -78,6 +78,10 @@ PY3 = True +# caches dynamic modules that are not referenced in sys.modules +_dynamic_modules_globals = {} + + def _make_cell_set_template_code(): """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF @@ -1090,12 +1094,16 @@ def _make_skel_func(code, cell_count, base_globals=None): if base_globals is None: base_globals = {} elif isinstance(base_globals, str): + base_globals_name=base_globals if sys.modules.get(base_globals, None) is not None: - # this checks if we can import the previous environment the object + # This checks if we can import the previous environment the object # lived in base_globals = vars(sys.modules[base_globals]) else: - base_globals = {} + base_globals = _dynamic_modules_globals.get(base_globals, {}) + # base_globals is not a string anymore, using base_globals_name + # instead + _dynamic_modules_globals[base_globals_name] = base_globals base_globals['__builtins__'] = __builtins__ diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 41f185851..fbcabee90 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -993,6 +993,95 @@ def f1(): finally: _TEST_GLOBAL_VARIABLE = orig_value + def test_function_from_dynamic_module_with_globals_modifications(self): + """ + this test verifies that: + - any modification in the global variables of a dynamic + module living in a child process won't get overridden + when new objects are unpickled in the child's interpreter + + - vice versa, e.g a modification in the parent process does not + override the value of the variables in the child process + + The two cases are equivalent, and here, the second case is tested. + """ + + # first, we create a dynamic module in the parent process + mod = imp.new_module('mod') + code = ''' + x = 1 + def func_that_relies_on_dynamic_module(v=None): + global x + if v is not None: + x = v + return x + ''' + exec(textwrap.dedent(code), mod.__dict__) + + try: + # simple sanity check on the function's output + assert mod.func_that_relies_on_dynamic_module() == 1 + + # the function of mod is pickled two times, with two different + # values for the global variable x. + + # a child process that sequentially unpickles the + # two functions is then launched + + # once the _first_ function gets unpickled, mod is created and + # tracked in the child environment. Whatever the global variable + # x's value in the second function, it will be overriden by the + # initial value of x in the child environment + + with open('function_with_initial_globals.pk', 'wb') as fid: + cloudpickle.dump(mod.func_that_relies_on_dynamic_module, fid) + + # change the mod's global variable x + mod.x = 2 + + # at this point, mod.func_that_relies_on_dynamic_module() + # returns 2 + assert mod.func_that_relies_on_dynamic_module() == 2 + with open('function_with_modified_globals.pk', 'wb') as fid: + cloudpickle.dump(mod.func_that_relies_on_dynamic_module, fid) + + child_process_code = """ + import pickle + + with open('function_with_initial_globals.pk','rb') as fid: + function_with_initial_globals = pickle.load(fid) + + # at this point, a module called 'mod' should exist in + # _dynamic_modules_globals. further function loading + # will use the globals living in mod + + assert function_with_initial_globals() == 1 + + # load a function with initial global variable x set to 2 + with open('function_with_modified_globals.pk','rb') as fid: + function_with_modified_globals = pickle.load(fid) + + # assert the initial global got overridden by + # _dynamic_modules_globals + assert function_with_modified_globals()==1 + + # both function's global x should point to the + # same variable. + # calling function_with_initial_globals('test_value') + # will change this variable: function_with_modified_globals() + # should return the changed variable + assert function_with_initial_globals('test_value') == 'test_value' + assert function_with_modified_globals() == 'test_value' + """ + + # finally, we execute the code + assert_run_python_script(textwrap.dedent(child_process_code)) + + finally: + # remove the created files + os.unlink('function_with_initial_globals.pk') + os.unlink('function_with_modified_globals.pk') + @pytest.mark.skipif(sys.version_info >= (3, 0), reason="hardcoded pickle bytes for 2.7") def test_function_pickle_compat_0_4_0(self): From 2a0c3cbc35ded0b2b804ad937faac31454f13b12 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Fri, 14 Sep 2018 19:12:45 +0200 Subject: [PATCH 2/8] changed _dynamic_modules_globals to a WeakValueDictionary --- cloudpickle/cloudpickle.py | 20 ++++++++++++++--- tests/cloudpickle_test.py | 46 +++++++++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index e5ffb0fa9..f9e35f4f3 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -79,7 +79,20 @@ # caches dynamic modules that are not referenced in sys.modules -_dynamic_modules_globals = {} +_dynamic_modules_globals = weakref.WeakValueDictionary() + +# _dynamic_modules_globals will store _base_globals variables. +# Or, _base_globals end up being dicts, and built-in dict instances +# cannot be weakly referenced. Therefore, we create a mirror of the +# dict type, and at the mutation of _base globals from string to dict, +# we use the DynamicDict constructor instead of the builtin dict one. + + +class DynamicDict(dict): + """class used to instanciate base_globals, in order to be weakly refrenced + into _dynamic_modules_globals + """ + pass def _make_cell_set_template_code(): @@ -1094,13 +1107,14 @@ def _make_skel_func(code, cell_count, base_globals=None): if base_globals is None: base_globals = {} elif isinstance(base_globals, str): - base_globals_name=base_globals + base_globals_name = base_globals if sys.modules.get(base_globals, None) is not None: # This checks if we can import the previous environment the object # lived in base_globals = vars(sys.modules[base_globals]) else: - base_globals = _dynamic_modules_globals.get(base_globals, {}) + base_globals = _dynamic_modules_globals.get( + base_globals, DynamicDict()) # base_globals is not a string anymore, using base_globals_name # instead _dynamic_modules_globals[base_globals_name] = base_globals diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index fbcabee90..bd9896c3a 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -4,6 +4,7 @@ import collections import base64 import functools +import gc import imp from io import BytesIO import itertools @@ -42,7 +43,8 @@ tornado = None import cloudpickle -from cloudpickle.cloudpickle import _find_module, _make_empty_cell, cell_set +from cloudpickle.cloudpickle import _find_module, _make_empty_cell, cell_set,\ + _dynamic_modules_globals from .testutils import subprocess_pickle_echo from .testutils import assert_run_python_script @@ -440,6 +442,48 @@ def method(self, x): mod1, mod2 = pickle_depickle([mod, mod]) self.assertEqual(id(mod1), id(mod2)) + def test_dynamic_modules_cache(self): + # _dynamic_modules_globals is a WeakValueDictionary, so if this dynamic + # module has no other reference than in this # dict (whether on the + # child process or the parent process), this module will be garbage + # collected. + + # We first create a module + mod = imp.new_module('mod') + code = ''' + x = 1 + def f(): + return + ''' + exec(textwrap.dedent(code), mod.__dict__) + + pickled_module_path = 'mod.pkl' + + with open(pickled_module_path, 'wb') as f: + cloudpickle.dump(mod, f) + + # _dynamic_modules_globals will have mod appended to its values + # in the child process only + child_process_script = ''' + import pickle + import cloudpickle + with open("{pickled_module_path}", 'rb') as f: + _ = pickle.load(f) + ''' + child_process_script = child_process_script.format( + pickled_module_path=pickled_module_path) + + assert_run_python_script(textwrap.dedent(child_process_script)) + + # We remove the other references to the object and trigger a + # gc event + del mod + gc.collect() + + # At this point, there is no other reference to this module, so + # _dynamic_modules_globals should be empty + assert list(_dynamic_modules_globals.keys()) == [] + def test_load_dynamic_module_in_grandchild_process(self): # Make sure that when loaded, a dynamic module preserves its dynamic # property. Otherwise, this will lead to an ImportError if pickled in From 780dee7fe59cd617fb0a7382e97f88fcffc5454c Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Sep 2018 11:53:33 +0200 Subject: [PATCH 3/8] changed the dynamic_modules_globals test for now only a function of a dynamic module is unpickled, to show a case where creating a dynamic module is hard. --- tests/cloudpickle_test.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index bd9896c3a..bfbf2281f 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -442,7 +442,7 @@ def method(self, x): mod1, mod2 = pickle_depickle([mod, mod]) self.assertEqual(id(mod1), id(mod2)) - def test_dynamic_modules_cache(self): + def test_dynamic_modules_globals(self): # _dynamic_modules_globals is a WeakValueDictionary, so if this dynamic # module has no other reference than in this # dict (whether on the # child process or the parent process), this module will be garbage @@ -452,23 +452,37 @@ def test_dynamic_modules_cache(self): mod = imp.new_module('mod') code = ''' x = 1 - def f(): + def func(): return ''' exec(textwrap.dedent(code), mod.__dict__) pickled_module_path = 'mod.pkl' + print(mod.func.__module__) with open(pickled_module_path, 'wb') as f: - cloudpickle.dump(mod, f) + cloudpickle.dump(mod.func, f) # _dynamic_modules_globals will have mod appended to its values # in the child process only child_process_script = ''' import pickle - import cloudpickle + from cloudpickle.cloudpickle import _dynamic_modules_globals + import gc with open("{pickled_module_path}", 'rb') as f: - _ = pickle.load(f) + func = pickle.load(f) + + # A dictionnary storing the globals of the newly unpickled function + # should have been created + assert list(_dynamic_modules_globals.keys())==['mod'] + + del func + gc.collect() + + # There is no reference to the globals of func since func has been + # deleted and _dynamic_modules_globals is a WeakValueDictionary, + # so _dynamic_modules_globals should now be empty + assert list(_dynamic_modules_globals.keys())==[] ''' child_process_script = child_process_script.format( pickled_module_path=pickled_module_path) @@ -477,12 +491,6 @@ def f(): # We remove the other references to the object and trigger a # gc event - del mod - gc.collect() - - # At this point, there is no other reference to this module, so - # _dynamic_modules_globals should be empty - assert list(_dynamic_modules_globals.keys()) == [] def test_load_dynamic_module_in_grandchild_process(self): # Make sure that when loaded, a dynamic module preserves its dynamic From 756134605134d97f718079e5165568af1dbf8935 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Sep 2018 12:02:30 +0200 Subject: [PATCH 4/8] removed unused comment --- tests/cloudpickle_test.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index bfbf2281f..f0bdd6fb4 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -489,9 +489,6 @@ def func(): assert_run_python_script(textwrap.dedent(child_process_script)) - # We remove the other references to the object and trigger a - # gc event - def test_load_dynamic_module_in_grandchild_process(self): # Make sure that when loaded, a dynamic module preserves its dynamic # property. Otherwise, this will lead to an ImportError if pickled in From faab1f51c907c86b7df3ae7c798931b015ecb167 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Sep 2018 12:03:35 +0200 Subject: [PATCH 5/8] removed print statement --- tests/cloudpickle_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index f0bdd6fb4..75c2bfbac 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -458,7 +458,6 @@ def func(): exec(textwrap.dedent(code), mod.__dict__) pickled_module_path = 'mod.pkl' - print(mod.func.__module__) with open(pickled_module_path, 'wb') as f: cloudpickle.dump(mod.func, f) From c82008b72959aee78efff4d475bf21dc6c1936e7 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Sep 2018 12:08:23 +0200 Subject: [PATCH 6/8] changed pickled function file path --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 75c2bfbac..9f743a519 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -457,7 +457,7 @@ def func(): ''' exec(textwrap.dedent(code), mod.__dict__) - pickled_module_path = 'mod.pkl' + pickled_module_path = 'mod_f.pkl' with open(pickled_module_path, 'wb') as f: cloudpickle.dump(mod.func, f) From 21f05dbb701d6e5b76c8b56d2d21da4031442888 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 17 Sep 2018 13:07:44 +0200 Subject: [PATCH 7/8] added pickled files removal --- tests/cloudpickle_test.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 9f743a519..55230c919 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -459,9 +459,6 @@ def func(): pickled_module_path = 'mod_f.pkl' - with open(pickled_module_path, 'wb') as f: - cloudpickle.dump(mod.func, f) - # _dynamic_modules_globals will have mod appended to its values # in the child process only child_process_script = ''' @@ -483,10 +480,19 @@ def func(): # so _dynamic_modules_globals should now be empty assert list(_dynamic_modules_globals.keys())==[] ''' + child_process_script = child_process_script.format( pickled_module_path=pickled_module_path) - assert_run_python_script(textwrap.dedent(child_process_script)) + try: + with open(pickled_module_path, 'wb') as f: + cloudpickle.dump(mod.func, f) + + assert_run_python_script(textwrap.dedent(child_process_script)) + + finally: + os.unlink(pickled_module_path) + def test_load_dynamic_module_in_grandchild_process(self): # Make sure that when loaded, a dynamic module preserves its dynamic From f1f4c3196ab8b554b4a6a182354d7531b7f72170 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 18 Sep 2018 14:23:42 +0200 Subject: [PATCH 8/8] various style fixes --- cloudpickle/cloudpickle.py | 26 +++++++++++++++----------- tests/cloudpickle_test.py | 21 +++++++++++---------- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index f9e35f4f3..cd39e92ba 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -81,16 +81,20 @@ # caches dynamic modules that are not referenced in sys.modules _dynamic_modules_globals = weakref.WeakValueDictionary() -# _dynamic_modules_globals will store _base_globals variables. -# Or, _base_globals end up being dicts, and built-in dict instances -# cannot be weakly referenced. Therefore, we create a mirror of the -# dict type, and at the mutation of _base globals from string to dict, -# we use the DynamicDict constructor instead of the builtin dict one. +# _dynamic_modules_globals will store base_globals variables. Or, base_globals +# end up being dicts, and built-in dict instances cannot be weakly referenced. +# Therefore, we create a mirror of the dict type, and at the mutation of +# base globals from string to dict, we use the _DynamicModuleFuncGlobals +# constructor instead of the builtin dict one. -class DynamicDict(dict): - """class used to instanciate base_globals, in order to be weakly refrenced - into _dynamic_modules_globals +class _DynamicModuleFuncGlobals(dict): + """Global variables referenced by a function defined in a dynamic module + + To avoid leaking references we store such context in a WeakValueDictionary + instance. However instances of python builtin types such as dict cannot + be used directly as values in such a construct, hence the need for a + derived class. """ pass @@ -1114,9 +1118,9 @@ def _make_skel_func(code, cell_count, base_globals=None): base_globals = vars(sys.modules[base_globals]) else: base_globals = _dynamic_modules_globals.get( - base_globals, DynamicDict()) - # base_globals is not a string anymore, using base_globals_name - # instead + base_globals_name, None) + if base_globals is None: + base_globals = _DynamicModuleFuncGlobals() _dynamic_modules_globals[base_globals_name] = base_globals base_globals['__builtins__'] = __builtins__ diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 55230c919..22fb0cfd6 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -43,8 +43,8 @@ tornado = None import cloudpickle -from cloudpickle.cloudpickle import _find_module, _make_empty_cell, cell_set,\ - _dynamic_modules_globals +from cloudpickle.cloudpickle import _find_module, _make_empty_cell, cell_set +from cloudpickle.cloudpickle import _dynamic_modules_globals from .testutils import subprocess_pickle_echo from .testutils import assert_run_python_script @@ -443,10 +443,10 @@ def method(self, x): self.assertEqual(id(mod1), id(mod2)) def test_dynamic_modules_globals(self): - # _dynamic_modules_globals is a WeakValueDictionary, so if this dynamic - # module has no other reference than in this # dict (whether on the - # child process or the parent process), this module will be garbage - # collected. + # _dynamic_modules_globals is a WeakValueDictionary, so if a value + # in this dict (containing a set of global variables from a dynamic + # module created in the parent process) has no other reference than in + # this dict in the child process, it will be garbage collected. # We first create a module mod = imp.new_module('mod') @@ -459,8 +459,6 @@ def func(): pickled_module_path = 'mod_f.pkl' - # _dynamic_modules_globals will have mod appended to its values - # in the child process only child_process_script = ''' import pickle from cloudpickle.cloudpickle import _dynamic_modules_globals @@ -470,15 +468,18 @@ def func(): # A dictionnary storing the globals of the newly unpickled function # should have been created - assert list(_dynamic_modules_globals.keys())==['mod'] + assert list(_dynamic_modules_globals.keys()) == ['mod'] + # func.__globals__ is the only non-weak reference to + # _dynamic_modules_globals['mod']. By deleting func, we delete also + # _dynamic_modules_globals['mod'] del func gc.collect() # There is no reference to the globals of func since func has been # deleted and _dynamic_modules_globals is a WeakValueDictionary, # so _dynamic_modules_globals should now be empty - assert list(_dynamic_modules_globals.keys())==[] + assert list(_dynamic_modules_globals.keys()) == [] ''' child_process_script = child_process_script.format(