Skip to content

Commit 3844b7a

Browse files
author
Scott Sanderson
committed
BUG: Fix crash when pickling dynamic class cycles.
Fixes a bug where we would fail to pickle a class created inside a function if that class participated in a cycle with its own __dict__. Such cycles occur, for example, when a class defines a method that makes a Python 2-style super call, because we have a cycle from class -> __dict__ -> function -> __closure__ -> class. The fix for this is to use the same technique we use to dynamically-created functions: we first pickle an empty "skeleton class", which we memoize before pickling the rest of the class' __dict__. We then invoke a reduce function that re-attaches the class' attributes from the __dict__.
1 parent 5eefe28 commit 3844b7a

2 files changed

Lines changed: 181 additions & 15 deletions

File tree

cloudpickle/cloudpickle.py

Lines changed: 121 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,71 @@ def _save_subimports(self, code, top_level_dependencies):
393393
# then discards the reference to it
394394
self.write(pickle.POP)
395395

396+
def save_dynamic_class(self, obj):
397+
"""
398+
Save a class that can't be stored as module global.
399+
400+
This method is used to serialize classes that are defined inside
401+
functions, or that otherwise can't be serialized as attribute lookups
402+
from global modules.
403+
"""
404+
clsdict = dict(obj.__dict__) # copy dict proxy to a dict
405+
if not isinstance(clsdict.get('__dict__', None), property):
406+
# don't extract dict that are properties
407+
clsdict.pop('__dict__', None)
408+
clsdict.pop('__weakref__', None)
409+
410+
# hack as __new__ is stored differently in the __dict__
411+
new_override = clsdict.get('__new__', None)
412+
if new_override:
413+
clsdict['__new__'] = obj.__new__
414+
415+
save = self.save
416+
write = self.write
417+
418+
# We write pickle instructions explicitly here to handle the
419+
# possibility that the type object participates in a cycle with its own
420+
# __dict__. We first write an empty "skeleton" version of the class and
421+
# memoize it before writing the class' __dict__ itself. We then write
422+
# instructions to "rehydrate" the skeleton class by restoring the
423+
# attributes from the __dict__.
424+
#
425+
# A type can appear in a cycle with it's __dict__ if an instance of the
426+
# type appears in the type's __dict__ (which happens for the stdlib
427+
# Enum class), or if the type defines methods that close over the name
428+
# of the type, (which is common for Python 2-style super() calls).
429+
430+
# Push the rehydration function.
431+
save(_rehydrate_skeleton_class)
432+
433+
# Mark the start of the args for the rehydration function.
434+
write(pickle.MARK)
435+
436+
# On PyPy, __doc__ is a readonly attribute, so we need to include it in
437+
# the initial skeleton class. This is safe because we know that the
438+
# doc can't participate in a cycle with the original class.
439+
doc_dict = {'__doc__': clsdict.pop('__doc__', None)}
440+
441+
# Create and memoize an empty class with obj's name and bases.
442+
save(type(obj))
443+
save((
444+
obj.__name__,
445+
obj.__bases__,
446+
doc_dict,
447+
))
448+
write(pickle.REDUCE)
449+
self.memoize(obj)
450+
451+
# Now save the rest of obj's __dict__. Any references to obj
452+
# encountered while saving will point to the skeleton class.
453+
save(clsdict)
454+
455+
# Write a tuple of (skeleton_class, clsdict).
456+
write(pickle.TUPLE)
457+
458+
# Call _rehydrate_skeleton_class(skeleton_class, clsdict)
459+
write(pickle.REDUCE)
460+
396461
def save_function_tuple(self, func):
397462
""" Pickles an actual func object.
398463
@@ -493,8 +558,9 @@ def extract_func_data(self, func):
493558

494559
# process closure
495560
closure = (
496-
[c.cell_contents for c in func.__closure__]
497-
if func.__closure__ is not None else None
561+
list(map(_get_cell_contents, func.__closure__))
562+
if func.__closure__ is not None
563+
else None
498564
)
499565

500566
# save the dict
@@ -512,6 +578,12 @@ def save_builtin_function(self, obj):
512578
dispatch[types.BuiltinFunctionType] = save_builtin_function
513579

514580
def save_global(self, obj, name=None, pack=struct.pack):
581+
"""
582+
Save a "global".
583+
584+
The name of this method is somewhat misleading: all types get
585+
dispatched here.
586+
"""
515587
if obj.__module__ == "__builtin__" or obj.__module__ == "builtins":
516588
if obj in _BUILTIN_TYPE_NAMES:
517589
return self.save_reduce(_builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj)
@@ -535,18 +607,7 @@ def save_global(self, obj, name=None, pack=struct.pack):
535607

536608
typ = type(obj)
537609
if typ is not obj and isinstance(obj, (type, types.ClassType)):
538-
d = dict(obj.__dict__) # copy dict proxy to a dict
539-
if not isinstance(d.get('__dict__', None), property):
540-
# don't extract dict that are properties
541-
d.pop('__dict__', None)
542-
d.pop('__weakref__', None)
543-
544-
# hack as __new__ is stored differently in the __dict__
545-
new_override = d.get('__new__', None)
546-
if new_override:
547-
d['__new__'] = obj.__new__
548-
549-
self.save_reduce(typ, (obj.__name__, obj.__bases__, d), obj=obj)
610+
self.save_dynamic_class(obj)
550611
else:
551612
raise pickle.PicklingError("Can't pickle %r" % obj)
552613

@@ -908,6 +969,40 @@ def _gen_ellipsis():
908969
def _gen_not_implemented():
909970
return NotImplemented
910971

972+
973+
def _get_cell_contents(cell):
974+
try:
975+
return cell.cell_contents
976+
except ValueError:
977+
# sentinel used by ``_fill_function`` which will leave the cell empty
978+
return _empty_cell_value
979+
980+
981+
def instance(cls):
982+
"""Create a new instance of a class.
983+
984+
Parameters
985+
----------
986+
cls : type
987+
The class to create an instance of.
988+
989+
Returns
990+
-------
991+
instance : cls
992+
A new instance of ``cls``.
993+
"""
994+
return cls()
995+
996+
997+
@instance
998+
class _empty_cell_value(object):
999+
"""sentinel for empty closures
1000+
"""
1001+
@classmethod
1002+
def __reduce__(cls):
1003+
return cls.__name__
1004+
1005+
9111006
def _fill_function(func, globals, defaults, dict, closure_values):
9121007
""" Fills in the rest of function data into the skeleton function object
9131008
that were created via _make_skel_func().
@@ -919,7 +1014,8 @@ def _fill_function(func, globals, defaults, dict, closure_values):
9191014
cells = func.__closure__
9201015
if cells is not None:
9211016
for cell, value in zip(cells, closure_values):
922-
cell_set(cell, value)
1017+
if value is not _empty_cell_value:
1018+
cell_set(cell, value)
9231019

9241020
return func
9251021

@@ -950,6 +1046,16 @@ def _make_skel_func(code, cell_count, base_globals=None):
9501046
return types.FunctionType(code, base_globals, None, None, closure)
9511047

9521048

1049+
def _rehydrate_skeleton_class(skeleton_class, class_dict):
1050+
"""Put attributes from `class_dict` back on `skeleton_class`.
1051+
1052+
See CloudPickler.save_dynamic_class for more info.
1053+
"""
1054+
for attrname, attr in class_dict.items():
1055+
setattr(skeleton_class, attrname, attr)
1056+
return skeleton_class
1057+
1058+
9531059
def _find_module(mod_name):
9541060
"""
9551061
Iterate over each part instead of calling imp.find_module directly.

tests/cloudpickle_test.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,24 @@ def f():
167167
msg='g now has closure cells even though f does not',
168168
)
169169

170+
def test_empty_cell_preserved(self):
171+
def f():
172+
if False: # pragma: no cover
173+
cell = None
174+
175+
def g():
176+
cell # NameError, unbound free variable
177+
178+
return g
179+
180+
g1 = f()
181+
with pytest.raises(NameError):
182+
g1()
183+
184+
g2 = pickle_depickle(g1)
185+
with pytest.raises(NameError):
186+
g2()
187+
170188
def test_unhashable_closure(self):
171189
def f():
172190
s = set((1, 2)) # mutable set is unhashable
@@ -179,6 +197,48 @@ def g():
179197
g = pickle_depickle(f())
180198
self.assertEqual(g(), 2)
181199

200+
def test_dynamically_generated_class_that_uses_super(self):
201+
202+
class Base(object):
203+
def method(self):
204+
return 1
205+
206+
class Derived(Base):
207+
"Derived Docstring"
208+
def method(self):
209+
return super(Derived, self).method() + 1
210+
211+
self.assertEqual(Derived().method(), 2)
212+
213+
# Pickle and unpickle the class.
214+
UnpickledDerived = pickle_depickle(Derived)
215+
self.assertEqual(UnpickledDerived().method(), 2)
216+
self.assertEqual(UnpickledDerived.__doc__, "Derived Docstring")
217+
218+
# Pickle and unpickle an instance.
219+
orig_d = Derived()
220+
d = pickle_depickle(orig_d)
221+
self.assertEqual(d.method(), 2)
222+
223+
def test_cycle_in_classdict_globals(self):
224+
225+
class C(object):
226+
227+
def it_works(self):
228+
return "woohoo!"
229+
230+
C.C_again = C
231+
C.instance_of_C = C()
232+
233+
depickled_C = pickle_depickle(C)
234+
depickled_instance = pickle_depickle(C())
235+
236+
# Test instance of depickled class.
237+
self.assertEqual(depickled_C().it_works(), "woohoo!")
238+
self.assertEqual(depickled_C.C_again().it_works(), "woohoo!")
239+
self.assertEqual(depickled_C.instance_of_C.it_works(), "woohoo!")
240+
self.assertEqual(depickled_instance.it_works(), "woohoo!")
241+
182242
@pytest.mark.skipif(sys.version_info >= (3, 4)
183243
and sys.version_info < (3, 4, 3),
184244
reason="subprocess has a bug in 3.4.0 to 3.4.2")

0 commit comments

Comments
 (0)