Skip to content
58 changes: 48 additions & 10 deletions cloudpickle/cloudpickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,43 @@
from io import BytesIO as StringIO
PY3 = True


try:
from ctypes import pythonapi, py_object, c_int, PYFUNCTYPE
except ImportError:
supports_recursive_closure = False

def compress_closure(closure):
return closure

def decompress_closure(compressed_closure):
return compressed_closure

def fill_cells(cells, values):
pass
else:
supports_recursive_closure = True

def compress_closure(closure):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is badly named, since it merely returns the closure length.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The point here is that the closure "compression" is different on different versions of Python. on pypy this is an identity function. I didn't want to leak the implementation here, the only important thing is that this works when decompress_closure is called on the result.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I understand better now. Can you add a comment explaining design choices at the beginning of those changes? So that further readers don't get lost.

return len(closure) if closure is not None else -1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why -1? Why not just 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just pushed a test that shows the problem. Using 0 causes the deserialized function to have a __closure__ of () instead of None which is different behavior.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it matter? Why not special-case 0 in decompress_closure?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you have explicitly created a function with a closure of () you want to have this appear on the other side. It seems weird to lose this information if we don't need to.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure who would explicitly create such a function (and then depend on the closure being ()). For the sake of simplicity, I think we should only care about real-world use cases.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm with @llllllllll on this. IMO you should not lose information if you don't need to.


def decompress_closure(compressed_closure):
return (
tuple(_make_cell(None) for _ in range(compressed_closure))
if compressed_closure >= 0 else
None
)

_cell_set = PYFUNCTYPE(c_int, py_object, py_object)(
('PyCell_Set', pythonapi), ((1, 'cell'), (1, 'value')),
)

def fill_cells(cells, values):
if cells is not None:
for cell, value in zip(cells, values):
_cell_set(cell, value)


#relevant opcodes
STORE_GLOBAL = opcode.opmap['STORE_GLOBAL']
DELETE_GLOBAL = opcode.opmap['DELETE_GLOBAL']
Expand Down Expand Up @@ -305,7 +342,6 @@ def _save_subimports(self, code, top_level_dependencies):
# then discards the reference to it
self.write(pickle.POP)


def save_function_tuple(self, func):
""" Pickles an actual func object.

Expand All @@ -331,18 +367,22 @@ def save_function_tuple(self, func):
save(_fill_function) # skeleton function updater
write(pickle.MARK) # beginning of tuple that _fill_function expects

self._save_subimports(code, set(f_globals.values()) | set(closure))
self._save_subimports(
code,
itertools.chain(f_globals.values(), closure),
)

# create a skeleton function object and memoize it
save(_make_skel_func)
save((code, closure, base_globals))
save((code, compress_closure(func.__closure__), base_globals))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why func.__closure__ instead of just closure?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to my comment above: I wanted to be able to tell the difference between None and () to preserve this across a cloudpickle round trip.

write(pickle.REDUCE)
self.memoize(func)

# save the rest of the func data needed by _fill_function
save(f_globals)
save(defaults)
save(dct)
save(closure)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me try to understand: this saves the closure twice in PyPy? Once in compress_closure above, and once here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is correct, we should optimize out the second case here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated to remove this call if it is not needed

write(pickle.TUPLE)
write(pickle.REDUCE) # applies _fill_function on the tuple

Expand Down Expand Up @@ -799,14 +839,14 @@ def _gen_ellipsis():
def _gen_not_implemented():
return NotImplemented

def _fill_function(func, globals, defaults, dict):
def _fill_function(func, globals, defaults, dict, closure):
""" Fills in the rest of function data into the skeleton function object
that were created via _make_skel_func().
"""
func.__globals__.update(globals)
func.__defaults__ = defaults
func.__dict__ = dict

fill_cells(func.__closure__, closure)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this work only on CPython? I'm a bit lost.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is correct, this is a nop function on pypy where the original closure is passed to the skeleton function.

return func


Expand All @@ -818,19 +858,17 @@ def _reconstruct_closure(values):
return tuple([_make_cell(v) for v in values])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should _reconstruct_closure be removed now?



def _make_skel_func(code, closures, base_globals = None):
def _make_skel_func(code, compressed_closure, base_globals=None):
""" Creates a skeleton function object that contains just the provided
code and the correct number of cells in func_closure. All other
func attributes (e.g. func_globals) are empty.
"""
closure = _reconstruct_closure(closures) if closures else None

if base_globals is None:
base_globals = {}
base_globals['__builtins__'] = __builtins__

return types.FunctionType(code, base_globals,
None, None, closure)
closure = decompress_closure(compressed_closure)
return types.FunctionType(code, base_globals, None, None, closure)


def _find_module(mod_name):
Expand Down
66 changes: 65 additions & 1 deletion tests/cloudpickle_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from io import BytesIO

import cloudpickle
from cloudpickle.cloudpickle import _find_module
from cloudpickle.cloudpickle import _find_module, supports_recursive_closure

from .testutils import subprocess_pickle_echo

Expand Down Expand Up @@ -133,6 +133,70 @@ def test_nested_lambdas(self):
f2 = lambda x: f1(x) // b
self.assertEqual(pickle_depickle(f2)(1), 1)

@pytest.mark.skipif(
not supports_recursive_closure,
reason='The C API is needed for recursively defined closures'
)
def test_recursive_closure(self):
def f1():
def g():
return g
return g

def f2(base):
def g(n):
return base if n <= 1 else n * g(n - 1)
return g

g1 = pickle_depickle(f1())
self.assertEqual(g1(), g1)

g2 = pickle_depickle(f2(2))
self.assertEqual(g2(5), 240)

@pytest.mark.skipif(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test actually should succeed on PyPy, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, this is a mistake

not supports_recursive_closure,
reason='The C API is needed for recursively defined closures'
)
def test_closure_none_is_preserved(self):
def f():
"""a function with no closure cells
"""

self.assertIsNone(f.__closure__, msg='f actually has closure cells!')

g = pickle_depickle(f)

self.assertIsNone(
g.__closure__,
msg='g now has closure cells even though f does not',
)

@pytest.mark.skipif(
supports_recursive_closure,
reason="Recursive closures shouldn't raise an exception if supported"
)
@pytest.mark.xfail
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than a xfail, I'd like to see the specific exception tested for inside the test, so that we don't miss it failing for other reasons...

def test_recursive_closure_unsupported(self):
def f1():
def g():
return g
return g

pickle_depickle(f1())

def test_unhashable_closure(self):
def f():
s = set((1, 2)) # mutable set is unhashable

def g():
return len(s)

return g

g = pickle_depickle(f())
self.assertEqual(g(), 2)

@pytest.mark.skipif(sys.version_info >= (3, 4)
and sys.version_info < (3, 4, 3),
reason="subprocess has a bug in 3.4.0 to 3.4.2")
Expand Down