Skip to content

Commit bd36c83

Browse files
committed
handle very long longs with custom serialization callback
1 parent 49a4acb commit bd36c83

1 file changed

Lines changed: 42 additions & 20 deletions

File tree

cpp/src/arrow/python/python_to_arrow.cc

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,25 @@ extern "C" {
3030

3131
namespace arrow {
3232

33+
/// \brief Serialize an object by delegating to the registered Python callback.
///
/// Calls `pyarrow_serialize_callback(elem)` and hands the resulting dict back
/// to the caller.
///
/// \param[in] elem the Python object to serialize (borrowed reference)
/// \param[out] serialized_object always reset to NULL first; on success it
///   receives a new reference to the dict returned by the callback. The caller
///   owns that reference (the original comments state that it is released in
///   SerializeDict in this file).
/// \return Status::OK on success; Status::NotImplemented if no callback is
///   registered or the callback raised a Python exception; Status::Invalid if
///   the callback returned something that is not a dict.
Status CallCustomSerializationCallback(PyObject* elem, PyObject** serialized_object) {
  *serialized_object = NULL;
  if (!pyarrow_serialize_callback) {
    // PyObject_Repr returns a new reference (or NULL on failure), so hold on to
    // it and release it once the message has been built.
    PyObject* repr = PyObject_Repr(elem);
    const char* repr_utf8 = repr ? PyUnicode_AsUTF8(repr) : NULL;
    // If repr() or the UTF-8 conversion failed, a Python exception is pending.
    // We fall back to a placeholder, so drop that exception instead of leaving
    // it set behind an unrelated Status.
    if (!repr_utf8) { PyErr_Clear(); }
    std::stringstream ss;
    ss << "data type of " << (repr_utf8 ? repr_utf8 : "<unprintable object>")
       << " not recognized and custom serialization handler not registered";
    // repr_utf8 points into repr's storage: only release repr after streaming.
    Py_XDECREF(repr);
    return Status::NotImplemented(ss.str());
  }
  // Call callback(elem) directly; this avoids building an argument tuple that
  // could itself fail and would then have to be checked and released.
  PyObject* result = PyObject_CallFunctionObjArgs(pyarrow_serialize_callback, elem, NULL);
  // The Python exception (if any) is intentionally left set, as before.
  if (!result) { return Status::NotImplemented("python error"); }
  // Callers pass the result straight to PyDict_Size and queue it as a dict, so
  // reject anything else here rather than propagating a bogus size of -1.
  if (!PyDict_Check(result)) {
    Py_DECREF(result);
    return Status::Invalid("custom serialization callback must return a dict");
  }
  *serialized_object = result;
  return Status::OK();
}
51+
3352
Status append(PyObject* elem, SequenceBuilder& builder, std::vector<PyObject*>& sublists,
3453
std::vector<PyObject*>& subtuples, std::vector<PyObject*>& subdicts,
3554
std::vector<PyObject*>& tensors_out) {
@@ -41,8 +60,18 @@ Status append(PyObject* elem, SequenceBuilder& builder, std::vector<PyObject*>&
4160
} else if (PyLong_Check(elem)) {
4261
int overflow = 0;
4362
int64_t data = PyLong_AsLongLongAndOverflow(elem, &overflow);
44-
RETURN_NOT_OK(builder.AppendInt64(data));
45-
if (overflow) { return Status::NotImplemented("long overflow"); }
63+
if (!overflow) {
64+
RETURN_NOT_OK(builder.AppendInt64(data));
65+
} else {
66+
// Attempt to serialize the object using the custom callback.
67+
PyObject* serialized_object;
68+
// The reference count of serialized_object is incremented in the function
69+
// CallCustomSerializationCallback (if the call is successful), and it will
70+
// be decremented in SerializeDict in this file.
71+
RETURN_NOT_OK(CallCustomSerializationCallback(elem, &serialized_object));
72+
RETURN_NOT_OK(builder.AppendDict(PyDict_Size(serialized_object)));
73+
subdicts.push_back(serialized_object);
74+
}
4675
#if PY_MAJOR_VERSION < 3
4776
} else if (PyInt_Check(elem)) {
4877
RETURN_NOT_OK(builder.AppendInt64(static_cast<int64_t>(PyInt_AS_LONG(elem))));
@@ -65,13 +94,13 @@ Status append(PyObject* elem, SequenceBuilder& builder, std::vector<PyObject*>&
6594
#endif
6695
RETURN_NOT_OK(s);
6796
} else if (PyList_Check(elem)) {
68-
builder.AppendList(PyList_Size(elem));
97+
RETURN_NOT_OK(builder.AppendList(PyList_Size(elem)));
6998
sublists.push_back(elem);
7099
} else if (PyDict_Check(elem)) {
71-
builder.AppendDict(PyDict_Size(elem));
100+
RETURN_NOT_OK(builder.AppendDict(PyDict_Size(elem)));
72101
subdicts.push_back(elem);
73102
} else if (PyTuple_CheckExact(elem)) {
74-
builder.AppendTuple(PyTuple_Size(elem));
103+
RETURN_NOT_OK(builder.AppendTuple(PyTuple_Size(elem)));
75104
subtuples.push_back(elem);
76105
} else if (PyArray_IsScalar(elem, Generic)) {
77106
RETURN_NOT_OK(AppendScalar(elem, builder));
@@ -80,21 +109,14 @@ Status append(PyObject* elem, SequenceBuilder& builder, std::vector<PyObject*>&
80109
} else if (elem == Py_None) {
81110
RETURN_NOT_OK(builder.AppendNone());
82111
} else {
83-
if (!pyarrow_serialize_callback) {
84-
std::stringstream ss;
85-
ss << "data type of " << PyBytes_AS_STRING(PyObject_Repr(elem))
86-
<< " not recognized and custom serialization handler not registered";
87-
return Status::NotImplemented(ss.str());
88-
} else {
89-
PyObject* arglist = Py_BuildValue("(O)", elem);
90-
// The reference count of the result of the call to PyObject_CallObject
91-
// must be decremented. This is done in SerializeDict in this file.
92-
PyObject* result = PyObject_CallObject(pyarrow_serialize_callback, arglist);
93-
Py_XDECREF(arglist);
94-
if (!result) { return Status::NotImplemented("python error"); }
95-
builder.AppendDict(PyDict_Size(result));
96-
subdicts.push_back(result);
97-
}
112+
// Attempt to serialize the object using the custom callback.
113+
PyObject* serialized_object;
114+
// The reference count of serialized_object is incremented in the function
115+
// CallCustomSerializationCallback (if the call is successful), and it will
116+
// be decremented in SerializeDict in this file.
117+
RETURN_NOT_OK(CallCustomSerializationCallback(elem, &serialized_object));
118+
RETURN_NOT_OK(builder.AppendDict(PyDict_Size(serialized_object)));
119+
subdicts.push_back(serialized_object);
98120
}
99121
return Status::OK();
100122
}

0 commit comments

Comments
 (0)