From 8fd53fd39218c0dd235493e9957e4fffd2db1a14 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Thu, 19 Aug 2021 09:31:57 +0000 Subject: [PATCH 1/7] Support getitem by Bool index --- paddle/fluid/operators/index_select_op.cc | 4 + paddle/fluid/pybind/imperative.cc | 108 +++++++++++++++--- .../fluid/tests/unittests/test_var_base.py | 31 +++++ .../fluid/tests/unittests/test_variable.py | 25 +++- python/paddle/fluid/variable_index.py | 38 +++--- 5 files changed, 167 insertions(+), 39 deletions(-) diff --git a/paddle/fluid/operators/index_select_op.cc b/paddle/fluid/operators/index_select_op.cc index 60ca7e2fe7cfd3..7eaf5a621930bd 100644 --- a/paddle/fluid/operators/index_select_op.cc +++ b/paddle/fluid/operators/index_select_op.cc @@ -54,6 +54,10 @@ class IndexSelectOp : public framework::OperatorWithKernel { "the dimension of Input(Index) is [%d].", index_dim, index_dim.size())); + PADDLE_ENFORCE_EQ(index_dim[0] > 0, true, + platform::errors::InvalidArgument( + "The length of Input(Index) can't be 0.")); + auto output_dim = framework::vectorize(input_dim); if (dim < 0) { dim += input_dim.size(); diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 0b6af3b542395d..a40bd04c390677 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -414,17 +414,15 @@ static int _PySlice_GetIndices(PySliceObject *r, Py_ssize_t length, return 0; } -static void ParseIndexingSlice(framework::LoDTensor *tensor, PyObject *_index, - std::vector *slice_axes, - std::vector *slice_starts, - std::vector *slice_ends, - std::vector *slice_strides, - std::vector *decrease_axis, - std::vector *none_axes, - std::vector *infer_flags) { - // We allow indexing by Integers, Slices, and tuples of those +static void ParseIndexingSlice( + framework::LoDTensor *tensor, PyObject *_index, + std::vector *slice_axes, std::vector *slice_starts, + std::vector *slice_ends, std::vector *slice_strides, + std::vector *decrease_axis, std::vector *none_axes, + std::vector *infer_flags, std::vector *list_select_idxs, + bool *list_select_flag) { + // We allow indexing by Integers, Slices, Ellipsis, None and tuples of those // types. - // Ellipsis and None are not supported yet. // wrap to tuple PyObject *index = !PyTuple_Check(_index) ? PyTuple_Pack(1, _index) : _index; PADDLE_ENFORCE_EQ( @@ -490,11 +488,58 @@ static void ParseIndexingSlice(framework::LoDTensor *tensor, PyObject *_index, dim += rank - specified_dims; } else if (slice_item == Py_None) { none_axes->push_back(dim); + } else if (PyList_Check(slice_item)) { + *list_select_flag = true; + if (size != 1) { + PADDLE_THROW(platform::errors::InvalidArgument( + "When index contains a list, its length is excepted to 1, " + "but received %s", + size)); + } + bool all_bool = true; + int list_size = PyList_GET_SIZE(slice_item); + for (int j = 0; j < list_size; ++j) { + PyObject *list_item = PyList_GetItem(slice_item, j); + if (PyCheckInteger(list_item)) { + all_bool = false; + } else if (!PyBool_Check(list_item)) { + PADDLE_THROW(platform::errors::InvalidArgument( + "Only support int or bool in index list.")); + } + } + if (all_bool) { + if (list_size != shape[0]) { + PADDLE_THROW(platform::errors::InvalidArgument( + "The dimension of bool index doesn't match indexed array along " + "dimension 0, the target dimension is %d, but received %d.", + shape[0], list_size)); + } + for (int j = 0; j < list_size; ++j) { + PyObject *list_item = PyList_GetItem(slice_item, j); + if (list_item == Py_True) { + list_select_idxs->push_back(j); + } + } + } else { + for (int j = 0; j < list_size; ++j) { + PyObject *list_item = PyList_GetItem(slice_item, j); + if (PyCheckInteger(list_item)) { + list_select_idxs->push_back( + static_cast(PyLong_AsLong(list_item))); + } else if (list_item == Py_True) { + list_select_idxs->push_back(1); + } else { + list_select_idxs->push_back(0); + } + } + } + } else { PADDLE_THROW(platform::errors::InvalidArgument( - "Currently, VarBase.__getitem__() only allows indexing" - "by Integers, Slices, Ellipsis, None and tuples of " - "these types, but received %s in %dth slice item", + "Currently, VarBase.__getitem__() only allows indexing " + "by Integers, Slices, Ellipsis, None, tuples of these types " + "and list of Bool and Integers, but received " + "%s in %dth slice item", std::string(Py_TYPE(slice_item)->tp_name), i + 1)); } } @@ -797,10 +842,13 @@ void BindImperative(py::module *m_ptr) { // copys data to cpu place, which reduces performance. if (parse_index && value_is_tensor) { std::vector axes, starts, ends, steps, decrease_axes, - none_axes, infer_flags; + none_axes, infer_flags, list_select_idxs; + // if index is a list, list_select_flag will be true + bool list_select_flag; ParseIndexingSlice(self_tensor, index_ptr, &axes, &starts, &ends, &steps, &decrease_axes, &none_axes, - &infer_flags); + &infer_flags, &list_select_idxs, + &list_select_flag); framework::AttributeMap attrs = { {"axes", axes}, @@ -858,21 +906,26 @@ void BindImperative(py::module *m_ptr) { .def("_getitem_index_not_tensor", [](std::shared_ptr &self, py::handle _index) { std::vector slice_axes, slice_starts, slice_ends, - slice_strides, decrease_axis, none_axes, infer_flags; + slice_strides, decrease_axis, none_axes, infer_flags, + list_select_idxs; + // if index is a list, list_select_flag will be true + bool list_select_flag = false; auto tensor = self->MutableVar()->GetMutable(); ParseIndexingSlice(tensor, _index.ptr(), &slice_axes, &slice_starts, &slice_ends, &slice_strides, - &decrease_axis, &none_axes, &infer_flags); + &decrease_axis, &none_axes, &infer_flags, + &list_select_idxs, &list_select_flag); // release gil and do tracing py::gil_scoped_release release; const auto &tracer = imperative::GetCurrentTracer(); - auto out = slice_axes.empty() + auto out = slice_axes.empty() && !list_select_flag ? self : std::shared_ptr( new imperative::VarBase( tracer->GenerateUniqueName())); + if (!slice_axes.empty()) { imperative::NameVarBaseMap ins = {{"Input", {self}}}; framework::AttributeMap attrs = { @@ -958,6 +1011,25 @@ void BindImperative(py::module *m_ptr) { } } + // the index is a list + if (list_select_flag) { + auto select_index = std::shared_ptr( + new imperative::VarBase(tracer->GenerateUniqueName())); + auto *idx_tensor = select_index->MutableVar() + ->GetMutable(); + auto *dev_ctx = platform::DeviceContextPool::Instance().Get( + tracer->ExpectedPlace()); + TensorFromVector(list_select_idxs, *dev_ctx, idx_tensor); + for (int v : list_select_idxs) { + VLOG(4) << v; + } + + imperative::NameVarBaseMap ins = {{"X", {self}}, + {"Index", {select_index}}}; + imperative::NameVarBaseMap outs = {{"Out", {out}}}; + tracer->TraceOp("index_select", ins, outs, {{"dim", 0}}); + } + return out; }) .def( diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index cdf34c27c0a345..d1afa40e005efb 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -733,6 +733,36 @@ def _test_none_index(self): # self.assertTrue( # np.array_equal(var[10], np_value[0, 1:10:2, None, None, ...])) + def _test_bool_index(self): + shape = (4, 8, 5, 64) + np_value = np.random.random(shape).astype('float32') + var_tensor = paddle.to_tensor(np_value) + index = [[True, True, True, True], [True, False, True, True], + [True, False, False, True], [False, 0, 1, True, True]] + var = [ + var_tensor[index[0]].numpy(), + var_tensor[index[1]].numpy(), + var_tensor[index[2]].numpy(), + var_tensor[index[3]].numpy(), + ] + self.assertTrue(np.array_equal(var[0], np_value[index[0]])) + self.assertTrue(np.array_equal(var[1], np_value[index[1]])) + self.assertTrue(np.array_equal(var[2], np_value[index[2]])) + self.assertTrue(np.array_equal(var[3], np_value[index[3]])) + self.assertTrue( + np.array_equal(var_tensor[var_tensor > 0.67], np_value[np_value > + 0.67])) + self.assertTrue( + np.array_equal(var_tensor[var_tensor < 0.55], np_value[np_value < + 0.55])) + + with self.assertRaises(ValueError): + var_tensor[[False, False, False, False]] + with self.assertRaises(ValueError): + var_tensor[[True, False]] + with self.assertRaises(ValueError): + var_tensor[[True, False, False, False, False]] + def _test_for_var(self): np_value = np.random.random((30, 100, 100)).astype('float32') w = fluid.dygraph.to_variable(np_value) @@ -747,6 +777,7 @@ def test_slice(self): self._test_for_var() self._test_for_getitem_ellipsis_index() self._test_none_index() + self._test_bool_index() var = fluid.dygraph.to_variable(self.array) self.assertTrue(np.array_equal(var[1, :].numpy(), self.array[1, :])) diff --git a/python/paddle/fluid/tests/unittests/test_variable.py b/python/paddle/fluid/tests/unittests/test_variable.py index a998d58fdbc607..b54046cba0643d 100644 --- a/python/paddle/fluid/tests/unittests/test_variable.py +++ b/python/paddle/fluid/tests/unittests/test_variable.py @@ -252,26 +252,39 @@ def _test_slice_index_list_bool(self, place): x = paddle.assign(data) idx0 = [True, False] idx1 = [False, True] - idx2 = [False, False] - idx3 = [True, True] + idx2 = [True, True] + idx3 = [False, False, 1] + idx4 = [True, False, 0] out0 = x[idx0] out1 = x[idx1] out2 = x[idx2] out3 = x[idx3] + out4 = x[idx4] + out5 = x[x < 0.36] + out6 = x[x > 0.6] exe = paddle.static.Executor(place) - result = exe.run(prog, fetch_list=[out0, out1, out2, out3]) + result = exe.run(prog, + fetch_list=[out0, out1, out2, out3, out4, out5, out6]) - expected = [data[idx0], data[idx1], data[idx2], data[idx3]] + expected = [ + data[idx0], data[idx1], data[idx2], data[idx3], data[idx4], + data[data < 0.36], data[data > 0.6] + ] self.assertTrue((result[0] == expected[0]).all()) self.assertTrue((result[1] == expected[1]).all()) self.assertTrue((result[2] == expected[2]).all()) self.assertTrue((result[3] == expected[3]).all()) + self.assertTrue((result[4] == expected[4]).all()) + self.assertTrue((result[5] == expected[5]).all()) + self.assertTrue((result[6] == expected[6]).all()) - with self.assertRaises(TypeError): - res = x[[True, 0]] + with self.assertRaises(IndexError): + res = x[[True, False, False]] + with self.assertRaises(ValueError): + res = x[[False, False]] def test_slice(self): places = [fluid.CPUPlace()] diff --git a/python/paddle/fluid/variable_index.py b/python/paddle/fluid/variable_index.py index 1ba44cea76347d..9d91c1af298c22 100644 --- a/python/paddle/fluid/variable_index.py +++ b/python/paddle/fluid/variable_index.py @@ -150,31 +150,37 @@ def _getitem_impl_(var, item): end = MAX_INTEGER if step > 0 else -1 elif isinstance(slice_item, list): - is_bool_list = False + all_bool = True for i in slice_item: - if not isinstance(i, (int, bool)): + if type(i) is int: + all_bool = False + elif not isinstance(i, bool): raise TypeError("Only support int or bool in index list.") - if isinstance(i, bool): - is_bool_list = True - break - if len(item) != 1: raise IndexError( "When index contains a list, its length must be 1, but received {}". format(len(item))) - - if is_bool_list: - new_slice_item = [] + new_slice_item = [] + if all_bool: + if len(slice_item) != var.shape[0]: + raise IndexError( + "The dimension of bool index doesn't match indexed array along "\ + "dimension 0, the target dimension is {}, but received {}.". + format(var.shape[0], len(slice_item))) for idx, ele in enumerate(slice_item): - if not isinstance(ele, bool): - raise TypeError( - "Mixed bool index with other types is not supported." - ) - if ele is True: new_slice_item.append(idx) slice_item = new_slice_item + else: + for idx, ele in enumerate(slice_item): + if type(ele) is int: + new_slice_item.append(ele) + elif ele is True: + new_slice_item.append(1) + else: + new_slice_item.append(0) + slice_item = new_slice_item from .layers import assign from ..tensor import index_select @@ -188,7 +194,9 @@ def _getitem_impl_(var, item): "When index contains a Tensor, its length must be 1, but received {}". format(len(item))) - from ..tensor import index_select + from ..tensor import index_select, masked_select + if slice_item.dtype == core.VarDesc.VarType.BOOL: + return masked_select(var, slice_item) return index_select(var, index=slice_item, axis=0) else: From 0155baf089d73692c3b522c5ab5cc5da9fdc24c0 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Thu, 19 Aug 2021 15:38:05 +0000 Subject: [PATCH 2/7] delete some debug info of bool index --- paddle/fluid/operators/index_select_op.cc | 2 +- paddle/fluid/pybind/imperative.cc | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/operators/index_select_op.cc b/paddle/fluid/operators/index_select_op.cc index 7eaf5a621930bd..5bf16960170f98 100644 --- a/paddle/fluid/operators/index_select_op.cc +++ b/paddle/fluid/operators/index_select_op.cc @@ -54,7 +54,7 @@ class IndexSelectOp : public framework::OperatorWithKernel { "the dimension of Input(Index) is [%d].", index_dim, index_dim.size())); - PADDLE_ENFORCE_EQ(index_dim[0] > 0, true, + PADDLE_ENFORCE_EQ(index_dim[0] != 0, true, platform::errors::InvalidArgument( "The length of Input(Index) can't be 0.")); diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 92a857ef37878c..777d68ea2f9ca7 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -421,8 +421,8 @@ static void ParseIndexingSlice( std::vector *decrease_axis, std::vector *none_axes, std::vector *infer_flags, std::vector *list_select_idxs, bool *list_select_flag) { - // We allow indexing by Integers, Slices, Ellipsis, None and tuples of those - // types. + // We allow indexing by Integers, Slices, Ellipsis, None, tuples of those + // types, and list of Bool and Integers. // wrap to tuple PyObject *index = !PyTuple_Check(_index) ? PyTuple_Pack(1, _index) : _index; PADDLE_ENFORCE_EQ( @@ -493,7 +493,7 @@ static void ParseIndexingSlice( if (size != 1) { PADDLE_THROW(platform::errors::InvalidArgument( "When index contains a list, its length is excepted to 1, " - "but received %s", + "but received %d", size)); } bool all_bool = true; @@ -1022,9 +1022,6 @@ void BindImperative(py::module *m_ptr) { auto *dev_ctx = platform::DeviceContextPool::Instance().Get( tracer->ExpectedPlace()); TensorFromVector(list_select_idxs, *dev_ctx, idx_tensor); - for (int v : list_select_idxs) { - VLOG(4) << v; - } imperative::NameVarBaseMap ins = {{"X", {self}}, {"Index", {select_index}}}; From b4430b4fd01c56f3bce7df0e57f223a74eb76207 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Sun, 22 Aug 2021 17:02:44 +0000 Subject: [PATCH 3/7] support the case that the shape of bool index is different from indexed tensor --- .../fluid/tests/unittests/test_var_base.py | 11 ++++++++- .../fluid/tests/unittests/test_variable.py | 16 ++++++++----- python/paddle/fluid/variable_index.py | 23 +++++++++++++++---- 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index d1afa40e005efb..416f125caa2b62 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -734,21 +734,28 @@ def _test_none_index(self): # np.array_equal(var[10], np_value[0, 1:10:2, None, None, ...])) def _test_bool_index(self): - shape = (4, 8, 5, 64) + shape = (4, 2, 5, 64) np_value = np.random.random(shape).astype('float32') var_tensor = paddle.to_tensor(np_value) index = [[True, True, True, True], [True, False, True, True], [True, False, False, True], [False, 0, 1, True, True]] + index2d = np.array([[True, True], [False, False], [True, False], + [True, True]]) + tensor_index = paddle.to_tensor(index2d) var = [ var_tensor[index[0]].numpy(), var_tensor[index[1]].numpy(), var_tensor[index[2]].numpy(), var_tensor[index[3]].numpy(), + var_tensor[paddle.to_tensor(index[0])].numpy(), + var_tensor[tensor_index].numpy(), ] self.assertTrue(np.array_equal(var[0], np_value[index[0]])) self.assertTrue(np.array_equal(var[1], np_value[index[1]])) self.assertTrue(np.array_equal(var[2], np_value[index[2]])) self.assertTrue(np.array_equal(var[3], np_value[index[3]])) + self.assertTrue(np.array_equal(var[4], np_value[index[0]])) + self.assertTrue(np.array_equal(var[5], np_value[index2d])) self.assertTrue( np.array_equal(var_tensor[var_tensor > 0.67], np_value[np_value > 0.67])) @@ -762,6 +769,8 @@ def _test_bool_index(self): var_tensor[[True, False]] with self.assertRaises(ValueError): var_tensor[[True, False, False, False, False]] + with self.assertRaises(IndexError): + var_tensor[paddle.to_tensor([[True, False, False, False]])] def _test_for_var(self): np_value = np.random.random((30, 100, 100)).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/test_variable.py b/python/paddle/fluid/tests/unittests/test_variable.py index b54046cba0643d..0c120100faf066 100644 --- a/python/paddle/fluid/tests/unittests/test_variable.py +++ b/python/paddle/fluid/tests/unittests/test_variable.py @@ -246,7 +246,8 @@ def _test_slice_index_ellipsis(self, place): res = x[[1.2, 0]] def _test_slice_index_list_bool(self, place): - data = np.random.rand(2, 3).astype("float32") + data = np.random.rand(2, 3, 4).astype("float32") + np_idx = np.array([[True, False, False], [True, False, True]]) prog = paddle.static.Program() with paddle.static.program_guard(prog): x = paddle.assign(data) @@ -255,22 +256,24 @@ def _test_slice_index_list_bool(self, place): idx2 = [True, True] idx3 = [False, False, 1] idx4 = [True, False, 0] + idx5 = paddle.assign(np_idx) out0 = x[idx0] out1 = x[idx1] out2 = x[idx2] out3 = x[idx3] out4 = x[idx4] - out5 = x[x < 0.36] - out6 = x[x > 0.6] + out5 = x[idx5] + out6 = x[x < 0.36] + out7 = x[x > 0.6] exe = paddle.static.Executor(place) - result = exe.run(prog, - fetch_list=[out0, out1, out2, out3, out4, out5, out6]) + result = exe.run( + prog, fetch_list=[out0, out1, out2, out3, out4, out5, out6, out7]) expected = [ data[idx0], data[idx1], data[idx2], data[idx3], data[idx4], - data[data < 0.36], data[data > 0.6] + data[np_idx], data[data < 0.36], data[data > 0.6] ] self.assertTrue((result[0] == expected[0]).all()) @@ -280,6 +283,7 @@ def _test_slice_index_list_bool(self, place): self.assertTrue((result[4] == expected[4]).all()) self.assertTrue((result[5] == expected[5]).all()) self.assertTrue((result[6] == expected[6]).all()) + self.assertTrue((result[7] == expected[7]).all()) with self.assertRaises(IndexError): res = x[[True, False, False]] diff --git a/python/paddle/fluid/variable_index.py b/python/paddle/fluid/variable_index.py index 9d91c1af298c22..5bdf0451b54e91 100644 --- a/python/paddle/fluid/variable_index.py +++ b/python/paddle/fluid/variable_index.py @@ -159,7 +159,7 @@ def _getitem_impl_(var, item): if len(item) != 1: raise IndexError( - "When index contains a list, its length must be 1, but received {}". + "When index contains a list, its length must be 1, but received {}.". format(len(item))) new_slice_item = [] if all_bool: @@ -191,12 +191,27 @@ def _getitem_impl_(var, item): elif isinstance(slice_item, Variable): if len(item) != 1: raise IndexError( - "When index contains a Tensor, its length must be 1, but received {}". + "When index contains a Tensor, its length must be 1, but received {}.". format(len(item))) - from ..tensor import index_select, masked_select + from ..tensor import index_select, gather_nd + from .layers.nn import where + if slice_item.dtype == core.VarDesc.VarType.BOOL: - return masked_select(var, slice_item) + if len(slice_item.shape) > len(var.shape): + raise IndexError( + "The dims of bool index doesn't match indexed array, " + "the dims of bool index except to be equal or less " + "than {}, but received {}.".format( + len(var.shape), len(slice_item.shape))) + for i, dim_len in enumerate(slice_item.shape): + if dim_len != var.shape[i]: + raise IndexError( + "The dimension of bool index doesn't match indexed array along "\ + "dimension {}, the target dimension is {}, but received {}.". + format(i, var.shape[i], dim_len)) + bool_2_idx = where(slice_item == True) + return gather_nd(var, bool_2_idx) return index_select(var, index=slice_item, axis=0) else: From d8b4e40c8c81c0e9c73132d86864fd489b67a34c Mon Sep 17 00:00:00 2001 From: zyfncg Date: Tue, 24 Aug 2021 12:41:10 +0000 Subject: [PATCH 4/7] support setitem by bool index --- paddle/fluid/pybind/imperative.cc | 219 +++++++++--------- .../fluid/dygraph/varbase_patch_methods.py | 26 ++- .../tests/unittests/test_set_value_op.py | 55 +++++ python/paddle/fluid/variable_index.py | 66 +++++- 4 files changed, 256 insertions(+), 110 deletions(-) diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 777d68ea2f9ca7..3906e4a107054b 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -490,12 +490,12 @@ static void ParseIndexingSlice( none_axes->push_back(dim); } else if (PyList_Check(slice_item)) { *list_select_flag = true; - if (size != 1) { - PADDLE_THROW(platform::errors::InvalidArgument( - "When index contains a list, its length is excepted to 1, " - "but received %d", - size)); - } + PADDLE_ENFORCE_EQ( + size, 1, + platform::errors::InvalidArgument( + "When index contains a list, its length is excepted to 1, " + "but received %d", + size)); bool all_bool = true; int list_size = PyList_GET_SIZE(slice_item); for (int j = 0; j < list_size; ++j) { @@ -508,12 +508,13 @@ static void ParseIndexingSlice( } } if (all_bool) { - if (list_size != shape[0]) { - PADDLE_THROW(platform::errors::InvalidArgument( - "The dimension of bool index doesn't match indexed array along " - "dimension 0, the target dimension is %d, but received %d.", - shape[0], list_size)); - } + PADDLE_ENFORCE_EQ( + list_size, shape[0], + platform::errors::InvalidArgument( + "The dimension of bool index doesn't match indexed array along " + "dimension 0, the target dimension is %d, but received %d.", + shape[0], list_size)); + for (int j = 0; j < list_size; ++j) { PyObject *list_item = PyList_GetItem(slice_item, j); if (list_item == Py_True) { @@ -808,103 +809,109 @@ void BindImperative(py::module *m_ptr) { .def("__init__", &InitVarBaseFromNumpyWithArgDefault, py::arg("value")) .def("__init__", &InitVarBaseFromTensorWithArgDefault, py::arg("tensor")) .def("__init__", &InitVarBaseFromNumpyWithKwargs) - .def("__setitem__", - [](std::shared_ptr &self, py::handle _index, - py::object &value_obj) { - auto self_tensor = - self->MutableVar()->GetMutable(); - PyObject *index_ptr = !PyTuple_Check(_index.ptr()) - ? PyTuple_Pack(1, _index.ptr()) - : _index.ptr(); - // 1. Check argumnets - // 1.1 Check whether value obj is a tensor. - bool value_is_tensor = true; - bool parse_index = true; - if (py::isinstance(value_obj) || - py::isinstance(value_obj) || - py::isinstance(value_obj)) { - value_is_tensor = false; - } - - // 1.2 Check whether _index can be parsed. - const int size = PyTuple_GET_SIZE(index_ptr); - for (int dim = 0; dim < size; ++dim) { - PyObject *slice_item = PyTuple_GetItem(index_ptr, dim); - if (!(PyCheckInteger(slice_item) || PySlice_Check(slice_item) || - slice_item == Py_Ellipsis || slice_item == Py_None)) { - parse_index = false; - break; - } - } - - // 2. Call op set_value to speed up if the condition is met, - // otherwise call TensorToPyArray. - // TODO(liym27): Try not to call TensorToPyArray because it always - // copys data to cpu place, which reduces performance. - if (parse_index && value_is_tensor) { - std::vector axes, starts, ends, steps, decrease_axes, - none_axes, infer_flags, list_select_idxs; - // if index is a list, list_select_flag will be true - bool list_select_flag; - ParseIndexingSlice(self_tensor, index_ptr, &axes, &starts, &ends, - &steps, &decrease_axes, &none_axes, - &infer_flags, &list_select_idxs, - &list_select_flag); + .def( + "_setitem_index_not_tensor", + [](std::shared_ptr &self, py::handle _index, + py::object &value_obj) { + auto self_tensor = + self->MutableVar()->GetMutable(); + PyObject *index_ptr = !PyTuple_Check(_index.ptr()) + ? PyTuple_Pack(1, _index.ptr()) + : _index.ptr(); + // 1. Check argumnets + // 1.1 Check whether value obj is a tensor. + bool value_is_tensor = true; + bool parse_index = true; + if (py::isinstance(value_obj) || + py::isinstance(value_obj) || + py::isinstance(value_obj)) { + value_is_tensor = false; + } - framework::AttributeMap attrs = { - {"axes", axes}, - {"starts", starts}, - {"ends", ends}, - {"steps", steps}, - {"decrease_axes", decrease_axes}, - {"none_axes", none_axes}}; + // 1.2 Check whether _index can be parsed. + const int size = PyTuple_GET_SIZE(index_ptr); + for (int dim = 0; dim < size; ++dim) { + PyObject *slice_item = PyTuple_GetItem(index_ptr, dim); + if (!(PyCheckInteger(slice_item) || PySlice_Check(slice_item) || + slice_item == Py_Ellipsis || slice_item == Py_None || + PyList_Check(slice_item))) { + parse_index = false; + break; + } + } - imperative::NameVarBaseMap ins = {{"Input", {self}}}; - imperative::NameVarBaseMap outs = {{"Out", {self}}}; + // 2. Call op set_value to speed up if the condition is met, + // otherwise call TensorToPyArray. + // TODO(liym27): Try not to call TensorToPyArray because it always + // copys data to cpu place, which reduces performance. + if (parse_index && value_is_tensor) { + std::vector axes, starts, ends, steps, decrease_axes, + none_axes, infer_flags, list_select_idxs; + // if index is a list, list_select_flag will be true + bool list_select_flag; + ParseIndexingSlice(self_tensor, index_ptr, &axes, &starts, &ends, + &steps, &decrease_axes, &none_axes, + &infer_flags, &list_select_idxs, + &list_select_flag); + + if (!list_select_flag) { + framework::AttributeMap attrs = { + {"axes", axes}, + {"starts", starts}, + {"ends", ends}, + {"steps", steps}, + {"decrease_axes", decrease_axes}, + {"none_axes", none_axes}}; + + imperative::NameVarBaseMap ins = {{"Input", {self}}}; + imperative::NameVarBaseMap outs = {{"Out", {self}}}; + + PADDLE_ENFORCE_EQ( + self->IsLeaf() && !self->OverridedStopGradient(), false, + platform::errors::InvalidArgument( + "Leaf Tensor (%s) that doesn't stop gradient can't use " + "inplace strategy.", + self->Name())); + + auto value_tensor = + value_obj.cast>(); + ins.insert({"ValueTensor", {value_tensor}}); + + const auto &tracer = imperative::GetCurrentTracer(); + { + // Release gil and do tracing + py::gil_scoped_release release; + tracer->TraceOp("set_value", ins, outs, std::move(attrs), + {{"Input", "Out"}}); + } + } else { + } - PADDLE_ENFORCE_EQ( - self->IsLeaf() && !self->OverridedStopGradient(), false, - platform::errors::InvalidArgument( - "Leaf Tensor (%s) that doesn't stop gradient can't use " - "inplace strategy.", - self->Name())); - - auto value_tensor = - value_obj.cast>(); - ins.insert({"ValueTensor", {value_tensor}}); - - const auto &tracer = imperative::GetCurrentTracer(); - { - // Release gil and do tracing - py::gil_scoped_release release; - tracer->TraceOp("set_value", ins, outs, std::move(attrs), - {{"Input", "Out"}}); - } - } else { - auto self_numpy = TensorToPyArray(*self_tensor); - - if (value_is_tensor) { - auto value = - value_obj.cast>(); - auto value_tensor = - value->MutableVar()->GetMutable(); - auto value_numpy = TensorToPyArray(*value_tensor); - - self_numpy[_index] = value_numpy; - SetTensorFromPyArray(self_tensor, self_numpy, - self_tensor->place(), true); - } else { - auto value_numpy = value_obj; - self_numpy[_index] = value_numpy; - SetTensorFromPyArray(self_tensor, self_numpy, - self_tensor->place(), true); - } - } - // NOTE(liym27): - // Increase the version of VarBase self because __setitem__ is an - // inplace operator for the VarBase self. - self->BumpInplaceVersion(); - }) + } else { + auto self_numpy = TensorToPyArray(*self_tensor); + + if (value_is_tensor) { + auto value = + value_obj.cast>(); + auto value_tensor = + value->MutableVar()->GetMutable(); + auto value_numpy = TensorToPyArray(*value_tensor); + + self_numpy[_index] = value_numpy; + SetTensorFromPyArray(self_tensor, self_numpy, + self_tensor->place(), true); + } else { + auto value_numpy = value_obj; + self_numpy[_index] = value_numpy; + SetTensorFromPyArray(self_tensor, self_numpy, + self_tensor->place(), true); + } + } + // NOTE(liym27): + // Increase the version of VarBase self because __setitem__ is an + // inplace operator for the VarBase self. + self->BumpInplaceVersion(); + }) .def("_getitem_index_not_tensor", [](std::shared_ptr &self, py::handle _index) { std::vector slice_axes, slice_starts, slice_ends, diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 2fda67e891abfd..78a207552413b9 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -22,7 +22,7 @@ from .. import framework from .. import core from .. import unique_name -from ..framework import Variable, Parameter, ParamBase, _getitem_impl_ +from ..framework import Variable, Parameter, ParamBase, _getitem_impl_, _setitem_impl_ from .base import switch_to_static_graph from .math_op_patch import monkey_patch_math_varbase from .parallel import scale_loss @@ -568,6 +568,27 @@ def contain_tensor(item): # 2. Call c++ func getitem_index_not_tensor to speedup. return self._getitem_index_not_tensor(item) + def __setitem__(self, item, value): + def contain_tensor_or_list(item): + if not isinstance(item, tuple): + item = [item] + + for slice_item in item: + if isinstance(slice_item, list): + return True + elif isinstance(slice_item, Variable): + return True + + return False + + if contain_tensor_or_list(item): + # To reuse code with static graph, + # Call _setitem_impl_ when item contains tensor or list. + return _setitem_impl_(self, item, value) + + else: + return self._setitem_index_not_tensor(item, value) + for method_name, method in ( ("__bool__", __bool__), ("__nonzero__", __nonzero__), ("_to_static_var", _to_static_var), ("set_value", set_value), @@ -577,7 +598,8 @@ def contain_tensor(item): ("__str__", __str__), ("__repr__", __str__), ("__deepcopy__", __deepcopy__), ("__module__", "paddle"), ("__name__", "Tensor"), ("__array__", __array__), - ("__getitem__", __getitem__), ("item", item)): + ("__getitem__", __getitem__), ("__setitem__", __setitem__), + ("item", item)): setattr(core.VarBase, method_name, method) # NOTE(zhiqiu): pybind11 will set a default __str__ method of enum class. diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py index d26055b3166d6e..ae435607d7f301 100644 --- a/python/paddle/fluid/tests/unittests/test_set_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py @@ -408,6 +408,61 @@ def _get_answer(self): self.data[None, :, 1, ..., None] = np.zeros(self.shape)[0, 0, :, None] +# 1.5 item is list or Tensor of bol +class TestSetValueItemBool1(TestSetValueApi): + def _call_setitem(self, x): + x[[True, False]] = self.value + + def _get_answer(self): + self.data[[True, False]] = self.value + + +class TestSetValueItemBool2(TestSetValueApi): + def _call_setitem(self, x): + x[[False, False]] = self.value + + def _get_answer(self): + self.data[[False, False]] = self.value + + +class TestSetValueItemBool3(TestSetValueApi): + def _call_setitem(self, x): + x[[False, True]] = np.zeros(self.shape[2]) + + def _get_answer(self): + self.data[[False, True]] = np.zeros(self.shape[2]) + + +class TestSetValueItemBool4(TestSetValueApi): + def _call_setitem(self, x): + idx = paddle.assign(np.array([False, True])) + x[idx] = np.zeros(self.shape[2]) + + def _get_answer(self): + self.data[np.array([False, True])] = np.zeros(self.shape[2]) + + +class TestSetValueItemBool5(TestSetValueApi): + def _call_setitem(self, x): + idx = paddle.assign( + np.array([[False, True, False], [True, True, False]])) + x[idx] = self.value + + def _get_answer(self): + self.data[np.array([[False, True, False], [True, True, False] + ])] = self.value + + +class TestSetValueItemBool6(TestSetValueApi): + def _call_setitem(self, x): + x[0, ...] = 0 + x[x > 0] = self.value + + def _get_answer(self): + self.data[0, ...] = 0 + self.data[self.data > 0] = self.value + + # 2. Test different type of value: int, float, numpy.ndarray, Tensor # 2.1 value is int32, int64, float32, float64, bool diff --git a/python/paddle/fluid/variable_index.py b/python/paddle/fluid/variable_index.py index 5bdf0451b54e91..beeea26fca4f2a 100644 --- a/python/paddle/fluid/variable_index.py +++ b/python/paddle/fluid/variable_index.py @@ -347,10 +347,35 @@ def _setitem_impl_(var, item, value): if end is None: end = MAX_INTEGER if step > 0 else (0 - MAX_INTEGER) + elif isinstance(slice_item, list): + if len(item) != 1: + raise IndexError( + "When index contains a list, its length must be 1, but received {}.". + format(len(item))) + for i in slice_item: + if not isinstance(i, bool): + raise TypeError("Only support bool in index list.") + + from .layers import assign + idx_tensor = assign(slice_item) + return set_value_for_bool_tensor(var, idx_tensor, value) + + elif isinstance(slice_item, Variable): + if len(item) != 1: + raise IndexError( + "When index contains a Tensor, its length must be 1, but received {}.". + format(len(item))) + + if slice_item.dtype == core.VarDesc.VarType.BOOL: + return set_value_for_bool_tensor(var, slice_item, value) + else: + raise IndexError( + "When index is a Tensor, its type must be Bool, but received {}.". + format(slice_item)) else: raise IndexError( - "Valid index accept int, slice, ellipsis or None, but received {}.". - format(slice_item)) + "Valid index accept int, slice, ellipsis, None, list of bool, Variable, " + "but received {}.".format(slice_item)) axes.append(dim) starts.append(start) @@ -427,3 +452,40 @@ def _setitem_impl_(var, item, value): type="set_value", inputs=inputs, outputs={'Out': var}, attrs=attrs) return var + + +# the item is a tensor of bool +def set_value_for_bool_tensor(var, item, value): + + if len(item.shape) > len(var.shape): + raise IndexError("The dims of bool index doesn't match indexed array, " + "the dims of bool index except to be equal or less " + "than {}, but received {}.".format( + len(var.shape), len(item.shape))) + for i, dim_len in enumerate(item.shape): + if dim_len != var.shape[i]: + raise IndexError( + "The dimension of bool index doesn't match indexed array along " + "dimension {}, the target dimension is {}, but received {}.". + format(i, var.shape[i], dim_len)) + + def idx_not_empty(var, item, value): + from .framework import Variable + from .layers import assign + from .layers.nn import where + from ..tensor import gather_nd, scatter_nd_add + + if not isinstance(value, Variable): + value = assign(value).cast(var.dtype) + + idx = where(item) + gather_val = gather_nd(var, idx) + gather_val_new = value - gather_val + out = scatter_nd_add(var, idx, gather_val_new) + var[:] = out + + from .layers.control_flow import cond + # If all the bool index is False, just do nothing + cond(item.any(), lambda: idx_not_empty(var, item, value)) + + return var From 03b9dbf58165ec863aa1a6f155e76033895c76c2 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 25 Aug 2021 07:24:56 +0000 Subject: [PATCH 5/7] add the unittest for throwing exception --- .../tests/unittests/test_set_value_op.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py index ae435607d7f301..ba83bbd8591b7c 100644 --- a/python/paddle/fluid/tests/unittests/test_set_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py @@ -885,6 +885,26 @@ def _ellipsis_error(self): one = paddle.ones([1]) x[::one] = self.value + def _bool_list_error(self): + with self.assertRaises(TypeError): + x = paddle.ones(shape=self.shape, dtype=self.dtype) + x[[True, False, 0]] = 0 + + with self.assertRaises(IndexError): + x = paddle.ones(shape=self.shape, dtype=self.dtype) + x[[True, False], [True, False]] = 0 + + def _bool_tensor_error(self): + with self.assertRaises(IndexError): + x = paddle.ones(shape=self.shape, dtype=self.dtype) + idx = paddle.assign([True, False, True]) + x[idx] = 0 + + with self.assertRaises(IndexError): + x = paddle.ones(shape=self.shape, dtype=self.dtype) + idx = paddle.assign([0, 1]) + x[idx] = 0 + def _broadcast_mismatch(self): program = paddle.static.Program() with paddle.static.program_guard(program): @@ -901,6 +921,8 @@ def test_error(self): self._value_type_error() self._dtype_error() self._step_error() + self._bool_list_error() + self._bool_tensor_error() self._broadcast_mismatch() From 37ed12f50f963cdcc89caa207dea9f5864bb24ad Mon Sep 17 00:00:00 2001 From: zyfncg Date: Fri, 27 Aug 2021 03:17:37 +0000 Subject: [PATCH 6/7] merge conflict --- python/paddle/fluid/variable_index.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/python/paddle/fluid/variable_index.py b/python/paddle/fluid/variable_index.py index c05310022d0cbf..af894cccde0f61 100644 --- a/python/paddle/fluid/variable_index.py +++ b/python/paddle/fluid/variable_index.py @@ -560,21 +560,12 @@ def _setitem_impl_(var, item, value): slice_info.update(slice_item) continue - elif isinstance(slice_item, list): - if len(item) != 1: - raise IndexError( - "When index contains a list, its length must be 1, but received {}.". - format(len(item))) - for i in slice_item: - if not isinstance(i, bool): - raise TypeError("Only support bool in index list.") - - from .layers import assign - idx_tensor = assign(slice_item) - return set_value_for_bool_tensor(var, idx_tensor, value) - elif isinstance(slice_item, Variable): if slice_item.dtype == core.VarDesc.VarType.BOOL: + if len(item) != 1: + raise IndexError( + "When index contains a bool tensor, its length must be 1, but received {}.". + format(len(item))) return set_value_for_bool_tensor(var, slice_item, value) else: slice_info.update(slice_item) From 6666c94196a27a6c35b62c7ea54ff7fec2860b63 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Tue, 31 Aug 2021 04:21:26 +0000 Subject: [PATCH 7/7] add check for int tensor when index is bool --- python/paddle/fluid/variable_index.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/paddle/fluid/variable_index.py b/python/paddle/fluid/variable_index.py index af894cccde0f61..1b9a82ba85f05a 100644 --- a/python/paddle/fluid/variable_index.py +++ b/python/paddle/fluid/variable_index.py @@ -660,6 +660,13 @@ def _setitem_impl_(var, item, value): # the item is a tensor of bool def set_value_for_bool_tensor(var, item, value): + # TODO(zyfncg): Now scatter_nd_add only support float32 and float64 tensor, + # so in the current version we also only support float32 and float64 tensor, + # this problem will be fixed in the future. + if var.dtype != core.VarDesc.VarType.FP32 and var.dtype != core.VarDesc.VarType.FP64: + raise TypeError("Only support float and double tensor for bool index, " + "but received {}.".format(var.dtype)) + if len(item.shape) > len(var.shape): raise IndexError("The dims of bool index doesn't match indexed array, " "the dims of bool index except to be equal or less "