Skip to content

Commit 24392e6

Browse files
[Prim] Add index_put_grad for static decomposition (#73747)
* support index_put_grad in static prim * fix * fix typo * disable cinn in UT
1 parent 94e2150 commit 24392e6

File tree

7 files changed

+207
-0
lines changed

7 files changed

+207
-0
lines changed

paddle/fluid/pir/dialect/op_generator/decomp_interface_gen_op_list.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@
138138
'unsqueeze_grad',
139139
'p_norm_grad',
140140
'masked_fill_grad',
141+
'index_put_grad',
141142
'index_add_grad',
142143
]
143144

paddle/fluid/primitive/codegen/decomp_vjp_gen.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@
162162
'swiglu_grad',
163163
'p_norm_grad',
164164
'masked_fill_grad',
165+
'index_put_grad',
165166
'index_add_grad',
166167
] # custom vjp list of composite op
167168

paddle/fluid/primitive/decomp_rule/decomp_vjp/details.h

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,67 @@ void roll_grad(const Tensor& x,
440440
}
441441
}
442442

443+
template <typename T>
444+
void index_put_grad(const Tensor& x,
445+
const std::vector<Tensor>& indices,
446+
const Tensor& value,
447+
const Tensor& out_grad,
448+
const bool accumulate,
449+
Tensor* x_grad,
450+
Tensor* value_grad) {
451+
if (x_grad) {
452+
if (accumulate) {
453+
by_pass<T>(out_grad, x_grad);
454+
} else {
455+
Tensor x_grad_tmp;
456+
if (has_dynamic_shape(x.shape()) ||
457+
std::any_of(
458+
indices.cbegin(),
459+
indices.cend(),
460+
[](const Tensor& t) { return has_dynamic_shape(t.shape()); }) ||
461+
has_dynamic_shape(out_grad.shape())) {
462+
x_grad_tmp = index_put<T>(
463+
out_grad,
464+
indices,
465+
backend::full_with_tensor<T>(
466+
shape64<T>(value), 0, out_grad.dtype(), out_grad.place()));
467+
} else {
468+
x_grad_tmp = index_put<T>(out_grad,
469+
indices,
470+
full<T>(common::vectorize(value.dims()),
471+
0,
472+
out_grad.dtype(),
473+
out_grad.place()));
474+
}
475+
set_output<T>(x_grad_tmp, x_grad);
476+
}
477+
}
478+
479+
if (value_grad) {
480+
std::vector<Tensor> indices_vec;
481+
482+
if (has_dynamic_shape(x.shape()) ||
483+
std::any_of(
484+
indices.cbegin(),
485+
indices.cend(),
486+
[](const Tensor& t) { return has_dynamic_shape(t.shape()); }) ||
487+
has_dynamic_shape(out_grad.shape())) {
488+
for (int i = 0; i < indices.size(); ++i) {
489+
indices_vec.push_back(backend::unsqueeze<T>(
490+
indices[i], full<T>({1}, -1, DataType::INT64, indices[i].place())));
491+
}
492+
} else {
493+
for (int i = 0; i < indices.size(); ++i) {
494+
indices_vec.push_back(unsqueeze<T>(indices[i], {-1}));
495+
}
496+
}
497+
498+
Tensor stacked_indices = concat<T>(indices_vec, -1);
499+
Tensor value_grad_tmp = gather_nd<T>(out_grad, stacked_indices);
500+
set_output<T>(value_grad_tmp, value_grad);
501+
}
502+
}
503+
443504
template <typename T>
444505
void transpose_grad(const Tensor& grad_out,
445506
const std::vector<int>& perm,

paddle/fluid/primitive/primitive/primitive.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
- gather_nd
7373
- scatter
7474
- scatter_nd
75+
- index_put
7576
- scatter_nd_add
7677
- put_along_axis
7778
- take_along_axis

paddle/phi/ops/yaml/backward.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,6 +1732,7 @@
17321732
data_transform :
17331733
skip_transform : indices
17341734
backward : index_put_double_grad
1735+
no_need_buffer: x, value
17351736

17361737
- backward_op : index_sample_grad
17371738
forward : index_sample (Tensor x, Tensor index) -> Tensor(out)

python/paddle/autograd/backward_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
"pd_op.unsqueeze",
108108
"pd_op.where",
109109
"pd_op.p_norm",
110+
"pd_op.index_put",
110111
"pd_op.index_add",
111112
"pd_op.elu",
112113
"pd_op.masked_fill",

test/legacy_test/test_index_put_op.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,5 +1086,146 @@ def test_dygraph_forward(self):
10861086
)
10871087

10881088

1089+
class TestIndexPutPrim(unittest.TestCase):
    """Checks the static (prim) decomposition of ``index_put_grad``.

    Compares dx/dvalue from two ``paddle.jit.to_static`` programs (one with
    dynamic-shape InputSpecs, one with fixed shapes inferred from inputs)
    against eager-mode gradients, for both ``accumulate`` modes and several
    x/indices/value shape combinations.
    """
    # NOTE(review): the original patch defined a broken ``__int__`` method
    # (``self().__init__()``) that would raise TypeError if ever invoked;
    # it served no purpose and has been removed.

    def test_prim(self):
        try:
            paddle.framework.core._set_prim_all_enabled(True)
            for accumulate in [False, True]:
                for x_shape, indices_shape, value_shape in [
                    ([16], [10], [10]),
                    ([16, 16], [20, 2], [20]),
                    ([12, 13, 14], [88, 1], [88, 13, 14]),
                    ([12, 13, 14], [88, 2], [88, 14]),
                    ([12, 13, 14], [88, 3], [88]),
                    ([12, 13, 14], [12 * 13 * 14, 3], [12 * 13 * 14]),
                ]:
                    n_indices = indices_shape[0]
                    index_dim_size = (
                        indices_shape[1] if len(indices_shape) > 1 else 1
                    )

                    x_np = np.random.randn(*x_shape)
                    # One 1-D int index tensor per indexed leading axis;
                    # negative values exercise negative indexing.
                    indices_np = tuple(
                        np.random.randint(-x_shape[i], x_shape[i], [n_indices])
                        for i in range(max(index_dim_size, 1))
                    )
                    value_np = np.random.randn(*value_shape).astype("float32")

                    x_pd = paddle.to_tensor(
                        x_np.copy(),
                        "float32",
                        stop_gradient=False,
                    )
                    indices_pd = tuple(
                        paddle.to_tensor(
                            indice.copy(),
                            "int64",
                            stop_gradient=True,
                        )
                        for indice in indices_np
                    )
                    value_pd = paddle.to_tensor(
                        value_np.copy(),
                        "float32",
                        stop_gradient=False,
                    )

                    out_pd = paddle.index_put(
                        x_pd, indices_pd, value_pd, accumulate=accumulate
                    )
                    dout_np = np.random.randn(*out_pd.shape)
                    dout_pd = paddle.to_tensor(
                        dout_np.copy(),
                        "float32",
                        stop_gradient=False,
                    )

                    # ``accumulate`` must be baked in as a literal so the
                    # traced static program uses the right mode.
                    if accumulate:

                        def compute_dx_dv(x, indices, v, dy, accumulate=True):
                            y = paddle.index_put(x, indices, v, True)
                            return paddle.grad(y, [x, v], dy, create_graph=True)

                    else:

                        def compute_dx_dv(x, indices, v, dy, accumulate=False):
                            y = paddle.index_put(x, indices, v, False)
                            return paddle.grad(y, [x, v], dy, create_graph=True)

                    # Eager-mode reference gradients.
                    dx_ref, dv_ref = compute_dx_dv(
                        x_pd, indices_pd, value_pd, dout_pd
                    )

                    # Static program with dynamic (unknown) shapes.
                    # NOTE(review): these InputSpecs are rank-2 while some
                    # test inputs are rank 1 or 3 — confirm paddle tolerates
                    # the rank mismatch or tailor the specs per case.
                    st_func1 = paddle.jit.to_static(
                        compute_dx_dv,
                        input_spec=[
                            paddle.static.InputSpec(
                                shape=[-1, -1], dtype='float32'
                            ),
                            tuple(
                                paddle.static.InputSpec(
                                    shape=[-1], dtype='int64'
                                )
                                for _ in range(len(indices_pd))
                            ),
                            paddle.static.InputSpec(
                                shape=[-1, -1], dtype='float32'
                            ),
                            paddle.static.InputSpec(
                                shape=[-1, -1], dtype='float32'
                            ),
                        ],
                        full_graph=True,
                        backend=None,
                    )
                    dx_1, dv_1 = st_func1(x_pd, indices_pd, value_pd, dout_pd)

                    # Static program with fixed shapes taken from the inputs.
                    st_func2 = paddle.jit.to_static(
                        compute_dx_dv,
                        full_graph=True,
                        backend=None,
                    )
                    dx_2, dv_2 = st_func2(x_pd, indices_pd, value_pd, dout_pd)

                    err_msg = (
                        f"accumulate={accumulate}\nx_np:\n{x_np}\n"
                        f"indices_np:\n{indices_np}\nvalue_np:\n{value_np}\n"
                        f"out_np:{out_pd.numpy()}\n"
                    )
                    np.testing.assert_allclose(
                        dx_1.numpy(), dx_ref.numpy(), err_msg=err_msg
                    )
                    np.testing.assert_allclose(
                        dv_1.numpy(), dv_ref.numpy(), err_msg=err_msg
                    )
                    np.testing.assert_allclose(
                        dx_2.numpy(), dx_ref.numpy(), err_msg=err_msg
                    )
                    np.testing.assert_allclose(
                        dv_2.numpy(), dv_ref.numpy(), err_msg=err_msg
                    )
        finally:
            # Always restore the global prim switch for subsequent tests.
            paddle.framework.core._set_prim_all_enabled(False)
1228+
1229+
10891230
if __name__ == '__main__':
10901231
unittest.main()

0 commit comments

Comments
 (0)