35 changes: 15 additions & 20 deletions paddle/fluid/operators/set_value_op.cc
@@ -151,31 +151,26 @@ class SetValueGradMaker : public framework::SingleGradOpMaker<T> {

  protected:
  void Apply(GradOpPtr<T> op) const override {
-    if (this->HasInput("ValueTensor")) {
-      op->SetType("set_value_grad");
+    op->SetType("set_value_grad");
+    op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));

Contributor (review comment, translated from Chinese): This should be discussed: when there is a ValueTensor, call set_value_grad; when there is no ValueTensor, call set_value_with_scalar_grad.

+    if (this->HasInput("StartsTensorList")) {
+      op->SetInput("StartsTensorList", this->Input("StartsTensorList"));
+    }
+    if (this->HasInput("EndsTensorList")) {
+      op->SetInput("EndsTensorList", this->Input("EndsTensorList"));
+    }
+    if (this->HasInput("StepsTensorList")) {
+      op->SetInput("StepsTensorList", this->Input("StepsTensorList"));
+    }
-      op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
-      op->SetInput("ValueTensor", this->Input("ValueTensor"));
-      if (this->HasInput("StartsTensorList")) {
-        op->SetInput("StartsTensorList", this->Input("StartsTensorList"));
-      }
-      if (this->HasInput("EndsTensorList")) {
-        op->SetInput("EndsTensorList", this->Input("EndsTensorList"));
-      }
-      if (this->HasInput("StepsTensorList")) {
-        op->SetInput("StepsTensorList", this->Input("StepsTensorList"));
-      }
-      op->SetAttrMap(this->Attrs());
+    op->SetAttrMap(this->Attrs());
+    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
+    if (this->HasInput("ValueTensor")) {
+      op->SetInput("ValueTensor", this->Input("ValueTensor"));
       op->SetOutput(framework::GradVarName("ValueTensor"),
                     this->InputGrad("ValueTensor"));
-      op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
-    } else {
-      op->SetType("assign");
-      op->SetInput("X", this->OutputGrad("Out"));
-      op->SetOutput("Out", this->InputGrad("Input"));
     }
  }
};
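For reference, a minimal sketch of the dispatch the reviewer proposes above. This is hypothetical: it assumes a set_value_with_scalar_grad op type is usable from this grad maker, and it omits the TensorList inputs for brevity; the code merged above keeps a single set_value_grad op type instead.

// Hypothetical sketch of the reviewer's suggestion, not the merged code.
void Apply(GradOpPtr<T> op) const override {
  // Inputs/outputs common to both grad op types.
  op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
  op->SetAttrMap(this->Attrs());
  op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
  // (StartsTensorList/EndsTensorList/StepsTensorList forwarding omitted.)
  if (this->HasInput("ValueTensor")) {
    // Tensor value: the value tensor also receives a gradient.
    op->SetType("set_value_grad");
    op->SetInput("ValueTensor", this->Input("ValueTensor"));
    op->SetOutput(framework::GradVarName("ValueTensor"),
                  this->InputGrad("ValueTensor"));
  } else {
    // Scalar value: only the sliced input receives a gradient.
    op->SetType("set_value_with_scalar_grad");
  }
}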
6 changes: 3 additions & 3 deletions paddle/phi/api/yaml/legacy_backward.yaml
@@ -612,14 +612,14 @@

 - backward_op : set_value_grad
   forward : set_value (Tensor x, IntArray starts, IntArray ends, IntArray steps, int64_t[] axes, int64_t[] decrease_axes, int64_t[] none_axes, int64_t[] shape, Scalar[] values) -> Tensor(out)
-  args : (Tensor out_grad)
+  args : (Tensor out_grad, IntArray starts, IntArray ends, IntArray steps, int64_t[] axes, int64_t[] decrease_axes, int64_t[] none_axes)
   output : Tensor(x_grad)
   infer_meta:
     func: UnchangedInferMeta
     param: [out_grad]
   kernel:
-    func: assign
-    param: [out_grad]
+    func: set_value_with_scalar_grad
+    param: [out_grad, starts, ends, steps, axes, decrease_axes, none_axes]

- backward_op : set_value_with_tensor_grad
forward: set_value_with_tensor (Tensor x, Tensor values, IntArray starts, IntArray ends, IntArray steps, int64_t[] axes, int64_t[] decrease_axes, int64_t[] none_axes) -> Tensor(out)
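Why the backward now needs the slice arguments: for out = set_value(x, ..., values) assigning a scalar to the index set S selected by starts/ends/steps/axes, each element is out_i = values for i in S and out_i = x_i otherwise, so

    dL/dx_i = dL/dout_i * 1[i not in S],

i.e. x_grad is out_grad with the assigned slice zeroed. The old assign kernel returned out_grad unchanged, incorrectly letting gradient flow through the overwritten elements; the zeroing behavior is exactly what the updated tests below assert.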
17 changes: 17 additions & 0 deletions paddle/phi/kernels/cpu/set_value_grad_kernel.cc
@@ -35,3 +35,20 @@ PD_REGISTER_KERNEL(set_value_grad,
phi::dtype::float16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_KERNEL(set_value_with_scalar_grad,
CPU,
ALL_LAYOUT,
phi::SetValueWithScalarGradKernel,
float,
double,
int,
int64_t,
bool,
int16_t,
uint8_t,
int8_t,
phi::dtype::bfloat16,
phi::dtype::float16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
17 changes: 17 additions & 0 deletions paddle/phi/kernels/gpu/set_value_grad_kernel.cu
@@ -35,3 +35,20 @@ PD_REGISTER_KERNEL(set_value_grad,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_KERNEL(set_value_with_scalar_grad,
GPU,
ALL_LAYOUT,
phi::SetValueWithScalarGradKernel,
float,
double,
int,
int64_t,
bool,
int16_t,
uint8_t,
int8_t,
phi::dtype::float16,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
93 changes: 93 additions & 0 deletions paddle/phi/kernels/impl/set_value_grad_kernel_impl.h
@@ -341,4 +341,97 @@ void SetValueGradKernel(const Context& dev_ctx,
}
}

template <typename T, typename Context>
void SetValueWithScalarGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
const IntArray& starts,
const IntArray& ends,
const IntArray& steps,
const std::vector<int64_t>& axes,
const std::vector<int64_t>& decrease_axes,
const std::vector<int64_t>& none_axes,
DenseTensor* x_grad) {
const int rank = out_grad.dims().size();

switch (rank) {
case 1:
SetValueGradImpl<T, Context, 1>(dev_ctx,

Contributor (review comment, translated from Chinese): Couldn't this just call SetValueGradKernel directly, passing nullptr for value_grad?

out_grad,
starts,
ends,
steps,
axes,
decrease_axes,
none_axes,
x_grad,
nullptr);
break;
case 2:
SetValueGradImpl<T, Context, 2>(dev_ctx,
out_grad,
starts,
ends,
steps,
axes,
decrease_axes,
none_axes,
x_grad,
nullptr);
break;
case 3:
SetValueGradImpl<T, Context, 3>(dev_ctx,
out_grad,
starts,
ends,
steps,
axes,
decrease_axes,
none_axes,
x_grad,
nullptr);
break;
case 4:
SetValueGradImpl<T, Context, 4>(dev_ctx,
out_grad,
starts,
ends,
steps,
axes,
decrease_axes,
none_axes,
x_grad,
nullptr);
break;
case 5:
SetValueGradImpl<T, Context, 5>(dev_ctx,
out_grad,
starts,
ends,
steps,
axes,
decrease_axes,
none_axes,
x_grad,
nullptr);
break;
case 6:
SetValueGradImpl<T, Context, 6>(dev_ctx,
out_grad,
starts,
ends,
steps,
axes,
decrease_axes,
none_axes,
x_grad,
nullptr);
break;
default:
PADDLE_THROW(phi::errors::InvalidArgument(
"The rank of set_value_with_scalar_grad's input should be less than "
"7, but "
"received %d.",
rank));
}
}
} // namespace phi
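A minimal sketch of the simplification the reviewer suggests above: forward to the existing SetValueGradKernel with value_grad set to nullptr instead of duplicating the rank switch. This assumes SetValueGradKernel (declared in set_value_grad_kernel.h, next file) skips the value-gradient path when value_grad is null.

// Hypothetical alternative to the rank switch above, per the review comment.
// Assumption: SetValueGradKernel tolerates value_grad == nullptr.
template <typename T, typename Context>
void SetValueWithScalarGradKernel(const Context& dev_ctx,
                                  const DenseTensor& out_grad,
                                  const IntArray& starts,
                                  const IntArray& ends,
                                  const IntArray& steps,
                                  const std::vector<int64_t>& axes,
                                  const std::vector<int64_t>& decrease_axes,
                                  const std::vector<int64_t>& none_axes,
                                  DenseTensor* x_grad) {
  // Reuse the tensor-value grad kernel; no value gradient is requested.
  SetValueGradKernel<T, Context>(dev_ctx,
                                 out_grad,
                                 starts,
                                 ends,
                                 steps,
                                 axes,
                                 decrease_axes,
                                 none_axes,
                                 x_grad,
                                 /*value_grad=*/nullptr);
}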
10 changes: 10 additions & 0 deletions paddle/phi/kernels/set_value_grad_kernel.h
@@ -32,4 +32,14 @@ void SetValueGradKernel(const Context& dev_ctx,
DenseTensor* x_grad,
DenseTensor* value_grad);

template <typename T, typename Context>
void SetValueWithScalarGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
const IntArray& starts,
const IntArray& ends,
const IntArray& steps,
const std::vector<int64_t>& axes,
const std::vector<int64_t>& decrease_axes,
const std::vector<int64_t>& none_axes,
DenseTensor* x_grad);
} // namespace phi
103 changes: 103 additions & 0 deletions paddle/phi/kernels/xpu/set_value_grad_kernel.cc
@@ -397,6 +397,100 @@ void SetValueGradKernel(const Context& dev_ctx,
}
}

template <typename T, typename Context>
void SetValueWithScalarGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
const IntArray& starts,
const IntArray& ends,
const IntArray& steps,
const std::vector<int64_t>& axes,
const std::vector<int64_t>& decrease_axes,
const std::vector<int64_t>& none_axes,
DenseTensor* x_grad) {
const int rank = out_grad.dims().size();

switch (rank) {
case 1:
SetValueGradImpl<T, Context, 1>(dev_ctx,
out_grad,
starts,
ends,
steps,
axes,
decrease_axes,
none_axes,
x_grad,
nullptr);
break;
case 2:
SetValueGradImpl<T, Context, 2>(dev_ctx,
out_grad,
starts,
ends,
steps,
axes,
decrease_axes,
none_axes,
x_grad,
nullptr);
break;
case 3:
SetValueGradImpl<T, Context, 3>(dev_ctx,
out_grad,
starts,
ends,
steps,
axes,
decrease_axes,
none_axes,
x_grad,
nullptr);
break;
case 4:
SetValueGradImpl<T, Context, 4>(dev_ctx,
out_grad,
starts,
ends,
steps,
axes,
decrease_axes,
none_axes,
x_grad,
nullptr);
break;
case 5:
SetValueGradImpl<T, Context, 5>(dev_ctx,
out_grad,
starts,
ends,
steps,
axes,
decrease_axes,
none_axes,
x_grad,
nullptr);
break;
case 6:
SetValueGradImpl<T, Context, 6>(dev_ctx,
out_grad,
starts,
ends,
steps,
axes,
decrease_axes,
none_axes,
x_grad,
nullptr);
break;
default:
PADDLE_THROW(phi::errors::InvalidArgument(
"The rank of set_value_with_scalar_grad's input should be less than "
"7, but "
"received %d.",
rank));
}
}

} // namespace phi

PD_REGISTER_KERNEL(set_value_grad,
@@ -407,3 +501,12 @@ PD_REGISTER_KERNEL(set_value_grad,
phi::dtype::float16,
int,
int64_t) {}

PD_REGISTER_KERNEL(set_value_with_scalar_grad,
XPU,
ALL_LAYOUT,
phi::SetValueWithScalarGradKernel,
float,
phi::dtype::float16,
int,
int64_t) {}
2 changes: 2 additions & 0 deletions test/legacy_test/test_zero_dim_tensor.py
@@ -830,6 +830,7 @@ def test_setitem(self):
np.testing.assert_allclose(out[1, 2, 3, 4], np.array(10))
self.assertEqual(x.grad.shape, [2, 3, 4, 5])
x_grad_expected = np.ones((2, 3, 4, 5)) * 2
+ x_grad_expected[1, 2, 3, 4] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)
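# Why zero at [1, 2, 3, 4]: set_value overwrites that element, so
# out[1, 2, 3, 4] no longer depends on x and its incoming gradient must be
# masked to zero (the old assign-based backward wrongly passed it through).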

# case2: 0-D Tensor indice in some axis
@@ -847,6 +848,7 @@ def test_setitem(self):
self.assertEqual(out.shape, x.shape)
np.testing.assert_allclose(out[1, 1], np.ones((4, 5)) * 0.5)
x_grad_expected = np.ones((2, 3, 4, 5))
+ x_grad_expected[1, 1] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)

# case3:0-D Tensor indice in some axis, value is a Tensor
1 change: 1 addition & 0 deletions test/xpu/test_zero_dim_tensor_xpu.py
@@ -441,6 +441,7 @@ def test_setitem(self):
np.testing.assert_allclose(out[1, 2, 3, 4], np.array(10))
self.assertEqual(x.grad.shape, [2, 3, 4, 5])
x_grad_expected = np.ones((2, 3, 4, 5)) * 2
+ x_grad_expected[1, 2, 3, 4] = 0
np.testing.assert_allclose(x.grad, x_grad_expected)

# case2: 0-D Tensor indice in some axis