PaddlePaddle · QingshuChen · Dec 29, 2022 · Dec 26, 2022
diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
       "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
-  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221215")
+  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221227")  # undated base URL + "/<stamp>" (presumably YYYYMMDD)
 else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()

diff --git a/paddle/phi/kernels/xpu/compare_kernel.cc b/paddle/phi/kernels/xpu/compare_kernel.cc
@@ -52,22 +52,30 @@ void XPUCompareKernelImpl(const Context& dev_ctx,
   PADDLE_ENFORCE_XDNN_SUCCESS(ret, "compare op");
 }
 
-#define DEFINE_XPU_COMPARE_KERNEL(name, functor)                            \
-  template <typename T, typename Context>                                   \
-  void name##RawKernel(const Context& dev_ctx,                              \
-                       const DenseTensor& x,                                \
-                       const DenseTensor& y,                                \
-                       int axis,                                            \
-                       DenseTensor* out) {                                  \
-    using XPUType = typename XPUTypeTrait<T>::Type;                         \
-    XPUCompareKernelImpl<T, XPUType, Context>(dev_ctx, x, y, out, functor); \
-  }                                                                         \
-  template <typename T, typename Context>                                   \
-  void name##Kernel(const Context& dev_ctx,                                 \
-                    const DenseTensor& x,                                   \
-                    const DenseTensor& y,                                   \
-                    DenseTensor* out) {                                     \
-    name##RawKernel<T, Context>(dev_ctx, x, y, -1, out);                    \
+#define DEFINE_XPU_COMPARE_KERNEL(name, functor)                        \
+  /* RawKernel: passes a lambda wrapping functor to the Impl helper. */ \
+  template <typename T, typename Context>                               \
+  void name##RawKernel(const Context& dev_ctx,                          \
+                       const DenseTensor& x,                            \
+                       const DenseTensor& y,                            \
+                       int axis,                                        \
+                       DenseTensor* out) {                              \
+    using XPUType = typename XPUTypeTrait<T>::Type;                     \
+    auto cmp = [](xpu::Context* ctx, const XPUType* lhs,                \
+                  const XPUType* rhs, bool* res,                        \
+                  const std::vector<int>& lhs_shape,                    \
+                  const std::vector<int>& rhs_shape) {                  \
+      return functor(ctx, lhs, rhs, res, lhs_shape, rhs_shape);         \
+    };                                                                  \
+    XPUCompareKernelImpl<T, XPUType, Context>(dev_ctx, x, y, out, cmp); \
+  }                                                                     \
+  /* Kernel: same comparison, delegating to RawKernel with axis -1. */  \
+  template <typename T, typename Context>                               \
+  void name##Kernel(const Context& dev_ctx,                             \
+                    const DenseTensor& x,                               \
+                    const DenseTensor& y,                               \
+                    DenseTensor* out) {                                 \
+    name##RawKernel<T, Context>(dev_ctx, x, y, -1, out);                \
   }
 
 DEFINE_XPU_COMPARE_KERNEL(Equal, xpu::broadcast_equal<XPUType>)

diff --git a/paddle/phi/kernels/xpu/elementwise_add_kernel.cc b/paddle/phi/kernels/xpu/elementwise_add_kernel.cc
@@ -54,8 +54,17 @@ void AddRawKernel(const Context& dev_ctx,
                   int axis,
                   DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_add<XPUType>);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_add<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi

diff --git a/paddle/phi/kernels/xpu/elementwise_divide_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_divide_grad_kernel.cc
@@ -35,15 +35,21 @@ void DivideGradKernel(const Context& dev_ctx,
                       DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
   funcs::ElementwiseGradPreProcess(dout, dx);
-  XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                 x,
-                                 y,
-                                 dout,
-                                 axis,
-                                 dx,
-                                 dy,
-                                 xpu::broadcast_div_grad<XPUType>,
-                                 true);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_div_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+
+  XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
 }
 
 }  // namespace phi

diff --git a/paddle/phi/kernels/xpu/elementwise_divide_kernel.cc b/paddle/phi/kernels/xpu/elementwise_divide_kernel.cc
@@ -31,8 +31,16 @@ void DivideRawKernel(const Context& dev_ctx,
                      int axis,
                      DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_div<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_div<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi

diff --git a/paddle/phi/kernels/xpu/elementwise_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_grad_kernel.cc
@@ -29,15 +29,21 @@ void MaximumGradKernel(const Context& dev_ctx,
                        DenseTensor* dx,
                        DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                 x,
-                                 y,
-                                 dout,
-                                 axis,
-                                 dx,
-                                 dy,
-                                 xpu::broadcast_max_grad<XPUType>,
-                                 true);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_max_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+
+  XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
 }
 
 template <typename T, typename Context>
@@ -49,15 +55,21 @@ void MinimumGradKernel(const Context& dev_ctx,
                        DenseTensor* dx,
                        DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                 x,
-                                 y,
-                                 dout,
-                                 axis,
-                                 dx,
-                                 dy,
-                                 xpu::broadcast_min_grad<XPUType>,
-                                 true);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_min_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+
+  XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
 }
 
 }  // namespace phi

diff --git a/paddle/phi/kernels/xpu/elementwise_kernel.cc b/paddle/phi/kernels/xpu/elementwise_kernel.cc
@@ -27,8 +27,16 @@ void FloorDivideRawKernel(const Context& dev_ctx,
                           int axis,
                           DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_floordiv<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_floordiv<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 template <typename T, typename Context>
@@ -38,8 +46,16 @@ void MaximumRawKernel(const Context& dev_ctx,
                       int axis,
                       DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_max<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_max<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 template <typename T, typename Context>
@@ -49,8 +65,16 @@ void MinimumRawKernel(const Context& dev_ctx,
                       int axis,
                       DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_min<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_min<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 template <typename T, typename Context>
@@ -60,8 +84,16 @@ void RemainderRawKernel(const Context& dev_ctx,
                         int axis,
                         DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_mod<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_mod<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 template <typename T, typename Context>
@@ -71,8 +103,16 @@ void ElementwisePowRawKernel(const Context& dev_ctx,
                              int axis,
                              DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_pow<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_pow<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi

diff --git a/paddle/phi/kernels/xpu/elementwise_multiply_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_multiply_grad_kernel.cc
@@ -34,15 +34,20 @@ void MultiplyGradKernel(const Context& dev_ctx,
                         DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
   funcs::ElementwiseGradPreProcess(dout, dx);
-  XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                 x,
-                                 y,
-                                 dout,
-                                 axis,
-                                 dx,
-                                 dy,
-                                 xpu::broadcast_mul_grad<XPUType>,
-                                 true);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_mul_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+
+  XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
 }
 
 }  // namespace phi

diff --git a/paddle/phi/kernels/xpu/elementwise_multiply_kernel.cc b/paddle/phi/kernels/xpu/elementwise_multiply_kernel.cc
@@ -31,8 +31,16 @@ void MultiplyRawKernel(const Context& dev_ctx,
                        int axis,
                        DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_mul<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_mul<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi

diff --git a/paddle/phi/kernels/xpu/elementwise_subtract_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_subtract_grad_kernel.cc
@@ -28,15 +28,22 @@ void SubtractGradKernel(const Context& dev_ctx,
                         DenseTensor* dx,
                         DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  phi::XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                      x,
-                                      y,
-                                      dout,
-                                      axis,
-                                      dx,
-                                      dy,
-                                      xpu::broadcast_sub_grad<XPUType>,
-                                      false);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_sub_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+
+  phi::XPUElementwiseGrad<T, XPUType>(
+      dev_ctx, x, y, dout, axis, dx, dy, f, false);
 }
 
 }  // namespace phi

diff --git a/paddle/phi/kernels/xpu/elementwise_subtract_kernel.cc b/paddle/phi/kernels/xpu/elementwise_subtract_kernel.cc
@@ -26,8 +26,16 @@ void SubtractRawKernel(const Context& dev_ctx,
                        int axis,
                        DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  phi::XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_sub<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_sub<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  phi::XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi