diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index 728cebe64f6045..58d7d897c3b055 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -32,7 +32,7 @@ if(NOT DEFINED XPU_XDNN_BASE_DATE) set(XPU_XDNN_BASE_DATE "20240327") endif() if(NOT DEFINED XPU_XHPC_BASE_DATE) - set(XPU_XHPC_BASE_DATE "20240422") + set(XPU_XHPC_BASE_DATE "20240506") endif() set(XPU_XCCL_BASE_VERSION "1.2.0.5") if(NOT DEFINED XPU_XFT_BASE_VERSION) diff --git a/paddle/phi/backends/xpu/xpu3_op_list.cc b/paddle/phi/backends/xpu/xpu3_op_list.cc index d23c149b2d7245..1a3c414f0dd4db 100644 --- a/paddle/phi/backends/xpu/xpu3_op_list.cc +++ b/paddle/phi/backends/xpu/xpu3_op_list.cc @@ -845,13 +845,15 @@ XPUOpMap& get_kl3_ops() { phi::DataType::INT32, phi::DataType::INT64, phi::DataType::FLOAT16, - phi::DataType::BOOL})}, + phi::DataType::BOOL, + phi::DataType::BFLOAT16})}, {"set_value_with_tensor", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16, phi::DataType::INT32, phi::DataType::INT64, - phi::DataType::BOOL})}, + phi::DataType::BOOL, + phi::DataType::BFLOAT16})}, {"set_value_grad", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::INT32, @@ -1030,6 +1032,14 @@ XPUOpMap& get_kl3_ops() { phi::DataType::INT16, phi::DataType::INT32})}, {"sum", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, + {"swiglu", + XPUKernelSet({phi::DataType::FLOAT32, + phi::DataType::FLOAT16, + phi::DataType::BFLOAT16})}, + {"swiglu_grad", + XPUKernelSet({phi::DataType::FLOAT32, + phi::DataType::FLOAT16, + phi::DataType::BFLOAT16})}, {"swish", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16, diff --git a/paddle/phi/kernels/xpu/set_value_kernel.cc b/paddle/phi/kernels/xpu/set_value_kernel.cc index 60b0fff7d9d7c8..7cab453cb1848e 100644 --- a/paddle/phi/kernels/xpu/set_value_kernel.cc +++ b/paddle/phi/kernels/xpu/set_value_kernel.cc @@ -430,6 +430,7 @@ PD_REGISTER_KERNEL(set_value, phi::SetValueKernel, float, phi::dtype::float16, + 
phi::dtype::bfloat16, int, int64_t, bool) {} @@ -440,6 +441,7 @@ PD_REGISTER_KERNEL(set_value_with_tensor, phi::SetTensorValueKernel, float, phi::dtype::float16, + phi::dtype::bfloat16, int, int64_t, bool) {} diff --git a/paddle/phi/kernels/xpu/swiglu_kernel.cc b/paddle/phi/kernels/xpu/swiglu_kernel.cc new file mode 100644 index 00000000000000..a7815931fa6a81 --- /dev/null +++ b/paddle/phi/kernels/xpu/swiglu_kernel.cc @@ -0,0 +1,66 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "paddle/phi/kernels/swiglu_kernel.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+template <typename T, typename Context>
+void SwiGluKernel(const Context& ctx,
+                  const DenseTensor& x,
+                  const paddle::optional<DenseTensor>& y,
+                  DenseTensor* z) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  using XPUTypefp32 = typename XPUTypeTrait<float>::Type;
+  const auto* x_data = x.data<T>();
+  auto* z_data = ctx.template Alloc<T>(z);
+  const auto& dims = x.dims();
+  int64_t axis = dims.size() - 1;
+  auto dims_vec = common::vectorize(dims);
+  const XPUTypefp32* const_nullptr = nullptr;
+  const XPUType* y_ptr = nullptr;
+
+  if (y) {
+    const auto& y_tensor = y.get();
+    const auto& y_dims = y_tensor.dims();
+    const auto* y_data = y_tensor.data<T>();
+    y_ptr = reinterpret_cast<const XPUType*>(y_data);
+    PADDLE_ENFORCE_EQ(
+        y_dims,
+        dims,
+        phi::errors::InvalidArgument("The shape of Input(Y):[%s] must be equal "
+                                     "to the shape of Input(X):[%s].",
+                                     y_dims,
+                                     dims));
+  }
+  int ret = xpu::swiglu(ctx.x_context(),
+                        reinterpret_cast<const XPUType*>(x_data),
+                        reinterpret_cast<XPUType*>(z_data),
+                        dims_vec,
+                        axis,
+                        false,
+                        const_nullptr,
+                        nullptr,
+                        y_ptr);
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "swiglu");
+}
+}  // namespace phi
+PD_REGISTER_KERNEL(swiglu,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::SwiGluKernel,
+                   float,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16){};
diff --git a/paddle/phi/kernels/xpu/swiglu_kernel_grad.cc b/paddle/phi/kernels/xpu/swiglu_kernel_grad.cc
new file mode 100644
index 00000000000000..994699a9fa63ac
--- /dev/null
+++ b/paddle/phi/kernels/xpu/swiglu_kernel_grad.cc
@@ -0,0 +1,79 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/swiglu_grad_kernel.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void SwiGluGradKernel(const Context& ctx,
+                      const DenseTensor& x,
+                      const paddle::optional<DenseTensor>& y,
+                      const DenseTensor& dz,
+                      DenseTensor* dx,
+                      DenseTensor* dy) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  const auto* x_data = x.data<T>();
+  const auto* dz_data = dz.data<T>();
+  auto* dx_data = ctx.template Alloc<T>(dx);
+  const auto& dims = x.dims();
+  int64_t axis = dims.size() - 1;
+  auto dims_vec = common::vectorize(dims);
+  const XPUType* y_ptr = nullptr;
+  XPUType* dy_ptr = nullptr;
+
+  if (y) {
+    const auto& y_tensor = y.get();
+    const auto& y_dims = y_tensor.dims();
+    const auto* y_data = y_tensor.data<T>();
+    auto* dy_data = ctx.template Alloc<T>(dy);
+    y_ptr = reinterpret_cast<const XPUType*>(y_data);
+    dy_ptr = reinterpret_cast<XPUType*>(dy_data);
+    PADDLE_ENFORCE_EQ(
+        y_dims,
+        dims,
+        phi::errors::InvalidArgument("The shape of Input(Y):[%s] must be equal "
+                                     "to the shape of Input(X):[%s].",
+                                     y_dims,
+                                     dims));
+  }
+  int ret = xpu::swiglu_grad(ctx.x_context(),
+                             reinterpret_cast<const XPUType*>(x_data),
+                             reinterpret_cast<const XPUType*>(dz_data),
+                             reinterpret_cast<XPUType*>(dx_data),
+                             dims_vec,
+                             axis,
+                             false,
+                             y_ptr,
+                             dy_ptr);
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "swiglu_grad");
+}
+}  // namespace phi
+PD_REGISTER_KERNEL(swiglu_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::SwiGluGradKernel,
+                   float,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16){};