Merged
112 commits
5dff7f9
1. add interface for fft;
Aug 10, 2021
53c2448
add fft c2c cufft kernel
jeff41404 Aug 12, 2021
3c22959
implement argument checking & op calling parts for fft_c2c and fftn_c2c
Aug 13, 2021
b07ee4c
add operator and opmaker definitions
Aug 13, 2021
579eda0
only register float and double for cpu.
Aug 13, 2021
f0d0413
Merge pull request #1 from iclementine/fft
jeff41404 Aug 13, 2021
8e51eea
Merge pull request #2 from jeff41404/fft_c2c_cufft
Aug 13, 2021
6451ac8
add common code for implementing FFT, add pocketfft as a dependency
Aug 17, 2021
4cf05fb
Merge pull request #3 from iclementine/fft
jeff41404 Aug 17, 2021
0ba4495
add fft c2c cufft kernel function
jeff41404 Aug 17, 2021
b187832
fix bugs in python interface
Aug 17, 2021
c3820f2
Merge pull request #4 from jeff41404/fft_c2c_cufft
Aug 17, 2021
034cddb
add support for c2r, r2c operators, op makers, kernels and kernel fun…
Aug 18, 2021
e4bcaed
test and fix bugs
Aug 18, 2021
b00a0f1
1. fft_c2c function: add support for onesided=False;
Aug 18, 2021
322b9e3
Merge pull request #5 from iclementine/fft
jeff41404 Aug 18, 2021
e691dca
1. fft: fix python api bugs;
Aug 18, 2021
9f0cc98
fft c2c cufft kernel done with compile and link
jeff41404 Aug 19, 2021
96e8b09
Merge pull request #6 from jeff41404/fft_c2c_cufft
Aug 19, 2021
6864616
Merge branch 'project_fft' of github.com:iclementine/Paddle into fft
Aug 19, 2021
7402387
fix shape_op, add mkl placeholder
Aug 20, 2021
5866c75
Merge pull request #7 from iclementine/fft
jeff41404 Aug 20, 2021
5f3166d
remove mkl
Aug 20, 2021
bce9488
Merge pull request #8 from iclementine/fft
jeff41404 Aug 20, 2021
8349129
complete fft c2c in gpu
Aug 20, 2021
dea0e79
Merge pull request #9 from jeff41404/fft_c2c_cufft
Aug 20, 2021
d224cc4
1. implement mkl-based fft, FFTC2CFunctor and common function exec_fft;
Aug 25, 2021
7242079
Merge pull request #10 from iclementine/fft
jeff41404 Aug 25, 2021
c200985
complete fft c2c on gpu in ND
Aug 25, 2021
721e532
Merge branch 'project_fft' of https://github.com/iclementine/Paddle i…
Aug 25, 2021
40803ea
complete fft c2c on gpu in ND
Aug 25, 2021
12f26b5
Merge pull request #11 from jeff41404/fft_c2c_cufft
Aug 25, 2021
1edd8d1
complete fft c2c backward in ND
Aug 26, 2021
3d7a9ea
Merge pull request #12 from jeff41404/fft_c2c_cufft
Aug 26, 2021
aa33104
fix MKL-based implementation
Aug 30, 2021
ff80dd4
resolve conflict
Aug 30, 2021
fc52a46
Merge pull request #14 from iclementine/fft
jeff41404 Aug 30, 2021
4a0b182
Add frame op and CPU/GPU kernels.
KPatr1ck Aug 27, 2021
dcbf6ce
Add frame op forward unittest.
KPatr1ck Aug 30, 2021
19f9f77
Add frame op forward unittest.
KPatr1ck Aug 30, 2021
930f523
Remove axis parameter in FrameFunctor.
KPatr1ck Aug 30, 2021
d89b583
Add frame op grad CPU/GPU kernels and unittest.
KPatr1ck Aug 31, 2021
d0c7911
Add frame op grad CPU/GPU kernels and unittest.
KPatr1ck Aug 31, 2021
74dc1e4
Update doc string.
KPatr1ck Aug 31, 2021
66abd71
Update after review and remove librosa requirement in unittest.
KPatr1ck Sep 1, 2021
ef984a8
Update grad kernel.
KPatr1ck Sep 1, 2021
c4c21c3
Merge pull request #13 from KPatr1ck/cxj_fft
Sep 1, 2021
4168872
add fft_c2r op
lijiaqi0612 Sep 1, 2021
d78f7ad
Remove data allocation in TransCompute function.
KPatr1ck Sep 1, 2021
9639179
Merge pull request #15 from KPatr1ck/cxj_fft
Sep 1, 2021
05124e0
add fft r2c onesided with cpu(pocketfft/mkl) and gpu
Sep 2, 2021
c8b96e5
last fft c2r functor
lijiaqi0612 Sep 2, 2021
5a46860
Merge branch 'project_fft' of https://github.com/iclementine/Paddle i…
Sep 2, 2021
e4fe761
Merge pull request #16 from lijiaqi0612/fft_c2r
Sep 4, 2021
a82eded
fix C2R and R2C for cufft, because the direction is not an option in t…
Sep 4, 2021
9f16534
Merge pull request #17 from iclementine/fft
Sep 4, 2021
6cb27d6
add fft r2c onesided with cpu(pocketfft/mkl) and gpu
Sep 2, 2021
664c452
fix bugs in python APIs
Sep 4, 2021
efc26c9
fix fft_c2r grad kernel
lijiaqi0612 Sep 4, 2021
8816fb7
build fft_r2c onesided pass with cpu and gpu
Sep 4, 2021
f98c65c
Merge pull request #18 from cxxly/fft_r2c
Sep 4, 2021
b5a7762
Merge branch 'develop' into project_fft
Sep 4, 2021
6e05742
fix bugs in python APIs
Sep 4, 2021
ad3d6af
Merge branch 'project_fft' of https://github.com/iclementine/Paddle i…
lijiaqi0612 Sep 4, 2021
dfbce40
add cuda fft c2r grad kernel functor
lijiaqi0612 Sep 6, 2021
aa1e344
clean code
lijiaqi0612 Sep 6, 2021
6fec29b
Merge pull request #20 from lijiaqi0612/fft_c2r
Sep 6, 2021
52293b1
Merge pull request #21 from iclementine/fft
Sep 6, 2021
1b414db
Merge branch 'project_fft' of github.com:iclementine/Paddle into fft
Sep 6, 2021
d074050
fix fft_c2r python API
Sep 6, 2021
3c32139
Merge pull request #22 from iclementine/fft
Sep 6, 2021
a31dc31
fill fft r2c result with conjugate symmetry (#19)
cxxly Sep 7, 2021
316590d
add placeholder for unittests (#24)
Sep 8, 2021
75c2ca0
simplify parameterize test function by auto generate test case from par…
cxxly Sep 9, 2021
7a22378
miscellaneous fixes for python APIs (#26)
Sep 9, 2021
d21249e
fix typos in axes checking (#27)
Sep 9, 2021
3e7792e
fix argument checking (#28)
Sep 9, 2021
52348da
Add C2R Python layer normal and abnormal use cases (#29)
lijiaqi0612 Sep 10, 2021
b358cc5
complete rfft,rfft2,rfftn,ihfft,ihfft2,ihfftn unittest and doc string…
cxxly Sep 10, 2021
c825b1e
Documentation of the common interfaces of c2r and c2c (#31)
lijiaqi0612 Sep 10, 2021
1cf4587
clean c++ code (#32)
Sep 10, 2021
3765c70
Add numpy-based implementation of spectral ops (#33)
Sep 10, 2021
fcd9069
Add fft_c2r numpy based implementation for unittest. (#34)
Sep 10, 2021
f9e3309
Add deframe op and stft/istft api. (#23)
KPatr1ck Sep 10, 2021
c46fa79
Add overlap_add op and stft/istft api unittest (#35)
KPatr1ck Sep 10, 2021
fa40a1a
Add unittest for fft helper functions (#36)
Sep 10, 2021
f002f3a
complete static graph unittest for all public api (#37)
cxxly Sep 10, 2021
4c3a4b1
Unittest of op with FFT C2C, C2R and R2C added (#38)
Sep 10, 2021
91fa9cd
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Sep 10, 2021
a4abc4e
add fft related options to CMakeLists.txt
Sep 10, 2021
b76c98c
fix typos and clean code (#39)
Sep 11, 2021
a714cfe
always convert numpy array to paddle.Tensor to avoid comparing numpy …
Sep 14, 2021
626a5b7
fix CI Errors: numpy dtype comparison, thrust when cuda is not availa…
Sep 14, 2021
d2eebba
remove inclusion of thrust, add __all__ list for fft (#42)
Sep 15, 2021
c0289d1
Add api doc and update unittest. (#43)
KPatr1ck Sep 15, 2021
b3d5f13
fix MKL-based FFT implementation (#44)
Sep 15, 2021
2cb21c0
remove code for debug (#45)
Sep 15, 2021
5ccdf98
use dynload for cufft (#46)
Sep 15, 2021
5e33e7f
Update doc and unittest. (#47)
KPatr1ck Sep 16, 2021
cc9d3a0
use dynload for cufft (#48)
Sep 16, 2021
150da5c
fix conflicts and merge upstream (#49)
Sep 16, 2021
0279d4a
Merge branch 'develop' into project_fft
Sep 16, 2021
1e16889
fix compile error: only link dyload_cuda when cuda is available (#50)
Sep 16, 2021
e804bd5
fix dynload for cufft on windows (#51)
Sep 16, 2021
ffcf187
add NOMINMAX to compile on windows (#52)
Sep 16, 2021
6c3322c
explicitly specify capture mode for lambdas (#55)
Sep 16, 2021
d700f45
fix fft sample (#53)
lijiaqi0612 Sep 16, 2021
b85788b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Sep 16, 2021
2c615bb
update scipy and numpy version for unittests of fft (#56)
Sep 16, 2021
e968c20
Add static graph unittests of frame and overlap_add api. (#57)
KPatr1ck Sep 17, 2021
76401f4
Remove cache of cuFFT & Disable ONEMKL (#59)
Sep 17, 2021
f8c2a2e
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Sep 17, 2021
7 changes: 7 additions & 0 deletions CMakeLists.txt
@@ -38,6 +38,8 @@ project(paddle CXX C)
# enable language CUDA
# TODO(Shibo Tao): remove find_package(CUDA) completely.
find_package(CUDA QUIET)
find_package(MKL CONFIG QUIET)
option(WITH_ONEMKL "Compile PaddlePaddle with oneMKL" OFF)
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_TENSORRT "Compile PaddlePaddle with NVIDIA TensorRT" OFF)
option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
@@ -225,6 +227,7 @@ option(WITH_STRIP "Strip so files of Whl packages" OFF)
option(NEW_RELEASE_CUBIN "PaddlePaddle next-level release strategy for pypi cubin package" OFF)
option(NEW_RELEASE_JIT "PaddlePaddle next-level release strategy for backup jit package" OFF)
option(WITH_ASCEND_INT64 "Compile with int64 kernel for ascend NPU" OFF)
option(WITH_POCKETFFT "Compile with pocketfft support" ON)

# PY_VERSION
if(NOT PY_VERSION)
@@ -373,6 +376,10 @@ if (WITH_MIPS)
add_definitions(-DPADDLE_WITH_MIPS)
endif()

if (WITH_ONEMKL)
add_definitions(-DPADDLE_WITH_ONEMKL)
endif()

if (WITH_HETERPS)
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-new")
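Editor's note: WITH_ONEMKL and WITH_POCKETFFT select the CPU FFT backend at compile time by defining PADDLE_WITH_ONEMKL and PADDLE_WITH_POCKETFFT (the latter in cmake/third_party.cmake further below). A minimal sketch of how code can branch on these macros — the two macros are the ones added in this PR; everything else here is the editor's illustration, not PR source:

#include <cstdio>

int main() {
#if defined(PADDLE_WITH_ONEMKL)
  std::puts("CPU FFT backend: oneMKL");    // built with -DPADDLE_WITH_ONEMKL
#elif defined(PADDLE_WITH_POCKETFFT)
  std::puts("CPU FFT backend: pocketfft"); // built with -DPADDLE_WITH_POCKETFFT
#else
  std::puts("no CPU FFT backend enabled");
#endif
}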
2 changes: 1 addition & 1 deletion cmake/FindGperftools.cmake
@@ -20,7 +20,7 @@
find_library(GPERFTOOLS_TCMALLOC
NAMES tcmalloc
HINTS ${Gperftools_ROOT_DIR}/lib)

find_library(GPERFTOOLS_PROFILER
NAMES profiler
HINTS ${Gperftools_ROOT_DIR}/lib)
44 changes: 44 additions & 0 deletions cmake/external/pocketfft.cmake
@@ -0,0 +1,44 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

include(ExternalProject)


set(POCKETFFT_PATH "${THIRD_PARTY_PATH}/pocketfft" CACHE STRING "A path setting for external_pocketfft path.")
set(POCKETFFT_PREFIX_DIR ${POCKETFFT_PATH})

set(POCKETFFT_REPOSITORY https://gitlab.mpcdf.mpg.de/mtr/pocketfft.git)
set(POCKETFFT_TAG release_for_eigen)

SET(POCKETFFT_INCLUDE_DIR ${POCKETFFT_PREFIX_DIR}/src)
message("POCKETFFT_INCLUDE_DIR is ${POCKETFFT_INCLUDE_DIR}")
include_directories(${POCKETFFT_INCLUDE_DIR})

ExternalProject_Add(
extern_pocketfft
${EXTERNAL_PROJECT_LOG_ARGS}
${SHALLOW_CLONE}
GIT_REPOSITORY ${POCKETFFT_REPOSITORY}
GIT_TAG ${POCKETFFT_TAG}
PREFIX ${POCKETFFT_PREFIX_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)

add_library(pocketfft INTERFACE)

add_dependencies(pocketfft extern_pocketfft)
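Editor's note: pocketfft is header-only, which is why the ExternalProject above has empty configure/build/install steps and is exposed as an INTERFACE library. A minimal sketch of the c2c entry point the CPU kernels can call, assuming the pocketfft_hdronly.h API at the release_for_eigen tag (editor's example, not code from this PR):

#include <complex>
#include <vector>
#include "pocketfft_hdronly.h"

int main() {
  std::vector<std::complex<double>> in(8, {1.0, 0.0}), out(8);
  pocketfft::shape_t shape{in.size()};                       // 1-D, length 8
  pocketfft::stride_t stride{sizeof(std::complex<double>)};  // contiguous layout
  pocketfft::shape_t axes{0};                                // transform along axis 0
  // forward complex-to-complex transform, scale factor 1.0
  pocketfft::c2c(shape, stride, stride, axes, pocketfft::FORWARD,
                 in.data(), out.data(), 1.0);
  // for a constant input, out[0] == 8 and the remaining bins are ~0
}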
6 changes: 6 additions & 0 deletions cmake/third_party.cmake
@@ -361,4 +361,10 @@ if (WITH_CRYPTO)
add_definitions(-DPADDLE_WITH_CRYPTO)
endif (WITH_CRYPTO)

if (WITH_POCKETFFT)
include(external/pocketfft)
list(APPEND third_party_deps extern_pocketfft)
add_definitions(-DPADDLE_WITH_POCKETFFT)
endif (WITH_POCKETFFT)

add_custom_target(third_party ALL DEPENDS ${third_party_deps})
18 changes: 17 additions & 1 deletion paddle/fluid/framework/data_type.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <iostream>
#include <string>
#include <typeindex>

@@ -170,11 +171,26 @@ extern inline proto::VarType::Type ToComplexType(proto::VarType::Type t) {
return proto::VarType::COMPLEX128;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unknown complex value data type (%s), now only support float32 and "
"Unknown real value data type (%s), now only support float32 and "
"float64.",
DataTypeToString(t)));
}
}

extern inline proto::VarType::Type ToRealType(proto::VarType::Type t) {
switch (t) {
case proto::VarType::COMPLEX64:
return proto::VarType::FP32;
case proto::VarType::COMPLEX128:
return proto::VarType::FP64;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unknown complex value data type (%s), now only support complex64 "
"and "
"complex128.",
DataTypeToString(t)));
}
}

} // namespace framework
} // namespace paddle
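Editor's note: ToRealType is the inverse of ToComplexType above; C2R transforms such as irfft use this kind of mapping to infer a real output dtype from a complex input. A self-contained analogue (the enum here is illustrative, not Paddle's proto::VarType):

#include <cassert>
#include <stdexcept>

enum class VarType { FP32, FP64, COMPLEX64, COMPLEX128 };

VarType ToRealType(VarType t) {
  switch (t) {
    case VarType::COMPLEX64:  return VarType::FP32;  // complex<float>  -> float
    case VarType::COMPLEX128: return VarType::FP64;  // complex<double> -> double
    default:
      throw std::invalid_argument("only complex64/complex128 have a real counterpart");
  }
}

int main() {
  assert(ToRealType(VarType::COMPLEX128) == VarType::FP64);
}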
12 changes: 11 additions & 1 deletion paddle/fluid/operators/CMakeLists.txt
@@ -59,6 +59,10 @@ if (WITH_GPU)
endif()
endif()

if (WITH_POCKETFFT)
SET(OP_HEADER_DEPS ${OP_HEADER_DEPS} pocketfft)
endif()


SET(OP_MKL_DEPS "")
if (NOT WITH_MKL OR NOT WITH_AVX)
@@ -75,7 +79,7 @@ if(WITH_UNITY_BUILD)
endif()

register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op lstm_op run_program_op eye_op recurrent_op
sync_batch_norm_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS})
sync_batch_norm_op spectral_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS})

op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc DEPS executor_cache ${OP_HEADER_DEPS})

@@ -94,6 +98,12 @@ else()
op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
endif()

if (WITH_GPU AND (NOT WITH_ROCM))
op_library(spectral_op SRCS spectral_op.cc spectral_op.cu DEPS dynload_cuda ${OP_HEADER_DEPS})
else()
op_library(spectral_op SRCS spectral_op.cc DEPS ${OP_HEADER_DEPS})
endif()

op_library(lstm_op DEPS ${OP_HEADER_DEPS} lstm_compute)
op_library(eye_op DEPS ${OP_HEADER_DEPS})
op_library(recurrent_op DEPS ${OP_HEADER_DEPS})
13 changes: 11 additions & 2 deletions paddle/fluid/operators/concat_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */

#include "paddle/fluid/operators/concat_op.h"

#include <paddle/fluid/platform/complex.h>
#include <memory>
#include <string>
#include <vector>
@@ -237,7 +238,11 @@ REGISTER_OP_CPU_KERNEL(
ops::ConcatKernel<paddle::platform::CPUDeviceContext,
paddle::platform::float16>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext, int>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext, uint8_t>);
ops::ConcatKernel<paddle::platform::CPUDeviceContext, uint8_t>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<float>>,
ops::ConcatKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<double>>);
REGISTER_OP_CPU_KERNEL(
concat_grad,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, double>,
@@ -247,4 +252,8 @@ REGISTER_OP_CPU_KERNEL(
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::float16>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, uint8_t>);
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, uint8_t>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<float>>,
ops::ConcatGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<double>>);
13 changes: 11 additions & 2 deletions paddle/fluid/operators/concat_op.cu.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/concat_op.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/float16.h"

namespace ops = paddle::operators;
@@ -24,7 +25,11 @@ REGISTER_OP_CUDA_KERNEL(
ops::ConcatKernel<paddle::platform::CUDADeviceContext, plat::float16>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext, int>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext, uint8_t>);
ops::ConcatKernel<paddle::platform::CUDADeviceContext, uint8_t>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext,
plat::complex<float>>,
ops::ConcatKernel<paddle::platform::CUDADeviceContext,
plat::complex<double>>);
REGISTER_OP_CUDA_KERNEL(
concat_grad,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, double>,
@@ -33,4 +38,8 @@ REGISTER_OP_CUDA_KERNEL(
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, plat::float16>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, uint8_t>);
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, uint8_t>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext,
plat::complex<float>>,
ops::ConcatGradKernel<paddle::platform::CUDADeviceContext,
plat::complex<double>>);
3 changes: 3 additions & 0 deletions paddle/fluid/operators/eigen/scale.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex.h"

namespace paddle {
namespace operators {
@@ -42,6 +43,8 @@ template struct EigenScale<Eigen::DefaultDevice, int8_t>;
template struct EigenScale<Eigen::DefaultDevice, int16_t>;
template struct EigenScale<Eigen::DefaultDevice, int>;
template struct EigenScale<Eigen::DefaultDevice, int64_t>;
template struct EigenScale<Eigen::DefaultDevice, platform::complex<float>>;
template struct EigenScale<Eigen::DefaultDevice, platform::complex<double>>;

} // namespace operators
} // namespace paddle
3 changes: 3 additions & 0 deletions paddle/fluid/operators/eigen/scale.cu
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/float16.h"

namespace paddle {
@@ -41,6 +42,8 @@ template struct EigenScale<Eigen::GpuDevice, int16_t>;
template struct EigenScale<Eigen::GpuDevice, int>;
template struct EigenScale<Eigen::GpuDevice, int64_t>;
template struct EigenScale<Eigen::GpuDevice, platform::float16>;
template struct EigenScale<Eigen::GpuDevice, platform::complex<float>>;
template struct EigenScale<Eigen::GpuDevice, platform::complex<double>>;

} // namespace operators
} // namespace paddle
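Editor's note: the new complex instantiations of EigenScale support the FFT normalization modes — e.g. the inverse transform divides a complex spectrum by n — and the complex scaling that appears in FFT gradients. A sketch of the elementwise operation being instantiated, using std::complex in place of Eigen (editor's illustration):

#include <complex>
#include <vector>

// elementwise scale of a complex tensor, as EigenScale<_, complex<T>> provides
void ScaleInPlace(std::vector<std::complex<float>>& x, float factor) {
  for (auto& v : x) v *= factor;
}

int main() {
  std::vector<std::complex<float>> spectrum(8, {4.0f, 4.0f});
  ScaleInPlace(spectrum, 1.0f / spectrum.size());  // "backward" norm: scale ifft by 1/n
}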
13 changes: 11 additions & 2 deletions paddle/fluid/operators/fill_zeros_like_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/fill_zeros_like_op.h"
#include "paddle/fluid/platform/complex.h"

namespace paddle {
namespace operators {
@@ -93,12 +94,20 @@ REGISTER_OP_CPU_KERNEL(
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, float>,
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, double>,
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, bool>);
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, bool>,
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<float>>,
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<double>>);

REGISTER_OP_CPU_KERNEL(
fill_zeros_like2,
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, int>,
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, float>,
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, double>,
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, bool>);
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, bool>,
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<float>>,
ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<double>>);
13 changes: 11 additions & 2 deletions paddle/fluid/operators/fill_zeros_like_op.cu.cc
@@ -14,6 +14,7 @@ limitations under the License. */

#include "paddle/fluid/operators/fill_zeros_like_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/float16.h"

namespace ops = paddle::operators;
@@ -25,7 +26,11 @@ REGISTER_OP_CUDA_KERNEL(
ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, double>,
ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>,
ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, bool>);
ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, bool>,
ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext,
paddle::platform::complex<float>>,
ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext,
paddle::platform::complex<double>>);

REGISTER_OP_CUDA_KERNEL(
fill_zeros_like2,
@@ -35,4 +40,8 @@ REGISTER_OP_CUDA_KERNEL(
ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, double>,
ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>,
ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, bool>);
ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, bool>,
ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext,
paddle::platform::complex<float>>,
ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext,
paddle::platform::complex<double>>);
6 changes: 5 additions & 1 deletion paddle/fluid/operators/flip_op.cc
@@ -17,6 +17,7 @@ limitations under the License. */
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/complex.h"

namespace paddle {
namespace operators {
@@ -145,6 +146,7 @@ class FlipOpGradMaker : public framework::SingleGradOpMaker<T> {
} // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OPERATOR(flip, ops::FlipOp, ops::FlipOpMaker, ops::FlipOpInferVarType,
ops::FlipOpGradMaker<paddle::framework::OpDesc>,
ops::FlipOpGradMaker<paddle::imperative::OpBase>);
@@ -153,7 +155,9 @@ REGISTER_OP_CPU_KERNEL(
ops::FlipKernel<paddle::platform::CPUDeviceContext, double>,
ops::FlipKernel<paddle::platform::CPUDeviceContext, int32_t>,
ops::FlipKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::FlipKernel<paddle::platform::CPUDeviceContext, bool>);
ops::FlipKernel<paddle::platform::CPUDeviceContext, bool>,
ops::FlipKernel<paddle::platform::CPUDeviceContext, plat::complex<float>>,
ops::FlipKernel<paddle::platform::CPUDeviceContext, plat::complex<double>>);

/* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION(flip)
6 changes: 5 additions & 1 deletion paddle/fluid/operators/flip_op.cu
@@ -16,6 +16,7 @@ limitations under the License. */

#include <vector>
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/complex.h"

namespace paddle {
namespace operators {
@@ -163,4 +164,7 @@ REGISTER_OP_CUDA_KERNEL(
ops::FlipKernel<paddle::platform::CUDADeviceContext, plat::float16>,
ops::FlipKernel<paddle::platform::CUDADeviceContext, int>,
ops::FlipKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::FlipKernel<paddle::platform::CUDADeviceContext, bool>);
ops::FlipKernel<paddle::platform::CUDADeviceContext, bool>,
ops::FlipKernel<paddle::platform::CUDADeviceContext, plat::complex<float>>,
ops::FlipKernel<paddle::platform::CUDADeviceContext,
plat::complex<double>>);
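Editor's note: several of the complex kernel registrations above (flip, concat, fill_zeros_like) exist to support the onesided r2c path — see commit "fill fft r2c result with conjugate symmetry (#19)". For real input of length n, rfft keeps n/2 + 1 bins and the rest are recovered by reversing and conjugating the interior bins, i.e. X[n-k] = conj(X[k]). A hedged sketch of that fill (editor's illustration, not the PR's kernel code):

#include <complex>
#include <vector>

std::vector<std::complex<double>> FullSpectrum(
    const std::vector<std::complex<double>>& half, size_t n) {
  std::vector<std::complex<double>> full(n);
  for (size_t k = 0; k < half.size(); ++k) full[k] = half[k];  // kept bins
  // mirror: a reversal plus conjugation, which is where flip's complex kernels come in
  for (size_t k = half.size(); k < n; ++k) full[k] = std::conj(full[n - k]);
  return full;
}

int main() {
  // half holds the n/2 + 1 onesided bins of some length-6 real signal
  std::vector<std::complex<double>> half{{6, 0}, {-1, 1}, {-1, 0}, {2, 0}};
  auto full = FullSpectrum(half, 6);  // full[5] == conj(full[1]), etc.
}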