Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions cmake/external/mklml.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@ IF(WIN32)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll)
SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.dll)
ELSE()
SET(MKLML_VER "mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
ELSE()
#TODO(intel-huying):
# Now enable Erf function in mklml library temporarily, it will be updated as offical version later.
SET(MKLML_VER "Glibc225_vsErf_mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
Expand Down
22 changes: 22 additions & 0 deletions paddle/fluid/operators/activation_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ limitations under the License. */

#pragma once
#include <glog/logging.h>
#include <algorithm>
#include <string>
#include <unordered_set>
#include <utility>
Expand All @@ -24,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/platform/float16.h"

#ifdef PADDLE_WITH_MKLDNN
Expand Down Expand Up @@ -301,8 +303,28 @@ template <typename T>
struct GeluFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
// Because the execute or device context can not be deliver here, it keep the
// marco for NVCC.
#if defined(PADDLE_WITH_MKLML) && !defined(_WIN32) && !defined(__APPLE__) && \
!defined(__OSX__) && !defined(PADDLE_WITH_CUDA)
auto x_data = x.data();
auto out_data = out.data();
int n = std::min(x.size(), out.size());

std::memset(out_data, 0, n * sizeof(T));
math::CBlas<T>::AXPY(n, static_cast<T>(M_SQRT1_2), x_data, 1, out_data, 1);
math::CBlas<T>::VMERF(n, out_data, out_data, VML_LA);
for (int i = 0; i < n; i++) {
out_data[i] += static_cast<T>(1);
}
math::CBlas<T>::VMUL(n, x_data, out_data, out_data);
for (int i = 0; i < n; i++) {
out_data[i] *= static_cast<T>(0.5);
}
#else
auto temp = (x * static_cast<T>(M_SQRT1_2)).erf();
out.device(d) = x * static_cast<T>(0.5) * (static_cast<T>(1) + temp);
#endif
}
};

Expand Down
8 changes: 8 additions & 0 deletions paddle/fluid/operators/math/blas.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ class Blas {
template <typename T>
void VINV(int n, const T* a, T* y) const;

template <typename T>
void VMERF(int n, const T* a, T* y, int64_t mode) const;

private:
const DeviceContext& context_;
};
Expand Down Expand Up @@ -290,6 +293,11 @@ class BlasT : private Blas<DeviceContext> {
Base()->template VINV<T>(args...);
}

template <typename... ARGS>
void VMERF(ARGS... args) const {
Base()->template VMERF<T>(args...);
}

private:
const Blas<DeviceContext>* Base() const {
return static_cast<const Blas<DeviceContext>*>(this);
Expand Down
23 changes: 23 additions & 0 deletions paddle/fluid/operators/math/blas_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ struct CBlas<float> {
static void VINV(ARGS... args) {
platform::dynload::vsInv(args...);
}

template <typename... ARGS>
static void VMERF(ARGS... args) {
platform::dynload::vmsErf(args...);
}
};

template <>
Expand Down Expand Up @@ -223,6 +228,11 @@ struct CBlas<double> {
static void VINV(ARGS... args) {
platform::dynload::vdInv(args...);
}

template <typename... ARGS>
static void VMERF(ARGS... args) {
platform::dynload::vmdErf(args...);
}
};

#else
Expand Down Expand Up @@ -625,6 +635,19 @@ void Blas<DeviceContext>::VINV(int n, const T *a, T *y) const {
#endif
}

template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::VMERF(int n, const T *a, T *y,
int64_t mode) const {
#ifdef PADDLE_WITH_MKLML
CBlas<T>::VMERF(n, a, y, mode);
#else
for (int i = 0; i < n; ++i) {
y[i] = std::erf(a[i]);
}
#endif
}

} // namespace math
} // namespace operators
} // namespace paddle
2 changes: 2 additions & 0 deletions paddle/fluid/platform/dynload/mklml.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ extern void* mklml_dso_handle;
__macro(vdPowx); \
__macro(vsInv); \
__macro(vdInv); \
__macro(vmsErf); \
__macro(vmdErf); \
__macro(MKL_Set_Num_Threads)

MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
Expand Down