Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# An image for building paddle binaries
# Use the cuda devel base image for both cpu and gpu environments

# When you modify it, please be aware of cudnn-runtime version
# When you modify it, please be aware of cudnn-runtime version
# and libcudnn.so.x in paddle/scripts/docker/build.sh
FROM nvidia/cuda:8.0-cudnn7-devel-ubuntu16.04
MAINTAINER PaddlePaddle Authors <[email protected]>
Expand All @@ -24,7 +23,7 @@ ENV HOME /root
COPY ./paddle/scripts/docker/root/ /root/

RUN apt-get update && \
apt-get install -y \
apt-get install -y --allow-downgrades \
git python-pip python-dev openssh-server bison \
libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
Expand Down
2 changes: 2 additions & 0 deletions cmake/cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ set(CUDA_PROPAGATE_HOST_FLAGS OFF)
list(APPEND CUDA_NVCC_FLAGS "-std=c++11")
list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
# In CUDA 9, suppress the warnings nvcc emits for Eigen (note: -w disables all nvcc warnings)
list(APPEND CUDA_NVCC_FLAGS "-w")
# Set --expt-relaxed-constexpr to suppress Eigen warnings
list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")

Expand Down
4 changes: 3 additions & 1 deletion cmake/external/eigen.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ else()
extern_eigen3
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/RLovelett/eigen.git"
GIT_TAG 70661066beef694cadf6c304d0d07e0758825c10
# eigen on cuda9.1 is missing the header math_functions.hpp
# https://stackoverflow.com/questions/43113508/math-functions-hpp-not-found-when-using-cuda-with-eigen
GIT_TAG 917060c364181f33a735dc023818d5a54f60e54c
PREFIX ${EIGEN_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
Expand Down
10 changes: 5 additions & 5 deletions paddle/cuda/src/hl_cuda_lstm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,9 @@ __device__ __forceinline__ void transpose_32x32(real a[], const int idx) {
int addr = idx % 32;
#pragma unroll
for (int k = 1; k < 32; k++) {
// rSrc[k] = __shfl(rSrc[k], (threadIdx.x + k) % 32, 32);
addr = __shfl(addr, (idx + 1) % 32, 32);
a[k] = __shfl(a[k], addr, 32);
// rSrc[k] = __shfl_sync(rSrc[k], (threadIdx.x + k) % 32, 32);
addr = __shfl_sync(addr, (idx + 1) % 32, 32);
a[k] = __shfl_sync(a[k], addr, 32);
}

#pragma unroll
Expand All @@ -362,8 +362,8 @@ __device__ __forceinline__ void transpose_32x32(real a[], const int idx) {
addr = (32 - idx) % 32;
#pragma unroll
for (int k = 0; k < 32; k++) {
a[k] = __shfl(a[k], addr, 32);
addr = __shfl(addr, (idx + 31) % 32, 32);
a[k] = __shfl_sync(a[k], addr, 32);
addr = __shfl_sync(addr, (idx + 31) % 32, 32);
}
}

Expand Down
2 changes: 1 addition & 1 deletion paddle/cuda/src/hl_top_k.cu
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ __device__ __forceinline__ void blockReduce(Pair* shTopK,
}
}
if (maxId[0] / 32 == warp) {
if (__shfl(beam, (maxId[0]) % 32, 32) == maxLength) break;
if (__shfl_sync(beam, (maxId[0]) % 32, 32) == maxLength) break;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/accuracy_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ limitations under the License. */
#include <thrust/execution_policy.h>
#include <thrust/reduce.h>
#include "paddle/fluid/operators/accuracy_op.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/gpu_info.h"

namespace paddle {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/adagrad_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/fluid/operators/adagrad_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/box_coder_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/box_coder_op.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/conv_shift_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ limitations under the License. */

#include "paddle/fluid/operators/conv_shift_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/edit_distance_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/edit_distance_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/gpu_info.h"

namespace paddle {
Expand Down
21 changes: 5 additions & 16 deletions paddle/fluid/operators/elementwise_op_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ limitations under the License. */
#ifdef __NVCC__
#include <cuda.h>
#include <thrust/iterator/iterator_adaptor.h>
#include "paddle/fluid/platform/cuda_primitives.h"
constexpr int ELEMWISE_MAX_BLOCK_DIM = 1024;
#endif

Expand Down Expand Up @@ -333,24 +334,12 @@ static void ElemwiseGradBroadcast1CPU(const T* x, const T* y, const T* out,
}
}
}
#ifdef __NVCC__

// __shfl_down has been deprecated as of CUDA 9.0.
#if CUDA_VERSION < 9000
template <typename T>
__forceinline__ __device__ T __shfl_down_sync(unsigned, T val, int delta) {
return __shfl_down(val, delta);
}
#define CREATE_SHFL_MASK(mask, predicate) mask = 0u;
#else
#define FULL_WARP_MASK 0xFFFFFFFF
#define CREATE_SHFL_MASK(mask, predicate) \
mask = __ballot_sync(FULL_WARP_MASK, (predicate))
#endif
#ifdef __NVCC__

template <typename T>
__device__ T reduceSum(T val, int tid, int len) {
// TODO(zcd): The warp size should be taken from the
// NOTE(zcd): The warp size should be taken from the
// parameters of the GPU rather than simply hard-coded as 32.
// To make reduceSum more efficient,
// I use Warp-Level Parallelism and assume the Warp size
Expand All @@ -362,7 +351,7 @@ __device__ T reduceSum(T val, int tid, int len) {
CREATE_SHFL_MASK(mask, tid < len);

for (int offset = warpSize / 2; offset > 0; offset /= 2)
val += __shfl_down_sync(mask, val, offset);
val += platform::__shfl_down_sync(mask, val, offset);

if (tid < warpSize) shm[tid] = 0;

Expand All @@ -377,7 +366,7 @@ __device__ T reduceSum(T val, int tid, int len) {
if (tid < warpSize) {
val = shm[tid];
for (int offset = warpSize / 2; offset > 0; offset /= 2)
val += __shfl_down_sync(mask, val, offset);
val += platform::__shfl_down_sync(mask, val, offset);
}

return val;
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/lookup_table_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/lookup_table_op.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/concat.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ limitations under the License. */

#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/cos_sim_functor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/math/cos_sim_functor.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
11 changes: 6 additions & 5 deletions paddle/fluid/operators/math/cross_entropy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand All @@ -31,11 +32,11 @@ __global__ void CrossEntropyKernel(T* Y, const T* X, const int64_t* label,

template <typename T>
__device__ __forceinline__ T sum_single_warp(T val) {
val += __shfl_down(val, 16);
val += __shfl_down(val, 8);
val += __shfl_down(val, 4);
val += __shfl_down(val, 2);
val += __shfl_down(val, 1);
val += platform::__shfl_down_sync(0, val, 16);
val += platform::__shfl_down_sync(0, val, 8);
val += platform::__shfl_down_sync(0, val, 4);
val += platform::__shfl_down_sync(0, val, 2);
val += platform::__shfl_down_sync(0, val, 1);
return val;
}

Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/depthwise_conv.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/math/depthwise_conv.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/detail/gru_gpu_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ limitations under the License. */
#include <type_traits>
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/gru_compute.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/device_context.h"

namespace paddle {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/detail/lstm_gpu_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/lstm_compute.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/device_context.h"

#include <type_traits>
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/im2col.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/math/im2col.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/maxouting.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/math/maxouting.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/pooling.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/math/pooling.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/selected_rows_functor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ limitations under the License. */

#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/sequence_pooling.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ limitations under the License. */

#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/sequence_pooling.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/sequence_scale.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/math/sequence_scale.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/unpooling.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/math/unpooling.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/math/vol2col.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/math/vol2col.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/one_hot_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.

#include "paddle/fluid/operators/one_hot_op.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/gpu_info.h"

namespace paddle {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/roi_pool_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/roi_pool_op.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
6 changes: 3 additions & 3 deletions paddle/fluid/operators/row_conv_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ limitations under the License. */

#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/row_conv_op.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down Expand Up @@ -220,7 +220,7 @@ __global__ void RowConvGradFilterImproved(const T *in, const T *dout,

for (int offset = 16; offset > 0;
offset = offset / 2) { // blockDim.x is 32.
val += __shfl_down(val, offset);
val += platform::__shfl_down_sync(0, val, offset);
}
__syncthreads();

Expand Down Expand Up @@ -276,7 +276,7 @@ __global__ void RowConvGradFilter(const T *in, const T *dout, int num_sequence,

for (int offset = 16; offset > 0;
offset = offset / 2) { // blockDim.x is 32.
val += __shfl_down(val, offset);
val += platform::__shfl_down_sync(0, val, offset);
}
__syncthreads();

Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/sequence_erase_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ limitations under the License. */
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include "paddle/fluid/operators/sequence_erase_op.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/sequence_expand_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ limitations under the License. */

#include <algorithm>
#include "paddle/fluid/operators/sequence_expand_op.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/sgd_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ limitations under the License. */

#define EIGEN_USE_GPU
#include "paddle/fluid/operators/sgd_op.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/cuda_primitives.h"

namespace paddle {
namespace operators {
Expand Down
Loading