Merge pull request #4159 from flx42/cudnn_v5_support

shelhamer · shelhamer · commit 7cf3538407f1 · 2016-05-17T12:14:39.000-07:00
Add cuDNN v5 support, drop cuDNN v3 support
diff --git a/docker/Makefile b/docker/Makefile
@@ -22,7 +22,7 @@ docker_files: standalone_files
 
 standalone_files: standalone/cpu/Dockerfile standalone/gpu/Dockerfile
 
-FROM_GPU = "nvidia/cuda:7.5-cudnn4-devel-ubuntu14.04"
+FROM_GPU = "nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04"
 FROM_CPU = "ubuntu:14.04"
 GPU_CMAKE_ARGS = -DUSE_CUDNN=1
 CPU_CMAKE_ARGS = -DCPU_ONLY=1
diff --git a/docker/standalone/gpu/Dockerfile b/docker/standalone/gpu/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:7.5-cudnn4-devel-ubuntu14.04
+FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04
 MAINTAINER caffe-maint@googlegroups.com
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
diff --git a/docs/installation.md b/docs/installation.md
@@ -40,14 +40,14 @@ Optional dependencies:
 
 * [OpenCV](http://opencv.org/) >= 2.4 including 3.0
 * IO libraries: `lmdb`, `leveldb` (note: leveldb requires `snappy`)
-* cuDNN for GPU acceleration (v4)
+* cuDNN for GPU acceleration (v5)
 
 Pycaffe and Matcaffe interfaces have their own natural needs.
 
 * For Python Caffe:  `Python 2.7` or `Python 3.3+`, `numpy (>= 1.7)`, boost-provided `boost.python`
 * For MATLAB Caffe: MATLAB with the `mex` compiler.
 
-**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v4; older versions are supported in older Caffe.
+**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v5; older versions are supported in older Caffe.
 
 **CPU-only Caffe**: for cold-brewed CPU-only Caffe uncomment the `CPU_ONLY := 1` flag in `Makefile.config` to configure and build Caffe without CUDA. This is helpful for cloud or cluster deployment.
 
diff --git a/include/caffe/layers/cudnn_relu_layer.hpp b/include/caffe/layers/cudnn_relu_layer.hpp
@@ -37,6 +37,7 @@ class CuDNNReLULayer : public ReLULayer<Dtype> {
   cudnnHandle_t             handle_;
   cudnnTensorDescriptor_t bottom_desc_;
   cudnnTensorDescriptor_t top_desc_;
+  cudnnActivationDescriptor_t activ_desc_;
 };
 #endif
 
diff --git a/include/caffe/layers/cudnn_sigmoid_layer.hpp b/include/caffe/layers/cudnn_sigmoid_layer.hpp
@@ -37,6 +37,7 @@ class CuDNNSigmoidLayer : public SigmoidLayer<Dtype> {
   cudnnHandle_t             handle_;
   cudnnTensorDescriptor_t bottom_desc_;
   cudnnTensorDescriptor_t top_desc_;
+  cudnnActivationDescriptor_t activ_desc_;
 };
 #endif
 
diff --git a/include/caffe/layers/cudnn_tanh_layer.hpp b/include/caffe/layers/cudnn_tanh_layer.hpp
@@ -37,6 +37,7 @@ class CuDNNTanHLayer : public TanHLayer<Dtype> {
   cudnnHandle_t             handle_;
   cudnnTensorDescriptor_t bottom_desc_;
   cudnnTensorDescriptor_t top_desc_;
+  cudnnActivationDescriptor_t activ_desc_;
 };
 #endif
 
diff --git a/include/caffe/util/cudnn.hpp b/include/caffe/util/cudnn.hpp
@@ -91,8 +91,13 @@ template <typename Dtype>
 inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
     int n, int c, int h, int w) {
   CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
+#if CUDNN_VERSION_MIN(5, 0, 0)
   CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType<Dtype>::type,
-      n, c, h, w));
+      CUDNN_TENSOR_NCHW, n, c, h, w));
+#else
+  CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(*desc, dataType<Dtype>::type,
+      CUDNN_TENSOR_NCHW, n, c, h, w));
+#endif
 }
 
 template <typename Dtype>
@@ -123,8 +128,21 @@ inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc,
     LOG(FATAL) << "Unknown pooling method.";
   }
   CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc));
-  CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, h, w,
-        pad_h, pad_w, stride_h, stride_w));
+#if CUDNN_VERSION_MIN(5, 0, 0)
+  CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode,
+        CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w));
+#else
+  CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(*pool_desc, *mode,
+        CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w));
+#endif
+}
+
+template <typename Dtype>
+inline void createActivationDescriptor(cudnnActivationDescriptor_t* activ_desc,
+    cudnnActivationMode_t mode) {
+  CUDNN_CHECK(cudnnCreateActivationDescriptor(activ_desc));
+  CUDNN_CHECK(cudnnSetActivationDescriptor(*activ_desc, mode,
+                                           CUDNN_PROPAGATE_NAN, Dtype(0)));
 }
 
 }  // namespace cudnn
diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu
@@ -30,19 +30,11 @@ void CuDNNConvolutionLayer<Dtype>::Forward_gpu(
       // Bias.
       if (this->bias_term_) {
         const Dtype* bias_data = this->blobs_[1]->gpu_data();
-#if CUDNN_VERSION_MIN(4, 0, 0)
         CUDNN_CHECK(cudnnAddTensor(handle_[g],
               cudnn::dataType<Dtype>::one,
               bias_desc_, bias_data + bias_offset_ * g,
               cudnn::dataType<Dtype>::one,
               top_descs_[i], top_data + top_offset_ * g));
-#else
-        CUDNN_CHECK(cudnnAddTensor(handle_[g], CUDNN_ADD_SAME_C,
-              cudnn::dataType<Dtype>::one,
-              bias_desc_, bias_data + bias_offset_ * g,
-              cudnn::dataType<Dtype>::one,
-              top_descs_[i], top_data + top_offset_ * g));
-#endif
       }
     }
 
@@ -82,7 +74,7 @@ void CuDNNConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
       // Gradient w.r.t. weights.
       if (this->param_propagate_down_[0]) {
         const Dtype* bottom_data = bottom[i]->gpu_data();
-        CUDNN_CHECK(cudnnConvolutionBackwardFilter_v3(
+        CUDNN_CHECK(cudnnConvolutionBackwardFilter(
               handle_[1*this->group_ + g],
               cudnn::dataType<Dtype>::one,
               bottom_descs_[i], bottom_data + bottom_offset_ * g,
@@ -100,7 +92,7 @@ void CuDNNConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
           weight = this->blobs_[0]->gpu_data();
         }
         Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();
-        CUDNN_CHECK(cudnnConvolutionBackwardData_v3(
+        CUDNN_CHECK(cudnnConvolutionBackwardData(
               handle_[2*this->group_ + g],
               cudnn::dataType<Dtype>::one,
               filter_desc_, weight + this->weight_offset_ * g,
diff --git a/src/caffe/layers/cudnn_relu_layer.cpp b/src/caffe/layers/cudnn_relu_layer.cpp
@@ -13,6 +13,7 @@ void CuDNNReLULayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
   CUDNN_CHECK(cudnnCreate(&handle_));
   cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
   cudnn::createTensor4dDesc<Dtype>(&top_desc_);
+  cudnn::createActivationDescriptor<Dtype>(&activ_desc_, CUDNN_ACTIVATION_RELU);
   handles_setup_ = true;
 }
 
diff --git a/src/caffe/layers/cudnn_relu_layer.cu b/src/caffe/layers/cudnn_relu_layer.cu
@@ -15,12 +15,21 @@ void CuDNNReLULayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* top_data = top[0]->mutable_gpu_data();
+#if CUDNN_VERSION_MIN(5, 0, 0)
   CUDNN_CHECK(cudnnActivationForward(this->handle_,
-        CUDNN_ACTIVATION_RELU,
+        activ_desc_,
         cudnn::dataType<Dtype>::one,
         this->bottom_desc_, bottom_data,
         cudnn::dataType<Dtype>::zero,
         this->top_desc_, top_data));
+#else
+  CUDNN_CHECK(cudnnActivationForward_v4(this->handle_,
+        activ_desc_,
+        cudnn::dataType<Dtype>::one,
+        this->bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        this->top_desc_, top_data));
+#endif
 }
 
 template <typename Dtype>
@@ -40,13 +49,23 @@ void CuDNNReLULayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   const Dtype* top_diff = top[0]->gpu_diff();
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+#if CUDNN_VERSION_MIN(5, 0, 0)
   CUDNN_CHECK(cudnnActivationBackward(this->handle_,
-        CUDNN_ACTIVATION_RELU,
+        activ_desc_,
         cudnn::dataType<Dtype>::one,
         this->top_desc_, top_data, this->top_desc_, top_diff,
         this->bottom_desc_, bottom_data,
         cudnn::dataType<Dtype>::zero,
         this->bottom_desc_, bottom_diff));
+#else
+  CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_,
+        activ_desc_,
+        cudnn::dataType<Dtype>::one,
+        this->top_desc_, top_data, this->top_desc_, top_diff,
+        this->bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        this->bottom_desc_, bottom_diff));
+#endif
 }
 
 INSTANTIATE_LAYER_GPU_FUNCS(CuDNNReLULayer);
diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cpp b/src/caffe/layers/cudnn_sigmoid_layer.cpp
@@ -13,6 +13,8 @@ void CuDNNSigmoidLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
   CUDNN_CHECK(cudnnCreate(&handle_));
   cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
   cudnn::createTensor4dDesc<Dtype>(&top_desc_);
+  cudnn::createActivationDescriptor<Dtype>(&activ_desc_,
+      CUDNN_ACTIVATION_SIGMOID);
   handles_setup_ = true;
 }
 
diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cu b/src/caffe/layers/cudnn_sigmoid_layer.cu
@@ -10,12 +10,21 @@ void CuDNNSigmoidLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* top_data = top[0]->mutable_gpu_data();
+#if CUDNN_VERSION_MIN(5, 0, 0)
   CUDNN_CHECK(cudnnActivationForward(this->handle_,
-        CUDNN_ACTIVATION_SIGMOID,
+        activ_desc_,
         cudnn::dataType<Dtype>::one,
         this->bottom_desc_, bottom_data,
         cudnn::dataType<Dtype>::zero,
         this->top_desc_, top_data));
+#else
+  CUDNN_CHECK(cudnnActivationForward_v4(this->handle_,
+        activ_desc_,
+        cudnn::dataType<Dtype>::one,
+        this->bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        this->top_desc_, top_data));
+#endif
 }
 
 template <typename Dtype>
@@ -30,13 +39,23 @@ void CuDNNSigmoidLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   const Dtype* top_diff = top[0]->gpu_diff();
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+#if CUDNN_VERSION_MIN(5, 0, 0)
   CUDNN_CHECK(cudnnActivationBackward(this->handle_,
-        CUDNN_ACTIVATION_SIGMOID,
+        activ_desc_,
         cudnn::dataType<Dtype>::one,
         this->top_desc_, top_data, this->top_desc_, top_diff,
         this->bottom_desc_, bottom_data,
         cudnn::dataType<Dtype>::zero,
         this->bottom_desc_, bottom_diff));
+#else
+  CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_,
+        activ_desc_,
+        cudnn::dataType<Dtype>::one,
+        this->top_desc_, top_data, this->top_desc_, top_diff,
+        this->bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        this->bottom_desc_, bottom_diff));
+#endif
 }
 
 INSTANTIATE_LAYER_GPU_FUNCS(CuDNNSigmoidLayer);
diff --git a/src/caffe/layers/cudnn_tanh_layer.cpp b/src/caffe/layers/cudnn_tanh_layer.cpp
@@ -13,6 +13,7 @@ void CuDNNTanHLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
   CUDNN_CHECK(cudnnCreate(&handle_));
   cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
   cudnn::createTensor4dDesc<Dtype>(&top_desc_);
+  cudnn::createActivationDescriptor<Dtype>(&activ_desc_, CUDNN_ACTIVATION_TANH);
   handles_setup_ = true;
 }
 
diff --git a/src/caffe/layers/cudnn_tanh_layer.cu b/src/caffe/layers/cudnn_tanh_layer.cu
@@ -10,12 +10,21 @@ void CuDNNTanHLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* top_data = top[0]->mutable_gpu_data();
+#if CUDNN_VERSION_MIN(5, 0, 0)
   CUDNN_CHECK(cudnnActivationForward(this->handle_,
-        CUDNN_ACTIVATION_TANH,
+        activ_desc_,
         cudnn::dataType<Dtype>::one,
         this->bottom_desc_, bottom_data,
         cudnn::dataType<Dtype>::zero,
         this->top_desc_, top_data));
+#else
+  CUDNN_CHECK(cudnnActivationForward_v4(this->handle_,
+        activ_desc_,
+        cudnn::dataType<Dtype>::one,
+        this->bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        this->top_desc_, top_data));
+#endif
 }
 
 template <typename Dtype>
@@ -31,13 +40,23 @@ void CuDNNTanHLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
 
+#if CUDNN_VERSION_MIN(5, 0, 0)
   CUDNN_CHECK(cudnnActivationBackward(this->handle_,
-        CUDNN_ACTIVATION_TANH,
+        activ_desc_,
         cudnn::dataType<Dtype>::one,
         this->top_desc_, top_data, this->top_desc_, top_diff,
         this->bottom_desc_, bottom_data,
         cudnn::dataType<Dtype>::zero,
         this->bottom_desc_, bottom_diff));
+#else
+  CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_,
+        activ_desc_,
+        cudnn::dataType<Dtype>::one,
+        this->top_desc_, top_data, this->top_desc_, top_diff,
+        this->bottom_desc_, bottom_data,
+        cudnn::dataType<Dtype>::zero,
+        this->bottom_desc_, bottom_diff));
+#endif
 }
 
 INSTANTIATE_LAYER_GPU_FUNCS(CuDNNTanHLayer);

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-FROM nvidia/cuda:7.5-cudnn4-devel-ubuntu14.04`
	`1`	`+FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04`
`2`	`2`	`MAINTAINER [email protected]`
`3`	`3`
`4`	`4`	`RUN apt-get update && apt-get install -y --no-install-recommends \`
Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,7 @@ void CuDNNReLULayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,`
`13`	`13`	`CUDNN_CHECK(cudnnCreate(&handle_));`
`14`	`14`	`cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);`
`15`	`15`	`cudnn::createTensor4dDesc<Dtype>(&top_desc_);`
	`16`	`+ cudnn::createActivationDescriptor<Dtype>(&activ_desc_, CUDNN_ACTIVATION_RELU);`
`16`	`17`	`handles_setup_ = true;`
`17`	`18`	`}`
`18`	`19`