Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions include/caffe/util/cudnn.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ inline void createTensor4dDesc(cudnnTensorDescriptor_t* desc) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(desc));
}

// Creates an (as yet unshaped) N-dimensional cuDNN tensor descriptor.
// Configure its extents afterwards with setTensorNdDesc(). The Dtype
// template parameter is unused here; it keeps the call sites symmetric
// with the other typed cudnn helpers in this header.
template <typename Dtype>
inline void createTensorDesc(cudnnTensorDescriptor_t* desc) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(desc));
}

template <typename Dtype>
inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc,
int n, int c, int h, int w,
Expand All @@ -73,6 +78,15 @@ inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc,
n, c, h, w, stride_n, stride_c, stride_h, stride_w));
}

template <typename Dtype>
inline void setTensorNdDesc(cudnnTensorDescriptor_t* desc,
std::vector<int> shape,
std::vector<int> stride) {
CHECK_EQ(shape.size(), stride.size()) << "Dimensions of shape and stride don't match !";
CUDNN_CHECK(cudnnSetTensorNdDescriptor(*desc, dataType<Dtype>::type,
shape.size(), shape.data(), stride.data()));
}

template <typename Dtype>
inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc,
int n, int c, int h, int w) {
Expand All @@ -84,6 +98,16 @@ inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc,
stride_n, stride_c, stride_h, stride_w);
}

template <typename Dtype>
inline void setTensorNdDesc(cudnnTensorDescriptor_t* desc,
std::vector<int> shape) {
std::vector<int> stride(shape.size(), 1);
for(int i = stride.size()-2; i >= 0; --i) {
stride[i] = shape[i+1] * stride[i+1];
}
setTensorNdDesc<Dtype>(desc, shape, stride);
}

template <typename Dtype>
inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
int n, int c, int h, int w) {
Expand All @@ -92,6 +116,14 @@ inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
n, c, h, w));
}

template <typename Dtype>
inline void createNdFilterDesc(cudnnFilterDescriptor_t* desc,
std::vector<int> shape) {
CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
CUDNN_CHECK(cudnnSetFilterNdDescriptor(*desc, dataType<Dtype>::type,
shape.size(), shape.data()));
}

template <typename Dtype>
inline void createConvolutionDesc(cudnnConvolutionDescriptor_t* conv) {
CUDNN_CHECK(cudnnCreateConvolutionDescriptor(conv));
Expand All @@ -105,6 +137,21 @@ inline void setConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION));
}

template <typename Dtype>
inline void setNdConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
cudnnTensorDescriptor_t bottom, cudnnFilterDescriptor_t filter,
std::vector<int> pad, std::vector<int> stride) {
int nbDims;
std::vector<int> shape(pad.size()+2);
cudnnDataType_t cudnn_type;
cudnnGetFilterNdDescriptor(filter, shape.size(), &cudnn_type, &nbDims, shape.data());
CHECK_EQ(nbDims, pad.size()+2) << "Dimensions of filters and pad don't match !";
CHECK_EQ(nbDims, stride.size()+2) << "Dimensions of filters and stride don't match !";
std::vector<int> upscale(pad.size(), 1);
CUDNN_CHECK(cudnnSetConvolutionNdDescriptor(*conv,
pad.size(), pad.data(), stride.data(), upscale.data(), CUDNN_CROSS_CORRELATION));
}

template <typename Dtype>
inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc,
PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode,
Expand All @@ -124,6 +171,27 @@ inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc,
pad_h, pad_w, stride_h, stride_w));
}

template <typename Dtype>
inline void createNdPoolingDesc(cudnnPoolingDescriptor_t* pool_desc,
PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode,
std::vector<int> shape, std::vector<int> pad, std::vector<int> stride) {
CHECK_EQ(shape.size(), pad.size()) << "Dimensions of shape and pad don't match !";
CHECK_EQ(shape.size(), stride.size()) << "Dimensions of shape and stride don't match !";
switch (poolmethod) {
case PoolingParameter_PoolMethod_MAX:
*mode = CUDNN_POOLING_MAX;
break;
case PoolingParameter_PoolMethod_AVE:
*mode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
break;
default:
LOG(FATAL) << "Unknown pooling method.";
}
CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc));
CUDNN_CHECK(cudnnSetPoolingNdDescriptor(*pool_desc, *mode, shape.size(),
shape.data(), pad.data(), stride.data()));
}

} // namespace cudnn

} // namespace caffe
Expand Down
99 changes: 99 additions & 0 deletions include/caffe/vision_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,61 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
size_t workspaceSizeInBytes;
void *workspace;
};

/**
 * @brief cuDNN-backed N-dimensional convolution layer.
 *
 * Derives directly from Layer (not ConvolutionLayer) because the 2-D
 * base class's height/width members cannot describe arbitrary-rank
 * shapes. The CPU Forward/Backward declarations exist to satisfy the
 * Layer interface; implementations live outside this header.
 */
template <typename Dtype>
class CudnnNdConvolutionLayer : public Layer<Dtype> {
public:
explicit CudnnNdConvolutionLayer(const LayerParameter& param)
: Layer<Dtype>(param), handles_setup_(false) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual ~CudnnNdConvolutionLayer();

virtual inline const char* type() const { return "NdConvolution"; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

// Compute height_out_ and width_out_ from other parameters.
// NOTE(review): comment inherited from the 2-D layer — presumably this
// fills output_shape_ for all spatial dims; confirm in the definition.
virtual void compute_output_shape();

// Per-spatial-dimension convolution geometry.
vector<int> kernel_shape_;
vector<int> stride_shape_;
int num_;
int channels_;
vector<int> pad_shape_;
vector<int> input_shape_;
int group_;
int num_output_;
vector<int> output_shape_;
bool bias_term_;

// Flattened-size bookkeeping; exact semantics defined in the .cu/.cpp
// implementation, not visible here.
int conv_out_spatial_dim_;
int kernel_dim_;
int output_offset_;

Blob<Dtype> bias_multiplier_;

// True once cuDNN handles/descriptors below have been created, so the
// destructor knows whether it must release them.
bool handles_setup_;
cudnnHandle_t* handle_;
cudaStream_t* stream_;
// One tensor/conv descriptor per bottom/top pair.
vector<cudnnTensorDescriptor_t> bottom_descs_, top_descs_;
cudnnTensorDescriptor_t bias_desc_;
cudnnFilterDescriptor_t filter_desc_;
vector<cudnnConvolutionDescriptor_t> conv_descs_;
int bottom_offset_, top_offset_, weight_offset_, bias_offset_;
size_t workspaceSizeInBytes;
void *workspace;
};
#endif

/**
Expand Down Expand Up @@ -451,6 +506,50 @@ class CuDNNPoolingLayer : public PoolingLayer<Dtype> {
cudnnPoolingDescriptor_t pooling_desc_;
cudnnPoolingMode_t mode_;
};

/**
 * @brief cuDNN-backed N-dimensional pooling layer.
 *
 * Derives directly from Layer (not PoolingLayer) because the 2-D base
 * class cannot describe arbitrary-rank shapes. Takes exactly one bottom
 * blob and produces exactly one top blob. CPU Forward/Backward are
 * declared to satisfy the Layer interface; definitions are elsewhere.
 */
template <typename Dtype>
class CudnnNdPoolingLayer : public Layer<Dtype> {
public:
explicit CudnnNdPoolingLayer(const LayerParameter& param)
: Layer<Dtype>(param), handles_setup_(false) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual ~CudnnNdPoolingLayer();

virtual inline const char* type() const { return "NdPooling"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }

protected:
// NOTE(review): presumably fills pooled_shape_ from input_shape_,
// kernel/pad/stride — confirm in the definition.
virtual void compute_output_shape();

virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

// Per-spatial-dimension pooling geometry.
vector<int> kernel_shape_;
vector<int> stride_shape_;
vector<int> pad_shape_;
int channels_;
vector<int> input_shape_;
vector<int> pooled_shape_;
bool global_pooling_;
// Auxiliary blobs; by analogy with Caffe's 2-D pooling these look like
// stochastic-pooling randoms and max-pooling argmax indices — confirm
// against the implementation.
Blob<Dtype> rand_idx_;
Blob<int> max_idx_;

// True once the cuDNN handle/descriptors below have been created, so
// the destructor knows whether it must release them.
bool handles_setup_;
cudnnHandle_t handle_;
cudnnTensorDescriptor_t bottom_desc_, top_desc_;
cudnnPoolingDescriptor_t pooling_desc_;
cudnnPoolingMode_t mode_;
};
#endif

/**
Expand Down
46 changes: 46 additions & 0 deletions src/caffe/layer_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,29 @@ shared_ptr<Layer<Dtype> > GetConvolutionLayer(

REGISTER_LAYER_CREATOR(Convolution, GetConvolutionLayer);

// Factory creator for NdConvolution layers. Only the cuDNN engine is
// implemented; the native Caffe engine aborts via NOT_IMPLEMENTED.
// @param param layer configuration; engine read from convolution_param.
// @return the constructed layer, or an empty pointer on the fatal paths.
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetNdConvolutionLayer(
    const LayerParameter& param) {
  ConvolutionParameter_Engine engine = param.convolution_param().engine();
  if (engine == ConvolutionParameter_Engine_DEFAULT) {
    engine = ConvolutionParameter_Engine_CAFFE;
#ifdef USE_CUDNN
    engine = ConvolutionParameter_Engine_CUDNN;
#endif
  }
  if (engine == ConvolutionParameter_Engine_CAFFE) {
    NOT_IMPLEMENTED;  // no native Nd convolution implementation yet
#ifdef USE_CUDNN
  } else if (engine == ConvolutionParameter_Engine_CUDNN) {
    return shared_ptr<Layer<Dtype> >(new CudnnNdConvolutionLayer<Dtype>(param));
#endif
  } else {
    LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
  }
  // Fix: the original fell off the end of a value-returning function on
  // the NOT_IMPLEMENTED / LOG(FATAL) paths — undefined behavior and a
  // compiler warning. Both macros abort at runtime, but the compiler
  // cannot know that, so return an explicit empty pointer.
  return shared_ptr<Layer<Dtype> >();
}

REGISTER_LAYER_CREATOR(NdConvolution, GetNdConvolutionLayer);

// Get pooling layer according to engine.
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetPoolingLayer(const LayerParameter& param) {
Expand Down Expand Up @@ -70,6 +93,29 @@ shared_ptr<Layer<Dtype> > GetPoolingLayer(const LayerParameter& param) {

REGISTER_LAYER_CREATOR(Pooling, GetPoolingLayer);

// Factory creator for NdPooling layers. Only the cuDNN engine is
// implemented; the native Caffe engine aborts via NOT_IMPLEMENTED.
// @param param layer configuration; engine read from pooling_param.
// @return the constructed layer, or an empty pointer on the fatal paths.
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetNdPoolingLayer(const LayerParameter& param) {
  PoolingParameter_Engine engine = param.pooling_param().engine();
  if (engine == PoolingParameter_Engine_DEFAULT) {
    engine = PoolingParameter_Engine_CAFFE;
#ifdef USE_CUDNN
    engine = PoolingParameter_Engine_CUDNN;
#endif
  }
  if (engine == PoolingParameter_Engine_CAFFE) {
    NOT_IMPLEMENTED;  // no native Nd pooling implementation yet
#ifdef USE_CUDNN
  } else if (engine == PoolingParameter_Engine_CUDNN) {
    return shared_ptr<Layer<Dtype> >(new CudnnNdPoolingLayer<Dtype>(param));
#endif
  } else {
    LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
  }
  // Fix: the original fell off the end of a value-returning function on
  // the NOT_IMPLEMENTED / LOG(FATAL) paths — undefined behavior and a
  // compiler warning. Both macros abort at runtime, but the compiler
  // cannot know that, so return an explicit empty pointer.
  return shared_ptr<Layer<Dtype> >();
}

REGISTER_LAYER_CREATOR(NdPooling, GetNdPoolingLayer);

// Get relu layer according to engine.
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetReLULayer(const LayerParameter& param) {
Expand Down
Loading