Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions include/caffe/util/cudnn.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ inline void createTensor4dDesc(cudnnTensorDescriptor_t* desc) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(desc));
}

// Creates an (as yet unshaped) N-dimensional cuDNN tensor descriptor.
// Configure its extents afterwards with setTensorNdDesc(). The Dtype
// template parameter is unused here; it keeps the call sites symmetric
// with the other typed cudnn helpers in this header.
template <typename Dtype>
inline void createTensorDesc(cudnnTensorDescriptor_t* desc) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(desc));
}

template <typename Dtype>
inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc,
int n, int c, int h, int w,
Expand All @@ -73,6 +78,15 @@ inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc,
n, c, h, w, stride_n, stride_c, stride_h, stride_w));
}

template <typename Dtype>
inline void setTensorNdDesc(cudnnTensorDescriptor_t* desc,
std::vector<int> shape,
std::vector<int> stride) {
CHECK_EQ(shape.size(), stride.size()) << "Dimensions of shape and stride don't match !";
CUDNN_CHECK(cudnnSetTensorNdDescriptor(*desc, dataType<Dtype>::type,
shape.size(), shape.data(), stride.data()));
}

template <typename Dtype>
inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc,
int n, int c, int h, int w) {
Expand All @@ -84,6 +98,16 @@ inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc,
stride_n, stride_c, stride_h, stride_w);
}

template <typename Dtype>
inline void setTensorNdDesc(cudnnTensorDescriptor_t* desc,
std::vector<int> shape) {
std::vector<int> stride(shape.size(), 1);
for(int i = stride.size()-2; i >= 0; --i) {
stride[i] = shape[i+1] * stride[i+1];
}
setTensorNdDesc<Dtype>(desc, shape, stride);
}

template <typename Dtype>
inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
int n, int c, int h, int w) {
Expand All @@ -92,6 +116,14 @@ inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
n, c, h, w));
}

template <typename Dtype>
inline void createNdFilterDesc(cudnnFilterDescriptor_t* desc,
std::vector<int> shape) {
CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
CUDNN_CHECK(cudnnSetFilterNdDescriptor(*desc, dataType<Dtype>::type,
shape.size(), shape.data()));
}

template <typename Dtype>
inline void createConvolutionDesc(cudnnConvolutionDescriptor_t* conv) {
CUDNN_CHECK(cudnnCreateConvolutionDescriptor(conv));
Expand All @@ -105,6 +137,21 @@ inline void setConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION));
}

template <typename Dtype>
inline void setNdConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
cudnnTensorDescriptor_t bottom, cudnnFilterDescriptor_t filter,
std::vector<int> pad, std::vector<int> stride) {
int nbDims;
std::vector<int> shape(pad.size()+2);
cudnnDataType_t cudnn_type;
cudnnGetFilterNdDescriptor(filter, shape.size(), &cudnn_type, &nbDims, shape.data());
CHECK_EQ(nbDims, pad.size()+2) << "Dimensions of filters and pad don't match !";
CHECK_EQ(nbDims, stride.size()+2) << "Dimensions of filters and stride don't match !";
std::vector<int> upscale(pad.size(), 1);
CUDNN_CHECK(cudnnSetConvolutionNdDescriptor(*conv,
pad.size(), pad.data(), stride.data(), upscale.data(), CUDNN_CROSS_CORRELATION));
}

template <typename Dtype>
inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc,
PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode,
Expand All @@ -124,6 +171,27 @@ inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc,
pad_h, pad_w, stride_h, stride_w));
}

template <typename Dtype>
inline void createNdPoolingDesc(cudnnPoolingDescriptor_t* pool_desc,
PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode,
std::vector<int> shape, std::vector<int> pad, std::vector<int> stride) {
CHECK_EQ(shape.size(), pad.size()) << "Dimensions of shape and pad don't match !";
CHECK_EQ(shape.size(), stride.size()) << "Dimensions of shape and stride don't match !";
switch (poolmethod) {
case PoolingParameter_PoolMethod_MAX:
*mode = CUDNN_POOLING_MAX;
break;
case PoolingParameter_PoolMethod_AVE:
*mode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
break;
default:
LOG(FATAL) << "Unknown pooling method.";
}
CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc));
CUDNN_CHECK(cudnnSetPoolingNdDescriptor(*pool_desc, *mode, shape.size(),
shape.data(), pad.data(), stride.data()));
}

} // namespace cudnn

} // namespace caffe
Expand Down
99 changes: 99 additions & 0 deletions include/caffe/vision_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,61 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
size_t workspaceSizeInBytes;
void *workspace;
};

/**
 * @brief cuDNN-backed N-dimensional convolution layer.
 *
 * Derives directly from Layer (not ConvolutionLayer) because the 2-D
 * base class's height/width members cannot describe arbitrary-rank
 * shapes. The CPU Forward/Backward declarations exist to satisfy the
 * Layer interface; implementations live outside this header.
 */
template <typename Dtype>
class CudnnNdConvolutionLayer : public Layer<Dtype> {
public:
explicit CudnnNdConvolutionLayer(const LayerParameter& param)
: Layer<Dtype>(param), handles_setup_(false) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual ~CudnnNdConvolutionLayer();

virtual inline const char* type() const { return "NdConvolution"; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

// Compute height_out_ and width_out_ from other parameters.
// NOTE(review): comment inherited from the 2-D layer — presumably this
// fills output_shape_ for all spatial dims; confirm in the definition.
virtual void compute_output_shape();

// Per-spatial-dimension convolution geometry.
vector<int> kernel_shape_;
vector<int> stride_shape_;
int num_;
int channels_;
vector<int> pad_shape_;
vector<int> input_shape_;
int group_;
int num_output_;
vector<int> output_shape_;
bool bias_term_;

// Flattened-size bookkeeping; exact semantics defined in the .cu/.cpp
// implementation, not visible here.
int conv_out_spatial_dim_;
int kernel_dim_;
int output_offset_;

Blob<Dtype> bias_multiplier_;

// True once cuDNN handles/descriptors below have been created, so the
// destructor knows whether it must release them.
bool handles_setup_;
cudnnHandle_t* handle_;
cudaStream_t* stream_;
// One tensor/conv descriptor per bottom/top pair.
vector<cudnnTensorDescriptor_t> bottom_descs_, top_descs_;
cudnnTensorDescriptor_t bias_desc_;
cudnnFilterDescriptor_t filter_desc_;
vector<cudnnConvolutionDescriptor_t> conv_descs_;
int bottom_offset_, top_offset_, weight_offset_, bias_offset_;
size_t workspaceSizeInBytes;
void *workspace;
};
#endif

/**
Expand Down Expand Up @@ -451,6 +506,50 @@ class CuDNNPoolingLayer : public PoolingLayer<Dtype> {
cudnnPoolingDescriptor_t pooling_desc_;
cudnnPoolingMode_t mode_;
};

/**
 * @brief cuDNN-backed N-dimensional pooling layer.
 *
 * Derives directly from Layer (not PoolingLayer) because the 2-D base
 * class cannot describe arbitrary-rank shapes. Takes exactly one bottom
 * blob and produces exactly one top blob. CPU Forward/Backward are
 * declared to satisfy the Layer interface; definitions are elsewhere.
 */
template <typename Dtype>
class CudnnNdPoolingLayer : public Layer<Dtype> {
public:
explicit CudnnNdPoolingLayer(const LayerParameter& param)
: Layer<Dtype>(param), handles_setup_(false) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual ~CudnnNdPoolingLayer();

virtual inline const char* type() const { return "NdPooling"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }

protected:
// NOTE(review): presumably fills pooled_shape_ from input_shape_,
// kernel/pad/stride — confirm in the definition.
virtual void compute_output_shape();

virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

// Per-spatial-dimension pooling geometry.
vector<int> kernel_shape_;
vector<int> stride_shape_;
vector<int> pad_shape_;
int channels_;
vector<int> input_shape_;
vector<int> pooled_shape_;
bool global_pooling_;
// Auxiliary blobs; by analogy with Caffe's 2-D pooling these look like
// stochastic-pooling randoms and max-pooling argmax indices — confirm
// against the implementation.
Blob<Dtype> rand_idx_;
Blob<int> max_idx_;

// True once the cuDNN handle/descriptors below have been created, so
// the destructor knows whether it must release them.
bool handles_setup_;
cudnnHandle_t handle_;
cudnnTensorDescriptor_t bottom_desc_, top_desc_;
cudnnPoolingDescriptor_t pooling_desc_;
cudnnPoolingMode_t mode_;
};
#endif

/**
Expand Down
46 changes: 46 additions & 0 deletions src/caffe/layer_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,29 @@ shared_ptr<Layer<Dtype> > GetConvolutionLayer(

REGISTER_LAYER_CREATOR(Convolution, GetConvolutionLayer);

// Factory creator for NdConvolution layers. Only the cuDNN engine is
// implemented; the native Caffe engine aborts via NOT_IMPLEMENTED.
// @param param layer configuration; engine read from convolution_param.
// @return the constructed layer, or an empty pointer on the fatal paths.
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetNdConvolutionLayer(
    const LayerParameter& param) {
  ConvolutionParameter_Engine engine = param.convolution_param().engine();
  if (engine == ConvolutionParameter_Engine_DEFAULT) {
    engine = ConvolutionParameter_Engine_CAFFE;
#ifdef USE_CUDNN
    engine = ConvolutionParameter_Engine_CUDNN;
#endif
  }
  if (engine == ConvolutionParameter_Engine_CAFFE) {
    NOT_IMPLEMENTED;  // no native Nd convolution implementation yet
#ifdef USE_CUDNN
  } else if (engine == ConvolutionParameter_Engine_CUDNN) {
    return shared_ptr<Layer<Dtype> >(new CudnnNdConvolutionLayer<Dtype>(param));
#endif
  } else {
    LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
  }
  // Fix: the original fell off the end of a value-returning function on
  // the NOT_IMPLEMENTED / LOG(FATAL) paths — undefined behavior and a
  // compiler warning. Both macros abort at runtime, but the compiler
  // cannot know that, so return an explicit empty pointer.
  return shared_ptr<Layer<Dtype> >();
}

REGISTER_LAYER_CREATOR(NdConvolution, GetNdConvolutionLayer);

// Get pooling layer according to engine.
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetPoolingLayer(const LayerParameter& param) {
Expand Down Expand Up @@ -70,6 +93,29 @@ shared_ptr<Layer<Dtype> > GetPoolingLayer(const LayerParameter& param) {

REGISTER_LAYER_CREATOR(Pooling, GetPoolingLayer);

// Factory creator for NdPooling layers. Only the cuDNN engine is
// implemented; the native Caffe engine aborts via NOT_IMPLEMENTED.
// @param param layer configuration; engine read from pooling_param.
// @return the constructed layer, or an empty pointer on the fatal paths.
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetNdPoolingLayer(const LayerParameter& param) {
  PoolingParameter_Engine engine = param.pooling_param().engine();
  if (engine == PoolingParameter_Engine_DEFAULT) {
    engine = PoolingParameter_Engine_CAFFE;
#ifdef USE_CUDNN
    engine = PoolingParameter_Engine_CUDNN;
#endif
  }
  if (engine == PoolingParameter_Engine_CAFFE) {
    NOT_IMPLEMENTED;  // no native Nd pooling implementation yet
#ifdef USE_CUDNN
  } else if (engine == PoolingParameter_Engine_CUDNN) {
    return shared_ptr<Layer<Dtype> >(new CudnnNdPoolingLayer<Dtype>(param));
#endif
  } else {
    LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
  }
  // Fix: the original fell off the end of a value-returning function on
  // the NOT_IMPLEMENTED / LOG(FATAL) paths — undefined behavior and a
  // compiler warning. Both macros abort at runtime, but the compiler
  // cannot know that, so return an explicit empty pointer.
  return shared_ptr<Layer<Dtype> >();
}

REGISTER_LAYER_CREATOR(NdPooling, GetNdPoolingLayer);

// Get relu layer according to engine.
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetReLULayer(const LayerParameter& param) {
Expand Down
Loading