@@ -25,48 +25,27 @@ __global__ void KernelUnpool2dMax(const int nthreads, const T* input_data,
2525 const int channels, T* output_data,
2626 const int output_height,
2727 const int output_width) {
28- int in_n_stride = input_height * input_width * channels;
29- int in_c_stride = input_height * input_width;
30- int out_n_stride = output_height * output_width * channels;
31- int out_c_stride = output_height * output_width;
32- int index = blockIdx .x * blockDim .x + threadIdx .x ;
33- int offset = blockDim .x * gridDim .x ;
34- for (int i = index; i < nthreads; i += offset) {
35- int bidx = i / in_n_stride;
36- int boffset = i % in_n_stride;
37- int cidx = boffset / in_c_stride;
38- int out_offset = bidx * out_n_stride + cidx * out_c_stride;
39- int out_index = indices_data[i];
40- PADDLE_ENFORCE (out_index < out_c_stride,
41- " out_index < out_c_stride. Expected %ld < %ld, but got "
42- " %ld >= %ld. Please check input value." ,
43- out_index, out_c_stride, out_index, out_c_stride);
44- output_data[out_offset + out_index] = input_data[i];
28+ CUDA_KERNEL_LOOP (linearIndex, nthreads) {
29+ int c = (linearIndex / input_width / input_height) % channels;
30+ int n = linearIndex / input_width / input_height / channels;
31+ output_data += (n * channels + c) * output_height * output_width;
32+ int maxind = indices_data[linearIndex];
33+ output_data[maxind] = input_data[linearIndex];
4534 }
4635}
36+
/*
 * Gradient kernel for 2-D max unpooling.
 *
 * For every linear index i in [0, nthreads) the kernel splits i into a batch
 * index n and a channel index c (the two innermost dimensions being
 * input_height x input_width), then copies one value of output_grad into
 * input_grad[i]. The value is taken from the (n, c) plane of output_grad,
 * whose size is output_height * output_width, at the position stored in
 * indices_data[i].
 *
 * Parameters:
 *   nthreads       number of elements to process (upper bound of the loop).
 *   input_data     not read by this kernel.
 *   indices_data   per-element offset into the matching output plane.
 *   input_height,
 *   input_width,
 *   channels       extents used to decompose the linear index into (n, c).
 *   output_data    not read by this kernel.
 *   output_grad    gradient source, addressed as
 *                  (n * channels + c) * output_height * output_width + index.
 *   output_height,
 *   output_width   extents of one output plane.
 *   input_grad     destination; element i is written once per loop iteration.
 *
 * NOTE(review): indices_data values are used without a range check here;
 * callers must guarantee each index lies inside one output plane.
 */
template <typename T>
__global__ void KernelUnpool2dMaxGrad(
    const int nthreads, const T* input_data, const int* indices_data,
    const int input_height, const int input_width, const int channels,
    const T* output_data, const T* output_grad, const int output_height,
    const int output_width, T* input_grad) {
  CUDA_KERNEL_LOOP(linearIndex, nthreads) {
    const int c = (linearIndex / input_width / input_height) % channels;
    const int n = linearIndex / input_width / input_height / channels;
    // Use a local base pointer for this iteration instead of advancing the
    // output_grad parameter in place: if the loop macro lets one thread run
    // several iterations, an in-place `+=` would stack the plane offsets of
    // earlier iterations onto later ones and read from the wrong plane.
    const T* plane_grad =
        output_grad + (n * channels + c) * output_height * output_width;
    const int maxind = indices_data[linearIndex];
    input_grad[linearIndex] = plane_grad[maxind];
  }
}
7251/*
0 commit comments