@@ -64,9 +64,10 @@ template <class T>
6464class Im2ColFunctor <paddle::operators::math::ColFormat::kCFO ,
6565 platform::GPUPlace, T> {
6666 public:
67- void operator ()(const framework::Tensor& im, framework::Tensor& col,
67+ void operator ()(const platform::DeviceContext& context,
68+ const framework::Tensor& im, framework::Tensor& col,
6869 int stride_height, int stride_width, int padding_height,
69- int padding_width, platform::DeviceContext* context ) {
70+ int padding_width) {
7071 PADDLE_ENFORCE (im.dims ().size () == 3 );
7172 PADDLE_ENFORCE (col.dims ().size () == 5 );
7273
@@ -84,9 +85,9 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
8485 int block_y = (blocks + 512 - 1 ) / 512 ;
8586 dim3 threads (1024 , 1 );
8687 dim3 grid (block_x, block_y);
87- im2col<T><<<
88- grid, threads, 0 ,
89- reinterpret_cast <platform::CUDADeviceContext*>(context)-> stream ()>>>(
88+ im2col<T><<<grid, threads, 0 ,
89+ reinterpret_cast < const platform::CUDADeviceContext&>(context)
90+ . stream()>>> (
9091 im.data <T>(), num_outputs, input_height, input_width, filter_height,
9192 filter_width, stride_height, stride_width, padding_height,
9293 padding_width, output_height, output_width, col.data <T>());
@@ -149,9 +150,9 @@ template <class T>
149150class Col2ImFunctor <paddle::operators::math::ColFormat::kCFO ,
150151 platform::GPUPlace, T> {
151152 public:
152- void operator ()(framework::Tensor& im, const framework::Tensor& col ,
153- int stride_height, int stride_width , int padding_height ,
154- int padding_width, platform::DeviceContext* context ) {
153+ void operator ()(const platform::DeviceContext& context, framework::Tensor& im ,
154+ const framework::Tensor& col , int stride_height ,
155+ int stride_width, int padding_height, int padding_width ) {
155156 PADDLE_ENFORCE (im.dims ().size () == 3 );
156157 PADDLE_ENFORCE (col.dims ().size () == 5 );
157158
@@ -174,9 +175,9 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
174175
175176 // To avoid involving atomic operations, we will launch one kernel per
176177 // bottom dimension, and then in the kernel add up the top dimensions.
177- col2im<T><<<
178- grid, threads, 0 ,
179- reinterpret_cast <platform::CUDADeviceContext*>(context)-> stream ()>>>(
178+ col2im<T><<<grid, threads, 0 ,
179+ reinterpret_cast < const platform::CUDADeviceContext&>(context)
180+ . stream()>>> (
180181 num_kernels, col.data <T>(), input_height + 2 * padding_height,
181182 input_width + 2 * padding_width, input_channels, filter_height,
182183 filter_width, stride_height, stride_width, padding_height,
@@ -235,9 +236,10 @@ template <class T>
235236class Im2ColFunctor <paddle::operators::math::ColFormat::kOCF ,
236237 platform::GPUPlace, T> {
237238 public:
238- void operator ()(const framework::Tensor& im, framework::Tensor& col,
239+ void operator ()(const platform::DeviceContext& context,
240+ const framework::Tensor& im, framework::Tensor& col,
239241 int stride_height, int stride_width, int padding_height,
240- int padding_width, platform::DeviceContext* context ) {
242+ int padding_width) {
241243 PADDLE_ENFORCE (im.dims ().size () == 3 );
242244 PADDLE_ENFORCE (col.dims ().size () == 5 );
243245 int input_channels = im.dims ()[0 ];
@@ -268,9 +270,9 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
268270 dim3 threads (block_dim_x, block_dim_y,
269271 std::min (block_dim_z, input_channels));
270272 dim3 grid (output_width, output_height);
271- im2colOCF<T><<<
272- grid, threads, 0 ,
273- reinterpret_cast <platform::CUDADeviceContext*>(context)-> stream ()>>>(
273+ im2colOCF<T><<<grid, threads, 0 ,
274+ reinterpret_cast < const platform::CUDADeviceContext&>(context)
275+ . stream()>>> (
274276 im.data <T>(), col.data <T>(), input_channels, input_height, input_width,
275277 filter_height, filter_width, stride_height, stride_width,
276278 padding_height, padding_width, output_height, output_width);
@@ -318,9 +320,9 @@ template <class T>
318320class Col2ImFunctor <paddle::operators::math::ColFormat::kOCF ,
319321 platform::GPUPlace, T> {
320322 public:
321- void operator ()(framework::Tensor& im, const framework::Tensor& col ,
322- int stride_height, int stride_width , int padding_height ,
323- int padding_width, platform::DeviceContext* context ) {
323+ void operator ()(const platform::DeviceContext& context, framework::Tensor& im ,
324+ const framework::Tensor& col , int stride_height ,
325+ int stride_width, int padding_height, int padding_width ) {
324326 PADDLE_ENFORCE (im.dims ().size () == 3 );
325327 PADDLE_ENFORCE (col.dims ().size () == 5 );
326328 int input_channels = im.dims ()[0 ];
@@ -351,9 +353,9 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
351353 dim3 threads (block_dim_x, block_dim_y,
352354 std::min (block_dim_z, input_channels));
353355 dim3 grid (output_width, output_height);
354- col2imOCF<T><<<
355- grid, threads, 0 ,
356- reinterpret_cast <platform::CUDADeviceContext*>(context)-> stream ()>>>(
356+ col2imOCF<T><<<grid, threads, 0 ,
357+ reinterpret_cast < const platform::CUDADeviceContext&>(context)
358+ . stream()>>> (
357359 im.data <T>(), col.data <T>(), input_channels, input_height, input_width,
358360 filter_height, filter_width, stride_height, stride_width,
359361 padding_height, padding_width, output_height, output_width);
0 commit comments