Skip to content

Commit 444c285

Browse files
authored
【NPU】Add TensorCopy to NPU kernel for reduce_sum op (#31667)
* update unittest
* add TensorCopy in NPU grad kernel
1 parent 8f08f16 commit 444c285

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,11 @@ class ReduceSumGradNPUKernel : public framework::OpKernel<T> {
8383
Tensor out_grad_tmp(out_grad->type());
8484
out_grad_tmp.Resize(out_dims);
8585
out_grad_tmp.mutable_data<T>(ctx.GetPlace());
86+
framework::TensorCopy(
87+
*out_grad, ctx.GetPlace(),
88+
ctx.template device_context<platform::DeviceContext>(),
89+
&out_grad_tmp);
90+
out_grad_tmp.Resize(out_dims);
8691

8792
auto runner = NpuOpRunner("BroadcastToD", {out_grad_tmp}, {*x_grad},
8893
{{"shape", framework::vectorize(x->dims())}});

python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,9 @@ def _test(self, run_npu=True):
102102
label = paddle.static.data(
103103
name="label", shape=[2, 1], dtype='int64')
104104

105-
z = paddle.add(a, b)
105+
a_1 = fluid.layers.fc(input=a, size=4, num_flatten_dims=2, act=None)
106+
b_1 = fluid.layers.fc(input=b, size=4, num_flatten_dims=2, act=None)
107+
z = paddle.add(a_1, b_1)
106108
z_1 = self.set_reduce_sum_function(z)
107109

108110
prediction = fluid.layers.fc(input=z_1, size=2, act='softmax')

0 commit comments

Comments
 (0)