
Commit e928274

[NPU] log_softmax_grad, test=develop (#35484)
* [NPU] log_softmax_grad, test=develop
* remove debug files, test=develop
* update lookup_table_v2 for CANN 5.0.x, test=develop
1 parent e9ae8dd commit e928274

3 files changed

Lines changed: 117 additions & 21 deletions


paddle/fluid/operators/log_softmax_op_npu.cc

Lines changed: 41 additions & 12 deletions
@@ -14,32 +14,61 @@
 
 #include "paddle/fluid/operators/log_softmax_op.h"
 #include "paddle/fluid/operators/npu_op_runner.h"
+
 namespace paddle {
 namespace operators {
-template <typename DeviceContext, typename T>
+
+using NPUDeviceContext = platform::NPUDeviceContext;
+
+template <typename T>
 class LogSoftmaxNPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* X = ctx.Input<framework::Tensor>("X");
     auto* Out = ctx.Output<framework::Tensor>("Out");
     const int rank = X->dims().size();
     const int axis = CanonicalAxis(ctx.Attr<int>("axis"), rank);
-    std::vector<int> axes;
-    axes.push_back(axis);
-    framework::NPUAttributeMap attr_input = {{"axes", axes}};
     Out->mutable_data<T>(ctx.GetPlace());
-    const auto& runner = NpuOpRunner("LogSoftmaxV2", {*X}, {*Out}, attr_input);
-    auto stream =
-        ctx.template device_context<paddle::platform::NPUDeviceContext>()
-            .stream();
-    runner.Run(stream);
+
+    if (X->numel() != 0) {
+      auto stream = ctx.template device_context<NPUDeviceContext>().stream();
+      const auto& runner = NpuOpRunner("LogSoftmaxV2", {*X}, {*Out},
+                                       {{"axes", std::vector<int>{axis}}});
+      runner.Run(stream);
+    }
   }
 };
+
+template <typename T>
+class LogSoftmaxGradNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* Out = ctx.Input<framework::Tensor>("Out");
+    auto* dOut = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
+    auto* dX = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
+    const int rank = dOut->dims().size();
+    const int axis = CanonicalAxis(ctx.Attr<int>("axis"), rank);
+
+    // allocate memory on device.
+    dX->mutable_data<T>(ctx.GetPlace());
+
+    if (dOut->numel() != 0) {
+      auto stream = ctx.template device_context<NPUDeviceContext>().stream();
+      const auto& runner = NpuOpRunner("LogSoftmaxGrad", {*dOut, *Out}, {*dX},
+                                       {{"axis", std::vector<int>{axis}}});
+      runner.Run(stream);
+    }
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
+
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
 
-REGISTER_OP_NPU_KERNEL(
-    log_softmax,
-    ops::LogSoftmaxNPUKernel<paddle::platform::NPUDeviceContext, float>);
+REGISTER_OP_NPU_KERNEL(log_softmax, ops::LogSoftmaxNPUKernel<float>,
+                       ops::LogSoftmaxNPUKernel<plat::float16>);
+
+REGISTER_OP_NPU_KERNEL(log_softmax_grad, ops::LogSoftmaxGradNPUKernel<float>,
+                       ops::LogSoftmaxGradNPUKernel<plat::float16>);
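
For reference, the math behind the two CANN operators used above, and why the grad kernel feeds {*dOut, *Out} into LogSoftmaxGrad: the backward pass needs only the output gradient and the forward output. A minimal NumPy sketch, assuming LogSoftmaxV2/LogSoftmaxGrad implement the standard identities (the function names below are illustrative, not Paddle or CANN APIs):

import numpy as np

def log_softmax(x, axis=-1):
    # numerically stable forward: y = (x - max) - log(sum(exp(x - max)))
    shifted = x - x.max(axis=axis, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))

def log_softmax_grad(dout, out, axis=-1):
    # standard identity: dx = dout - exp(out) * sum(dout along axis),
    # where exp(out) is softmax(x); needs only dout and out, matching
    # the (dOut, Out) inputs of the NPU kernel above
    return dout - np.exp(out) * dout.sum(axis=axis, keepdims=True)

x = np.random.rand(2, 3).astype(np.float32)
y = log_softmax(x)
dx = log_softmax_grad(np.ones_like(y), y)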

paddle/fluid/operators/lookup_table_v2_op_npu.cc

Lines changed: 0 additions & 5 deletions
@@ -29,11 +29,6 @@ class LookupTableV2NPUKernel : public framework::OpKernel<T> {
     auto *output_t = ctx.Output<framework::LoDTensor>("Out");  // float tensor
     auto *table_t = ctx.Input<framework::LoDTensor>("W");
 
-    // It seems cann 20.1 accepts int64, but cann 20.2+ not.
-    PADDLE_ENFORCE_EQ(ids_t->type(), framework::proto::VarType::INT32,
-                      platform::errors::Unimplemented(
-                          "The index of LookupTableV2 should be int32."));
-
     auto *table_var = ctx.InputVar("W");
     PADDLE_ENFORCE_EQ(
         table_var->IsType<framework::LoDTensor>(), true,
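
The removed check had pinned LookupTableV2 indices to int32 because CANN 20.2+ rejected int64; per the commit message, CANN 5.0.x lifts that restriction. A hedged NumPy sketch of the op's reference behavior, a row gather from the embedding table W, now also with int64 ids (shapes and names below are illustrative):

import numpy as np

table = np.random.rand(10, 4).astype(np.float32)  # W: [vocab, emb_dim]
ids = np.array([1, 3, 3, 7], dtype=np.int64)      # previously forced to int32
out = table[ids]                                  # Out: [len(ids), emb_dim]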

python/paddle/fluid/tests/unittests/npu/test_log_softmax_op_npu.py

Lines changed: 76 additions & 4 deletions
@@ -22,9 +22,10 @@
 import paddle.fluid as fluid
 from paddle.fluid import core
 import paddle.nn.functional as F
+
 from test_log_softmax import ref_log_softmax, ref_log_softmax_grad
+
 paddle.enable_static()
-np.random.seed(10)
 
 
 class TestLogSoftmaxNPUOp(OpTest):
@@ -55,10 +56,16 @@ def set_dtype(self):
         pass
 
     def test_check_output(self):
-        self.check_output_with_place(self.place)
+        if self.dtype == np.float16:
+            self.check_output_with_place(self.place, atol=1e-2)
+        else:
+            self.check_output_with_place(self.place)
 
     def test_check_grad(self):
-        pass
+        if self.dtype == np.float16:
+            return
+        self.check_grad_with_place(
+            self.place, ['X'], ['Out'], user_defined_grads=[self.x_grad])
 
 
 def test_class(op_type, typename):
@@ -88,8 +95,73 @@ def set_dtype(self):
 globals()[cls_name] = TestLogSoftmaxAxis
 
 
-for _typename in {'float32'}:
+for _typename in {np.float32, np.float16}:
     test_class("logsoftmax", _typename)
     test_class2("logsoftmax", _typename)
+
+
+class TestNNLogSoftmaxAPI(unittest.TestCase):
+    def setUp(self):
+        self.x_shape = [2, 3, 4, 5]
+        self.x = np.random.uniform(-1., 1., self.x_shape).astype(np.float32)
+        self.place = paddle.NPUPlace(0) \
+            if paddle.fluid.core.is_compiled_with_npu() \
+            else paddle.CPUPlace()
+
+    def check_api(self, axis=-1):
+        ref_out = np.apply_along_axis(ref_log_softmax, axis, self.x)
+
+        logsoftmax = paddle.nn.LogSoftmax(axis)
+        # test static api
+        with paddle.static.program_guard(paddle.static.Program()):
+            x = paddle.fluid.data(name='x', shape=self.x_shape)
+            y = logsoftmax(x)
+            exe = paddle.static.Executor(self.place)
+            out = exe.run(feed={'x': self.x}, fetch_list=[y])
+        self.assertTrue(np.allclose(out[0], ref_out))
+
+        # test dygraph api
+        paddle.disable_static(self.place)
+        x = paddle.to_tensor(self.x)
+        y = logsoftmax(x)
+        self.assertTrue(np.allclose(y.numpy(), ref_out))
+        paddle.enable_static()
+
+    def test_check_api(self):
+        for axis in [-1, 1]:
+            self.check_api(axis)
+
+
+class TestNNFunctionalLogSoftmaxAPI(unittest.TestCase):
+    def setUp(self):
+        self.x_shape = [2, 3, 4, 5]
+        self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32)
+        self.place = paddle.NPUPlace(0) \
+            if paddle.fluid.core.is_compiled_with_npu() \
+            else paddle.CPUPlace()
+
+    def check_api(self, axis=-1, dtype=None):
+        x = self.x.copy()
+        if dtype is not None:
+            x = x.astype(dtype)
+        ref_out = np.apply_along_axis(ref_log_softmax, axis, x)
+        with paddle.static.program_guard(paddle.static.Program()):
+            x = paddle.fluid.data(name='x', shape=self.x_shape)
+            y = F.log_softmax(x, axis, dtype)
+            exe = paddle.static.Executor(self.place)
+            out = exe.run(feed={'x': self.x}, fetch_list=[y])
+        self.assertTrue(np.allclose(out[0], ref_out))
+
+        paddle.disable_static(self.place)
+        x = paddle.to_tensor(self.x)
+        y = F.log_softmax(x, axis, dtype)
+        self.assertTrue(np.allclose(y.numpy(), ref_out))
+        paddle.enable_static()
+
+    def test_check_api(self):
+        for axis in [-1, 1]:
+            self.check_api(axis)
+
+
 if __name__ == '__main__':
     unittest.main()
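
The test imports ref_log_softmax and ref_log_softmax_grad from test_log_softmax.py, which this diff does not show. A plausible sketch of those helpers, hedged, consistent with how they are called above (per-slice forward via np.apply_along_axis, and a user-defined gradient for a uniform 1/numel output grad; the real helpers may differ in detail):

import numpy as np

def ref_log_softmax(x):
    # operates on one 1-D slice, as used with np.apply_along_axis above
    shiftx = x - np.max(x)
    return shiftx - np.log(np.exp(shiftx).sum())

def ref_log_softmax_grad(x, axis):
    # assumed gradient reference for user_defined_grads: uniform output
    # gradient of 1 / numel, pushed through dx = dout - exp(out) * sum(dout)
    out = np.apply_along_axis(ref_log_softmax, axis, x)
    dout = np.full_like(x, 1.0 / x.size)
    return dout - np.exp(out) * dout.sum(axis=axis, keepdims=True)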
