Skip to content

Commit 5f04875

Browse files
authored
Fix xpu error message (#28061)
* fix error message,test=kunlun * fix, test=kunlun
1 parent c8d32c8 commit 5f04875

File tree

3 files changed

+102
-26
lines changed

3 files changed

+102
-26
lines changed

paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc

Lines changed: 65 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -86,53 +86,97 @@ class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> {
8686
int r = xpu::matrix_vector_add_grad(
8787
dev_ctx.x_context(), dout->data<T>(), dout->data<T>(),
8888
dout->data<T>(), dout->data<T>(), dx_data, dy_data, pre, n);
89-
PADDLE_ENFORCE_EQ(
90-
r, XPU_SUCCESS,
91-
platform::errors::External(
92-
"XPU API return wrong value[%d], please check whether "
93-
"Baidu Kunlun Card is properly installed.",
94-
r));
89+
if (r == xpu::Error_t::INVALID_PARAM) {
90+
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
91+
platform::errors::InvalidArgument(
92+
"XPU kernel error of ElementWiseAddOp, error "
93+
"message: INVALID_PARAM, "
94+
"please check your input & output."));
95+
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
96+
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
97+
platform::errors::Unavailable(
98+
"XPU kernel error of ElementWiseAddOp, error "
99+
"message: RUNTIME_ERROR, "
100+
"please check whether Baidu Kunlun card is "
101+
"properly installed."));
102+
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
103+
PADDLE_ENFORCE_EQ(
104+
r, xpu::Error_t::SUCCESS,
105+
platform::errors::ResourceExhausted(
106+
"XPU kernel error of ElementWiseAddOp, error message: "
107+
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
108+
}
95109
return;
96110
}
97111

98112
if (dx == nullptr) {
99113
PADDLE_ENFORCE_EQ(
100114
xpu_malloc(reinterpret_cast<void **>(&dx_data), len * sizeof(float)),
101-
XPU_SUCCESS, platform::errors::External("XPU has no enough memory"));
115+
XPU_SUCCESS,
116+
platform::errors::ResourceExhausted("XPU has no enough memory"));
102117
}
103118

104119
if (dy == nullptr) {
105120
PADDLE_ENFORCE_EQ(
106121
xpu_malloc(reinterpret_cast<void **>(&dy_data), len * sizeof(float)),
107-
XPU_SUCCESS, platform::errors::External("XPU has no enough memory"));
122+
XPU_SUCCESS,
123+
platform::errors::ResourceExhausted("XPU has no enough memory"));
108124
} else {
109125
if (len != n) {
110126
PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void **>(&dy_data),
111127
len * sizeof(float)),
112-
XPU_SUCCESS, platform::errors::External(
128+
XPU_SUCCESS, platform::errors::ResourceExhausted(
113129
"XPU has no enough memory"));
114130
}
115131
}
116132

117133
int r = xpu::elementwise_add_grad(
118134
dev_ctx.x_context(), dout->data<T>() /*x*/, dout->data<T>() /*y*/,
119135
dout->data<T>() /*out*/, dout->data<T>(), dx_data, dy_data, len);
120-
PADDLE_ENFORCE_EQ(
121-
r, XPU_SUCCESS,
122-
platform::errors::External(
123-
"XPU API return wrong value[%d], please check whether "
124-
"Baidu Kunlun Card is properly installed.",
125-
r));
136+
if (r == xpu::Error_t::INVALID_PARAM) {
137+
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
138+
platform::errors::InvalidArgument(
139+
"XPU kernel error of ElementWiseAddOp, error "
140+
"message: INVALID_PARAM, "
141+
"please check your input & output."));
142+
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
143+
PADDLE_ENFORCE_EQ(
144+
r, xpu::Error_t::SUCCESS,
145+
platform::errors::Unavailable(
146+
"XPU kernel error of ElementWiseAddOp, error message: "
147+
"RUNTIME_ERROR, "
148+
"please check whether Baidu Kunlun card is properly installed."));
149+
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
150+
PADDLE_ENFORCE_EQ(
151+
r, xpu::Error_t::SUCCESS,
152+
platform::errors::ResourceExhausted(
153+
"XPU kernel error of ElementWiseAddOp, error message: "
154+
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
155+
}
126156

127157
if ((dy != nullptr) && (len != n)) {
128158
r = xpu::reduce_ew(dev_ctx.x_context(), dy_data, dy->data<T>(), pre, n,
129159
post, xpu::ElementwiseOp::ASSIGN);
130-
PADDLE_ENFORCE_EQ(
131-
r, XPU_SUCCESS,
132-
platform::errors::External(
133-
"XPU API return wrong value[%d], please check whether "
134-
"Baidu Kunlun Card is properly installed.",
135-
r));
160+
if (r == xpu::Error_t::INVALID_PARAM) {
161+
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
162+
platform::errors::InvalidArgument(
163+
"XPU kernel error of ElementWiseAddOp, error "
164+
"message: INVALID_PARAM, "
165+
"please check your input & output."));
166+
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
167+
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
168+
platform::errors::Unavailable(
169+
"XPU kernel error of ElementWiseAddOp, error "
170+
"message: RUNTIME_ERROR, "
171+
"please check whether Baidu Kunlun card is "
172+
"properly installed."));
173+
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
174+
PADDLE_ENFORCE_EQ(
175+
r, xpu::Error_t::SUCCESS,
176+
platform::errors::ResourceExhausted(
177+
"XPU kernel error of ElementWiseAddOp, error message: "
178+
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
179+
}
136180
dev_ctx.Wait();
137181
xpu_free(dy_data);
138182
}

paddle/fluid/operators/optimizers/momentum_op_xpu.cc

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,24 @@ class MomentumOpXPUKernel : public framework::OpKernel<T> {
4848
dev_ctx.x_context(), param->data<float>(), velocity->data<float>(),
4949
grad->data<float>(), lr, use_nesterov, mu, param_out->numel(),
5050
param_out->data<float>(), velocity_out->data<float>());
51-
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
52-
platform::errors::PermissionDenied("XPU kernel error!"));
51+
if (r == xpu::Error_t::INVALID_PARAM) {
52+
PADDLE_ENFORCE_EQ(
53+
r, xpu::Error_t::SUCCESS,
54+
platform::errors::InvalidArgument(
55+
"XPU kernel error of MomentumOp, error message: INVALID_PARAM, "
56+
"please check your input & output."));
57+
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
58+
PADDLE_ENFORCE_EQ(
59+
r, xpu::Error_t::SUCCESS,
60+
platform::errors::Unavailable(
61+
"XPU kernel error of MomentumOp, error message: RUNTIME_ERROR, "
62+
"please check whether Baidu Kunlun card is properly installed."));
63+
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
64+
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
65+
platform::errors::ResourceExhausted(
66+
"XPU kernel error of MomentumOp, error message: "
67+
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
68+
}
5369
}
5470
};
5571
} // namespace operators

paddle/fluid/operators/optimizers/sgd_op_xpu.cc

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -56,9 +56,25 @@ class SGDOpXPUKernel : public framework::OpKernel<T> {
5656
auto &dev_ctx = ctx.template device_context<DeviceContext>();
5757
int r = xpu::sgd(dev_ctx.x_context(), sz, grad_data, param_data, lr,
5858
out_data);
59-
PADDLE_ENFORCE_EQ(
60-
r, xpu::Error_t::SUCCESS,
61-
platform::errors::PermissionDenied("XPU kernel error!"));
59+
if (r == xpu::Error_t::INVALID_PARAM) {
60+
PADDLE_ENFORCE_EQ(
61+
r, xpu::Error_t::SUCCESS,
62+
platform::errors::InvalidArgument(
63+
"XPU kernel error of SgdOp, error message: INVALID_PARAM, "
64+
"please check your input & output."));
65+
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
66+
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
67+
platform::errors::Unavailable(
68+
"XPU kernel error of SgdOp, error message: "
69+
"RUNTIME_ERROR, please check whether Baidu "
70+
"Kunlun Card is properly installed."));
71+
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
72+
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
73+
platform::errors::ResourceExhausted(
74+
"XPU kernel error of SgdOp, error "
75+
"message: NO_ENOUGH_WORKSPACE, XPU "
76+
"has no enough memory."));
77+
}
6278
} else {
6379
PADDLE_ENFORCE_EQ(false, true,
6480
platform::errors::PermissionDenied(

0 commit comments

Comments
 (0)