
Commit 8600f47

error message opt for XPU, test=kunlun (#27972) (#28078)
* add stack pool2d roi_align xpu op, test=kunlun
* error message opt, test=kunlun
* add xpu unittest, test=kunlun
* skip check grad, test=kunlun
* fix boostget, test=kunlun
* error message opt for XPU, test=kunlun
1 parent d89deae · commit 8600f47

File tree (3 files changed: +62 −29 lines changed)

* paddle/fluid/operators/pool_op_xpu.cc
* paddle/fluid/operators/roi_align_op_xpu.cc
* paddle/fluid/operators/stack_op_xpu.cc

paddle/fluid/operators/pool_op_xpu.cc

Lines changed: 33 additions & 19 deletions
@@ -43,12 +43,14 @@ class PoolXPUKernel : public framework::OpKernel<T> {
     bool exclusive = context.Attr<bool>("exclusive");
     bool is_test = context.Attr<bool>("is_test");
     bool adaptive = context.Attr<bool>("adaptive");
-    PADDLE_ENFORCE_EQ(!adaptive, true,
-                      platform::errors::InvalidArgument(
-                          "XPU does not support adaptive == true!"));
-    PADDLE_ENFORCE_EQ(ksize.size(), 2,
-                      platform::errors::InvalidArgument(
-                          "XPU only support 2 dimension pooling!"));
+    PADDLE_ENFORCE_EQ(
+        !adaptive, true,
+        platform::errors::InvalidArgument(
+            "The Pool2d XPU OP does not support adaptive == true!"));
+    PADDLE_ENFORCE_EQ(
+        ksize.size(), 2,
+        platform::errors::InvalidArgument(
+            "The Pool2d XPU OP only support 2 dimension pooling!"));
     int* index_data = nullptr;
     if (context.Attr<bool>("global_pooling")) {
       for (size_t i = 0; i < ksize.size(); ++i) {
@@ -80,7 +82,10 @@ class PoolXPUKernel : public framework::OpKernel<T> {
                                stride_w, out_h, out_w);
     PADDLE_ENFORCE_EQ(
         r, xpu::Error_t::SUCCESS,
-        platform::errors::InvalidArgument("pool2d XPU kernel error!"));
+        platform::errors::External(
+            "The pool2d XPU API return wrong value[%d], please check "
+            "where Baidu Kunlun Card is properly installed.",
+            r));
   }
 };
 template <typename DeviceContext, typename T>
@@ -99,12 +104,15 @@ class PoolGradXPUKernel : public framework::OpKernel<T> {
     bool exclusive = context.Attr<bool>("exclusive");
     bool adaptive = context.Attr<bool>("adaptive");
     const int* index_data = nullptr;
-    PADDLE_ENFORCE_EQ(!adaptive, true,
-                      platform::errors::InvalidArgument(
-                          "XPU does not support adaptive == true!"));
-    PADDLE_ENFORCE_EQ(ksize.size(), 2,
-                      platform::errors::InvalidArgument(
-                          "XPU only support 2 dimension pooling!"));
+    PADDLE_ENFORCE_EQ(
+        !adaptive, true,
+        platform::errors::InvalidArgument(
+            "The Pool2d XPU OP does not support adaptive == true!"));
+    PADDLE_ENFORCE_EQ(ksize.size(), 2, platform::errors::InvalidArgument(
+                                           "The Pool2d XPU OP only support 2 "
+                                           "dimension pooling!, but received "
+                                           "%d-dimension pool kernel size",
+                                           ksize.size()));
     if (context.Attr<bool>("global_pooling")) {
       for (size_t i = 0; i < ksize.size(); ++i) {
         paddings[i] = 0;
@@ -139,16 +147,22 @@ class PoolGradXPUKernel : public framework::OpKernel<T> {
     int r =
         xpu::memset(dev_ctx.x_context(), reinterpret_cast<void**>(input_grad),
                     zero, in_x_grad->numel() * sizeof(float));
-    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
-                      platform::errors::InvalidArgument(
-                          "There are pool2d grad XPU kernel error raised!"));
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External(
+            "The Pool2d XPU OP return wrong value[%d], please check "
+            "where Baidu Kunlun Card is properly installed.",
+            r));
     r = xpu::pooling_backward(dev_ctx.x_context(), input, output, index_data,
                               output_grad, input_grad, pool_type, c, in_h, in_w,
                               pad_left, pad_right, pad_up, pad_down, win_h,
                               win_w, stride_h, stride_w, out_h, out_w);
-    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
-                      platform::errors::InvalidArgument(
-                          "There are pool2d grad XPU kernel error raised!"));
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External(
+            "The Pool2d XPU OP return wrong value[%d], please check "
+            "where Baidu Kunlun Card is properly installed.",
+            r));
   }
 };
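
The recurring pattern in this file checks every XPU runtime call's return code against xpu::Error_t::SUCCESS and reports failures as platform::errors::External, embedding the raw code so a device-side failure is distinguishable from a bad argument. A minimal standalone sketch of the same idea follows; EnforceXpuSuccess, XpuError, and FakeXpuPooling are illustrative stand-ins, not Paddle or XPU runtime APIs, and the real PADDLE_ENFORCE_EQ macro additionally records source location.

#include <cstdio>
#include <stdexcept>

// Illustrative stand-in for xpu::Error_t (not the real XPU runtime enum).
enum XpuError { XPU_SUCCESS_CODE = 0, XPU_RUNTIME_ERROR = 3 };

// Sketch of the "External error" check: the raw return value is kept in
// the message so a failure can be traced back to the device runtime.
void EnforceXpuSuccess(int r, const char* op_name) {
  if (r == XPU_SUCCESS_CODE) return;
  char buf[256];
  std::snprintf(buf, sizeof(buf),
                "The %s XPU API return wrong value[%d], please check "
                "whether the Baidu Kunlun Card is properly installed.",
                op_name, r);
  throw std::runtime_error(buf);
}

// Hypothetical device call standing in for xpu::pooling_forward.
int FakeXpuPooling() { return XPU_RUNTIME_ERROR; }

int main() {
  try {
    EnforceXpuSuccess(FakeXpuPooling(), "pool2d");
  } catch (const std::runtime_error& e) {
    // Prints the formatted message with the raw return code [3].
    std::fprintf(stderr, "%s\n", e.what());
  }
  return 0;
}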

paddle/fluid/operators/roi_align_op_xpu.cc

Lines changed: 13 additions & 5 deletions
@@ -44,11 +44,16 @@ class XPUROIAlignOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(
         rois_batch_size, batch_size,
         platform::errors::InvalidArgument(
-            "The rois_batch_size and imgs batch_size must be the same."));
+            "The rois_batch_size and imgs batch_size of roi_align_xpu OP must "
+            "be the same. But received rois_batch_size %d , batch_size %d",
+            rois_batch_size, batch_size));
     int rois_num_with_lod = rois_lod[rois_batch_size];
-    PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
-                      platform::errors::InvalidArgument(
-                          "The rois_num from input and lod must be the same."));
+    PADDLE_ENFORCE_EQ(
+        rois_num, rois_num_with_lod,
+        platform::errors::InvalidArgument(
+            "The rois_num from input and lod of roi_align_xpu OP must be the "
+            "same. But received input rois_num %d , input lod %d",
+            rois_num, rois_num_with_lod));
     T* output_data = out->mutable_data<T>(ctx.GetPlace());
     const T* rois_data = rois->data<T>();
     for (int n = 0; n < rois_batch_size; n++) {
@@ -62,7 +67,10 @@ class XPUROIAlignOpKernel : public framework::OpKernel<T> {
                    rois_lod[n] * channels * pooled_height * pooled_width);
       PADDLE_ENFORCE_EQ(
           r, xpu::Error_t::SUCCESS,
-          platform::errors::InvalidArgument("roi_align XPU kernel error!"));
+          platform::errors::External(
+              "The roi_align XPU OP return wrong value[%d], please check "
+              "where Baidu Kunlun Card is properly installed.",
+              r));
     }
   }
 }
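
The InvalidArgument rewrites in this file apply the same message discipline to shape checks: state the constraint, then echo the operands that violated it. A rough standalone sketch of that convention; CheckEq and its message layout are assumptions for illustration, not Paddle helpers.

#include <cstdio>
#include <sstream>
#include <stdexcept>
#include <string>

// Illustrative equality check that mirrors the commit's message style:
// name the constraint, then report the received values.
template <typename T>
void CheckEq(const T& a, const T& b, const std::string& what_a,
             const std::string& what_b, const std::string& op_name) {
  if (a == b) return;
  std::ostringstream msg;
  msg << "The " << what_a << " and " << what_b << " of " << op_name
      << " OP must be the same. But received " << what_a << " " << a
      << " , " << what_b << " " << b;
  throw std::invalid_argument(msg.str());
}

int main() {
  try {
    // Mismatched batch sizes: the message pinpoints both operands.
    CheckEq(4, 8, "rois_batch_size", "batch_size", "roi_align_xpu");
  } catch (const std::invalid_argument& e) {
    // Prints: "... But received rois_batch_size 4 , batch_size 8"
    std::fprintf(stderr, "%s\n", e.what());
  }
  return 0;
}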

paddle/fluid/operators/stack_op_xpu.cc

Lines changed: 16 additions & 5 deletions
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "paddle/fluid/operators/stack_op.h"
+#include <string>
 #ifdef PADDLE_WITH_XPU

 namespace paddle {
@@ -45,8 +46,15 @@ class StackXPUKernel : public framework::OpKernel<T> {
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     void* x_datas_host = std::malloc(n * sizeof(void*));
     void* x_datas_device = nullptr;
-    PADDLE_ENFORCE(xpu_malloc(reinterpret_cast<void**>(&x_datas_device),
-                              n * sizeof(void*)) == XPU_SUCCESS);
+    PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void**>(&x_datas_device),
+                                 n * sizeof(void*)),
+                      XPU_SUCCESS,
+                      platform::errors::ResourceExhausted(
+                          "\n\nOut of memory error on XPU, Cannot"
+                          "allocate %s memory on XPU. \n\nPlease "
+                          "check whether there is any other process "
+                          "using XPU.\n",
+                          string::HumanReadableSize(n * sizeof(void*))));
     for (auto i = 0; i < n; ++i) {
       ((const void**)x_datas_host)[i] = x[i]->data<T>();
     }
@@ -55,9 +63,12 @@ class StackXPUKernel : public framework::OpKernel<T> {
                          n * sizeof(void*));
     int r = xpu::stack_forward<float>(dev_ctx.x_context(), pre, post, n,
                                       x_datas_device, y_data);
-    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
-                      platform::errors::InvalidArgument(
-                          "There are stack XPU kernel error raised!"));
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External(
+            "The stack XPU API return wrong value[%d], please check "
+            "where Baidu Kunlun Card is properly installed.",
+            r));
     dev_ctx.Wait();
     std::free(x_datas_host);
     xpu_free(x_datas_device);
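
The allocation check here upgrades a bare PADDLE_ENFORCE to PADDLE_ENFORCE_EQ with a ResourceExhausted error, formatting the failed request size via string::HumanReadableSize. A standalone approximation of that formatting, under the usual 1024-based scaling assumption; HumanReadable below is an illustrative stand-in, not Paddle's actual helper.

#include <cstdio>
#include <string>

// Illustrative stand-in for string::HumanReadableSize: scale a byte
// count into the largest unit that keeps the value below 1024.
std::string HumanReadable(double bytes) {
  const char* units[] = {"B", "KB", "MB", "GB", "TB"};
  int i = 0;
  while (bytes >= 1024.0 && i < 4) {
    bytes /= 1024.0;
    ++i;
  }
  char buf[32];
  std::snprintf(buf, sizeof(buf), "%.6lf%s", bytes, units[i]);
  return buf;
}

int main() {
  // Mimics the commit's out-of-memory message for a 1024-pointer
  // request: prints "Cannot allocate 8.000000KB memory on XPU."
  size_t n = 1024;
  std::printf("Cannot allocate %s memory on XPU.\n",
              HumanReadable(n * sizeof(void*)).c_str());
  return 0;
}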
