From 6ca6aaaa242b7a47cd9a24d213b8dc75cf32b5d5 Mon Sep 17 00:00:00 2001 From: guosheng Date: Fri, 13 Nov 2020 21:05:41 +0800 Subject: [PATCH 1/3] Fix gradients with ignore_idx in softmax_with_cross_entropy. test=develop --- .../operators/softmax_with_cross_entropy_op.cu | 13 ++++++++++--- .../fluid/operators/softmax_with_cross_entropy_op.h | 6 ++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu index 3ac7a5a127b379..f86f02544dc980 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu @@ -37,11 +37,17 @@ __global__ void CrossEntropyGrad(T* logit_grad, const int64_t* labels, template __global__ void Scale(T* logit_grad, const T* loss_grad, const int num, - const int d, const int remain) { + const int d, const int remain, const int64_t* labels, + const int ignore_index) { CUDA_KERNEL_LOOP(index, num) { int idx_n = index / d; int idx_remain = index % remain; - logit_grad[index] *= loss_grad[idx_n * remain + idx_remain]; + int idx_lbl = idx_n * remain + idx_remain; + if (labels[idx_lbl] == ignore_index) { + logit_grad[index] = static_cast(0.); + } else { + logit_grad[index] *= loss_grad[idx_lbl]; + } } } @@ -260,6 +266,7 @@ struct HardLabelSoftmaxWithCrossEntropyFunctor { int idx_remain = idx % remain; // labels, loss view as [n, remain] int idx_lbl = idx_n * remain + idx_remain; + // It also would ignore labels not in range(class_num). 
if (idx_axis != labels_[idx_lbl]) { log_softmax_[idx] = exp_on_device(log_softmax_[idx]); } else { @@ -513,7 +520,7 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel { int num = n * d; grid = (num + block - 1) / block; Scale<<>>(logit_grad_data, loss_grad_data, num, - d, remain); + d, remain, label_data, ignore_index); } } }; diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.h b/paddle/fluid/operators/softmax_with_cross_entropy_op.h index cebd466f361d1e..9f57a966d92ed5 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.h +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.h @@ -82,6 +82,7 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel { } const bool soft_label = context.Attr("soft_label"); + auto ignore_index = context.Attr("ignore_index"); const int rank = logit_grad->dims().size(); const int axis = CanonicalAxis(context.Attr("axis"), rank); @@ -117,6 +118,11 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel { int idx = i * remain + j; logit_grad_data[i * d + label_data[idx] * remain + j] -= out_grad_data[idx]; + if (label_data[idx] == ignore_index) { + for (int k = 0; k < axis_dim; ++k) { + logit_grad_data[i * d + k * remain + j] = 0; + } + } } } } From 332141f379962d5ec625b67f4ed8773523fa847a Mon Sep 17 00:00:00 2001 From: guosheng Date: Sat, 14 Nov 2020 20:28:18 +0800 Subject: [PATCH 2/3] Fix gradients with ignore_idx in softmax_with_cross_entropy on cpu. Remove softmax_with_cross_entropy from op_threshold_white_list. 
test=develop --- paddle/fluid/operators/softmax_with_cross_entropy_op.h | 5 +++-- .../tests/unittests/test_softmax_with_cross_entropy_op.py | 6 +++--- .../tests/unittests/white_list/op_threshold_white_list.py | 1 - 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.h b/paddle/fluid/operators/softmax_with_cross_entropy_op.h index 9f57a966d92ed5..93f2552c3cee90 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.h +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.h @@ -116,12 +116,13 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel { for (int i = 0; i < n; ++i) { for (int j = 0; j < remain; j++) { int idx = i * remain + j; - logit_grad_data[i * d + label_data[idx] * remain + j] -= - out_grad_data[idx]; if (label_data[idx] == ignore_index) { for (int k = 0; k < axis_dim; ++k) { logit_grad_data[i * d + k * remain + j] = 0; } + } else { + logit_grad_data[i * d + label_data[idx] * remain + j] -= + out_grad_data[idx]; } } } diff --git a/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py b/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py index df2a0a523ad1ef..4af26f0e05fc97 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py @@ -83,9 +83,9 @@ def setUp(self): self.attrs = { "numeric_stable_mode": self.numeric_stable_mode, "soft_label": self.soft_label, + "ignore_index": self.ignore_index, } - if self.ignore_index >= 0: - self.attrs['ignore_index'] = self.ignore_index + if self.axis != -1: self.attrs['axis'] = self.axis @@ -93,7 +93,7 @@ def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(["Logits"], "Loss", max_relative_error=0.05) + self.check_grad(["Logits"], "Loss") class TestSoftmaxWithCrossEntropyOpNoCudnn(TestSoftmaxWithCrossEntropyOp): diff --git 
a/python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py b/python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py index 47d62999c92d12..58cf4d94386549 100644 --- a/python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py +++ b/python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py @@ -36,7 +36,6 @@ 'selu', \ 'sigmoid_cross_entropy_with_logits', \ 'soft_relu', \ - 'softmax_with_cross_entropy', \ 'spp', \ 'teacher_student_sigmoid_loss', \ 'unpool', \ From 7e590ed1ca88e9b33ecd4d3fcc2d789d1c40c1c0 Mon Sep 17 00:00:00 2001 From: guosheng Date: Sat, 14 Nov 2020 21:48:30 +0800 Subject: [PATCH 3/3] Fix test_softmax_with_cross_entropy_op.py. test=develop --- .../fluid/tests/unittests/test_softmax_with_cross_entropy_op.py | 2 +- .../fluid/tests/unittests/white_list/op_threshold_white_list.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py b/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py index 4af26f0e05fc97..0ee58d5be15e60 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py @@ -93,7 +93,7 @@ def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(["Logits"], "Loss") + self.check_grad(["Logits"], "Loss", max_relative_error=5e-5) class TestSoftmaxWithCrossEntropyOpNoCudnn(TestSoftmaxWithCrossEntropyOp): diff --git a/python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py b/python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py index 58cf4d94386549..47d62999c92d12 100644 --- a/python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py +++ b/python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py @@ -36,6 +36,7 @@ 'selu', \ 'sigmoid_cross_entropy_with_logits', \ 'soft_relu', 
\ + 'softmax_with_cross_entropy', \ 'spp', \ 'teacher_student_sigmoid_loss', \ 'unpool', \