
Commit c711e91

Add double grad op for sigmoid activation, test=develop (#32971)
Sigmoid:     Out = Sigmoid(X)
SigmoidGrad: DX = DOut*(1-Out)*Out

[This Patch]
Out
DOut -> SigmoidGradGrad -> DOutNew
DDX                        DDOut

DDOut = (1-Out)*Out*DDX
DOutNew = (1-2*Out)*DOut*DDX
1 parent 20b9be6 commit c711e91
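For reference, the second-order formulas in the message above follow from differentiating the first-order gradient DX = DOut*(1-Out)*Out with respect to each of its inputs and contracting with the incoming gradient DDX (a restatement for clarity, not part of the original commit message):

\[
\frac{\partial DX}{\partial DOut} = Out\,(1 - Out)
\;\;\Rightarrow\;\;
DDOut = Out\,(1 - Out)\,DDX
\]
\[
\frac{\partial DX}{\partial Out} = (1 - 2\,Out)\,DOut
\;\;\Rightarrow\;\;
DOutNew = (1 - 2\,Out)\,DOut\,DDX
\]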

4 files changed: +180 -2 lines changed


paddle/fluid/operators/activation_op.cc

Lines changed: 62 additions & 0 deletions
@@ -789,6 +789,27 @@ class ActivationOpDoubleGrad2 : public framework::OperatorWithKernel {
   }
 };
 
+template <typename T>
+class SigmoidDoubleGradMaker
+    : public ::paddle::framework::SingleGradOpMaker<T> {
+ public:
+  using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
+
+ protected:
+  void Apply(GradOpPtr<T> op) const override {
+    op->SetType("sigmoid_grad_grad");
+    // input1: Out
+    op->SetInput("Out", this->Input("Out"));
+    // input2: ddx
+    op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
+    op->SetInput("DOut", this->Input(framework::GradVarName("Out")));
+    op->SetAttrMap(this->Attrs());
+    // output: ddy
+    op->SetOutput("DOutNew", this->InputGrad("Out"));
+    op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
+  }
+};
+
 template <typename T>
 class TanhDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
  public:
@@ -1068,6 +1089,47 @@ namespace plat = paddle::platform;
 FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
 FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL);
 
+/* ========================== sigmoid register =============================
+ */
+// 1. Register Sigmoid Operator
+REGISTER_OPERATOR(
+    sigmoid, ops::ActivationOp, ops::SigmoidOpMaker,
+    ops::ActivationOpInferVarType,
+    ops::ActivationGradOpMaker<ops::SigmoidGradFunctor<float>::FwdDeps(),
+                               paddle::framework::OpDesc>,
+    ops::ActivationGradOpMaker<ops::SigmoidGradFunctor<float>::FwdDeps(),
+                               paddle::imperative::OpBase>,
+    std::conditional<ops::CanInplaceAct<ops::SigmoidGradFunctor<float>>(),
+                     ops::ActFwdInplaceInferer, void>::type);
+
+// 2. Register Sigmoid Grad Operator
+REGISTER_OPERATOR(sigmoid_grad, ops::ActivationOpGrad,
+                  ops::ActivationGradOpInplaceInferer,
+                  ops::SigmoidDoubleGradMaker<paddle::framework::OpDesc>,
+                  ops::SigmoidDoubleGradMaker<paddle::imperative::OpBase>)
+
+// 3. Register Sigmoid DoubleGrad Operator
+REGISTER_OPERATOR(
+    sigmoid_grad_grad,
+    ops::ActivationOpDoubleGrad<ops::SigmoidGradFunctor<float>::FwdDeps()>,
+    ops::ActivationDoubleGradOpInplaceInferer);
+
+// Register Sigmoid/GradSigmoid Kernels
+REGISTER_ACTIVATION_CPU_KERNEL(sigmoid, Sigmoid, SigmoidFunctor,
+                               SigmoidGradFunctor);
+
+// Register DoubleGrad Kernel
+REGISTER_OP_CPU_KERNEL(
+    sigmoid_grad_grad,
+    ops::SigmoidDoubleGradKernel<plat::CPUDeviceContext,
+                                 ops::SigmoidGradGradFunctor<float>>,
+    ops::SigmoidDoubleGradKernel<plat::CPUDeviceContext,
+                                 ops::SigmoidGradGradFunctor<double>>,
+    ops::SigmoidDoubleGradKernel<plat::CPUDeviceContext,
+                                 ops::SigmoidGradGradFunctor<plat::float16>>);
+/* ========================================================================== */
+
 /* ========================== tanh register ============================= */
 REGISTER_OPERATOR(
     tanh, ops::ActivationOp, ops::TanhOpMaker, ops::ActivationOpInferVarType,

paddle/fluid/operators/activation_op.cu

Lines changed: 15 additions & 1 deletion
@@ -1481,6 +1481,21 @@ REGISTER_OP_CUDA_KERNEL(
 #endif
 /* ========================================================================== */
 
+/* =========================== sigmoid register ============================
+ */
+REGISTER_ACTIVATION_CUDA_KERNEL(sigmoid, Sigmoid, CudaSigmoidFunctor,
+                                CudaSigmoidGradFunctor);
+
+REGISTER_OP_CUDA_KERNEL(
+    sigmoid_grad_grad,
+    ops::SigmoidDoubleGradKernel<paddle::platform::CUDADeviceContext,
+                                 ops::SigmoidGradGradFunctor<float>>,
+    ops::SigmoidDoubleGradKernel<paddle::platform::CUDADeviceContext,
+                                 ops::SigmoidGradGradFunctor<double>>,
+    ops::SigmoidDoubleGradKernel<plat::CUDADeviceContext,
+                                 ops::SigmoidGradGradFunctor<plat::float16>>);
+/* ========================================================================== */
+
 /* =========================== tanh register ============================ */
 REGISTER_ACTIVATION_CUDA_KERNEL(tanh, Tanh, CudaTanhFunctor,
                                 CudaTanhGradFunctor);
@@ -1595,7 +1610,6 @@ REGISTER_OP_CUDA_KERNEL(
 /* ========================================================================== */
 
 #define FOR_EACH_ACTIVATION_CUDA_OP(__macro)                                  \
-  __macro(sigmoid, Sigmoid, CudaSigmoidFunctor, CudaSigmoidGradFunctor);      \
   __macro(silu, Silu, CudaSiluFunctor, CudaSiluGradFunctor);                  \
   __macro(logsigmoid, LogSigmoid, CudaLogSigmoidFunctor,                      \
           CudaLogSigmoidGradFunctor);                                         \

paddle/fluid/operators/activation_op.h

Lines changed: 81 additions & 1 deletion
@@ -258,6 +258,43 @@ struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
   static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
 };
 
+/*
+    Out
+    DOut -> SigmoidGradGrad -> DOutNew
+    DDX                        DDOut
+
+    DDOut = (1-Out)*Out*DDX
+    DOutNew = (1-2*Out)*DOut*DDX
+*/
+template <typename T>
+struct SigmoidGradGradFunctor : public BaseActivationFunctor<T> {
+  template <typename Device>
+  void operator()(const Device& dev, const framework::Tensor* Out,
+                  const framework::Tensor* ddX, const framework::Tensor* dOut,
+                  framework::Tensor* dOutNew, framework::Tensor* ddOut) const {
+    auto* d = dev.eigen_device();
+    auto ddx = framework::EigenVector<T>::Flatten(
+        GET_DATA_SAFELY(ddX, "Input", "DDX", "SigmoidGradGrad"));
+    auto out = framework::EigenVector<T>::Flatten(
+        GET_DATA_SAFELY(Out, "Input", "Out", "SigmoidGradGrad"));
+
+    if (dOutNew) {
+      auto dout = framework::EigenVector<T>::Flatten(
+          GET_DATA_SAFELY(dOut, "Input", "DOut", "SigmoidGradGrad"));
+      auto dout_new = framework::EigenVector<T>::Flatten(
+          GET_DATA_SAFELY(dOutNew, "Output", "DOutNew", "SigmoidGradGrad"));
+      dout_new.device(*d) =
+          (static_cast<T>(1) - static_cast<T>(2) * out) * dout * ddx;
+    }
+    if (ddOut) {
+      auto ddout = framework::EigenVector<T>::Flatten(
+          GET_DATA_SAFELY(ddOut, "Output", "DDOut", "SigmoidGradGrad"));
+      ddout.device(*d) = (static_cast<T>(1) - out) * out * ddx;
+    }
+  }
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
+};
+
 // silu(x) = x / (1 + exp(-x))
 template <typename T>
 struct SiluFunctor : public BaseActivationFunctor<T> {
@@ -1789,6 +1826,50 @@ inline void ExtractDoubleGradTensorWithInputDOut(
 }
 }
 
+template <typename DeviceContext, typename Functor>
+class SigmoidDoubleGradKernel
+    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
+ public:
+  using T = typename Functor::ELEMENT_TYPE;
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    const framework::Tensor *Out, *ddX, *dOut;
+    framework::Tensor *dOutNew, *ddOut;
+    Out = ddX = dOut = nullptr;
+    dOutNew = ddOut = nullptr;
+
+    // extract ddx(input) and out(input)
+    ddX = ctx.Input<framework::Tensor>("DDX");
+    Out = ctx.Input<framework::Tensor>("Out");
+    PADDLE_ENFORCE_NOT_NULL(
+        ddX, platform::errors::NotFound(
+                 "Cannot get input Variable ddX, variable name = %s",
+                 ctx.InputName("DDX")));
+    PADDLE_ENFORCE_NOT_NULL(
+        Out, platform::errors::NotFound(
+                 "Cannot get input Variable Out, variable name = %s",
+                 ctx.InputName("Out")));
+
+    // set output ddout
+    ddOut = ctx.Output<framework::Tensor>("DDOut");
+
+    // extract dOut(input)
+    dOut = ctx.Input<framework::Tensor>("DOut");
+    PADDLE_ENFORCE_NOT_NULL(
+        dOut, platform::errors::NotFound(
+                  "Cannot get input Variable dOut, variable name = %s",
+                  ctx.InputName("DOut")));
+
+    // set output dout_new
+    dOutNew = ctx.Output<framework::Tensor>("DOutNew");
+
+    if (dOutNew) dOutNew->mutable_data<T>(Out->dims(), ctx.GetPlace());
+    if (ddOut) ddOut->mutable_data<T>(Out->dims(), ctx.GetPlace());
+    auto& place = ctx.template device_context<DeviceContext>();
+    Functor functor;
+    functor(place, Out, ddX, dOut, dOutNew, ddOut);
+  }
+};
+
 template <typename DeviceContext, typename Functor>
 class TanhDoubleGradKernel
     : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
@@ -2153,7 +2234,6 @@ struct LogGradGradFunctor : public BaseActivationFunctor<T> {
 }  // namespace paddle
 
 #define FOR_EACH_ACTIVATION_OP(__macro)                                       \
-  __macro(sigmoid, Sigmoid, SigmoidFunctor, SigmoidGradFunctor);              \
   __macro(silu, Silu, SiluFunctor, SiluGradFunctor);                          \
   __macro(logsigmoid, LogSigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor);  \
   __macro(atan, Atan, AtanFunctor, AtanGradFunctor);                          \
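As a sanity check on the algebra implemented by the SigmoidGradGradFunctor added above (not part of the patch; out, dout, and ddx below are NumPy stand-ins for the op's Out, DOut, and DDX tensors), a small sketch that compares both double-grad outputs against central finite differences of the first-order gradient DX = DOut*(1-Out)*Out:

import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(-1.0, 1.0, size=1000)
out = 1.0 / (1.0 + np.exp(-x))            # Out  = sigmoid(x)
dout = rng.uniform(-1.0, 1.0, size=1000)  # DOut = incoming gradient of Out
ddx = rng.uniform(-1.0, 1.0, size=1000)   # DDX  = incoming gradient of DX

# Formulas implemented by SigmoidGradGradFunctor.
ddout = (1.0 - out) * out * ddx            # dDX/dDOut contracted with DDX
dout_new = (1.0 - 2.0 * out) * dout * ddx  # dDX/dOut  contracted with DDX

def dx_wrt_out(o):
    # First-order gradient DX as a function of Out, with DOut held fixed.
    return dout * (1.0 - o) * o

def dx_wrt_dout(g):
    # First-order gradient DX as a function of DOut, with Out held fixed.
    return g * (1.0 - out) * out

eps = 1e-6
fd_dout_new = (dx_wrt_out(out + eps) - dx_wrt_out(out - eps)) / (2.0 * eps) * ddx
fd_ddout = (dx_wrt_dout(dout + eps) - dx_wrt_dout(dout - eps)) / (2.0 * eps) * ddx
assert np.allclose(dout_new, fd_dout_new, atol=1e-8)
assert np.allclose(ddout, fd_ddout, atol=1e-8)
print("DOutNew and DDOut match the finite-difference checks")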

python/paddle/fluid/tests/unittests/test_activation_nn_grad.py

Lines changed: 22 additions & 0 deletions
@@ -26,6 +26,28 @@
 from decorator_helper import prog_scope
 
 
+class TestSigmoidDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        shape = [2, 3, 7, 9]
+        eps = 0.0005
+        dtype = np.float64
+        x = layers.data('x', shape, False, dtype=dtype)
+        x.persistable = True
+        y = layers.sigmoid(x)
+        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+        x_arr[np.abs(x_arr) < 0.005] = 0.002
+        gradient_checker.double_grad_check(
+            [x], y, x_init=x_arr, place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
 class TestTanhDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
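Beyond the static-graph gradient_checker test above, the new double-grad op can also be exercised from dynamic-graph mode. A minimal sketch (an illustration assuming a Paddle build that includes this patch, not part of the test file):

import paddle

x = paddle.to_tensor([0.1, -0.2, 0.3], stop_gradient=False)
y = paddle.nn.functional.sigmoid(x)

# First-order gradient; create_graph=True keeps the backward graph so it can
# be differentiated again, which is what dispatches sigmoid_grad_grad.
(dx,) = paddle.grad(y, x, create_graph=True)

# Second-order gradient of sigmoid w.r.t. x:
# d2y/dx2 = (1 - 2*sigmoid(x)) * sigmoid(x) * (1 - sigmoid(x))
(ddx,) = paddle.grad(dx.sum(), x)
print(ddx.numpy())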
