@@ -62,47 +62,48 @@ class LayerNormOp : public framework::OperatorWithKernel {
// Op maker for layer normalization: Make() registers the operator's inputs,
// outputs, attributes (with their defaults and validity checks) and the
// operator-level documentation string.
6262class LayerNormOpMaker : public framework ::OpProtoAndCheckerMaker {
6363 public:
// Declares the full operator interface; each Add* call pairs a name with the
// human-readable description that follows it.
6464 void Make () override {
// Inputs: X is always required; Scale and Bias are marked AsDispensable(),
// matching the "(optional)" wording in their descriptions.
65- AddInput (" X" , " (LoDTensor) The input tensor." );
65+ AddInput (" X" , " The input tensor." );
6666 AddInput (" Scale" ,
67- " (Tensor, optional) Scale is a 1-dimensional tensor of size "
67+ " (optional) Scale is a 1-dimensional tensor of size "
6868 " H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])."
6969 " It is applied to the output." )
7070 .AsDispensable ();
7171 AddInput (" Bias" ,
72- " (Tensor, optional) Bias is a 1-dimensional tensor of size "
72+ " (optional) Bias is a 1-dimensional tensor of size "
7373 " H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])."
7474 " It is applied to the output." )
7575 .AsDispensable ();
// Outputs: Y is the normalized result; Mean and Variance are additionally
// flagged AsIntermediate().
76- AddOutput (" Y" , " (LoDTensor) Result after normalization." );
77- AddOutput (" Mean" , " (Tensor) Mean of the current mini batch." )
78- .AsIntermediate ();
79- AddOutput (" Variance" , " (Tensor) Variance of the current mini batch." )
76+ AddOutput (" Y" , " Result after normalization." );
77+ AddOutput (" Mean" , " Mean of the current mini batch." ).AsIntermediate ();
78+ AddOutput (" Variance" , " Variance of the current mini batch." )
8079 .AsIntermediate ();
8180
// Attribute `epsilon`: defaults to 1e-5; the custom checker rejects any value
// outside the closed interval [0.0, 0.001].
8281 AddAttr<float >(" epsilon" ,
83- " (float, default 1e-5) Constant for "
84- " numerical stability" )
82+ " Constant for numerical stability [default 1e-5]." )
8583 .SetDefault (1e-5 )
8684 .AddCustomChecker ([](const float &epsilon) {
8785 PADDLE_ENFORCE (epsilon >= 0 .0f && epsilon <= 0 .001f ,
8886 " 'epsilon' should be between 0.0 and 0.001." );
8987 });
// Attribute `begin_norm_axis`: defaults to 1; the custom checker requires a
// value strictly greater than zero.
9088 AddAttr<int >(" begin_norm_axis" ,
91- " (int default:1), the "
92- " axis of `begin_norm_axis ... Rank(X) - 1` will be "
89+ " the axis of `begin_norm_axis ... Rank(X) - 1` will be "
9390 " normalized. `begin_norm_axis` splits the tensor(`X`) to a "
94- " matrix [N,H]." )
91+ " matrix [N,H]. [default 1]. " )
9592 .SetDefault (1 )
9693 .AddCustomChecker ([](const int &begin_norm_axis) {
9794 PADDLE_ENFORCE_GT (begin_norm_axis, 0 ,
9895 " 'begin_norm_axis' should be greater than zero." );
9996 });
10097
// Operator-level documentation, passed as a raw string literal.
// NOTE(review): the `begin_norm_axis` description above gives the normalized
// range as `begin_norm_axis ... Rank(X) - 1`, while the text below writes
// `begin_norm_axis ... rank(input)` -- confirm which upper bound is intended.
// NOTE(review): the text below refers to :attr:`scale` and :attr:`shift`,
// whereas the inputs registered here are named Scale and Bias; presumably
// those are argument names of a wrapper API -- verify against the caller.
10198 AddComment (R"DOC(
102- Layer Normalization.
103- Layer Norm has been implemented as discussed in the paper:
104- https://arxiv.org/abs/1607.06450
105- ...
99+ Assume feature vectors exist on dimensions
100+ :attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics
101+ along these dimensions for each feature vector :math:`a` with size
102+ :math:`H`, then normalize each feature vector using the corresponding
103+ statistics. After that, apply learnable gain and bias on the normalized
104+ tensor to scale and shift if :attr:`scale` and :attr:`shift` are set.
105+
106+ Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_
106107)DOC" );
107108 }
108109};
0 commit comments