@@ -124,9 +124,9 @@ class FusedAttentionOp : public framework::OperatorWithKernel {
124124 PADDLE_ENFORCE_EQ (y_dim.size (),
125125 2 ,
126126 common::errors::InvalidArgument (
127- " The dimensions of qkv_weight must be 2 if enable"
128- " transpose_qkv_wb: (dim_embed, 3 * dim_embed),"
129- " but received dimensions of"
127+ " The dimensions of qkv_weight must be 2 if enable "
128+ " transpose_qkv_wb: (dim_embed, 3 * dim_embed), "
129+ " but received dimensions of "
130130 " Input is [%d]" ,
131131 y_dim.size ()));
132132 PADDLE_ENFORCE_GT (num_heads,
@@ -159,7 +159,7 @@ class FusedAttentionOp : public framework::OperatorWithKernel {
159159 PADDLE_ENFORCE_EQ (y_dim.size (),
160160 4 ,
161161 common::errors::InvalidArgument (
162- " The dimensions of qkv_weight must be 4 if not"
162+ " The dimensions of qkv_weight must be 4 if not "
163163 " enable transpose_qkv_wb: (3, num_head, dim_head, "
164164 " dim_embed), but received [%d]" ,
165165 y_dim.size ()));
@@ -186,8 +186,8 @@ class FusedAttentionOp : public framework::OperatorWithKernel {
186186 x_dim.size (),
187187 3 ,
188188 common::errors::InvalidArgument (" The dimensions of x must be 3"
189- " (batch_size, seq_len, dim_embed),"
190- " but received dimensions of"
189+ " (batch_size, seq_len, dim_embed), "
190+ " but received dimensions of "
191191 " Input is [%d]" ,
192192 x_dim.size ()));
193193
@@ -431,7 +431,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker {
431431 " attn_dropout_implementation" ,
432432 " [\" downgrade_in_infer\" |\" upscale_in_train\" ]"
433433 " There are two kinds of ways to implement dropout"
434- " (the mask below is a tensor have the same shape with input"
434+ " (the mask below is a tensor have the same shape with input, "
435435 " the value of mask is 0 or 1, the ratio of 0 is dropout_rate)"
436436 " 1. downgrade_in_infer(default), downgrade the outcome at inference "
437437 " time"
0 commit comments