    'im2sequence',
    'nce',
    'beam_search',
    'row_conv',
]

@@ -193,7 +194,7 @@ def embedding(input,
193194 """
194195 **Embedding Layer**
195196
196- This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
197+ This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
197198 a lookup table. The result of this lookup is the embedding of each ID in the
198199 :attr:`input`.
199200
@@ -208,8 +209,8 @@ def embedding(input,
        is_sparse(bool): The flag indicating whether to use sparse update.
        padding_idx(int|long|None): If :attr:`None`, it has no effect on the
            lookup. Otherwise the given :attr:`padding_idx` indicates that the
            output is padded with zeros whenever the lookup encounters it in
            :attr:`input`. If :math:`padding\_idx < 0`, the padding_idx to use
            in the lookup is :math:`size[0] + padding\_idx`.
        param_attr(ParamAttr): Parameters for this layer.
        dtype(np.dtype|core.DataType|str): The type of data: float32, float_16, int, etc.
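A hedged usage sketch for the lookup described above (the dictionary size, embedding width, and variable names are illustrative; `fluid` is assumed to be imported as in the other examples in this file):

.. code-block:: python

    # IDs arrive as an int64 LoD sequence; padding_idx=0 makes every
    # occurrence of ID 0 come back as an all-zero embedding row.
    data = fluid.layers.data(name='ids', shape=[1], dtype='int64', lod_level=1)
    emb = fluid.layers.embedding(
        input=data,
        size=[10000, 64],  # [dict_size, embedding_dim]
        padding_idx=0,
        dtype='float32')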
@@ -396,9 +397,9 @@ def dynamic_gru(input,
396397 """
397398 **Dynamic GRU Layer**
398399
399- Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
400+ Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
400401 Sequence Modeling <https://arxiv.org/abs/1412.3555>`_
401-
402+
402403 The formula is as follows:
403404
404405 .. math::
@@ -408,47 +409,47 @@ def dynamic_gru(input,
        r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)

        \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)

        h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \\tilde{h_t}

    The :math:`\odot` is the element-wise product of the vectors. :math:`act_g`
    is the activation function for the update gate and reset gate, and
    :math:`sigmoid` is usually used for it. :math:`act_c` is the activation
    function for the candidate hidden state, and :math:`tanh` is usually used
    for it.

    Note that these :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` operations on
    the input :math:`x_{t}` are NOT included in this operator. Users can choose
    to use a fully-connected layer before the GRU layer.

    Args:
        input(Variable): The input of the dynamic_gru layer, which supports
            variable-length input sequences. The underlying tensor in this
            Variable is a matrix with shape :math:`(T \\times 3D)`, where
            :math:`T` is the total number of time steps in this mini-batch and
            :math:`D` is the hidden size.
        size(int): The dimension of the GRU cell.
        param_attr(ParamAttr|None): The parameter attribute for the learnable
            hidden-hidden weight matrix. Note:

            - The shape of the weight matrix is :math:`(D \\times 3D)`, where
              :math:`D` is the hidden size.
            - All elements in the weight matrix can be divided into two parts.
              The first part contains the weights of the update gate and reset
              gate, with shape :math:`(D \\times 2D)`; the second part contains
              the weights for the candidate hidden state, with shape
              :math:`(D \\times D)`.
        bias_attr(ParamAttr): The parameter attribute for the learnable
            hidden-hidden bias.
        is_reverse(bool): Whether to compute the reversed GRU. Default
            :attr:`False`.
        gate_activation(str): The activation for the update gate and reset gate.
            Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid".
        activation(str): The activation for the candidate hidden state.
            Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh".

    Returns:
        Variable: The hidden state of the GRU, with shape :math:`(T \\times D)`;
            the lod is the same as that of the input.

    Examples:
        .. code-block:: python

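            # A minimal sketch, assuming `emb` is an already-embedded int64
            # LoD sequence such as the one built in the embedding example
            # above. The input projections (W_ux, W_rx, W_cx) are NOT part of
            # this op, so the fc output width must be 3 * hidden_dim.
            hidden_dim = 512
            x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
            hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)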
@@ -2564,3 +2565,56 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
            'paddings': padding,
        })
    return out


def row_conv(input, future_context_size, param_attr=None, act=None):
    """Row Conv Operator. This layer applies lookahead convolution to
    **input**. The input variable should be a 2-D LoDTensor with shape [T, D].
    Parameters with shape [future_context_size + 1, D] will be created. The
    equation of row convolution is as follows:

    .. math::
        Out_{i} = \sum_{j = i} ^ {i + \\tau} X_{j} \odot W_{j - i}

    In the above equation:

    * :math:`Out_{i}`: The i-th row of the output variable, with shape [1, D].
    * :math:`\\tau`: Future context size.
    * :math:`X_{j}`: The j-th row of the input variable, with shape [1, D].
    * :math:`W_{j-i}`: The (j-i)-th row of the parameters, with shape [1, D].

    For more details about row_conv, please refer to the paper \
    (http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf) and
    the design document \
    (https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645).

    Args:
        input (Variable): The input variable, a 2-D LoDTensor with shape [T, D].
        future_context_size (int): Future context size. Please note that the
            shape of the convolution kernel is [future_context_size + 1, D].
        param_attr (ParamAttr): Attributes of the parameters, including
            name, initializer, etc.
        act (str): The non-linear activation to be applied to the output variable.

    Returns:
        Variable: The output tensor, with the same shape as the input tensor.

    Examples:
        .. code-block:: python

            x = fluid.layers.data(name='x', shape=[16],
                                  dtype='float32', lod_level=1)
            out = fluid.layers.row_conv(input=x, future_context_size=2)
    """
    helper = LayerHelper('row_conv', **locals())
    dtype = helper.input_dtype()
    # One filter row per step: the current step plus `future_context_size`
    # lookahead steps, each of width D (the input's second dimension).
    filter_shape = [future_context_size + 1, input.shape[1]]
    filter_param = helper.create_parameter(
        attr=helper.param_attr, shape=filter_shape, dtype=dtype)
    out = helper.create_tmp_variable(dtype)
    helper.append_op(
        type='row_conv',
        inputs={'X': [input],
                'Filter': [filter_param]},
        outputs={'Out': [out]})
    return helper.append_activation(out)
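As a sanity check on the equation in the docstring, here is a hedged NumPy reference of what a single forward pass computes (pure illustration under the :math:`W_{j-i}` indexing above, not the operator's actual kernel; `row_conv_ref` is a hypothetical name):

.. code-block:: python

    import numpy as np

    def row_conv_ref(X, W):
        # X: [T, D] input rows; W: [future_context_size + 1, D] filter rows.
        T = X.shape[0]
        tau = W.shape[0] - 1
        out = np.zeros_like(X)
        for i in range(T):
            # Out_i sums the current row and up to tau future rows, each
            # weighted element-wise by the matching filter row W_{j-i}.
            for j in range(i, min(i + tau + 1, T)):
                out[i] += X[j] * W[j - i]
        return out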