    'im2sequence',
    'nce',
    'beam_search',
    'row_conv',
]

@@ -193,7 +194,7 @@ def embedding(input,
193194 """
194195 **Embedding Layer**
195196
196- This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
197+ This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
197198 a lookup table. The result of this lookup is the embedding of each ID in the
198199 :attr:`input`.
199200
@@ -208,8 +209,8 @@ def embedding(input,
        is_sparse(bool): The flag indicating whether to use sparse update.
        padding_idx(int|long|None): If :attr:`None`, it has no effect on the
            lookup. Otherwise the given :attr:`padding_idx` indicates that the
            output is padded with zeros whenever the lookup encounters it in
            :attr:`input`. If :math:`padding\_idx < 0`, the padding_idx to use
            in the lookup is :math:`size[0] + padding\_idx`.
        param_attr(ParamAttr): Parameters for this layer.
        dtype(np.dtype|core.DataType|str): The type of data: float32, float_16, int, etc.
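A hedged usage sketch for the lookup described above (the dictionary size, embedding width, and variable names are illustrative; `fluid` is assumed to be imported as in the other examples in this file):

.. code-block:: python

    # IDs arrive as an int64 LoD sequence; padding_idx=0 makes every
    # occurrence of ID 0 come back as an all-zero embedding row.
    data = fluid.layers.data(name='ids', shape=[1], dtype='int64', lod_level=1)
    emb = fluid.layers.embedding(
        input=data,
        size=[10000, 64],  # [dict_size, embedding_dim]
        padding_idx=0,
        dtype='float32')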
@@ -396,9 +397,9 @@ def dynamic_gru(input,
396397 """
397398 **Dynamic GRU Layer**
398399
399- Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
400+ Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
400401 Sequence Modeling <https://arxiv.org/abs/1412.3555>`_
401-
402+
402403 The formula is as follows:
403404
404405 .. math::
@@ -408,47 +409,47 @@ def dynamic_gru(input,
        r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)

        \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)

        h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \\tilde{h_t}

    The :math:`\odot` is the element-wise product of the vectors. :math:`act_g`
    is the activation function for the update gate and reset gate, and
    :math:`sigmoid` is usually used for it. :math:`act_c` is the activation
    function for the candidate hidden state, and :math:`tanh` is usually used
    for it.

    Note that these :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` operations on
    the input :math:`x_{t}` are NOT included in this operator. Users can choose
    to use a fully-connected layer before the GRU layer.

    Args:
        input(Variable): The input of the dynamic_gru layer, which supports
            variable-length input sequences. The underlying tensor in this
            Variable is a matrix with shape :math:`(T \\times 3D)`, where
            :math:`T` is the total number of time steps in this mini-batch and
            :math:`D` is the hidden size.
        size(int): The dimension of the GRU cell.
        param_attr(ParamAttr|None): The parameter attribute for the learnable
            hidden-hidden weight matrix. Note:

            - The shape of the weight matrix is :math:`(D \\times 3D)`, where
              :math:`D` is the hidden size.
            - All elements in the weight matrix can be divided into two parts.
              The first part contains the weights of the update gate and reset
              gate, with shape :math:`(D \\times 2D)`; the second part contains
              the weights for the candidate hidden state, with shape
              :math:`(D \\times D)`.
        bias_attr(ParamAttr): The parameter attribute for the learnable
            hidden-hidden bias.
        is_reverse(bool): Whether to compute the reversed GRU. Default
            :attr:`False`.
        gate_activation(str): The activation for the update gate and reset gate.
            Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid".
        activation(str): The activation for the candidate hidden state.
            Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh".

    Returns:
        Variable: The hidden state of the GRU, with shape :math:`(T \\times D)`;
            the lod is the same as that of the input.

    Examples:
        .. code-block:: python

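            # A minimal sketch, assuming `emb` is an already-embedded int64
            # LoD sequence such as the one built in the embedding example
            # above. The input projections (W_ux, W_rx, W_cx) are NOT part of
            # this op, so the fc output width must be 3 * hidden_dim.
            hidden_dim = 512
            x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
            hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)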
@@ -2564,3 +2565,56 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
            'paddings': padding,
        })
    return out


def row_conv(input, future_context_size, param_attr=None, act=None):
    """Row Conv Operator. This layer applies lookahead convolution to
    **input**. The input variable should be a 2-D LoDTensor with shape [T, D].
    Parameters with shape [future_context_size + 1, D] will be created. The
    equation of row convolution is as follows:

    .. math::
        Out_{i} = \sum_{j = i} ^ {i + \\tau} X_{j} \odot W_{j - i}

    In the above equation:

    * :math:`Out_{i}`: The i-th row of the output variable, with shape [1, D].
    * :math:`\\tau`: Future context size.
    * :math:`X_{j}`: The j-th row of the input variable, with shape [1, D].
    * :math:`W_{j-i}`: The (j-i)-th row of the parameters, with shape [1, D].

    For more details about row_conv, please refer to the paper \
    (http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf) and
    the design document \
    (https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645).

    Args:
        input (Variable): The input variable, a 2-D LoDTensor with shape [T, D].
        future_context_size (int): Future context size. Please note that the
            shape of the convolution kernel is [future_context_size + 1, D].
        param_attr (ParamAttr): Attributes of the parameters, including
            name, initializer, etc.
        act (str): The non-linear activation to be applied to the output variable.

    Returns:
        Variable: The output tensor, with the same shape as the input tensor.

    Examples:
        .. code-block:: python

            x = fluid.layers.data(name='x', shape=[16],
                                  dtype='float32', lod_level=1)
            out = fluid.layers.row_conv(input=x, future_context_size=2)
    """
    helper = LayerHelper('row_conv', **locals())
    dtype = helper.input_dtype()
    # One filter row per step: the current step plus `future_context_size`
    # lookahead steps, each of width D (the input's second dimension).
    filter_shape = [future_context_size + 1, input.shape[1]]
    filter_param = helper.create_parameter(
        attr=helper.param_attr, shape=filter_shape, dtype=dtype)
    out = helper.create_tmp_variable(dtype)
    helper.append_op(
        type='row_conv',
        inputs={'X': [input],
                'Filter': [filter_param]},
        outputs={'Out': [out]})
    return helper.append_activation(out)
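As a sanity check on the equation in the docstring, here is a hedged NumPy reference of what a single forward pass computes (pure illustration under the :math:`W_{j-i}` indexing above, not the operator's actual kernel; `row_conv_ref` is a hypothetical name):

.. code-block:: python

    import numpy as np

    def row_conv_ref(X, W):
        # X: [T, D] input rows; W: [future_context_size + 1, D] filter rows.
        T = X.shape[0]
        tau = W.shape[0] - 1
        out = np.zeros_like(X)
        for i in range(T):
            # Out_i sums the current row and up to tau future rows, each
            # weighted element-wise by the matching filter row W_{j-i}.
            for j in range(i, min(i + tau + 1, T)):
                out[i] += X[j] * W[j - i]
        return out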