Merged
34 commits
43c4a00
API(Compute) error message enhancement on line 44, 50, 53.
smallv0221 Sep 24, 2020
2725f51
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
smallv0221 Sep 24, 2020
a35a2ee
lstm_unit error message enhancement.
smallv0221 Sep 25, 2020
d0c9f7e
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
smallv0221 Sep 25, 2020
1e66171
Update lstm_unit_op.cc
smallv0221 Sep 27, 2020
31fee18
Update lstm_unit_op.h
smallv0221 Sep 27, 2020
eef287d
error msg enhancement.
smallv0221 Sep 27, 2020
6447a5e
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
smallv0221 Sep 27, 2020
f6d3908
Update sequence_conv_op.cc
smallv0221 Sep 27, 2020
6cb4f06
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
smallv0221 Sep 28, 2020
bbf9e9d
Merge branch 'yxp0925' of https://github.com/smallv0221/Paddle into y…
smallv0221 Sep 28, 2020
3f948be
Update lstm_unit_op.cc
smallv0221 Sep 29, 2020
ce4398f
Update sequence_conv_op.cc
smallv0221 Sep 29, 2020
0d31b52
Update sequence_enumerate_op.cc
smallv0221 Sep 29, 2020
f9bc7b7
Update sequence_enumerate_op.cu
smallv0221 Sep 29, 2020
14ea1f7
Update sequence_enumerate_op.h
smallv0221 Sep 29, 2020
17812b1
Update sequence_pool_op.h
smallv0221 Sep 29, 2020
2c369cd
Merge branch 'yxp0925' of https://github.com/smallv0221/Paddle into y…
smallv0221 Sep 29, 2020
f11c5cf
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
smallv0221 Sep 30, 2020
e38ad8e
error message enhencement.
smallv0221 Sep 30, 2020
70f709a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
smallv0221 Sep 30, 2020
10d1e19
error message enhancement.
smallv0221 Sep 30, 2020
7a8e0b0
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
smallv0221 Sep 30, 2020
cb35e69
Fix en doc for rnn.py. test=document_fix
smallv0221 Oct 12, 2020
14569c9
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
smallv0221 Oct 12, 2020
a7eefa8
fix doc
smallv0221 Oct 12, 2020
65073f0
fix doc.
smallv0221 Oct 12, 2020
8d42057
fix doc
smallv0221 Oct 12, 2020
58c3611
fix doc
smallv0221 Oct 12, 2020
a143785
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
smallv0221 Oct 12, 2020
4911417
fix doc
smallv0221 Oct 12, 2020
36d6bfe
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
smallv0221 Oct 12, 2020
48a737b
fix doc
smallv0221 Oct 13, 2020
8a1dc77
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
smallv0221 Oct 13, 2020
133 changes: 95 additions & 38 deletions python/paddle/nn/layer/rnn.py
@@ -160,7 +160,7 @@ def get_initial_states(self,
states. For `batch_ref`'s shape d, `d[batch_dim_idx]` is
treated as batch size.
shape (list|tuple, optional): A (possibly nested structure of) shape[s],
where a shape is a list/tuple of integer). `-1` (for batch size)
where a shape is a list/tuple of integers. `-1` (for batch size)
Contributor

A blank line needs to be added before lines 157 and 177; otherwise there is a rendering bug.

will be automatically prepended if a shape does not start with
it. If None, property `state_shape` will be used. Defaults to
None.
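The `-1` prepending described here can be summarized in a short sketch. `prepend_batch_dim` is a hypothetical helper written only to illustrate the documented behavior; it is not a function in Paddle:

def prepend_batch_dim(shape, batch_size):
    # Per the docstring: if a state shape does not already start with -1,
    # -1 (standing for the batch size) is prepended, then resolved from
    # batch_ref's d[batch_dim_idx].
    shape = list(shape)
    if not shape or shape[0] != -1:
        shape = [-1] + shape
    return [batch_size if d == -1 else d for d in shape]

print(prepend_batch_dim([32], batch_size=4))      # [4, 32]
print(prepend_batch_dim([-1, 32], batch_size=4))  # [4, 32]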
@@ -268,11 +268,9 @@ class SimpleRNNCell(RNNCellBase):
The formula used is as follows:

.. math::
h_{t} & = \mathrm{tanh}(W_{ih}x_{t} + b_{ih} + W_{hh}h{t-1} + b_{hh})
h_{t} & = \mathrm{tanh}(W_{ih}x_{t} + b_{ih} + W_{hh}h_{t-1} + b_{hh})

y_{t} & = h_{t}

where :math:`\sigma` is the sigmoid fucntion, and \* is the elemetwise
multiplication operator.

Please refer to `Finding Structure in Time
<https://crl.ucsd.edu/~elman/Papers/fsit.pdf>`_ for more details.
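For readers checking the corrected formula, here is a minimal NumPy sketch of one cell step; the `[hidden_size, input_size]` weight layout is an assumption made for illustration and need not match Paddle's parameter storage:

import numpy as np

def simple_rnn_cell_step(x, h_prev, W_ih, b_ih, W_hh, b_hh):
    # h_t = tanh(W_ih x_t + b_ih + W_hh h_{t-1} + b_hh); y_t = h_t
    h = np.tanh(x @ W_ih.T + b_ih + h_prev @ W_hh.T + b_hh)
    return h, h

rng = np.random.default_rng(0)
x = rng.standard_normal((4, 16))       # [batch_size, input_size]
h_prev = rng.standard_normal((4, 32))  # [batch_size, hidden_size]
W_ih = rng.standard_normal((32, 16))
W_hh = rng.standard_normal((32, 32))
y, h = simple_rnn_cell_step(x, h_prev, W_ih, np.zeros(32), W_hh, np.zeros(32))
print(y.shape)  # (4, 32)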
@@ -329,13 +327,15 @@ class SimpleRNNCell(RNNCellBase):
.. code-block:: python

import paddle
paddle.disable_static()

x = paddle.randn((4, 16))
prev_h = paddle.randn((4, 32))

cell = paddle.nn.SimpleRNNCell(16, 32)
y, h = cell(x, prev_h)
print(y.shape)

#[4,32]

"""

@@ -407,14 +407,20 @@ class LSTMCell(RNNCellBase):

.. math::
i_{t} & = \sigma(W_{ii}x_{t} + b_{ii} + W_{hi}h_{t-1} + b_{hi})

f_{t} & = \sigma(W_{if}x_{t} + b_{if} + W_{hf}h_{t-1} + b_{hf})

o_{t} & = \sigma(W_{io}x_{t} + b_{io} + W_{ho}h_{t-1} + b_{ho})
\\widetilde{c}_{t} & = \\tanh (W_{ig}x_{t} + b_{ig} + W_{hg}h_{t-1} + b_{hg})
c_{t} & = f_{t} \* c{t-1} + i{t} \* \\widetile{c}_{t}
h_{t} & = o_{t} \* \\tanh(c_{t})

\widetilde{c}_{t} & = \tanh (W_{ig}x_{t} + b_{ig} + W_{hg}h_{t-1} + b_{hg})

c_{t} & = f_{t} * c_{t-1} + i_{t} * \widetilde{c}_{t}

h_{t} & = o_{t} * \tanh(c_{t})

y_{t} & = h_{t}

where :math:`\sigma` is the sigmoid fucntion, and \* is the elemetwise
where :math:`\sigma` is the sigmoid function, and * is the elementwise
multiplication operator.

Please refer to `An Empirical Exploration of Recurrent Network Architectures
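As a cross-check of the corrected equations, a compact NumPy sketch of one LSTMCell step follows; the fused `(i, f, g, o)` gate layout is an assumption made for brevity, not Paddle's internal parameter layout:

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def lstm_cell_step(x, h_prev, c_prev, W, U, b):
    gates = x @ W.T + h_prev @ U.T + b        # [batch, 4 * hidden]
    i, f, g, o = np.split(gates, 4, axis=-1)  # assumed gate order (i, f, g, o)
    i, f, o = sigmoid(i), sigmoid(f), sigmoid(o)
    c = f * c_prev + i * np.tanh(g)  # c_t = f_t * c_{t-1} + i_t * ~c_t
    h = o * np.tanh(c)               # h_t = o_t * tanh(c_t)
    return h, (h, c)                 # y_t = h_t

rng = np.random.default_rng(0)
x = rng.standard_normal((4, 16))
h0 = np.zeros((4, 32)); c0 = np.zeros((4, 32))
W = rng.standard_normal((128, 16)); U = rng.standard_normal((128, 32))
y, (h, c) = lstm_cell_step(x, h0, c0, W, U, np.zeros(128))
print(y.shape, h.shape, c.shape)  # (4, 32) (4, 32) (4, 32)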
@@ -462,7 +468,7 @@ class LSTMCell(RNNCellBase):
corresponding to :math:`h_{t}` in the formula.
states (tuple): a tuple of two tensors, each of shape
`[batch_size, hidden_size]`, the new hidden states,
corresponding to :math:`h_{t}, c{t}` in the formula.
corresponding to :math:`h_{t}, c_{t}` in the formula.

Notes:
All the weights and bias are initialized with `Uniform(-std, std)` by
@@ -475,7 +481,6 @@ class LSTMCell(RNNCellBase):
.. code-block:: python

import paddle
paddle.disable_static()

x = paddle.randn((4, 16))
prev_h = paddle.randn((4, 32))
@@ -484,6 +489,14 @@ class LSTMCell(RNNCellBase):
cell = paddle.nn.LSTMCell(16, 32)
y, (h, c) = cell(x, (prev_h, prev_c))

print(y.shape)
print(h.shape)
print(c.shape)

#[4,32]
#[4,32]
#[4,32]

"""

def __init__(self,
@@ -559,15 +572,19 @@ class GRUCell(RNNCellBase):

The formula used for GRU is as follows:

.. math::
.. math::

r_{t} & = \sigma(W_{ir}x_{t} + b_{ir} + W_{hr}h_{t-1} + b_{hr})
z_{t} & = \sigma(W_{iz)x_{t} + b_{iz} + W_{hz}x_{t} + b_{hz})
\\widetilde{h}_{t} & = \\tanh(W_{ic)x_{t} + b_{ic} + r_{t} \* (W_{hc}x_{t} + b{hc}))
h_{t} & = z_{t} \* h_{t-1} + (1 - z_{t}) \* \\widetilde{h}_{t}

z_{t} & = \sigma(W_{iz}x_{t} + b_{iz} + W_{hz}h_{t-1} + b_{hz})

\widetilde{h}_{t} & = \tanh(W_{ic}x_{t} + b_{ic} + r_{t} * (W_{hc}h_{t-1} + b_{hc}))

h_{t} & = z_{t} * h_{t-1} + (1 - z_{t}) * \widetilde{h}_{t}

y_{t} & = h_{t}

where :math:`\sigma` is the sigmoid fucntion, and \* is the elemetwise
where :math:`\sigma` is the sigmoid function, and * is the elementwise
multiplication operator.

Please refer to `An Empirical Exploration of Recurrent Network Architectures
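The same kind of NumPy sketch for one GRUCell step, matching the equations above (note the hidden-state terms take h_{t-1}); the `(r, z, c)` gate order is an assumption for illustration:

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def gru_cell_step(x, h_prev, W_i, b_i, W_h, b_h):
    xr, xz, xc = np.split(x @ W_i.T + b_i, 3, axis=-1)       # W_{i*} x_t + b_{i*}
    hr, hz, hc = np.split(h_prev @ W_h.T + b_h, 3, axis=-1)  # W_{h*} h_{t-1} + b_{h*}
    r = sigmoid(xr + hr)            # r_t
    z = sigmoid(xz + hz)            # z_t
    c = np.tanh(xc + r * hc)        # ~h_t: reset gate scales the hidden term
    h = z * h_prev + (1.0 - z) * c  # h_t; y_t = h_t
    return h, h

rng = np.random.default_rng(0)
x = rng.standard_normal((4, 16)); h0 = np.zeros((4, 32))
W_i = rng.standard_normal((96, 16)); W_h = rng.standard_normal((96, 32))
y, h = gru_cell_step(x, h0, W_i, np.zeros(96), W_h, np.zeros(96))
print(y.shape)  # (4, 32)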
@@ -625,14 +642,19 @@ class GRUCell(RNNCellBase):
.. code-block:: python

import paddle
paddle.disable_static()

x = paddle.randn((4, 16))
prev_h = paddle.randn((4, 32))

cell = paddle.nn.GRUCell(16, 32)
y, h = cell(x, prev_h)

print(y.shape)
print(h.shape)

#[4,32]
#[4,32]

"""

def __init__(self,
@@ -717,8 +739,8 @@ class RNN(Layer):
Inputs:
inputs (Tensor): A (possibly nested structure of) tensor[s]. The input
sequences.
If time major is True, the shape is `[batch_size, time_steps, input_size]`
If time major is False, the shape is [time_steps, batch_size, input_size]`
If time major is False, the shape is `[batch_size, time_steps, input_size]`
If time major is True, the shape is `[time_steps, batch_size, input_size]`
where `input_size` is the input size of the cell.
initial_states (Tensor|list|tuple, optional): Tensor of a possibly
nested structure of tensors, representing the initial state for
@@ -753,7 +775,6 @@ class RNN(Layer):
.. code-block:: python

import paddle
paddle.disable_static()

inputs = paddle.rand((4, 23, 16))
prev_h = paddle.randn((4, 32))
@@ -762,6 +783,12 @@ class RNN(Layer):
rnn = paddle.nn.RNN(cell)
outputs, final_states = rnn(inputs, prev_h)

print(outputs.shape)
print(final_states.shape)

#[4,23,32]
#[4,32]

"""

def __init__(self, cell, is_reverse=False, time_major=False):
@@ -841,7 +868,6 @@ class BiRNN(Layer):
.. code-block:: python

import paddle
paddle.disable_static()

cell_fw = paddle.nn.LSTMCell(16, 32)
cell_bw = paddle.nn.LSTMCell(16, 32)
@@ -850,6 +876,12 @@ class BiRNN(Layer):
inputs = paddle.rand((2, 23, 16))
outputs, final_states = rnn(inputs)

print(outputs.shape)
print(final_states[0][0].shape, len(final_states), len(final_states[0]))

#[2,23,64]
#[2,32] 2 2

"""

def __init__(self, cell_fw, cell_bw, time_major=False):
@@ -936,11 +968,9 @@ class SimpleRNN(RNNMixin):

.. math::

h_{t} & = \mathrm{tanh}(W_{ih}x_{t} + b_{ih} + W_{hh}h{t-1} + b_{hh})
h_{t} & = \mathrm{tanh}(W_{ih}x_{t} + b_{ih} + W_{hh}h_{t-1} + b_{hh})

y_{t} & = h_{t}

where :math:`\sigma` is the sigmoid fucntion, and \* is the elemetwise
multiplication operator.

Arguments:
input_size (int): The input size for the first layer's cell.
@@ -997,14 +1027,19 @@ class SimpleRNN(RNNMixin):
.. code-block:: python

import paddle
paddle.disable_static()

rnn = paddle.nn.SimpleRNN(16, 32, 2)

x = paddle.randn((4, 23, 16))
prev_h = paddle.randn((2, 4, 32))
y, h = rnn(x, prev_h)

print(y.shape)
print(h.shape)

#[4,23,32]
#[2,4,32]

"""

def __init__(self,
@@ -1077,14 +1112,20 @@ class LSTM(RNNMixin):
.. math::

i_{t} & = \sigma(W_{ii}x_{t} + b_{ii} + W_{hi}h_{t-1} + b_{hi})

f_{t} & = \sigma(W_{if}x_{t} + b_{if} + W_{hf}h_{t-1} + b_{hf})

o_{t} & = \sigma(W_{io}x_{t} + b_{io} + W_{ho}h_{t-1} + b_{ho})
\\widetilde{c}_{t} & = \\tanh (W_{ig}x_{t} + b_{ig} + W_{hg}h_{t-1} + b_{hg})
c_{t} & = f_{t} \* c{t-1} + i{t} \* \\widetile{c}_{t}
h_{t} & = o_{t} \* \\tanh(c_{t})

\widetilde{c}_{t} & = \tanh (W_{ig}x_{t} + b_{ig} + W_{hg}h_{t-1} + b_{hg})

c_{t} & = f_{t} * c_{t-1} + i_{t} * \widetilde{c}_{t}

h_{t} & = o_{t} * \tanh(c_{t})

y_{t} & = h_{t}

where :math:`\sigma` is the sigmoid fucntion, and \* is the elemetwise
where :math:`\sigma` is the sigmoid function, and * is the elementwise
multiplication operator.

Arguments:
@@ -1130,7 +1171,7 @@ class LSTM(RNNMixin):
`[batch_size, time_steps, num_directions * hidden_size]`.
Note that `num_directions` is 2 if direction is "bidirectional"
else 1.
final_states (Tensor): the final state, a tuple of two tensors, h and c.
final_states (tuple): the final state, a tuple of two tensors, h and c.
The shape of each is
`[num_layers * num_directions, batch_size, hidden_size]`.
Note that `num_directions` is 2 if direction is "bidirectional"
Expand All @@ -1141,7 +1182,6 @@ class LSTM(RNNMixin):
.. code-block:: python

import paddle
paddle.disable_static()

rnn = paddle.nn.LSTM(16, 32, 2)

@@ -1150,6 +1190,14 @@ class LSTM(RNNMixin):
prev_c = paddle.randn((2, 4, 32))
y, (h, c) = rnn(x, (prev_h, prev_c))

print(y.shape)
print(h.shape)
print(c.shape)

#[4,23,32]
#[2,4,32]
#[2,4,32]

"""

def __init__(self,
@@ -1215,12 +1263,16 @@ class GRU(RNNMixin):
.. math::

r_{t} & = \sigma(W_{ir}x_{t} + b_{ir} + W_{hr}h_{t-1} + b_{hr})
z_{t} & = \sigma(W_{iz)x_{t} + b_{iz} + W_{hz}x_{t} + b_{hz})
\\widetilde{h}_{t} & = \\tanh(W_{ic)x_{t} + b_{ic} + r_{t} \* (W_{hc}x_{t} + b{hc}))
h_{t} & = z_{t} \* h_{t-1} + (1 - z_{t}) \* \\widetilde{h}_{t}

z_{t} & = \sigma(W_{iz}x_{t} + b_{iz} + W_{hz}h_{t-1} + b_{hz})

\widetilde{h}_{t} & = \tanh(W_{ic}x_{t} + b_{ic} + r_{t} * (W_{hc}h_{t-1} + b_{hc}))

h_{t} & = z_{t} * h_{t-1} + (1 - z_{t}) * \widetilde{h}_{t}

y_{t} & = h_{t}

where :math:`\sigma` is the sigmoid fucntion, and \* is the elemetwise
where :math:`\sigma` is the sigmoid function, and * is the elementwise
multiplication operator.

Arguments:
Expand Down Expand Up @@ -1277,14 +1329,19 @@ class GRU(RNNMixin):
.. code-block:: python

import paddle
paddle.disable_static()

rnn = paddle.nn.GRU(16, 32, 2)

x = paddle.randn((4, 23, 16))
prev_h = paddle.randn((2, 4, 32))
y, h = rnn(x, prev_h)

print(y.shape)
print(h.shape)

#[4,23,32]
#[2,4,32]

"""

def __init__(self,