Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions paddle/gserver/layers/AverageLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ namespace paddle {
* If SequenceLevel = kNonSeq:
* Output: output size is the number of input sequences (NOT input instances)
* output[i] = average_{for each instance in this sequence}{input[i]}
* If stride_ > 0:
* Output: a shortened sequence. Stride is the step size by which we slide a
* window over the input sequence, and the average pooling
* operation is then applied to each interval independently.
* If SequenceLevel = kSeq:
* Check: the input sequence must have sub-sequences
* Output: output size is the number of input sub-sequences
Expand Down
4 changes: 4 additions & 0 deletions paddle/gserver/layers/MaxLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ namespace paddle {
* If SequenceLevel = kNonSeq:
* Output: output size is the number of input sequences (NOT input instances)
* output[i] = max_{for each instance in this sequence}{input[i]}
* If stride_ > 0:
* Output: a shortened sequence. Stride is the step size by which we slide a
* window over the input sequence, and the max pooling operation is
* then applied to each interval independently.
* If SequenceLevel = kSeq:
* Check: the input sequence must have sub-sequences
* Output: output size is the number of input sub-sequences
Expand Down
10 changes: 4 additions & 6 deletions paddle/gserver/layers/SequenceLastInstanceLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,9 @@ namespace paddle {
* If SequenceLevel = kNonseq:
* Output: a sequence containing only the last instance of the input sequence
* If stride_ > 0:
* Output: a shorten sequence. The operation of getting last instance of a
* sequence is independently performed on every slice of the input
* sequence, which is obtained by sliding a window with the window
* size set to stride_.
* Output: a shortened sequence. Stride is the step size by which we slide a
* window over the input sequence, and the last-instance selection
* is then applied to each interval independently.
* If SequenceLevel = kSeq:
* Check: the input sequence must have sub-sequences
* Output: a sequence containing only the last instance of each sub-sequence
Expand Down Expand Up @@ -73,8 +72,7 @@ bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
void SequenceLastInstanceLayer::forward(PassType passType) {
SequencePoolLayer::forward(passType);

auto starts = (stride_ > 0) ? stridePositions_->getData()
: startPositions_->getData(false);
auto starts = startPositions_->getData(false);
MatrixPtr inputValue = getInputValue(0);
MatrixPtr outputValue = getOutputValue();

Expand Down
5 changes: 2 additions & 3 deletions paddle/gserver/layers/SequencePoolLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,8 @@ void SequencePoolLayer::forward(PassType passType) {
if (stride_ > 0) {
CHECK_EQ(input.hasSubseq(), 0UL)
<< "sequence stride pooling is invalid for hasSubseq now";
output_.poolSequenceWithStride(
input, stride_, &stridePositions_, reversed_);
newBatchSize_ = stridePositions_->getSize() - 1;
output_.poolSequenceWithStride(input, stride_, &startPositions_, reversed_);
newBatchSize_ = startPositions_->getSize() - 1;
}

resetOutput(newBatchSize_, dim);
Expand Down
7 changes: 3 additions & 4 deletions paddle/gserver/layers/SequencePoolLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ namespace paddle {
* sequence}{input[i]}
* If stride_ > 0:
* Check input sequence must not have sub-sequence
* Output: a shorten sequence, pooling is performed upon a small local
* area
* Output: a shortened sequence. Stride is the step size by which we slide
* a window over the input sequence, and the pooling operation
* is then applied to each interval independently.
* If SequenceLevel = kSeq:
* Check: the input sequence must have sub-sequences
* Output: output size is the number of input sub-sequences
Expand All @@ -47,8 +48,6 @@ class SequencePoolLayer : public Layer {
size_t newBatchSize_;
ICpuGpuVectorPtr startPositions_;
int stride_;
// Store the start position of each window.
IVectorPtr stridePositions_;
// Whether the input sequence is reversed or not.
bool reversed_ = false;

Expand Down
12 changes: 10 additions & 2 deletions paddle/gserver/tests/test_LayerGrad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -845,8 +845,12 @@ void testDegradeLayer(bool hasSubseq,

TEST(Layer, MaxLayer) {
testDegradeLayer(false, "max", "non-seq", -1); // seq max to non-seq
testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq
testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq
testDegradeLayer(false,
"max",
"non-seq",
5); // seq max to a shortened seq, stride window = 5
testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq
testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq
}

TEST(Layer, SequenceLastInstanceLayer) {
Expand All @@ -868,6 +872,10 @@ TEST(Layer, SequenceLastInstanceLayer) {

TEST(Layer, AverageLayer) {
testDegradeLayer(false, "average", "non-seq", -1); // seq average to non-seq
testDegradeLayer(false,
"average",
"non-seq",
5); // seq average to a shortened seq, stride window = 5
testDegradeLayer(
true, "average", "non-seq", -1); // hasSubseq average to non-seq
testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq
Expand Down
6 changes: 3 additions & 3 deletions paddle/parameter/Argument.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ void Argument::degradeSequence(const Argument& input) {

void Argument::poolSequenceWithStride(const Argument& input,
size_t stride,
IVectorPtr* stridePostions,
ICpuGpuVectorPtr* stridePostions,
bool reversed) {
// If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
// then sequenceStartPositions = [0, 2, 3, 4, 7].
Expand Down Expand Up @@ -598,8 +598,8 @@ void Argument::poolSequenceWithStride(const Argument& input,
stridePos.emplace_back(starts[numSequences]);
int size = stridePos.size();
CHECK_EQ(size - 1, tgtBuf[numSequences]);
IVector::resizeOrCreate(*stridePostions, size, false);
(*stridePostions)->copyFrom(stridePos.data(), size);
ICpuGpuVector::resizeOrCreate(*stridePostions, size, false);
(*stridePostions)->getMutableVector(false)->copyFrom(stridePos.data(), size);
}

void Argument::getValueString(
Expand Down
2 changes: 1 addition & 1 deletion paddle/parameter/Argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ struct Argument {
*/
void poolSequenceWithStride(const Argument& input,
size_t stride,
IVectorPtr* stridePositions,
ICpuGpuVectorPtr* stridePositions,
bool reversed = false);
/**
* @brief getValueString will return the argument's output in string. There
Expand Down
4 changes: 2 additions & 2 deletions paddle/parameter/tests/test_argument.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ TEST(Argument, poolSequenceWithStride) {
int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};

for (auto reversed : {false, true}) {
IVectorPtr stridePositions;
ICpuGpuVectorPtr stridePositions;
output.poolSequenceWithStride(
input, 5 /* stride */, &stridePositions, reversed);

Expand All @@ -45,7 +45,7 @@ TEST(Argument, poolSequenceWithStride) {
CHECK_EQ(stridePositions->getSize(), 8UL);
auto result = reversed ? strideResultReversed : strideResult;
for (int i = 0; i < 8; i++) {
CHECK_EQ(stridePositions->getData()[i], result[i]);
CHECK_EQ(stridePositions->getData(false)[i], result[i]);
}
}
}
Expand Down
8 changes: 8 additions & 0 deletions python/paddle/trainer/config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2420,10 +2420,14 @@ def __init__(self,
trans_type='non-seq',
bias=False,
output_max_index=None,
stride=-1,
**xargs):
super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs)
config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
if trans_type == 'seq':
config_assert(stride == -1, 'subseq does not support stride window')
self.config.trans_type = trans_type
self.config.seq_pool_stride = stride
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size)
Expand Down Expand Up @@ -2685,11 +2689,15 @@ def __init__(self,
average_strategy='average',
trans_type='non-seq',
bias=False,
stride=-1,
**xargs):
super(AverageLayer, self).__init__(
name, 'average', 0, inputs=inputs, **xargs)
self.config.average_strategy = average_strategy
if trans_type == 'seq':
config_assert(stride == -1, 'subseq does not support stride window')
self.config.trans_type = trans_type
self.config.seq_pool_stride = stride
config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
Expand Down
19 changes: 17 additions & 2 deletions python/paddle/trainer_config_helpers/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1090,10 +1090,19 @@ def pooling_layer(input,
name=None,
bias_attr=None,
agg_level=AggregateLevel.TO_NO_SEQUENCE,
stride=-1,
layer_attr=None):
"""
Pooling layer for sequence inputs, not used for Image.

If stride > 0, this layer slides a window whose size is determined by stride,
and returns the pooling value of each window as the output. Thus, a long
sequence will be shortened.

The parameter stride specifies the intervals at which to apply the pooling
operation. Note that stride pooling is not supported for sequences with
sub-sequences; in that case stride must keep its default value of -1.

The example usage is:

.. code-block:: python
Expand All @@ -1112,6 +1121,8 @@ def pooling_layer(input,
:param pooling_type: Type of pooling, MaxPooling(default), AvgPooling,
SumPooling, SquareRootNPooling.
:type pooling_type: BasePoolingType|None
:param stride: The step size between successive pooling regions.
:type stride: Int
:param bias_attr: Bias parameter attribute. False if no bias.
:type bias_attr: ParameterAttribute|None|False
:param layer_attr: The Extra Attributes for layer, such as dropout.
Expand All @@ -1129,12 +1140,16 @@ def pooling_layer(input,
extra_dict['output_max_index'] = pooling_type.output_max_index
extra_dict.update(ExtraLayerAttribute.to_kwargs(layer_attr))

if agg_level == AggregateLevel.TO_SEQUENCE:
assert stride == -1

Layer(
name=name,
type=pooling_type.name,
inputs=[Input(input.name)],
bias=ParamAttr.to_bias(bias_attr),
trans_type=agg_level,
stride=stride,
**extra_dict)

return LayerOutput(
Expand Down Expand Up @@ -1396,7 +1411,7 @@ def last_seq(input,
:type name: basestring
:param input: Input layer name.
:type input: LayerOutput
:param stride: window size.
:param stride: The step size between successive pooling regions.
:type stride: Int
:param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute.
Expand Down Expand Up @@ -1452,7 +1467,7 @@ def first_seq(input,
:type name: basestring
:param input: Input layer name.
:type input: LayerOutput
:param stride: window size.
:param stride: The step size between successive pooling regions.
:type stride: Int
:param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ layers {
input_layer_name: "dat_in"
}
trans_type: "seq"
seq_pool_stride: -1
}
layers {
name: "__seq_pooling_1__"
Expand All @@ -24,6 +25,7 @@ layers {
input_layer_name: "dat_in"
}
trans_type: "non-seq"
seq_pool_stride: -1
}
layers {
name: "__seq_pooling_2__"
Expand All @@ -35,6 +37,7 @@ layers {
}
average_strategy: "average"
trans_type: "seq"
seq_pool_stride: -1
}
layers {
name: "__seq_pooling_3__"
Expand All @@ -46,6 +49,7 @@ layers {
}
average_strategy: "average"
trans_type: "non-seq"
seq_pool_stride: -1
}
layers {
name: "__seq_pooling_4__"
Expand All @@ -57,6 +61,7 @@ layers {
}
average_strategy: "sum"
trans_type: "seq"
seq_pool_stride: -1
}
layers {
name: "__seq_pooling_5__"
Expand All @@ -68,6 +73,7 @@ layers {
}
average_strategy: "sum"
trans_type: "non-seq"
seq_pool_stride: -1
}
layers {
name: "__seq_pooling_6__"
Expand All @@ -77,8 +83,44 @@ layers {
inputs {
input_layer_name: "dat_in"
}
trans_type: "non-seq"
seq_pool_stride: 5
}
layers {
name: "__seq_pooling_7__"
type: "average"
size: 100
active_type: ""
inputs {
input_layer_name: "dat_in"
}
average_strategy: "average"
trans_type: "non-seq"
seq_pool_stride: 5
}
layers {
name: "__seq_pooling_8__"
type: "average"
size: 100
active_type: ""
inputs {
input_layer_name: "dat_in"
}
average_strategy: "sum"
trans_type: "non-seq"
seq_pool_stride: 5
}
layers {
name: "__seq_pooling_9__"
type: "max"
size: 100
active_type: ""
inputs {
input_layer_name: "dat_in"
}
output_max_index: true
trans_type: "non-seq"
seq_pool_stride: -1
}
input_layer_names: "dat_in"
output_layer_names: "__seq_pooling_0__"
Expand All @@ -88,6 +130,9 @@ output_layer_names: "__seq_pooling_3__"
output_layer_names: "__seq_pooling_4__"
output_layer_names: "__seq_pooling_5__"
output_layer_names: "__seq_pooling_6__"
output_layer_names: "__seq_pooling_7__"
output_layer_names: "__seq_pooling_8__"
output_layer_names: "__seq_pooling_9__"
sub_models {
name: "root"
layer_names: "dat_in"
Expand All @@ -98,6 +143,9 @@ sub_models {
layer_names: "__seq_pooling_4__"
layer_names: "__seq_pooling_5__"
layer_names: "__seq_pooling_6__"
layer_names: "__seq_pooling_7__"
layer_names: "__seq_pooling_8__"
layer_names: "__seq_pooling_9__"
input_layer_names: "dat_in"
output_layer_names: "__seq_pooling_0__"
output_layer_names: "__seq_pooling_1__"
Expand All @@ -106,6 +154,9 @@ sub_models {
output_layer_names: "__seq_pooling_4__"
output_layer_names: "__seq_pooling_5__"
output_layer_names: "__seq_pooling_6__"
output_layer_names: "__seq_pooling_7__"
output_layer_names: "__seq_pooling_8__"
output_layer_names: "__seq_pooling_9__"
is_recurrent_layer_group: false
}

Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@
for al in AGG_LEVEL:
opts.append(pooling_layer(input=din, agg_level=al, pooling_type=pt()))

for pt in POOL_TYPE:
opts.append(
pooling_layer(
input=din,
agg_level=AggregateLevel.TO_NO_SEQUENCE,
pooling_type=pt(),
stride=5))

opts.append(
pooling_layer(
input=din, pooling_type=MaxPooling(output_max_index=True)))
Expand Down