Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions paddle/gserver/layers/MaxLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ namespace paddle {
* If SequenceLevel = kNonSeq:
* Output: output size is the number of input sequences (NOT input instances)
* output[i] = max_{for each instance in this sequence}{input[i]}
* If stride_ > 0:
 * Output: a shortened sequence. The operation of getting max instance of a
* sequence is independently performed on every slice of the input
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • “getting max instance of a sequence” 这个表述令人困惑。
  • Stride is the step size by which we slide a window upon the input sequence, and the pooling operation is then applied to each interval independently.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. And update the comments in SequencePoolLayer.h, AverageLayer.h and SequenceLastInstanceLayer.h at the same time.

* sequence, which is obtained by sliding a window with the window
* size set to stride_.
* If SequenceLevel = kSeq:
 * Check that the input sequence must have sub-sequences
* Output: output size is the number of input sub-sequences
Expand Down
3 changes: 1 addition & 2 deletions paddle/gserver/layers/SequenceLastInstanceLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
void SequenceLastInstanceLayer::forward(PassType passType) {
SequencePoolLayer::forward(passType);

auto starts = (stride_ > 0) ? stridePositions_->getData()
: startPositions_->getData(false);
auto starts = startPositions_->getData(false);
MatrixPtr inputValue = getInputValue(0);
MatrixPtr outputValue = getOutputValue();

Expand Down
5 changes: 2 additions & 3 deletions paddle/gserver/layers/SequencePoolLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,8 @@ void SequencePoolLayer::forward(PassType passType) {
if (stride_ > 0) {
CHECK_EQ(input.hasSubseq(), 0UL)
<< "sequence stride pooling is invalid for hasSubseq now";
output_.poolSequenceWithStride(
input, stride_, &stridePositions_, reversed_);
newBatchSize_ = stridePositions_->getSize() - 1;
output_.poolSequenceWithStride(input, stride_, &startPositions_, reversed_);
newBatchSize_ = startPositions_->getSize() - 1;
}

resetOutput(newBatchSize_, dim);
Expand Down
2 changes: 0 additions & 2 deletions paddle/gserver/layers/SequencePoolLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,6 @@ class SequencePoolLayer : public Layer {
size_t newBatchSize_;
ICpuGpuVectorPtr startPositions_;
int stride_;
// Store the start position of each window.
IVectorPtr stridePositions_;
// Whether the input sequence is reversed or not.
bool reversed_ = false;

Expand Down
12 changes: 10 additions & 2 deletions paddle/gserver/tests/test_LayerGrad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -845,8 +845,12 @@ void testDegradeLayer(bool hasSubseq,

TEST(Layer, MaxLayer) {
testDegradeLayer(false, "max", "non-seq", -1); // seq max to non-seq
testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq
testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq
testDegradeLayer(false,
"max",
"non-seq",
5); // seq max to a shorten seq, stride window = 5
testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq
testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq
}

TEST(Layer, SequenceLastInstanceLayer) {
Expand All @@ -868,6 +872,10 @@ TEST(Layer, SequenceLastInstanceLayer) {

TEST(Layer, AverageLayer) {
testDegradeLayer(false, "average", "non-seq", -1); // seq average to non-seq
testDegradeLayer(false,
"average",
"non-seq",
5); // seq average to a shorten seq, stride window = 5
testDegradeLayer(
true, "average", "non-seq", -1); // hasSubseq average to non-seq
testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq
Expand Down
6 changes: 3 additions & 3 deletions paddle/parameter/Argument.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ void Argument::degradeSequence(const Argument& input) {

void Argument::poolSequenceWithStride(const Argument& input,
size_t stride,
IVectorPtr* stridePostions,
ICpuGpuVectorPtr* stridePostions,
bool reversed) {
// If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
// then sequenceStartPositions = [0, 2, 3, 4, 7].
Expand Down Expand Up @@ -598,8 +598,8 @@ void Argument::poolSequenceWithStride(const Argument& input,
stridePos.emplace_back(starts[numSequences]);
int size = stridePos.size();
CHECK_EQ(size - 1, tgtBuf[numSequences]);
IVector::resizeOrCreate(*stridePostions, size, false);
(*stridePostions)->copyFrom(stridePos.data(), size);
ICpuGpuVector::resizeOrCreate(*stridePostions, size, false);
(*stridePostions)->getMutableVector(false)->copyFrom(stridePos.data(), size);
}

void Argument::getValueString(
Expand Down
2 changes: 1 addition & 1 deletion paddle/parameter/Argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ struct Argument {
*/
void poolSequenceWithStride(const Argument& input,
size_t stride,
IVectorPtr* stridePositions,
ICpuGpuVectorPtr* stridePositions,
bool reversed = false);
/**
* @brief getValueString will return the argument's output in string. There
Expand Down
4 changes: 2 additions & 2 deletions paddle/parameter/tests/test_argument.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ TEST(Argument, poolSequenceWithStride) {
int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};

for (auto reversed : {false, true}) {
IVectorPtr stridePositions;
ICpuGpuVectorPtr stridePositions;
output.poolSequenceWithStride(
input, 5 /* stride */, &stridePositions, reversed);

Expand All @@ -45,7 +45,7 @@ TEST(Argument, poolSequenceWithStride) {
CHECK_EQ(stridePositions->getSize(), 8UL);
auto result = reversed ? strideResultReversed : strideResult;
for (int i = 0; i < 8; i++) {
CHECK_EQ(stridePositions->getData()[i], result[i]);
CHECK_EQ(stridePositions->getData(false)[i], result[i]);
}
}
}
Expand Down
8 changes: 8 additions & 0 deletions python/paddle/trainer/config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2420,10 +2420,14 @@ def __init__(self,
trans_type='non-seq',
bias=False,
output_max_index=None,
stride=-1,
**xargs):
super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs)
config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
if trans_type == 'seq':
config_assert(stride == -1, 'subseq does not support stride window')
self.config.trans_type = trans_type
self.config.seq_pool_stride = stride
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size)
Expand Down Expand Up @@ -2685,11 +2689,15 @@ def __init__(self,
average_strategy='average',
trans_type='non-seq',
bias=False,
stride=-1,
**xargs):
super(AverageLayer, self).__init__(
name, 'average', 0, inputs=inputs, **xargs)
self.config.average_strategy = average_strategy
if trans_type == 'seq':
config_assert(stride == -1, 'subseq does not support stride window')
self.config.trans_type = trans_type
self.config.seq_pool_stride = stride
config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
Expand Down
12 changes: 12 additions & 0 deletions python/paddle/trainer_config_helpers/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1090,10 +1090,16 @@ def pooling_layer(input,
name=None,
bias_attr=None,
agg_level=AggregateLevel.TO_NO_SEQUENCE,
stride=-1,
layer_attr=None):
"""
Pooling layer for sequence inputs, not used for Image.

    If stride > 0, this layer slides a window whose size is determined by stride,
    and returns the pooling value of the window as the output. Thus, a long sequence
    will be shortened. Note that for sequences with sub-sequences, the default value
    of stride is -1.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the parameter stride specifies the intervals at which to apply the pooling operation.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


The example usage is:

.. code-block:: python
Expand All @@ -1112,6 +1118,8 @@ def pooling_layer(input,
:param pooling_type: Type of pooling, MaxPooling(default), AvgPooling,
SumPooling, SquareRootNPooling.
:type pooling_type: BasePoolingType|None
:param stride: window size.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

stride --> the step size between successive pooling regions.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, and update the comments in last_seq and first_seq at the same time.

:type stride: Int
:param bias_attr: Bias parameter attribute. False if no bias.
:type bias_attr: ParameterAttribute|None|False
:param layer_attr: The Extra Attributes for layer, such as dropout.
Expand All @@ -1129,12 +1137,16 @@ def pooling_layer(input,
extra_dict['output_max_index'] = pooling_type.output_max_index
extra_dict.update(ExtraLayerAttribute.to_kwargs(layer_attr))

if agg_level == AggregateLevel.TO_SEQUENCE:
assert stride == -1

Layer(
name=name,
type=pooling_type.name,
inputs=[Input(input.name)],
bias=ParamAttr.to_bias(bias_attr),
trans_type=agg_level,
stride=stride,
**extra_dict)

return LayerOutput(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ layers {
input_layer_name: "dat_in"
}
trans_type: "seq"
seq_pool_stride: -1
}
layers {
name: "__seq_pooling_1__"
Expand All @@ -24,6 +25,7 @@ layers {
input_layer_name: "dat_in"
}
trans_type: "non-seq"
seq_pool_stride: -1
}
layers {
name: "__seq_pooling_2__"
Expand All @@ -35,6 +37,7 @@ layers {
}
average_strategy: "average"
trans_type: "seq"
seq_pool_stride: -1
}
layers {
name: "__seq_pooling_3__"
Expand All @@ -46,6 +49,7 @@ layers {
}
average_strategy: "average"
trans_type: "non-seq"
seq_pool_stride: -1
}
layers {
name: "__seq_pooling_4__"
Expand All @@ -57,6 +61,7 @@ layers {
}
average_strategy: "sum"
trans_type: "seq"
seq_pool_stride: -1
}
layers {
name: "__seq_pooling_5__"
Expand All @@ -68,6 +73,7 @@ layers {
}
average_strategy: "sum"
trans_type: "non-seq"
seq_pool_stride: -1
}
layers {
name: "__seq_pooling_6__"
Expand All @@ -77,8 +83,44 @@ layers {
inputs {
input_layer_name: "dat_in"
}
trans_type: "non-seq"
seq_pool_stride: 5
}
layers {
name: "__seq_pooling_7__"
type: "average"
size: 100
active_type: ""
inputs {
input_layer_name: "dat_in"
}
average_strategy: "average"
trans_type: "non-seq"
seq_pool_stride: 5
}
layers {
name: "__seq_pooling_8__"
type: "average"
size: 100
active_type: ""
inputs {
input_layer_name: "dat_in"
}
average_strategy: "sum"
trans_type: "non-seq"
seq_pool_stride: 5
}
layers {
name: "__seq_pooling_9__"
type: "max"
size: 100
active_type: ""
inputs {
input_layer_name: "dat_in"
}
output_max_index: true
trans_type: "non-seq"
seq_pool_stride: -1
}
input_layer_names: "dat_in"
output_layer_names: "__seq_pooling_0__"
Expand All @@ -88,6 +130,9 @@ output_layer_names: "__seq_pooling_3__"
output_layer_names: "__seq_pooling_4__"
output_layer_names: "__seq_pooling_5__"
output_layer_names: "__seq_pooling_6__"
output_layer_names: "__seq_pooling_7__"
output_layer_names: "__seq_pooling_8__"
output_layer_names: "__seq_pooling_9__"
sub_models {
name: "root"
layer_names: "dat_in"
Expand All @@ -98,6 +143,9 @@ sub_models {
layer_names: "__seq_pooling_4__"
layer_names: "__seq_pooling_5__"
layer_names: "__seq_pooling_6__"
layer_names: "__seq_pooling_7__"
layer_names: "__seq_pooling_8__"
layer_names: "__seq_pooling_9__"
input_layer_names: "dat_in"
output_layer_names: "__seq_pooling_0__"
output_layer_names: "__seq_pooling_1__"
Expand All @@ -106,6 +154,9 @@ sub_models {
output_layer_names: "__seq_pooling_4__"
output_layer_names: "__seq_pooling_5__"
output_layer_names: "__seq_pooling_6__"
output_layer_names: "__seq_pooling_7__"
output_layer_names: "__seq_pooling_8__"
output_layer_names: "__seq_pooling_9__"
is_recurrent_layer_group: false
}

Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@
for al in AGG_LEVEL:
opts.append(pooling_layer(input=din, agg_level=al, pooling_type=pt()))

for pt in POOL_TYPE:
opts.append(
pooling_layer(
input=din,
agg_level=AggregateLevel.TO_NO_SEQUENCE,
pooling_type=pt(),
stride=5))

opts.append(
pooling_layer(
input=din, pooling_type=MaxPooling(output_max_index=True)))
Expand Down