From cc09650ea04ffc6c9c6b462a96cd9cdd6a65d793 Mon Sep 17 00:00:00 2001
From: ranqiu
Date: Mon, 25 Dec 2017 20:09:21 +0800
Subject: [PATCH] Refine understand_sentiment_dynamic_lstm

---
 .../test_understand_sentiment_dynamic_lstm.py      | 83 ++++++++++++++++---
 1 file changed, 70 insertions(+), 13 deletions(-)

diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
index cd28f04b857477..b5c8ed19144eac 100644
--- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
+++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
@@ -1,6 +1,9 @@
+import math
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
+from paddle.v2.fluid.param_attr import ParamAttr
+from paddle.v2.fluid.initializer import NormalInitializer
 
 
 def stacked_lstm_net(data,
@@ -9,32 +12,82 @@ def stacked_lstm_net(data,
                      class_dim=2,
                      emb_dim=128,
                      hid_dim=512,
-                     stacked_num=3):
+                     stacked_num=3,
+                     batch_size=100):
     assert stacked_num % 2 == 1
 
-    emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim])
-    # add bias attr
-
-    # TODO(qijun) linear act
-    fc1 = fluid.layers.fc(input=emb, size=hid_dim)
-    lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim)
+    emb = fluid.layers.embedding(
+        input=data,
+        size=[input_dim, emb_dim],
+        param_attr=ParamAttr(initializer=NormalInitializer(
+            loc=0., scale=1.0 / math.sqrt(input_dim))))
+
+    fc1 = fluid.layers.fc(input=emb,
+                          size=hid_dim,
+                          bias_attr=ParamAttr(initializer=NormalInitializer(
+                              loc=0., scale=0.)),
+                          param_attr=ParamAttr(
+                              name='fc1',
+                              initializer=NormalInitializer(
+                                  loc=0., scale=1.0 / math.sqrt(emb_dim))))
+    lstm1, cell1 = fluid.layers.dynamic_lstm(
+        input=fc1,
+        size=hid_dim,
+        candidate_activation='relu',
+        bias_attr=ParamAttr(initializer=NormalInitializer(
+            loc=0., scale=0.)),
+        param_attr=ParamAttr(initializer=NormalInitializer(
+            loc=0., scale=1.0 / math.sqrt(emb_dim))))
 
     inputs = [fc1, lstm1]
 
     for i in range(2, stacked_num + 1):
-        fc = fluid.layers.fc(input=inputs, size=hid_dim)
+        fc = fluid.layers.fc(input=inputs,
+                             size=hid_dim,
+                             bias_attr=ParamAttr(initializer=NormalInitializer(
+                                 loc=0., scale=0.)),
+                             param_attr=[
+                                 ParamAttr(
+                                     learning_rate=1e-3,
+                                     initializer=NormalInitializer(
+                                         loc=0., scale=1.0 /
+                                         math.sqrt(hid_dim))), ParamAttr(
+                                             learning_rate=1.,
+                                             initializer=NormalInitializer(
+                                                 loc=0., scale=0.))
+                             ])
         lstm, cell = fluid.layers.dynamic_lstm(
-            input=fc, size=hid_dim, is_reverse=(i % 2) == 0)
+            input=fc,
+            size=hid_dim,
+            is_reverse=(i % 2) == 0,
+            candidate_activation='relu',
+            bias_attr=ParamAttr(initializer=NormalInitializer(
+                loc=0., scale=0.)),
+            param_attr=ParamAttr(initializer=NormalInitializer(
+                loc=0., scale=1.0 / math.sqrt(emb_dim))))
         inputs = [fc, lstm]
 
     fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
     lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')
 
-    prediction = fluid.layers.fc(input=[fc_last, lstm_last],
-                                 size=class_dim,
-                                 act='softmax')
+    prediction = fluid.layers.fc(
+        input=[fc_last, lstm_last],
+        size=class_dim,
+        bias_attr=ParamAttr(initializer=NormalInitializer(
+            loc=0., scale=0.)),
+        param_attr=[
+            ParamAttr(
+                learning_rate=1e-3,
+                initializer=NormalInitializer(
+                    loc=0., scale=1.0 / math.sqrt(hid_dim))), ParamAttr(
+                        learning_rate=1.,
+                        initializer=NormalInitializer(
+                            loc=0., scale=0.))
+        ],
+        act='softmax')
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(x=cost)
+    avg_cost = fluid.layers.scale(x=avg_cost, scale=float(batch_size))
     adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002)
     adam_optimizer.minimize(avg_cost)
     accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
@@ -69,7 +122,11 @@ def main():
         name="words", shape=[1], dtype="int64", lod_level=1)
     label = fluid.layers.data(name="label", shape=[1], dtype="int64")
     cost, accuracy, acc_out = stacked_lstm_net(
-        data, label, input_dim=dict_dim, class_dim=class_dim)
+        data,
+        label,
+        input_dim=dict_dim,
+        class_dim=class_dim,
+        batch_size=BATCH_SIZE)
 
     train_data = paddle.batch(
         paddle.reader.shuffle(
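
Note on the initialization scheme used throughout this patch: every weight is
drawn from a zero-mean normal whose standard deviation is 1/sqrt(fan_in) of
the layer's input width, and every bias starts at zero. A minimal NumPy sketch
of the same rule (the helper name and shapes below are illustrative, not part
of the patch):

    import math
    import numpy as np

    def scaled_normal(fan_in, fan_out):
        # Zero-mean normal with std = 1 / sqrt(fan_in), mirroring the
        # NormalInitializer(loc=0., scale=1.0 / math.sqrt(...)) calls above.
        return np.random.normal(
            loc=0.0, scale=1.0 / math.sqrt(fan_in), size=(fan_in, fan_out))

    w_fc1 = scaled_normal(128, 512)  # e.g. emb_dim -> hid_dim, as in fc1
    assert abs(w_fc1.std() - 1.0 / math.sqrt(128)) < 1e-2

Scaling by fan_in keeps the variance of each layer's pre-activations roughly
independent of its input width, which is the usual motivation for this family
of initializers.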
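Also worth noting is the new loss term: mean(x=cost) averages the per-example
cross-entropy, so multiplying by float(batch_size) via fluid.layers.scale
recovers the summed loss over the batch (exact only when the last batch is
full, hence the batch_size argument threaded in from main()). A quick
arithmetic check with made-up values:

    import numpy as np

    cost = np.array([0.2, 0.5, 0.3, 0.6])  # per-example loss, batch of 4
    avg_cost = cost.mean()                  # 0.4, as fluid.layers.mean gives
    scaled = avg_cost * float(len(cost))    # 1.6, what scale(...) recovers
    assert np.isclose(scaled, cost.sum())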