Commit d1de7ec

Change learning rate from attribute to input tensor

1 parent 05cbd4d commit d1de7ec

3 files changed: +16 -5 lines changed

File tree:
- paddle/operators/adagrad_op.cc
- paddle/operators/adagrad_op.h
- python/paddle/v2/framework/tests/test_adagrad_op.py
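The substance of the change: previously the learning rate was baked into the op as a float attribute at construction time; after this commit it is an ordinary one-element tensor input, so it can be produced by other ops (e.g. a decay schedule) and varied between runs. A minimal sketch of the interface difference, using the dict convention of the Python test below (the keys come from the test file; everything else is illustrative, not Paddle API):

```python
import numpy as np

# Before this commit: learning rate fixed as an op attribute
attrs_before = {'learning_rate': 0.01, 'epsilon': 1e-6}

# After this commit: learning rate fed as a one-element float32 tensor input
lr = np.array([0.01]).astype("float32")
inputs_after = {'learning_rate': lr}   # alongside 'param', 'grad', 'moment'
attrs_after = {'epsilon': 1e-6}
```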

paddle/operators/adagrad_op.cc

Lines changed: 6 additions & 1 deletion

```diff
@@ -29,12 +29,17 @@ class AdagradOp : public framework::OperatorWithKernel {
                    "Input(grad) of AdagradOp should not be null.");
     PADDLE_ENFORCE(ctx->HasInput("moment"),
                    "Input(moment) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("learning_rate"),
+                   "Input(learning_rate) of AdagradOp should not be null.");

     PADDLE_ENFORCE(ctx->HasOutput("param_out"),
                    "Output(param_out) of AdagradOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("moment_out"),
                    "Output(moment_out) of AdagradOp should not be null.");

+    auto lr_dims = ctx->GetInputDim("learning_rate");
+    PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
+                      "learning_rate should have one element");
     auto param_dim = ctx->GetInputDim("param");
     PADDLE_ENFORCE_EQ(
         param_dim, ctx->GetInputDim("grad"),
@@ -56,11 +61,11 @@ class AdagradOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("param", "Input parameter");
     AddInput("grad", "Input gradient");
     AddInput("moment", "Second moment");
+    AddInput("learning_rate", "learning rate of adagrad");

     AddOutput("param_out", "Output parameter");
     AddOutput("moment_out", "Output second moment");

-    AddAttr<float>("learning_rate", "Learning rate");
     AddAttr<float>("epsilon", "Constant for numerical stability");
     AddComment(R"DOC(
```
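The new shape check uses `framework::product` over the input's dims, so any tensor whose element count multiplies out to 1 is accepted, not just shape (1,). A NumPy mirror of the check (the helper name is mine; the real enforcement is the `PADDLE_ENFORCE_EQ` above):

```python
import numpy as np

# Hypothetical NumPy mirror of the C++ check
# PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, ...)
def check_lr_shape(lr):
    assert np.prod(lr.shape) == 1, "learning_rate should have one element"

check_lr_shape(np.array([0.01], dtype="float32"))    # shape (1,): passes
check_lr_shape(np.array([[0.01]], dtype="float32"))  # shape (1, 1): passes too
```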

paddle/operators/adagrad_op.h

Lines changed: 8 additions & 1 deletion

```diff
@@ -20,6 +20,11 @@ namespace paddle {
 namespace operators {

 using Tensor = framework::Tensor;
+
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenScalar = framework::EigenScalar<T, MajorType, IndexType>;
+
 template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
 using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
@@ -34,12 +39,14 @@ class AdagradOpKernel : public framework::OpKernel<T> {
     param_out->mutable_data<T>(ctx.GetPlace());
     moment_out->mutable_data<T>(ctx.GetPlace());

-    float lr = ctx.Attr<float>("learning_rate");
+    float lr = ctx.Input<Tensor>("learning_rate")->data<float>()[0];
     float epsilon = ctx.Attr<float>("epsilon");

     auto p = EigenVector<T>::Flatten(*ctx.Input<Tensor>("param"));
     auto g = EigenVector<T>::Flatten(*ctx.Input<Tensor>("grad"));
     auto m = EigenVector<T>::Flatten(*ctx.Input<Tensor>("moment"));
+    auto lr = EigenScalar<T>::From(*ctx.Input<Tensor>("learning_rate"));
+
     auto p_out = EigenVector<T>::Flatten(*param_out);
     auto m_out = EigenVector<T>::Flatten(*moment_out);
     auto place = ctx.GetEigenDevice<Place>();
```
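The hunk ends before the update expressions, so the kernel body that consumes `lr` is not shown. (As displayed, the diff both rewrites `lr` to read `data<float>()[0]` from the input tensor and adds an `EigenScalar` view of the same name; those two declarations would collide, so presumably only one survived in a follow-up commit.) For reference, the Adagrad step this kernel computes matches the Python test below; a NumPy sketch, with a function name of my own choosing:

```python
import numpy as np

# Reference semantics of the kernel, mirroring the Python test below; the only
# interface change in this commit is that lr now arrives as a one-element
# input tensor rather than a float attribute.
def adagrad_step(param, grad, moment, lr, epsilon=1e-6):
    moment_out = moment + grad * grad  # accumulate squared gradients
    param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
    return param_out, moment_out
```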

python/paddle/v2/framework/tests/test_adagrad_op.py

Lines changed: 2 additions & 3 deletions

```diff
@@ -11,16 +11,15 @@ def setUp(self):
         grad = np.random.random((123, 321)).astype("float32")
         moment = np.zeros((123, 321)).astype("float32")

-        learning_rate = 0.01
+        lr = np.array([0.01]).astype("float32")
         epsilon = 1e-6

         self.inputs = {'param': param, 'grad': grad, 'moment': moment}

         self.attrs = {'learning_rate': learning_rate, 'epsilon': epsilon}

         moment_out = moment + grad * grad
-        param_out = param - learning_rate * grad / (np.sqrt(moment_out) +
-                                                    epsilon)
+        param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)

         self.outputs = {'param_out': param_out, 'moment_out': moment_out}
```
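One caveat in the hunk as displayed: `self.inputs` never gains the new tensor, and `self.attrs` still references the renamed `learning_rate` variable, which would raise a NameError. A setUp consistent with the new op interface would presumably look like the following sketch (my reconstruction under that assumption, not part of the commit):

```python
import numpy as np

# Assumed final shape of setUp: learning_rate moves from attrs into inputs
param = np.random.random((123, 321)).astype("float32")
grad = np.random.random((123, 321)).astype("float32")
moment = np.zeros((123, 321)).astype("float32")
lr = np.array([0.01]).astype("float32")
epsilon = 1e-6

inputs = {'param': param, 'grad': grad, 'moment': moment, 'learning_rate': lr}
attrs = {'epsilon': epsilon}

moment_out = moment + grad * grad
param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
outputs = {'param_out': param_out, 'moment_out': moment_out}
```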
