Commit abd6181

Author: Abhinav Arora (committed)

Changing learning rate and time step to inputs from attributes
1 parent be2d958 commit abd6181

3 files changed: +12 −12 lines

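In short, this commit turns learning_rate and time_step from op attributes (scalar values fixed when the op is defined) into op inputs (values supplied at run time). A rough Python sketch of the effect on the test's setUp dictionaries, using placeholder values rather than the actual test data:

import numpy as np

# Placeholder values, not the real test data.
param = grad = moment = inf_norm = np.zeros(4, dtype="float32")
learning_rate, time_step = 0.002, 10
beta_1, beta_2, epsilon = 0.9, 0.999, 1e-8

# Before this commit: the scalars are passed as op attributes.
inputs_before = {'param': param, 'grad': grad, 'moment': moment, 'inf_norm': inf_norm}
attrs_before = {'time_step': time_step, 'learning_rate': learning_rate,
                'beta_1': beta_1, 'beta_2': beta_2, 'epsilon': epsilon}

# After this commit: the scalars are fed as op inputs; only the decay
# rates and epsilon remain attributes.
inputs_after = dict(inputs_before, time_step=time_step, learning_rate=learning_rate)
attrs_after = {'beta_1': beta_1, 'beta_2': beta_2, 'epsilon': epsilon}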

paddle/operators/adamax_op.cc

Lines changed: 6 additions & 2 deletions
@@ -31,6 +31,10 @@ class AdamaxOp : public framework::OperatorWithKernel {
                    "Input(moment) of AdamaxOp should not be null.");
     PADDLE_ENFORCE(ctx->HasInput("inf_norm"),
                    "Input(inf_norm) of AdamaxOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("learning_rate"),
+                   "Input(learning_rate) of AdamaxOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("time_step"),
+                   "Input(time_step) of AdamaxOp should not be null.");
 
     PADDLE_ENFORCE(ctx->HasOutput("param_out"),
                    "Output(param_out) of AdamaxOp should not be null.");
@@ -62,15 +66,15 @@ class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("param", "Input parameter");
     AddInput("grad", "Input gradient");
+    AddInput("learning_rate", "Learning rate");
     AddInput("moment", "First moment");
     AddInput("inf_norm", "Input exponentially weighted infinity norm");
+    AddInput("time_step", "Time step");
 
     AddOutput("param_out", "Output parameter");
     AddOutput("moment_out", "Output first moment");
     AddOutput("inf_norm_out", "Output exponentially weighted infinity norm");
 
-    AddAttr<int>("time_step", "Time step");
-    AddAttr<float>("learning_rate", "Learning rate");
     AddAttr<float>("beta_1",
                    "exponential decay rate for the 1st moment estimates.");
     AddAttr<float>(

paddle/operators/adamax_op.h

Lines changed: 2 additions & 2 deletions
@@ -37,11 +37,11 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
     moment_out->mutable_data<T>(ctx.GetPlace());
     norm_out->mutable_data<T>(ctx.GetPlace());
 
-    float lr = ctx.Attr<float>("learning_rate");
     float beta_1 = ctx.Attr<float>("beta_1");
     float beta_2 = ctx.Attr<float>("beta_2");
     float epsilon = ctx.Attr<float>("epsilon");
-    int t = ctx.Attr<int>("time_step");
+    float lr = *ctx.Input<float>("learning_rate");
+    int t = *ctx.Input<int>("time_step");
 
     auto p = EigenVector<T>::Flatten(*ctx.Input<Tensor>("param"));
     auto g = EigenVector<T>::Flatten(*ctx.Input<Tensor>("grad"));

python/paddle/v2/framework/tests/test_adamax_op.py

Lines changed: 4 additions & 8 deletions
@@ -22,17 +22,13 @@ def setUp(self):
             'param': param,
             'grad': grad,
             'moment': moment,
-            'inf_norm': inf_norm
-        }
-
-        self.attrs = {
+            'inf_norm': inf_norm,
             'time_step': time_step,
-            'learning_rate': learning_rate,
-            'beta_1': beta_1,
-            'beta_2': beta_2,
-            'epsilon': epsilon
+            'learning_rate': learning_rate
         }
 
+        self.attrs = {'beta_1': beta_1, 'beta_2': beta_2, 'epsilon': epsilon}
+
         moment_out = beta_1 * moment + (1 - beta_1) * grad
         inf_norm_out = np.maximum(beta_2 * inf_norm + epsilon, np.abs(grad))
         lr_t = (learning_rate / (1 - beta_1**time_step))
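
For reference, a self-contained NumPy version of the update the test computes. The moment_out, inf_norm_out, and lr_t lines mirror the hunk above; the final param_out line is not visible in this hunk and is assumed from the standard Adamax rule (Kingma & Ba, 2015):

import numpy as np

def adamax_step(param, grad, moment, inf_norm, learning_rate, time_step,
                beta_1=0.9, beta_2=0.999, epsilon=1e-8):
    # First-moment and infinity-norm updates, as in the test above.
    moment_out = beta_1 * moment + (1 - beta_1) * grad
    inf_norm_out = np.maximum(beta_2 * inf_norm + epsilon, np.abs(grad))
    # Bias-corrected step size.
    lr_t = learning_rate / (1 - beta_1**time_step)
    # Assumed parameter update from the Adamax paper; not shown in this hunk.
    param_out = param - lr_t * (moment_out / inf_norm_out)
    return param_out, moment_out, inf_norm_out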

0 commit comments