
Commit 78f4c80

change learning rate and fix format
1 parent d1de7ec commit 78f4c80

File tree

3 files changed: 97 additions, 61 deletions

paddle/operators/adagrad_op.cc
paddle/operators/adagrad_op.h
python/paddle/v2/framework/tests/test_adagrad_op.py

paddle/operators/adagrad_op.cc

Lines changed: 35 additions & 32 deletions

```diff
@@ -23,33 +23,33 @@ class AdagradOp : public framework::OperatorWithKernel {
 
  protected:
   void InferShape(framework::InferShapeContextBase *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("param"),
-                   "Input(param) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("grad"),
-                   "Input(grad) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("moment"),
-                   "Input(moment) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("learning_rate"),
-                   "Input(learning_rate) of AdagradOp should not be null.");
-
-    PADDLE_ENFORCE(ctx->HasOutput("param_out"),
-                   "Output(param_out) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("moment_out"),
-                   "Output(moment_out) of AdagradOp should not be null.");
-
-    auto lr_dims = ctx->GetInputDim("learning_rate");
+    PADDLE_ENFORCE(ctx->HasInput("Param"),
+                   "Input(Param) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Grad"),
+                   "Input(Grad) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Moment"),
+                   "Input(Moment) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
+                   "Input(LearningRate) of AdagradOp should not be null.");
+
+    PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
+                   "Output(ParamOut) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("MomentOut"),
+                   "Output(MomentOut) of AdagradOp should not be null.");
+
+    auto lr_dims = ctx->GetInputDim("LearningRate");
     PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
-                      "learning_rate should have one element");
-    auto param_dim = ctx->GetInputDim("param");
+                      "LearningRate should have one element");
+    auto param_dims = ctx->GetInputDim("Param");
     PADDLE_ENFORCE_EQ(
-        param_dim, ctx->GetInputDim("grad"),
-        "Param and grad input of AdagradOp should have the same dimension.");
+        param_dims, ctx->GetInputDim("Grad"),
+        "Param and Grad input of AdagradOp should have the same dimension.");
     PADDLE_ENFORCE_EQ(
-        param_dim, ctx->GetInputDim("moment"),
-        "Param and moment input of AdagradOp should have the same dimension.");
+        param_dims, ctx->GetInputDim("Moment"),
+        "Param and Moment input of AdagradOp should have the same dimension.");
 
-    ctx->SetOutputDim("param_out", param_dim);
-    ctx->SetOutputDim("moment_out", param_dim);
+    ctx->SetOutputDim("ParamOut", param_dims);
+    ctx->SetOutputDim("MomentOut", param_dims);
   }
 };
 
@@ -58,15 +58,18 @@ class AdagradOpMaker : public framework::OpProtoAndCheckerMaker {
   AdagradOpMaker(framework::OpProto *proto,
                  framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("param", "Input parameter");
-    AddInput("grad", "Input gradient");
-    AddInput("moment", "Second moment");
-    AddInput("learning_rate", "learning rate of adagrad");
-
-    AddOutput("param_out", "Output parameter");
-    AddOutput("moment_out", "Output second moment");
-
-    AddAttr<float>("epsilon", "Constant for numerical stability");
+    AddInput("Param", "(Tensor) Input parameter");
+    AddInput("Grad", "(Tensor) Input gradient");
+    AddInput("Moment", "(Tensor) Second moment");
+    AddInput("LearningRate", "(Tensor) Learning rate");
+
+    AddOutput("ParamOut", "(Tensor) Output parameter");
+    AddOutput("MomentOut", "(Tensor) Output second moment");
+
+    AddAttr<float>("epsilon",
+                   "(float, default 1.0e-6) "
+                   "Constant for numerical stability")
+        .SetDefault(1.0e-6f);
     AddComment(R"DOC(
 
 Adaptive Gradient Algorithm (Adagrad).
```
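For reference, the update this operator computes (and which the Python test below reproduces) is the standard Adagrad step: the squared gradient is accumulated into the moment, and the parameter is moved by the learning rate scaled element-wise by the square root of that accumulator. A minimal NumPy sketch of one dense step, assuming float32 arrays as in the test (the helper name is illustrative only):

```python
import numpy as np

def adagrad_step(param, grad, moment, lr, epsilon=1.0e-6):
    # Accumulate the squared gradient, then apply the per-element scaled update.
    moment_out = moment + grad * grad
    param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
    return param_out, moment_out
```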

paddle/operators/adagrad_op.h

Lines changed: 19 additions & 24 deletions

```diff
@@ -19,40 +19,35 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = framework::Tensor;
-
-template <typename T, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-using EigenScalar = framework::EigenScalar<T, MajorType, IndexType>;
-
-template <typename T, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
-
 template <typename Place, typename T>
 class AdagradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto param_out = ctx.Output<Tensor>("param_out");
-    auto moment_out = ctx.Output<Tensor>("moment_out");
+    auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
+    auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
 
-    param_out->mutable_data<T>(ctx.GetPlace());
-    moment_out->mutable_data<T>(ctx.GetPlace());
+    param_out_tensor->mutable_data<T>(ctx.GetPlace());
+    moment_out_tensor->mutable_data<T>(ctx.GetPlace());
 
-    float lr = ctx.Input<Tensor>("learning_rate")->data<float>()[0];
     float epsilon = ctx.Attr<float>("epsilon");
 
-    auto p = EigenVector<T>::Flatten(*ctx.Input<Tensor>("param"));
-    auto g = EigenVector<T>::Flatten(*ctx.Input<Tensor>("grad"));
-    auto m = EigenVector<T>::Flatten(*ctx.Input<Tensor>("moment"));
-    auto lr = EigenScalar<T>::From(*ctx.Input<Tensor>("learning_rate"));
-
-    auto p_out = EigenVector<T>::Flatten(*param_out);
-    auto m_out = EigenVector<T>::Flatten(*moment_out);
+    auto param = framework::EigenVector<T>::Flatten(
+        *ctx.Input<framework::Tensor>("Param"));
+    auto grad = framework::EigenVector<T>::Flatten(
+        *ctx.Input<framework::Tensor>("Grad"));
+    auto moment = framework::EigenVector<T>::Flatten(
+        *ctx.Input<framework::Tensor>("Moment"));
+    auto lr = framework::EigenVector<T>::Flatten(
+        *ctx.Input<framework::Tensor>("LearningRate"));
+
+    auto param_out = framework::EigenVector<T>::Flatten(*param_out_tensor);
+    auto moment_out = framework::EigenVector<T>::Flatten(*moment_out_tensor);
     auto place = ctx.GetEigenDevice<Place>();
 
-    m_out.device(place) = m + g * g;
-    p_out.device(place) = p - lr * g / (m_out.sqrt() + epsilon);
+    moment_out.device(place) = moment + grad * grad;
+    Eigen::DSizes<int, 1> m_dsize(moment_out_tensor->numel());
+    param_out.device(place) =
+        param - lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon);
   }
 };
 
```
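The main behavioral change in the kernel is that the learning rate is no longer read back to the host as a float (`data<float>()[0]`) but kept as a one-element LearningRate tensor, flattened to an Eigen vector and broadcast to the parameter's length via `lr.broadcast(m_dsize)` before the element-wise multiply. A rough NumPy analogue of that broadcast, assuming a flattened float32 gradient (array sizes and names here are illustrative only):

```python
import numpy as np

grad = np.random.random(8).astype("float32")    # flattened gradient
lr = np.array([0.01]).astype("float32")         # one-element LearningRate tensor

# Broadcasting the (1,)-shaped learning rate across the flattened gradient
# matches the scalar multiply that the old attribute-based code performed.
scaled = np.broadcast_to(lr, grad.shape) * grad
assert np.allclose(scaled, 0.01 * grad)
```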

python/paddle/v2/framework/tests/test_adagrad_op.py

Lines changed: 43 additions & 5 deletions

```diff
@@ -3,25 +3,63 @@
 from op_test import OpTest
 
 
-class TestAdagradOp(OpTest):
+class TestAdagradOp1(OpTest):
+    ''' Test Adagrad operator with explicit attributes
+    '''
+
     def setUp(self):
         self.op_type = "adagrad"
 
         param = np.random.random((123, 321)).astype("float32")
         grad = np.random.random((123, 321)).astype("float32")
         moment = np.zeros((123, 321)).astype("float32")
+        lr = 0.01
+        epsilon = 1e-8
+
+        self.inputs = {
+            'Param': param,
+            'Grad': grad,
+            'Moment': moment,
+            'LearningRate': np.array([lr]).astype("float32")
+        }
+
+        self.attrs = {'epsilon': epsilon}
+
+        moment_out = moment + grad * grad
+        param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
+
+        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestAdagradOp2(OpTest):
+    ''' Test Adagrad operator with default attributes
+    '''
 
-        lr = np.array([0.01]).astype("float32")
+    def setUp(self):
+        self.op_type = "adagrad"
+
+        param = np.random.random((123, 321)).astype("float32")
+        grad = np.random.random((123, 321)).astype("float32")
+        moment = np.zeros((123, 321)).astype("float32")
+        lr = 0.01
         epsilon = 1e-6
 
-        self.inputs = {'param': param, 'grad': grad, 'moment': moment}
+        self.inputs = {
+            'Param': param,
+            'Grad': grad,
+            'Moment': moment,
+            'LearningRate': np.array([lr]).astype("float32")
+        }
 
-        self.attrs = {'learning_rate': learning_rate, 'epsilon': epsilon}
+        self.attrs = {'epsilon': epsilon}
 
         moment_out = moment + grad * grad
         param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
 
-        self.outputs = {'param_out': param_out, 'moment_out': moment_out}
+        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}
 
     def test_check_output(self):
         self.check_output()
```
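The diff does not show the file's footer; assuming it follows the usual pattern for these op tests (plain unittest cases built on OpTest), the file would end with the standard entry point and could be run directly as a script:

```python
import unittest

if __name__ == "__main__":
    unittest.main()
```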
