Skip to content

Commit 5b1a5c1

Browse files
author
xutianbing
committed
Daoyuan's comments.
1 parent 999cd14 commit 5b1a5c1

File tree

5 files changed

+48
-100
lines changed

5 files changed

+48
-100
lines changed

paddle/function/FunctionTest.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class FunctionCompare {
7070
}
7171

7272
// output only needs to contain the shape; it does not need to contain data.
73-
void addOutputs(const BufferArg& output, ArgType argType = ADD_TO) {
73+
void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
7474
size_t size =
7575
output.shape().getElements() * sizeOfValuType(output.valueType());
7676
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));

paddle/function/MulOp.cpp

Lines changed: 21 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuSparseMatrix& out,
4949
real scaleAB,
5050
real scaleT,
5151
bool aTrans,
52-
bool bTrans,
53-
bool cTrans) {
52+
bool bTrans) {
5453
CHECK_EQ(out.getValueType(), FLOAT_VALUE);
5554
if (scaleT == 0) {
5655
out.zeroMem();
@@ -114,8 +113,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
114113
real scaleAB,
115114
real scaleT,
116115
bool aTrans,
117-
bool bTrans,
118-
bool cTrans) {
116+
bool bTrans) {
119117
GEMM(aTrans ? CblasTrans : CblasNoTrans,
120118
bTrans ? CblasTrans : CblasNoTrans,
121119
out.getHeight(),
@@ -139,8 +137,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
139137
real scaleAB,
140138
real scaleT,
141139
bool aTrans,
142-
bool bTrans,
143-
bool cTrans) {
140+
bool bTrans) {
144141
if (scaleT == 0) {
145142
out.zeroMem();
146143
}
@@ -174,8 +171,7 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
174171
real scaleAB,
175172
real scaleT,
176173
bool aTrans,
177-
bool bTrans,
178-
bool cTrans) {
174+
bool bTrans) {
179175
if (scaleT == 0) {
180176
out.zeroMem();
181177
}
@@ -222,10 +218,10 @@ void MulOp<DEVICE_TYPE_CPU>(CpuMatrix& out,
222218

223219
/**
224220
* mul operator
225-
* out = scaleT * out + scaleAB * (in1 * in2)
221+
* out = scaleT * out + scaleAB * (A * B)
226222
* here, scaleT in {0, 1}, scaleAB == 1,
227-
* out = in1 (A) * in2 (B), ASSIGN_TO
228-
* out += in1 (A) * in2 (B), ADD_TO
223+
* out = A * B, ASSIGN_TO
224+
* out += A * B, ADD_TO
229225
*
230226
*
231227
* \param outputs[0] output matrix (out), M * N,
@@ -253,15 +249,11 @@ template <DeviceType Device>
253249
class MulFunc : public FunctionBase {
254250
public:
255251
void init(const FuncConfig& config) override {
256-
alpha_ = config.get<real>("scaleAB");
257-
beta_ = config.get<real>("scaleT");
258252
aTrans_ = config.get<bool>("aTrans");
259253
bTrans_ = config.get<bool>("bTrans");
260-
cTrans_ = config.get<bool>("cTrans");
261254
}
262255

263256
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
264-
CHECK(!cTrans_) << "output matrix should not be transposed";
265257
CHECK(!aTrans_ || !bTrans_)
266258
<< "Not support both a and b are transpose matrices";
267259

@@ -281,10 +273,8 @@ class MulFunc : public FunctionBase {
281273
CHECK_EQ(aRow, outputs[0].shape()[0]);
282274
CHECK_EQ(bCol, outputs[0].shape()[1]);
283275

284-
/// only support C = A * B or C += A * B
285-
CHECK_EQ(alpha_, static_cast<real>(1.0));
286-
CHECK((beta_ == 0 && outputs[0].getArgType() == ASSIGN_TO) ||
287-
(beta_ == 1 && outputs[0].getArgType() == ADD_TO));
276+
/// only support C = A * B (ASSIGN_TO) or C += A * B (ADD_TO)
277+
real scaleT = (outputs[0].getArgType() == ADD_TO) ? 1.0 : 0.0;
288278

289279
/// support dense = not both sparse * sparse
290280
/// or sparse = dense * dense
@@ -300,11 +290,10 @@ class MulFunc : public FunctionBase {
300290
MulOp<Device>(outMat,
301291
inputs[0].matrix<Device>(),
302292
inputs[1].matrix<Device>(),
303-
alpha_,
304-
beta_,
293+
1.0, // scaleAB
294+
scaleT,
305295
aTrans_,
306-
bTrans_,
307-
cTrans_);
296+
bTrans_);
308297
return;
309298
}
310299

@@ -315,11 +304,10 @@ class MulFunc : public FunctionBase {
315304
MulOp<Device>(outMat,
316305
inputs[0].matrix<Device>(),
317306
inputs[1].sparse().SparseMatrix<Device>(),
318-
alpha_,
319-
beta_,
307+
1.0, // scaleAB
308+
scaleT,
320309
aTrans_,
321-
bTrans_,
322-
cTrans_);
310+
bTrans_);
323311
return;
324312
}
325313

@@ -332,11 +320,10 @@ class MulFunc : public FunctionBase {
332320
MulOp<Device>(outMat,
333321
inputs[0].sparse().SparseMatrix<Device>(),
334322
inputs[1].matrix<Device>(),
335-
alpha_,
336-
beta_,
323+
1.0, // scaleAB
324+
scaleT,
337325
aTrans_,
338-
bTrans_,
339-
cTrans_);
326+
bTrans_);
340327
return;
341328
}
342329

@@ -347,21 +334,17 @@ class MulFunc : public FunctionBase {
347334
MulOp<Device>(outSparseMat,
348335
inputs[0].matrix<Device>(),
349336
inputs[1].matrix<Device>(),
350-
alpha_,
351-
beta_,
337+
1.0, // scaleAB
338+
scaleT,
352339
aTrans_,
353-
bTrans_,
354-
cTrans_);
340+
bTrans_);
355341
return;
356342
}
357343
}
358344

359345
private:
360-
real alpha_;
361-
real beta_;
362346
bool aTrans_;
363347
bool bTrans_;
364-
bool cTrans_;
365348
};
366349

367350
REGISTER_TYPED_FUNC(MulOp, CPU, MulFunc);

paddle/function/MulOp.h

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ void MulOp(CpuMatrix& out,
2727
real scaleAB,
2828
real scaleT,
2929
bool aTrans,
30-
bool bTrans,
31-
bool cTrans);
30+
bool bTrans);
3231

3332
/// CPU, dense matrix (+)= sparse matrix * dense matrix
3433
template <DeviceType DType>
@@ -38,8 +37,7 @@ void MulOp(CpuMatrix& out,
3837
real scaleAB,
3938
real scaleT,
4039
bool aTrans,
41-
bool bTrans,
42-
bool cTrans);
40+
bool bTrans);
4341

4442
/// CPU, dense matrix (+)= dense matrix * sparse matrix
4543
template <DeviceType DType>
@@ -49,8 +47,7 @@ void MulOp(CpuMatrix& out,
4947
real scaleAB,
5048
real scaleT,
5149
bool aTrans,
52-
bool bTrans,
53-
bool cTrans);
50+
bool bTrans);
5451

5552
/// CPU, sparse matrix (+)= dense matrix * dense matrix
5653
template <DeviceType DType>
@@ -60,8 +57,7 @@ void MulOp(CpuSparseMatrix& out,
6057
real scaleAB,
6158
real scaleT,
6259
bool aTrans,
63-
bool bTrans,
64-
bool cTrans);
60+
bool bTrans);
6561

6662
/// GPU, dense matrix (+)= dense matrix * dense matrix
6763
template <DeviceType DType>
@@ -71,8 +67,7 @@ void MulOp(GpuMatrix& out,
7167
real scaleAB,
7268
real scaleT,
7369
bool aTrans,
74-
bool bTrans,
75-
bool cTrans);
70+
bool bTrans);
7671

7772
/// GPU, dense matrix (+)= sparse matrix * dense matrix
7873
template <DeviceType DType>
@@ -82,8 +77,7 @@ void MulOp(GpuMatrix& out,
8277
real scaleAB,
8378
real scaleT,
8479
bool aTrans,
85-
bool bTrans,
86-
bool cTrans);
80+
bool bTrans);
8781

8882
/// GPU, dense matrix (+)= dense matrix * sparse matrix
8983
template <DeviceType DType>
@@ -93,8 +87,8 @@ void MulOp(GpuMatrix& out,
9387
real scaleAB,
9488
real scaleT,
9589
bool aTrans,
96-
bool bTrans,
97-
bool cTrans);
90+
bool bTrans);
91+
9892
/// GPU, sparse matrix (+)= dense matrix * dense matrix
9993
template <DeviceType DType>
10094
void MulOp(GpuSparseMatrix& out,
@@ -103,7 +97,6 @@ void MulOp(GpuSparseMatrix& out,
10397
real scaleAB,
10498
real scaleT,
10599
bool aTrans,
106-
bool bTrans,
107-
bool cTrans);
100+
bool bTrans);
108101

109102
} // namespace paddle

paddle/function/MulOpGpu.cu

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
2626
real scaleAB,
2727
real scaleT,
2828
bool aTrans,
29-
bool bTrans,
30-
bool cTrans) {
29+
bool bTrans) {
3130
CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match";
3231
hl_matrix_mul(const_cast<real*>(a.getData()),
3332
!aTrans ? HPPL_OP_N : HPPL_OP_T,
@@ -52,8 +51,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
5251
real scaleAB,
5352
real scaleT,
5453
bool aTrans,
55-
bool bTrans,
56-
bool cTrans) {
54+
bool bTrans) {
5755
CHECK(out.isContiguous());
5856
CHECK(b.isContiguous());
5957
CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match";
@@ -77,8 +75,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuMatrix& out,
7775
real scaleAB,
7876
real scaleT,
7977
bool aTrans,
80-
bool bTrans,
81-
bool cTrans) {
78+
bool bTrans) {
8279
CHECK(out.isContiguous());
8380
CHECK(a.isContiguous());
8481
CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match";
@@ -116,8 +113,7 @@ void MulOp<DEVICE_TYPE_GPU>(GpuSparseMatrix& out,
116113
real scaleAB,
117114
real scaleT,
118115
bool aTrans,
119-
bool bTrans,
120-
bool cTrans) {
116+
bool bTrans) {
121117
CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match";
122118
hl_sparse_matrix_mul(const_cast<real*>(a.getData()),
123119
aTrans ? HPPL_OP_T : HPPL_OP_N,

0 commit comments

Comments
 (0)