Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lite/api/paddle_use_passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ USE_MIR_PASS(lite_conv_activation_fuse_pass);
USE_MIR_PASS(lite_var_conv_2d_activation_fuse_pass);
USE_MIR_PASS(lite_match_matrix_activation_fuse_pass);
USE_MIR_PASS(lite_scales_fuse_pass);
USE_MIR_PASS(lite_scaleacts_fuse_pass);
USE_MIR_PASS(lite_sequence_reverse_embedding_fuse_pass);
USE_MIR_PASS(lite_elementwise_activation_fuse_pass);
USE_MIR_PASS(lite_elementwise_scale_fuse_pass);
Expand Down
20 changes: 20 additions & 0 deletions lite/backends/opencl/cl_kernel/image/scale_kernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ __kernel void scale(__read_only image2d_t input,

WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), in);
}

__kernel void scale_relu6(__read_only image2d_t input,
__write_only image2d_t output,
__private float scale,
Expand All @@ -43,3 +44,22 @@ __kernel void scale_relu6(__read_only image2d_t input,
in = min((CL_DTYPE4)(alpha, alpha, alpha, alpha), in);
WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), in);
}

// Fused double-scale kernel: out = scale1 * clamp(scale * in + bias, 0, alpha) + bias1.
// Matches the subgraph produced by lite_scaleacts_fuse_pass: a `scale` op with
// a fused activation followed by a plain `scale` op.
// NOTE(review): the [0, alpha] clamp assumes the first scale's activation is
// relu6-like — confirm the fuse pass only matches that activation type.
__kernel void scaleacts(__read_only image2d_t input,
                        __write_only image2d_t output,
                        __private float scale,
                        __private float bias,
                        __private float alpha,
                        __private float scale1,
                        __private float bias1) {
  const int2 pos = (int2)(get_global_id(0), get_global_id(1));

  CL_DTYPE4 v = READ_IMG_TYPE(CL_DTYPE_CHAR, input, SAMPLER, pos);
  // First scale (bias applied after scale) ...
  v = CONVERT_TYPE_TO(scale, CL_DTYPE) * v + CONVERT_TYPE_TO(bias, CL_DTYPE);
  // ... clamped activation ...
  v = clamp(v, (CL_DTYPE4)(0.0f), (CL_DTYPE4)(alpha));
  // ... then the second scale.
  v = CONVERT_TYPE_TO(scale1, CL_DTYPE) * v + CONVERT_TYPE_TO(bias1, CL_DTYPE);
  WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, pos, v);
}
3 changes: 3 additions & 0 deletions lite/backends/opencl/cl_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ bool CLWrapper::InitHandle() {
// Arm Mali with Android
"/system/vendor/lib/egl/libGLES_mali.so",
"/system/lib/egl/libGLES_mali.so",
// PowerVR Rogue with Android
"/system/vendor/lib/libPVROCL.so",
"/data/data/org.pocl.libs/files/lib/libpocl.so",
#endif // __aarch64__
#elif defined(__linux__)
"/usr/lib/aarch64-linux-gnu/libOpenCL.so",
Expand Down
1 change: 1 addition & 0 deletions lite/core/mir/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ lite_cc_library(mir_passes
fusion/__xpu__bigru_fuse_pass.cc
fusion/match_matrix_activation_fuse_pass.cc
fusion/scales_fuse_pass.cc
fusion/scaleacts_fuse_pass.cc
fusion/sequence_reverse_embedding_fuse_pass.cc
fusion/instance_norm_activation_fuse_pass.cc
fusion/elementwise_add_scale_fuse_pass.cc
Expand Down
4 changes: 4 additions & 0 deletions lite/core/mir/fusion/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ lite_cc_library(fuse_match_matrix_activation
lite_cc_library(fuse_scales
SRCS scales_fuser.cc
DEPS pattern_matcher_high_api)
lite_cc_library(fuse_scaleacts
SRCS scaleacts_fuser.cc
DEPS pattern_matcher_high_api)
lite_cc_library(fuse_sequence_reverse_embedding
SRCS sequence_reverse_embedding_fuser.cc
DEPS pattern_matcher_high_api)
Expand Down Expand Up @@ -98,6 +101,7 @@ set(mir_fusers
fuse_inplace
fuse_match_matrix_activation
fuse_scales
fuse_scaleacts
fuse_sequence_reverse_embedding
fuse_instance_norm_activation
fuse_elementwise_add_scale
Expand Down
38 changes: 38 additions & 0 deletions lite/core/mir/fusion/scaleacts_fuse_pass.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/core/mir/fusion/scaleacts_fuse_pass.h"

#include <memory>
#include <vector>

#include "lite/core/mir/fusion/scaleacts_fuser.h"
#include "lite/core/mir/pass_registry.h"

namespace paddle {
namespace lite {
namespace mir {

void ScaleactsFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
fusion::ScaleactsFuser fuser;
fuser(graph.get());
}

} // namespace mir
} // namespace lite
} // namespace paddle

REGISTER_MIR_PASS(lite_scaleacts_fuse_pass,
paddle::lite::mir::ScaleactsFusePass)
.BindTargets({TARGET(kOpenCL)});
47 changes: 47 additions & 0 deletions lite/core/mir/fusion/scaleacts_fuse_pass.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include "lite/core/mir/pass.h"

namespace paddle {
namespace lite {
namespace mir {

// This pass fuses two scale ops to one scale op.
// Calculation is not reduced by this pass, but the two ops are fused into one.
// This will reduce running time on gpu device.
// MobilenetV3 has this pattern.
//
// For example:
// scale(has act func)
// |
// |
// |
// scale(no act func)
//
// After this pass is applied:
// scale( out = scale1 * (act(scale*in + bias)) + bias1 )

// Program-level MIR pass that drives fusion::ScaleactsFuser; registered as
// "lite_scaleacts_fuse_pass" and bound to the OpenCL target only.
class ScaleactsFusePass : public ProgramPass {
 public:
  // Applies the fusion to every match in `graph`.
  void Apply(const std::unique_ptr<SSAGraph>& graph) override;
};

} // namespace mir
} // namespace lite
} // namespace paddle
102 changes: 102 additions & 0 deletions lite/core/mir/fusion/scaleacts_fuser.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/core/mir/fusion/scaleacts_fuser.h"
#include <memory>
#include <vector>

namespace paddle {
namespace lite {
namespace mir {
namespace fusion {

// Declares the pattern: x -> scale(with activation) -> scale(plain) -> out.
// Both scale ops must have bias_after_scale == true; only the first may carry
// an "activation_type" attribute.
void ScaleactsFuser::BuildPattern() {
  // Input of the first scale op.
  auto* x = VarNode("x")->assert_is_op_input("scale", "X")->AsInput();

  // Factory for the two node tellers: matches a scale op whose
  // bias_after_scale is true and whose possession of an "activation_type"
  // attribute equals `expect_act`. Deduplicates the previous copy-pasted
  // lambdas and guards GetAttr with HasAttr so a scale op missing the
  // attribute is rejected instead of faulting.
  auto make_scale_teller = [](bool expect_act) {
    return [expect_act](const Node* node) -> bool {
      auto* op_info = const_cast<Node*>(node)->AsStmt().op_info();
      if (!op_info->HasAttr("bias_after_scale")) return false;
      const bool bias_after_scale =
          op_info->GetAttr<bool>("bias_after_scale");
      const bool has_act = op_info->HasAttr("activation_type");
      return bias_after_scale && (has_act == expect_act);
    };
  };

  // First scale: carries a fused activation.
  auto* scale1 = OpNode("scale1", "scale")
                     ->assert_is_op("scale")
                     ->assert_node_satisfied(make_scale_teller(true))
                     ->AsIntermediate();
  // Second scale: plain, no activation.
  auto* scale2 = OpNode("scale2", "scale")
                     ->assert_is_op("scale")
                     ->assert_node_satisfied(make_scale_teller(false))
                     ->AsIntermediate();

  // Intermediate var produced by scale1 and consumed by scale2.
  // NOTE(review): consider also asserting this var has a single consumer
  // before marking it intermediate, so fusion can't drop a value some other
  // op still reads — confirm against pattern_matcher_high_api semantics.
  auto* scale1_out = VarNode("scale1_out")
                         ->assert_is_op_output("scale", "Out")
                         ->assert_is_op_input("scale", "X")
                         ->AsIntermediate();

  // Final output of the fused pattern.
  auto* out = VarNode("out")->assert_is_op_output("scale", "Out")->AsOutput();

  // Topology: x -> scale1 -> scale1_out -> scale2 -> out.
  *x >> *scale1 >> *scale1_out >> *scale2 >> *out;
}

// Replaces the matched scale+scale subgraph with one fused scale instruction,
// attached to the first scale's scope and valid places.
void ScaleactsFuser::InsertNewNode(SSAGraph* graph,
                                   const key2nodes_t& matched) {
  // Build the fused op from the merged descriptor.
  auto fused_desc = GenOpDesc(matched);
  auto fused_op = LiteOpRegistry::Global().Create("scale");
  auto first_scale_op = matched.at("scale1")->stmt()->op();
  fused_op->Attach(fused_desc, first_scale_op->scope());

  // Insert the new instruction node and rewire input/output around it.
  auto* fused_node = graph->GraphCreateInstructNode(
      fused_op, first_scale_op->valid_places());
  IR_NODE_LINK_TO(matched.at("x"), fused_node);
  IR_NODE_LINK_TO(fused_node, matched.at("out"));
}

// Builds the fused op descriptor: starts from the first scale's desc (keeping
// its scale/bias/activation attributes) and folds the second scale's
// parameters in as "scale1"/"bias1" for the fused OpenCL kernel.
cpp::OpDesc ScaleactsFuser::GenOpDesc(const key2nodes_t& matched) {
  cpp::OpDesc op_desc = *matched.at("scale1")->stmt()->op_info();

  const auto* second_desc = matched.at("scale2")->stmt()->op_info();
  op_desc.SetAttr("fuse_scaleact", true);
  op_desc.SetAttr("scale1", second_desc->GetAttr<float>("scale"));
  op_desc.SetAttr("bias1", second_desc->GetAttr<float>("bias"));

  // The fused op writes directly to the pattern's final output var.
  op_desc.SetOutput("Out", {matched.at("out")->arg()->name});

  return op_desc;
}

} // namespace fusion
} // namespace mir
} // namespace lite
} // namespace paddle
38 changes: 38 additions & 0 deletions lite/core/mir/fusion/scaleacts_fuser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include "lite/core/mir/pattern_matcher_high_api.h"

namespace paddle {
namespace lite {
namespace mir {
namespace fusion {

// Fuses a `scale` op that carries a fused activation with the immediately
// following plain `scale` op into a single scale op (adding the
// fuse_scaleact / scale1 / bias1 attributes consumed by the OpenCL kernel).
class ScaleactsFuser : public FuseBase {
 public:
  // Declares the x -> scale(act) -> scale -> out pattern to match.
  void BuildPattern() override;
  // Replaces the matched subgraph with the fused scale op.
  void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override;

 private:
  // Merges the two scale ops' attributes into one op descriptor.
  cpp::OpDesc GenOpDesc(const key2nodes_t& matched) override;
};

} // namespace fusion
} // namespace mir
} // namespace lite
} // namespace paddle
4 changes: 3 additions & 1 deletion lite/core/mir/fusion/scales_fuser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ void ScalesFuser::BuildPattern() {
bool bias_after_scale =
const_cast<Node*>(node)->AsStmt().op_info()->GetAttr<bool>(
"bias_after_scale");
return bias_after_scale;
bool has_act =
const_cast<Node*>(node)->AsStmt().op_info()->HasAttr("activation_type");
return bias_after_scale && (!has_act);
};

// create op nodes
Expand Down
1 change: 1 addition & 0 deletions lite/core/optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ class Optimizer {
"elementwise_mul_constant_eliminate_pass", //
"lite_sequence_pool_concat_fuse_pass", //
"lite_scale_activation_fuse_pass", //
"lite_scaleacts_fuse_pass", //
"lite_elementwise_scale_fuse_pass", //
"lite_instance_norm_activation_fuse_pass", //
"lite_flatten_fc_fuse_pass", //
Expand Down
20 changes: 8 additions & 12 deletions lite/core/profile/precision_profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,15 @@ namespace lite {
namespace profile {

// Returns the current local time formatted as "YYYY-mm-dd_HH-MM-SS" for use
// in profiler output naming. Falls back to the literal "timestamp" when the
// calendar conversion fails.
// (Rebuilt: the previous span interleaved the removed microsecond/gmtime code
// with the new strftime/localtime code and was not valid C++.)
static const std::string get_date_str() {
  std::time_t now = std::time(nullptr);
  char buffer[32];  // "%Y-%m-%d_%H-%M-%S" needs 20 bytes incl. terminator.
  if (std::strftime(
          buffer, sizeof(buffer), "%Y-%m-%d_%H-%M-%S", std::localtime(&now))) {
    return std::string(buffer);
  } else {
    LOG(WARNING) << "Convert calendar time error! Use the default timestamp.";
    return "timestamp";
  }
}

inline std::string generate_valid_tensor_name(const std::string& name) {
Expand Down
11 changes: 11 additions & 0 deletions lite/kernels/opencl/scale_image_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
LOG(FATAL) << "Unsupported activation type: "
<< scale_param_->activation_type;
}
if (scale_param_->fuse_scaleact) {
kernel_func_name_ = "scaleacts";
}
context.cl_context()->AddKernel(kernel_func_name_,
"image/scale_kernel.cl",
build_options_,
Expand Down Expand Up @@ -87,6 +90,8 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
bias *= scale;
}
const float alpha = scale_param_->alpha;
const float scale1 = scale_param_->scale1;
const float bias1 = scale_param_->bias1;

auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
Expand All @@ -103,6 +108,12 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
CL_CHECK_FATAL(status);
status = kernel.setArg(4, alpha);
CL_CHECK_FATAL(status);
if (kernel_func_name_ == "scaleacts") {
status = kernel.setArg(5, scale1);
CL_CHECK_FATAL(status);
status = kernel.setArg(6, bias1);
CL_CHECK_FATAL(status);
}

status = EnqueueNDRangeKernel(context,
kernel,
Expand Down
6 changes: 5 additions & 1 deletion lite/operators/op_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,11 +281,15 @@ struct ScaleParam : ParamBase {
lite::Tensor* output{};

float scale{1.f};
float bias{};
float bias{0.f};
bool bias_after_scale{true};
std::string activation_type{""};
bool fuse_relu{false};
float alpha{6.f};

bool fuse_scaleact{false};
float scale1{1.f};
float bias1{0.f};
///////////////////////////////////////////////////////////////////////////////////
// get a vector of input tensors
const std::vector<const Tensor*>* input_tensor_ptrs() override {
Expand Down
Loading