From 521327d48e37bd0b6ecea14f21e3ff2e2657ba68 Mon Sep 17 00:00:00 2001 From: zhaoyang-star Date: Sun, 13 Jun 2021 17:27:42 +0800 Subject: [PATCH 1/6] [OpenCL] Support PowerVR Rogue GPU (#5962) --- lite/backends/opencl/cl_wrapper.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lite/backends/opencl/cl_wrapper.cc b/lite/backends/opencl/cl_wrapper.cc index 3d18a065972..3aa6e7ef97f 100644 --- a/lite/backends/opencl/cl_wrapper.cc +++ b/lite/backends/opencl/cl_wrapper.cc @@ -68,6 +68,9 @@ bool CLWrapper::InitHandle() { // Arm Mali with Android "/system/vendor/lib/egl/libGLES_mali.so", "/system/lib/egl/libGLES_mali.so", + // PowerVR Rogue with Android + "/system/vendor/lib/libPVROCL.so", + "/data/data/org.pocl.libs/files/lib/libpocl.so", #endif // __aarch64__ #elif defined(__linux__) "/usr/lib/aarch64-linux-gnu/libOpenCL.so", From 0a5f0d150251de70461e1a7651145fdef6ccd224 Mon Sep 17 00:00:00 2001 From: zhaoyang-star Date: Tue, 27 Apr 2021 19:39:00 +0800 Subject: [PATCH 2/6] [Pass] Fuse scale(with act) + scale(without act) (#5982) --- lite/api/paddle_use_passes.h | 1 + .../opencl/cl_kernel/image/scale_kernel.cl | 20 ++++ lite/core/mir/CMakeLists.txt | 1 + lite/core/mir/fusion/CMakeLists.txt | 4 + lite/core/mir/fusion/scaleacts_fuse_pass.cc | 38 +++++++ lite/core/mir/fusion/scaleacts_fuse_pass.h | 47 ++++++++ lite/core/mir/fusion/scaleacts_fuser.cc | 102 ++++++++++++++++++ lite/core/mir/fusion/scaleacts_fuser.h | 38 +++++++ lite/core/mir/fusion/scales_fuser.cc | 4 +- lite/core/optimizer.h | 1 + lite/kernels/opencl/scale_image_compute.cc | 11 ++ lite/operators/op_params.h | 6 +- lite/operators/scale_op.cc | 6 ++ lite/operators/scale_op.h | 1 + 14 files changed, 278 insertions(+), 2 deletions(-) create mode 100644 lite/core/mir/fusion/scaleacts_fuse_pass.cc create mode 100644 lite/core/mir/fusion/scaleacts_fuse_pass.h create mode 100644 lite/core/mir/fusion/scaleacts_fuser.cc create mode 100644 lite/core/mir/fusion/scaleacts_fuser.h diff --git 
a/lite/api/paddle_use_passes.h b/lite/api/paddle_use_passes.h index 2675c1e2d7e..b29e85889f8 100644 --- a/lite/api/paddle_use_passes.h +++ b/lite/api/paddle_use_passes.h @@ -48,6 +48,7 @@ USE_MIR_PASS(lite_conv_activation_fuse_pass); USE_MIR_PASS(lite_var_conv_2d_activation_fuse_pass); USE_MIR_PASS(lite_match_matrix_activation_fuse_pass); USE_MIR_PASS(lite_scales_fuse_pass); +USE_MIR_PASS(lite_scaleacts_fuse_pass); USE_MIR_PASS(lite_sequence_reverse_embedding_fuse_pass); USE_MIR_PASS(lite_elementwise_activation_fuse_pass); USE_MIR_PASS(lite_elementwise_scale_fuse_pass); diff --git a/lite/backends/opencl/cl_kernel/image/scale_kernel.cl b/lite/backends/opencl/cl_kernel/image/scale_kernel.cl index 6bba53d960e..878afc8aa0c 100644 --- a/lite/backends/opencl/cl_kernel/image/scale_kernel.cl +++ b/lite/backends/opencl/cl_kernel/image/scale_kernel.cl @@ -28,6 +28,7 @@ __kernel void scale(__read_only image2d_t input, WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), in); } + __kernel void scale_relu6(__read_only image2d_t input, __write_only image2d_t output, __private float scale, @@ -43,3 +44,22 @@ __kernel void scale_relu6(__read_only image2d_t input, in = min((CL_DTYPE4)(alpha, alpha, alpha, alpha), in); WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), in); } + +__kernel void scaleacts(__read_only image2d_t input, + __write_only image2d_t output, + __private float scale, + __private float bias, + __private float alpha, + __private float scale1, + __private float bias1) { + + const int x = get_global_id(0); + const int y = get_global_id(1); + + CL_DTYPE4 in = READ_IMG_TYPE(CL_DTYPE_CHAR, input, SAMPLER, (int2)(x, y)); + in = CONVERT_TYPE_TO(scale, CL_DTYPE) * in + CONVERT_TYPE_TO(bias, CL_DTYPE); + in = max((CL_DTYPE4)(0.0f, 0.0f, 0.0f, 0.0f), in); + in = min((CL_DTYPE4)(alpha, alpha, alpha, alpha), in); + in = CONVERT_TYPE_TO(scale1, CL_DTYPE) * in + CONVERT_TYPE_TO(bias1, CL_DTYPE); + WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), in); +} diff --git 
a/lite/core/mir/CMakeLists.txt b/lite/core/mir/CMakeLists.txt index 17b47c11622..9ea400ce3c5 100644 --- a/lite/core/mir/CMakeLists.txt +++ b/lite/core/mir/CMakeLists.txt @@ -56,6 +56,7 @@ lite_cc_library(mir_passes fusion/__xpu__bigru_fuse_pass.cc fusion/match_matrix_activation_fuse_pass.cc fusion/scales_fuse_pass.cc + fusion/scaleacts_fuse_pass.cc fusion/sequence_reverse_embedding_fuse_pass.cc fusion/instance_norm_activation_fuse_pass.cc fusion/elementwise_add_scale_fuse_pass.cc diff --git a/lite/core/mir/fusion/CMakeLists.txt b/lite/core/mir/fusion/CMakeLists.txt index 60832c54e3f..c32d27e08f8 100644 --- a/lite/core/mir/fusion/CMakeLists.txt +++ b/lite/core/mir/fusion/CMakeLists.txt @@ -61,6 +61,9 @@ lite_cc_library(fuse_match_matrix_activation lite_cc_library(fuse_scales SRCS scales_fuser.cc DEPS pattern_matcher_high_api) +lite_cc_library(fuse_scaleacts + SRCS scaleacts_fuser.cc + DEPS pattern_matcher_high_api) lite_cc_library(fuse_sequence_reverse_embedding SRCS sequence_reverse_embedding_fuser.cc DEPS pattern_matcher_high_api) @@ -98,6 +101,7 @@ set(mir_fusers fuse_inplace fuse_match_matrix_activation fuse_scales + fuse_scaleacts fuse_sequence_reverse_embedding fuse_instance_norm_activation fuse_elementwise_add_scale diff --git a/lite/core/mir/fusion/scaleacts_fuse_pass.cc b/lite/core/mir/fusion/scaleacts_fuse_pass.cc new file mode 100644 index 00000000000..1f603af9131 --- /dev/null +++ b/lite/core/mir/fusion/scaleacts_fuse_pass.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/core/mir/fusion/scaleacts_fuse_pass.h"
+
+#include <memory>
+#include <vector>
+
+#include "lite/core/mir/fusion/scaleacts_fuser.h"
+#include "lite/core/mir/pass_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace mir {
+
+void ScaleactsFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
+  fusion::ScaleactsFuser fuser;
+  fuser(graph.get());
+}
+
+}  // namespace mir
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_MIR_PASS(lite_scaleacts_fuse_pass,
+                  paddle::lite::mir::ScaleactsFusePass)
+    .BindTargets({TARGET(kOpenCL)});
diff --git a/lite/core/mir/fusion/scaleacts_fuse_pass.h b/lite/core/mir/fusion/scaleacts_fuse_pass.h
new file mode 100644
index 00000000000..f3da413398e
--- /dev/null
+++ b/lite/core/mir/fusion/scaleacts_fuse_pass.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include "lite/core/mir/pass.h"
+
+namespace paddle {
+namespace lite {
+namespace mir {
+
+// This pass fuses two scale ops to one scale op.
+// Calculation will not be reduced by this pass but two ops fused to one.
+// This will reduce running time on gpu device.
+// MobilenetV3 has this pattern.
+//
+// For example:
+//     scale(has act func)
+//       |
+//       |
+//       |
+//     scale(no act func)
+//
+// After this pass is applied:
+//     scale( out = scale1 * (act(scale*in + bias)) + bias1 )
+
+class ScaleactsFusePass : public ProgramPass {
+ public:
+  void Apply(const std::unique_ptr<SSAGraph>& graph) override;
+};
+
+}  // namespace mir
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/core/mir/fusion/scaleacts_fuser.cc b/lite/core/mir/fusion/scaleacts_fuser.cc
new file mode 100644
index 00000000000..0583a981696
--- /dev/null
+++ b/lite/core/mir/fusion/scaleacts_fuser.cc
@@ -0,0 +1,102 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/core/mir/fusion/scaleacts_fuser.h"
+#include <memory>
+#include <vector>
+
+namespace paddle {
+namespace lite {
+namespace mir {
+namespace fusion {
+
+void ScaleactsFuser::BuildPattern() {
+  // create input nodes.
+  auto* x = VarNode("x")->assert_is_op_input("scale", "X")->AsInput();
+
+  auto scales_teller1 = [](const Node* node) -> bool {
+    bool bias_after_scale =
+        const_cast<Node*>(node)->AsStmt().op_info()->GetAttr<bool>(
+            "bias_after_scale");
+    bool has_act =
+        const_cast<Node*>(node)->AsStmt().op_info()->HasAttr("activation_type");
+    return bias_after_scale && has_act;
+  };
+  auto scales_teller2 = [](const Node* node) -> bool {
+    bool bias_after_scale =
+        const_cast<Node*>(node)->AsStmt().op_info()->GetAttr<bool>(
+            "bias_after_scale");
+    bool has_act =
+        const_cast<Node*>(node)->AsStmt().op_info()->HasAttr("activation_type");
+    return bias_after_scale && (!has_act);
+  };
+
+  // create op nodes
+  auto* scale1 = OpNode("scale1", "scale")
+                     ->assert_is_op("scale")
+                     ->assert_node_satisfied(scales_teller1)
+                     ->AsIntermediate();
+  auto* scale2 = OpNode("scale2", "scale")
+                     ->assert_is_op("scale")
+                     ->assert_node_satisfied(scales_teller2)
+                     ->AsIntermediate();
+
+  // create intermediate nodes
+  auto* scale1_out = VarNode("scale1_out")
+                         ->assert_is_op_output("scale", "Out")
+                         ->assert_is_op_input("scale", "X")
+                         ->AsIntermediate();
+
+  // create output node
+  auto* out = VarNode("out")->assert_is_op_output("scale", "Out")->AsOutput();
+
+  // create topology.
+  *x >> *scale1 >> *scale1_out >> *scale2 >> *out;
+}
+
+void ScaleactsFuser::InsertNewNode(SSAGraph* graph,
+                                   const key2nodes_t& matched) {
+  auto op_desc = GenOpDesc(matched);
+  auto scale_op = LiteOpRegistry::Global().Create("scale");
+  auto scale = matched.at("scale1")->stmt()->op();
+  auto* scope = scale->scope();
+  auto& valid_places = scale->valid_places();
+  scale_op->Attach(op_desc, scope);
+
+  auto* new_op_node = graph->GraphCreateInstructNode(scale_op, valid_places);
+
+  IR_NODE_LINK_TO(matched.at("x"), new_op_node);
+  IR_NODE_LINK_TO(new_op_node, matched.at("out"));
+}
+
+cpp::OpDesc ScaleactsFuser::GenOpDesc(const key2nodes_t& matched) {
+  auto* op_desc_tmp = matched.at("scale2")->stmt()->op_info();
+  float scale1 = op_desc_tmp->GetAttr<float>("scale");
+  float bias1 = op_desc_tmp->GetAttr<float>("bias");
+
+  auto op_desc = *matched.at("scale1")->stmt()->op_info();
+  op_desc.SetAttr("fuse_scaleact", true);
+  op_desc.SetAttr("scale1", scale1);
+  op_desc.SetAttr("bias1", bias1);
+
+  auto& out_name = matched.at("out")->arg()->name;
+  op_desc.SetOutput("Out", {out_name});
+
+  return op_desc;
+}
+
+}  // namespace fusion
+}  // namespace mir
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/core/mir/fusion/scaleacts_fuser.h b/lite/core/mir/fusion/scaleacts_fuser.h
new file mode 100644
index 00000000000..e991b7bfba6
--- /dev/null
+++ b/lite/core/mir/fusion/scaleacts_fuser.h
@@ -0,0 +1,38 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "lite/core/mir/pattern_matcher_high_api.h" + +namespace paddle { +namespace lite { +namespace mir { +namespace fusion { + +class ScaleactsFuser : public FuseBase { + public: + void BuildPattern() override; + void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override; + + private: + cpp::OpDesc GenOpDesc(const key2nodes_t& matched) override; +}; + +} // namespace fusion +} // namespace mir +} // namespace lite +} // namespace paddle diff --git a/lite/core/mir/fusion/scales_fuser.cc b/lite/core/mir/fusion/scales_fuser.cc index a82193c1e20..825e41d4186 100644 --- a/lite/core/mir/fusion/scales_fuser.cc +++ b/lite/core/mir/fusion/scales_fuser.cc @@ -29,7 +29,9 @@ void ScalesFuser::BuildPattern() { bool bias_after_scale = const_cast(node)->AsStmt().op_info()->GetAttr( "bias_after_scale"); - return bias_after_scale; + bool has_act = + const_cast(node)->AsStmt().op_info()->HasAttr("activation_type"); + return bias_after_scale && (!has_act); }; // create op nodes diff --git a/lite/core/optimizer.h b/lite/core/optimizer.h index d1ba19749d8..f931e33d17f 100644 --- a/lite/core/optimizer.h +++ b/lite/core/optimizer.h @@ -111,6 +111,7 @@ class Optimizer { "elementwise_mul_constant_eliminate_pass", // "lite_sequence_pool_concat_fuse_pass", // "lite_scale_activation_fuse_pass", // + "lite_scaleacts_fuse_pass", // "lite_elementwise_scale_fuse_pass", // "lite_instance_norm_activation_fuse_pass", // "lite_flatten_fc_fuse_pass", // diff --git a/lite/kernels/opencl/scale_image_compute.cc b/lite/kernels/opencl/scale_image_compute.cc index cfac090b994..bb85d359737 100644 --- a/lite/kernels/opencl/scale_image_compute.cc +++ b/lite/kernels/opencl/scale_image_compute.cc @@ -42,6 +42,9 @@ class ScaleComputeImage2D : public KernelLiteactivation_type; } + if (scale_param_->fuse_scaleact) { + kernel_func_name_ = "scaleacts"; + } 
context.cl_context()->AddKernel(kernel_func_name_, "image/scale_kernel.cl", build_options_, @@ -87,6 +90,8 @@ class ScaleComputeImage2D : public KernelLitealpha; + const float scale1 = scale_param_->scale1; + const float bias1 = scale_param_->bias1; auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); @@ -103,6 +108,12 @@ class ScaleComputeImage2D : public KernelLite* input_tensor_ptrs() override { diff --git a/lite/operators/scale_op.cc b/lite/operators/scale_op.cc index 5f398d36479..faacfe33699 100644 --- a/lite/operators/scale_op.cc +++ b/lite/operators/scale_op.cc @@ -52,6 +52,12 @@ bool ScaleOp::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) { CHECK(false) << "The fused conv only supports fuse with relu and leaky relu"; } + + if (op_desc.HasAttr("fuse_scaleact")) { + param_.fuse_scaleact = op_desc.GetAttr("fuse_scaleact"); + param_.scale1 = op_desc.GetAttr("scale1"); + param_.bias1 = op_desc.GetAttr("bias1"); + } } CHECK(param_.x); CHECK(param_.output); diff --git a/lite/operators/scale_op.h b/lite/operators/scale_op.h index 73a01ab24e5..8da4b0a4765 100644 --- a/lite/operators/scale_op.h +++ b/lite/operators/scale_op.h @@ -45,6 +45,7 @@ class ScaleOp : public OpLite { ch->remark = param_.activation_type + "alpha" + std::to_string(param_.alpha); ch->macs = param_.x->numel() * 1.f; + if (param_.fuse_scaleact) ch->macs *= 2; } #endif From 8268ca3ae5b5f73ba2b9d61e23c3a11902ebd4ff Mon Sep 17 00:00:00 2001 From: zhaoyang-star Date: Wed, 28 Apr 2021 16:30:46 +0800 Subject: [PATCH 3/6] [Core][Precision_Profile] Fix precision_profile tensor data folder name (#5993) --- lite/core/profile/precision_profiler.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/lite/core/profile/precision_profiler.h b/lite/core/profile/precision_profiler.h index 7d97bb4a333..bc95ac33c90 100644 --- a/lite/core/profile/precision_profiler.h +++ b/lite/core/profile/precision_profiler.h @@ -53,19 +53,15 @@ namespace lite { 
namespace profile { static const std::string get_date_str() { - auto timestamp = - std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch()) - .count(); - std::time_t time_t = timestamp / 1000000; - auto gmtime = std::gmtime(&time_t); + std::time_t now = std::time(nullptr); char buffer[32]; - strftime(buffer, 32, "%Y-%m-%d_%H-%M-%S_", gmtime); - char microseconds[8]; - snprintf(microseconds, sizeof(microseconds), "%06ld", timestamp % 1000000); - - // print date / time - return std::string(buffer) + microseconds; + if (std::strftime( + buffer, sizeof(buffer), "%Y-%m-%d_%H-%M-%S", std::localtime(&now))) { + return std::string(buffer); + } else { + LOG(WARNING) << "Convert calendar time error! Use the default timestamp."; + return "timestamp"; + } } inline std::string generate_valid_tensor_name(const std::string& name) { From c1865772b3fc317e4f6885b543b9546d8f870388 Mon Sep 17 00:00:00 2001 From: zhaoyang-star Date: Sun, 13 Jun 2021 18:41:52 +0800 Subject: [PATCH 4/6] test=develop From 316e52fee37388e619d519c677581346e2002c4a Mon Sep 17 00:00:00 2001 From: zhaoyang-star Date: Tue, 15 Jun 2021 10:54:08 +0800 Subject: [PATCH 5/6] test=develop From 5264fb0143ce4717aa8d27299b70eef0db9dd710 Mon Sep 17 00:00:00 2001 From: zhaoyang-star Date: Wed, 16 Jun 2021 15:59:55 +0800 Subject: [PATCH 6/6] trigger ci. test=develop