Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lite/api/paddle_use_passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ USE_MIR_PASS(lite_conv_activation_fuse_pass);
USE_MIR_PASS(lite_var_conv_2d_activation_fuse_pass);
USE_MIR_PASS(lite_match_matrix_activation_fuse_pass);
USE_MIR_PASS(lite_scales_fuse_pass);
USE_MIR_PASS(lite_scaleacts_fuse_pass);
USE_MIR_PASS(lite_sequence_reverse_embedding_fuse_pass);
USE_MIR_PASS(lite_elementwise_activation_fuse_pass);
USE_MIR_PASS(lite_elementwise_scale_fuse_pass);
Expand Down
20 changes: 20 additions & 0 deletions lite/backends/opencl/cl_kernel/image/scale_kernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ __kernel void scale(__read_only image2d_t input,

WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), in);
}

__kernel void scale_relu6(__read_only image2d_t input,
__write_only image2d_t output,
__private float scale,
Expand All @@ -43,3 +44,22 @@ __kernel void scale_relu6(__read_only image2d_t input,
in = min((CL_DTYPE4)(alpha, alpha, alpha, alpha), in);
WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), in);
}

// Fused double-scale kernel: out = scale1 * clamp(scale * in + bias, 0, alpha) + bias1.
// Matches the subgraph produced by lite_scaleacts_fuse_pass: a `scale` op with
// a fused activation followed by a plain `scale` op.
// NOTE(review): the [0, alpha] clamp assumes the first scale's activation is
// relu6-like — confirm the fuse pass only matches that activation type.
__kernel void scaleacts(__read_only image2d_t input,
                        __write_only image2d_t output,
                        __private float scale,
                        __private float bias,
                        __private float alpha,
                        __private float scale1,
                        __private float bias1) {
  const int2 pos = (int2)(get_global_id(0), get_global_id(1));

  CL_DTYPE4 v = READ_IMG_TYPE(CL_DTYPE_CHAR, input, SAMPLER, pos);
  // First scale (bias applied after scale) ...
  v = CONVERT_TYPE_TO(scale, CL_DTYPE) * v + CONVERT_TYPE_TO(bias, CL_DTYPE);
  // ... clamped activation ...
  v = clamp(v, (CL_DTYPE4)(0.0f), (CL_DTYPE4)(alpha));
  // ... then the second scale.
  v = CONVERT_TYPE_TO(scale1, CL_DTYPE) * v + CONVERT_TYPE_TO(bias1, CL_DTYPE);
  WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, pos, v);
}
3 changes: 3 additions & 0 deletions lite/backends/opencl/cl_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ bool CLWrapper::InitHandle() {
// Arm Mali with Android
"/system/vendor/lib/egl/libGLES_mali.so",
"/system/lib/egl/libGLES_mali.so",
// PowerVR Rogue with Android
"/system/vendor/lib/libPVROCL.so",
"/data/data/org.pocl.libs/files/lib/libpocl.so",
#endif // __aarch64__
#elif defined(__linux__)
"/usr/lib/aarch64-linux-gnu/libOpenCL.so",
Expand Down
1 change: 1 addition & 0 deletions lite/core/mir/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ lite_cc_library(mir_passes
fusion/__xpu__bigru_fuse_pass.cc
fusion/match_matrix_activation_fuse_pass.cc
fusion/scales_fuse_pass.cc
fusion/scaleacts_fuse_pass.cc
fusion/sequence_reverse_embedding_fuse_pass.cc
fusion/instance_norm_activation_fuse_pass.cc
fusion/elementwise_add_scale_fuse_pass.cc
Expand Down
4 changes: 4 additions & 0 deletions lite/core/mir/fusion/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ lite_cc_library(fuse_match_matrix_activation
lite_cc_library(fuse_scales
SRCS scales_fuser.cc
DEPS pattern_matcher_high_api)
lite_cc_library(fuse_scaleacts
SRCS scaleacts_fuser.cc
DEPS pattern_matcher_high_api)
lite_cc_library(fuse_sequence_reverse_embedding
SRCS sequence_reverse_embedding_fuser.cc
DEPS pattern_matcher_high_api)
Expand Down Expand Up @@ -98,6 +101,7 @@ set(mir_fusers
fuse_inplace
fuse_match_matrix_activation
fuse_scales
fuse_scaleacts
fuse_sequence_reverse_embedding
fuse_instance_norm_activation
fuse_elementwise_add_scale
Expand Down
38 changes: 38 additions & 0 deletions lite/core/mir/fusion/scaleacts_fuse_pass.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/core/mir/fusion/scaleacts_fuse_pass.h"

#include <memory>
#include <vector>

#include "lite/core/mir/fusion/scaleacts_fuser.h"
#include "lite/core/mir/pass_registry.h"

namespace paddle {
namespace lite {
namespace mir {

void ScaleactsFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
fusion::ScaleactsFuser fuser;
fuser(graph.get());
}

} // namespace mir
} // namespace lite
} // namespace paddle

REGISTER_MIR_PASS(lite_scaleacts_fuse_pass,
paddle::lite::mir::ScaleactsFusePass)
.BindTargets({TARGET(kOpenCL)});
47 changes: 47 additions & 0 deletions lite/core/mir/fusion/scaleacts_fuse_pass.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include "lite/core/mir/pass.h"

namespace paddle {
namespace lite {
namespace mir {

// This pass fuses two scale ops to one scale op.
// Calculation is not reduced by this pass, but the two ops are fused into one.
// This will reduce running time on gpu device.
// MobilenetV3 has this pattern.
//
// For example:
// scale(has act func)
// |
// |
// |
// scale(no act func)
//
// After this pass is applied:
// scale( out = scale1 * (act(scale*in + bias)) + bias1 )

// Program-level MIR pass that drives fusion::ScaleactsFuser; registered as
// "lite_scaleacts_fuse_pass" and bound to the OpenCL target only.
class ScaleactsFusePass : public ProgramPass {
 public:
  // Applies the fusion to every match in `graph`.
  void Apply(const std::unique_ptr<SSAGraph>& graph) override;
};

} // namespace mir
} // namespace lite
} // namespace paddle
102 changes: 102 additions & 0 deletions lite/core/mir/fusion/scaleacts_fuser.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/core/mir/fusion/scaleacts_fuser.h"
#include <memory>
#include <vector>

namespace paddle {
namespace lite {
namespace mir {
namespace fusion {

// Declares the pattern: x -> scale(with activation) -> scale(plain) -> out.
// Both scale ops must have bias_after_scale == true; only the first may carry
// an "activation_type" attribute.
void ScaleactsFuser::BuildPattern() {
  // Input of the first scale op.
  auto* x = VarNode("x")->assert_is_op_input("scale", "X")->AsInput();

  // Factory for the two node tellers: matches a scale op whose
  // bias_after_scale is true and whose possession of an "activation_type"
  // attribute equals `expect_act`. Deduplicates the previous copy-pasted
  // lambdas and guards GetAttr with HasAttr so a scale op missing the
  // attribute is rejected instead of faulting.
  auto make_scale_teller = [](bool expect_act) {
    return [expect_act](const Node* node) -> bool {
      auto* op_info = const_cast<Node*>(node)->AsStmt().op_info();
      if (!op_info->HasAttr("bias_after_scale")) return false;
      const bool bias_after_scale =
          op_info->GetAttr<bool>("bias_after_scale");
      const bool has_act = op_info->HasAttr("activation_type");
      return bias_after_scale && (has_act == expect_act);
    };
  };

  // First scale: carries a fused activation.
  auto* scale1 = OpNode("scale1", "scale")
                     ->assert_is_op("scale")
                     ->assert_node_satisfied(make_scale_teller(true))
                     ->AsIntermediate();
  // Second scale: plain, no activation.
  auto* scale2 = OpNode("scale2", "scale")
                     ->assert_is_op("scale")
                     ->assert_node_satisfied(make_scale_teller(false))
                     ->AsIntermediate();

  // Intermediate var produced by scale1 and consumed by scale2.
  // NOTE(review): consider also asserting this var has a single consumer
  // before marking it intermediate, so fusion can't drop a value some other
  // op still reads — confirm against pattern_matcher_high_api semantics.
  auto* scale1_out = VarNode("scale1_out")
                         ->assert_is_op_output("scale", "Out")
                         ->assert_is_op_input("scale", "X")
                         ->AsIntermediate();

  // Final output of the fused pattern.
  auto* out = VarNode("out")->assert_is_op_output("scale", "Out")->AsOutput();

  // Topology: x -> scale1 -> scale1_out -> scale2 -> out.
  *x >> *scale1 >> *scale1_out >> *scale2 >> *out;
}

// Replaces the matched scale+scale subgraph with one fused scale instruction,
// attached to the first scale's scope and valid places.
void ScaleactsFuser::InsertNewNode(SSAGraph* graph,
                                   const key2nodes_t& matched) {
  // Build the fused op from the merged descriptor.
  auto fused_desc = GenOpDesc(matched);
  auto fused_op = LiteOpRegistry::Global().Create("scale");
  auto first_scale_op = matched.at("scale1")->stmt()->op();
  fused_op->Attach(fused_desc, first_scale_op->scope());

  // Insert the new instruction node and rewire input/output around it.
  auto* fused_node = graph->GraphCreateInstructNode(
      fused_op, first_scale_op->valid_places());
  IR_NODE_LINK_TO(matched.at("x"), fused_node);
  IR_NODE_LINK_TO(fused_node, matched.at("out"));
}

// Builds the fused op descriptor: starts from the first scale's desc (keeping
// its scale/bias/activation attributes) and folds the second scale's
// parameters in as "scale1"/"bias1" for the fused OpenCL kernel.
cpp::OpDesc ScaleactsFuser::GenOpDesc(const key2nodes_t& matched) {
  cpp::OpDesc op_desc = *matched.at("scale1")->stmt()->op_info();

  const auto* second_desc = matched.at("scale2")->stmt()->op_info();
  op_desc.SetAttr("fuse_scaleact", true);
  op_desc.SetAttr("scale1", second_desc->GetAttr<float>("scale"));
  op_desc.SetAttr("bias1", second_desc->GetAttr<float>("bias"));

  // The fused op writes directly to the pattern's final output var.
  op_desc.SetOutput("Out", {matched.at("out")->arg()->name});

  return op_desc;
}

} // namespace fusion
} // namespace mir
} // namespace lite
} // namespace paddle
38 changes: 38 additions & 0 deletions lite/core/mir/fusion/scaleacts_fuser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include "lite/core/mir/pattern_matcher_high_api.h"

namespace paddle {
namespace lite {
namespace mir {
namespace fusion {

// Fuses a `scale` op that carries a fused activation with the immediately
// following plain `scale` op into a single scale op (adding the
// fuse_scaleact / scale1 / bias1 attributes consumed by the OpenCL kernel).
class ScaleactsFuser : public FuseBase {
 public:
  // Declares the x -> scale(act) -> scale -> out pattern to match.
  void BuildPattern() override;
  // Replaces the matched subgraph with the fused scale op.
  void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override;

 private:
  // Merges the two scale ops' attributes into one op descriptor.
  cpp::OpDesc GenOpDesc(const key2nodes_t& matched) override;
};

} // namespace fusion
} // namespace mir
} // namespace lite
} // namespace paddle
4 changes: 3 additions & 1 deletion lite/core/mir/fusion/scales_fuser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ void ScalesFuser::BuildPattern() {
bool bias_after_scale =
const_cast<Node*>(node)->AsStmt().op_info()->GetAttr<bool>(
"bias_after_scale");
return bias_after_scale;
bool has_act =
const_cast<Node*>(node)->AsStmt().op_info()->HasAttr("activation_type");
return bias_after_scale && (!has_act);
};

// create op nodes
Expand Down
1 change: 1 addition & 0 deletions lite/core/optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ class Optimizer {
"elementwise_mul_constant_eliminate_pass", //
"lite_sequence_pool_concat_fuse_pass", //
"lite_scale_activation_fuse_pass", //
"lite_scaleacts_fuse_pass", //
"lite_elementwise_scale_fuse_pass", //
"lite_instance_norm_activation_fuse_pass", //
"lite_flatten_fc_fuse_pass", //
Expand Down
20 changes: 8 additions & 12 deletions lite/core/profile/precision_profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,15 @@ namespace lite {
namespace profile {

// Returns the current local time formatted as "YYYY-mm-dd_HH-MM-SS" for use
// in profiler output naming. Falls back to the literal "timestamp" when the
// calendar conversion fails.
// (Rebuilt: the previous span interleaved the removed microsecond/gmtime code
// with the new strftime/localtime code and was not valid C++.)
static const std::string get_date_str() {
  std::time_t now = std::time(nullptr);
  char buffer[32];  // "%Y-%m-%d_%H-%M-%S" needs 20 bytes incl. terminator.
  if (std::strftime(
          buffer, sizeof(buffer), "%Y-%m-%d_%H-%M-%S", std::localtime(&now))) {
    return std::string(buffer);
  } else {
    LOG(WARNING) << "Convert calendar time error! Use the default timestamp.";
    return "timestamp";
  }
}

inline std::string generate_valid_tensor_name(const std::string& name) {
Expand Down
11 changes: 11 additions & 0 deletions lite/kernels/opencl/scale_image_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
LOG(FATAL) << "Unsupported activation type: "
<< scale_param_->activation_type;
}
if (scale_param_->fuse_scaleact) {
kernel_func_name_ = "scaleacts";
}
context.cl_context()->AddKernel(kernel_func_name_,
"image/scale_kernel.cl",
build_options_,
Expand Down Expand Up @@ -87,6 +90,8 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
bias *= scale;
}
const float alpha = scale_param_->alpha;
const float scale1 = scale_param_->scale1;
const float bias1 = scale_param_->bias1;

auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
Expand All @@ -103,6 +108,12 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
CL_CHECK_FATAL(status);
status = kernel.setArg(4, alpha);
CL_CHECK_FATAL(status);
if (kernel_func_name_ == "scaleacts") {
status = kernel.setArg(5, scale1);
CL_CHECK_FATAL(status);
status = kernel.setArg(6, bias1);
CL_CHECK_FATAL(status);
}

status = EnqueueNDRangeKernel(context,
kernel,
Expand Down
6 changes: 5 additions & 1 deletion lite/operators/op_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,11 +281,15 @@ struct ScaleParam : ParamBase {
lite::Tensor* output{};

float scale{1.f};
float bias{};
float bias{0.f};
bool bias_after_scale{true};
std::string activation_type{""};
bool fuse_relu{false};
float alpha{6.f};

bool fuse_scaleact{false};
float scale1{1.f};
float bias1{0.f};
///////////////////////////////////////////////////////////////////////////////////
// get a vector of input tensors
const std::vector<const Tensor*>* input_tensor_ptrs() override {
Expand Down
Loading