Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion paddle/fluid/framework/ir/fc_fuse_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ void FCFusePass::ApplyImpl(ir::Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);

auto base_op_desc = *mul->Op()->Proto();
// Create an FC Node.
OpDesc desc;
OpDesc desc(base_op_desc, nullptr);
std::string fc_x_in = subgraph.at(x)->Name();
std::string fc_Y_in = w->Name();
std::string fc_bias_in = fc_bias->Name();
Expand Down
25 changes: 12 additions & 13 deletions paddle/fluid/framework/ir/graph_pattern_detector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1640,32 +1640,31 @@ PDNode *patterns::FillConstantElementWiseMulFuse::operator()(
void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input,
const std::string &op_type,
const std::string &weight_name,
int times) {
int times,
const std::string &quant_type) {
const int kNumFields = 5;
const int kQuantizedWeightOffset = 0;
const int kQuantizedOpOffset = 1;
const int kQuantizedOpOutOffset = 2;
const int kDequantOpOffset = 3;
const int kDequantOpOutOffset = 4;
// the quant op always be one.
auto quant_op_in_scale =
pattern->NewNode(GetNodeName("quant_op_in_scale"))
->assert_is_op_input("fake_quantize_range_abs_max", "InScale")
->AsInput();
auto quant_op = pattern->NewNode(GetNodeName("quant_op"))
->assert_is_op("fake_quantize_range_abs_max");
auto quant_op_in_scale = pattern->NewNode(GetNodeName("quant_op_in_scale"))
->assert_is_op_input(quant_type, "InScale")
->AsInput();
auto quant_op =
pattern->NewNode(GetNodeName("quant_op"))->assert_is_op(quant_type);

auto quant_op_out_scale =
pattern->NewNode(GetNodeName("quant_op_out_scale"))
->assert_is_op_output("fake_quantize_range_abs_max", "OutScale")
->assert_is_op_output(quant_type, "OutScale")
->assert_is_op_input("fake_dequantize_max_abs", "Scale")
->AsIntermediate();

auto quant_op_out =
pattern->NewNode(GetNodeName("quant_op_out"))
->assert_is_op_output("fake_quantize_range_abs_max", "Out")
->assert_is_op_input(op_type)
->AsIntermediate();
auto quant_op_out = pattern->NewNode(GetNodeName("quant_op_out"))
->assert_is_op_output(quant_type, "Out")
->assert_is_op_input(op_type)
->AsIntermediate();

// there are 'times' quantized and dequant op
std::vector<PDNode *> nodes;
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/framework/ir/graph_pattern_detector.h
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,8 @@ struct QuantDequantOpFuse : public PatternBase {
: PatternBase(pattern, name_scope, "quant_dequant_fuse") {}

void operator()(PDNode* quant_op_input, const std::string& op_name,
const std::string& weight_name, int times = 1);
const std::string& weight_name, int times,
const std::string& quant_type);

std::string GetNodeName(const std::string& op_type) {
return PDNodeName(name_scope_, repr_, id_, op_type);
Expand Down
28 changes: 19 additions & 9 deletions paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ namespace framework {
namespace ir {

void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
std::string op_type) {
const std::string& op_type,
const std::string& quant_type) {
const std::string pattern_name = "quant_dequant_fuse";
// FusePassBase::Init(pattern_name, graph);
const int kNumFields = 5;
Expand All @@ -38,14 +39,17 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
GraphPatternDetector gpd;
auto* x = gpd.mutable_pattern()
->NewNode("x")
->assert_is_op_input("fake_quantize_range_abs_max", "X")
->assert_is_op_input(quant_type, "X")
->AsInput();

std::string quantized_op_type = "";
std::string weight_name = "";
if (op_type == "conv2d") {
quantized_op_type = "conv2d";
weight_name = "Filter";
} else if (op_type == "depthwise_conv2d") {
quantized_op_type = "depthwise_conv2d";
weight_name = "Filter";
} else if (op_type == "conv2d_fusion") {
quantized_op_type = "conv2d_fusion";
weight_name = "Filter";
Expand All @@ -62,7 +66,7 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
}

patterns::QuantDequantOpFuse pattern(gpd.mutable_pattern(), pattern_name);
pattern(x, quantized_op_type, weight_name, times);
pattern(x, quantized_op_type, weight_name, times, quant_type);

auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
Expand Down Expand Up @@ -103,7 +107,6 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
std::unordered_set<const Node*> delete_nodes;

for (int i = 0; i < times; i++) {
// max_range = (range * range) / weight_scale
float max_range = boost::get<float>(
nodes[i * kNumFields + kDequantOpOffset]->Op()->GetAttr("max_range"));
float weight_scale = (range * range) / max_range;
Expand All @@ -118,7 +121,8 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
new_op_desc.SetType(quantized_op_type);

if (quantized_op_type == "conv2d" ||
quantized_op_type == "conv2d_fusion") {
quantized_op_type == "conv2d_fusion" ||
quantized_op_type == "depthwise_conv2d") {
new_op_desc.SetInput("Input", {new_input});
new_op_desc.SetOutput("Output", {new_output});
} else if (quantized_op_type == "fc") {
Expand Down Expand Up @@ -156,11 +160,17 @@ void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const {
const std::string pattern_name = "quant_dequant_fuse";
FusePassBase::Init(pattern_name, graph);

std::unordered_set<std::string> quantized_op_types = {"conv2d", "mul"};
std::unordered_set<std::string> quant_types = {
"fake_quantize_range_abs_max", "fake_quantize_moving_average_abs_max"};

std::unordered_set<std::string> quantized_op_types = {"conv2d", "mul",
"depthwise_conv2d"};
auto* scope = param_scope();
for (auto& op_type : quantized_op_types) {
for (int i = 1; i <= 6; i++) {
RunQuantDequant(graph, scope, i, op_type);
for (auto& quant_type : quant_types) {
for (auto& op_type : quantized_op_types) {
for (int i = 6; i >= 1; i--) {
RunQuantDequant(graph, scope, i, op_type, quant_type);
}
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion paddle/fluid/inference/anakin/convert/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc roi_align.cc DEPS anakin_engine framework_proto scope op_registry)
cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc
batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc
detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc
roi_align.cc helper.cc DEPS anakin_engine framework_proto scope op_registry)

cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL)
cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
Expand Down
49 changes: 39 additions & 10 deletions paddle/fluid/inference/anakin/convert/activation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ namespace paddle {
namespace inference {
namespace anakin {

template <typename TargetT>
ActivationOpConverter<TargetT>::ActivationOpConverter(
template <typename TargetT, ::anakin::Precision PrecisionT>
ActivationOpConverter<TargetT, PrecisionT>::ActivationOpConverter(
const std::string &op_type)
: op_type_(op_type) {
auto it = anakin_op_types_.find(op_type_);
Expand All @@ -30,8 +30,8 @@ ActivationOpConverter<TargetT>::ActivationOpConverter(
anakin_op_type_ = it->second;
}

template <typename TargetT>
void ActivationOpConverter<TargetT>::operator()(
template <typename TargetT, ::anakin::Precision PrecisionT>
void ActivationOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
Expand All @@ -50,11 +50,40 @@ void ActivationOpConverter<TargetT>::operator()(
} // namespace paddle

#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid,
SigmoidOpConverter<::anakin::saber::NV>);
REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::NV>);
using sigmoid_nv_fp32 =
::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::NV,
::anakin::Precision::FP32>;
using sigmoid_nv_int8 =
::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::NV,
::anakin::Precision::INT8>;
using tanh_nv_fp32 =
::paddle::inference::anakin::TanhOpConverter<::anakin::saber::NV,
::anakin::Precision::FP32>;
using tanh_nv_int8 =
::paddle::inference::anakin::TanhOpConverter<::anakin::saber::NV,
::anakin::Precision::INT8>;

REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_fp32);
REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_int8);
REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, tanh_nv_fp32);
REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_nv_int8);
#endif

REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid,
SigmoidOpConverter<::anakin::saber::X86>);
REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::X86>);
using sigmoid_cpu_fp32 =
::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::X86,
::anakin::Precision::FP32>;
using sigmoid_cpu_int8 =
::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::X86,
::anakin::Precision::INT8>;
using tanh_cpu_fp32 =
::paddle::inference::anakin::TanhOpConverter<::anakin::saber::X86,
::anakin::Precision::FP32>;
using tanh_cpu_int8 =
::paddle::inference::anakin::TanhOpConverter<::anakin::saber::X86,
::anakin::Precision::INT8>;

REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_fp32);
REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_int8);

REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_fp32);
REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_int8);
17 changes: 9 additions & 8 deletions paddle/fluid/inference/anakin/convert/activation.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ namespace paddle {
namespace inference {
namespace anakin {

template <typename TargetT>
class ActivationOpConverter : public AnakinOpConverter<TargetT> {
template <typename TargetT, ::anakin::Precision PrecisionT>
class ActivationOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
explicit ActivationOpConverter(const std::string &op_type);

Expand All @@ -40,16 +40,17 @@ class ActivationOpConverter : public AnakinOpConverter<TargetT> {
{"sigmoid", "Sigmoid"}};
};

template <typename TargetT>
class TanhOpConverter : public ActivationOpConverter<TargetT> {
template <typename TargetT, ::anakin::Precision PrecisionT>
class TanhOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
public:
TanhOpConverter() : ActivationOpConverter<TargetT>("tanh") {}
TanhOpConverter() : ActivationOpConverter<TargetT, PrecisionT>("tanh") {}
};

template <typename TargetT>
class SigmoidOpConverter : public ActivationOpConverter<TargetT> {
template <typename TargetT, ::anakin::Precision PrecisionT>
class SigmoidOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
public:
SigmoidOpConverter() : ActivationOpConverter<TargetT>("sigmoid") {}
SigmoidOpConverter()
: ActivationOpConverter<TargetT, PrecisionT>("sigmoid") {}
};
} // namespace anakin
} // namespace inference
Expand Down
27 changes: 20 additions & 7 deletions paddle/fluid/inference/anakin/convert/affine_channel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@
// limitations under the License.

#include "paddle/fluid/inference/anakin/convert/affine_channel.h"
#include "paddle/fluid/inference/anakin/convert/helper.h"
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"

namespace paddle {
namespace inference {
namespace anakin {

template <typename TargetT>
void AffineChannelOpConverter<TargetT>::operator()(
template <typename TargetT, ::anakin::Precision PrecisionT>
void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
Expand Down Expand Up @@ -53,8 +53,21 @@ void AffineChannelOpConverter<TargetT>::operator()(
} // namespace paddle

#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER(
affine_channel, AffineChannelOpConverter<::anakin::saber::NV>);
using affine_channel_nv_fp32 =
::paddle::inference::anakin::AffineChannelOpConverter<
::anakin::saber::NV, ::anakin::Precision::FP32>;
using affine_channel_nv_int8 =
::paddle::inference::anakin::AffineChannelOpConverter<
::anakin::saber::NV, ::anakin::Precision::INT8>;
REGISTER_CUDA_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_fp32);
REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_int8);
#endif
REGISTER_CPU_ANAKIN_OP_CONVERTER(
affine_channel, AffineChannelOpConverter<::anakin::saber::X86>);

using affine_channel_cpu_fp32 =
::paddle::inference::anakin::AffineChannelOpConverter<
::anakin::saber::X86, ::anakin::Precision::FP32>;
using affine_channel_cpu_int8 =
::paddle::inference::anakin::AffineChannelOpConverter<
::anakin::saber::X86, ::anakin::Precision::INT8>;
REGISTER_CPU_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_fp32);
REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_int8);
4 changes: 2 additions & 2 deletions paddle/fluid/inference/anakin/convert/affine_channel.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ namespace paddle {
namespace inference {
namespace anakin {

template <typename TargetT>
class AffineChannelOpConverter : public AnakinOpConverter<TargetT> {
template <typename TargetT, ::anakin::Precision PrecisionT>
class AffineChannelOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
AffineChannelOpConverter() = default;

Expand Down
23 changes: 15 additions & 8 deletions paddle/fluid/inference/anakin/convert/batch_norm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@
// limitations under the License.

#include "paddle/fluid/inference/anakin/convert/batch_norm.h"
#include "paddle/fluid/inference/anakin/convert/helper.h"
#include <math.h>
#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"

namespace paddle {
namespace inference {
namespace anakin {

template <typename TargetT>
void BatchNormOpConverter<TargetT>::operator()(
template <typename TargetT, ::anakin::Precision PrecisionT>
void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
Expand Down Expand Up @@ -74,17 +74,24 @@ void BatchNormOpConverter<TargetT>::operator()(
PADDLE_ENFORCE_NOT_NULL(bias_v);
auto bias = pblock_from_var<TargetT>(*bias_v);
this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);

}

} // namespace anakin
} // namespace inference
} // namespace paddle

#ifdef PADDLE_WITH_CUDA
REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm,
BatchNormOpConverter<::anakin::saber::NV>);
using bn_nv_fp32 = ::paddle::inference::anakin::BatchNormOpConverter<
::anakin::saber::NV, ::anakin::Precision::FP32>;
using bn_nv_int8 = ::paddle::inference::anakin::BatchNormOpConverter<
::anakin::saber::NV, ::anakin::Precision::INT8>;
REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm, bn_nv_fp32);
REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(batch_norm, bn_nv_int8);
#endif

REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm,
BatchNormOpConverter<::anakin::saber::X86>);
using bn_cpu_fp32 = ::paddle::inference::anakin::BatchNormOpConverter<
::anakin::saber::X86, ::anakin::Precision::FP32>;
using bn_cpu_int8 = ::paddle::inference::anakin::BatchNormOpConverter<
::anakin::saber::X86, ::anakin::Precision::INT8>;
REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm, bn_cpu_fp32);
REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(batch_norm, bn_cpu_int8);
4 changes: 2 additions & 2 deletions paddle/fluid/inference/anakin/convert/batch_norm.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ namespace paddle {
namespace inference {
namespace anakin {

template <typename TargetT>
class BatchNormOpConverter : public AnakinOpConverter<TargetT> {
template <typename TargetT, ::anakin::Precision PrecisionT>
class BatchNormOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
BatchNormOpConverter() = default;

Expand Down
Loading