3 changes: 0 additions & 3 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
@@ -59,9 +59,6 @@ void SetOp(ProgramDesc* prog,
op->SetAttr("fuse_residual_connection", false);
}
op->SetOutput("Output", {outputs[0]});
op->SetAttr("Scale_in", 1.0f);
op->SetAttr("Scale_out", 1.0f);
op->SetAttr("Scale_weights", std::vector<float>{1.0f});
} else if (type == "pool2d" || type == "transpose2" || type == "reshape2" ||
type == "nearest_interp" || type == "nearest_interp_v2") {
op->SetInput("X", {inputs[0]});
4 changes: 3 additions & 1 deletion paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc
@@ -354,7 +354,9 @@ void CPUQuantizeSquashPass::OpDequantSquash(Graph* graph) const {
FindOutputNameByVarName(any_op->Op(), dequant_in->Name());

if (output_name.empty()) return;

if (any_op->Op()->Type() == "conv2d") {
any_op->Op()->SetType("fused_conv2d");
Member: So all int8-oneDNN kernels should be executed as fused kernels by default?

Contributor Author (@zyfncg, Dec 28, 2022): Yes. We are trying to remove the extra inputs and attributes from the base op, so some of the extra attributes used by the int8-oneDNN kernel are removed as well; for now we have to route them through the fused kernel because there is no better choice. I think a good way to execute the int8-oneDNN kernel would be to create a dedicated int8-oneDNN kernel, but that is difficult to implement at the current stage; maybe we can come up with a better solution in the future.
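
An editor's sketch (not part of this patch) of what that implies for the quantization passes: once plain conv2d no longer declares the int8/oneDNN extras, a pass must first retype the op to fused_conv2d, which still declares them, before setting the quantization attributes. The attribute names are the ones used elsewhere in this diff; the surrounding pass context (any_op) is assumed.

// Hypothetical pass fragment mirroring the OpDequantSquash change above.
if (any_op->Op()->Type() == "conv2d") {
  // Plain conv2d no longer carries the int8/oneDNN extras, so switch to the fused op.
  any_op->Op()->SetType("fused_conv2d");
}
// These attributes are only declared for fused_conv2d after this change.
any_op->Op()->SetAttr("Scale_in", 1.0f);
any_op->Op()->SetAttr("Scale_out", 1.0f);
any_op->Op()->SetAttr("Scale_weights", std::vector<float>{1.0f});
any_op->Op()->SetAttr("force_fp32_output", true);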

Member: OK, thank you for explaining.

}
any_op->Op()->SetAttr("force_fp32_output", true);
any_op->Op()->SetOutput(output_name,
std::vector<std::string>({dequant_out->Name()}));
11 changes: 8 additions & 3 deletions paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
@@ -411,6 +411,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
std::string input_name = "";
if (quantized_op_type == "conv2d" ||
quantized_op_type == "depthwise_conv2d" ||
quantized_op_type == "fused_conv2d" ||
quantized_op_type == "conv2d_fusion" ||
quantized_op_type == "conv2d_transpose") {
weight_name = "Filter";
@@ -424,9 +425,10 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
input_name = "Input";
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"QuantDequantFuse: We only support conv2d, conv2d_fusion, "
"conv2d_transpose, fc, mul, matmul, matmul_v2 for "
"now."));
"QuantDequantFuse: We only support conv2d, conv2d_fusion, fused_conv2d, "
"conv2d_transpose, fc, mul, matmul, matmul_v2 for now, but received: "
"%s.",
quantized_op_type));
}
const std::string pattern_name = "dequant_fuse";
GraphPatternDetector gpd;
@@ -559,6 +561,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
}
}
} else if (quantized_op_type == "conv2d" ||
quantized_op_type == "fused_conv2d" ||
quantized_op_type == "depthwise_conv2d") {
PADDLE_ENFORCE_EQ(
dequant_type,
@@ -642,6 +645,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
new_op_desc.SetType(quantized_op_type);
new_op_desc.SetAttr("enable_int8", true);
if (quantized_op_type == "conv2d" || quantized_op_type == "conv2d_fusion" ||
quantized_op_type == "fused_conv2d" ||
quantized_op_type == "depthwise_conv2d" ||
quantized_op_type == "conv2d_transpose") {
new_op_desc.SetInput("Input", {new_input});
@@ -677,6 +681,7 @@ void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const {
"fake_quantize_range_abs_max", "fake_quantize_moving_average_abs_max"};
std::unordered_set<std::string> quantized_op_types = {
"conv2d",
"fused_conv2d",
"mul",
"matmul",
"depthwise_conv2d",
5 changes: 5 additions & 0 deletions paddle/fluid/framework/op_desc.cc
@@ -671,6 +671,11 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
if (extra_attr_iter != extra_attr_map.end()) {
is_runtime_attr = true;
attrs_ptr = &(this->runtime_attrs_);
// When an attribute is found in both attrs and runtime_attrs, it must
// be a runtime attribute, so its value in attrs should be removed.
if (this->attrs_.find(name) != this->attrs_.end()) {
this->attrs_.erase(name);
}
}
// NOTICE(minqiyang): pybind11 will take the empty list in python as
// the std::vector<int> type in C++; so we have to change the attr's type
50 changes: 0 additions & 50 deletions paddle/fluid/operators/compat/conv2d.pbtxt
@@ -6,12 +6,6 @@ def {
inputs {
name: "Filter"
}
inputs {
name: "Bias"
}
inputs {
name: "ResidualData"
}
outputs {
name: "Output"
}
@@ -69,54 +63,10 @@ extra {
name: "skip_quant"
type: BOOLEAN
}
attrs {
name: "fuse_relu_before_depthwise_conv"
type: BOOLEAN
}
attrs {
name: "fuse_relu"
type: BOOLEAN
}
attrs {
name: "fuse_activation"
type: STRING
}
attrs {
name: "fuse_alpha"
type: FLOAT
}
attrs {
name: "fuse_beta"
type: FLOAT
}
attrs {
name: "use_addto"
type: BOOLEAN
}
attrs {
name: "fuse_residual_connection"
type: BOOLEAN
}
attrs {
name: "Scale_in"
type: FLOAT
}
attrs {
name: "Scale_out"
type: FLOAT
}
attrs {
name: "Scale_in_eltwise"
type: FLOAT
}
attrs {
name: "Scale_weights"
type: FLOATS
}
attrs {
name: "force_fp32_output"
type: BOOLEAN
}
attrs {
name: "workspace_size_MB"
type: INT
46 changes: 0 additions & 46 deletions paddle/fluid/operators/compat/depthwise_conv2d.pbtxt
@@ -6,12 +6,6 @@ def {
inputs {
name: "Filter"
}
inputs {
name: "Bias"
}
inputs {
name: "ResidualData"
}
outputs {
name: "Output"
}
@@ -65,50 +59,10 @@ extra {
name: "fuse_relu_before_depthwise_conv"
type: BOOLEAN
}
attrs {
name: "fuse_relu"
type: BOOLEAN
}
attrs {
name: "fuse_activation"
type: STRING
}
attrs {
name: "fuse_alpha"
type: FLOAT
}
attrs {
name: "fuse_beta"
type: FLOAT
}
attrs {
name: "use_addto"
type: BOOLEAN
}
attrs {
name: "fuse_residual_connection"
type: BOOLEAN
}
attrs {
name: "Scale_in"
type: FLOAT
}
attrs {
name: "Scale_out"
type: FLOAT
}
attrs {
name: "Scale_in_eltwise"
type: FLOAT
}
attrs {
name: "Scale_weights"
type: FLOATS
}
attrs {
name: "force_fp32_output"
type: BOOLEAN
}
attrs {
name: "workspace_size_MB"
type: INT
12 changes: 0 additions & 12 deletions paddle/fluid/operators/conv_op.cc
@@ -251,18 +251,6 @@ void Conv2DOpMaker::Make() {
"H is the height of the filter, and W is the width of the filter. "
"If the groups attribute is greater than 1, C equals the number of "
"input image channels divided by the groups.");
AddInput("Bias",
"(Tensor) Bias to be added to each output of filter application."
"The format of output tensor is X (one-dimensional) of size equal"
"to the number of output channels. Only used with MKL-DNN.")
.AsDispensable()
.AsExtra();
AddInput("ResidualData",
"(Tensor) Tensor with residual data "
"to which convolution output will be added."
"Used with fuse_residual_connection fusion.")
.AsDispensable()
.AsExtra();
AddOutput("Output",
"(Tensor) The output tensor of convolution operator. "
"It has the same data format and data type as the Input.");
10 changes: 10 additions & 0 deletions paddle/fluid/operators/fused/conv_fusion_op.cc
@@ -32,6 +32,16 @@ namespace operators {
class Conv2DFusionOpMaker : public Conv2DOpMaker {
protected:
void Apply() override {
AddInput("Bias",
"(Tensor) Bias to be added to each output of filter application. "
"The format of output tensor is X (one-dimensional) of size equal "
"to the number of output channels. Only used with MKL-DNN.")
.AsDispensable();
AddInput("ResidualData",
"(Tensor) Tensor with residual data "
"to which convolution output will be added. "
"Used with fuse_residual_connection fusion.")
.AsDispensable();
AddAttr<std::string>(
"activation",
"The activation type can be 'identity', 'sigmoid', 'relu', 'relu6' "
10 changes: 10 additions & 0 deletions paddle/fluid/operators/fused/fused_conv2d_op.cc
@@ -23,6 +23,16 @@ namespace operators {
class FusedConvOpMaker : public Conv2DOpMaker {
protected:
void Apply() override {
AddInput("Bias",
"(Tensor) Bias to be added to each output of filter application. "
"The format of output tensor is X (one-dimensional) of size equal "
"to the number of output channels. Only used with MKL-DNN.")
.AsDispensable();
AddInput("ResidualData",
"(Tensor) Tensor with residual data "
"to which convolution output will be added. "
"Used with fuse_residual_connection fusion.")
.AsDispensable();
AddAttr<std::string>(
"mkldnn_data_type",
"(string, default \"float32\"). Data type of mkldnn kernel")
4 changes: 1 addition & 3 deletions paddle/fluid/operators/ops_extra_info.h
@@ -224,9 +224,7 @@ class ExtraInfoUtils {

// TODO(chenweihang): move these extra inputs into op_compat.yaml
std::unordered_map<std::string, std::vector<std::string>>
g_extra_input_names_map_ = {{"conv2d", {"Bias", "ResidualData"}},
{"conv2d_transpose", {"Bias"}},
{"conv2d_grad", {"Bias"}},
g_extra_input_names_map_ = {{"conv2d_transpose", {"Bias"}},
{"matmul_v2", {"ResidualData"}}};
std::vector<std::string> empty_extra_input_names_;
};
12 changes: 7 additions & 5 deletions paddle/phi/api/yaml/op_compat.yaml
@@ -210,11 +210,8 @@
- op : conv2d
backward : conv2d_grad
extra :
attrs : [bool is_test = false, bool use_cudnn = true, bool fuse_relu_before_depthwise_conv = false, bool use_mkldnn = false,
bool use_quantizer = false, str mkldnn_data_type = "float32", bool fuse_relu = false,
str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f, bool use_addto = false,
bool fuse_residual_connection = false, float Scale_in = 1.0f, float Scale_out = 1.0f,
float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}', bool force_fp32_output = false,
attrs : [bool is_test = false, bool use_cudnn = true, bool use_mkldnn = false, bool use_addto = false,
str mkldnn_data_type = "float32", bool force_fp32_output = false,
int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]

- op : conv2d_fusion
@@ -556,6 +553,11 @@
extra :
attrs : [bool use_mkldnn = false]

- op : fused_conv2d
Member: fused_conv2d has its own operator with the extra attributes declared. Is it still necessary to add an op_compat entry?

Contributor Author: Yes, it is necessary. The extra attributes declared in fused_conv2d.pbtxt are used by the IR passes, but the executor retrieves the extra-attribute info from op_compat.yaml when it runs the kernel, so an op_compat entry for these extra attributes is still required.
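
An editor's sketch of how such a runtime attribute is then read at execution time, following the dev_ctx.HasDnnAttr / GetDnnAttr pattern that appears in conv_kernel.cc further down this diff; per the explanation above, without the op_compat.yaml entry the attribute would not be forwarded and the fallback branch would be taken.

// Hypothetical kernel fragment; dev_ctx is the oneDNN device context.
float scale_in =
    dev_ctx.HasDnnAttr("Scale_in")
        ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_in"))
        : 1.0f;  // fallback when the attribute is not present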

extra :
attrs : [bool use_cudnn = false, float fuse_alpha = 0.0f, float fuse_beta = 0.0f, float Scale_in = 1.0f,
float Scale_out = 1.0f, float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}']

- op : gather
backward : gather_grad
extra :
23 changes: 5 additions & 18 deletions paddle/phi/kernels/onednn/conv_kernel.cc
@@ -41,29 +41,16 @@ void ConvKernel(const Context& dev_ctx,
dev_ctx.GetDnnAttr("mkldnn_data_type")) ==
"bfloat16"
: false;
const auto* bias =
dev_ctx.HasDnnInput("Bias") ? dev_ctx.GetDnnInput("Bias") : nullptr;
const auto* residual_param = dev_ctx.HasDnnInput("ResidualData")
? dev_ctx.GetDnnInput("ResidualData")
: nullptr;
bool fuse_residual_conn =
dev_ctx.HasDnnAttr("fuse_residual_connection")
? PADDLE_GET_CONST(bool,
dev_ctx.GetDnnAttr("fuse_residual_connection"))
: false;
const std::string& fuse_activation =
dev_ctx.HasDnnAttr("fuse_activation")
? PADDLE_GET_CONST(std::string, dev_ctx.GetDnnAttr("fuse_activation"))
: "";
bool force_fp32_output =
dev_ctx.HasDnnAttr("force_fp32_output")
? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("force_fp32_output"))
: false;

ConvOnednn<T>(dev_ctx,
&input,
&filter,
bias,
residual_param,
nullptr,
nullptr,
strides,
paddings,
padding_algorithm,
@@ -72,8 +59,8 @@
data_format,
is_test,
is_BFLOAT16,
fuse_activation,
fuse_residual_conn,
"",
false,
force_fp32_output,
out);
}
@@ -164,7 +164,7 @@ def _transform_to_conv_mkldnn(self, graph, op_node):
}

conv_op_node = graph.create_op_node(
op_type='conv2d',
op_type='fused_conv2d',
attrs=attrs,
inputs={'Input': input_var_node, 'Filter': weight_var_node},
outputs={'Output': output_var_node},
@@ -104,6 +104,7 @@ def setUp(self):
}

if self.fuse_residual:
self.op_type = "fused_conv2d"
self.inputs['ResidualData'] = OpTest.np_dtype_to_fluid_dtype(
convert_float_to_uint16(self.input_residual)
)
@@ -158,6 +158,9 @@ def residual_helper(init_low, init_high, output_):
input_residual
)

if self.fuse_activation != "" or self.fuse_residual:
self.op_type = "fused_conv2d"

self.attrs = {
'strides': self.stride,
'paddings': self.pad,
@@ -341,6 +344,7 @@ def init_group(self):


def init_data_type_with_fusion(self, input_dt, fuse_activation, fuse_residual):
self.op_type = "fused_conv2d"
self.srctype = input_dt
self.dsttype = np.uint8 if fuse_activation == "relu" else np.int8

@@ -99,6 +99,13 @@ def setUp(self):
output = np.minimum(np.maximum(output, 0), self.fuse_alpha).astype(
self.dsttype
)
if (
self.fuse_activation != ""
or self.fuse_bias
or self.fuse_residual_connection
):
self.op_type = 'fused_conv2d'

output = output.astype(self.dtype)

self.attrs['fuse_bias'] = self.fuse_bias