45 changes: 44 additions & 1 deletion paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
@@ -421,7 +421,8 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const {
     // without MKL-DNN fuse conv+bn into conv+elementwise_add
     if (is_mkldnn) {
       if (conv->Op()->Type() == "conv2d" ||
-          conv->Op()->Type() == "depthwise_conv2d") {
+          conv->Op()->Type() == "depthwise_conv2d" ||
+          conv->Op()->Type() == "conv2d_transpose") {
         ConvertToFusedOp(conv->Op());
       }
       if (mkldnn_with_bias) {
@@ -816,6 +817,48 @@ ConvTransposeBNFusePass::ConvTransposeBNFusePass() {  // NOLINT
       .AddAttr("data_format")
       .IsStringIn({"NCHW", "AnyLayout"})
       .End();
+
+  AddOpCompat(OpCompat("conv2d_transpose_bias"))
+      .AddInput("Input")
+      .IsTensor()
+      .End()
+      .AddInput("Filter")
+      .IsTensor()
+      .End()
+      .AddInput("Bias")
+      .IsTensor()
+      .IsOptional()
+      .End()
+      .AddOutput("Output")
+      .IsTensor()
+      .End()
+      .AddAttr("output_padding")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("output_size")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("groups")
+      .IsNumEQ(1)
+      .End()
+      .AddAttr("dilations")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("strides")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("paddings")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("padding_algorithm")
+      .IsOptional()
+      .IsStringIn({"EXPLICIT", "SAME", "VALID"})
+      .End()
+      .AddAttr("data_format")
+      .IsStringIn({"NCHW", "AnyLayout"})
+      .End();
 }
 
 ConvTransposeEltwiseAddBNFusePass::
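For context, the conv+BN fusion these passes perform folds the batch-norm statistics into the convolution's per-output-channel weights and bias. A minimal standalone sketch of that folding (assuming a weight layout with the output channel outermost; a transposed conv's filter is actually laid out [C_in, C_out, H, W], so the real pass strides differently):

#include <cmath>
#include <vector>

// Fold y = gamma * (conv(x) + b - mean) / sqrt(var + eps) + beta into
// y = conv'(x) + b', with per-output-channel scale a = gamma / sqrt(var + eps):
// conv' scales each output channel's weights by a; b' = a * (b - mean) + beta.
void FoldBnIntoConv(std::vector<float>* weights,  // c_out blocks of k values
                    std::vector<float>* bias,     // c_out values
                    const std::vector<float>& gamma,
                    const std::vector<float>& beta,
                    const std::vector<float>& mean,
                    const std::vector<float>& var,
                    float eps) {
  const size_t c_out = bias->size();
  const size_t k = weights->size() / c_out;
  for (size_t c = 0; c < c_out; ++c) {
    const float a = gamma[c] / std::sqrt(var[c] + eps);
    for (size_t i = 0; i < k; ++i) (*weights)[c * k + i] *= a;
    (*bias)[c] = a * ((*bias)[c] - mean[c]) + beta[c];
  }
}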
42 changes: 42 additions & 0 deletions paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc
@@ -153,6 +153,48 @@ Conv2DTransposeBiasFusePass::Conv2DTransposeBiasFusePass() {
       .IsStringIn({"NCHW", "NHWC", "AnyLayout"})
       .End();
 
+  AddOpCompat(OpCompat("conv2d_transpose_bias"))
+      .AddInput("Input")
+      .IsTensor()
+      .End()
+      .AddInput("Filter")
+      .IsTensor()
+      .End()
+      .AddInput("Bias")
+      .IsTensor()
+      .IsOptional()
+      .End()
+      .AddOutput("Output")
+      .IsTensor()
+      .End()
+      .AddAttr("output_padding")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("output_size")
+      .IsType<std::vector<int>>()
+      .IsOptional()
+      .End()
+      .AddAttr("groups")
+      .IsNumGE(1)
+      .End()
+      .AddAttr("dilations")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("strides")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("paddings")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("padding_algorithm")
+      .IsOptional()
+      .IsStringIn({"EXPLICIT", "SAME", "VALID"})
+      .End()
+      .AddAttr("data_format")
+      .IsStringIn({"NCHW", "NHWC", "AnyLayout"})
+      .End();
+
   AddOpCompat(OpCompat("elementwise_add"))
       .AddInput("X")
       .IsTensor()
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h
@@ -50,7 +50,7 @@ class Conv2DTransposeBiasFusePass : public ConvBiasFusePass {
  public:
   Conv2DTransposeBiasFusePass();
   std::string type() const override { return "conv2d_transpose"; }
-  std::string fused_type() const override { return "conv2d_transpose"; }
+  std::string fused_type() const override { return "conv2d_transpose_bias"; }
 };
 
 class Conv3DBiasFusePass : public ConvBiasFusePass {
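This one-line change is what routes fused graphs to the new kernel: when ConvBiasFusePass rewrites a matched conv + elementwise_add subgraph, the replacement op is created with fused_type(). A hypothetical helper in that spirit (a sketch under assumed names, not the pass's actual code):

#include <string>
#include <vector>

#include "paddle/fluid/framework/op_desc.h"

// Sketch: build the replacement op desc the way the fuse pass plausibly does,
// taking its type from fused_type(), which is "conv2d_transpose_bias" after
// this change, so the rewritten graph dispatches to the fused oneDNN kernel.
paddle::framework::OpDesc MakeFusedDesc(const std::string& fused_type,
                                        const std::string& input,
                                        const std::string& filter,
                                        const std::string& bias,
                                        const std::string& output) {
  paddle::framework::OpDesc desc;
  desc.SetType(fused_type);
  desc.SetInput("Input", {input});
  desc.SetInput("Filter", {filter});
  desc.SetInput("Bias", {bias});
  desc.SetOutput("Output", {output});
  return desc;
}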
1 change: 1 addition & 0 deletions paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h
@@ -147,6 +147,7 @@ static void GetInfoFromTheTmpOp(ir::Graph* graph,
 inline void ConvertToFusedOp(OpDesc* op) {
   const std::map<std::string, std::string> fused_ops = {
       {"conv2d", "fused_conv2d"},
+      {"conv2d_transpose", "conv2d_transpose_bias"},
       {"depthwise_conv2d", "fused_conv2d"},
       {"elementwise_add", "fused_elementwise_add"},
       {"elementwise_sub", "fused_elementwise_sub"},
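The hunk above truncates ConvertToFusedOp after the map; presumably its tail just looks up the op's fused counterpart and rewrites the type in place. A sketch of the likely complete shape (hypothetical, not the verbatim source; ConvertToFusedOpSketch is an illustrative name):

#include <map>
#include <string>

#include "paddle/fluid/framework/op_desc.h"

// Sketch: map a plain op type to its fused counterpart and rewrite the
// OpDesc in place; ops without a fused counterpart are left untouched.
inline void ConvertToFusedOpSketch(paddle::framework::OpDesc* op) {
  static const std::map<std::string, std::string> fused_ops = {
      {"conv2d", "fused_conv2d"},
      {"conv2d_transpose", "conv2d_transpose_bias"},  // added by this PR
      {"depthwise_conv2d", "fused_conv2d"},
  };
  auto it = fused_ops.find(op->Type());
  if (it != fused_ops.end()) {
    op->SetType(it->second);
  }
}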
69 changes: 69 additions & 0 deletions paddle/fluid/operators/compat/conv2d_transpose_bias.pbtxt
@@ -0,0 +1,69 @@
+type: "conv2d_transpose_bias"
+def {
+  inputs {
+    name: "Input"
+  }
+  inputs {
+    name: "Filter"
+  }
+  inputs {
+    name: "Bias"
+  }
+  outputs {
+    name: "Output"
+  }
+  attrs {
+    name: "output_padding"
+    type: INTS
+  }
+  attrs {
+    name: "output_size"
+    type: INTS
+  }
+  attrs {
+    name: "groups"
+    type: INT
+  }
+  attrs {
+    name: "dilations"
+    type: INTS
+  }
+  attrs {
+    name: "strides"
+    type: INTS
+  }
+  attrs {
+    name: "paddings"
+    type: INTS
+  }
+  attrs {
+    name: "padding_algorithm"
+    type: STRING
+  }
+  attrs {
+    name: "data_format"
+    type: STRING
+  }
+}
+extra {
+  attrs {
+    name: "force_fp32_output"
+    type: BOOLEAN
+  }
+  attrs {
+    name: "fuse_relu"
+    type: BOOLEAN
+  }
+  attrs {
+    name: "fuse_activation"
+    type: STRING
+  }
+  attrs {
+    name: "fuse_alpha"
+    type: FLOAT
+  }
+  attrs {
+    name: "fuse_beta"
+    type: FLOAT
+  }
+}
10 changes: 10 additions & 0 deletions paddle/phi/api/yaml/legacy_ops.yaml
@@ -277,6 +277,16 @@
     data_type : x
   backward : conv2d_transpose_grad
 
+- op : conv2d_transpose_bias
+  args : (Tensor x, Tensor filter, Tensor bias, int[] strides={1, 1}, int[] paddings={0, 0}, int[] output_padding={}, IntArray output_size={}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1}, str data_format="NCHW")
+  output : Tensor(out)
+  infer_meta :
+    func : Conv2dTransposeInferMeta
+    param : [x, filter, strides, paddings, output_padding, output_size, padding_algorithm, groups, dilations, data_format]
+  kernel :
+    func : conv2d_transpose_bias
+    data_type : x
+
 - op : copy_to
   args : (Tensor x, Place place, bool blocking)
   output : Tensor(out)
14 changes: 14 additions & 0 deletions paddle/phi/api/yaml/op_compat.yaml
@@ -617,6 +617,20 @@
            str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f,
            int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB()]
 
+- op : conv2d_transpose_bias
+  inputs :
+    {x : Input, filter : Filter, bias : Bias}
+  outputs :
+    out : Output
+  int_array :
+    output_size :
+      data_type : int
+      support_tensor : true
+  extra :
+    attrs : [bool is_test = false, bool use_cudnn = false, bool use_mkldnn = true, bool force_fp32_output = false,
+             str mkldnn_data_type = "float32", bool fuse_relu = false,
+             str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f]
+
 - op : conv3d
   backward : conv3d_grad, conv3d_double_grad (conv3d_grad_grad)
   inputs :
11 changes: 11 additions & 0 deletions paddle/phi/api/yaml/static_ops.yaml
@@ -123,6 +123,17 @@
   optional : bias
   backward : conv2d_transpose_grad
 
+- op : conv2d_transpose_bias
+  args : (Tensor x, Tensor filter, Tensor bias, int[] strides={1, 1}, int[] paddings={0, 0}, int[] output_padding={}, IntArray output_size={}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1}, str data_format="NCHW")
+  output : Tensor(out)
+  infer_meta :
+    func : Conv2dTransposeInferMeta
+    param : [x, filter, strides, paddings, output_padding, output_size, padding_algorithm, groups, dilations, data_format]
+  kernel :
+    func : conv2d_transpose_bias
+    param : [x, filter, bias, strides, paddings, output_padding, output_size, padding_algorithm, groups, dilations, data_format]
+    data_type : x
+
 - op : decode_jpeg
   args : (Tensor x, str mode = "unchanged")
   output : Tensor(out)
88 changes: 85 additions & 3 deletions paddle/phi/kernels/onednn/conv_transpose_kernel.cc
@@ -356,15 +356,13 @@ template <typename T, typename T_out>
 void Execute(const OneDNNContext& dev_ctx,
              const DenseTensor* x,
              const DenseTensor* filter,
+             const DenseTensor* bias,
              const std::vector<int>& strides,
              const std::vector<int>& paddings,
              const std::string& padding_algorithm,
              int groups,
              const std::vector<int>& dilations,
              DenseTensor* out) {
-  const auto* bias =
-      dev_ctx.HasDnnInput("Bias") ? dev_ctx.GetDnnInput("Bias") : nullptr;
-
   std::shared_ptr<dnnl::deconvolution_forward> conv_p;
   std::shared_ptr<dnnl::memory> src_memory_p;
   std::shared_ptr<dnnl::memory> weights_memory_p;
Expand Down Expand Up @@ -407,6 +405,23 @@ void Execute(const OneDNNContext& dev_ctx,
       args.insert({DNNL_ARG_BIAS, *bias_memory_p});
     }
   } else {
+    // Check that bias obeys the rules
+    if (bias) {
+      PADDLE_ENFORCE_EQ(
+          bias->layout(),
+          DataLayout::ONEDNN,
+          phi::errors::InvalidArgument(
+              "The Bias tensor's layout should be %d, but got %d.",
+              DataLayout::ONEDNN,
+              bias->layout()));
+
+      PADDLE_ENFORCE_EQ(
+          bias->dims().size(),
+          1,
+          phi::errors::InvalidArgument("Bias must only have 1 dimension, "
+                                       "i.e. X, but got dimension = %d .",
+                                       bias->dims().size()));
+    }
     // Caching Key for weights is needed
     std::string key =
         funcs::CreateKey(dev_ctx,
@@ -494,6 +509,63 @@ void Conv2dTransposeKernel(const Context& dev_ctx,
     Execute<T, dtype::bfloat16>(dev_ctx,
                                 &x,
                                 &filter,
+                                nullptr,
                                 strides,
                                 paddings,
                                 padding_algorithm,
+                                groups,
+                                dilations,
+                                out);
+  } else {
+    Execute<T, float>(dev_ctx,
+                      &x,
+                      &filter,
+                      nullptr,
+                      strides,
+                      paddings,
+                      padding_algorithm,
+                      groups,
+                      dilations,
+                      out);
+  }
+}
+
+template <typename T, typename Context>
+void Conv2dTransposeBiasKernel(const Context& dev_ctx,
+                               const DenseTensor& x,
+                               const DenseTensor& filter,
+                               const paddle::optional<DenseTensor>& bias,
+                               const std::vector<int>& strides,
+                               const std::vector<int>& paddings,
+                               const std::vector<int>& output_padding UNUSED,
+                               const IntArray& output_size UNUSED,
+                               const std::string& padding_algorithm,
+                               int groups,
+                               const std::vector<int>& dilations,
+                               const std::string& data_format UNUSED,
+                               DenseTensor* out) {
+  PADDLE_ENFORCE_EQ(dev_ctx.GetPlace().GetType(),
+                    AllocationType::CPU,
+                    phi::errors::PreconditionNotMet(
+                        "Operator oneDNN Conv must use CPUPlace"));
+
+  const bool is_BFLOAT16 =
+      dev_ctx.HasDnnAttr("mkldnn_data_type")
+          ? PADDLE_GET_CONST(std::string,
+                             dev_ctx.GetDnnAttr("mkldnn_data_type")) ==
+                "bfloat16"
+          : false;
+  const bool force_fp32_output =
+      dev_ctx.HasDnnAttr("force_fp32_output")
+          ? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("force_fp32_output"))
+          : false;
+  const bool use_bfloat16 = (!force_fp32_output && is_BFLOAT16);
+
+  if (use_bfloat16) {
+    Execute<T, dtype::bfloat16>(dev_ctx,
+                                &x,
+                                &filter,
+                                bias.get_ptr(),
+                                strides,
+                                paddings,
+                                padding_algorithm,
@@ -504,6 +576,7 @@
     Execute<T, float>(dev_ctx,
                       &x,
                       &filter,
+                      bias.get_ptr(),
                       strides,
                       paddings,
                       padding_algorithm,
@@ -547,3 +620,12 @@ PD_REGISTER_KERNEL(conv2d_transpose,
                    phi::dtype::bfloat16) {
   kernel->get_kerneltype_forvar_fn_ = phi::ConvTransposeGetKernelTypeForVar;
 }
+
+PD_REGISTER_KERNEL(conv2d_transpose_bias,
+                   OneDNN,
+                   ONEDNN,
+                   phi::Conv2dTransposeBiasKernel,
+                   float,
+                   phi::dtype::bfloat16) {
+  kernel->get_kerneltype_forvar_fn_ = phi::ConvTransposeGetKernelTypeForVar;
+}
3 changes: 3 additions & 0 deletions test/cpp/fluid/mkldnn/CMakeLists.txt
@@ -29,6 +29,9 @@ paddle_test(test_mkldnn_pool_adaptive_op SRCS test_mkldnn_pool_adaptive_op.cc)
 
 paddle_test(test_mkldnn_squeeze SRCS test_mkldnn_squeeze.cc)
 
+paddle_test(test_mkldnn_conv2d_transpose_bias SRCS
+            test_mkldnn_conv2d_transpose_bias.cc)
+
 if(WITH_ONNXRUNTIME AND WIN32)
   # Copy onnxruntime for some c++ test in Windows, since the test will
   # be built only in CI, so suppose the generator in Windows is Ninja.
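The new test's source is not shown in this diff. A minimal smoke test in the same spirit might simply assert that the oneDNN kernel registered above is discoverable, for example via phi's KernelFactory (a hypothetical sketch; the actual test file likely exercises the operator end to end):

#include <gtest/gtest.h>

#include "paddle/phi/core/kernel_factory.h"

// Hypothetical smoke test: after PD_REGISTER_KERNEL(conv2d_transpose_bias, ...)
// the kernel should be discoverable through phi's KernelFactory.
TEST(Conv2dTransposeBias, OneDNNKernelIsRegistered) {
  auto kernels =
      phi::KernelFactory::Instance().SelectKernelMap("conv2d_transpose_bias");
  EXPECT_FALSE(kernels.empty());
}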