【Hackathon 6th No.52】move quantize、dequantize op to phi (#64494)

enkilee · web-flow · commit 7fa3e2db3c84 · 2024-05-30T11:18:54.000+08:00
diff --git a/paddle/fluid/operators/onednn/quantize_onednn_op.cc b/paddle/fluid/operators/onednn/quantize_onednn_op.cc
diff --git a/paddle/fluid/pir/dialect/operator/utils/utils.cc b/paddle/fluid/pir/dialect/operator/utils/utils.cc
@@ -84,7 +84,6 @@ const std::unordered_set<std::string> LegacyOpList = {
 #ifdef PADDLE_WITH_DNNL
     paddle::onednn::dialect::LrnOp::name(),
     paddle::onednn::dialect::LrnGradOp::name(),
-    paddle::onednn::dialect::QuantizeOp::name(),
     paddle::onednn::dialect::MultiGruOp::name(),
     paddle::onednn::dialect::FusionLstmOp::name(),
 #endif
diff --git a/paddle/phi/kernels/onednn/quantize_kernel.cc b/paddle/phi/kernels/onednn/quantize_kernel.cc
@@ -0,0 +1,144 @@
+/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/quantize_kernel.h"
+
+#include <string>
+#include <unordered_map>
+
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/compat/convert_utils.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/expect.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/utils/data_type.h"
+
+namespace phi {
+
+using dnnl::memory;
+
+// Quantizes `input` into `output` with a oneDNN reorder primitive.
+//
+// dev_ctx            supplies the oneDNN engine and the output place.
+// input              source tensor; its dims, dtype and mem_desc drive the
+//                    reorder.
+// is_negative_input  selects INT8 output, but only when no shift is applied.
+// scale              registered as the DNNL_ARG_SRC scale unless it is 1.0f;
+//                    must not be 0.0f.
+// shift              truncated to int32 and registered as the DNNL_ARG_DST
+//                    zero point unless the truncated value is 0; the
+//                    truncated value must lie in [0, 255].
+// output_format      not read by this kernel.
+// bfloat16           forces BFLOAT16 output regardless of the other flags.
+// output             destination tensor; receives the reordered data and the
+//                    destination memory descriptor.
+//
+// Fails with InvalidArgument when the scale or shift constraint is violated.
+template <typename T, typename Context>
+void QuantOpKernel(const Context& dev_ctx,
+                   const DenseTensor& input,
+                   bool is_negative_input,
+                   const float scale,
+                   const float shift,
+                   const std::string& output_format,
+                   bool bfloat16,
+                   DenseTensor* output) {
+  // NOTE(review): the float -> int32 cast truncates toward zero and is done
+  // before the range check below, so a shift far outside the int32 range is
+  // not guarded here.
+  const auto quantization_shift = static_cast<int32_t>(shift);
+  const bool with_scale = scale != 1.0f;
+  const bool with_shift = quantization_shift != 0;
+
+  PADDLE_ENFORCE_NE(scale,
+                    0.0f,
+                    phi::errors::InvalidArgument(
+                        "Quantization scale must be different than 0.0f"));
+  // The message is one format string built from adjacent literals; the value
+  // printed is the int32 shift, hence %d.
+  PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0,
+                 phi::errors::InvalidArgument(
+                     "Quantization shift must be lower or equal to "
+                     "255 and greater or equal to 0, but got %d",
+                     quantization_shift));
+
+  auto x_tz = common::vectorize<int64_t>(input.dims());
+  dnnl::primitive_attr attrs;
+  // Mask 0 pairs with the one-element scale / zero-point memories below.
+  static constexpr int32_t mask = 0;
+
+  if (with_scale) {
+    attrs.set_scales_mask(DNNL_ARG_SRC, mask);
+  }
+
+  if (with_shift) {
+    attrs.set_zero_points_mask(DNNL_ARG_DST, mask);
+  }
+
+  auto x_type = phi::funcs::ToOneDNNDataType(input.dtype());
+  DataType out_dtype;
+
+  // A non-zero shift always yields UINT8, even for negative input.
+  if (bfloat16) {
+    out_dtype = DataType::BFLOAT16;
+  } else if (is_negative_input && !with_shift) {
+    out_dtype = DataType::INT8;
+  } else {
+    out_dtype = DataType::UINT8;
+  }
+
+  auto out_type = phi::funcs::ToOneDNNDataType(out_dtype);
+
+  phi::funcs::ReorderOneDNNHandler reorder_handler(
+      x_tz, input.dtype(), x_type, out_dtype, out_type, dev_ctx.GetEngine());
+
+  auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
+      input.mem_desc(), phi::funcs::to_void_cast(input.data<T>()));
+  auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
+      output, input.mem_desc(), dev_ctx.GetPlace());
+
+  auto reorder_p = reorder_handler.AcquireReorder(
+      reorder_dst_memory_p, reorder_src_memory_p, attrs);
+
+  auto& astream = phi::OneDNNContext::tls().get_stream();
+
+  // The scale and zero-point memories are built on the addresses of `scale` /
+  // `quantization_shift`, which stay alive until astream.wait() returns.
+  auto scales_md = dnnl::memory::desc(
+      {1}, dnnl::memory::data_type::f32, dnnl::memory::format_tag::x);
+  auto scales_mem = dnnl::memory(
+      scales_md, dev_ctx.GetEngine(), phi::funcs::to_void_cast<float>(&scale));
+  auto zero_points_md = dnnl::memory::desc(
+      {1}, dnnl::memory::data_type::s32, dnnl::memory::format_tag::x);
+  auto zero_points_mem =
+      dnnl::memory(zero_points_md,
+                   dev_ctx.GetEngine(),
+                   phi::funcs::to_void_cast<int32_t>(&quantization_shift));
+
+  std::unordered_map<int, dnnl::memory> reorder_args;
+  reorder_args.insert({DNNL_ARG_SRC, *reorder_src_memory_p});
+  reorder_args.insert({DNNL_ARG_DST, *reorder_dst_memory_p});
+  if (with_scale) {
+    reorder_args.insert({DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC, scales_mem});
+  }
+  if (with_shift) {
+    reorder_args.insert(
+        {DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_DST, zero_points_mem});
+  }
+
+  reorder_p->execute(astream, reorder_args);
+  astream.wait();
+
+  output->set_mem_desc(reorder_dst_memory_p->get_desc());
+}
+}  // namespace phi
+
+PD_REGISTER_KERNEL(quantize, OneDNN, ONEDNN, phi::QuantOpKernel, float) {}
diff --git a/paddle/phi/kernels/quantize_kernel.h b/paddle/phi/kernels/quantize_kernel.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/device_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+// Declares the `quantize` kernel entry point.
+//
+// T is the element type of `input`; Context is the device context type.
+// The parameters are, in order: the input tensor, a flag marking input that
+// may hold negative values, the quantization scale, the quantization shift,
+// the requested output format name, a flag requesting bfloat16 output, and
+// the tensor that receives the result.
+template <typename T, typename Context>
+void QuantOpKernel(const Context& dev_ctx,
+                   const DenseTensor& input,
+                   bool is_negative_input,
+                   const float scale,
+                   const float shift,
+                   const std::string& output_format,
+                   bool bfloat16,
+                   DenseTensor* output);
+
+}  // namespace phi
diff --git a/test/mkldnn/CMakeLists.txt b/test/mkldnn/CMakeLists.txt
@@ -20,6 +20,9 @@ if(WITH_ONEDNN AND NOT WIN32)
   py_test_modules(
     test_dequantize_mkldnn_op_static_build MODULES test_dequantize_mkldnn_op
     ENVS FLAGS_new_executor_static_build=true)
+  py_test_modules(
+    test_quantize_mkldnn_op_static_build MODULES test_quantize_mkldnn_op ENVS
+    FLAGS_new_executor_static_build=true)
 endif()
 
 set_tests_properties(test_concat_mkldnn_op PROPERTIES TIMEOUT 120)
diff --git a/test/mkldnn/test_quantize_mkldnn_op.py b/test/mkldnn/test_quantize_mkldnn_op.py
@@ -26,8 +26,9 @@ def setUp(self):
         self.scale = 255.0
         self.shift = 0.0
         self.input_size = [1, 1, 5, 5]  # Naive nChw16c
-        self.is_negative = False
+        self.is_negative_input = False
         self.output_format = 'NCHW'
+        self.bfloat16 = False
         self.set_scale()
         self.set_shift()
         self.set_is_negative()
@@ -37,7 +38,7 @@ def setUp(self):
         self.prepare_output()
 
     def prepare_input(self):
-        if self.is_negative:
+        if self.is_negative_input:
             # input data values are from interval [-1.0, 1.0)
             self.input = (
                 2 * np.random.random_sample(self.input_size) - 1
@@ -50,14 +51,18 @@ def prepare_input(self):
 
         self.inputs = {'Input': OpTest.np_dtype_to_base_dtype(self.input)}
         self.attrs = {
+            'is_negative_input': self.is_negative_input,
             'Scale': self.scale,
             'Shift': self.shift,
-            'is_negative_input': self.is_negative,
             'output_format': self.output_format,
+            'bfloat16': self.bfloat16,
         }
 
     def prepare_output(self):
-        input_data_type = 'int8' if self.is_negative else 'uint8'
+        if self.is_negative_input and self.shift == 0.0:
+            input_data_type = 'int8'
+        else:
+            input_data_type = 'uint8'
         output = np.rint(self.input * self.scale + self.shift).astype(
             input_data_type
         )
@@ -97,15 +102,15 @@ def set_scale(self):
         self.scale = 127.0
 
     def set_is_negative(self):
-        self.is_nagative = True
+        self.is_negative_input = True
 
 
 class TestQuantizeOp2(TestQuantizeOp):
     def set_scale(self):
         self.scale = 255.0
 
     def set_is_negative(self):
-        self.is_nagative = False
+        self.is_negative_input = False
 
 
 # 2-dim input
@@ -115,7 +120,7 @@ def set_output_format(self):
         self.output_format = 'NCHW'
 
     def set_is_negative(self):
-        self.is_nagative = False
+        self.is_negative_input = False
 
     def set_scale(self):
         self.scale = 255.0
@@ -131,7 +136,7 @@ def set_input_size(self):
 # N - negative input
 class TestQuantizeOpShift_NCHW_2_N(TestQuantizeOpShift_NCHW_2_P):
     def set_is_negative(self):
-        self.is_nagative = True
+        self.is_negative_input = True
 
     def set_scale(self):
         self.scale = 127.0