PaddlePaddle · luotao1 · Nov 16, 2020 · Nov 13, 2020 · Nov 14, 2020
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <vector>
 
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/string/pretty_log.h"
 
@@ -157,3 +158,8 @@ void CPUBFloat16Pass::ApplyImpl(ir::Graph* graph) const {
 }  // namespace paddle
 
 REGISTER_PASS(cpu_bfloat16_pass, paddle::framework::ir::CPUBFloat16Pass);
+
+REGISTER_PASS_CAPABILITY(cpu_bfloat16_pass)  // op-version rule for this pass
+    .AddCombination(
+        paddle::framework::compatible::OpVersionComparatorCombination().GE(
+            "quantize", 1));  // GE: presumably "quantize" version >= 1 -- confirm
diff --git a/paddle/fluid/operators/quantize_op.cc b/paddle/fluid/operators/quantize_op.cc
@@ -13,6 +13,7 @@
  *     limitations under the License. */
 
 #include "paddle/fluid/operators/quantize_op.h"
+#include "paddle/fluid/framework/op_version_registry.h"
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
@@ -54,3 +55,10 @@ void QuantOpMaker::Make() {
 namespace ops = paddle::operators;
 
 REGISTER_OPERATOR(quantize, ops::QuantOp, ops::QuantOpMaker);
+
+REGISTER_OP_VERSION(quantize)  // version checkpoint for the quantize op
+    .AddCheckpoint(
+        R"ROC( Add a new attribute [bfloat16])ROC",
+        paddle::framework::compatible::OpVersionDesc().NewAttr(
+            "bfloat16", "If true, float32 input is converted to bfloat16",
+            false));  // presumably the attribute's default value -- confirm
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
@@ -43,6 +43,7 @@ def __init__(self, methodName='runTest'):
         self.fetch_list = None
 
         self.enable_mkldnn = False
+        self.enable_mkldnn_bfloat16 = False
         self.enable_trt = False
         self.trt_parameters = None
         self.enable_lite = False
@@ -125,6 +126,8 @@ def _get_analysis_config(self,
                     self.trt_parameters.use_calib_mode)
         elif use_mkldnn:
             config.enable_mkldnn()
+            if self.enable_mkldnn_bfloat16:
+                config.enable_mkldnn_bfloat16()
 
         return config
 
@@ -251,6 +254,8 @@ def check_output_with_option(self,
                 len(outs) == len(mkldnn_outputs),
                 "The number of outputs is different between CPU and MKLDNN. ")
 
+            if self.enable_mkldnn_bfloat16:
+                atol = 0.01
             for out, mkldnn_output in zip(outs, mkldnn_outputs):
                 self.assertTrue(
                     np.allclose(

diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+from inference_pass_test import InferencePassTest
+import paddle.fluid as fluid
+from paddle.fluid.core import PassVersionChecker
+
+
+class TestMKLDNNCpuBfloat16Pass(InferencePassTest):  # bf16 MKLDNN pass test
+    def setUp(self):  # build matmul->transpose->reshape->fc net and feeds
+        self.init_data()  # first: sets bs, d_type and shapes used below
+        with fluid.program_guard(self.main_program, self.startup_program):
+            x = fluid.data(
+                name='x', shape=[-1] + self.shape_x, dtype=self.d_type)
+            y = fluid.data(
+                name='y', shape=[-1] + self.shape_y, dtype=self.d_type)
+            out = fluid.layers.matmul(x, y)
+            out = fluid.layers.transpose(out, perm=[0, 1, 2, 3])  # identity
+            out = fluid.layers.reshape(out, [0, 0, 0, 0])
+            out = fluid.layers.fc(out, size=1)
+
+            self.feeds = {  # unseeded random inputs, one array per data var
+                "x":
+                np.random.random([self.bs] + self.shape_x).astype(self.d_type),
+                "y":
+                np.random.random([self.bs] + self.shape_y).astype(self.d_type)
+            }
+            self.fetch_list = [out]  # fc output is the only fetched variable
+
+    def init_data(self):  # configuration read by setUp and InferencePassTest
+        self.bs = 8  # batch size prepended to the feed shapes
+        self.d_type = np.float32
+        self.shape_x = [12, 10, 1]
+        self.shape_y = [12, 1, 64]
+        self.enable_mkldnn = True
+        self.enable_mkldnn_bfloat16 = True  # base: bf16 config, atol=0.01
+
+    def test_check_output(self):  # CPU-only run plus pass-version check
+        use_gpu = False
+        self.check_output_with_option(use_gpu, flatten=True)
+        self.assertTrue(PassVersionChecker.IsCompatible('cpu_bfloat16_pass'))
+
+
+if __name__ == "__main__":
+    unittest.main()