diff --git a/paddle/fluid/framework/selected_rows_utils.cc b/paddle/fluid/framework/selected_rows_utils.cc
index 67bb6cac67026b..6d961b92f5da06 100644
--- a/paddle/fluid/framework/selected_rows_utils.cc
+++ b/paddle/fluid/framework/selected_rows_utils.cc
@@ -72,8 +72,12 @@ void DeserializeFromStream(std::istream& is,
   }
   {
     // the 2st field, rows information
-    uint64_t size;
+    uint64_t size = 0;
     is.read(reinterpret_cast<char*>(&size), sizeof(size));
+    PADDLE_ENFORCE_EQ(
+        is.good(),
+        true,
+        platform::errors::Unavailable("Cannot read the number of rows."));
     auto& rows = *selected_rows->mutable_rows();
     rows.resize(size);
     for (uint64_t i = 0; i < size; ++i) {
diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
index 7e38e407336e5f..86f3e759d42a82 100644
--- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
@@ -524,7 +524,7 @@ if(NOT WIN32)
   set_tests_properties(test_post_training_quantization_program_resnet50
                        PROPERTIES TIMEOUT 240)
   set_tests_properties(test_post_training_quantization_mobilenetv1
-                       PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY")
+                       PROPERTIES TIMEOUT 900 LABELS "RUN_TYPE=NIGHTLY")
   set_tests_properties(test_post_training_quantization_resnet50
                        PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY")
   set_tests_properties(test_post_training_quantization_mnist PROPERTIES TIMEOUT
diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
index cb6d685f721d60..54bc5b19743455 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
@@ -241,6 +241,8 @@ def run_program(self, model_path, batch_size, infer_iterations):
     def generate_quantized_model(self,
                                  model_path,
                                  quantizable_op_type,
+                                 batch_size,
+                                 batch_nums,
                                  algo="KL",
                                  round_type="round",
                                  is_full_quantize=False,
@@ -262,6 +264,8 @@ def generate_quantized_model(self,
         ptq = PostTrainingQuantization(executor=exe,
                                        sample_generator=val_reader,
                                        model_dir=model_path,
+                                       batch_size=batch_size,
+                                       batch_nums=batch_nums,
                                        algo=algo,
                                        quantizable_op_type=quantizable_op_type,
                                        round_type=round_type,
@@ -299,7 +303,8 @@ def run_test(self,
         print("Start INT8 post training quantization for {0} on {1} images ...".
               format(model, sample_iterations * batch_size))
         self.generate_quantized_model(os.path.join(model_cache_folder, "model"),
-                                      quantizable_op_type, algo, round_type,
+                                      quantizable_op_type, batch_size,
+                                      sample_iterations, algo, round_type,
                                       is_full_quantize, is_use_cache_file,
                                       is_optimize_model, onnx_format)
 
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
index 5393014fe7d34b..044451695d4d02 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
@@ -225,8 +225,8 @@ def check_output_with_option(self,
                     tensorrt_output = tensorrt_output.flatten()
 
                 np.testing.assert_allclose(
-                    paddle_out,
                     tensorrt_output,
+                    paddle_out,
                     rtol=rtol,
                     atol=atol,
                     err_msg='Output has diff between GPU and TensorRT. ')
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
index e4514dd0ee948a..4d8f4e2b19549d 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
@@ -47,11 +47,11 @@ def sample_program_configs(self):
         self.trt_param.workspace_size = 1073741824
 
         def generate_input1(batch, attrs: List[Dict[str, Any]]):
-            return np.ones([batch, attrs[0]['groups'] * 3, 64,
-                            64]).astype(np.float32)
+            return np.ones([batch, attrs[0]['groups'] * 3, 64, 64]).astype(
+                np.float32) / 4
 
         def generate_weight1(attrs: List[Dict[str, Any]]):
-            return np.random.random([24, 3, 3, 3]).astype(np.float32)
+            return np.random.random([9, 3, 3, 3]).astype(np.float32) - 0.5
 
         batch_options = [1, 2]
         strides_options = [[2, 2], [1, 2]]
@@ -162,7 +162,7 @@ def generate_trt_nodes_num(attrs, dynamic_shape):
             attrs, False), (1e-3, 1e-3)
         self.trt_param.precision = paddle_infer.PrecisionType.Int8
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-3, 1e-3)
+            attrs, False), (1e-2, 1e-2)
 
         # for dynamic_shape
         generate_dynamic_shape(attrs)
@@ -174,7 +174,7 @@ def generate_trt_nodes_num(attrs, dynamic_shape):
             attrs, True), (1e-3, 1e-3)
         self.trt_param.precision = paddle_infer.PrecisionType.Int8
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-3, 1e-3)
+            attrs, True), (1e-2, 1e-2)
 
     def test(self):
         self.run_test()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
index 457db86c3236d3..56767b3457791d 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
@@ -128,7 +128,7 @@ def generate_trt_nodes_num(attrs, dynamic_shape):
             attrs, False), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False), (1e-3, 1e-3)
 
         # for dynamic_shape
         generate_dynamic_shape(attrs)
@@ -137,7 +137,7 @@ def generate_trt_nodes_num(attrs, dynamic_shape):
             attrs, True), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True), (1e-3, 1e-3)
 
     def test(self):
         self.run_test()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
index 68c3e9bd377db8..42b234827b1e72 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
@@ -44,9 +44,9 @@ def sample_program_configs(self):
 
         def generate_input1(dtype, attrs: List[Dict[str, Any]]):
             if dtype == -1 or dtype == 5:
-                return np.random.random([1, 3, 64, 64]).astype(np.float32)
+                return np.random.random([1, 3, 32, 32]).astype(np.float32)
             elif dtype == 2:
-                return np.random.random([1, 3, 64, 64]).astype(np.int32)
+                return np.random.random([1, 3, 32, 32]).astype(np.int32)
 
         for keep_dim in [True, False]:
             for dim in [[], [1], [0], [0, 1], [1, 2, 3], [-2, 0, 3], [-3],
@@ -93,7 +93,7 @@ def sample_predictor_configs(self, program_config):
         def generate_dynamic_shape(attrs):
             self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
             self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-            self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
+            self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 32, 32]}
 
         def clear_dynamic_shape():
             self.dynamic_shape.min_input_shape = {}
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py
index a02cdb6a34791c..730babf2aab6dd 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py
@@ -154,7 +154,7 @@ def sample_program_config(self, draw):
 
     def test(self):
         self.run_and_statis(quant=False,
-                            max_examples=50,
+                            max_examples=25,
                             passes=["trt_flatten2_matmul_fuse_pass"])
 
 
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py
index 22f278d6d5d18f..c4488a57f96057 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py
@@ -79,7 +79,14 @@ def check_output(self):
             shutil.rmtree(self.path + "_opt_cache")
         if core.is_compiled_with_cuda():
             use_gpu = True
-            self.check_output_with_option(use_gpu)
+            if self.precision == AnalysisConfig.Precision.Float32:
+                atol, rtol = (1e-5, 1e-5)
+            elif self.precision == AnalysisConfig.Precision.Half:
+                atol, rtol = (1e-3, 1e-3)
+            else:
+                raise ValueError("Unsupported precision {}".format(
+                    self.precision))
+            self.check_output_with_option(use_gpu, atol=atol, rtol=rtol)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
 
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py
index 3812642d2a5e4b..f6eaa2fb8c75e0 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py
@@ -75,7 +75,14 @@ def check_output(self):
             shutil.rmtree(self.path + "_opt_cache")
         if core.is_compiled_with_cuda():
             use_gpu = True
-            self.check_output_with_option(use_gpu)
+            if self.precision == AnalysisConfig.Precision.Float32:
+                atol, rtol = (1e-5, 1e-5)
+            elif self.precision == AnalysisConfig.Precision.Half:
+                atol, rtol = (1e-3, 1e-3)
+            else:
+                raise ValueError("Unsupported precision {}".format(
+                    self.precision))
+            self.check_output_with_option(use_gpu, atol=atol, rtol=rtol)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
 
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py
index 1086e1428e09f3..ead27625af24c0 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py
@@ -218,7 +218,10 @@ def setUp(self):
     def test_check_output(self):
         if core.is_compiled_with_cuda():
             use_gpu = True
-            self.check_output_with_option(use_gpu, flatten=True)
+            self.check_output_with_option(use_gpu,
+                                          flatten=True,
+                                          atol=1e-3,
+                                          rtol=1e-3)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
 
@@ -244,7 +247,10 @@ def setUp(self):
     def test_check_output(self):
         if core.is_compiled_with_cuda():
             use_gpu = True
-            self.check_output_with_option(use_gpu, flatten=True)
+            self.check_output_with_option(use_gpu,
+                                          flatten=True,
+                                          atol=1e-3,
+                                          rtol=1e-3)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
 
diff --git a/python/paddle/fluid/tests/unittests/test_einsum_v2.py b/python/paddle/fluid/tests/unittests/test_einsum_v2.py
index 971ad1fa744e92..7230cd97ebdd88 100644
--- a/python/paddle/fluid/tests/unittests/test_einsum_v2.py
+++ b/python/paddle/fluid/tests/unittests/test_einsum_v2.py
@@ -530,21 +530,26 @@ def test_shape(self):
         self.assertEqual(C.shape, (-1, 384))
 
 
+@unittest.skipIf(not core.is_compiled_with_cuda()
+                 or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+                 "core is not compiled with CUDA or not support the bfloat16")
 class TestBF16(unittest.TestCase):
     """
     EinsumOp support bfloat16 type, add unittest here for the correctness.
     """
 
     def test_shape(self):
-        if paddle.is_compiled_with_cuda() and _is_gpu_bfloat16_supported():
-            """ MatmulKernel support bfloat16 only if cuda_major >= 11.0 and Compute Capability >= 8.0
+        cuda_major = paddle.version.cuda().split('.')[0].strip()
+        if int(cuda_major) >= 11:
+            """ MatmulKernel supports bfloat16 only if cuda_major >= 11.0.
             """
             A = paddle.to_tensor(np.array([1.0, 2.0])).astype(paddle.bfloat16)
             A = A.cuda()
             B = paddle.to_tensor(np.array([2.0, 3.0])).astype(paddle.bfloat16)
             B = B.cuda()
             C = paddle.einsum('i,i->', A, B)
-            self.assertEqual(C.astype(paddle.float32).item(), 8.0)
+            D = paddle.to_tensor(8.0).astype(paddle.bfloat16)
+            self.assertEqual(C.item(), D.item())
 
 
 class TestComplex(unittest.TestCase):