Commit 3bcac6e

Author: wozna
Correct test and add comment
1 parent 70a25f4 commit 3bcac6e

2 files changed: 30 additions & 14 deletions

python/paddle/fluid/contrib/mixed_precision/fp16_utils.py

Lines changed: 13 additions & 5 deletions
@@ -157,7 +157,8 @@ def _insert_cast_post_op(block, op, idx, src_dtype, dest_dtype, target_name,
         return num_cast_ops
 
     assert target_var.dtype == src_dtype, \
-        "The real dtype({}) is not equal to the src dtype({})".format(_dtype_to_str(target_var.dtype), _dtype_to_str(src_dtype))
+        "The real dtype({}) is not equal to the src dtype({})".format(
+            _dtype_to_str(target_var.dtype), _dtype_to_str(src_dtype))
 
     cast_name = target_var.name + '.cast_' + _dtype_to_str(dest_dtype)
     cast_var = block.vars.get(cast_name)
@@ -221,6 +222,13 @@ def find_true_post_op(ops, cur_op, var_name, search_all=False):
     """
     post_op = []
     if search_all:
+        """
+        "cur_op" does not have to be in the list of "ops". E.g. "cur_op" can come
+        from the startup_prog block while the "ops" list comes from the main_prog
+        block. By setting idx to -1, we start looking for post ops from the top
+        of the list. If search_all is False, "cur_op" is assumed to be in "ops",
+        so to reduce the search time we can start iterating from "cur_op"'s index.
+        """
         idx = -1
     else:
         for idx, op in enumerate(ops):
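
To make the new docstring concrete, here is a small self-contained sketch of the search strategy it describes. The dict-based op representation and the helper name are illustrative stand-ins for Paddle operators, not the real find_true_post_op:

# Simplified re-implementation of the search strategy described above;
# ops are modeled as plain dicts instead of Paddle operator objects.
def find_post_ops_simplified(ops, cur_op, var_name, search_all=False):
    if search_all:
        # cur_op may live in another block (e.g. the startup program),
        # so scan the whole list from the top.
        idx = -1
    else:
        # cur_op is assumed to be inside `ops`; starting after its own
        # index skips ops that cannot be post ops.
        idx = ops.index(cur_op)
    return [op for op in ops[idx + 1:] if var_name in op['inputs']]

# cur_op comes from a different block: only search_all=True finds the consumers.
startup_op = {'name': 'fill_constant', 'inputs': []}
main_ops = [{'name': 'mul', 'inputs': ['x', 'w']},
            {'name': 'cast', 'inputs': ['w']}]
print(find_post_ops_simplified(main_ops, startup_op, 'w', search_all=True))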
@@ -274,7 +282,7 @@ def _need_keep_fp32(op, unsupported_op_list, use_fp16_guard):
 
     if use_fp16_guard:
         if op.has_attr("op_namescope") and \
-            (_fp16_guard_pattern in op.attr("op_namescope")):
+                (_fp16_guard_pattern in op.attr("op_namescope")):
             # op in fp16 guard
             return False
         else:
@@ -500,8 +508,8 @@ def rewrite_program(main_prog, amp_lists):
     black_op_set = set()
     for op in ops:
 
-        # NOTE(zhiqiu): 'create_py_reader' and 'read' is used in non-iterable DataLoder,
-        # we don't need to handle reader op and the input of 'create_py_reader' is not
+        # NOTE(zhiqiu): 'create_py_reader' and 'read' are used in the non-iterable DataLoader;
+        # we don't need to handle reader ops, and the input of 'create_py_reader' is not
         # in block, which may result in errors.
         # See GeneratorLoader._init_non_iterable() for details.
         if op.type == 'create_py_reader' or op.type == 'read':
@@ -616,7 +624,7 @@ def update_role_var_grad(main_prog, params_grads):
                 raise ValueError("The cast op {0}'s output should not be"
                                  "used by a non-optimize op, however, it"
                                  "is used by {1}".format(op, post_ops[0]))
-        #add new op in the python and cpp at the same time
+        # add new op in the python and cpp at the same time
         new_op_desc = block.desc.append_op()
         new_op_desc.copy_from(op.desc)
         new_op = framework.Operator(

python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py

Lines changed: 17 additions & 9 deletions
@@ -53,20 +53,27 @@ def scope_prog_guard(self):
         with fluid.program_guard(prog, startup_prog):
             yield
 
-    def get_static_graph_result(self, feed, fetch_list, amp_fun,
-                                with_lod=False):
+    def get_static_graph_result(self,
+                                feed,
+                                fetch_list,
+                                amp_fun,
+                                with_lod=False,
+                                startup_prog=None):
         exe = fluid.Executor(core.CPUPlace())
-        exe.run(fluid.default_startup_program())
+        exe.run(fluid.default_startup_program()
+                if startup_prog is None else startup_prog)
         prog = fluid.default_main_program()
-        startup_prog = fluid.default_startup_program()
         if amp_fun is not None:
-            amp_fun(prog, startup_prog)
+            if startup_prog is not None:
+                amp_fun(prog, startup_prog)
+            else:
+                amp_fun(prog)
         return exe.run(prog,
                        feed=feed,
                        fetch_list=fetch_list,
                        return_numpy=(not with_lod))
 
-    def _graph_common(self, _amp_fun):
+    def _graph_common(self, _amp_fun, startup_prog=None):
         size = 3
         n = np.ones([size, size], dtype='float32') * 3.2
         nn = np.ones([size, size], dtype='float32') * -2.7
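
For context, the reworked helper now supports two call patterns. The sketch below shows both; the lambdas, feed names, and the my_bf16_pass helper are illustrative placeholders (only get_static_graph_result's new signature comes from this diff):

# Without startup_prog: the default startup program is run, and amp_fun
# receives only the main program, preserving the old behavior.
out = self.get_static_graph_result(
    feed={'t': n, 'tt': nn},
    fetch_list=[ret],
    amp_fun=lambda prog: amp.bf16.rewrite_program_bf16(prog))

# With startup_prog: the given program is executed instead of the default
# one and is also forwarded to amp_fun(prog, startup_prog), for passes
# that must rewrite startup ops too.
out = self.get_static_graph_result(
    feed={'t': n, 'tt': nn},
    fetch_list=[ret],
    amp_fun=lambda prog, sp: my_bf16_pass(prog, sp),  # hypothetical pass
    startup_prog=fluid.default_startup_program())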
@@ -123,7 +130,8 @@ def _graph_common(self, _amp_fun):
             self.get_static_graph_result(
                 feed={'t': n, 'tt': nn},
                 fetch_list=[ret],
-                amp_fun=_amp_fun
+                amp_fun=_amp_fun,
+                startup_prog=startup_prog
             )
         self.assertTrue(
             static_ret_bf16, np.ones(
@@ -133,7 +141,7 @@ def test_graph_rewrite(self):
         self._graph_common(lambda prog: amp.bf16.rewrite_program_bf16(
             prog,
             amp.bf16.AutoMixedPrecisionListsBF16(
-                custom_fp32_varnames={'elementwise_add_0.tmp_0'}),
+                custom_fp32_varnames={'elementwise_add_0.tmp_0'})
         ))
 
     def test_graph_cast(self):
@@ -143,7 +151,7 @@ def test_graph_cast(self):
             amp.bf16.AutoMixedPrecisionListsBF16(
                 custom_fp32_list={'elementwise_mul'}),
             use_bf16_guard=True
-        ))
+        ), startup_prog=fluid.default_startup_program())
 
 
 if __name__ == '__main__':
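
Reassembled, the updated test_graph_cast call now threads a startup program through _graph_common so that amp_fun receives both programs. The cast pass behind the lambda sits outside the visible hunks, so it appears below as a placeholder name:

# Sketch of the full call after this commit (some_bf16_cast_pass is a
# placeholder; the real function name is not shown in this diff).
self._graph_common(
    lambda prog, startup_prog: some_bf16_cast_pass(
        prog,
        startup_prog,
        amp.bf16.AutoMixedPrecisionListsBF16(
            custom_fp32_list={'elementwise_mul'}),
        use_bf16_guard=True),
    startup_prog=fluid.default_startup_program())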
