From c5c4aa25361bb94735e04cd0bab89eab10108939 Mon Sep 17 00:00:00 2001
From: youth123 <2042519524@qq.com>
Date: Fri, 28 May 2021 17:51:43 +0800
Subject: [PATCH 1/2] update mp

---
 python/paddle/distributed/collective.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py
index 4f3a6f4768933d..f3da072d757d2c 100644
--- a/python/paddle/distributed/collective.py
+++ b/python/paddle/distributed/collective.py
@@ -1009,16 +1009,17 @@ def _parallel_linear(x,
         name=name)
 
     linear_out = linear(x)
-    startup_block = paddle.static.default_startup_program().global_block()
-    main_block = paddle.static.default_main_program().global_block()
-    startup_block.vars[linear.weight.name].is_distributed = True
-    main_block.vars[linear.weight.name].is_distributed = True
+    startup_block = paddle.static.default_startup_program().current_block()
+    main_block = paddle.static.default_main_program().current_block()
+    startup_block._find_var_recursive(linear.weight.name).is_distributed = True
+    main_block._find_var_recursive(linear.weight.name).is_distributed = True
     # set is_distributed for splited bias
     # if a linear layer is splited by row, each rank would hold a complete bias and they should be the same in each rank.
     # if a linear layer is splited by col, the bias would also be split into each rank as its weight
     if axis == 1 and linear._bias_attr != False:
-        startup_block.vars[linear.bias.name].is_distributed = True
-        main_block.vars[linear.bias.name].is_distributed = True
+        startup_block._find_var_recursive(
+            linear.bias.name).is_distributed = True
+        main_block._find_var_recursive(linear.bias.name).is_distributed = True
 
     if not gather_out: return linear_out
 

From afc13b68da0b0bbabeaeac7c6bd7dead95187b72 Mon Sep 17 00:00:00 2001
From: youth123 <2042519524@qq.com>
Date: Mon, 31 May 2021 20:06:06 +0800
Subject: [PATCH 2/2] test=allcase

---
 python/paddle/distributed/collective.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py
index f3da072d757d2c..5775a734c87069 100644
--- a/python/paddle/distributed/collective.py
+++ b/python/paddle/distributed/collective.py
@@ -1013,6 +1013,7 @@ def _parallel_linear(x,
     main_block = paddle.static.default_main_program().current_block()
     startup_block._find_var_recursive(linear.weight.name).is_distributed = True
     main_block._find_var_recursive(linear.weight.name).is_distributed = True
+
     # set is_distributed for splited bias
     # if a linear layer is splited by row, each rank would hold a complete bias and they should be the same in each rank.
     # if a linear layer is splited by col, the bias would also be split into each rank as its weight
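
Background on the change: a Block's vars dict only holds variables created in that exact block, so the old global_block().vars[name] lookup fails with a KeyError whenever the weight or bias variable does not live in the global block (for example when _parallel_linear is built inside a sub-block of the program). current_block()._find_var_recursive(name) starts from the block currently being built and also searches parent blocks, returning None instead of raising when nothing is found. The snippet below is a minimal sketch of that difference, assuming Paddle 2.x static-graph mode; the shapes and variable names are illustrative and not taken from the patch.

    # Sketch only, not part of the patch. Assumes Paddle 2.x static-graph mode;
    # layer sizes and tensor names are made up for illustration.
    import paddle

    paddle.enable_static()

    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    with paddle.static.program_guard(main_prog, startup_prog):
        x = paddle.static.data(name='x', shape=[4, 8], dtype='float32')
        linear = paddle.nn.Linear(8, 16)
        out = linear(x)

        main_block = paddle.static.default_main_program().current_block()

        # block.vars only contains variables created in this exact block, so a
        # plain dict lookup raises KeyError if the parameter lives in a parent
        # block:
        #     param = main_block.vars[linear.weight.name]   # fragile
        #
        # _find_var_recursive also walks up through parent blocks and returns
        # None (rather than raising) when the variable is missing.
        param = main_block._find_var_recursive(linear.weight.name)
        if param is not None:
            param.is_distributed = True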