python/paddle/distributed/fleet/meta_optimizers (1 file changed, +5 −6 lines)

@@ -428,6 +428,9 @@ def minimize_impl(self,
 
         self._adapt_amp_clip_without_sharding()
 
+        # scale the loss gradient by 1 / dp_degree
+        self._insert_loss_grad_scale_op()
+
         self._apply_optimize_offload_pass()
 
         # step6: (optional) sharding gradient merge
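The new step rescales the loss gradient before the data-parallel allreduce: since the allreduce sums gradients across `dp_degree` replicas, dividing the seed loss gradient by `dp_degree` turns that sum into an average. Below is a minimal sketch of how such a pass can be written for a Paddle static-graph block; the helper name, the `fill_constant` detection heuristic, and the plain `scale` op are assumptions for illustration, not the PR's exact implementation of `_insert_loss_grad_scale_op`.

```python
def insert_loss_grad_scale_op(block, dp_degree):
    """Divide the seed loss gradient by dp_degree so the gradient sum
    produced by the later dp allreduce matches single-worker magnitude."""
    for idx, op in reversed(list(enumerate(block.ops))):
        # Assumption: the loss gradient is seeded by a fill_constant op
        # whose single output is named "<loss>@GRAD".
        if op.type == 'fill_constant' and \
                op.output_arg_names[0].endswith('@GRAD'):
            loss_grad = block.var(op.output_arg_names[0])
            # Rescale in place right after the seed op.
            block._insert_op(
                idx + 1,
                type='scale',
                inputs={'X': loss_grad},
                outputs={'Out': loss_grad},
                attrs={'scale': 1.0 / dp_degree})
            break
```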
@@ -561,11 +564,6 @@ def _init_pipeline_comm(self, startup_block):
     def _init_comm(self):
         # sync var
         startup_block = self._startup_program.global_block()
-        self.startup_prog_sync_var = startup_block.create_var(
-            name="startup_prog_sync_var",
-            shape=[1],
-            dtype=core.VarDesc.VarType.INT32,
-            persistable=False)
 
         # mp ring
         if self.mp_degree > 1:
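The deleted `startup_prog_sync_var` follows a common Paddle pattern: a throwaway INT32 tensor that is filled and allreduced at the end of the startup program purely to force all ranks to rendezvous before training begins. A hedged reconstruction of that pattern is below; the `ring_id` and the exact op sequence are assumptions about how the removed variable was consumed, not code from this PR.

```python
# Hypothetical sketch of the rendezvous pattern such a sync var serves;
# ring_id is an assumed communicator ring, not taken from this PR.
sync_var = startup_block.create_var(
    name="startup_prog_sync_var",
    shape=[1],
    dtype=core.VarDesc.VarType.INT32,
    persistable=False)
startup_block.append_op(
    type='fill_constant',
    outputs={'Out': sync_var},
    attrs={'shape': [1], 'dtype': sync_var.dtype, 'value': 1})
# The allreduce only completes once every rank has reached it.
startup_block.append_op(
    type='c_allreduce_sum',
    inputs={'X': sync_var},
    outputs={'Out': sync_var},
    attrs={'ring_id': ring_id, 'use_calc_stream': True})
```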
@@ -1281,7 +1279,8 @@ def _initialization_broadcast(self):
         this funtion is to ensure the initialization between dp group to be
         identical when hybrid-dp is used.
         """
-        if not self.hybrid_dp: return
+        if not self.hybrid_dp:
+            return
 
         startup_block = self._startup_program.global_block()
 
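Per its docstring, `_initialization_broadcast` keeps hybrid-dp replicas consistent by giving them identical initial weights. The standard static-graph way to achieve this is to broadcast each parameter from rank 0 of the dp ring inside the startup program and then synchronize the communication stream; below is a sketch under those assumptions (the function signature and `dp_ring_id` are illustrative, not this PR's code).

```python
def initialization_broadcast(startup_block, dp_ring_id):
    """Broadcast every parameter from dp-rank 0 so that all
    data-parallel replicas start from identical initial values."""
    params = list(startup_block.iter_parameters())
    for param in params:
        startup_block.append_op(
            type='c_broadcast',
            inputs={'X': param},
            outputs={'Out': param},
            attrs={'ring_id': dp_ring_id, 'root': 0})
    # Wait for the broadcasts on the comm stream before training starts.
    startup_block.append_op(
        type='c_sync_comm_stream',
        inputs={'X': params},
        outputs={'Out': params},
        attrs={'ring_id': dp_ring_id})
```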