1 parent 881e55e commit e32d596
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
@@ -371,8 +371,11 @@ def _adapt_amp_clip_without_sharding(self):
         # FIXME(wangxi): mp should prune duplicated param_grads when calc
         # amp inf_var & clip global_norm_var
 
-        FP16Utils.sync_amp_check_nan_inf(main_block,
-                                         [self.mp_ring_id, self.pp_ring_id])
+        rings = [self.mp_ring_id, self.pp_ring_id]
+        # FIXME(wangxi): some problem with NPU found_finite, need sync with DP
+        if core.is_compiled_with_npu():
+            rings += [self.dp_ring_id]
+        FP16Utils.sync_amp_check_nan_inf(main_block, rings)
 
         gradientclip_helper = GradientClipHelper(None)
         gradientclip_helper.sync_global_norm(
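
For context, the patch extends the nan/inf check so that, on NPU builds, the found_finite flag is also agreed upon across the data-parallel ring, not only the model-parallel and pipeline rings. A minimal standalone sketch of that agreement step (plain Python; this is not Paddle's FP16Utils implementation, and the per-rank flag list is a hypothetical stand-in for an all-reduce over a communication ring):

    # Sketch: max-reduce the local found_inf flags so every rank in the
    # group agrees on whether the AMP loss-scaling step should be skipped.
    def sync_found_inf(per_rank_found_inf):
        agreed = max(per_rank_found_inf)  # 1 if any rank saw nan/inf
        return [agreed] * len(per_rank_found_inf)

    print(sync_found_inf([0, 1, 0, 0]))  # -> [1, 1, 1, 1]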