From cb5f0435e881c69b6f495f859ec7a3d0a2b19126 Mon Sep 17 00:00:00 2001
From: lilong12
Date: Tue, 26 Jan 2021 20:01:36 +0800
Subject: [PATCH 1/2] update, test=develop (#30692)

---
 paddle/fluid/framework/distributed_strategy.proto | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/distributed_strategy.proto b/paddle/fluid/framework/distributed_strategy.proto
index 7cf8d55aeeb1d9..07ea824dc7a4c3 100644
--- a/paddle/fluid/framework/distributed_strategy.proto
+++ b/paddle/fluid/framework/distributed_strategy.proto
@@ -140,7 +140,7 @@ message DistributedStrategy {
   optional int32 fuse_grad_size_in_MB = 19 [ default = 32 ];
   optional float fuse_grad_size_in_TFLOPS = 20 [ default = 50 ];
   optional bool cudnn_exhaustive_search = 21 [ default = true ];
-  optional int32 conv_workspace_size_limit = 22 [ default = 4000 ];
+  optional int32 conv_workspace_size_limit = 22 [ default = 512 ];
   optional bool cudnn_batchnorm_spatial_persistent = 23 [ default = true ];
   optional bool adaptive_localsgd = 24 [ default = false ];
   optional bool fp16_allreduce = 25 [ default = false ];

From 1240e4ff4ede76f4f137a0a49475853ab7462676 Mon Sep 17 00:00:00 2001
From: lilong12
Date: Wed, 24 Feb 2021 19:19:36 +0800
Subject: [PATCH 2/2] align the default value of some configuration for fleet
 to that of single cards (#30740)

* update, test=develop
---
 .../fluid/framework/distributed_strategy.proto |  4 ++--
 .../fleet/base/distributed_strategy.py         | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/framework/distributed_strategy.proto b/paddle/fluid/framework/distributed_strategy.proto
index 07ea824dc7a4c3..e57aa45b8662df 100644
--- a/paddle/fluid/framework/distributed_strategy.proto
+++ b/paddle/fluid/framework/distributed_strategy.proto
@@ -139,9 +139,9 @@ message DistributedStrategy {
   optional bool fuse_all_reduce_ops = 18 [ default = true ];
   optional int32 fuse_grad_size_in_MB = 19 [ default = 32 ];
   optional float fuse_grad_size_in_TFLOPS = 20 [ default = 50 ];
-  optional bool cudnn_exhaustive_search = 21 [ default = true ];
+  optional bool cudnn_exhaustive_search = 21 [ default = false ];
   optional int32 conv_workspace_size_limit = 22 [ default = 512 ];
-  optional bool cudnn_batchnorm_spatial_persistent = 23 [ default = true ];
+  optional bool cudnn_batchnorm_spatial_persistent = 23 [ default = false ];
   optional bool adaptive_localsgd = 24 [ default = false ];
   optional bool fp16_allreduce = 25 [ default = false ];
   optional bool sharding = 26 [ default = false ];
diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py
index f7a28f15e9b70b..68a7fea5d961af 100755
--- a/python/paddle/distributed/fleet/base/distributed_strategy.py
+++ b/python/paddle/distributed/fleet/base/distributed_strategy.py
@@ -115,6 +115,22 @@ def __init__(self):
 
         """
         self.strategy = distributed_strategy_pb2.DistributedStrategy()
+
+        # Set the default values of the following flags to the ones set by users
+        key = 'FLAGS_cudnn_batchnorm_spatial_persistent'
+        if core.globals().is_public(key):
+            self.strategy.cudnn_batchnorm_spatial_persistent = bool(
+                core.globals()[key])
+        key = 'FLAGS_conv_workspace_size_limit'
+        if core.globals().is_public(key):
+            self.strategy.conv_workspace_size_limit = int(core.globals()[key])
+        key = 'FLAGS_cudnn_exhaustive_search'
+        if core.globals().is_public(key):
+            self.strategy.cudnn_exhaustive_search = bool(core.globals()[key])
+        key = 'FLAGS_sync_nccl_allreduce'
+        if core.globals().is_public(key):
+            self.strategy.sync_nccl_allreduce = bool(core.globals()[key])
+
         self.__lock_attr = True
 
     def __setattr__(self, key, value):
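Note (not part of the patches above): a minimal usage sketch of the behavior added in PATCH 2/2. It assumes a PaddlePaddle build that exposes paddle.set_flags and a paddle.distributed.fleet.DistributedStrategy with a conv_workspace_size_limit property, and that the flag is set before the strategy object is constructed; the value 1024 is only illustrative.

    # Sketch: globally set flags should now become the fleet strategy defaults.
    import paddle
    import paddle.distributed.fleet as fleet

    # Set a cuDNN flag the way a single-card user typically would.
    paddle.set_flags({'FLAGS_conv_workspace_size_limit': 1024})

    # With the patched __init__, DistributedStrategy() copies the current flag
    # values (e.g. those set via paddle.set_flags) from core.globals() at
    # construction time, so the fleet default matches the single-card setting
    # instead of the proto default (512).
    strategy = fleet.DistributedStrategy()
    print(strategy.conv_workspace_size_limit)  # expected: 1024

When a flag is not exposed in core.globals(), the strategy keeps the proto defaults shown in the diffs (cudnn_exhaustive_search=false, conv_workspace_size_limit=512, cudnn_batchnorm_spatial_persistent=false).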