From 59f71585913126ddb3a7c81b2d593975acf1c1b5 Mon Sep 17 00:00:00 2001 From: zhiboniu Date: Thu, 29 Apr 2021 14:31:56 +0000 Subject: [PATCH] update 2.0 public api in distributed --- python/paddle/distributed/__init__.py | 96 +++++++++++-------- python/paddle/distributed/cloud_utils.py | 7 +- python/paddle/distributed/collective.py | 27 ++---- python/paddle/distributed/entry_attr.py | 2 +- python/paddle/distributed/fleet/__init__.py | 41 +++++--- .../paddle/distributed/fleet/ascend_utils.py | 2 + .../fleet/base/distributed_strategy.py | 2 +- .../distributed/fleet/base/fleet_base.py | 2 + .../fleet/base/meta_optimizer_factory.py | 2 + .../fleet/base/private_helper_function.py | 2 + .../distributed/fleet/base/role_maker.py | 2 + .../distributed/fleet/base/runtime_factory.py | 2 + .../fleet/base/strategy_compiler.py | 2 + .../distributed/fleet/base/util_factory.py | 3 +- .../paddle/distributed/fleet/cloud_utils.py | 2 + .../fleet/data_generator/__init__.py | 4 +- .../fleet/data_generator/data_generator.py | 2 + .../distributed/fleet/dataset/__init__.py | 10 +- .../distributed/fleet/dataset/dataset.py | 2 + .../fleet/dataset/index_dataset.py | 2 + python/paddle/distributed/fleet/launch.py | 2 + .../fleet/meta_optimizers/amp_optimizer.py | 2 + .../ascend/ascend_optimizer.py | 2 + .../meta_optimizers/ascend/ascend_parser.py | 2 + .../fleet/meta_optimizers/common.py | 2 + .../fleet/meta_optimizers/dgc_optimizer.py | 2 + .../dygraph_optimizer/__init__.py | 2 + .../hybrid_parallel_gradscaler.py | 2 + .../hybrid_parallel_optimizer.py | 2 + .../fp16_allreduce_optimizer.py | 2 + .../gradient_merge_optimizer.py | 2 + .../graph_execution_optimizer.py | 2 + .../fleet/meta_optimizers/lamb_optimizer.py | 2 + .../fleet/meta_optimizers/lars_optimizer.py | 2 + .../meta_optimizers/localsgd_optimizer.py | 2 + .../meta_optimizers/meta_optimizer_base.py | 2 + .../parameter_server_graph_optimizer.py | 2 + .../parameter_server_optimizer.py | 2 + .../meta_optimizers/pipeline_optimizer.py | 2 + .../meta_optimizers/recompute_optimizer.py | 2 + .../meta_optimizers/sharding/fp16_helper.py | 2 + .../sharding/gradient_clip_helper.py | 2 + .../sharding/offload_helper.py | 2 + .../fleet/meta_optimizers/sharding/prune.py | 2 + .../fleet/meta_optimizers/sharding/shard.py | 2 + .../sharding/weight_decay_helper.py | 2 + .../meta_optimizers/sharding_optimizer.py | 2 +- .../tensor_parallel_optimizer.py | 2 + .../fleet/meta_parallel/__init__.py | 15 ++- .../fleet/meta_parallel/meta_parallel_base.py | 2 + .../fleet/meta_parallel/model_parallel.py | 6 +- .../meta_parallel/parallel_layers/__init__.py | 13 ++- .../parallel_layers/mp_layers.py | 4 +- .../parallel_layers/pp_layers.py | 2 +- .../meta_parallel/parallel_layers/random.py | 5 +- .../fleet/meta_parallel/pipeline_parallel.py | 2 + .../fleet/meta_parallel/pp_utils/__init__.py | 4 +- .../fleet/meta_parallel/pp_utils/utils.py | 2 +- .../distributed/fleet/metrics/__init__.py | 20 ++-- .../distributed/fleet/metrics/metric.py | 2 + .../distributed/fleet/runtime/__init__.py | 2 + .../fleet/runtime/collective_runtime.py | 2 + .../fleet/runtime/parameter_server_runtime.py | 2 + .../distributed/fleet/runtime/the_one_ps.py | 2 + .../distributed/fleet/utils/__init__.py | 14 ++- python/paddle/distributed/fleet/utils/fs.py | 2 +- .../distributed/fleet/utils/http_server.py | 2 + .../fleet/utils/hybrid_parallel_util.py | 2 + .../distributed/fleet/utils/log_util.py | 2 + .../paddle/distributed/fleet/utils/ps_util.py | 2 + .../distributed/fleet/utils/recompute.py | 2 + python/paddle/distributed/launch.py | 2 + python/paddle/distributed/parallel.py | 9 +- python/paddle/distributed/spawn.py | 6 +- python/paddle/distributed/utils.py | 18 ++++ python/paddle/nn/__init__.py | 2 +- 76 files changed, 302 insertions(+), 116 deletions(-) diff --git a/python/paddle/distributed/__init__.py b/python/paddle/distributed/__init__.py index c882e94d2bade8..7427219285c200 100644 --- a/python/paddle/distributed/__init__.py +++ b/python/paddle/distributed/__init__.py @@ -12,46 +12,62 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import spawn -from .spawn import spawn - -from . import parallel -from .parallel import init_parallel_env -from .parallel import get_rank -from .parallel import get_world_size -from paddle.fluid.dygraph.parallel import ParallelEnv #DEFINE_ALIAS -from paddle.distributed.fleet.dataset import * - -from . import collective -from .collective import * - -from .entry_attr import ProbabilityEntry -from .entry_attr import CountFilterEntry - -# start multiprocess apis -__all__ = ["spawn"] - -# dygraph parallel apis -__all__ += [ - "init_parallel_env", - "get_rank", - "get_world_size", - "ParallelEnv", - "InMemoryDataset", - "QueueDataset", -] +from .spawn import spawn # noqa: F401 -# dataset reader -__all__ += [ - "InMemoryDataset", - "QueueDataset", -] +from .parallel import init_parallel_env # noqa: F401 +from .parallel import get_rank # noqa: F401 +from .parallel import get_world_size # noqa: F401 -# entry for embedding -__all__ += [ - "ProbabilityEntry", - "CountFilterEntry", -] +from paddle.distributed.fleet.dataset import InMemoryDataset # noqa: F401 +from paddle.distributed.fleet.dataset import QueueDataset # noqa: F401 + +from .collective import broadcast # noqa: F401 +from .collective import all_reduce # noqa: F401 +from .collective import reduce # noqa: F401 +from .collective import all_gather # noqa: F401 +from .collective import scatter # noqa: F401 +from .collective import barrier # noqa: F401 +from .collective import ReduceOp # noqa: F401 +from .collective import split # noqa: F401 +from .collective import new_group # noqa: F401 +from .collective import alltoall # noqa: F401 +from .collective import recv # noqa: F401 +from .collective import get_group # noqa: F401 +from .collective import send # noqa: F401 +from .collective import wait # noqa: F401 + +from .fleet import BoxPSDataset # noqa: F401 -# collective apis -__all__ += collective.__all__ +from .entry_attr import ProbabilityEntry # noqa: F401 +from .entry_attr import CountFilterEntry # noqa: F401 + +from paddle.fluid.dygraph.parallel import ParallelEnv # noqa: F401 + +from . import cloud_utils # noqa: F401 +from . import utils # noqa: F401 + +__all__ = [ #noqa + "spawn", + "scatter", + "broadcast", + "ParallelEnv", + "new_group", + "init_parallel_env", + "QueueDataset", + "split", + "CountFilterEntry", + "get_world_size", + "get_group", + "all_gather", + "InMemoryDataset", + "barrier", + "all_reduce", + "alltoall", + "send", + "reduce", + "recv", + "ReduceOp", + "wait", + "get_rank", + "ProbabilityEntry" +] diff --git a/python/paddle/distributed/cloud_utils.py b/python/paddle/distributed/cloud_utils.py index 962ba62b15f4a5..34e55bf164673f 100644 --- a/python/paddle/distributed/cloud_utils.py +++ b/python/paddle/distributed/cloud_utils.py @@ -14,7 +14,12 @@ import os import paddle -from paddle.distributed.utils import get_cluster, logger, get_gpus, get_cluster_from_args +from paddle.distributed.utils import get_cluster +from paddle.distributed.utils import logger +from paddle.distributed.utils import get_gpus +from paddle.distributed.utils import get_cluster_from_args + +__all__ = [] def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_devices): diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index 7aa765ba93fbe3..f88828dd9411b9 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -15,8 +15,14 @@ import numpy as np import os from ..fluid.layer_helper import LayerHelper -from ..fluid.framework import Variable, OpProtoHolder, in_dygraph_mode, convert_np_dtype_to_dtype_ -from ..fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype +from ..fluid.framework import Variable +from ..fluid.framework import OpProtoHolder +from ..fluid.framework import in_dygraph_mode +from ..fluid.framework import convert_np_dtype_to_dtype_ +from ..fluid.data_feeder import convert_dtype +from ..fluid.data_feeder import check_variable_and_dtype +from ..fluid.data_feeder import check_type +from ..fluid.data_feeder import check_dtype from ..fluid.layers.tensor import fill_constant from ..fluid.layers import utils from ..fluid.dygraph.parallel import prepare_context @@ -25,22 +31,7 @@ import paddle.fluid as fluid import paddle.fluid.core as core -__all__ = [ - 'wait', - 'new_group', - 'get_group', - 'broadcast', - 'all_reduce', - 'reduce', - 'all_gather', - 'scatter', - 'barrier', - 'split', - 'alltoall', - 'ReduceOp', - 'send', - 'recv', -] +__all__ = [] class ReduceOp: diff --git a/python/paddle/distributed/entry_attr.py b/python/paddle/distributed/entry_attr.py index dbd899952af03f..e219ef6434a3f1 100644 --- a/python/paddle/distributed/entry_attr.py +++ b/python/paddle/distributed/entry_attr.py @@ -14,7 +14,7 @@ from __future__ import print_function -__all__ = ['ProbabilityEntry', 'CountFilterEntry'] +__all__ = [] class EntryAttr(object): diff --git a/python/paddle/distributed/fleet/__init__.py b/python/paddle/distributed/fleet/__init__.py index 403a02496afaab..5f9a61371d34f4 100644 --- a/python/paddle/distributed/fleet/__init__.py +++ b/python/paddle/distributed/fleet/__init__.py @@ -13,21 +13,34 @@ # limitations under the License. # TODO: define distributed api under this directory, -from .base.role_maker import Role, UserDefinedRoleMaker, PaddleCloudRoleMaker -from .base.distributed_strategy import DistributedStrategy -from .base.fleet_base import Fleet -from .base.util_factory import UtilBase -from .dataset import * -from .data_generator import MultiSlotDataGenerator, MultiSlotStringDataGenerator -from . import metrics -from .base.topology import CommunicateTopology, HybridCommunicateGroup -from .meta_parallel import * +from .base.role_maker import Role # noqa: F401 +from .base.role_maker import UserDefinedRoleMaker # noqa: F401 +from .base.role_maker import PaddleCloudRoleMaker # noqa: F401 +from .base.distributed_strategy import DistributedStrategy # noqa: F401 +from .base.fleet_base import Fleet # noqa: F401 +from .base.util_factory import UtilBase # noqa: F401 +from .dataset import DatasetBase # noqa: F401 +from .dataset import InMemoryDataset # noqa: F401 +from .dataset import QueueDataset # noqa: F401 +from .dataset import FileInstantDataset # noqa: F401 +from .dataset import BoxPSDataset # noqa: F401 +from .data_generator.data_generator import MultiSlotDataGenerator # noqa: F401 +from .data_generator.data_generator import MultiSlotStringDataGenerator # noqa: F401 +from . import metrics # noqa: F401 +from .base.topology import CommunicateTopology +from .base.topology import HybridCommunicateGroup # noqa: F401 -__all__ = [ - "DistributedStrategy", "UtilBase", "UserDefinedRoleMaker", - "PaddleCloudRoleMaker", "Fleet", "MultiSlotDataGenerator", - "MultiSlotStringDataGenerator", "Role", "CommunicateTopology", - "HybridCommunicateGroup" +__all__ = [ #noqa + "CommunicateTopology", + "UtilBase", + "HybridCommunicateGroup", + "MultiSlotStringDataGenerator", + "UserDefinedRoleMaker", + "DistributedStrategy", + "Role", + "MultiSlotDataGenerator", + "PaddleCloudRoleMaker", + "Fleet" ] fleet = Fleet() diff --git a/python/paddle/distributed/fleet/ascend_utils.py b/python/paddle/distributed/fleet/ascend_utils.py index b64149f27bcac1..708c76ac55abe8 100644 --- a/python/paddle/distributed/fleet/ascend_utils.py +++ b/python/paddle/distributed/fleet/ascend_utils.py @@ -17,6 +17,8 @@ import paddle from paddle.distributed.fleet.launch_utils import get_cluster, logger, get_host_name_ip, DeviceMode +__all__ = [] + def _get_ascend_rankfile(rank_table_file_path): """ diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index 9fed3a8550c407..a44d008fe9a313 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -19,7 +19,7 @@ import google.protobuf.text_format import google.protobuf -__all__ = ["DistributedStrategy"] +__all__ = [] non_auto_func_called = True diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py index 9e200f4ee5f6e2..a7564a23a7cfb8 100644 --- a/python/paddle/distributed/fleet/base/fleet_base.py +++ b/python/paddle/distributed/fleet/base/fleet_base.py @@ -33,6 +33,8 @@ from ..meta_optimizers import HybridParallelOptimizer from ..meta_optimizers import HybridParallelGradScaler +__all__ = [] + def _inited_runtime_handler_(func): def __impl__(*args, **kwargs): diff --git a/python/paddle/distributed/fleet/base/meta_optimizer_factory.py b/python/paddle/distributed/fleet/base/meta_optimizer_factory.py index 6989eec119f786..52eeebd0c126c2 100755 --- a/python/paddle/distributed/fleet/base/meta_optimizer_factory.py +++ b/python/paddle/distributed/fleet/base/meta_optimizer_factory.py @@ -14,6 +14,8 @@ from ..meta_optimizers import * +__all__ = [] + meta_optimizer_names = list( filter(lambda name: name.endswith("Optimizer"), dir())) diff --git a/python/paddle/distributed/fleet/base/private_helper_function.py b/python/paddle/distributed/fleet/base/private_helper_function.py index 6af4a9e667528b..c7ddd33d5d0187 100644 --- a/python/paddle/distributed/fleet/base/private_helper_function.py +++ b/python/paddle/distributed/fleet/base/private_helper_function.py @@ -17,6 +17,8 @@ from contextlib import closing from six import string_types +__all__ = [] + def wait_server_ready(endpoints): """ diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index 62c8faa0757c66..f89d73416960a8 100644 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -22,6 +22,8 @@ import paddle.fluid as fluid from paddle.distributed.fleet.base.private_helper_function import wait_server_ready +__all__ = [] + class Role: WORKER = 1 diff --git a/python/paddle/distributed/fleet/base/runtime_factory.py b/python/paddle/distributed/fleet/base/runtime_factory.py index 9e612c6d530f14..85ff3e1e69c581 100644 --- a/python/paddle/distributed/fleet/base/runtime_factory.py +++ b/python/paddle/distributed/fleet/base/runtime_factory.py @@ -15,6 +15,8 @@ from ..runtime.parameter_server_runtime import ParameterServerRuntime from ..runtime.the_one_ps import TheOnePSRuntime +__all__ = [] + class RuntimeFactory(object): def __init__(self): diff --git a/python/paddle/distributed/fleet/base/strategy_compiler.py b/python/paddle/distributed/fleet/base/strategy_compiler.py index 7b146318abe62a..b90e5b2bff7bfa 100644 --- a/python/paddle/distributed/fleet/base/strategy_compiler.py +++ b/python/paddle/distributed/fleet/base/strategy_compiler.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +__all__ = [] + def create_graph(optimizer_list): nsize = len(optimizer_list) diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py index d982f14eaa5af1..de101cd74c4e83 100644 --- a/python/paddle/distributed/fleet/base/util_factory.py +++ b/python/paddle/distributed/fleet/base/util_factory.py @@ -27,7 +27,8 @@ import subprocess import os import numpy as np -__all__ = ['UtilBase'] + +__all__ = [] class UtilFactory(object): diff --git a/python/paddle/distributed/fleet/cloud_utils.py b/python/paddle/distributed/fleet/cloud_utils.py index f5a24cf48ca06d..0b1169e4422637 100644 --- a/python/paddle/distributed/fleet/cloud_utils.py +++ b/python/paddle/distributed/fleet/cloud_utils.py @@ -16,6 +16,8 @@ import paddle from paddle.distributed.fleet.launch_utils import get_cluster, logger +__all__ = [] + def get_cloud_cluster(args_node_ips, device_mode, diff --git a/python/paddle/distributed/fleet/data_generator/__init__.py b/python/paddle/distributed/fleet/data_generator/__init__.py index 481df4064a4ecc..230ada2abec062 100644 --- a/python/paddle/distributed/fleet/data_generator/__init__.py +++ b/python/paddle/distributed/fleet/data_generator/__init__.py @@ -11,4 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -from .data_generator import * +from .data_generator import DataGenerator # noqa: F401 + +__all__ = [] diff --git a/python/paddle/distributed/fleet/data_generator/data_generator.py b/python/paddle/distributed/fleet/data_generator/data_generator.py index 9d743fc38bf398..cceb81838c1d2a 100644 --- a/python/paddle/distributed/fleet/data_generator/data_generator.py +++ b/python/paddle/distributed/fleet/data_generator/data_generator.py @@ -15,6 +15,8 @@ import os import sys +__all__ = [] + class DataGenerator(object): """ diff --git a/python/paddle/distributed/fleet/dataset/__init__.py b/python/paddle/distributed/fleet/dataset/__init__.py index 24b68596f25419..55b944abccd51c 100644 --- a/python/paddle/distributed/fleet/dataset/__init__.py +++ b/python/paddle/distributed/fleet/dataset/__init__.py @@ -11,5 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -from .dataset import * -from .index_dataset import * +from .dataset import DatasetBase # noqa: F401 +from .dataset import InMemoryDataset # noqa: F401 +from .dataset import QueueDataset # noqa: F401 +from .dataset import FileInstantDataset # noqa: F401 +from .dataset import BoxPSDataset # noqa: F401 +from .index_dataset import TreeIndex # noqa: F401 + +__all__ = [] diff --git a/python/paddle/distributed/fleet/dataset/dataset.py b/python/paddle/distributed/fleet/dataset/dataset.py index 10c27ea91d2494..2f428346b9c0c5 100644 --- a/python/paddle/distributed/fleet/dataset/dataset.py +++ b/python/paddle/distributed/fleet/dataset/dataset.py @@ -18,6 +18,8 @@ from google.protobuf import text_format import paddle.fluid.core as core +__all__ = [] + class DatasetBase(object): """ Base dataset class. """ diff --git a/python/paddle/distributed/fleet/dataset/index_dataset.py b/python/paddle/distributed/fleet/dataset/index_dataset.py index dfd3daa9570b95..c4c424fe2dc7e6 100644 --- a/python/paddle/distributed/fleet/dataset/index_dataset.py +++ b/python/paddle/distributed/fleet/dataset/index_dataset.py @@ -13,6 +13,8 @@ # limitations under the License. from paddle.fluid import core +__all__ = [] + class Index(object): def __init__(self, name): diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 69c5b325d182d8..25b10133191788 100644 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -75,6 +75,8 @@ import paddle.distributed.fleet.cloud_utils as cloud_utils import paddle.distributed.fleet.ascend_utils as ascend_utils +__all__ = [] + def _print_arguments(args): print("----------- Configuration Arguments -----------") diff --git a/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py index 02505e01197dc6..9ffb47789ee987 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py @@ -14,6 +14,8 @@ import paddle.fluid.contrib.mixed_precision as mixed_precision from .meta_optimizer_base import MetaOptimizerBase +__all__ = [] + class AMPOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py index 824225fd776d13..6282ac7b509838 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py @@ -24,6 +24,8 @@ HcomGroupConfig = namedtuple('HcomGroupConfig', ['name', 'nranks', 'rank_ids']) +__all__ = [] + class AscendIRParser(object): def __init__(self, auto_dp=False, world_rank_size=1): diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py index 19b5e910db2993..3331a45b3d9479 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py @@ -18,6 +18,8 @@ from paddle.distributed import fleet from functools import reduce +__all__ = [] + registerd_op = {## forwards "elementwise_add": "AddParser", "matmul": "MatMulParser", diff --git a/python/paddle/distributed/fleet/meta_optimizers/common.py b/python/paddle/distributed/fleet/meta_optimizers/common.py index 9e2723dad729aa..707284a784c38e 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/common.py +++ b/python/paddle/distributed/fleet/meta_optimizers/common.py @@ -19,6 +19,8 @@ from paddle.fluid import core, unique_name from ..base.private_helper_function import wait_server_ready +__all__ = [] + OpRole = core.op_proto_and_checker_maker.OpRole OP_ROLE_KEY = core.op_proto_and_checker_maker.kOpRoleAttrName() diff --git a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py index 7bd68325569334..b035f179317ac4 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py @@ -15,6 +15,8 @@ from .meta_optimizer_base import MetaOptimizerBase import logging +__all__ = [] + class DGCOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/__init__.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/__init__.py index 4e41723cb622dc..f0f26bd2e0d060 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/__init__.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/__init__.py @@ -12,3 +12,5 @@ # See the License for the specific language governing permissions and from .hybrid_parallel_optimizer import HybridParallelOptimizer from .hybrid_parallel_gradscaler import HybridParallelGradScaler + +__all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py index 13bb9d2acece28..d0e8034f5cae15 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py @@ -23,6 +23,8 @@ from paddle.fluid import core import paddle +__all__ = [] + class HybridParallelGradScaler: def __init__(self, scaler, hcg): diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py index 52e87173684a34..b7ac298d2223ee 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py @@ -23,6 +23,8 @@ from paddle.fluid.framework import Variable from ...utils.log_util import logger +__all__ = [] + class HybridParallelClipGrad: def __init__(self, clip, hcg): diff --git a/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py index 411980ed01322a..f636a313757854 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py @@ -14,6 +14,8 @@ from paddle.fluid import core, framework, unique_name from .meta_optimizer_base import MetaOptimizerBase +__all__ = [] + class FP16AllReduceOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py index 380fbc2e09ebff..949ef3e5f3a78f 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py @@ -14,6 +14,8 @@ from paddle.fluid.optimizer import GradientMergeOptimizer as GM from .meta_optimizer_base import MetaOptimizerBase +__all__ = [] + class GradientMergeOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py index 9a4ffd2fd02d4a..4194cf13d2bbcd 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py @@ -19,6 +19,8 @@ from ..base.private_helper_function import wait_server_ready import logging +__all__ = [] + class GraphExecutionOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py index 64d54ae3bab03b..6d2474d9352f87 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py @@ -16,6 +16,8 @@ from .meta_optimizer_base import MetaOptimizerBase import logging +__all__ = [] + class LambOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py index 32c6be505a5467..e1bf3722c191d1 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py @@ -15,6 +15,8 @@ from .meta_optimizer_base import MetaOptimizerBase import logging +__all__ = [] + class LarsOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py index 91030f07629343..3340672e0f925b 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py @@ -19,6 +19,8 @@ from .meta_optimizer_base import MetaOptimizerBase from .common import OpRole, OP_ROLE_KEY, CollectiveHelper, is_update_op +__all__ = [] + class LocalSGDOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py b/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py index a12ca50442b1c3..3bbaa055c5e597 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py +++ b/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py @@ -14,6 +14,8 @@ from paddle.fluid.optimizer import Optimizer +__all__ = [] + class MetaOptimizerBase(Optimizer): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py index dfa765364f357b..ba2a0e84c7ab6b 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py @@ -15,6 +15,8 @@ from paddle.fluid import compiler from .parameter_server_optimizer import ParameterServerOptimizer +__all__ = [] + class ParameterServerGraphOptimizer(ParameterServerOptimizer): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py index f6d2af0b416d2d..88180221ff4ff5 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py @@ -20,6 +20,8 @@ import platform from ..base.private_helper_function import wait_server_ready +__all__ = [] + class ParameterServerOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py index 1aa51a6671c17f..a0bf4cc5bc0975 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py @@ -22,6 +22,8 @@ from .meta_optimizer_base import MetaOptimizerBase from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_loss_grad_op, is_backward_op, is_optimizer_op +__all__ = [] + class PipelineOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py index 3a784c306257b2..d79675448c0425 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py @@ -14,6 +14,8 @@ from paddle.fluid.optimizer import RecomputeOptimizer as RO from .meta_optimizer_base import MetaOptimizerBase +__all__ = [] + class RecomputeOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py index 40ba77815663f0..8e636353729845 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py @@ -17,6 +17,8 @@ from paddle.fluid import core +__all__ = [] + class FP16Utils(object): def __init__(self): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py index d5a012b147a99e..fd74f28b69e190 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py @@ -14,6 +14,8 @@ from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole +__all__ = [] + class GradientClipHelper(object): def __init__(self, mp_ring_id): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py index 76803818453c92..f6741b165ce072 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py @@ -15,6 +15,8 @@ from ..common import is_optimizer_op, OP_ROLE_KEY, OpRole from paddle.fluid import core, unique_name +__all__ = [] + class OffloadHelper(object): cpu_place_type = 0 diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py index 5a43367cf1ad12..dd4e16b576fcf0 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +__all__ = [] + class ProgramDeps(object): def __init__(self, block, start_vars, end_vars): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py index 92e36e0ec1fff3..0c33a78120cb84 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py @@ -16,6 +16,8 @@ from paddle.distributed.fleet.meta_optimizers.sharding.utils import * from paddle.distributed.fleet.meta_optimizers.sharding.fp16_helper import FP16Utils +__all__ = [] + class Shard(object): def __init__(self, ): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py index 2833e8c6dac4be..ab0c79bca554c6 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py @@ -14,6 +14,8 @@ from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_VAR_KEY +__all__ = [] + class WeightDecayHelper(object): def __init__(self): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py index 852421523b15b1..2742c2ae65cf4a 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py @@ -34,7 +34,7 @@ datefmt='%Y-%m-%d %H:%M:%S') from functools import reduce -__all__ = ["ShardingOptimizer"] +__all__ = [] class ShardingOptimizer(MetaOptimizerBase): diff --git a/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py index 2ba0195156082c..5fbec7da0b5edf 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py @@ -19,6 +19,8 @@ from .meta_optimizer_base import MetaOptimizerBase from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_update_op, is_loss_grad_op, is_backward_op, is_optimizer_op +__all__ = [] + class TensorParallelOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_parallel/__init__.py b/python/paddle/distributed/fleet/meta_parallel/__init__.py index ed1add1f7baeeb..ed74d8e744e50d 100644 --- a/python/paddle/distributed/fleet/meta_parallel/__init__.py +++ b/python/paddle/distributed/fleet/meta_parallel/__init__.py @@ -12,6 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .parallel_layers import * -from .model_parallel import ModelParallel -from .pipeline_parallel import PipelineParallel +from .parallel_layers import VocabParallelEmbedding # noqa: F401 +from .parallel_layers import ColumnParallelLinear # noqa: F401 +from .parallel_layers import RowParallelLinear # noqa: F401 +from .parallel_layers import LayerDesc # noqa: F401 +from .parallel_layers import PipelineLayer # noqa: F401 +from .parallel_layers import RNGStatesTracker # noqa: F401 +from .parallel_layers import model_parallel_random_seed # noqa: F401 +from .parallel_layers import get_rng_state_tracker # noqa: F401 +from .model_parallel import ModelParallel # noqa: F401 +from .pipeline_parallel import PipelineParallel # noqa: F401 + +__all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py b/python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py index cdf947895b777c..69e41ab0edab2d 100644 --- a/python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py +++ b/python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py @@ -14,6 +14,8 @@ from paddle.fluid.dygraph.layers import Layer +__all__ = [] + class MetaParallelBase(Layer): def __init__(self, layers, hcg, strategy): diff --git a/python/paddle/distributed/fleet/meta_parallel/model_parallel.py b/python/paddle/distributed/fleet/meta_parallel/model_parallel.py index ebf26498d93243..682d7152a42bd2 100644 --- a/python/paddle/distributed/fleet/meta_parallel/model_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/model_parallel.py @@ -14,9 +14,13 @@ from paddle.fluid.dygraph.layers import Layer from .meta_parallel_base import MetaParallelBase -from ..utils.hybrid_parallel_util import * +from ..utils.hybrid_parallel_util import broadcast_dp_parameters +from ..utils.hybrid_parallel_util import broadcast_input_data +from ..utils.hybrid_parallel_util import broadcast_mp_parameters from ..utils.log_util import logger +__all__ = [] + class ModelParallel(MetaParallelBase): def __init__(self, layers, hcg, **kwargs): diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/__init__.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/__init__.py index c4ec61e84ffa5c..6a33611403ace0 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/__init__.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/__init__.py @@ -12,6 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .mp_layers import * -from .pp_layers import * -from .random import * +from .mp_layers import VocabParallelEmbedding # noqa: F401 +from .mp_layers import ColumnParallelLinear # noqa: F401 +from .mp_layers import RowParallelLinear # noqa: F401 +from .pp_layers import LayerDesc # noqa: F401 +from .pp_layers import PipelineLayer # noqa: F401 +from .random import RNGStatesTracker # noqa: F401 +from .random import model_parallel_random_seed # noqa: F401 +from .random import get_rng_state_tracker # noqa: F401 + +__all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py index b89e90128b1121..af59b16e22aa85 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py @@ -19,9 +19,7 @@ from paddle import framework from ...base import topology as tp -__all__ = [ - 'VocabParallelEmbedding', 'ColumnParallelLinear', 'RowParallelLinear' -] +__all__ = [] # Follow this paper to achieve the file: # Shoeybi M, Patwary M, Puri R, et al. Megatron-lm: Training multi-billion parameter diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py index 669ed032a34438..ec5ad92b2b237d 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py @@ -16,7 +16,7 @@ from paddle.fluid.dygraph.layers import Layer from ...utils.log_util import logger, layer_to_str -__all__ = ['LayerDesc', 'PipelineLayer'] +__all__ = [] class SegmentLayers(object): diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py index 56c741dbd3cad5..41c9deabd1e11b 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py @@ -14,9 +14,8 @@ import paddle import contextlib -__all__ = [ - 'RNGStatesTracker', 'model_parallel_random_seed', 'get_rng_state_tracker' -] + +__all__ = [] MODEL_PARALLEL_RNG = 'model_parallel_rng' diff --git a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py index 98a82f2b798562..2cfa7fae734f87 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py @@ -26,6 +26,8 @@ from .pp_utils import utils from .parallel_layers.pp_layers import PipelineLayer +__all__ = [] + FLOAT_TYPES = [ paddle.float16, paddle.float32, diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/__init__.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/__init__.py index d39e6760a38657..786eb20487a52e 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/__init__.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/__init__.py @@ -12,4 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .utils import * +from .utils import get_tensor_bytes + +__all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py index 56eef8d7d21df8..98b9189f329171 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py @@ -16,7 +16,7 @@ import paddle from ...utils import hybrid_parallel_util as hp_util -__all__ = ['get_tensor_bytes', ] +__all__ = [] def get_tensor_bytes(tensor): diff --git a/python/paddle/distributed/fleet/metrics/__init__.py b/python/paddle/distributed/fleet/metrics/__init__.py index bc30c063787d28..abcb90afb23c43 100644 --- a/python/paddle/distributed/fleet/metrics/__init__.py +++ b/python/paddle/distributed/fleet/metrics/__init__.py @@ -12,15 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .metric import * +from .metric import acc # noqa: F401 +from .metric import auc # noqa: F401 +from .metric import mae # noqa: F401 +from .metric import max # noqa: F401 +from .metric import min # noqa: F401 +from .metric import mse # noqa: F401 +from .metric import rmse # noqa: F401 +from .metric import sum # noqa: F401 -__all__ = [ - "sum", - "max", - "min", - "auc", - "mae", - "rmse", - "mse", - "acc", -] +__all__ = [] diff --git a/python/paddle/distributed/fleet/metrics/metric.py b/python/paddle/distributed/fleet/metrics/metric.py index 9ed0a0df4be018..d2050585df754b 100644 --- a/python/paddle/distributed/fleet/metrics/metric.py +++ b/python/paddle/distributed/fleet/metrics/metric.py @@ -18,6 +18,8 @@ from paddle.static import Variable import paddle +__all__ = [] + def sum(input, scope=None, util=None): """ diff --git a/python/paddle/distributed/fleet/runtime/__init__.py b/python/paddle/distributed/fleet/runtime/__init__.py index 51d8c6ffebf1dd..f5c30b2f3c5aaa 100644 --- a/python/paddle/distributed/fleet/runtime/__init__.py +++ b/python/paddle/distributed/fleet/runtime/__init__.py @@ -15,3 +15,5 @@ from .collective_runtime import CollectiveRuntime from .parameter_server_runtime import ParameterServerRuntime from .the_one_ps import TheOnePSRuntime + +__all__ = [] diff --git a/python/paddle/distributed/fleet/runtime/collective_runtime.py b/python/paddle/distributed/fleet/runtime/collective_runtime.py index c56cf4c7aa2ed8..a23b15f1fca1ba 100644 --- a/python/paddle/distributed/fleet/runtime/collective_runtime.py +++ b/python/paddle/distributed/fleet/runtime/collective_runtime.py @@ -15,6 +15,8 @@ from .runtime_base import RuntimeBase import logging +__all__ = [] + class CollectiveRuntime(RuntimeBase): def __init__(self): diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index 782ba87e07925c..0767158d23f008 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -26,6 +26,8 @@ from .runtime_base import RuntimeBase from ..base.private_helper_function import wait_server_ready +__all__ = [] + class ParameterServerRuntime(RuntimeBase): def __init__(self): diff --git a/python/paddle/distributed/fleet/runtime/the_one_ps.py b/python/paddle/distributed/fleet/runtime/the_one_ps.py index 24b83662c9dbf9..ce68eb9a1fb4ad 100644 --- a/python/paddle/distributed/fleet/runtime/the_one_ps.py +++ b/python/paddle/distributed/fleet/runtime/the_one_ps.py @@ -25,6 +25,8 @@ from .runtime_base import RuntimeBase from ..base.private_helper_function import wait_server_ready +__all__ = [] + def conv_indent(indent): return "".join([" "] * indent) diff --git a/python/paddle/distributed/fleet/utils/__init__.py b/python/paddle/distributed/fleet/utils/__init__.py index 0a47750ead7ec9..1bf90a22e375c7 100644 --- a/python/paddle/distributed/fleet/utils/__init__.py +++ b/python/paddle/distributed/fleet/utils/__init__.py @@ -12,6 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .fs import LocalFS, HDFSClient -from .ps_util import DistributedInfer -from .recompute import recompute +from .fs import LocalFS # noqa: F401 +from .fs import HDFSClient # noqa: F401 +from .ps_util import DistributedInfer # noqa: F401 +from .recompute import recompute # noqa: F401 + +from . import log_util # noqa: F401 +from . import hybrid_parallel_util # noqa: F401 + +__all__ = [ #noqa + "LocalFS", "recompute", "DistributedInfer", "HDFSClient" +] diff --git a/python/paddle/distributed/fleet/utils/fs.py b/python/paddle/distributed/fleet/utils/fs.py index 7e62e551fe8d53..087942e70a2263 100644 --- a/python/paddle/distributed/fleet/utils/fs.py +++ b/python/paddle/distributed/fleet/utils/fs.py @@ -31,7 +31,7 @@ import shutil -__all__ = ['LocalFS', 'HDFSClient'] +__all__ = [] class ExecuteError(Exception): diff --git a/python/paddle/distributed/fleet/utils/http_server.py b/python/paddle/distributed/fleet/utils/http_server.py index 92295cc74ae4d4..a9d0687461b995 100644 --- a/python/paddle/distributed/fleet/utils/http_server.py +++ b/python/paddle/distributed/fleet/utils/http_server.py @@ -28,6 +28,8 @@ import threading import socket +__all__ = [] + def get_logger(name, level, fmt): logger = logging.getLogger(name) diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py index de2d3f45ba0330..5521bd5b952837 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py @@ -23,6 +23,8 @@ from collections import OrderedDict from .log_util import logger +__all__ = [] + def _apply_collective_grads(parameters, comm_group): grad_var_set = set() diff --git a/python/paddle/distributed/fleet/utils/log_util.py b/python/paddle/distributed/fleet/utils/log_util.py index 12c0bf699c1e60..77eb641e0c6fe4 100644 --- a/python/paddle/distributed/fleet/utils/log_util.py +++ b/python/paddle/distributed/fleet/utils/log_util.py @@ -15,6 +15,8 @@ import logging import sys +__all__ = [] + class LoggerFactory: @staticmethod diff --git a/python/paddle/distributed/fleet/utils/ps_util.py b/python/paddle/distributed/fleet/utils/ps_util.py index 7bf7bec43de008..8bf69a41a7cc83 100644 --- a/python/paddle/distributed/fleet/utils/ps_util.py +++ b/python/paddle/distributed/fleet/utils/ps_util.py @@ -18,6 +18,8 @@ import paddle import warnings +__all__ = [] + class DistributedInfer: """ diff --git a/python/paddle/distributed/fleet/utils/recompute.py b/python/paddle/distributed/fleet/utils/recompute.py index 0dc305ec77d517..a172610ae4b083 100644 --- a/python/paddle/distributed/fleet/utils/recompute.py +++ b/python/paddle/distributed/fleet/utils/recompute.py @@ -23,6 +23,8 @@ format='%(asctime)s %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S') +__all__ = [] + def detach_variable(inputs): out = [] diff --git a/python/paddle/distributed/launch.py b/python/paddle/distributed/launch.py index df3a3407bf5cf6..e02a439025b77f 100644 --- a/python/paddle/distributed/launch.py +++ b/python/paddle/distributed/launch.py @@ -14,3 +14,5 @@ from paddle.distributed.fleet import launch launch.launch() + +__all__ = [] diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py index 582c0be713f4ef..bc042e722947a0 100644 --- a/python/paddle/distributed/parallel.py +++ b/python/paddle/distributed/parallel.py @@ -15,7 +15,8 @@ import os import six import warnings -from multiprocessing import Process, Manager +from multiprocessing import Process # noqa: F401 +from multiprocessing import Manager # noqa: F401 import time import sys @@ -26,9 +27,11 @@ from paddle.fluid.framework import _set_expected_place from paddle.fluid.dygraph import parallel_helper from paddle.fluid.dygraph.parallel import ParallelEnv -from paddle.distributed.fleet.base.private_helper_function import wait_server_ready +from paddle.distributed.fleet.base.private_helper_function import wait_server_ready # noqa: F401 -__all__ = ["init_parallel_env"] +__all__ = [ #noqa + "init_parallel_env" +] ParallelStrategy = core.ParallelStrategy diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py index 782fcb28e991c2..c46672dca09e97 100644 --- a/python/paddle/distributed/spawn.py +++ b/python/paddle/distributed/spawn.py @@ -21,7 +21,9 @@ import sys import warnings -from paddle.distributed.utils import _print_arguments, _prepare_trainer_env, get_host_name_ip +from paddle.distributed.utils import _print_arguments +from paddle.distributed.utils import _prepare_trainer_env +from paddle.distributed.utils import get_host_name_ip from paddle.distributed.cloud_utils import get_cluster_and_pod from paddle.distributed.fleet.cloud_utils import use_paddlecloud from paddle.device import get_device @@ -30,6 +32,8 @@ from paddle.fluid import core from paddle.fluid.framework import _cpu_num, set_flags +__all__ = [] + class ParallelEnvArgs(object): def __init__(self): diff --git a/python/paddle/distributed/utils.py b/python/paddle/distributed/utils.py index f40a7b31b83e6f..e84025c2eb6d20 100644 --- a/python/paddle/distributed/utils.py +++ b/python/paddle/distributed/utils.py @@ -26,6 +26,24 @@ import socket from paddle.fluid import core +__all__ = [ #noqa + 'get_host_name_ip', + 'Trainer', + 'get_cluster', + 'start_local_trainers', + 'watch_local_trainers', + 'find_free_ports', + 'JobServer', + 'Cluster', + 'Pod', + 'Hdfs', + 'add_arguments', + 'terminate_local_procs', + 'TrainerProc', + 'get_logger', + 'pull_worker_log' +] + logger = logging.getLogger("root") logger.propagate = False diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index 817fd501181992..2a824898b82710 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -203,7 +203,7 @@ def weight_norm(*args): 'Dropout3D', 'Bilinear', 'AlphaDropout', - 'Unfold' + 'Unfold', 'RNNCellBase', 'SimpleRNNCell', 'LSTMCell',