Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 56 additions & 40 deletions python/paddle/distributed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,46 +12,62 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from . import spawn
from .spawn import spawn

from . import parallel
from .parallel import init_parallel_env
from .parallel import get_rank
from .parallel import get_world_size
from paddle.fluid.dygraph.parallel import ParallelEnv #DEFINE_ALIAS
from paddle.distributed.fleet.dataset import *

from . import collective
from .collective import *

from .entry_attr import ProbabilityEntry
from .entry_attr import CountFilterEntry

# start multiprocess apis
__all__ = ["spawn"]

# dygraph parallel apis
__all__ += [
"init_parallel_env",
"get_rank",
"get_world_size",
"ParallelEnv",
"InMemoryDataset",
"QueueDataset",
]
from .spawn import spawn # noqa: F401

# dataset reader
__all__ += [
"InMemoryDataset",
"QueueDataset",
]
from .parallel import init_parallel_env # noqa: F401
from .parallel import get_rank # noqa: F401
from .parallel import get_world_size # noqa: F401

# entry for embedding
__all__ += [
"ProbabilityEntry",
"CountFilterEntry",
]
from paddle.distributed.fleet.dataset import InMemoryDataset # noqa: F401
from paddle.distributed.fleet.dataset import QueueDataset # noqa: F401

from .collective import broadcast # noqa: F401
from .collective import all_reduce # noqa: F401
from .collective import reduce # noqa: F401
from .collective import all_gather # noqa: F401
from .collective import scatter # noqa: F401
from .collective import barrier # noqa: F401
from .collective import ReduceOp # noqa: F401
from .collective import split # noqa: F401
from .collective import new_group # noqa: F401
from .collective import alltoall # noqa: F401
from .collective import recv # noqa: F401
from .collective import get_group # noqa: F401
from .collective import send # noqa: F401
from .collective import wait # noqa: F401

from .fleet import BoxPSDataset # noqa: F401

# collective apis
__all__ += collective.__all__
from .entry_attr import ProbabilityEntry # noqa: F401
from .entry_attr import CountFilterEntry # noqa: F401

from paddle.fluid.dygraph.parallel import ParallelEnv # noqa: F401

from . import cloud_utils # noqa: F401
from . import utils # noqa: F401

__all__ = [ #noqa
"spawn",
"scatter",
"broadcast",
"ParallelEnv",
"new_group",
"init_parallel_env",
"QueueDataset",
"split",
"CountFilterEntry",
"get_world_size",
"get_group",
"all_gather",
"InMemoryDataset",
"barrier",
"all_reduce",
"alltoall",
"send",
"reduce",
"recv",
"ReduceOp",
"wait",
"get_rank",
"ProbabilityEntry"
]
7 changes: 6 additions & 1 deletion python/paddle/distributed/cloud_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@

import os
import paddle
from paddle.distributed.utils import get_cluster, logger, get_gpus, get_cluster_from_args
from paddle.distributed.utils import get_cluster
from paddle.distributed.utils import logger
from paddle.distributed.utils import get_gpus
from paddle.distributed.utils import get_cluster_from_args

__all__ = []


def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_devices):
Expand Down
27 changes: 9 additions & 18 deletions python/paddle/distributed/collective.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,14 @@
import numpy as np
import os
from ..fluid.layer_helper import LayerHelper
from ..fluid.framework import Variable, OpProtoHolder, in_dygraph_mode, convert_np_dtype_to_dtype_
from ..fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype
from ..fluid.framework import Variable
from ..fluid.framework import OpProtoHolder
from ..fluid.framework import in_dygraph_mode
from ..fluid.framework import convert_np_dtype_to_dtype_
from ..fluid.data_feeder import convert_dtype
from ..fluid.data_feeder import check_variable_and_dtype
from ..fluid.data_feeder import check_type
from ..fluid.data_feeder import check_dtype
from ..fluid.layers.tensor import fill_constant
from ..fluid.layers import utils
from ..fluid.dygraph.parallel import prepare_context
Expand All @@ -25,22 +31,7 @@
import paddle.fluid as fluid
import paddle.fluid.core as core

__all__ = [
'wait',
'new_group',
'get_group',
'broadcast',
'all_reduce',
'reduce',
'all_gather',
'scatter',
'barrier',
'split',
'alltoall',
'ReduceOp',
'send',
'recv',
]
__all__ = []


class ReduceOp:
Expand Down
2 changes: 1 addition & 1 deletion python/paddle/distributed/entry_attr.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from __future__ import print_function

__all__ = ['ProbabilityEntry', 'CountFilterEntry']
__all__ = []


class EntryAttr(object):
Expand Down
41 changes: 27 additions & 14 deletions python/paddle/distributed/fleet/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,34 @@
# limitations under the License.

# TODO: define distributed api under this directory,
from .base.role_maker import Role, UserDefinedRoleMaker, PaddleCloudRoleMaker
from .base.distributed_strategy import DistributedStrategy
from .base.fleet_base import Fleet
from .base.util_factory import UtilBase
from .dataset import *
from .data_generator import MultiSlotDataGenerator, MultiSlotStringDataGenerator
from . import metrics
from .base.topology import CommunicateTopology, HybridCommunicateGroup
from .meta_parallel import *
from .base.role_maker import Role # noqa: F401
from .base.role_maker import UserDefinedRoleMaker # noqa: F401
from .base.role_maker import PaddleCloudRoleMaker # noqa: F401
from .base.distributed_strategy import DistributedStrategy # noqa: F401
from .base.fleet_base import Fleet # noqa: F401
from .base.util_factory import UtilBase # noqa: F401
from .dataset import DatasetBase # noqa: F401
from .dataset import InMemoryDataset # noqa: F401
from .dataset import QueueDataset # noqa: F401
from .dataset import FileInstantDataset # noqa: F401
from .dataset import BoxPSDataset # noqa: F401
from .data_generator.data_generator import MultiSlotDataGenerator # noqa: F401
from .data_generator.data_generator import MultiSlotStringDataGenerator # noqa: F401
from . import metrics # noqa: F401
from .base.topology import CommunicateTopology
from .base.topology import HybridCommunicateGroup # noqa: F401

__all__ = [
"DistributedStrategy", "UtilBase", "UserDefinedRoleMaker",
"PaddleCloudRoleMaker", "Fleet", "MultiSlotDataGenerator",
"MultiSlotStringDataGenerator", "Role", "CommunicateTopology",
"HybridCommunicateGroup"
__all__ = [ #noqa
"CommunicateTopology",
"UtilBase",
"HybridCommunicateGroup",
"MultiSlotStringDataGenerator",
"UserDefinedRoleMaker",
"DistributedStrategy",
"Role",
"MultiSlotDataGenerator",
"PaddleCloudRoleMaker",
"Fleet"
]

fleet = Fleet()
Expand Down
2 changes: 2 additions & 0 deletions python/paddle/distributed/fleet/ascend_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import paddle
from paddle.distributed.fleet.launch_utils import get_cluster, logger, get_host_name_ip, DeviceMode

__all__ = []


def _get_ascend_rankfile(rank_table_file_path):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import google.protobuf.text_format
import google.protobuf

__all__ = ["DistributedStrategy"]
__all__ = []

non_auto_func_called = True

Expand Down
2 changes: 2 additions & 0 deletions python/paddle/distributed/fleet/base/fleet_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
from ..meta_optimizers import HybridParallelOptimizer
from ..meta_optimizers import HybridParallelGradScaler

__all__ = []


def _inited_runtime_handler_(func):
def __impl__(*args, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

from ..meta_optimizers import *

__all__ = []

meta_optimizer_names = list(
filter(lambda name: name.endswith("Optimizer"), dir()))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from contextlib import closing
from six import string_types

__all__ = []


def wait_server_ready(endpoints):
"""
Expand Down
2 changes: 2 additions & 0 deletions python/paddle/distributed/fleet/base/role_maker.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import paddle.fluid as fluid
from paddle.distributed.fleet.base.private_helper_function import wait_server_ready

__all__ = []


class Role:
WORKER = 1
Expand Down
2 changes: 2 additions & 0 deletions python/paddle/distributed/fleet/base/runtime_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from ..runtime.parameter_server_runtime import ParameterServerRuntime
from ..runtime.the_one_ps import TheOnePSRuntime

__all__ = []


class RuntimeFactory(object):
def __init__(self):
Expand Down
2 changes: 2 additions & 0 deletions python/paddle/distributed/fleet/base/strategy_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = []


def create_graph(optimizer_list):
nsize = len(optimizer_list)
Expand Down
3 changes: 2 additions & 1 deletion python/paddle/distributed/fleet/base/util_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
import subprocess
import os
import numpy as np
__all__ = ['UtilBase']

__all__ = []


class UtilFactory(object):
Expand Down
2 changes: 2 additions & 0 deletions python/paddle/distributed/fleet/cloud_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import paddle
from paddle.distributed.fleet.launch_utils import get_cluster, logger

__all__ = []


def get_cloud_cluster(args_node_ips,
device_mode,
Expand Down
4 changes: 3 additions & 1 deletion python/paddle/distributed/fleet/data_generator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and

from .data_generator import *
from .data_generator import DataGenerator # noqa: F401

__all__ = []
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import os
import sys

__all__ = []


class DataGenerator(object):
"""
Expand Down
10 changes: 8 additions & 2 deletions python/paddle/distributed/fleet/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and

from .dataset import *
from .index_dataset import *
from .dataset import DatasetBase # noqa: F401
from .dataset import InMemoryDataset # noqa: F401
from .dataset import QueueDataset # noqa: F401
from .dataset import FileInstantDataset # noqa: F401
from .dataset import BoxPSDataset # noqa: F401
from .index_dataset import TreeIndex # noqa: F401

__all__ = []
2 changes: 2 additions & 0 deletions python/paddle/distributed/fleet/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from google.protobuf import text_format
import paddle.fluid.core as core

__all__ = []


class DatasetBase(object):
""" Base dataset class. """
Expand Down
2 changes: 2 additions & 0 deletions python/paddle/distributed/fleet/dataset/index_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
# limitations under the License.
from paddle.fluid import core

__all__ = []


class Index(object):
def __init__(self, name):
Expand Down
2 changes: 2 additions & 0 deletions python/paddle/distributed/fleet/launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@
import paddle.distributed.fleet.cloud_utils as cloud_utils
import paddle.distributed.fleet.ascend_utils as ascend_utils

__all__ = []


def _print_arguments(args):
print("----------- Configuration Arguments -----------")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import paddle.fluid.contrib.mixed_precision as mixed_precision
from .meta_optimizer_base import MetaOptimizerBase

__all__ = []


class AMPOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

HcomGroupConfig = namedtuple('HcomGroupConfig', ['name', 'nranks', 'rank_ids'])

__all__ = []


class AscendIRParser(object):
def __init__(self, auto_dp=False, world_rank_size=1):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from paddle.distributed import fleet
from functools import reduce

__all__ = []

registerd_op = {## forwards
"elementwise_add": "AddParser",
"matmul": "MatMulParser",
Expand Down
Loading