Skip to content

Commit fff329f

Browse files
committed
Merge branch 'develop' into tabs/fix/py
2 parents 48434d4 + f778470 commit fff329f

File tree

190 files changed

+4752
-3063
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

190 files changed

+4752
-3063
lines changed

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,9 @@ if(WITH_DISTRIBUTE)
485485
ON
486486
CACHE STRING "Enable GLOO when compiling WITH_DISTRIBUTE=ON." FORCE)
487487
endif()
488+
set(WITH_MPI
489+
ON
490+
CACHE STRING "Enable MPI when compiling WITH_DISTRIBUTE=ON." FORCE)
488491
if(WITH_ASCEND_CL AND NOT WITH_ARM_BRPC)
489492
# disable WITH_PSCORE for NPU before include third_party
490493
message(
@@ -509,6 +512,10 @@ if(WITH_DISTRIBUTE)
509512
endif()
510513
endif()
511514

515+
if(WITH_MPI)
516+
include(mpi)
517+
endif()
518+
512519
include(third_party
513520
)# download, build, install third_party, Contains about 20+ dependencies
514521

cmake/experiments/cuda_module_loading_lazy.cmake

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
1+
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -16,16 +16,15 @@
1616
# cuda module lazy loading is supported by CUDA 11.7+
1717
# this experiment option makes Paddle supports lazy loading before CUDA 11.7.
1818

19-
option(EXP_CUDA_MODULE_LOADING_LAZY "enable lazy cuda module loading" OFF)
20-
if(${EXP_CUDA_MODULE_LOADING_LAZY})
21-
if(NOT ${ON_INFER} OR NOT ${LINUX})
19+
if(LINUX)
20+
if(NOT ON_INFER)
2221
message(
2322
"EXP_CUDA_MODULE_LOADING_LAZY only works with ON_INFER=ON on Linux platforms"
2423
)
2524
return()
2625
endif()
27-
if(NOT ${CUDA_FOUND})
28-
message("EXP_CUDA_MODULE_LOADING_LAZY only works with CUDA")
26+
if(NOT WITH_GPU)
27+
message("EXP_CUDA_MODULE_LOADING_LAZY only works with GPU")
2928
return()
3029
endif()
3130
if(${CUDA_VERSION} VERSION_GREATER_EQUAL "11.7")
@@ -39,8 +38,13 @@ if(${EXP_CUDA_MODULE_LOADING_LAZY})
3938
set(CUDA_USE_STATIC_CUDA_RUNTIME
4039
OFF
4140
CACHE BOOL "" FORCE)
42-
set(CMAKE_CUDA_FLAGS "--cudart shared")
4341
enable_language(CUDA)
42+
execute_process(
43+
COMMAND "rm" "-rf" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy"
44+
COMMAND "chmod" "755" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy.sh"
45+
COMMAND "bash" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy.sh"
46+
"${CMAKE_SOURCE_DIR}/tools/nvcc_lazy" "${CUDA_TOOLKIT_ROOT_DIR}")
47+
execute_process(COMMAND "chmod" "755" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy")
4448
set(CUDA_NVCC_EXECUTABLE
4549
"${CMAKE_SOURCE_DIR}/tools/nvcc_lazy"
4650
CACHE FILEPATH "" FORCE)

cmake/mpi.cmake

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
if(NOT WITH_DISTRIBUTE OR NOT WITH_MPI)
2+
return()
3+
endif()
4+
5+
find_package(MPI)
6+
7+
if(NOT MPI_CXX_FOUND)
8+
set(WITH_MPI
9+
OFF
10+
CACHE STRING "Disable MPI" FORCE)
11+
message(WARNING "Not found MPI support in current system")
12+
return()
13+
endif()
14+
15+
message(STATUS "MPI compile flags: " ${MPI_CXX_COMPILE_FLAGS})
16+
message(STATUS "MPI include path: " ${MPI_CXX_INCLUDE_PATH})
17+
message(STATUS "MPI LINK flags path: " ${MPI_CXX_LINK_FLAGS})
18+
message(STATUS "MPI libraries: " ${MPI_CXX_LIBRARIES})
19+
include_directories(SYSTEM ${MPI_CXX_INCLUDE_PATH})
20+
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MPI_CXX_LINK_FLAGS}")
21+
add_definitions("-DPADDLE_WITH_MPI")
22+
find_program(
23+
OMPI_INFO
24+
NAMES ompi_info
25+
HINTS ${MPI_CXX_LIBRARIES}/../bin)
26+
27+
if(OMPI_INFO)
28+
execute_process(COMMAND ${OMPI_INFO} OUTPUT_VARIABLE output_)
29+
if(output_ MATCHES "smcuda")
30+
# NOTE: some MPI libraries support CUDA-aware communication.
31+
add_definitions("-DPADDLE_WITH_MPI_AWARE")
32+
endif()
33+
endif()

cmake/operators.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ function(op_library TARGET)
510510
if(WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
511511
# Append first implemented MKLDNN activation operator
512512
if(${MKLDNN_FILE} STREQUAL "activation_mkldnn_op")
513-
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(gelu, MKLDNN);\n")
513+
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(softplus, MKLDNN);\n")
514514
elseif(${MKLDNN_FILE} STREQUAL "conv_mkldnn_op")
515515
file(APPEND ${pybind_file}
516516
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n")

paddle/fluid/distributed/collective/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,13 @@ if(WITH_NCCL OR WITH_RCCL)
4343
endif()
4444
endif()
4545

46+
if(WITH_MPI)
47+
cc_library(
48+
processgroup_mpi
49+
SRCS ProcessGroupMPI.cc MPITools.cc Common.cc
50+
DEPS collective_helper device_context)
51+
endif()
52+
4653
if(WITH_ASCEND_CL)
4754
cc_library(
4855
processgroup_hccl
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/distributed/collective/MPITools.h"
16+
#include "paddle/fluid/distributed/collective/Common.h"
17+
#include "paddle/fluid/distributed/collective/Types.h"
18+
19+
namespace paddle {
20+
namespace distributed {
21+
namespace mpi {
22+
23+
MPI_Op ToMPIType(ReduceOp reduction) {
24+
static const std::map<ReduceOp, MPI_Op> red_type = {
25+
{ReduceOp::MIN, MPI_MIN},
26+
{ReduceOp::MAX, MPI_MAX},
27+
{ReduceOp::SUM, MPI_SUM},
28+
{ReduceOp::PRODUCT, MPI_PROD},
29+
};
30+
auto it = red_type.find(reduction);
31+
PADDLE_ENFORCE_EQ(it != red_type.end(),
32+
true,
33+
platform::errors::InvalidArgument(
34+
"Invalid mpi reduction. Must be MPI_MIN | MPI_MAX | "
35+
"MPI_PROD | MPI_SUM."));
36+
return it->second;
37+
}
38+
39+
// NOTE: MPI does not support CUDA-aware communication for now.
40+
bool CheckMpiCudaAware() { return false; }
41+
42+
void CheckValidInputs(const std::vector<phi::DenseTensor>& tensors) {
43+
PADDLE_ENFORCE_EQ(
44+
tensors.size() == 1,
45+
true,
46+
platform::errors::InvalidArgument("the inputs size of MPI must be 1!"));
47+
48+
PADDLE_ENFORCE_EQ(CheckTensorsInCudaPlace(tensors) && !CheckMpiCudaAware(),
49+
false,
50+
platform::errors::InvalidArgument(
51+
"Found CUDA Tensor. But CUDA-aware MPI not support!"));
52+
}
53+
54+
} // namespace mpi
55+
} // namespace distributed
56+
} // namespace paddle
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
#include <error.h>
17+
#include <iostream>
18+
#include <string>
19+
#include "paddle/fluid/framework/data_type.h"
20+
#include "paddle/fluid/framework/variable.h"
21+
#include "paddle/fluid/platform/enforce.h"
22+
23+
#include "paddle/fluid/distributed/collective/Types.h"
24+
25+
#ifdef HOST
26+
#undef HOST
27+
#endif
28+
29+
#include <mpi.h>
30+
31+
namespace paddle {
32+
namespace distributed {
33+
namespace mpi {
34+
35+
#define MPI_CHECK(cmd) \
36+
do { \
37+
int r = cmd; \
38+
if (r != MPI_SUCCESS) { \
39+
LOG(FATAL) << "Failed, MPI error in" << __FILE__ << ":" << __LINE__ \
40+
<< "with error code: " << std::to_string(r) << std::endl; \
41+
exit(EXIT_FAILURE); \
42+
} \
43+
} while (0)
44+
45+
MPI_Op ToMPIType(ReduceOp reduction);
46+
47+
bool CheckMpiCudaAware();
48+
49+
void CheckValidInputs(const std::vector<phi::DenseTensor>& tensors);
50+
51+
} // namespace mpi
52+
} // namespace distributed
53+
} // namespace paddle

paddle/fluid/distributed/collective/ProcessGroup.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,13 @@ ProcessGroup::ProcessGroup(int rank,
5252
}
5353
}
5454

55+
ProcessGroup::ProcessGroup(int rank, int size, int gid)
56+
: rank_(rank), size_(size), gid_(gid) {
57+
if (gid != IGNORE_ID) {
58+
auto map = ProcessGroupMapFromGid::getInstance();
59+
map->insert(gid_, this);
60+
}
61+
}
62+
5563
} // namespace distributed
5664
} // namespace paddle

paddle/fluid/distributed/collective/ProcessGroup.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ class ProcessGroup {
8282
int size,
8383
const platform::Place& place,
8484
int gid);
85+
86+
explicit ProcessGroup(int rank, int size, int gid);
87+
8588
virtual ~ProcessGroup() {}
8689

8790
int GetRank() const { return rank_; }

0 commit comments

Comments
 (0)