Skip to content

Commit 6c28e11

Browse files
committed
coll/ch4: activate CH4 POSIX release_gather bcast
Enable CVARs and JSONs to select ch4-posix layer release_gather algorithms. Select MPIDI_POSIX_mpi_bcast_release_gather if it passes the MPIDI_CH4_release_gather condition check, which only passes if comm is a POSIX intranode comm.
1 parent 20eb0bd commit 6c28e11

File tree

6 files changed

+59
-18
lines changed

6 files changed

+59
-18
lines changed

src/mpi/coll/coll_algorithms.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ conditions:
7070
avg_msg_size(thresh): MPIR_Csel_avg_msg_size
7171
total_msg_size(thresh): MPIR_Csel_total_msg_size
7272

73+
# conditional conditions - only call the condition function under macro_guard
74+
MPIDI_CH4_release_gather: MPIDI_POSIX_check_release_gather #if defined(MPIDI_CH4_SHM_POSIX)
75+
7376
# ----
7477
general:
7578
MPIR_Coll_auto
@@ -110,6 +113,11 @@ bcast-intra:
110113
pipelined_tree
111114
extra_params: tree_type, k, is_non_blocking, chunk_size, recv_pre_posted
112115
cvar_params: TREE_TYPE, TREE_KVAL, IS_NON_BLOCKING, TREE_PIPELINE_CHUNK_SIZE, RECV_PRE_POST
116+
release_gather
117+
func_name: MPIDI_POSIX_mpi_bcast_release_gather
118+
inline: 1
119+
macro_guard: defined(MPIDI_CH4_SHM_POSIX)
120+
restrictions: MPIDI_CH4_release_gather
113121
ibcast-intra:
114122
sched_binomial
115123
sched_smp

src/mpi/coll/coll_selection.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
{
22
"collective=bcast-intra":
33
{
4+
"MPIDI_CH4_release_gather":
5+
{
6+
"algorithm=MPIDI_POSIX_mpi_bcast_release_gather":{}
7+
},
48
"comm_size(8)":
59
{
610
"algorithm=MPIR_Bcast_intra_binomial":{}

src/mpi/coll/cvars.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ cvars:
202202
scatter_ring_allgather - Force Scatter Ring
203203
pipelined_tree - Force tree-based pipelined algorithm
204204
tree - Force tree-based algorithm
205+
release_gather - Force CH4 POSIX release_gather algorithm
205206

206207
- name : MPIR_CVAR_BCAST_TREE_KVAL
207208
category : COLLECTIVE

src/mpid/ch4/shm/posix/posix_coll_release_gather.h

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,47 @@
2929
#include "algo_common.h"
3030
#include "release_gather.h"
3131

32+
/* Condition check deciding whether the POSIX release_gather algorithm may be
 * used for the collective described by coll_sig.
 *
 * Returns true only if every check below passes; any failed check returns
 * false. Side effects: may increment the per-comm num_collective_calls counter
 * and calls MPIDI_POSIX_mpi_release_gather_comm_init on the communicator. */
MPL_STATIC_INLINE_PREFIX bool MPIDI_POSIX_check_release_gather(MPIR_Csel_coll_sig_s * coll_sig)
33+
{
34+
/* Declined whenever MPIR_IS_THREADED is true.
 * NOTE(review): presumably release_gather is not thread-safe - confirm */
if (MPIR_IS_THREADED) {
35+
return false;
36+
}
37+
38+
/* Check whether comm is an intranode comm: the comm must carry the
 * HIERARCHY attribute (asserted) and is rejected when num_external > 1 */
39+
MPIR_Comm *comm_ptr = coll_sig->comm_ptr;
40+
MPIR_Assert(comm_ptr->attr & MPIR_COMM_ATTR__HIERARCHY);
41+
if (comm_ptr->num_external > 1) {
42+
return false;
43+
}
44+
45+
/* Map coll_type to a release_gather opcode; only intra bcast is accepted
 * here, every other coll_type is rejected */
46+
MPIDI_POSIX_release_gather_opcode_t opcode;
47+
switch (coll_sig->coll_type) {
48+
case MPIR_CSEL_COLL_TYPE__INTRA_BCAST:
49+
opcode = MPIDI_POSIX_RELEASE_GATHER_OPCODE_BCAST;
50+
break;
51+
default:
52+
return false;
53+
}
54+
55+
/* Unless MPIR_COLL_SIG_FLAG__CVAR is set in coll_sig->flags, count this
 * call and decline until the per-comm count reaches
 * MPIR_CVAR_POSIX_NUM_COLLS_THRESHOLD */
56+
if (!(coll_sig->flags & MPIR_COLL_SIG_FLAG__CVAR)) {
57+
MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls++;
58+
if (MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls <
59+
MPIR_CVAR_POSIX_NUM_COLLS_THRESHOLD) {
60+
return false;
61+
}
62+
}
63+
64+
/* Lazy initialization of release_gather specific struct; an init failure
 * is not propagated as an error, the check simply reports false */
65+
int mpi_errno = MPIDI_POSIX_mpi_release_gather_comm_init(comm_ptr, opcode);
66+
if (mpi_errno != MPI_SUCCESS) {
67+
return false;
68+
}
69+
70+
return true;
71+
}
72+
3273
/* Intra-node bcast is implemented as a release step followed by gather step in release_gather
3374
* framework. The actual data movement happens in release step. Gather step makes sure that
3475
* the shared bcast buffer can be reused for next bcast call. Release gather framework has
@@ -59,20 +100,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_bcast_release_gather(void *buffer,
59100
goto fn_exit;
60101
}
61102

62-
MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls++;
63-
if (MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls <
64-
MPIR_CVAR_POSIX_NUM_COLLS_THRESHOLD) {
65-
/* Fallback to pt2pt algorithms if the total number of release_gather collective calls is
66-
* less than the specified threshold */
67-
goto fallback;
68-
}
69-
70-
/* Lazy initialization of release_gather specific struct */
71-
mpi_errno =
72-
MPIDI_POSIX_mpi_release_gather_comm_init(comm_ptr, MPIDI_POSIX_RELEASE_GATHER_OPCODE_BCAST);
73-
MPII_COLLECTIVE_FALLBACK_CHECK(MPIR_Comm_rank(comm_ptr), !mpi_errno, mpi_errno,
74-
"release_gather bcast cannot create more shared memory. Falling back to pt2pt algorithms.\n");
75-
76103
my_rank = MPIR_Comm_rank(comm_ptr);
77104
MPIR_Type_get_extent_impl(datatype, &lb, &extent);
78105
MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
@@ -150,10 +177,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_bcast_release_gather(void *buffer,
150177
return mpi_errno;
151178
fn_fail:
152179
goto fn_exit;
153-
fallback:
154-
/* FIXME: proper error */
155-
mpi_errno = MPI_ERR_OTHER;
156-
goto fn_exit;
157180
}
158181

159182
/* Intra-node reduce is implemented as a release step followed by gather step in release_gather

src/mpid/ch4/shm/posix/posix_pre.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
#include <mpi.h>
1010
#include "release_gather_types.h"
1111

12+
/* define a macro to signify that we have CH4 POSIX. An example usage is
13+
* to macro-guard posix collective algorithms */
14+
#define MPIDI_CH4_SHM_POSIX 1
15+
1216
#define MPIDI_POSIX_MAX_AM_HDR_SIZE 800 /* constrained by MPIDI_POSIX_AM_HDR_POOL_CELL_SIZE */
1317
#define MPIDI_POSIX_AM_MSG_HEADER_SIZE (sizeof(MPIDI_POSIX_am_header_t))
1418
#define MPIDI_POSIX_MAX_IOV_NUM (3) /* am_hdr, [padding], payload */

src/mpid/ch4/shm/posix/release_gather/release_gather.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,7 @@ int MPIDI_POSIX_mpi_release_gather_comm_init(MPIR_Comm * comm_ptr,
481481
if (mpi_errno != MPI_SUCCESS) {
482482
MPIDI_POSIX_mpi_release_gather_comm_free(comm_ptr);
483483
RELEASE_GATHER_FIELD(comm_ptr, is_initialized) = 0;
484+
/* TODO: set a flag so next time we skip the try */
484485
}
485486
return mpi_errno;
486487
fn_fail:

0 commit comments

Comments
 (0)