@@ -12,61 +12,31 @@ typedef struct MPII_Csel_container MPII_Csel_container_s;
1212#include "coll_impl.h"
1313#include "coll_algos.h"
1414
/* Identifies which collective operation a selection signature describes.
 *
 * Enumerator values are implicit: they start at 0 with ALLGATHER and increase
 * by one in declaration order, so the order below must not be changed.
 * MPIR_CSEL_COLL_TYPE__END is the trailing sentinel and therefore equals the
 * number of collective types listed before it. */
typedef enum {
    MPIR_CSEL_COLL_TYPE__ALLGATHER = 0,
    MPIR_CSEL_COLL_TYPE__ALLGATHERV,
    MPIR_CSEL_COLL_TYPE__ALLREDUCE,
    MPIR_CSEL_COLL_TYPE__ALLTOALL,
    MPIR_CSEL_COLL_TYPE__ALLTOALLV,
    MPIR_CSEL_COLL_TYPE__ALLTOALLW,
    MPIR_CSEL_COLL_TYPE__BARRIER,
    MPIR_CSEL_COLL_TYPE__BCAST,
    MPIR_CSEL_COLL_TYPE__EXSCAN,
    MPIR_CSEL_COLL_TYPE__GATHER,
    MPIR_CSEL_COLL_TYPE__GATHERV,
    MPIR_CSEL_COLL_TYPE__IALLGATHER,
    MPIR_CSEL_COLL_TYPE__IALLGATHERV,
    MPIR_CSEL_COLL_TYPE__IALLREDUCE,
    MPIR_CSEL_COLL_TYPE__IALLTOALL,
    MPIR_CSEL_COLL_TYPE__IALLTOALLV,
    MPIR_CSEL_COLL_TYPE__IALLTOALLW,
    MPIR_CSEL_COLL_TYPE__IBARRIER,
    MPIR_CSEL_COLL_TYPE__IBCAST,
    MPIR_CSEL_COLL_TYPE__IEXSCAN,
    MPIR_CSEL_COLL_TYPE__IGATHER,
    MPIR_CSEL_COLL_TYPE__IGATHERV,
    MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLGATHER,
    MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLGATHERV,
    MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALL,
    MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALLV,
    MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALLW,
    MPIR_CSEL_COLL_TYPE__IREDUCE,
    MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER,
    MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK,
    MPIR_CSEL_COLL_TYPE__ISCAN,
    MPIR_CSEL_COLL_TYPE__ISCATTER,
    MPIR_CSEL_COLL_TYPE__ISCATTERV,
    MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLGATHER,
    MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLGATHERV,
    MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALL,
    MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALLV,
    MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALLW,
    MPIR_CSEL_COLL_TYPE__REDUCE,
    MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER,
    MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK,
    MPIR_CSEL_COLL_TYPE__SCAN,
    MPIR_CSEL_COLL_TYPE__SCATTER,
    MPIR_CSEL_COLL_TYPE__SCATTERV,
    MPIR_CSEL_COLL_TYPE__END,   /* sentinel: one past the last collective type */
} MPIR_Csel_coll_type_e;
62-
63- typedef enum {
64- MPIR_COLL_ALGORITHM_IDS (),
65- /* composition algorithms */
66- MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Coll_auto ,
67- /* end */
68- MPII_CSEL_CONTAINER_TYPE__ALGORITHM__Algorithm_count ,
69- } MPII_Csel_container_type_e ;
/* Define values for the collective attribute (a bit mask).
 * - The first 8 bits are passed down to the basic collective algorithms.
 * - The higher bits are used to assist algorithm selection:
 *     - the lower 32 bits are reserved by the MPIR layer,
 *     - the higher 32 bits are reserved for the MPID layer.
 */
#define MPIR_COLL_ATTR_CORE_BITS 8
#define MPIR_COLL_ATTR_MPIR_BITS 32

/* bit 0-7 */
#define MPIR_COLL_ATTR_SYNC  0x1        /* It's an internal collective that focuses
                                         * on synchronization rather than batch latency.
                                         * In particular, advise netmod to avoid using
                                         * injection send. */
#define MPIR_ERR_PROC_FAILED 0x2
#define MPIR_ERR_OTHER       0x4
/* Mask covering both error flags above (evaluates to 0x6). */
#define MPIR_COLL_ATTR_ERR_MASK (MPIR_ERR_PROC_FAILED | MPIR_ERR_OTHER)

/* Non-zero when coll_attr carries any of the error flags. The parameter list
 * must follow the macro name with no intervening whitespace; otherwise the
 * preprocessor treats this as an object-like macro. */
#define MPIR_COLL_ATTR_HAS_ERR(coll_attr) ((coll_attr) & MPIR_COLL_ATTR_ERR_MASK)

/* bit 8-31, MPIR-layer */
#define MPIR_COLL_ATTR__inplace     0x00000100
#define MPIR_COLL_ATTR__pof2        0x00000200
#define MPIR_COLL_ATTR__commutative 0x00000400
#define MPIR_COLL_ATTR__builtin_op  0x00000800
7040
7141struct MPIR_Csel_coll_sig {
7242 MPIR_Csel_coll_type_e coll_type ;
@@ -75,6 +45,10 @@ struct MPIR_Csel_coll_sig {
7545 enum MPIR_sched_type sched_type ;
7646 bool is_persistent ;
7747
48+ struct {
49+ bool is_gpu ;
50+ } cache ;
51+
7852 union {
7953 struct {
8054 const void * sendbuf ;
@@ -215,208 +189,6 @@ struct MPIR_Csel_coll_sig {
215189 } u ;
216190};
217191
218- struct MPII_Csel_container {
219- MPII_Csel_container_type_e id ;
220-
221- union {
222- struct {
223- struct {
224- int k ;
225- } intra_tsp_brucks ;
226- struct {
227- int k ;
228- } intra_tsp_recexch_doubling ;
229- struct {
230- int k ;
231- } intra_tsp_recexch_halving ;
232- } iallgather ;
233- struct {
234- struct {
235- int k ;
236- } intra_tsp_brucks ;
237- struct {
238- int k ;
239- } intra_tsp_recexch_doubling ;
240- struct {
241- int k ;
242- } intra_tsp_recexch_halving ;
243- } iallgatherv ;
244- struct {
245- struct {
246- int k ;
247- } intra_tsp_recexch_single_buffer ;
248- struct {
249- int k ;
250- } intra_tsp_recexch ;
251- struct {
252- int tree_type ;
253- int k ;
254- int chunk_size ;
255- int buffer_per_child ;
256- } intra_tsp_tree ;
257- struct {
258- int k ;
259- } intra_tsp_recexch_reduce_scatter_recexch_allgatherv ;
260- } iallreduce ;
261- struct {
262- struct {
263- int k ;
264- int buffer_per_phase ;
265- } intra_tsp_brucks ;
266- struct {
267- int batch_size ;
268- int bblock ;
269- } intra_tsp_scattered ;
270- } ialltoall ;
271- struct {
272- struct {
273- int batch_size ;
274- int bblock ;
275- } intra_tsp_scattered ;
276- struct {
277- int bblock ;
278- } intra_tsp_blocked ;
279- } ialltoallv ;
280- struct {
281- struct {
282- int bblock ;
283- } intra_tsp_blocked ;
284- } ialltoallw ;
285- struct {
286- struct {
287- int k ;
288- } intra_k_dissemination ;
289- struct {
290- int k ;
291- bool single_phase_recv ;
292- } intra_recexch ;
293- } barrier ;
294- struct {
295- struct {
296- int k ;
297- } intra_tsp_recexch ;
298- struct {
299- int k ;
300- } intra_tsp_k_dissemination ;
301- } ibarrier ;
302- struct {
303- struct {
304- int tree_type ;
305- int k ;
306- int chunk_size ;
307- } intra_tsp_tree ;
308- struct {
309- int chunk_size ;
310- } intra_tsp_ring ;
311- struct {
312- int scatterv_k ;
313- int allgatherv_k ;
314- } intra_tsp_scatterv_allgatherv ;
315- struct {
316- int scatterv_k ;
317- } intra_tsp_scatterv_ring_allgatherv ;
318- } ibcast ;
319- struct {
320- struct {
321- int tree_type ;
322- int k ;
323- int is_non_blocking ;
324- int topo_overhead ;
325- int topo_diff_groups ;
326- int topo_diff_switches ;
327- int topo_same_switches ;
328- } intra_tree ;
329- struct {
330- int tree_type ;
331- int k ;
332- int is_non_blocking ;
333- int chunk_size ;
334- int recv_pre_posted ;
335- } intra_pipelined_tree ;
336- } bcast ;
337- struct {
338- struct {
339- int k ;
340- } intra_k_brucks ;
341- struct {
342- int k ;
343- bool single_phase_recv ;
344- } intra_recexch_doubling ;
345- struct {
346- int k ;
347- bool single_phase_recv ;
348- } intra_recexch_halving ;
349- } allgather ;
350- struct {
351- struct {
352- int k ;
353- } intra_k_brucks ;
354- } alltoall ;
355- struct {
356- struct {
357- int k ;
358- } intra_tsp_tree ;
359- } igather ;
360- struct {
361- struct {
362- int tree_type ;
363- int k ;
364- int chunk_size ;
365- int buffer_per_child ;
366- int topo_overhead ;
367- int topo_diff_groups ;
368- int topo_diff_switches ;
369- int topo_same_switches ;
370- } intra_tsp_tree ;
371- struct {
372- int chunk_size ;
373- int buffer_per_child ;
374- } intra_tsp_ring ;
375- } ireduce ;
376- struct {
377- struct {
378- int k ;
379- } intra_tsp_recexch ;
380- } ireduce_scatter ;
381- struct {
382- struct {
383- int k ;
384- } intra_tsp_recexch ;
385- } ireduce_scatter_block ;
386- struct {
387- struct {
388- int k ;
389- } intra_recursive_multiplying ;
390- struct {
391- int tree_type ;
392- int k ;
393- int chunk_size ;
394- int buffer_per_child ;
395- int topo_overhead ;
396- int topo_diff_groups ;
397- int topo_diff_switches ;
398- int topo_same_switches ;
399- } intra_tree ;
400- struct {
401- int k ;
402- bool single_phase_recv ;
403- } intra_recexch ;
404- struct {
405- int k ;
406- bool single_phase_recv ;
407- } intra_k_reduce_scatter_allgather ;
408- struct {
409- int ccl ;
410- } intra_ccl ;
411- } allreduce ;
412- struct {
413- struct {
414- int k ;
415- } intra_tsp_tree ;
416- } iscatter ;
417- } u ;
418- };
419-
420192typedef int (* MPIR_Coll_algo_fn ) (MPIR_Csel_coll_sig_s * coll_sig , MPII_Csel_container_s * cnt );
421193void MPIR_Init_coll_sig (MPIR_Csel_coll_sig_s * coll_sig );
422194
0 commit comments