Skip to content

Commit b0bf4f9

Browse files
committed
additional _sync devicelibs
1 parent 887de34 commit b0bf4f9

2 files changed

Lines changed: 47 additions & 0 deletions

File tree

bitcode/devicelib.cl

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,42 @@ EXPORT OVLD int __chip_any(int predicate) {
994994
return __chip_ballot(predicate) != 0;
995995
}
996996

997+
// Masked variant of __chip_ballot.
//   mask      - 32-bit lane mask selecting which ballot bits are reported.
//   predicate - this work-item's vote, forwarded to __chip_ballot.
// Returns 0 for an empty mask, the plain __chip_ballot result for the
// all-ones mask, and otherwise the ballot ANDed with `mask`.
EXPORT OVLD unsigned __chip_ballot_sync(unsigned mask, int predicate) {
  // Empty mask: no lane was asked, so no bit can be set.
  if (mask == 0)
    return 0;

  // All 32 mask bits set: identical to the unmasked ballot.
  if (mask == 0xFFFFFFFF)
    return __chip_ballot(predicate);

  // Partial mask: the ballot is still taken via __chip_ballot and only
  // filtered afterwards, so warn once per sub-group (from local id 0).
  if (get_sub_group_local_id() == 0)
    printf("warning: Partial mask in __ballot_sync is not fully supported\n");

  return __chip_ballot(predicate) & mask;
}
1009+
1010+
// Masked "any" vote.
//   mask      - 32-bit lane mask selecting which votes are considered.
//   predicate - this work-item's vote.
// Returns 0 for an empty mask, defers to __chip_any for the all-ones mask,
// and otherwise returns 1 when any ballot bit selected by `mask` is set.
EXPORT OVLD int __chip_any_sync(unsigned mask, int predicate) {
  // Nothing selected, so nothing can be true.
  if (mask == 0)
    return 0;

  // Full mask: same as the unmasked vote.
  if (mask == 0xFFFFFFFF)
    return __chip_any(predicate);

  // Keep only the votes of the lanes named in the mask.
  unsigned selected_votes = __chip_ballot(predicate) & mask;
  return selected_votes != 0;
}
1020+
1021+
// Masked "all" vote.
//   mask      - 32-bit lane mask selecting which votes are considered.
//   predicate - this work-item's vote.
// Returns 1 for an empty mask (vacuously true), defers to __chip_all for the
// all-ones mask, and otherwise returns 1 only when every ballot bit selected
// by `mask` is set.
EXPORT OVLD int __chip_all_sync(unsigned mask, int predicate) {
  // No lanes selected: "all of nothing" holds.
  if (mask == 0)
    return 1;

  // Full mask: same as the unmasked vote.
  if (mask == 0xFFFFFFFF)
    return __chip_all(predicate);

  // NOTE(review): a lane named in `mask` whose ballot bit is 0 for any reason
  // (presumably including lanes that are not executing) makes this return 0
  // -- confirm that is the intended behavior for partial masks.
  unsigned votes = __chip_ballot(predicate);
  return (votes & mask) == mask;
}
1031+
1032+
9971033
// Returns the calling work-item's id within its sub-group, as reported by
// get_sub_group_local_id().
EXPORT OVLD unsigned __chip_lane_id() {
  return get_sub_group_local_id();
}
9981034

9991035
EXPORT OVLD void __chip_syncwarp() {

include/hip/devicelib/sync_and_util.hh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,22 @@ extern "C++" inline __device__ uint64_t __ballot(int predicate) {
3636
return __chip_ballot(predicate);
3737
}
3838

39+
// Masked ballot implemented in the device library (bitcode/devicelib.cl).
// The definition there returns a 32-bit `unsigned`; the declaration must use
// the same return type, because the return type is not part of the mangled
// name and a mismatch would link silently while the call site reads a
// differently sized result than the callee produces.
extern "C++" __device__ unsigned __chip_ballot_sync(unsigned mask, int predicate); // Custom

// HIP-facing wrapper.
//   mask      - 32-bit lane mask forwarded unchanged.
//   predicate - this thread's vote.
// Returns the masked ballot, zero-extended to uint64_t so the return type
// matches __ballot.
extern "C++" inline __device__ uint64_t __ballot_sync(unsigned mask, int predicate) {
  return __chip_ballot_sync(mask, predicate);
}
43+
3944
// "All" vote provided by the device library; declared here so the inline
// wrapper below can call it.
extern "C++" __device__ int __chip_all(int predicate); // Custom

// Mask-less vote wrapper: hands `predicate` to __chip_all and returns its
// result unchanged.
extern "C++" inline __device__ int __all(int predicate) {
  const int verdict = __chip_all(predicate);
  return verdict;
}
4348

49+
// Masked "all" vote provided by the device library; declared here so the
// inline wrapper below can call it.
extern "C++" __device__ int __chip_all_sync(unsigned mask, int predicate); // Custom

// Masked vote wrapper: forwards `mask` and `predicate` to __chip_all_sync and
// returns its result unchanged.
extern "C++" inline __device__ int __all_sync(unsigned mask, int predicate) {
  const int verdict = __chip_all_sync(mask, predicate);
  return verdict;
}
53+
54+
4455
extern "C++" __device__ int __chip_any(int predicate); // Custom
4556
extern "C++" inline __device__ int __any(int predicate) {
4657
return __chip_any(predicate);

0 commit comments

Comments
 (0)