Skip to content

Commit 773ab04

Browse files
author
Henry Linjamäki
committed
rtdevlib: fix function signature mismatches
A issue discovered while running HIP programs on OpenCL-BE->rusticl (with #830). Linking of __chip_atomic_add_f* symbols failed because the caller's and callee's function signature differed by their pointer parameters (pointee type didn't match). The mismatch was caused by LLVM-SPIRV-Translator's feature that attempts to recover original pointee types in LLVM bitcodes that use opaque pointers. But the way it attempts to infer the types may end up with SPIR-V functions with different pointee type across SPIR-V modules. The issue is worked around by passing pointers as integers for functions whose definitions are linked in at runtime.
1 parent e5df5cd commit 773ab04

7 files changed

Lines changed: 98 additions & 26 deletions

File tree

bitcode/README-devicelib.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,23 @@ Note: Some amdgcn intrinsics still don't have generic equivalents so for example
3535
* If there are missing headers, add them to the end of the list with a comment that they're undocumented. The missing documentationt should be reported to AMD or NVIDIA.
3636
* Avoid using macros, escpecially in the headers.
3737
* `devicelib.cl` does use some macros mainly to avoid code duplication when declaring function overloads. If you do use macros, make sure to leave a comment with the full function name that the macro expands to.
38-
* All device-side functions should be declared with `extern "C"` and follow the following naming convention: `__chip_<function_name_snake_case>_<type>` where type is one of `f32`, `f64`, `i32`, `i64`, `u32`, `u64`. For example, `__chip_sin_f32` is the device-side function that implements the `sin` function for `float` arguments. Note: this convention is not yet fully implemented.
38+
* All device-side functions should be declared with `extern "C"` and follow the following naming convention: `__chip_<function_name_snake_case>_<type>` where type is one of `f32`, `f64`, `i32`, `i64`, `u32`, `u64`. For example, `__chip_sin_f32` is the device-side function that implements the `sin` function for `float` arguments. Note: this convention is not yet fully implemented.
39+
40+
## Obfuscated Pointer Parameters
41+
42+
A selection of devicelib definitions use `__chip_obfuscated_ptr_t` as
43+
parameter type insted of regular pointer type. This is for working
44+
around LLVM-SPIR-VTranslation feature that attempts to recover
45+
original pointee types in LLVM bitcode input which uses opaque
46+
pointers (only option with the latest LLVM).
47+
48+
An issue with the feature is that the type inference is influenced by
49+
the surrounding code and this causes situations where same LLVM
50+
function declarations and definitions may end up to be have parameters
51+
with different pointee types across SPIR-V modules. Linking such
52+
modules together will probably trigger undefined behavior. Al least on
53+
Mesa's rusticl frontend the linking is known to fail.
54+
55+
The shortcoming of the type inference is worked around by passing the
56+
pointer arguments as some non-pointer type (`__chip_obfuscated_ptr_t`)
57+
and it's used for devicelib definitions which are linked at runtime.

bitcode/atomicAddDouble_emulation.cl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222

2323
// Implementations for emulated 64-bit floating point atomic add operations.
2424

25+
#include "cl_utils.h"
26+
2527
#ifndef __opencl_c_generic_address_space
2628
#error __opencl_c_generic_address_space needed!
2729
#endif
@@ -57,16 +59,17 @@ static OVERLOADED double __chip_atomic_add_f64(volatile global double *address,
5759
return as_double(r);
5860
}
5961

60-
double __chip_atomic_add_f64(generic double *address, double val) {
61-
volatile global double *gi = to_global(address);
62+
double __chip_atomic_add_f64(__chip_obfuscated_ptr_t address, double val) {
63+
volatile global double *gi = to_global(UNCOVER_OBFUSCATED_PTR(address));
6264
if (gi)
6365
return __chip_atomic_add_f64(gi, val);
64-
volatile local double *li = to_local(address);
66+
volatile local double *li = to_local(UNCOVER_OBFUSCATED_PTR(address));
6567
if (li)
6668
return __chip_atomic_add_f64(li, val);
6769
return 0;
6870
}
6971

70-
double __chip_atomic_add_system_f64(generic double *address, double val) {
72+
double __chip_atomic_add_system_f64(__chip_obfuscated_ptr_t address,
73+
double val) {
7174
return __chip_atomic_add_f64(address, val);
7275
}

bitcode/atomicAddDouble_native.cl

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
// Implementations for 64-bit floating point atomic operations using
2424
// OpenCL built-in extension.
2525

26+
#include "cl_utils.h"
27+
2628
#ifndef __opencl_c_generic_address_space
2729
#error __opencl_c_generic_address_space needed!
2830
#endif
@@ -37,9 +39,11 @@
3739
/* https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_float_atomics.html
3840
*/
3941
#define DEF_CHIP_ATOMIC2F_ORDER_SCOPE(NAME, OP, ORDER, SCOPE) \
40-
double __chip_atomic_##NAME##_f64(double *address, double i) { \
41-
return atomic_##OP##_explicit((volatile __generic double *)address, i, \
42-
memory_order_##ORDER, memory_scope_##SCOPE); \
42+
double __chip_atomic_##NAME##_f64(__chip_obfuscated_ptr_t address, \
43+
double i) { \
44+
return atomic_##OP##_explicit( \
45+
(volatile __generic double *)UNCOVER_OBFUSCATED_PTR(address), i, \
46+
memory_order_##ORDER, memory_scope_##SCOPE); \
4347
}
4448

4549
#define DEF_CHIP_ATOMIC2F(NAME, OP) \

bitcode/atomicAddFloat_emulation.cl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222

2323
// Implementations for emulated 32-bit floating point atomic add operations.
2424

25+
#include "cl_utils.h"
26+
2527
#ifndef __opencl_c_generic_address_space
2628
#error __opencl_c_generic_address_space needed!
2729
#endif
@@ -57,16 +59,16 @@ static OVERLOADED float __chip_atomic_add_f32(volatile global float *address,
5759
return as_float(r);
5860
}
5961

60-
float __chip_atomic_add_f32(generic float *address, float val) {
61-
volatile global float *gi = to_global(address);
62+
float __chip_atomic_add_f32(__chip_obfuscated_ptr_t address, float val) {
63+
volatile global float *gi = to_global(UNCOVER_OBFUSCATED_PTR(address));
6264
if (gi)
6365
return __chip_atomic_add_f32(gi, val);
64-
volatile local float *li = to_local(address);
66+
volatile local float *li = to_local(UNCOVER_OBFUSCATED_PTR(address));
6567
if (li)
6668
return __chip_atomic_add_f32(li, val);
6769
return 0;
6870
}
6971

70-
float __chip_atomic_add_system_f32(generic float *address, float val) {
72+
float __chip_atomic_add_system_f32(__chip_obfuscated_ptr_t address, float val) {
7173
return __chip_atomic_add_f32(address, val);
7274
}

bitcode/atomicAddFloat_native.cl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
// Implementations for 32-bit floating point atomic operations using
2424
// OpenCL built-in extension.
2525

26+
#include "cl_utils.h"
27+
2628
#ifndef __opencl_c_generic_address_space
2729
#error __opencl_c_generic_address_space needed!
2830
#endif
@@ -37,9 +39,10 @@
3739
/* https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_float_atomics.html
3840
*/
3941
#define DEF_CHIP_ATOMIC2F_ORDER_SCOPE(NAME, OP, ORDER, SCOPE) \
40-
float __chip_atomic_##NAME##_f32(float *address, float i) { \
41-
return atomic_##OP##_explicit((volatile __generic float *)address, i, \
42-
memory_order_##ORDER, memory_scope_##SCOPE); \
42+
float __chip_atomic_##NAME##_f32(__chip_obfuscated_ptr_t address, float i) { \
43+
return atomic_##OP##_explicit( \
44+
(volatile __generic float *)UNCOVER_OBFUSCATED_PTR(address), i, \
45+
memory_order_##ORDER, memory_scope_##SCOPE); \
4346
}
4447

4548
#define DEF_CHIP_ATOMIC2F(NAME, OP) \

bitcode/cl_utils.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* Copyright (c) 2024 chipStar developers
3+
*
4+
* Permission is hereby granted, free of charge, to any person obtaining a copy
5+
* of this software and associated documentation files (the "Software"), to deal
6+
* in the Software without restriction, including without limitation the rights
7+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8+
* copies of the Software, and to permit persons to whom the Software is
9+
* furnished to do so, subject to the following conditions:
10+
*
11+
* The above copyright notice and this permission notice shall be included
12+
* in all copies or substantial portions of the Software.
13+
*
14+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17+
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20+
* DEALINGS IN THE SOFTWARE.
21+
*/
22+
23+
// Shared OpenCL utilities.
24+
25+
#ifndef BITCODE_CL_UTILS_H
26+
#define BITCODE_CL_UTILS_H
27+
28+
// See bitcode/README-devicelib.md for the purpose of "obfuscated pointers".
29+
typedef ulong __chip_obfuscated_ptr_t;
30+
#define UNCOVER_OBFUSCATED_PTR(_PTR) ((generic void *)_PTR)
31+
32+
#endif // BITCODE_CL_UTILS_H

include/hip/devicelib/atomics.hh

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,13 @@
2626
#include <hip/host_defines.h>
2727
#include <hip/devicelib/macros.hh>
2828

29+
// See bitcode/README-devicelib.md for the purpose of "obfuscated pointers".
30+
typedef unsigned long __chip_obfuscated_ptr_t;
31+
inline __device__ __chip_obfuscated_ptr_t __chip_obfuscate_ptr(void *ptr) {
32+
static_assert(sizeof(__chip_obfuscated_ptr_t) == sizeof(void *), "");
33+
return reinterpret_cast<__chip_obfuscated_ptr_t>(ptr);
34+
}
35+
2936
// Copied from HIP programming guide:
3037
// https://docs.amd.com/bundle/HIP-Programming-Guide-v5.0/page/Programming_with_HIP.html
3138
// Slightly modified to group operations
@@ -63,14 +70,16 @@ atomicAdd(unsigned long long *address, unsigned long long val) {
6370
return __chip_atomic_add_l(address, val);
6471
}
6572

66-
extern "C" __device__ float __chip_atomic_add_f32(float *address, float val);
73+
extern "C" __device__ float
74+
__chip_atomic_add_f32(__chip_obfuscated_ptr_t address, float val);
6775
extern "C++" inline __device__ float atomicAdd(float *address, float val) {
68-
return __chip_atomic_add_f32(address, val);
76+
return __chip_atomic_add_f32(__chip_obfuscate_ptr(address), val);
6977
}
7078

71-
extern "C" __device__ double __chip_atomic_add_f64(double *address, double val);
79+
extern "C" __device__ double
80+
__chip_atomic_add_f64(__chip_obfuscated_ptr_t address, double val);
7281
extern "C++" inline __device__ double atomicAdd(double *address, double val) {
73-
return __chip_atomic_add_f64(address, val);
82+
return __chip_atomic_add_f64(__chip_obfuscate_ptr(address), val);
7483
}
7584

7685
extern "C" __device__ int __chip_atomic_add_system_i(int *address, int val);
@@ -92,18 +101,18 @@ atomicAdd_system(unsigned long long *address, unsigned long long val) {
92101
return __chip_atomic_add_system_l(address, val);
93102
}
94103

95-
extern "C" __device__ float __chip_atomic_add_system_f32(float *address,
96-
float val);
104+
extern "C" __device__ float
105+
__chip_atomic_add_system_f32(__chip_obfuscated_ptr_t address, float val);
97106
extern "C++" inline __device__ float atomicAdd_system(float *address,
98107
float val) {
99-
return __chip_atomic_add_system_f32(address, val);
108+
return __chip_atomic_add_system_f32(__chip_obfuscate_ptr(address), val);
100109
}
101110

102-
extern "C" __device__ double __chip_atomic_add_system_f64(double *address,
103-
double val);
111+
extern "C" __device__ double
112+
__chip_atomic_add_system_f64(__chip_obfuscated_ptr_t address, double val);
104113
extern "C++" inline __device__ double atomicAdd_system(double *address,
105114
double val) {
106-
return __chip_atomic_add_system_f64(address, val);
115+
return __chip_atomic_add_system_f64(__chip_obfuscate_ptr(address), val);
107116
}
108117

109118
extern "C" __device__ int __chip_atomic_sub_i(int *address, int val);
@@ -432,7 +441,7 @@ atomicXor_system(unsigned long long *address, unsigned long long val) {
432441

433442
// Undocumented
434443
extern "C++" inline __device__ void atomicAddNoRet(float *address, float val) {
435-
(void)__chip_atomic_add_f32(address, val);
444+
(void)__chip_atomic_add_f32(__chip_obfuscate_ptr(address), val);
436445
}
437446

438447
#endif // HIP_INLUDE_DEVICELIB_ATOMICS

0 commit comments

Comments
 (0)