Skip to content

Commit e15ac50

Browse files
authored
[SYCL][CUDA][libclc] Add atomic loads and stores with various memory orders and scopes (#5191)
Adds atomic loads and stores with various memory orders and scopes. These are implemented by adding intrinsics and builtins for PTX loads and stores. Tests for this are here: intel/llvm-test-suite#648
1 parent bf5d9d5 commit e15ac50

9 files changed

Lines changed: 1838 additions & 2 deletions

File tree

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 86 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2037,6 +2037,92 @@ TARGET_BUILTIN(__nvvm_atom_acq_rel_cas_shared_ll, "LLiLLiD*LLiLLi", "n", SM_70)
20372037
TARGET_BUILTIN(__nvvm_atom_acq_rel_cta_cas_shared_ll, "LLiLLiD*LLiLLi", "n", SM_70)
20382038
TARGET_BUILTIN(__nvvm_atom_acq_rel_sys_cas_shared_ll, "LLiLLiD*LLiLLi", "n", SM_70)
20392039

2040+
// Declares the volatile-load builtins for one address-space suffix:
//   __nvvm_volatile_ld<ADDR_SPACE>_{i,l,ll,f,d}
// Per Clang's builtin type-string encoding ('D' = volatile, '*' = pointer),
// each builtin takes a pointer to volatile T and returns T, for T = int, long,
// long long, float and double. They are declared with plain BUILTIN, i.e.
// without the SM_70 argument that the ordered/scoped load builtins pass to
// TARGET_BUILTIN.
// push_macro saves any pre-existing definition of the helper name so that it
// can be restored with pop_macro once the helper is no longer needed.
#pragma push_macro("LD_VOLATILE_BUILTIN_TYPES")
2041+
#define LD_VOLATILE_BUILTIN_TYPES(ADDR_SPACE) \
2042+
BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_i, "iiD*", "n") \
2043+
BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_l, "LiLiD*", "n") \
2044+
BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_ll, "LLiLLiD*", "n") \
2045+
BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_f, "ffD*", "n") \
2046+
BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_d, "ddD*", "n")
2047+
2048+
// Declares the load builtins for one (ORDER, SCOPE, ADDR_SPACE) combination:
//   __nvvm<ORDER><SCOPE>_ld<ADDR_SPACE>_{i,l,ll,f,d}
// ORDER and SCOPE are pasted verbatim into the name and may be empty, in which
// case that part of the name is simply absent. Each builtin takes a pointer to
// volatile T and returns T (int, long, long long, float, double), and every
// entry is declared via TARGET_BUILTIN with the SM_70 requirement.
#pragma push_macro("LD_BUILTIN_TYPES")
2049+
#define LD_BUILTIN_TYPES(ORDER, SCOPE, ADDR_SPACE) \
2050+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_i, "iiD*", "n", SM_70) \
2051+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_l, "LiLiD*", "n", SM_70) \
2052+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_ll, "LLiLLiD*", "n", SM_70) \
2053+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_f, "ffD*", "n", SM_70) \
2054+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_d, "ddD*", "n", SM_70)
2055+
2056+
// Fans LD_BUILTIN_TYPES out over the three address-space suffixes
// (_gen, _global, _shared) for a fixed ORDER and SCOPE.
#pragma push_macro("LD_BUILTIN_AS_TYPES")
2057+
#define LD_BUILTIN_AS_TYPES(ORDER, SCOPE) \
2058+
LD_BUILTIN_TYPES(ORDER, SCOPE, _gen) \
2059+
LD_BUILTIN_TYPES(ORDER, SCOPE, _global) \
2060+
LD_BUILTIN_TYPES(ORDER, SCOPE, _shared)
2061+
2062+
// Fans LD_BUILTIN_AS_TYPES out over the three scope suffixes for a fixed
// ORDER: no suffix (empty argument), _cta and _sys.
#pragma push_macro("LD_BUILTIN_SCOPES_AS_TYPES")
2063+
#define LD_BUILTIN_SCOPES_AS_TYPES(ORDER) \
2064+
LD_BUILTIN_AS_TYPES(ORDER, ) \
2065+
LD_BUILTIN_AS_TYPES(ORDER, _cta) \
2066+
LD_BUILTIN_AS_TYPES(ORDER, _sys)
2067+
2068+
// Instantiate the load builtins: first with an empty ORDER (no ordering
// suffix in the name), then with _acquire, each across all scope and
// address-space suffixes; finally the volatile loads per address space.
LD_BUILTIN_SCOPES_AS_TYPES()
2069+
LD_BUILTIN_SCOPES_AS_TYPES(_acquire)
2070+
LD_VOLATILE_BUILTIN_TYPES(_gen)
2071+
LD_VOLATILE_BUILTIN_TYPES(_global)
2072+
LD_VOLATILE_BUILTIN_TYPES(_shared)
2073+
2074+
// Remove the helper macros and restore whatever definitions (if any) were
// saved by the matching push_macro pragmas.
#undef LD_VOLATILE_BUILTIN_TYPES
2075+
#pragma pop_macro("LD_VOLATILE_BUILTIN_TYPES")
2076+
#undef LD_BUILTIN_TYPES
2077+
#pragma pop_macro("LD_BUILTIN_TYPES")
2078+
#undef LD_BUILTIN_AS_TYPES
2079+
#pragma pop_macro("LD_BUILTIN_AS_TYPES")
2080+
#undef LD_BUILTIN_SCOPES_AS_TYPES
2081+
#pragma pop_macro("LD_BUILTIN_SCOPES_AS_TYPES")
2082+
2083+
// Declares the volatile-store builtins for one address-space suffix:
//   __nvvm_volatile_st<ADDR_SPACE>_{i,l,ll,f,d}
// Per Clang's builtin type-string encoding ('v' = void, 'D' = volatile,
// '*' = pointer), each builtin takes (pointer to volatile T, T) and returns
// void, for T = int, long, long long, float and double. They are declared with
// plain BUILTIN, i.e. without the SM_70 argument that the ordered/scoped store
// builtins pass to TARGET_BUILTIN.
// push_macro saves any pre-existing definition of the helper name so that it
// can be restored with pop_macro once the helper is no longer needed.
#pragma push_macro("ST_VOLATILE_BUILTIN_TYPES")
2084+
#define ST_VOLATILE_BUILTIN_TYPES(ADDR_SPACE) \
2085+
BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_i, "viD*i", "n") \
2086+
BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_l, "vLiD*Li", "n") \
2087+
BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_ll, "vLLiD*LLi", "n") \
2088+
BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_f, "vfD*f", "n") \
2089+
BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_d, "vdD*d", "n")
2090+
2091+
// Declares the store builtins for one (ORDER, SCOPE, ADDR_SPACE) combination:
//   __nvvm<ORDER><SCOPE>_st<ADDR_SPACE>_{i,l,ll,f,d}
// ORDER and SCOPE are pasted verbatim into the name and may be empty, in which
// case that part of the name is simply absent. Each builtin takes
// (pointer to volatile T, T) and returns void, and every entry is declared via
// TARGET_BUILTIN with the SM_70 requirement.
#pragma push_macro("ST_BUILTIN_TYPES")
2092+
#define ST_BUILTIN_TYPES(ORDER, SCOPE, ADDR_SPACE) \
2093+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_i, "viD*i", "n", SM_70) \
2094+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_l, "vLiD*Li", "n", SM_70) \
2095+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_ll, "vLLiD*LLi", "n", SM_70) \
2096+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_f, "vfD*f", "n", SM_70) \
2097+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_d, "vdD*d", "n", SM_70)
2098+
2099+
// Fans ST_BUILTIN_TYPES out over the three address-space suffixes
// (_gen, _global, _shared) for a fixed ORDER and SCOPE.
#pragma push_macro("ST_BUILTIN_AS_TYPES")
2100+
#define ST_BUILTIN_AS_TYPES(ORDER, SCOPE) \
2101+
ST_BUILTIN_TYPES(ORDER, SCOPE, _gen) \
2102+
ST_BUILTIN_TYPES(ORDER, SCOPE, _global) \
2103+
ST_BUILTIN_TYPES(ORDER, SCOPE, _shared)
2104+
2105+
// Fans ST_BUILTIN_AS_TYPES out over the three scope suffixes for a fixed
// ORDER: no suffix (empty argument), _cta and _sys.
#pragma push_macro("ST_BUILTIN_SCOPES_AS_TYPES")
2106+
#define ST_BUILTIN_SCOPES_AS_TYPES(ORDER) \
2107+
ST_BUILTIN_AS_TYPES(ORDER, ) \
2108+
ST_BUILTIN_AS_TYPES(ORDER, _cta) \
2109+
ST_BUILTIN_AS_TYPES(ORDER, _sys)
2110+
2111+
// Instantiate the store builtins: first with an empty ORDER (no ordering
// suffix in the name), then with _release, each across all scope and
// address-space suffixes; finally the volatile stores per address space.
ST_BUILTIN_SCOPES_AS_TYPES()
2112+
ST_BUILTIN_SCOPES_AS_TYPES(_release)
2113+
ST_VOLATILE_BUILTIN_TYPES(_gen)
2114+
ST_VOLATILE_BUILTIN_TYPES(_global)
2115+
ST_VOLATILE_BUILTIN_TYPES(_shared)
2116+
2117+
// Remove the helper macros and restore whatever definitions (if any) were
// saved by the matching push_macro pragmas.
#undef ST_VOLATILE_BUILTIN_TYPES
2118+
#pragma pop_macro("ST_VOLATILE_BUILTIN_TYPES")
2119+
#undef ST_BUILTIN_TYPES
2120+
#pragma pop_macro("ST_BUILTIN_TYPES")
2121+
#undef ST_BUILTIN_AS_TYPES
2122+
#pragma pop_macro("ST_BUILTIN_AS_TYPES")
2123+
#undef ST_BUILTIN_SCOPES_AS_TYPES
2124+
#pragma pop_macro("ST_BUILTIN_SCOPES_AS_TYPES")
2125+
20402126
// Compiler Error Warn
20412127
BUILTIN(__nvvm_compiler_error, "vcC*4", "n")
20422128
BUILTIN(__nvvm_compiler_warn, "vcC*4", "n")

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 94 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17492,6 +17492,21 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
1749217492
Ptr->getType()}),
1749317493
{Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
1749417494
};
17495+
auto MakeScopedLd = [&](unsigned IntrinsicID) {
17496+
Value *Ptr = EmitScalarExpr(E->getArg(0));
17497+
return Builder.CreateCall(
17498+
CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
17499+
Ptr->getType()}),
17500+
{Ptr});
17501+
};
17502+
auto MakeScopedSt = [&](unsigned IntrinsicID) {
17503+
Value *Ptr = EmitScalarExpr(E->getArg(0));
17504+
return Builder.CreateCall(
17505+
CGM.getIntrinsic(
17506+
IntrinsicID,
17507+
{Ptr->getType(), Ptr->getType()->getPointerElementType()}),
17508+
{Ptr, EmitScalarExpr(E->getArg(1))});
17509+
};
1749517510
auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
1749617511
Value *Ptr = EmitScalarExpr(E->getArg(0));
1749717512
return Builder.CreateCall(
@@ -17507,6 +17522,85 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
1750717522
{Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
1750817523
};
1750917524
switch (BuiltinID) {
17525+
17526+
// Switch cases for the volatile-load builtins of one address-space suffix.
// The integer builtins (_i, _l, _ll) all lower to the single
// nvvm_ld<ADDR_SPACE>_i_volatile intrinsic and the floating-point builtins
// (_f, _d) to nvvm_ld<ADDR_SPACE>_f_volatile; the concrete width comes from the
// overload types MakeScopedLd passes to getIntrinsic.
#define LD_VOLATILE_CASES(ADDR_SPACE) \
17527+
case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_i: \
17528+
case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_l: \
17529+
case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_ll: \
17530+
return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_i_volatile); \
17531+
case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_f: \
17532+
case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_d: \
17533+
return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_f_volatile);
17534+
17535+
// Switch cases for the load builtins of one (ORDER, SCOPE, ADDR_SPACE)
// combination. Note the naming difference: the builtin is
// __nvvm<ORDER><SCOPE>_ld<ADDR_SPACE>_<type>, whereas the intrinsic is
// nvvm_ld<ADDR_SPACE>_<i|f><ORDER><SCOPE>. The integer builtins (_i, _l, _ll)
// share the "_i" intrinsic and the floating-point builtins (_f, _d) share the
// "_f" intrinsic; MakeScopedLd supplies the overload types.
#define LD_CASES(ORDER, SCOPE, ADDR_SPACE) \
17536+
case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_i: \
17537+
case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_l: \
17538+
case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_ll: \
17539+
return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_i##ORDER##SCOPE); \
17540+
case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_f: \
17541+
case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_d: \
17542+
return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_f##ORDER##SCOPE);
17543+
17544+
// Fans LD_CASES out over the three address-space suffixes
// (_gen, _global, _shared) for a fixed ORDER and SCOPE.
#define LD_CASES_AS(ORDER, SCOPE) \
17545+
LD_CASES(ORDER, SCOPE, _gen) \
17546+
LD_CASES(ORDER, SCOPE, _global) \
17547+
LD_CASES(ORDER, SCOPE, _shared)
17548+
17549+
// Fans LD_CASES_AS out over the three scope suffixes for a fixed ORDER:
// no suffix (empty argument), _cta and _sys.
#define LD_CASES_AS_SCOPES(ORDER) \
17550+
LD_CASES_AS(ORDER, ) \
17551+
LD_CASES_AS(ORDER, _cta) \
17552+
LD_CASES_AS(ORDER, _sys)
17553+
17554+
// Emit the case labels for every load builtin: empty ORDER (no ordering
// suffix) and _acquire across all scope and address-space suffixes, followed
// by the volatile loads per address space.
LD_CASES_AS_SCOPES()
17555+
LD_CASES_AS_SCOPES(_acquire)
17556+
LD_VOLATILE_CASES(_gen)
17557+
LD_VOLATILE_CASES(_global)
17558+
LD_VOLATILE_CASES(_shared)
17559+
17560+
// The helper macros are only needed to generate the cases above.
#undef LD_VOLATILE_CASES
17561+
#undef LD_CASES
17562+
#undef LD_CASES_AS
17563+
#undef LD_CASES_AS_SCOPES
17564+
17565+
// Switch cases for the volatile-store builtins of one address-space suffix.
// The integer builtins (_i, _l, _ll) all lower to the single
// nvvm_st<ADDR_SPACE>_i_volatile intrinsic and the floating-point builtins
// (_f, _d) to nvvm_st<ADDR_SPACE>_f_volatile; the concrete width comes from the
// overload types MakeScopedSt passes to getIntrinsic.
#define ST_VOLATILE_CASES(ADDR_SPACE) \
17566+
case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_i: \
17567+
case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_l: \
17568+
case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_ll: \
17569+
return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_i_volatile); \
17570+
case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_f: \
17571+
case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_d: \
17572+
return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_f_volatile);
17573+
17574+
// Switch cases for the store builtins of one (ORDER, SCOPE, ADDR_SPACE)
// combination. Note the naming difference: the builtin is
// __nvvm<ORDER><SCOPE>_st<ADDR_SPACE>_<type>, whereas the intrinsic is
// nvvm_st<ADDR_SPACE>_<i|f><ORDER><SCOPE>. The integer builtins (_i, _l, _ll)
// share the "_i" intrinsic and the floating-point builtins (_f, _d) share the
// "_f" intrinsic; MakeScopedSt supplies the overload types.
#define ST_CASES(ORDER, SCOPE, ADDR_SPACE) \
17575+
case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_i: \
17576+
case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_l: \
17577+
case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_ll: \
17578+
return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_i##ORDER##SCOPE); \
17579+
case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_f: \
17580+
case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_d: \
17581+
return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_f##ORDER##SCOPE);
17582+
17583+
// Fans ST_CASES out over the three address-space suffixes
// (_gen, _global, _shared) for a fixed ORDER and SCOPE.
#define ST_CASES_AS(ORDER, SCOPE) \
17584+
ST_CASES(ORDER, SCOPE, _gen) \
17585+
ST_CASES(ORDER, SCOPE, _global) \
17586+
ST_CASES(ORDER, SCOPE, _shared)
17587+
17588+
// Fans ST_CASES_AS out over the three scope suffixes for a fixed ORDER:
// no suffix (empty argument), _cta and _sys.
#define ST_CASES_AS_SCOPES(ORDER) \
17589+
ST_CASES_AS(ORDER, ) \
17590+
ST_CASES_AS(ORDER, _cta) \
17591+
ST_CASES_AS(ORDER, _sys)
17592+
17593+
// Emit the case labels for every store builtin: empty ORDER (no ordering
// suffix) and _release across all scope and address-space suffixes, followed
// by the volatile stores per address space.
ST_CASES_AS_SCOPES()
17594+
ST_CASES_AS_SCOPES(_release)
17595+
ST_VOLATILE_CASES(_gen)
17596+
ST_VOLATILE_CASES(_global)
17597+
ST_VOLATILE_CASES(_shared)
17598+
17599+
// The helper macros are only needed to generate the cases above.
#undef ST_VOLATILE_CASES
17600+
#undef ST_CASES
17601+
#undef ST_CASES_AS
17602+
#undef ST_CASES_AS_SCOPES
17603+
1751017604
case NVPTX::BI__nvvm_atom_add_gen_i:
1751117605
case NVPTX::BI__nvvm_atom_add_gen_l:
1751217606
case NVPTX::BI__nvvm_atom_add_gen_ll:

0 commit comments

Comments
 (0)