diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index db9f88059ad9b..dc555c782202e 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -169,7 +169,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__PTX__"); Builder.defineMacro("__NVPTX__"); if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice || Opts.SYCLIsDevice) { - // Set __CUDA_ARCH__ for the GPU specified. + // Set __CUDA_ARCH__ or __SYCL_CUDA_ARCH__ for the GPU specified. + // The SYCL-specific macro is used to distinguish the SYCL and CUDA APIs. std::string CUDAArchCode = [this] { switch (GPU) { case CudaArch::GFX600: @@ -260,7 +261,12 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, } llvm_unreachable("unhandled CudaArch"); }(); - Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); + + if (Opts.SYCLIsDevice) { + Builder.defineMacro("__SYCL_CUDA_ARCH__", CUDAArchCode); + } else { + Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); + } } } diff --git a/clang/test/Preprocessor/sycl-macro.cpp b/clang/test/Preprocessor/sycl-macro.cpp index 79c4ff94b41fb..cab506771e74c 100644 --- a/clang/test/Preprocessor/sycl-macro.cpp +++ b/clang/test/Preprocessor/sycl-macro.cpp @@ -33,6 +33,7 @@ // CHECK-NO-SYCL_FIT_IN_INT-NOT:#define __SYCL_ID_QUERIES_FIT_IN_INT__ 1 // CHECK-SYCL-ID:#define __SYCL_ID_QUERIES_FIT_IN_INT__ 1 -// CHECK-CUDA:#define __CUDA_ARCH__ 800 +// CHECK-CUDA:#define __SYCL_CUDA_ARCH__ 800 +// CHECK-CUDA-NOT:#define __CUDA_ARCH__ 800 // CHECK-HIP:#define __CUDA_ARCH__ 0 diff --git a/sycl/include/sycl/ext/oneapi/bfloat16.hpp b/sycl/include/sycl/ext/oneapi/bfloat16.hpp index 06c1d8a966277..7f2b4c27ab39c 100644 --- a/sycl/include/sycl/ext/oneapi/bfloat16.hpp +++ b/sycl/include/sycl/ext/oneapi/bfloat16.hpp @@ -50,7 +50,7 @@ class bfloat16 { static detail::Bfloat16StorageT from_float(const float &a) { #if defined(__SYCL_DEVICE_ONLY__) #if defined(__NVPTX__) -#if (__CUDA_ARCH__ >= 800) +#if (__SYCL_CUDA_ARCH__ >= 800) return __nvvm_f2bf16_rn(a); #else // TODO find a better way to check for NaN @@ -126,7 +126,7 @@ class bfloat16 { friend bfloat16 operator-(bfloat16 &lhs) { #if defined(__SYCL_DEVICE_ONLY__) #if defined(__NVPTX__) -#if (__CUDA_ARCH__ >= 800) +#if (__SYCL_CUDA_ARCH__ >= 800) return detail::bitsToBfloat16(__nvvm_neg_bf16(lhs.value)); #else return -to_float(lhs.value);