diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 34cc069c5b7ce..263600ffe12d9 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -824,19 +824,6 @@ static bool addSYCLDefaultTriple(Compilation &C, return true; } -// Prefix for Intel GPU specific targets used for -fsycl-targets -constexpr char IntelGPU[] = "intel_gpu_"; - -static llvm::Optional isIntelGPUTarget(StringRef Target) { - // Handle target specifications that resemble 'intel_gpu_*' here. These are - // 'spir64_gen' based. - if (Target.startswith(IntelGPU)) { - return tools::SYCL::gen::resolveGenDevice( - Target.drop_front(sizeof(IntelGPU) - 1)); - } - return llvm::None; -} - void Driver::CreateOffloadingDeviceToolChains(Compilation &C, InputList &Inputs) { @@ -845,6 +832,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // // We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA // or HIP type. However, mixed CUDA/HIP compilation is not supported. + using namespace tools::SYCL; bool IsCuda = llvm::any_of(Inputs, [](std::pair &I) { return types::isCuda(I.first); @@ -1122,12 +1110,24 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, for (StringRef Val : SYCLTargetsValues->getValues()) { StringRef UserTargetName(Val); - if (auto Device = isIntelGPUTarget(Val)) { + if (auto Device = gen::isGPUTarget(Val)) { if (Device->empty()) { Diag(clang::diag::err_drv_invalid_sycl_target) << Val; continue; } UserTargetName = "spir64_gen"; + } else if (auto Device = gen::isGPUTarget(Val)) { + if (Device->empty()) { + Diag(clang::diag::err_drv_invalid_sycl_target) << Val; + continue; + } + UserTargetName = "nvptx64-nvidia-cuda"; + } else if (auto Device = gen::isGPUTarget(Val)) { + if (Device->empty()) { + Diag(clang::diag::err_drv_invalid_sycl_target) << Val; + continue; + } + UserTargetName = "amdgcn-amd-amdhsa"; } if (!isValidSYCLTriple(MakeSYCLDeviceTriple(UserTargetName))) { @@ -5716,6 +5716,7 @@ class OffloadingActionBuilder final { } bool initialize() override { + using namespace tools::SYCL; // Get the SYCL toolchains. If we don't get any, the action builder will // know there is nothing to do related to SYCL offloading. auto SYCLTCRange = C.getOffloadToolChains(); @@ -5755,7 +5756,7 @@ class OffloadingActionBuilder final { llvm::StringMap FoundNormalizedTriples; for (StringRef Val : SYCLTargetsValues->getValues()) { StringRef UserTargetName(Val); - if (auto ValidDevice = isIntelGPUTarget(Val)) { + if (auto ValidDevice = gen::isGPUTarget(Val)) { if (ValidDevice->empty()) // Unrecognized, we have already diagnosed this earlier; skip. continue; @@ -5763,7 +5764,27 @@ class OffloadingActionBuilder final { GpuArchList.emplace_back(C.getDriver().MakeSYCLDeviceTriple( "spir64_gen"), ValidDevice->data()); UserTargetName = "spir64_gen"; + } else if (auto ValidDevice = + gen::isGPUTarget(Val)) { + if (ValidDevice->empty()) + // Unrecognized, we have already diagnosed this earlier; skip. + continue; + // Add the proper -device value to the list. + GpuArchList.emplace_back( + C.getDriver().MakeSYCLDeviceTriple("nvptx64-nvidia-cuda"), + ValidDevice->data()); + UserTargetName = "nvptx64-nvidia-cuda"; + } else if (auto ValidDevice = gen::isGPUTarget(Val)) { + if (ValidDevice->empty()) + // Unrecognized, we have already diagnosed this earlier; skip. + continue; + // Add the proper -device value to the list. + GpuArchList.emplace_back( + C.getDriver().MakeSYCLDeviceTriple("amdgcn-amd-amdhsa"), + ValidDevice->data()); + UserTargetName = "amdgcn-amd-amdhsa"; } + llvm::Triple TT(C.getDriver().MakeSYCLDeviceTriple(Val)); std::string NormalizedName = TT.normalize(); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 869bd83a7b065..6ef84262341c6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5192,10 +5192,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // between device and host where we should be able to use the offloading // arch to add the macro to the host compile. auto addTargetMacros = [&](const llvm::Triple &Triple) { - if (!Triple.isSPIR()) + if (!Triple.isSPIR() && !Triple.isNVPTX() && !Triple.isAMDGCN()) return; SmallString<64> Macro; - if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) { + if ((Triple.isSPIR() && + Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) || + Triple.isNVPTX() || Triple.isAMDGCN()) { StringRef Device = JA.getOffloadingArch(); if (!Device.empty()) { Macro = "-D"; diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index ae40b71f4c99b..794be18d06a35 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -596,60 +596,130 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C, StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) { StringRef Device; Device = llvm::StringSwitch(DeviceName) - .Cases("bdw", "8_0_0", "bdw") - .Cases("skl", "9_0_9", "skl") - .Cases("kbl", "9_1_9", "kbl") - .Cases("cfl", "9_2_9", "cfl") - .Cases("apl", "9_3_0", "apl") - .Cases("glk", "9_4_0", "glk") - .Cases("whl", "9_5_0", "whl") - .Cases("aml", "9_6_0", "aml") - .Cases("cml", "9_7_0", "cml") - .Cases("icllp", "11_0_0", "icllp") - .Cases("ehl", "11_2_0", "ehl") - .Cases("tgllp", "12_0_0", "tgllp") - .Case("rkl", "rkl") - .Case("adl_s", "adl_s") - .Case("rpl_s", "rpl_s") - .Case("adl_p", "adl_p") - .Case("adl_n", "adl_n") - .Cases("dg1", "12_10_0", "dg1") - .Case("acm_g10", "acm_g10") - .Case("acm_g11", "acm_g11") - .Case("acm_g12", "acm_g12") - .Case("pvc", "pvc") + .Cases("intel_gpu_bdw", "intel_gpu_8_0_0", "bdw") + .Cases("intel_gpu_skl", "intel_gpu_9_0_9", "skl") + .Cases("intel_gpu_kbl", "intel_gpu_9_1_9", "kbl") + .Cases("intel_gpu_cfl", "intel_gpu_9_2_9", "cfl") + .Cases("intel_gpu_apl", "intel_gpu_9_3_0", "apl") + .Cases("intel_gpu_glk", "intel_gpu_9_4_0", "glk") + .Cases("intel_gpu_whl", "intel_gpu_9_5_0", "whl") + .Cases("intel_gpu_aml", "intel_gpu_9_6_0", "aml") + .Cases("intel_gpu_cml", "intel_gpu_9_7_0", "cml") + .Cases("intel_gpu_icllp", "intel_gpu_11_0_0", "icllp") + .Cases("intel_gpu_ehl", "intel_gpu_11_2_0", "ehl") + .Cases("intel_gpu_tgllp", "intel_gpu_12_0_0", "tgllp") + .Case("intel_gpu_rkl", "rkl") + .Case("intel_gpu_adl_s", "adl_s") + .Case("intel_gpu_rpl_s", "rpl_s") + .Case("intel_gpu_adl_p", "adl_p") + .Case("intel_gpu_adl_n", "adl_n") + .Cases("intel_gpu_dg1", "intel_gpu_12_10_0", "dg1") + .Case("intel_gpu_acm_g10", "acm_g10") + .Case("intel_gpu_acm_g11", "acm_g11") + .Case("intel_gpu_acm_g12", "acm_g12") + .Case("intel_gpu_pvc", "pvc") + .Case("nvidia_gpu_sm_50", "sm_50") + .Case("nvidia_gpu_sm_52", "sm_52") + .Case("nvidia_gpu_sm_53", "sm_53") + .Case("nvidia_gpu_sm_60", "sm_60") + .Case("nvidia_gpu_sm_61", "sm_61") + .Case("nvidia_gpu_sm_62", "sm_62") + .Case("nvidia_gpu_sm_70", "sm_70") + .Case("nvidia_gpu_sm_72", "sm_72") + .Case("nvidia_gpu_sm_75", "sm_75") + .Case("nvidia_gpu_sm_80", "sm_80") + .Case("nvidia_gpu_sm_86", "sm_86") + .Case("nvidia_gpu_sm_87", "sm_87") + .Case("nvidia_gpu_sm_89", "sm_89") + .Case("nvidia_gpu_sm_90", "sm_90") + .Case("amd_gpu_gfx700", "gfx700") + .Case("amd_gpu_gfx701", "gfx701") + .Case("amd_gpu_gfx702", "gfx702") + .Case("amd_gpu_gfx801", "gfx801") + .Case("amd_gpu_gfx802", "gfx802") + .Case("amd_gpu_gfx803", "gfx803") + .Case("amd_gpu_gfx805", "gfx805") + .Case("amd_gpu_gfx810", "gfx810") + .Case("amd_gpu_gfx900", "gfx900") + .Case("amd_gpu_gfx902", "gfx902") + .Case("amd_gpu_gfx904", "gfx904") + .Case("amd_gpu_gfx906", "gfx906") + .Case("amd_gpu_gfx908", "gfx908") + .Case("amd_gpu_gfx90a", "gfx90a") + .Case("amd_gpu_gfx1010", "gfx1010") + .Case("amd_gpu_gfx1011", "gfx1011") + .Case("amd_gpu_gfx1012", "gfx1012") + .Case("amd_gpu_gfx1013", "gfx1013") + .Case("amd_gpu_gfx1030", "gfx1030") + .Case("amd_gpu_gfx1031", "gfx1031") + .Case("amd_gpu_gfx1032", "gfx1032") .Default(""); return Device; } -StringRef SYCL::gen::getGenDeviceMacro(StringRef DeviceName) { +SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) { SmallString<64> Macro; StringRef Ext = llvm::StringSwitch(DeviceName) - .Case("bdw", "BDW") - .Case("skl", "SKL") - .Case("kbl", "KBL") - .Case("cfl", "CFL") - .Case("apl", "APL") - .Case("glk", "GLK") - .Case("whl", "WHL") - .Case("aml", "AML") - .Case("cml", "CML") - .Case("icllp", "ICLLP") - .Case("ehl", "EHL") - .Case("tgllp", "TGLLP") - .Case("rkl", "RKL") - .Case("adl_s", "ADL_S") - .Case("rpl_s", "RPL_S") - .Case("adl_p", "ADL_P") - .Case("adl_n", "ADL_N") - .Case("dg1", "DG1") - .Case("acm_g10", "ACM_G10") - .Case("acm_g11", "ACM_G11") - .Case("acm_g12", "ACM_G12") - .Case("pvc", "PVC") + .Case("bdw", "INTEL_GPU_BDW") + .Case("skl", "INTEL_GPU_SKL") + .Case("kbl", "INTEL_GPU_KBL") + .Case("cfl", "INTEL_GPU_CFL") + .Case("apl", "INTEL_GPU_APL") + .Case("glk", "INTEL_GPU_GLK") + .Case("whl", "INTEL_GPU_WHL") + .Case("aml", "INTEL_GPU_AML") + .Case("cml", "INTEL_GPU_CML") + .Case("icllp", "INTEL_GPU_ICLLP") + .Case("ehl", "INTEL_GPU_EHL") + .Case("tgllp", "INTEL_GPU_TGLLP") + .Case("rkl", "INTEL_GPU_RKL") + .Case("adl_s", "INTEL_GPU_ADL_S") + .Case("rpl_s", "INTEL_GPU_RPL_S") + .Case("adl_p", "INTEL_GPU_ADL_P") + .Case("adl_n", "INTEL_GPU_ADL_N") + .Case("dg1", "INTEL_GPU_DG1") + .Case("acm_g10", "INTEL_GPU_ACM_G10") + .Case("acm_g11", "INTEL_GPU_ACM_G11") + .Case("acm_g12", "INTEL_GPU_ACM_G12") + .Case("pvc", "INTEL_GPU_PVC") + .Case("sm_50", "NVIDIA_GPU_SM_50") + .Case("sm_52", "NVIDIA_GPU_SM_52") + .Case("sm_53", "NVIDIA_GPU_SM_53") + .Case("sm_60", "NVIDIA_GPU_SM_60") + .Case("sm_61", "NVIDIA_GPU_SM_61") + .Case("sm_62", "NVIDIA_GPU_SM_62") + .Case("sm_70", "NVIDIA_GPU_SM_70") + .Case("sm_72", "NVIDIA_GPU_SM_72") + .Case("sm_75", "NVIDIA_GPU_SM_75") + .Case("sm_80", "NVIDIA_GPU_SM_80") + .Case("sm_86", "NVIDIA_GPU_SM_86") + .Case("sm_87", "NVIDIA_GPU_SM_87") + .Case("sm_89", "NVIDIA_GPU_SM_89") + .Case("sm_90", "NVIDIA_GPU_SM_90") + .Case("gfx700", "AMD_GPU_GFX700") + .Case("gfx701", "AMD_GPU_GFX701") + .Case("gfx702", "AMD_GPU_GFX702") + .Case("gfx801", "AMD_GPU_GFX801") + .Case("gfx802", "AMD_GPU_GFX802") + .Case("gfx803", "AMD_GPU_GFX803") + .Case("gfx805", "AMD_GPU_GFX805") + .Case("gfx810", "AMD_GPU_GFX810") + .Case("gfx900", "AMD_GPU_GFX900") + .Case("gfx902", "AMD_GPU_GFX902") + .Case("gfx904", "AMD_GPU_GFX904") + .Case("gfx906", "AMD_GPU_GFX906") + .Case("gfx908", "AMD_GPU_GFX908") + .Case("gfx90a", "AMD_GPU_GFX90A") + .Case("gfx1010", "AMD_GPU_GFX1010") + .Case("gfx1011", "AMD_GPU_GFX1011") + .Case("gfx1012", "AMD_GPU_GFX1012") + .Case("gfx1013", "AMD_GPU_GFX1013") + .Case("gfx1030", "AMD_GPU_GFX1030") + .Case("gfx1031", "AMD_GPU_GFX1031") + .Case("gfx1032", "AMD_GPU_GFX1032") .Default(""); if (!Ext.empty()) { - Macro = "__SYCL_TARGET_INTEL_GPU_"; + Macro = "__SYCL_TARGET_"; Macro += Ext; Macro += "__"; } @@ -759,6 +829,25 @@ static void parseTargetOpts(StringRef ArgString, const llvm::opt::ArgList &Args, CmdArgs.push_back(Args.MakeArgString(TA)); } +void SYCLToolChain::TranslateGPUTargetOpt(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, + OptSpecifier Opt_EQ) const { + for (auto *A : Args) { + if (A->getOption().matches(Opt_EQ)) { + if (auto GpuDevice = + tools::SYCL::gen::isGPUTarget( + A->getValue())) { + StringRef ArgString; + SmallString<64> OffloadArch("--offload-arch="); + OffloadArch += GpuDevice->data(); + ArgString = OffloadArch; + parseTargetOpts(ArgString, Args, CmdArgs); + A->claim(); + } + } + } +} + // Expects a specific type of option (e.g. -Xsycl-target-backend) and will // extract the arguments. void SYCLToolChain::TranslateTargetOpt(const llvm::opt::ArgList &Args, @@ -914,6 +1003,7 @@ void SYCLToolChain::TranslateBackendTargetArgs( // Handle -Xsycl-target-backend. TranslateTargetOpt(Args, CmdArgs, options::OPT_Xsycl_backend, options::OPT_Xsycl_backend_EQ, Device); + TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_fsycl_targets_EQ); } void SYCLToolChain::TranslateLinkerTargetArgs( diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index fb78d2ab7e270..01f4e9c15271c 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -106,7 +106,21 @@ class LLVM_LIBRARY_VISIBILITY BackendCompiler : public Tool { }; StringRef resolveGenDevice(StringRef DeviceName); -StringRef getGenDeviceMacro(StringRef DeviceName); +SmallString<64> getGenDeviceMacro(StringRef DeviceName); + +// // Prefix for GPU specific targets used for -fsycl-targets +constexpr char IntelGPU[] = "intel_gpu_"; +constexpr char NvidiaGPU[] = "nvidia_gpu_"; +constexpr char AmdGPU[] = "amd_gpu_"; + +template llvm::Optional isGPUTarget(StringRef Target) { + // Handle target specifications that resemble '(intel, nvidia, amd)_gpu_*' + // here. + if (Target.startswith(GPUArh)) { + return resolveGenDevice(Target); + } + return llvm::None; +} } // end namespace gen @@ -189,6 +203,9 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { llvm::opt::OptSpecifier Opt, llvm::opt::OptSpecifier Opt_EQ, StringRef Device) const; + void TranslateGPUTargetOpt(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, + llvm::opt::OptSpecifier Opt_EQ) const; }; } // end namespace toolchains diff --git a/clang/test/Driver/sycl-intel-gpu.cpp b/clang/test/Driver/sycl-intel-gpu.cpp deleted file mode 100644 index 40fda7134cbf8..0000000000000 --- a/clang/test/Driver/sycl-intel-gpu.cpp +++ /dev/null @@ -1,197 +0,0 @@ -/// Tests the behaviors of using -fsycl-targets=intel_gpu* - -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_bdw -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bdw -DMAC_STR=BDW -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_8_0_0 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bdw -DMAC_STR=BDW -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=skl -DMAC_STR=SKL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_0_9 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=skl -DMAC_STR=SKL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_kbl -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=kbl -DMAC_STR=KBL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_1_9 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=kbl -DMAC_STR=KBL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_cfl -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cfl -DMAC_STR=CFL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_2_9 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cfl -DMAC_STR=CFL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_apl -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=apl -DMAC_STR=APL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_3_0 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=apl -DMAC_STR=APL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_glk -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=glk -DMAC_STR=GLK -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_4_0 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=glk -DMAC_STR=GLK -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_whl -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=whl -DMAC_STR=WHL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_5_0 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=whl -DMAC_STR=WHL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_aml -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=aml -DMAC_STR=AML -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_6_0 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=aml -DMAC_STR=AML -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_cml -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cml -DMAC_STR=CML -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_7_0 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cml -DMAC_STR=CML -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_icllp -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=icllp \ -// RUN: -DMAC_STR=ICLLP -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_11_0_0 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=icllp \ -// RUN: -DMAC_STR=ICLLP -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_ehl -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=ehl -DMAC_STR=EHL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_11_2_0 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=ehl -DMAC_STR=EHL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_tgllp -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=tgllp \ -// RUN: -DMAC_STR=TGLLP -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_0_0 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=tgllp \ -// RUN: -DMAC_STR=TGLLP -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_rkl -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=rkl -DMAC_STR=RKL -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_adl_s -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=adl_s \ -// RUN: -DMAC_STR=ADL_S -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_rpl_s -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=rpl_s \ -// RUN: -DMAC_STR=RPL_S -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_adl_p -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=adl_p \ -// RUN: -DMAC_STR=ADL_P -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_adl_n -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=adl_n \ -// RUN: -DMAC_STR=ADL_N -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=dg1 -DMAC_STR=DG1 -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_10_0 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=dg1 -DMAC_STR=DG1 -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g10 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=acm_g10 \ -// RUN: -DMAC_STR=ACM_G10 -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g11 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=acm_g11 \ -// RUN: -DMAC_STR=ACM_G11 -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g12 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=acm_g12 \ -// RUN: -DMAC_STR=ACM_G12 -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc -DMAC_STR=PVC -// MACRO: clang{{.*}} "-triple" "spir64_gen-unknown-unknown" -// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__" -// DEVICE: ocloc{{.*}} "-device" "[[DEV_STR]]" -// MACRO: clang{{.*}} "-fsycl-is-host" -// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__" - -/// -fsycl-targets=spir64_x86_64 should set a specific macro -// RUN: %clangxx -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefix=MACRO_X86_64 -// RUN: %clang_cl -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefix=MACRO_X86_64 -// MACRO_X86_64: clang{{.*}} "-triple" "spir64_x86_64-unknown-unknown" -// MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__" -// MACRO_X86_64: clang{{.*}} "-fsycl-is-host" -// MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__" - -/// test for invalid arch -// RUN: %clangxx -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefix=BAD_INPUT -// RUN: %clang_cl -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefix=BAD_INPUT -// BAD_INPUT: error: SYCL target is invalid: 'intel_gpu_bad' - -/// Test for proper creation of fat object -// RUN: %clangxx -c -fsycl -fsycl-targets=intel_gpu_skl \ -// RUN: -target x86_64-unknown-linux-gnu -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefix=FATO -// FATO: clang-offload-bundler{{.*}} "-type=o" -// FATO: "-targets=sycl-spir64_gen-unknown-unknown-skl,host-x86_64-unknown-linux-gnu" - -/// Test for proper consumption of fat object -// RUN: touch %t.o -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl \ -// RUN: -target x86_64-unknown-linux-gnu -### %t.o 2>&1 | \ -// RUN: FileCheck %s --check-prefix=CONSUME_FAT -// CONSUME_FAT: clang-offload-bundler{{.*}} "-type=o" -// CONSUME_FAT: "-targets=host-x86_64-unknown-linux-gnu,sycl-spir64_gen-unknown-unknown-skl" -// CONSUME_FAT: "-unbundle" "-allow-missing-bundles" - -/// Test phases, BoundArch settings used for -device target. Additional -/// offload action used for compilation and backend compilation. -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl -fno-sycl-device-lib=all \ -// RUN: -fno-sycl-instrument-device-code \ -// RUN: -target x86_64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | \ -// RUN: FileCheck %s --check-prefix=CHECK_PHASES -// CHECK_PHASES: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl) -// CHECK_PHASES: 1: append-footer, {0}, c++, (host-sycl) -// CHECK_PHASES: 2: preprocessor, {1}, c++-cpp-output, (host-sycl) -// CHECK_PHASES: 3: input, "[[INPUT]]", c++, (device-sycl, skl) -// CHECK_PHASES: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, skl) -// CHECK_PHASES: 5: compiler, {4}, ir, (device-sycl, skl) -// CHECK_PHASES: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (spir64_gen-unknown-unknown:skl)" {5}, c++-cpp-output -// CHECK_PHASES: 7: compiler, {6}, ir, (host-sycl) -// CHECK_PHASES: 8: backend, {7}, assembler, (host-sycl) -// CHECK_PHASES: 9: assembler, {8}, object, (host-sycl) -// CHECK_PHASES: 10: linker, {9}, image, (host-sycl) -// CHECK_PHASES: 11: linker, {5}, ir, (device-sycl, skl) -// CHECK_PHASES: 12: sycl-post-link, {11}, tempfiletable, (device-sycl, skl) -// CHECK_PHASES: 13: file-table-tform, {12}, tempfilelist, (device-sycl, skl) -// CHECK_PHASES: 14: llvm-spirv, {13}, tempfilelist, (device-sycl, skl) -// CHECK_PHASES: 15: backend-compiler, {14}, image, (device-sycl, skl) -// CHECK_PHASES: 16: file-table-tform, {12, 15}, tempfiletable, (device-sycl, skl) -// CHECK_PHASES: 17: clang-offload-wrapper, {16}, object, (device-sycl, skl) -// CHECK_PHASES: 18: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (spir64_gen-unknown-unknown:skl)" {17}, image - -/// Check that ocloc and macro settings only occur for the expected toolchains -/// when mixing spir64_gen and intel_gpu -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64_gen \ -// RUN: -Xsycl-target-backend=spir64_gen "-device skl" \ -// RUN: -fno-sycl-device-lib=all -fno-sycl-instrument-device-code \ -// RUN: -target x86_64-unknown-linux-gnu -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefix=CHECK_TOOLS_MIX -// CHECK_TOOLS_MIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown" -// CHECK_TOOLS_MIX: "-D__SYCL_TARGET_INTEL_GPU_DG1__" -// CHECK_TOOLS_MIX: ocloc{{.*}} "-device" "dg1" -// CHECK_TOOLS_MIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown" -// CHECK_TOOLS_MIX-NOT: "-D__SYCL_TARGET_INTEL_GPU{{.*}}" -// CHECK_TOOLS_MIX: ocloc{{.*}} "-device" "skl" - -/// Test phases when using both spir64_gen and intel_gpu* -// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl,spir64_gen \ -// RUN: -fno-sycl-device-lib=all -fno-sycl-instrument-device-code \ -// RUN: -target x86_64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | \ -// RUN: FileCheck %s --check-prefix=CHECK_PHASES_MIX -// CHECK_PHASES_MIX: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl) -// CHECK_PHASES_MIX: 1: append-footer, {0}, c++, (host-sycl) -// CHECK_PHASES_MIX: 2: preprocessor, {1}, c++-cpp-output, (host-sycl) -// CHECK_PHASES_MIX: 3: input, "[[INPUT]]", c++, (device-sycl) -// CHECK_PHASES_MIX: 4: preprocessor, {3}, c++-cpp-output, (device-sycl) -// CHECK_PHASES_MIX: 5: compiler, {4}, ir, (device-sycl) -// CHECK_PHASES_MIX: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (spir64_gen-unknown-unknown)" {5}, c++-cpp-output -// CHECK_PHASES_MIX: 7: compiler, {6}, ir, (host-sycl) -// CHECK_PHASES_MIX: 8: backend, {7}, assembler, (host-sycl) -// CHECK_PHASES_MIX: 9: assembler, {8}, object, (host-sycl) -// CHECK_PHASES_MIX: 10: linker, {9}, image, (host-sycl) -// CHECK_PHASES_MIX: 11: input, "[[INPUT]]", c++, (device-sycl, skl) -// CHECK_PHASES_MIX: 12: preprocessor, {11}, c++-cpp-output, (device-sycl, skl) -// CHECK_PHASES_MIX: 13: compiler, {12}, ir, (device-sycl, skl) -// CHECK_PHASES_MIX: 14: linker, {13}, ir, (device-sycl, skl) -// CHECK_PHASES_MIX: 15: sycl-post-link, {14}, tempfiletable, (device-sycl, skl) -// CHECK_PHASES_MIX: 16: file-table-tform, {15}, tempfilelist, (device-sycl, skl) -// CHECK_PHASES_MIX: 17: llvm-spirv, {16}, tempfilelist, (device-sycl, skl) -// CHECK_PHASES_MIX: 18: backend-compiler, {17}, image, (device-sycl, skl) -// CHECK_PHASES_MIX: 19: file-table-tform, {15, 18}, tempfiletable, (device-sycl, skl) -// CHECK_PHASES_MIX: 20: clang-offload-wrapper, {19}, object, (device-sycl, skl) -// CHECK_PHASES_MIX: 21: linker, {5}, ir, (device-sycl) -// CHECK_PHASES_MIX: 22: sycl-post-link, {21}, tempfiletable, (device-sycl) -// CHECK_PHASES_MIX: 23: file-table-tform, {22}, tempfilelist, (device-sycl) -// CHECK_PHASES_MIX: 24: llvm-spirv, {23}, tempfilelist, (device-sycl) -// CHECK_PHASES_MIX: 25: backend-compiler, {24}, image, (device-sycl) -// CHECK_PHASES_MIX: 26: file-table-tform, {22, 25}, tempfiletable, (device-sycl) -// CHECK_PHASES_MIX: 27: clang-offload-wrapper, {26}, object, (device-sycl) -// CHECK_PHASES_MIX: 28: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (spir64_gen-unknown-unknown:skl)" {20}, "device-sycl (spir64_gen-unknown-unknown)" {27}, image - diff --git a/clang/test/Driver/sycl-oneapi-gpu.cpp b/clang/test/Driver/sycl-oneapi-gpu.cpp new file mode 100644 index 0000000000000..c64929873f281 --- /dev/null +++ b/clang/test/Driver/sycl-oneapi-gpu.cpp @@ -0,0 +1,384 @@ +/// Tests the behaviors of using -fsycl-targets=intel_gpu* + +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_bdw -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bdw -DMAC_STR=BDW +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_8_0_0 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bdw -DMAC_STR=BDW +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=skl -DMAC_STR=SKL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_0_9 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=skl -DMAC_STR=SKL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_kbl -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=kbl -DMAC_STR=KBL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_1_9 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=kbl -DMAC_STR=KBL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_cfl -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cfl -DMAC_STR=CFL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_2_9 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cfl -DMAC_STR=CFL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_apl -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=apl -DMAC_STR=APL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_3_0 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=apl -DMAC_STR=APL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_glk -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=glk -DMAC_STR=GLK +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_4_0 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=glk -DMAC_STR=GLK +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_whl -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=whl -DMAC_STR=WHL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_5_0 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=whl -DMAC_STR=WHL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_aml -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=aml -DMAC_STR=AML +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_6_0 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=aml -DMAC_STR=AML +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_cml -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cml -DMAC_STR=CML +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_7_0 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cml -DMAC_STR=CML +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_icllp -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=icllp \ +// RUN: -DMAC_STR=ICLLP +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_11_0_0 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=icllp \ +// RUN: -DMAC_STR=ICLLP +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_ehl -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=ehl -DMAC_STR=EHL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_11_2_0 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=ehl -DMAC_STR=EHL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_tgllp -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=tgllp \ +// RUN: -DMAC_STR=TGLLP +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_0_0 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=tgllp \ +// RUN: -DMAC_STR=TGLLP +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_rkl -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=rkl -DMAC_STR=RKL +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_adl_s -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=adl_s \ +// RUN: -DMAC_STR=ADL_S +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_rpl_s -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=rpl_s \ +// RUN: -DMAC_STR=RPL_S +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_adl_p -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=adl_p \ +// RUN: -DMAC_STR=ADL_P +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_adl_n -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=adl_n \ +// RUN: -DMAC_STR=ADL_N +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=dg1 -DMAC_STR=DG1 +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_10_0 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=dg1 -DMAC_STR=DG1 +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g10 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=acm_g10 \ +// RUN: -DMAC_STR=ACM_G10 +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g11 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=acm_g11 \ +// RUN: -DMAC_STR=ACM_G11 +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g12 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=acm_g12 \ +// RUN: -DMAC_STR=ACM_G12 +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc -DMAC_STR=PVC +// MACRO: clang{{.*}} "-triple" "spir64_gen-unknown-unknown" +// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__" +// DEVICE: ocloc{{.*}} "-device" "[[DEV_STR]]" +// MACRO: clang{{.*}} "-fsycl-is-host" +// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__" + +/// Tests the behaviors of using -fsycl-targets=nvidia_gpu* + +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_50 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_50 -DMAC_STR=SM_50 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_52 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_52 -DMAC_STR=SM_52 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_53 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_53 -DMAC_STR=SM_53 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_60 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_60 -DMAC_STR=SM_60 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_61 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_61 -DMAC_STR=SM_61 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_62 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_62 -DMAC_STR=SM_62 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_70 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_70 -DMAC_STR=SM_70 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_72 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_72 -DMAC_STR=SM_72 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_75 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_75 -DMAC_STR=SM_75 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_80 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_80 -DMAC_STR=SM_80 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_86 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_86 -DMAC_STR=SM_86 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_87 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_87 -DMAC_STR=SM_87 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_89 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_89 -DMAC_STR=SM_89 +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_90 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_90 -DMAC_STR=SM_90 +// MACRO_NVIDIA: clang{{.*}} "-triple" "nvptx64-nvidia-cuda" +// DEVICE_NVIDIA: llvm-foreach{{.*}} "--gpu-name" "[[DEV_STR]]" +// MACRO_NVIDIA: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" +// MACRO_NVIDIA: "-D__SYCL_TARGET_NVIDIA_GPU_[[MAC_STR]]__" + +/// Tests the behaviors of using -fsycl-targets=amd_gpu* + +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx700 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx700 -DMAC_STR=GFX700 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx701 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx701 -DMAC_STR=GFX701 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx702 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx702 -DMAC_STR=GFX702 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx801 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx801 -DMAC_STR=GFX801 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx802 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx802 -DMAC_STR=GFX802 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx803 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx803 -DMAC_STR=GFX803 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx805 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx805 -DMAC_STR=GFX805 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx810 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx810 -DMAC_STR=GFX810 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx900 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx900 -DMAC_STR=GFX900 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx902 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx902 -DMAC_STR=GFX902 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx904 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx904 -DMAC_STR=GFX904 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx906 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx906 -DMAC_STR=GFX906 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx908 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx908 -DMAC_STR=GFX908 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx90a -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx90a -DMAC_STR=GFX90A +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1010 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1010 -DMAC_STR=GFX1010 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1011 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1011 -DMAC_STR=GFX1011 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1012 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1012 -DMAC_STR=GFX1012 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1013 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1013 -DMAC_STR=GFX1013 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1030 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1030 -DMAC_STR=GFX1030 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1031 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1031 -DMAC_STR=GFX1031 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1032 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1032 -DMAC_STR=GFX1032 +// MACRO_AMD: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" +// MACRO_AMD: "-D__SYCL_TARGET_AMD_GPU_[[MAC_STR]]__" +// DEVICE_AMD: clang-offload-wrapper{{.*}} "-compile-opts=--offload-arch=[[DEV_STR]]" +// MACRO_AMD: clang{{.*}} "-fsycl-is-host" +// MACRO_AMD: "-D__SYCL_TARGET_AMD_GPU_[[MAC_STR]]__" + +/// -fsycl-targets=spir64_x86_64 should set a specific macro +// RUN: %clangxx -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=MACRO_X86_64 +// RUN: %clang_cl -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=MACRO_X86_64 +// MACRO_X86_64: clang{{.*}} "-triple" "spir64_x86_64-unknown-unknown" +// MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__" +// MACRO_X86_64: clang{{.*}} "-fsycl-is-host" +// MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__" + +/// test for invalid intel arch +// RUN: %clangxx -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=BAD_INPUT +// RUN: %clang_cl -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=BAD_INPUT +// BAD_INPUT: error: SYCL target is invalid: 'intel_gpu_bad' + +/// test for invalid nvidia arch +// RUN: %clangxx -c -fsycl -fsycl-targets=nvidia_gpu_bad -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=BAD_NVIDIA_INPUT +// RUN: %clang_cl -c -fsycl -fsycl-targets=nvidia_gpu_bad -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=BAD_NVIDIA_INPUT +// BAD_NVIDIA_INPUT: error: SYCL target is invalid: 'nvidia_gpu_bad' + +/// test for invalid amd arch +// RUN: %clangxx -c -fsycl -fsycl-targets=amd_gpu_bad -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=BAD_AMD_INPUT +// RUN: %clang_cl -c -fsycl -fsycl-targets=amd_gpu_bad -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=BAD_AMD_INPUT +// BAD_AMD_INPUT: error: SYCL target is invalid: 'amd_gpu_bad' + +/// Test for proper creation of fat object +// RUN: %clangxx -c -fsycl -fsycl-targets=intel_gpu_skl \ +// RUN: -target x86_64-unknown-linux-gnu -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=FATO +// FATO: clang-offload-bundler{{.*}} "-type=o" +// FATO: "-targets=sycl-spir64_gen-unknown-unknown-skl,host-x86_64-unknown-linux-gnu" + +/// Test for proper creation of fat object +// RUN: %clangxx -c -fsycl -fsycl-targets=nvidia_gpu_sm_50 \ +// RUN: -target x86_64-unknown-linux-gnu -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=NVIDIA_FATO +// NVIDIA_FATO: clang-offload-bundler{{.*}} "-type=o" +// NVIDIA_FATO: "-targets=sycl-nvptx64-nvidia-cuda-sm_50,host-x86_64-unknown-linux-gnu" + +/// Test for proper creation of fat object +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx700 \ +// RUN: -target x86_64-unknown-linux-gnu -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AMD_FATO +// AMD_FATO: clang-offload-bundler{{.*}} "-type=o" +// AMD_FATO: "-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx700" + +/// Test for proper consumption of fat object +// RUN: touch %t.o +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl \ +// RUN: -target x86_64-unknown-linux-gnu -### %t.o 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CONSUME_FAT +// CONSUME_FAT: clang-offload-bundler{{.*}} "-type=o" +// CONSUME_FAT: "-targets=host-x86_64-unknown-linux-gnu,sycl-spir64_gen-unknown-unknown-skl" +// CONSUME_FAT: "-unbundle" "-allow-missing-bundles" + +/// Test for proper consumption of fat object +// RUN: touch %t.o +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_50 \ +// RUN: -target x86_64-unknown-linux-gnu -### %t.o 2>&1 | \ +// RUN: FileCheck %s --check-prefix=NVIDIA_CONSUME_FAT +// NVIDIA_CONSUME_FAT: clang-offload-bundler{{.*}} "-type=o" +// NVIDIA_CONSUME_FAT: "-targets=host-x86_64-unknown-linux-gnu,sycl-nvptx64-nvidia-cuda-sm_50" +// NVIDIA_CONSUME_FAT: "-unbundle" "-allow-missing-bundles" + +/// Test for proper consumption of fat object +// RUN: touch %t.o +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx700 \ +// RUN: -target x86_64-unknown-linux-gnu -### %t.o 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AMD_CONSUME_FAT +// AMD_CONSUME_FAT: clang-offload-bundler{{.*}} "-type=o" +// AMD_CONSUME_FAT: "-targets=host-x86_64-unknown-linux-gnu,sycl-amdgcn-amd-amdhsa-gfx700" +// AMD_CONSUME_FAT: "-unbundle" "-allow-missing-bundles" + +/// Test phases, BoundArch settings used for -device target. Additional +/// offload action used for compilation and backend compilation. +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl -fno-sycl-device-lib=all \ +// RUN: -fno-sycl-instrument-device-code \ +// RUN: -target x86_64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK_PHASES +// CHECK_PHASES: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl) +// CHECK_PHASES: 1: append-footer, {0}, c++, (host-sycl) +// CHECK_PHASES: 2: preprocessor, {1}, c++-cpp-output, (host-sycl) +// CHECK_PHASES: 3: input, "[[INPUT]]", c++, (device-sycl, skl) +// CHECK_PHASES: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, skl) +// CHECK_PHASES: 5: compiler, {4}, ir, (device-sycl, skl) +// CHECK_PHASES: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (spir64_gen-unknown-unknown:skl)" {5}, c++-cpp-output +// CHECK_PHASES: 7: compiler, {6}, ir, (host-sycl) +// CHECK_PHASES: 8: backend, {7}, assembler, (host-sycl) +// CHECK_PHASES: 9: assembler, {8}, object, (host-sycl) +// CHECK_PHASES: 10: linker, {9}, image, (host-sycl) +// CHECK_PHASES: 11: linker, {5}, ir, (device-sycl, skl) +// CHECK_PHASES: 12: sycl-post-link, {11}, tempfiletable, (device-sycl, skl) +// CHECK_PHASES: 13: file-table-tform, {12}, tempfilelist, (device-sycl, skl) +// CHECK_PHASES: 14: llvm-spirv, {13}, tempfilelist, (device-sycl, skl) +// CHECK_PHASES: 15: backend-compiler, {14}, image, (device-sycl, skl) +// CHECK_PHASES: 16: file-table-tform, {12, 15}, tempfiletable, (device-sycl, skl) +// CHECK_PHASES: 17: clang-offload-wrapper, {16}, object, (device-sycl, skl) +// CHECK_PHASES: 18: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (spir64_gen-unknown-unknown:skl)" {17}, image + +/// NVIDIA Test phases, BoundArch settings used for -device target. Additional +/// offload action used for compilation and backend compilation. +// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_50 -fno-sycl-device-lib=all \ +// RUN: -fno-sycl-instrument-device-code \ +// RUN: -target x86_64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=NVIDIA_CHECK_PHASES +// NVIDIA_CHECK_PHASES: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl) +// NVIDIA_CHECK_PHASES: 1: append-footer, {0}, c++, (host-sycl) +// NVIDIA_CHECK_PHASES: 2: preprocessor, {1}, c++-cpp-output, (host-sycl) +// NVIDIA_CHECK_PHASES: 3: input, "[[INPUT]]", c++, (device-sycl, sm_50) +// NVIDIA_CHECK_PHASES: 5: compiler, {4}, ir, (device-sycl, sm_50) +// NVIDIA_CHECK_PHASES: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {5}, c++-cpp-output +// NVIDIA_CHECK_PHASES: 7: compiler, {6}, ir, (host-sycl) +// NVIDIA_CHECK_PHASES: 8: backend, {7}, assembler, (host-sycl) +// NVIDIA_CHECK_PHASES: 9: assembler, {8}, object, (host-sycl) +// NVIDIA_CHECK_PHASES: 10: linker, {9}, image, (host-sycl) +// NVIDIA_CHECK_PHASES: 11: linker, {5}, ir, (device-sycl, sm_50) +// NVIDIA_CHECK_PHASES: 12: sycl-post-link, {11}, ir, (device-sycl, sm_50) +// NVIDIA_CHECK_PHASES: 13: file-table-tform, {12}, ir, (device-sycl, sm_50) +// NVIDIA_CHECK_PHASES: 14: backend, {13}, assembler, (device-sycl, sm_50) +// NVIDIA_CHECK_PHASES: 15: assembler, {14}, object, (device-sycl, sm_50) +// NVIDIA_CHECK_PHASES: linker, {14, 15}, cuda-fatbin, (device-sycl, sm_50) +// NVIDIA_CHECK_PHASES: foreach, {13, 16}, cuda-fatbin, (device-sycl, sm_50) +// NVIDIA_CHECK_PHASES: file-table-tform, {12, 17}, tempfiletable, (device-sycl, sm_50) +// NVIDIA_CHECK_PHASES: clang-offload-wrapper, {18}, object, (device-sycl, sm_50) +// NVIDIA_CHECK_PHASES: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {19}, image + +/// AMD Test phases, BoundArch settings used for -device target. Additional +/// offload action used for compilation and backend compilation. +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx700 -fno-sycl-device-lib=all \ +// RUN: -fno-sycl-instrument-device-code \ +// RUN: -target x86_64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AMD_CHECK_PHASES +// AMD_CHECK_PHASES: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl) +// AMD_CHECK_PHASES: 1: append-footer, {0}, c++, (host-sycl) +// AMD_CHECK_PHASES: 2: preprocessor, {1}, c++-cpp-output, (host-sycl) +// AMD_CHECK_PHASES: 3: input, "[[INPUT]]", c++, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 5: compiler, {4}, ir, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (amdgcn-amd-amdhsa:gfx700)" {5}, c++-cpp-output +// AMD_CHECK_PHASES: 7: compiler, {6}, ir, (host-sycl) +// AMD_CHECK_PHASES: 8: backend, {7}, assembler, (host-sycl) +// AMD_CHECK_PHASES: 9: assembler, {8}, object, (host-sycl) +// AMD_CHECK_PHASES: 10: linker, {9}, image, (host-sycl) +// AMD_CHECK_PHASES: 11: linker, {5}, ir, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 12: sycl-post-link, {11}, ir, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 13: file-table-tform, {12}, ir, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 14: backend, {13}, assembler, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 15: assembler, {14}, object, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 16: linker, {15}, image, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 17: linker, {16}, hip-fatbin, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 18: foreach, {13, 17}, hip-fatbin, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 19: file-table-tform, {12, 18}, tempfiletable, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 20: clang-offload-wrapper, {19}, object, (device-sycl, gfx700) +// AMD_CHECK_PHASES: 21: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (amdgcn-amd-amdhsa:gfx700)" {20}, image + +/// Check that ocloc and macro settings only occur for the expected toolchains +/// when mixing spir64_gen and intel_gpu +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64_gen \ +// RUN: -Xsycl-target-backend=spir64_gen "-device skl" \ +// RUN: -fno-sycl-device-lib=all -fno-sycl-instrument-device-code \ +// RUN: -target x86_64-unknown-linux-gnu -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK_TOOLS_MIX +// CHECK_TOOLS_MIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown" +// CHECK_TOOLS_MIX: "-D__SYCL_TARGET_INTEL_GPU_DG1__" +// CHECK_TOOLS_MIX: ocloc{{.*}} "-device" "dg1" +// CHECK_TOOLS_MIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown" +// CHECK_TOOLS_MIX-NOT: "-D__SYCL_TARGET_INTEL_GPU{{.*}}" +// CHECK_TOOLS_MIX: ocloc{{.*}} "-device" "skl" + +/// Test phases when using both spir64_gen and intel_gpu* +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl,spir64_gen \ +// RUN: -fno-sycl-device-lib=all -fno-sycl-instrument-device-code \ +// RUN: -target x86_64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK_PHASES_MIX +// CHECK_PHASES_MIX: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl) +// CHECK_PHASES_MIX: 1: append-footer, {0}, c++, (host-sycl) +// CHECK_PHASES_MIX: 2: preprocessor, {1}, c++-cpp-output, (host-sycl) +// CHECK_PHASES_MIX: 3: input, "[[INPUT]]", c++, (device-sycl) +// CHECK_PHASES_MIX: 4: preprocessor, {3}, c++-cpp-output, (device-sycl) +// CHECK_PHASES_MIX: 5: compiler, {4}, ir, (device-sycl) +// CHECK_PHASES_MIX: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (spir64_gen-unknown-unknown)" {5}, c++-cpp-output +// CHECK_PHASES_MIX: 7: compiler, {6}, ir, (host-sycl) +// CHECK_PHASES_MIX: 8: backend, {7}, assembler, (host-sycl) +// CHECK_PHASES_MIX: 9: assembler, {8}, object, (host-sycl) +// CHECK_PHASES_MIX: 10: linker, {9}, image, (host-sycl) +// CHECK_PHASES_MIX: 11: input, "[[INPUT]]", c++, (device-sycl, skl) +// CHECK_PHASES_MIX: 12: preprocessor, {11}, c++-cpp-output, (device-sycl, skl) +// CHECK_PHASES_MIX: 13: compiler, {12}, ir, (device-sycl, skl) +// CHECK_PHASES_MIX: 14: linker, {13}, ir, (device-sycl, skl) +// CHECK_PHASES_MIX: 15: sycl-post-link, {14}, tempfiletable, (device-sycl, skl) +// CHECK_PHASES_MIX: 16: file-table-tform, {15}, tempfilelist, (device-sycl, skl) +// CHECK_PHASES_MIX: 17: llvm-spirv, {16}, tempfilelist, (device-sycl, skl) +// CHECK_PHASES_MIX: 18: backend-compiler, {17}, image, (device-sycl, skl) +// CHECK_PHASES_MIX: 19: file-table-tform, {15, 18}, tempfiletable, (device-sycl, skl) +// CHECK_PHASES_MIX: 20: clang-offload-wrapper, {19}, object, (device-sycl, skl) +// CHECK_PHASES_MIX: 21: linker, {5}, ir, (device-sycl) +// CHECK_PHASES_MIX: 22: sycl-post-link, {21}, tempfiletable, (device-sycl) +// CHECK_PHASES_MIX: 23: file-table-tform, {22}, tempfilelist, (device-sycl) +// CHECK_PHASES_MIX: 24: llvm-spirv, {23}, tempfilelist, (device-sycl) +// CHECK_PHASES_MIX: 25: backend-compiler, {24}, image, (device-sycl) +// CHECK_PHASES_MIX: 26: file-table-tform, {22, 25}, tempfiletable, (device-sycl) +// CHECK_PHASES_MIX: 27: clang-offload-wrapper, {26}, object, (device-sycl) +// CHECK_PHASES_MIX: 28: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (spir64_gen-unknown-unknown:skl)" {20}, "device-sycl (spir64_gen-unknown-unknown)" {27}, image + diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md index 608e8a5192102..b47b91e4846ea 100644 --- a/sycl/doc/UsersManual.md +++ b/sycl/doc/UsersManual.md @@ -32,7 +32,7 @@ and not recommended to use in production environment. spir64_fpga-unknown-unknown, spir64_gen-unknown-unknown Available in special build configuration: * nvptx64-nvidia-cuda - generate code ahead of time for CUDA target; - Special target values specific to Intel Processor Graphics support are + Special target values specific to Intel, NVIDIA and AMD Processor Graphics support are accepted, providing a streamlined interface for AOT. Only one of these values at a time is supported. * intel_gpu_pvc - Ponte Vecchio Intel graphics architecture @@ -57,6 +57,41 @@ and not recommended to use in production environment. * intel_gpu_kbl, intel_gpu_9_1_9 - Kaby Lake Intel graphics architecture * intel_gpu_skl, intel_gpu_9_0_9 - Skylake Intel graphics architecture * intel_gpu_bdw, intel_gpu_8_0_0 - Broadwell Intel graphics architecture + * nvidia_gpu_sm_50 - NVIDIA Maxwell architecture (compute capability 5.0) + * nvidia_gpu_sm_52 - NVIDIA Maxwell architecture (compute capability 5.2) + * nvidia_gpu_sm_53 - NVIDIA Maxwell architecture (compute capability 5.3) + * nvidia_gpu_sm_60 - NVIDIA Pascal architecture (compute capability 6.0) + * nvidia_gpu_sm_61 - NVIDIA Pascal architecture (compute capability 6.1) + * nvidia_gpu_sm_62 - NVIDIA Pascal architecture (compute capability 6.2) + * nvidia_gpu_sm_70 - NVIDIA Volta architecture (compute capability 7.0) + * nvidia_gpu_sm_72 - NVIDIA Volta architecture (compute capability 7.2) + * nvidia_gpu_sm_75 - NVIDIA Turing architecture (compute capability 7.5) + * nvidia_gpu_sm_80 - NVIDIA Ampere architecture (compute capability 8.0) + * nvidia_gpu_sm_86 - NVIDIA Ampere architecture (compute capability 8.6) + * nvidia_gpu_sm_87 - NVIDIA Jetson/Drive AGX Orin architecture + * nvidia_gpu_sm_89 - NVIDIA Ada Lovelace architecture + * nvidia_gpu_sm_90 - NVIDIA Hopper architecture + * amd_gpu_gfx700 - AMD GCN GFX7 (Sea Islands (CI)) architecture + * amd_gpu_gfx701 - AMD GCN GFX7 (Sea Islands (CI)) architecture + * amd_gpu_gfx702 - AMD GCN GFX7 (Sea Islands (CI)) architecture + * amd_gpu_gfx801 - AMD GCN GFX8 (Volcanic Islands (VI)) architecture + * amd_gpu_gfx802 - AMD GCN GFX8 (Volcanic Islands (VI)) architecture + * amd_gpu_gfx803 - AMD GCN GFX8 (Volcanic Islands (VI)) architecture + * amd_gpu_gfx805 - AMD GCN GFX8 (Volcanic Islands (VI)) architecture + * amd_gpu_gfx810 - AMD GCN GFX8 (Volcanic Islands (VI)) architecture + * amd_gpu_gfx900 - AMD GCN GFX9 (Vega) architecture + * amd_gpu_gfx902 - AMD GCN GFX9 (Vega) architecture + * amd_gpu_gfx904 - AMD GCN GFX9 (Vega) architecture + * amd_gpu_gfx906 - AMD GCN GFX9 (Vega) architecture + * amd_gpu_gfx908 - AMD GCN GFX9 (Vega) architecture + * amd_gpu_gfx90a - AMD GCN GFX9 (Vega) architecture + * amd_gpu_gfx1010 - AMD GCN GFX10.1 (RDNA 1) architecture + * amd_gpu_gfx1011 - AMD GCN GFX10.1 (RDNA 1) architecture + * amd_gpu_gfx1012 - AMD GCN GFX10.1 (RDNA 1) architecture + * amd_gpu_gfx1013 - AMD GCN GFX10.1 (RDNA 1) architecture + * amd_gpu_gfx1030 - AMD GCN GFX10.3 (RDNA 2) architecture + * amd_gpu_gfx1031 - GCN GFX10.3 (RDNA 2) architecture + * amd_gpu_gfx1032 - GCN GFX10.3 (RDNA 2) architecture ## Language options diff --git a/sycl/doc/design/DeviceIf.md b/sycl/doc/design/DeviceIf.md index 5ef5c681f5513..1c2333e815796 100644 --- a/sycl/doc/design/DeviceIf.md +++ b/sycl/doc/design/DeviceIf.md @@ -73,25 +73,20 @@ recognizes: * `intel_gpu_11_2_0` (alias for `intel_gpu_ehl`) * `intel_gpu_12_0_0` (alias for `intel_gpu_tgllp`) * `intel_gpu_12_10_0` (alias for `intel_gpu_dg1`) -* `nvidia_gpu_sm20` -* `nvidia_gpu_sm30` -* `nvidia_gpu_sm32` -* `nvidia_gpu_sm35` -* `nvidia_gpu_sm37` -* `nvidia_gpu_sm50` -* `nvidia_gpu_sm52` -* `nvidia_gpu_sm53` -* `nvidia_gpu_sm60` -* `nvidia_gpu_sm61` -* `nvidia_gpu_sm62` -* `nvidia_gpu_sm70` -* `nvidia_gpu_sm72` -* `nvidia_gpu_sm75` -* `nvidia_gpu_sm80` -* `nvidia_gpu_sm86` -* `nvidia_gpu_sm87` -* `nvidia_gpu_sm89` -* `nvidia_gpu_sm90` +* `nvidia_gpu_sm_50` +* `nvidia_gpu_sm_52` +* `nvidia_gpu_sm_53` +* `nvidia_gpu_sm_60` +* `nvidia_gpu_sm_61` +* `nvidia_gpu_sm_62` +* `nvidia_gpu_sm_70` +* `nvidia_gpu_sm_72` +* `nvidia_gpu_sm_75` +* `nvidia_gpu_sm_80` +* `nvidia_gpu_sm_86` +* `nvidia_gpu_sm_87` +* `nvidia_gpu_sm_89` +* `nvidia_gpu_sm_90` * `amd_gpu_gfx700` * `amd_gpu_gfx701` * `amd_gpu_gfx702` @@ -160,11 +155,6 @@ one of the following corresponding C++ macro names: * `__SYCL_TARGET_INTEL_GPU_ACM_G11__` * `__SYCL_TARGET_INTEL_GPU_ACM_G12__` * `__SYCL_TARGET_INTEL_GPU_PVC__` -* `__SYCL_TARGET_NVIDIA_GPU_SM20__` -* `__SYCL_TARGET_NVIDIA_GPU_SM30__` -* `__SYCL_TARGET_NVIDIA_GPU_SM32__` -* `__SYCL_TARGET_NVIDIA_GPU_SM35__` -* `__SYCL_TARGET_NVIDIA_GPU_SM37__` * `__SYCL_TARGET_NVIDIA_GPU_SM50__` * `__SYCL_TARGET_NVIDIA_GPU_SM52__` * `__SYCL_TARGET_NVIDIA_GPU_SM53__` diff --git a/sycl/doc/extensions/experimental/sycl_ext_intel_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_intel_device_architecture.asciidoc deleted file mode 100644 index dc2a1373d3b82..0000000000000 --- a/sycl/doc/extensions/experimental/sycl_ext_intel_device_architecture.asciidoc +++ /dev/null @@ -1,429 +0,0 @@ -= sycl_ext_intel_device_architecture - -:source-highlighter: coderay -:coderay-linenums-mode: table - -// This section needs to be after the document title. -:doctype: book -:toc2: -:toc: left -:encoding: utf-8 -:lang: en -:dpcpp: pass:[DPC++] - -// Set the default source code type in this document to C++, -// for syntax highlighting purposes. This is needed because -// docbook uses c++ and html5 uses cpp. -:language: {basebackend@docbook:c++:cpp} - - -== Notice - -[%hardbreaks] -Copyright (C) 2022-2022 Intel Corporation. All rights reserved. - -Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks -of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by -permission by Khronos. - - -== Contact - -To report problems with this extension, please open a new issue at: - -https://github.com/intel/llvm/issues - - -== Dependencies - -This extension is written against the SYCL 2020 revision 5 specification. All -references below to the "core SYCL specification" or to section numbers in the -SYCL specification refer to that revision. - - -== Status - -This is a proposed extension specification, intended to gather community -feedback. Interfaces defined in this specification may not be implemented yet -or may be in a preliminary state. The specification itself may also change in -incompatible ways before it is finalized. *Shipping software products should -not rely on APIs defined in this specification.* - -[comment] --- -_Add the following paragraph when this specification becomes "experimental"._ - -There are important limitations with the DPC++ implementation of this -experimental extension. In particular, this extension may only be used when -the application is compiled in AOT mode. See the section below titled -"Limitations with the experimental version" for a full description of the -limitations. --- - - -== Overview - -This extension provides a way for device code to query the device architecture -on which it is running. This is similar to the -link:./sycl_ext_oneapi_device_if.asciidoc[sycl_ext_oneapi_device_if] extension -except the comparison is for the device's architecture not the device's -aspects. In some cases, low-level application code can use special features or -do specific optimizations depending on the device architecture, and this -extension enables such applications. - - -== Specification - -=== Feature test macro - -This extension provides a feature-test macro as described in the core SYCL -specification. An implementation supporting this extension must predefine the -macro `SYCL_EXT_INTEL_DEVICE_ARCHITECTURE` to one of the values defined in the -table below. Applications can test for the existence of this macro to -determine if the implementation supports this feature, or applications can test -the macro's value to determine which of the extension's features the -implementation supports. - -[%header,cols="1,5"] -|=== -|Value -|Description - -|1 -|Initial version of this extension. -|=== - -=== New enumeration of architectures - -This extension adds a new enumeration of the architectures that can be tested. - -``` -namespace sycl::ext::intel::experimental { - -enum class architecture : /* unspecified */ { - x86_64, - intel_gpu_bdw, - intel_gpu_skl, - intel_gpu_kbl, - intel_gpu_cfl, - intel_gpu_apl, - intel_gpu_glk, - intel_gpu_whl, - intel_gpu_aml, - intel_gpu_cml, - intel_gpu_icllp, - intel_gpu_ehl, - intel_gpu_tgllp, - intel_gpu_rkl, - intel_gpu_adl_s, - intel_gpu_rpl_s, - intel_gpu_adl_p, - intel_gpu_adl_n, - intel_gpu_dg1, - intel_gpu_acm_g10, - intel_gpu_acm_g11, - intel_gpu_acm_g12, - intel_gpu_pvc, - - intel_gpu_8_0_0 = intel_gpu_bdw, - intel_gpu_9_0_9 = intel_gpu_skl, - intel_gpu_9_1_9 = intel_gpu_kbl - intel_gpu_9_2_9 = intel_gpu_cfl, - intel_gpu_9_3_0 = intel_gpu_apl, - intel_gpu_9_4_0 = intel_gpu_glk, - intel_gpu_9_5_0 = intel_gpu_whl, - intel_gpu_9_6_0 = intel_gpu_aml, - intel_gpu_9_7_0 = intel_gpu_cml, - intel_gpu_11_0_0 = intel_gpu_icllp, - intel_gpu_11_2_0 = intel_gpu_ehl, - intel_gpu_12_0_0 = intel_gpu_tgllp, - intel_gpu_12_10_0 = intel_gpu_dg1 -}; - -} // namespace sycl::ext::intel::experimental -``` - -The following table tells which version of this extension first included each -of these enumerators, and it provides a brief description of their meanings. - -[%header,cols="5,1,5"] -|=== -|Enumerator name -|Added in version -|Description - -|`x86_64` -|1 -|Any CPU device with the x86_64 instruction set. - -|`intel_gpu_bdw` -|1 -|Broadwell Intel graphics architecture. - -|`intel_gpu_skl` -|1 -|Broadwell Intel graphics architecture. - -|`intel_gpu_kbl` -|1 -|Kaby Lake Intel graphics architecture. - -|`intel_gpu_cfl` -|1 -|Coffee Lake Intel graphics architecture. - -|`intel_gpu_apl` -|1 -|Apollo Lake Intel graphics architecture. - -|`intel_gpu_glk` -|1 -|Gemini Lake Intel graphics architecture. - -|`intel_gpu_whl` -|1 -|Whiskey Lake Intel graphics architecture. - -|`intel_gpu_aml` -|1 -|Amber Lake Intel graphics architecture. - -|`intel_gpu_cml` -|1 -|Comet Lake Intel graphics architecture. - -|`intel_gpu_icllp` -|1 -|Ice Lake Intel graphics architecture. - -|`intel_gpu_ehl` -|1 -|Elkhart Lake Intel graphics architecture. - -|`intel_gpu_tgllp` -|1 -|Tiger Lake Intel graphics architecture. - -|`intel_gpu_rkl` -|1 -|Rocket Lake Intel graphics architecture. - -|`intel_gpu_adl_s` -|1 -|Alder Lake S Intel graphics architecture. - -|`intel_gpu_rpl_s` -|1 -|Raptor Lake Intel graphics architecture. - -|`intel_gpu_adl_p` -|1 -|Alder Lake P Intel graphics architecture. - -|`intel_gpu_adl_n` -|1 -|Alder Lake N Intel graphics architecture. - -|`intel_gpu_dg1` -|1 -|DG1 Intel graphics architecture. - -|`intel_gpu_acm_g10` -|1 -|Alchemist G10 Intel graphics architecture. - -|`intel_gpu_acm_g11` -|1 -|Alchemist G11 Intel graphics architecture. - -|`intel_gpu_acm_g12` -|1 -|Alchemist G12 Intel graphics architecture. - -|`intel_gpu_pvc` -|1 -|Ponte Vecchio Intel graphics architecture. - -|`intel_gpu_8_0_0` -|1 -|Alias for `intel_gpu_bdw`. - -|`intel_gpu_9_0_9` -|1 -|Alias for `intel_gpu_skl`. - -|`intel_gpu_9_1_9` -|1 -|Alias for `intel_gpu_kbl`. - -|`intel_gpu_9_2_9` -|1 -|Alias for `intel_gpu_cfl`. - -|`intel_gpu_9_3_0` -|1 -|Alias for `intel_gpu_apl`. - -|`intel_gpu_9_4_0` -|1 -|Alias for `intel_gpu_glk`. - -|`intel_gpu_9_5_0` -|1 -|Alias for `intel_gpu_whl`. - -|`intel_gpu_9_6_0` -|1 -|Alias for `intel_gpu_aml`. - -|`intel_gpu_9_7_0` -|1 -|Alias for `intel_gpu_cml`. - -|`intel_gpu_11_0_0` -|1 -|Alias for `intel_gpu_icllp`. - -|`intel_gpu_11_2_0` -|1 -|Alias for `intel_gpu_ehl`. - -|`intel_gpu_12_0_0` -|1 -|Alias for `intel_gpu_tgllp`. - -|`intel_gpu_12_10_0` -|1 -|Alias for `intel_gpu_dg1`. -|=== - -[NOTE] -==== -An "alias" enumerator is generally added for new devices only after hardware -has finalized and the exact version is known. -==== - - -=== New `if_architecture_is` free function - -This extension adds one new free function which may be called from device -code. This function is not available in host code. - -``` -namespace sycl::ext::intel::experimental { - -template -/* unspecified */ if_architecture_is(T fn, Args ...args); - -} // namespace sycl::ext::intel::experimental -``` - -This function operates exactly like `if_device_has` from the -link:./sycl_ext_oneapi_device_if.asciidoc[sycl_ext_oneapi_device_if] extension -except that the condition gating execution of the callable function `fn` is -determined by the `Archs` parameter pack. This condition is `true` if the -device which executes `if_architecture_is` matches **any** of the architectures -listed in this pack. - -The value returned by `if_architecture_is` is an object _F_ of an unspecified -type, which provides the following member functions: - -``` -class /* unspecified */ { - public: - template - /* unspecified */ else_if_architecture_is(T fn, Args ...args); - - template - void otherwise(T fn, Args ...args); -}; -``` - -The `otherwise` function behaves exactly like the `otherwise` function from the -link:./sycl_ext_oneapi_device_if.asciidoc[sycl_ext_oneapi_device_if] extension. -The `else_if_architecture_is` function behaves exactly like -`else_if_device_has` from that extension except that the condition gating -execution of the callable object `fn` is determined by the `Archs` parameter -pack. This condition is `true` only if the object _F_ comes from a previous -call to `if_architecture_is` or `else_if_architecture_is` whose condition is -`false` *and* if the device calling `else_if_architecture_is` has one of the -architectures in the `Archs` parameter pack. - - -== Future direction - -This experimental extension is still evolving. We expect that future versions -will include the following: - -* An extended member function like: -+ --- -``` -namespace sycl { - -class device { - bool ext_intel_architecture_is( - ext::intel::experimental::architecture arch); -}; - -// namespace sycl -``` - -This provides a way to query a device's architecture from host code. --- - -* An extended device information descriptor named - `sycl::ext::intel::experimental::info::device::architecture`, which returns - the architecture of the device. This allows host code such as: -+ --- -``` -using namespace sycl::ext::intel::experimental; - -architecture arch = dev.get_info(); -switch (arch) { -case architecture::x86_64: - /* ... */ - break; -case architecture::intel_gpu_bdw: - /* ... */ - break; -/* etc. */ -} -``` --- - -* A compile-time constant property that can be used to decorate kernels and - non-kernel device functions: -+ --- -``` -namespace sycl::ext::intel::experimental { - -struct device_architecture_is_key { - template - using value_t = property_value...>; -}; - -template -struct property_value...> -{ - static constexpr std::array value; -}; - -template -inline constexpr device_architecture_is_key::value_t - device_architecture_is; - -} // namespace sycl::ext::intel::experimental -``` - -This property indicates that a kernel or non-kernel device function uses -features that are available on devices with the given architecture list but -may not be available on devices with other architectures. --- - -* Additional enumerators in the `architecture` enumeration. This could include - entries for different x86_64 architectures or entries for CUDA devices. diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc similarity index 93% rename from sycl/doc/extensions/proposed/sycl_ext_oneapi_device_architecture.asciidoc rename to sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index a785c426a59c6..bb5692552796d 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -128,25 +128,20 @@ enum class architecture : /* unspecified */ { intel_gpu_acm_g12, intel_gpu_pvc, - nvidia_gpu_sm20, - nvidia_gpu_sm30, - nvidia_gpu_sm32, - nvidia_gpu_sm35, - nvidia_gpu_sm37, - nvidia_gpu_sm50, - nvidia_gpu_sm52, - nvidia_gpu_sm53, - nvidia_gpu_sm60, - nvidia_gpu_sm61, - nvidia_gpu_sm62, - nvidia_gpu_sm70, - nvidia_gpu_sm72, - nvidia_gpu_sm75, - nvidia_gpu_sm80, - nvidia_gpu_sm86, - nvidia_gpu_sm87, - nvidia_gpu_sm89, - nvidia_gpu_sm90, + nvidia_gpu_sm_50, + nvidia_gpu_sm_52, + nvidia_gpu_sm_53, + nvidia_gpu_sm_60, + nvidia_gpu_sm_61, + nvidia_gpu_sm_62, + nvidia_gpu_sm_70, + nvidia_gpu_sm_72, + nvidia_gpu_sm_75, + nvidia_gpu_sm_80, + nvidia_gpu_sm_86, + nvidia_gpu_sm_87, + nvidia_gpu_sm_89, + nvidia_gpu_sm_90, amd_gpu_gfx700, amd_gpu_gfx701, @@ -341,79 +336,59 @@ of these enumerators, and it provides a brief description of their meanings. |1 |Alias for `intel_gpu_dg1`. -|`nvidia_gpu_sm20` -|2 -|NVIDIA Fermi architecture. - -|`nvidia_gpu_sm30` -|2 -|NVIDIA Kepler architecture (compute capability 3.0). - -|`nvidia_gpu_sm32` -|2 -|NVIDIA Kepler architecture (compute capability 3.2). - -|`nvidia_gpu_sm35` -|2 -|NVIDIA Kepler architecture (compute capability 3.5). - -|`nvidia_gpu_sm37` -|2 -|NVIDIA Kepler architecture (compute capability 3.7). - -|`nvidia_gpu_sm50` +|`nvidia_gpu_sm_50` |2 |NVIDIA Maxwell architecture (compute capability 5.0). -|`nvidia_gpu_sm52` +|`nvidia_gpu_sm_52` |2 |NVIDIA Maxwell architecture (compute capability 5.2). -|`nvidia_gpu_sm53` +|`nvidia_gpu_sm_53` |2 |NVIDIA Maxwell architecture (compute capability 5.3). -|`nvidia_gpu_sm60` +|`nvidia_gpu_sm_60` |2 |NVIDIA Pascal architecture (compute capability 6.0). -|`nvidia_gpu_sm61` +|`nvidia_gpu_sm_61` |2 |NVIDIA Pascal architecture (compute capability 6.1). -|`nvidia_gpu_sm62` +|`nvidia_gpu_sm_62` |2 |NVIDIA Pascal architecture (compute capability 6.2). -|`nvidia_gpu_sm70` +|`nvidia_gpu_sm_70` |2 |NVIDIA Volta architecture (compute capability 7.0). -|`nvidia_gpu_sm72` +|`nvidia_gpu_sm_72` |2 |NVIDIA Volta architecture (compute capability 7.2). -|`nvidia_gpu_sm75` +|`nvidia_gpu_sm_75` |2 |NVIDIA Turing architecture (compute capability 7.5). -|`nvidia_gpu_sm80` +|`nvidia_gpu_sm_80` |2 |NVIDIA Ampere architecture (compute capability 8.0). -|`nvidia_gpu_sm86` +|`nvidia_gpu_sm_86` |2 |NVIDIA Ampere architecture (compute capability 8.6). -|`nvidia_gpu_sm87` +|`nvidia_gpu_sm_87` |2 |Jetson/Drive AGX Orin architecture. -|`nvidia_gpu_sm89` +|`nvidia_gpu_sm_89` |2 |NVIDIA Ada Lovelace architecture. -|`nvidia_gpu_sm90` +|`nvidia_gpu_sm_90` |2 |NVIDIA Hopper architecture. diff --git a/sycl/include/sycl/ext/intel/experimental/device_architecture.hpp b/sycl/include/sycl/ext/intel/experimental/device_architecture.hpp deleted file mode 100644 index f2f37db5312c5..0000000000000 --- a/sycl/include/sycl/ext/intel/experimental/device_architecture.hpp +++ /dev/null @@ -1,278 +0,0 @@ -#pragma once - -#include - -namespace sycl { -__SYCL_INLINE_VER_NAMESPACE(_V1) { -namespace ext::intel::experimental { - -enum class architecture { - x86_64, - intel_gpu_bdw, - intel_gpu_skl, - intel_gpu_kbl, - intel_gpu_cfl, - intel_gpu_apl, - intel_gpu_glk, - intel_gpu_whl, - intel_gpu_aml, - intel_gpu_cml, - intel_gpu_icllp, - intel_gpu_ehl, - intel_gpu_tgllp, - intel_gpu_rkl, - intel_gpu_adl_s, - intel_gpu_rpl_s, - intel_gpu_adl_p, - intel_gpu_adl_n, - intel_gpu_dg1, - intel_gpu_acm_g10, - intel_gpu_acm_g11, - intel_gpu_acm_g12, - intel_gpu_pvc, - // Update "detail::max_architecture" below if you add new elements here! - intel_gpu_8_0_0 = intel_gpu_bdw, - intel_gpu_9_0_9 = intel_gpu_skl, - intel_gpu_9_1_9 = intel_gpu_kbl, - intel_gpu_9_2_9 = intel_gpu_cfl, - intel_gpu_9_3_0 = intel_gpu_apl, - intel_gpu_9_4_0 = intel_gpu_glk, - intel_gpu_9_5_0 = intel_gpu_whl, - intel_gpu_9_6_0 = intel_gpu_aml, - intel_gpu_9_7_0 = intel_gpu_cml, - intel_gpu_11_0_0 = intel_gpu_icllp, - intel_gpu_11_2_0 = intel_gpu_ehl, - intel_gpu_12_0_0 = intel_gpu_tgllp, - intel_gpu_12_10_0 = intel_gpu_dg1, -}; - -} // namespace ext::intel::experimental - -namespace detail { - -static constexpr ext::intel::experimental::architecture max_architecture = - ext::intel::experimental::architecture::intel_gpu_pvc; - -#ifndef __SYCL_TARGET_INTEL_X86_64__ -#define __SYCL_TARGET_INTEL_X86_64__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_BDW__ -#define __SYCL_TARGET_INTEL_GPU_BDW__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_SKL__ -#define __SYCL_TARGET_INTEL_GPU_SKL__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_KBL__ -#define __SYCL_TARGET_INTEL_GPU_KBL__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_CFL__ -#define __SYCL_TARGET_INTEL_GPU_CFL__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_APL__ -#define __SYCL_TARGET_INTEL_GPU_APL__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_GLK__ -#define __SYCL_TARGET_INTEL_GPU_GLK__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_WHL__ -#define __SYCL_TARGET_INTEL_GPU_WHL__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_AML__ -#define __SYCL_TARGET_INTEL_GPU_AML__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_CML__ -#define __SYCL_TARGET_INTEL_GPU_CML__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_ICLLP__ -#define __SYCL_TARGET_INTEL_GPU_ICLLP__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_EHL__ -#define __SYCL_TARGET_INTEL_GPU_EHL__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_TGLLP__ -#define __SYCL_TARGET_INTEL_GPU_TGLLP__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_RKL__ -#define __SYCL_TARGET_INTEL_GPU_RKL__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_ADL_S__ -#define __SYCL_TARGET_INTEL_GPU_ADL_S__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_RPL_S__ -#define __SYCL_TARGET_INTEL_GPU_RPL_S__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_ADL_P__ -#define __SYCL_TARGET_INTEL_GPU_ADL_P__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_ADL_N__ -#define __SYCL_TARGET_INTEL_GPU_ADL_N__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_DG1__ -#define __SYCL_TARGET_INTEL_GPU_DG1__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G10__ -#define __SYCL_TARGET_INTEL_GPU_ACM_G10__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G11__ -#define __SYCL_TARGET_INTEL_GPU_ACM_G11__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G12__ -#define __SYCL_TARGET_INTEL_GPU_ACM_G12__ 0 -#endif -#ifndef __SYCL_TARGET_INTEL_GPU_PVC__ -#define __SYCL_TARGET_INTEL_GPU_PVC__ 0 -#endif - -// This is true when the translation unit is compiled in AOT mode with target -// names that supports the "if_architecture_is" features. If an unsupported -// target name is specified via "-fsycl-targets", the associated invocation of -// the device compiler will set this variable to false, and that will trigger -// an error for code that uses "if_architecture_is". -static constexpr bool is_allowable_aot_mode = - (__SYCL_TARGET_INTEL_X86_64__ == 1) || - (__SYCL_TARGET_INTEL_GPU_BDW__ == 1) || - (__SYCL_TARGET_INTEL_GPU_SKL__ == 1) || - (__SYCL_TARGET_INTEL_GPU_KBL__ == 1) || - (__SYCL_TARGET_INTEL_GPU_CFL__ == 1) || - (__SYCL_TARGET_INTEL_GPU_APL__ == 1) || - (__SYCL_TARGET_INTEL_GPU_GLK__ == 1) || - (__SYCL_TARGET_INTEL_GPU_WHL__ == 1) || - (__SYCL_TARGET_INTEL_GPU_AML__ == 1) || - (__SYCL_TARGET_INTEL_GPU_CML__ == 1) || - (__SYCL_TARGET_INTEL_GPU_ICLLP__ == 1) || - (__SYCL_TARGET_INTEL_GPU_EHL__ == 1) || - (__SYCL_TARGET_INTEL_GPU_TGLLP__ == 1) || - (__SYCL_TARGET_INTEL_GPU_RKL__ == 1) || - (__SYCL_TARGET_INTEL_GPU_ADL_S__ == 1) || - (__SYCL_TARGET_INTEL_GPU_RPL_S__ == 1) || - (__SYCL_TARGET_INTEL_GPU_ADL_P__ == 1) || - (__SYCL_TARGET_INTEL_GPU_ADL_N__ == 1) || - (__SYCL_TARGET_INTEL_GPU_DG1__ == 1) || - (__SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1) || - (__SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1) || - (__SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1) || - (__SYCL_TARGET_INTEL_GPU_PVC__ == 1); - -struct IsAOTForArchitectureClass { - // Allocate an array of size == size of ext::intel::experimental::architecture - // enum. - bool arr[static_cast(max_architecture) + 1]; - - using arch = ext::intel::experimental::architecture; - - constexpr IsAOTForArchitectureClass() : arr() { - arr[static_cast(arch::x86_64)] = __SYCL_TARGET_INTEL_X86_64__ == 1; - arr[static_cast(arch::intel_gpu_bdw)] = - __SYCL_TARGET_INTEL_GPU_BDW__ == 1; - arr[static_cast(arch::intel_gpu_skl)] = - __SYCL_TARGET_INTEL_GPU_SKL__ == 1; - arr[static_cast(arch::intel_gpu_kbl)] = - __SYCL_TARGET_INTEL_GPU_KBL__ == 1; - arr[static_cast(arch::intel_gpu_cfl)] = - __SYCL_TARGET_INTEL_GPU_CFL__ == 1; - arr[static_cast(arch::intel_gpu_apl)] = - __SYCL_TARGET_INTEL_GPU_APL__ == 1; - arr[static_cast(arch::intel_gpu_glk)] = - __SYCL_TARGET_INTEL_GPU_GLK__ == 1; - arr[static_cast(arch::intel_gpu_whl)] = - __SYCL_TARGET_INTEL_GPU_WHL__ == 1; - arr[static_cast(arch::intel_gpu_aml)] = - __SYCL_TARGET_INTEL_GPU_AML__ == 1; - arr[static_cast(arch::intel_gpu_cml)] = - __SYCL_TARGET_INTEL_GPU_CML__ == 1; - arr[static_cast(arch::intel_gpu_icllp)] = - __SYCL_TARGET_INTEL_GPU_ICLLP__ == 1; - arr[static_cast(arch::intel_gpu_ehl)] = - __SYCL_TARGET_INTEL_GPU_EHL__ == 1; - arr[static_cast(arch::intel_gpu_tgllp)] = - __SYCL_TARGET_INTEL_GPU_TGLLP__ == 1; - arr[static_cast(arch::intel_gpu_rkl)] = - __SYCL_TARGET_INTEL_GPU_RKL__ == 1; - arr[static_cast(arch::intel_gpu_adl_s)] = - __SYCL_TARGET_INTEL_GPU_ADL_S__ == 1; - arr[static_cast(arch::intel_gpu_rpl_s)] = - __SYCL_TARGET_INTEL_GPU_RPL_S__ == 1; - arr[static_cast(arch::intel_gpu_adl_p)] = - __SYCL_TARGET_INTEL_GPU_ADL_P__ == 1; - arr[static_cast(arch::intel_gpu_adl_n)] = - __SYCL_TARGET_INTEL_GPU_ADL_N__ == 1; - arr[static_cast(arch::intel_gpu_dg1)] = - __SYCL_TARGET_INTEL_GPU_DG1__ == 1; - arr[static_cast(arch::intel_gpu_acm_g10)] = - __SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1; - arr[static_cast(arch::intel_gpu_acm_g11)] = - __SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1; - arr[static_cast(arch::intel_gpu_acm_g12)] = - __SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1; - arr[static_cast(arch::intel_gpu_pvc)] = - __SYCL_TARGET_INTEL_GPU_PVC__ == 1; - } -}; - -// One entry for each enumerator in "architecture" telling whether the AOT -// target matches that architecture. -static constexpr IsAOTForArchitectureClass is_aot_for_architecture; - -// Reads the value of "is_allowable_aot_mode" via a template to defer triggering -// static_assert() until template instantiation time. -template -constexpr static bool allowable_aot_mode() { - return is_allowable_aot_mode; -} - -// Tells if the current device has one of the architectures in the parameter -// pack. -template -constexpr static bool device_architecture_is() { - return (is_aot_for_architecture.arr[static_cast(Archs)] || ...); -} - -// Helper object used to implement "else_if_architecture_is" and "otherwise". -// The "MakeCall" template parameter tells whether a previous clause in the -// "if-elseif-elseif ..." chain was true. When "MakeCall" is false, some -// previous clause was true, so none of the subsequent -// "else_if_architecture_is" or "otherwise" member functions should call the -// user's function. -template class if_architecture_helper { -public: - template - constexpr auto else_if_architecture_is(T fnTrue, Args... args) { - if constexpr (MakeCall && device_architecture_is()) { - fnTrue(args...); - return if_architecture_helper{}; - } else { - (void)fnTrue; - return if_architecture_helper{}; - } - } - - template - constexpr void otherwise(T fn, Args... args) { - if constexpr (MakeCall) { - fn(args...); - } - } -}; -} // namespace detail - -namespace ext::intel::experimental { - -template -constexpr static auto if_architecture_is(T fnTrue, Args... args) { - static_assert(detail::allowable_aot_mode(), - "The if_architecture_is function may only be used when AOT " - "compiling with '-fsycl-targets=spir64_x86_64' or " - "'-fsycl-targets=intel_gpu_*'"); - if constexpr (detail::device_architecture_is()) { - fnTrue(args...); - return detail::if_architecture_helper{}; - } else { - (void)fnTrue; - return detail::if_architecture_helper{}; - } -} - -} // namespace ext::intel::experimental -} // __SYCL_INLINE_VER_NAMESPACE(_V1) -} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp new file mode 100644 index 0000000000000..4fcfdb2036b43 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -0,0 +1,523 @@ +#pragma once + +#include + +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace ext::oneapi::experimental { + +enum class architecture { + x86_64, + intel_gpu_bdw, + intel_gpu_skl, + intel_gpu_kbl, + intel_gpu_cfl, + intel_gpu_apl, + intel_gpu_glk, + intel_gpu_whl, + intel_gpu_aml, + intel_gpu_cml, + intel_gpu_icllp, + intel_gpu_ehl, + intel_gpu_tgllp, + intel_gpu_rkl, + intel_gpu_adl_s, + intel_gpu_rpl_s, + intel_gpu_adl_p, + intel_gpu_adl_n, + intel_gpu_dg1, + intel_gpu_acm_g10, + intel_gpu_acm_g11, + intel_gpu_acm_g12, + intel_gpu_pvc, + // NVIDIA architectures + nvidia_gpu_sm_50, + nvidia_gpu_sm_52, + nvidia_gpu_sm_53, + nvidia_gpu_sm_60, + nvidia_gpu_sm_61, + nvidia_gpu_sm_62, + nvidia_gpu_sm_70, + nvidia_gpu_sm_72, + nvidia_gpu_sm_75, + nvidia_gpu_sm_80, + nvidia_gpu_sm_86, + nvidia_gpu_sm_87, + nvidia_gpu_sm_89, + nvidia_gpu_sm_90, + // AMD architectures + amd_gpu_gfx700, + amd_gpu_gfx701, + amd_gpu_gfx702, + amd_gpu_gfx801, + amd_gpu_gfx802, + amd_gpu_gfx803, + amd_gpu_gfx805, + amd_gpu_gfx810, + amd_gpu_gfx900, + amd_gpu_gfx902, + amd_gpu_gfx904, + amd_gpu_gfx906, + amd_gpu_gfx908, + amd_gpu_gfx90a, + amd_gpu_gfx1010, + amd_gpu_gfx1011, + amd_gpu_gfx1012, + amd_gpu_gfx1013, + amd_gpu_gfx1030, + amd_gpu_gfx1031, + amd_gpu_gfx1032, + // Update "detail::max_architecture" below if you add new elements here! + intel_gpu_8_0_0 = intel_gpu_bdw, + intel_gpu_9_0_9 = intel_gpu_skl, + intel_gpu_9_1_9 = intel_gpu_kbl, + intel_gpu_9_2_9 = intel_gpu_cfl, + intel_gpu_9_3_0 = intel_gpu_apl, + intel_gpu_9_4_0 = intel_gpu_glk, + intel_gpu_9_5_0 = intel_gpu_whl, + intel_gpu_9_6_0 = intel_gpu_aml, + intel_gpu_9_7_0 = intel_gpu_cml, + intel_gpu_11_0_0 = intel_gpu_icllp, + intel_gpu_11_2_0 = intel_gpu_ehl, + intel_gpu_12_0_0 = intel_gpu_tgllp, + intel_gpu_12_10_0 = intel_gpu_dg1, +}; + +} // namespace ext::oneapi::experimental + +namespace detail { + +static constexpr ext::oneapi::experimental::architecture max_architecture = + ext::oneapi::experimental::architecture::amd_gpu_gfx1032; + +#ifndef __SYCL_TARGET_INTEL_X86_64__ +#define __SYCL_TARGET_INTEL_X86_64__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_BDW__ +#define __SYCL_TARGET_INTEL_GPU_BDW__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_SKL__ +#define __SYCL_TARGET_INTEL_GPU_SKL__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_KBL__ +#define __SYCL_TARGET_INTEL_GPU_KBL__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_CFL__ +#define __SYCL_TARGET_INTEL_GPU_CFL__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_APL__ +#define __SYCL_TARGET_INTEL_GPU_APL__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_GLK__ +#define __SYCL_TARGET_INTEL_GPU_GLK__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_WHL__ +#define __SYCL_TARGET_INTEL_GPU_WHL__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_AML__ +#define __SYCL_TARGET_INTEL_GPU_AML__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_CML__ +#define __SYCL_TARGET_INTEL_GPU_CML__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_ICLLP__ +#define __SYCL_TARGET_INTEL_GPU_ICLLP__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_EHL__ +#define __SYCL_TARGET_INTEL_GPU_EHL__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_TGLLP__ +#define __SYCL_TARGET_INTEL_GPU_TGLLP__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_RKL__ +#define __SYCL_TARGET_INTEL_GPU_RKL__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_ADL_S__ +#define __SYCL_TARGET_INTEL_GPU_ADL_S__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_RPL_S__ +#define __SYCL_TARGET_INTEL_GPU_RPL_S__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_ADL_P__ +#define __SYCL_TARGET_INTEL_GPU_ADL_P__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_ADL_N__ +#define __SYCL_TARGET_INTEL_GPU_ADL_N__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_DG1__ +#define __SYCL_TARGET_INTEL_GPU_DG1__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G10__ +#define __SYCL_TARGET_INTEL_GPU_ACM_G10__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G11__ +#define __SYCL_TARGET_INTEL_GPU_ACM_G11__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G12__ +#define __SYCL_TARGET_INTEL_GPU_ACM_G12__ 0 +#endif +#ifndef __SYCL_TARGET_INTEL_GPU_PVC__ +#define __SYCL_TARGET_INTEL_GPU_PVC__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM50__ +#define __SYCL_TARGET_NVIDIA_GPU_SM50__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM52__ +#define __SYCL_TARGET_NVIDIA_GPU_SM52__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM53__ +#define __SYCL_TARGET_NVIDIA_GPU_SM53__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM60__ +#define __SYCL_TARGET_NVIDIA_GPU_SM60__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM61__ +#define __SYCL_TARGET_NVIDIA_GPU_SM61__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM62__ +#define __SYCL_TARGET_NVIDIA_GPU_SM62__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM70__ +#define __SYCL_TARGET_NVIDIA_GPU_SM70__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM72__ +#define __SYCL_TARGET_NVIDIA_GPU_SM72__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM75__ +#define __SYCL_TARGET_NVIDIA_GPU_SM75__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM80__ +#define __SYCL_TARGET_NVIDIA_GPU_SM80__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM86__ +#define __SYCL_TARGET_NVIDIA_GPU_SM86__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM87__ +#define __SYCL_TARGET_NVIDIA_GPU_SM87__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM89__ +#define __SYCL_TARGET_NVIDIA_GPU_SM89__ 0 +#endif +#ifndef __SYCL_TARGET_NVIDIA_GPU_SM90__ +#define __SYCL_TARGET_NVIDIA_GPU_SM90__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX700__ +#define __SYCL_TARGET_AMD_GPU_GFX700__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX701__ +#define __SYCL_TARGET_AMD_GPU_GFX701__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX702__ +#define __SYCL_TARGET_AMD_GPU_GFX702__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX801__ +#define __SYCL_TARGET_AMD_GPU_GFX801__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX802__ +#define __SYCL_TARGET_AMD_GPU_GFX802__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX803__ +#define __SYCL_TARGET_AMD_GPU_GFX803__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX805__ +#define __SYCL_TARGET_AMD_GPU_GFX805__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX810__ +#define __SYCL_TARGET_AMD_GPU_GFX810__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX900__ +#define __SYCL_TARGET_AMD_GPU_GFX900__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX902__ +#define __SYCL_TARGET_AMD_GPU_GFX902__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX904__ +#define __SYCL_TARGET_AMD_GPU_GFX904__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX906__ +#define __SYCL_TARGET_AMD_GPU_GFX906__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX908__ +#define __SYCL_TARGET_AMD_GPU_GFX908__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX90A__ +#define __SYCL_TARGET_AMD_GPU_GFX90A__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX1010__ +#define __SYCL_TARGET_AMD_GPU_GFX1010__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX1011__ +#define __SYCL_TARGET_AMD_GPU_GFX1011__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX1012__ +#define __SYCL_TARGET_AMD_GPU_GFX1012__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX1013__ +#define __SYCL_TARGET_AMD_GPU_GFX1013__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX1030__ +#define __SYCL_TARGET_AMD_GPU_GFX1030__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX1031__ +#define __SYCL_TARGET_AMD_GPU_GFX1031__ 0 +#endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX1032__ +#define __SYCL_TARGET_AMD_GPU_GFX1032__ 0 +#endif + +// This is true when the translation unit is compiled in AOT mode with target +// names that supports the "if_architecture_is" features. If an unsupported +// target name is specified via "-fsycl-targets", the associated invocation of +// the device compiler will set this variable to false, and that will trigger +// an error for code that uses "if_architecture_is". +static constexpr bool is_allowable_aot_mode = + (__SYCL_TARGET_INTEL_X86_64__ == 1) || + (__SYCL_TARGET_INTEL_GPU_BDW__ == 1) || + (__SYCL_TARGET_INTEL_GPU_SKL__ == 1) || + (__SYCL_TARGET_INTEL_GPU_KBL__ == 1) || + (__SYCL_TARGET_INTEL_GPU_CFL__ == 1) || + (__SYCL_TARGET_INTEL_GPU_APL__ == 1) || + (__SYCL_TARGET_INTEL_GPU_GLK__ == 1) || + (__SYCL_TARGET_INTEL_GPU_WHL__ == 1) || + (__SYCL_TARGET_INTEL_GPU_AML__ == 1) || + (__SYCL_TARGET_INTEL_GPU_CML__ == 1) || + (__SYCL_TARGET_INTEL_GPU_ICLLP__ == 1) || + (__SYCL_TARGET_INTEL_GPU_EHL__ == 1) || + (__SYCL_TARGET_INTEL_GPU_TGLLP__ == 1) || + (__SYCL_TARGET_INTEL_GPU_RKL__ == 1) || + (__SYCL_TARGET_INTEL_GPU_ADL_S__ == 1) || + (__SYCL_TARGET_INTEL_GPU_RPL_S__ == 1) || + (__SYCL_TARGET_INTEL_GPU_ADL_P__ == 1) || + (__SYCL_TARGET_INTEL_GPU_ADL_N__ == 1) || + (__SYCL_TARGET_INTEL_GPU_DG1__ == 1) || + (__SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1) || + (__SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1) || + (__SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1) || + (__SYCL_TARGET_INTEL_GPU_PVC__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM50__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM52__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM53__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM60__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM61__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM62__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM70__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM72__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM75__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM80__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM86__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM87__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM89__ == 1) || + (__SYCL_TARGET_NVIDIA_GPU_SM90__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX700__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX701__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX702__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX801__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX802__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX803__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX805__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX810__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX900__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX902__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX904__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX906__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX908__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX90A__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX1010__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX1011__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX1012__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX1013__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX1030__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX1031__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX1032__ == 1); + +struct IsAOTForArchitectureClass { + // Allocate an array of size == size of + // ext::oneapi::experimental::architecture enum. + bool arr[static_cast(max_architecture) + 1]; + + using arch = ext::oneapi::experimental::architecture; + + constexpr IsAOTForArchitectureClass() : arr() { + arr[static_cast(arch::x86_64)] = __SYCL_TARGET_INTEL_X86_64__ == 1; + arr[static_cast(arch::intel_gpu_bdw)] = + __SYCL_TARGET_INTEL_GPU_BDW__ == 1; + arr[static_cast(arch::intel_gpu_skl)] = + __SYCL_TARGET_INTEL_GPU_SKL__ == 1; + arr[static_cast(arch::intel_gpu_kbl)] = + __SYCL_TARGET_INTEL_GPU_KBL__ == 1; + arr[static_cast(arch::intel_gpu_cfl)] = + __SYCL_TARGET_INTEL_GPU_CFL__ == 1; + arr[static_cast(arch::intel_gpu_apl)] = + __SYCL_TARGET_INTEL_GPU_APL__ == 1; + arr[static_cast(arch::intel_gpu_glk)] = + __SYCL_TARGET_INTEL_GPU_GLK__ == 1; + arr[static_cast(arch::intel_gpu_whl)] = + __SYCL_TARGET_INTEL_GPU_WHL__ == 1; + arr[static_cast(arch::intel_gpu_aml)] = + __SYCL_TARGET_INTEL_GPU_AML__ == 1; + arr[static_cast(arch::intel_gpu_cml)] = + __SYCL_TARGET_INTEL_GPU_CML__ == 1; + arr[static_cast(arch::intel_gpu_icllp)] = + __SYCL_TARGET_INTEL_GPU_ICLLP__ == 1; + arr[static_cast(arch::intel_gpu_ehl)] = + __SYCL_TARGET_INTEL_GPU_EHL__ == 1; + arr[static_cast(arch::intel_gpu_tgllp)] = + __SYCL_TARGET_INTEL_GPU_TGLLP__ == 1; + arr[static_cast(arch::intel_gpu_rkl)] = + __SYCL_TARGET_INTEL_GPU_RKL__ == 1; + arr[static_cast(arch::intel_gpu_adl_s)] = + __SYCL_TARGET_INTEL_GPU_ADL_S__ == 1; + arr[static_cast(arch::intel_gpu_rpl_s)] = + __SYCL_TARGET_INTEL_GPU_RPL_S__ == 1; + arr[static_cast(arch::intel_gpu_adl_p)] = + __SYCL_TARGET_INTEL_GPU_ADL_P__ == 1; + arr[static_cast(arch::intel_gpu_adl_n)] = + __SYCL_TARGET_INTEL_GPU_ADL_N__ == 1; + arr[static_cast(arch::intel_gpu_dg1)] = + __SYCL_TARGET_INTEL_GPU_DG1__ == 1; + arr[static_cast(arch::intel_gpu_acm_g10)] = + __SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1; + arr[static_cast(arch::intel_gpu_acm_g11)] = + __SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1; + arr[static_cast(arch::intel_gpu_acm_g12)] = + __SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1; + arr[static_cast(arch::intel_gpu_pvc)] = + __SYCL_TARGET_INTEL_GPU_PVC__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_50)] = + __SYCL_TARGET_NVIDIA_GPU_SM50__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_52)] = + __SYCL_TARGET_NVIDIA_GPU_SM52__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_53)] = + __SYCL_TARGET_NVIDIA_GPU_SM53__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_60)] = + __SYCL_TARGET_NVIDIA_GPU_SM60__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_61)] = + __SYCL_TARGET_NVIDIA_GPU_SM61__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_62)] = + __SYCL_TARGET_NVIDIA_GPU_SM62__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_70)] = + __SYCL_TARGET_NVIDIA_GPU_SM70__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_72)] = + __SYCL_TARGET_NVIDIA_GPU_SM72__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_75)] = + __SYCL_TARGET_NVIDIA_GPU_SM75__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_80)] = + __SYCL_TARGET_NVIDIA_GPU_SM80__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_86)] = + __SYCL_TARGET_NVIDIA_GPU_SM86__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_87)] = + __SYCL_TARGET_NVIDIA_GPU_SM87__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_89)] = + __SYCL_TARGET_NVIDIA_GPU_SM89__ == 1; + arr[static_cast(arch::nvidia_gpu_sm_90)] = + __SYCL_TARGET_NVIDIA_GPU_SM90__ == 1; + arr[static_cast(arch::amd_gpu_gfx700)] = + __SYCL_TARGET_AMD_GPU_GFX700__ == 1; + arr[static_cast(arch::amd_gpu_gfx701)] = + __SYCL_TARGET_AMD_GPU_GFX701__ == 1; + arr[static_cast(arch::amd_gpu_gfx702)] = + __SYCL_TARGET_AMD_GPU_GFX702__ == 1; + arr[static_cast(arch::amd_gpu_gfx801)] = + __SYCL_TARGET_AMD_GPU_GFX801__ == 1; + arr[static_cast(arch::amd_gpu_gfx802)] = + __SYCL_TARGET_AMD_GPU_GFX802__ == 1; + arr[static_cast(arch::amd_gpu_gfx803)] = + __SYCL_TARGET_AMD_GPU_GFX803__ == 1; + arr[static_cast(arch::amd_gpu_gfx805)] = + __SYCL_TARGET_AMD_GPU_GFX805__ == 1; + arr[static_cast(arch::amd_gpu_gfx810)] = + __SYCL_TARGET_AMD_GPU_GFX810__ == 1; + arr[static_cast(arch::amd_gpu_gfx900)] = + __SYCL_TARGET_AMD_GPU_GFX900__ == 1; + arr[static_cast(arch::amd_gpu_gfx902)] = + __SYCL_TARGET_AMD_GPU_GFX902__ == 1; + arr[static_cast(arch::amd_gpu_gfx904)] = + __SYCL_TARGET_AMD_GPU_GFX904__ == 1; + arr[static_cast(arch::amd_gpu_gfx906)] = + __SYCL_TARGET_AMD_GPU_GFX906__ == 1; + arr[static_cast(arch::amd_gpu_gfx908)] = + __SYCL_TARGET_AMD_GPU_GFX908__ == 1; + arr[static_cast(arch::amd_gpu_gfx90a)] = + __SYCL_TARGET_AMD_GPU_GFX90A__ == 1; + arr[static_cast(arch::amd_gpu_gfx1010)] = + __SYCL_TARGET_AMD_GPU_GFX1010__ == 1; + arr[static_cast(arch::amd_gpu_gfx1011)] = + __SYCL_TARGET_AMD_GPU_GFX1011__ == 1; + arr[static_cast(arch::amd_gpu_gfx1012)] = + __SYCL_TARGET_AMD_GPU_GFX1012__ == 1; + arr[static_cast(arch::amd_gpu_gfx1030)] = + __SYCL_TARGET_AMD_GPU_GFX1030__ == 1; + arr[static_cast(arch::amd_gpu_gfx1031)] = + __SYCL_TARGET_AMD_GPU_GFX1031__ == 1; + arr[static_cast(arch::amd_gpu_gfx1032)] = + __SYCL_TARGET_AMD_GPU_GFX1032__ == 1; + } +}; + +// One entry for each enumerator in "architecture" telling whether the AOT +// target matches that architecture. +static constexpr IsAOTForArchitectureClass is_aot_for_architecture; + +// Reads the value of "is_allowable_aot_mode" via a template to defer triggering +// static_assert() until template instantiation time. +template +constexpr static bool allowable_aot_mode() { + return is_allowable_aot_mode; +} + +// Tells if the current device has one of the architectures in the parameter +// pack. +template +constexpr static bool device_architecture_is() { + return (is_aot_for_architecture.arr[static_cast(Archs)] || ...); +} + +// Helper object used to implement "else_if_architecture_is" and "otherwise". +// The "MakeCall" template parameter tells whether a previous clause in the +// "if-elseif-elseif ..." chain was true. When "MakeCall" is false, some +// previous clause was true, so none of the subsequent +// "else_if_architecture_is" or "otherwise" member functions should call the +// user's function. +template class if_architecture_helper { +public: + template + constexpr auto else_if_architecture_is(T fnTrue, Args... args) { + if constexpr (MakeCall && device_architecture_is()) { + fnTrue(args...); + return if_architecture_helper{}; + } else { + (void)fnTrue; + return if_architecture_helper{}; + } + } + + template + constexpr void otherwise(T fn, Args... args) { + if constexpr (MakeCall) { + fn(args...); + } + } +}; +} // namespace detail + +namespace ext::oneapi::experimental { + +template +constexpr static auto if_architecture_is(T fnTrue, Args... args) { + static_assert(detail::allowable_aot_mode(), + "The if_architecture_is function may only be used when AOT " + "compiling with '-fsycl-targets=spir64_x86_64' or " + "'-fsycl-targets=*_gpu_*'"); + if constexpr (detail::device_architecture_is()) { + fnTrue(args...); + return detail::if_architecture_helper{}; + } else { + (void)fnTrue; + return detail::if_architecture_helper{}; + } +} + +} // namespace ext::oneapi::experimental +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl diff --git a/sycl/unittests/Extensions/DeviceArchitecture.cpp b/sycl/unittests/Extensions/DeviceArchitecture.cpp index 452d29ba17211..46662dbcefa04 100644 --- a/sycl/unittests/Extensions/DeviceArchitecture.cpp +++ b/sycl/unittests/Extensions/DeviceArchitecture.cpp @@ -8,14 +8,14 @@ #include -// define one of __SYCL_TARGET_INTEL_*** macro, e.g., the one for SKL +// define one of __SYCL_TARGET_ macro, e.g., the one for SKL #define __SYCL_TARGET_INTEL_GPU_SKL__ 1 -#include +#include using namespace sycl; using namespace sycl::detail; -using namespace sycl::ext::intel::experimental; +using namespace sycl::ext::oneapi::experimental; TEST(DeviceArchitectureTest, DeviceArchitecture_If) { bool res = false;