diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 34cc069c5b7ce..263600ffe12d9 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -824,19 +824,6 @@ static bool addSYCLDefaultTriple(Compilation &C,
   return true;
 }
 
-// Prefix for Intel GPU specific targets used for -fsycl-targets
-constexpr char IntelGPU[] = "intel_gpu_";
-
-static llvm::Optional<StringRef> isIntelGPUTarget(StringRef Target) {
-  // Handle target specifications that resemble 'intel_gpu_*' here. These are
-  // 'spir64_gen' based.
-  if (Target.startswith(IntelGPU)) {
-    return tools::SYCL::gen::resolveGenDevice(
-        Target.drop_front(sizeof(IntelGPU) - 1));
-  }
-  return llvm::None;
-}
-
 void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
                                               InputList &Inputs) {
 
@@ -845,6 +832,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
   //
   // We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA
   // or HIP type. However, mixed CUDA/HIP compilation is not supported.
+  using namespace tools::SYCL;
   bool IsCuda =
       llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
         return types::isCuda(I.first);
@@ -1122,12 +1110,24 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
 
         for (StringRef Val : SYCLTargetsValues->getValues()) {
           StringRef UserTargetName(Val);
-          if (auto Device = isIntelGPUTarget(Val)) {
+          if (auto Device = gen::isGPUTarget<gen::IntelGPU>(Val)) {
             if (Device->empty()) {
               Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
               continue;
             }
             UserTargetName = "spir64_gen";
+          } else if (auto Device = gen::isGPUTarget<gen::NvidiaGPU>(Val)) {
+            if (Device->empty()) {
+              Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
+              continue;
+            }
+            UserTargetName = "nvptx64-nvidia-cuda";
+          } else if (auto Device = gen::isGPUTarget<gen::AmdGPU>(Val)) {
+            if (Device->empty()) {
+              Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
+              continue;
+            }
+            UserTargetName = "amdgcn-amd-amdhsa";
           }
 
           if (!isValidSYCLTriple(MakeSYCLDeviceTriple(UserTargetName))) {
@@ -5716,6 +5716,7 @@ class OffloadingActionBuilder final {
     }
 
     bool initialize() override {
+      using namespace tools::SYCL;
       // Get the SYCL toolchains. If we don't get any, the action builder will
       // know there is nothing to do related to SYCL offloading.
       auto SYCLTCRange = C.getOffloadToolChains<Action::OFK_SYCL>();
@@ -5755,7 +5756,7 @@ class OffloadingActionBuilder final {
           llvm::StringMap<StringRef> FoundNormalizedTriples;
           for (StringRef Val : SYCLTargetsValues->getValues()) {
             StringRef UserTargetName(Val);
-            if (auto ValidDevice = isIntelGPUTarget(Val)) {
+            if (auto ValidDevice = gen::isGPUTarget<gen::IntelGPU>(Val)) {
               if (ValidDevice->empty())
                 // Unrecognized, we have already diagnosed this earlier; skip.
                 continue;
@@ -5763,7 +5764,27 @@ class OffloadingActionBuilder final {
               GpuArchList.emplace_back(C.getDriver().MakeSYCLDeviceTriple(
                                        "spir64_gen"), ValidDevice->data());
               UserTargetName = "spir64_gen";
+            } else if (auto ValidDevice =
+                           gen::isGPUTarget<gen::NvidiaGPU>(Val)) {
+              if (ValidDevice->empty())
+                // Unrecognized, we have already diagnosed this earlier; skip.
+                continue;
+              // Add the proper -device value to the list.
+              GpuArchList.emplace_back(
+                  C.getDriver().MakeSYCLDeviceTriple("nvptx64-nvidia-cuda"),
+                  ValidDevice->data());
+              UserTargetName = "nvptx64-nvidia-cuda";
+            } else if (auto ValidDevice = gen::isGPUTarget<gen::AmdGPU>(Val)) {
+              if (ValidDevice->empty())
+                // Unrecognized, we have already diagnosed this earlier; skip.
+                continue;
+              // Add the proper -device value to the list.
+              GpuArchList.emplace_back(
+                  C.getDriver().MakeSYCLDeviceTriple("amdgcn-amd-amdhsa"),
+                  ValidDevice->data());
+              UserTargetName = "amdgcn-amd-amdhsa";
             }
+
             llvm::Triple TT(C.getDriver().MakeSYCLDeviceTriple(Val));
             std::string NormalizedName = TT.normalize();
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 869bd83a7b065..6ef84262341c6 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5192,10 +5192,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     // between device and host where we should be able to use the offloading
     // arch to add the macro to the host compile.
     auto addTargetMacros = [&](const llvm::Triple &Triple) {
-      if (!Triple.isSPIR())
+      if (!Triple.isSPIR() && !Triple.isNVPTX() && !Triple.isAMDGCN())
         return;
       SmallString<64> Macro;
-      if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) {
+      if ((Triple.isSPIR() &&
+           Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) ||
+          Triple.isNVPTX() || Triple.isAMDGCN()) {
         StringRef Device = JA.getOffloadingArch();
         if (!Device.empty()) {
           Macro = "-D";
diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp
index ae40b71f4c99b..794be18d06a35 100644
--- a/clang/lib/Driver/ToolChains/SYCL.cpp
+++ b/clang/lib/Driver/ToolChains/SYCL.cpp
@@ -596,60 +596,130 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C,
 StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) {
   StringRef Device;
   Device = llvm::StringSwitch<StringRef>(DeviceName)
-               .Cases("bdw", "8_0_0", "bdw")
-               .Cases("skl", "9_0_9", "skl")
-               .Cases("kbl", "9_1_9", "kbl")
-               .Cases("cfl", "9_2_9", "cfl")
-               .Cases("apl", "9_3_0", "apl")
-               .Cases("glk", "9_4_0", "glk")
-               .Cases("whl", "9_5_0", "whl")
-               .Cases("aml", "9_6_0", "aml")
-               .Cases("cml", "9_7_0", "cml")
-               .Cases("icllp", "11_0_0", "icllp")
-               .Cases("ehl", "11_2_0", "ehl")
-               .Cases("tgllp", "12_0_0", "tgllp")
-               .Case("rkl", "rkl")
-               .Case("adl_s", "adl_s")
-               .Case("rpl_s", "rpl_s")
-               .Case("adl_p", "adl_p")
-               .Case("adl_n", "adl_n")
-               .Cases("dg1", "12_10_0", "dg1")
-               .Case("acm_g10", "acm_g10")
-               .Case("acm_g11", "acm_g11")
-               .Case("acm_g12", "acm_g12")
-               .Case("pvc", "pvc")
+               .Cases("intel_gpu_bdw", "intel_gpu_8_0_0", "bdw")
+               .Cases("intel_gpu_skl", "intel_gpu_9_0_9", "skl")
+               .Cases("intel_gpu_kbl", "intel_gpu_9_1_9", "kbl")
+               .Cases("intel_gpu_cfl", "intel_gpu_9_2_9", "cfl")
+               .Cases("intel_gpu_apl", "intel_gpu_9_3_0", "apl")
+               .Cases("intel_gpu_glk", "intel_gpu_9_4_0", "glk")
+               .Cases("intel_gpu_whl", "intel_gpu_9_5_0", "whl")
+               .Cases("intel_gpu_aml", "intel_gpu_9_6_0", "aml")
+               .Cases("intel_gpu_cml", "intel_gpu_9_7_0", "cml")
+               .Cases("intel_gpu_icllp", "intel_gpu_11_0_0", "icllp")
+               .Cases("intel_gpu_ehl", "intel_gpu_11_2_0", "ehl")
+               .Cases("intel_gpu_tgllp", "intel_gpu_12_0_0", "tgllp")
+               .Case("intel_gpu_rkl", "rkl")
+               .Case("intel_gpu_adl_s", "adl_s")
+               .Case("intel_gpu_rpl_s", "rpl_s")
+               .Case("intel_gpu_adl_p", "adl_p")
+               .Case("intel_gpu_adl_n", "adl_n")
+               .Cases("intel_gpu_dg1", "intel_gpu_12_10_0", "dg1")
+               .Case("intel_gpu_acm_g10", "acm_g10")
+               .Case("intel_gpu_acm_g11", "acm_g11")
+               .Case("intel_gpu_acm_g12", "acm_g12")
+               .Case("intel_gpu_pvc", "pvc")
+               .Case("nvidia_gpu_sm_50", "sm_50")
+               .Case("nvidia_gpu_sm_52", "sm_52")
+               .Case("nvidia_gpu_sm_53", "sm_53")
+               .Case("nvidia_gpu_sm_60", "sm_60")
+               .Case("nvidia_gpu_sm_61", "sm_61")
+               .Case("nvidia_gpu_sm_62", "sm_62")
+               .Case("nvidia_gpu_sm_70", "sm_70")
+               .Case("nvidia_gpu_sm_72", "sm_72")
+               .Case("nvidia_gpu_sm_75", "sm_75")
+               .Case("nvidia_gpu_sm_80", "sm_80")
+               .Case("nvidia_gpu_sm_86", "sm_86")
+               .Case("nvidia_gpu_sm_87", "sm_87")
+               .Case("nvidia_gpu_sm_89", "sm_89")
+               .Case("nvidia_gpu_sm_90", "sm_90")
+               .Case("amd_gpu_gfx700", "gfx700")
+               .Case("amd_gpu_gfx701", "gfx701")
+               .Case("amd_gpu_gfx702", "gfx702")
+               .Case("amd_gpu_gfx801", "gfx801")
+               .Case("amd_gpu_gfx802", "gfx802")
+               .Case("amd_gpu_gfx803", "gfx803")
+               .Case("amd_gpu_gfx805", "gfx805")
+               .Case("amd_gpu_gfx810", "gfx810")
+               .Case("amd_gpu_gfx900", "gfx900")
+               .Case("amd_gpu_gfx902", "gfx902")
+               .Case("amd_gpu_gfx904", "gfx904")
+               .Case("amd_gpu_gfx906", "gfx906")
+               .Case("amd_gpu_gfx908", "gfx908")
+               .Case("amd_gpu_gfx90a", "gfx90a")
+               .Case("amd_gpu_gfx1010", "gfx1010")
+               .Case("amd_gpu_gfx1011", "gfx1011")
+               .Case("amd_gpu_gfx1012", "gfx1012")
+               .Case("amd_gpu_gfx1013", "gfx1013")
+               .Case("amd_gpu_gfx1030", "gfx1030")
+               .Case("amd_gpu_gfx1031", "gfx1031")
+               .Case("amd_gpu_gfx1032", "gfx1032")
                .Default("");
   return Device;
 }
 
-StringRef SYCL::gen::getGenDeviceMacro(StringRef DeviceName) {
+SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) {
   SmallString<64> Macro;
   StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName)
-                      .Case("bdw", "BDW")
-                      .Case("skl", "SKL")
-                      .Case("kbl", "KBL")
-                      .Case("cfl", "CFL")
-                      .Case("apl", "APL")
-                      .Case("glk", "GLK")
-                      .Case("whl", "WHL")
-                      .Case("aml", "AML")
-                      .Case("cml", "CML")
-                      .Case("icllp", "ICLLP")
-                      .Case("ehl", "EHL")
-                      .Case("tgllp", "TGLLP")
-                      .Case("rkl", "RKL")
-                      .Case("adl_s", "ADL_S")
-                      .Case("rpl_s", "RPL_S")
-                      .Case("adl_p", "ADL_P")
-                      .Case("adl_n", "ADL_N")
-                      .Case("dg1", "DG1")
-                      .Case("acm_g10", "ACM_G10")
-                      .Case("acm_g11", "ACM_G11")
-                      .Case("acm_g12", "ACM_G12")
-                      .Case("pvc", "PVC")
+                      .Case("bdw", "INTEL_GPU_BDW")
+                      .Case("skl", "INTEL_GPU_SKL")
+                      .Case("kbl", "INTEL_GPU_KBL")
+                      .Case("cfl", "INTEL_GPU_CFL")
+                      .Case("apl", "INTEL_GPU_APL")
+                      .Case("glk", "INTEL_GPU_GLK")
+                      .Case("whl", "INTEL_GPU_WHL")
+                      .Case("aml", "INTEL_GPU_AML")
+                      .Case("cml", "INTEL_GPU_CML")
+                      .Case("icllp", "INTEL_GPU_ICLLP")
+                      .Case("ehl", "INTEL_GPU_EHL")
+                      .Case("tgllp", "INTEL_GPU_TGLLP")
+                      .Case("rkl", "INTEL_GPU_RKL")
+                      .Case("adl_s", "INTEL_GPU_ADL_S")
+                      .Case("rpl_s", "INTEL_GPU_RPL_S")
+                      .Case("adl_p", "INTEL_GPU_ADL_P")
+                      .Case("adl_n", "INTEL_GPU_ADL_N")
+                      .Case("dg1", "INTEL_GPU_DG1")
+                      .Case("acm_g10", "INTEL_GPU_ACM_G10")
+                      .Case("acm_g11", "INTEL_GPU_ACM_G11")
+                      .Case("acm_g12", "INTEL_GPU_ACM_G12")
+                      .Case("pvc", "INTEL_GPU_PVC")
+                      .Case("sm_50", "NVIDIA_GPU_SM_50")
+                      .Case("sm_52", "NVIDIA_GPU_SM_52")
+                      .Case("sm_53", "NVIDIA_GPU_SM_53")
+                      .Case("sm_60", "NVIDIA_GPU_SM_60")
+                      .Case("sm_61", "NVIDIA_GPU_SM_61")
+                      .Case("sm_62", "NVIDIA_GPU_SM_62")
+                      .Case("sm_70", "NVIDIA_GPU_SM_70")
+                      .Case("sm_72", "NVIDIA_GPU_SM_72")
+                      .Case("sm_75", "NVIDIA_GPU_SM_75")
+                      .Case("sm_80", "NVIDIA_GPU_SM_80")
+                      .Case("sm_86", "NVIDIA_GPU_SM_86")
+                      .Case("sm_87", "NVIDIA_GPU_SM_87")
+                      .Case("sm_89", "NVIDIA_GPU_SM_89")
+                      .Case("sm_90", "NVIDIA_GPU_SM_90")
+                      .Case("gfx700", "AMD_GPU_GFX700")
+                      .Case("gfx701", "AMD_GPU_GFX701")
+                      .Case("gfx702", "AMD_GPU_GFX702")
+                      .Case("gfx801", "AMD_GPU_GFX801")
+                      .Case("gfx802", "AMD_GPU_GFX802")
+                      .Case("gfx803", "AMD_GPU_GFX803")
+                      .Case("gfx805", "AMD_GPU_GFX805")
+                      .Case("gfx810", "AMD_GPU_GFX810")
+                      .Case("gfx900", "AMD_GPU_GFX900")
+                      .Case("gfx902", "AMD_GPU_GFX902")
+                      .Case("gfx904", "AMD_GPU_GFX904")
+                      .Case("gfx906", "AMD_GPU_GFX906")
+                      .Case("gfx908", "AMD_GPU_GFX908")
+                      .Case("gfx90a", "AMD_GPU_GFX90A")
+                      .Case("gfx1010", "AMD_GPU_GFX1010")
+                      .Case("gfx1011", "AMD_GPU_GFX1011")
+                      .Case("gfx1012", "AMD_GPU_GFX1012")
+                      .Case("gfx1013", "AMD_GPU_GFX1013")
+                      .Case("gfx1030", "AMD_GPU_GFX1030")
+                      .Case("gfx1031", "AMD_GPU_GFX1031")
+                      .Case("gfx1032", "AMD_GPU_GFX1032")
                       .Default("");
   if (!Ext.empty()) {
-    Macro = "__SYCL_TARGET_INTEL_GPU_";
+    Macro = "__SYCL_TARGET_";
     Macro += Ext;
     Macro += "__";
   }
@@ -759,6 +829,25 @@ static void parseTargetOpts(StringRef ArgString, const llvm::opt::ArgList &Args,
     CmdArgs.push_back(Args.MakeArgString(TA));
 }
 
+void SYCLToolChain::TranslateGPUTargetOpt(const llvm::opt::ArgList &Args,
+                                          llvm::opt::ArgStringList &CmdArgs,
+                                          OptSpecifier Opt_EQ) const {
+  for (auto *A : Args) {
+    if (A->getOption().matches(Opt_EQ)) {
+      if (auto GpuDevice =
+              tools::SYCL::gen::isGPUTarget<tools::SYCL::gen::AmdGPU>(
+                  A->getValue())) {
+        StringRef ArgString;
+        SmallString<64> OffloadArch("--offload-arch=");
+        OffloadArch += GpuDevice->data();
+        ArgString = OffloadArch;
+        parseTargetOpts(ArgString, Args, CmdArgs);
+        A->claim();
+      }
+    }
+  }
+}
+
 // Expects a specific type of option (e.g. -Xsycl-target-backend) and will
 // extract the arguments.
 void SYCLToolChain::TranslateTargetOpt(const llvm::opt::ArgList &Args,
@@ -914,6 +1003,7 @@ void SYCLToolChain::TranslateBackendTargetArgs(
   // Handle -Xsycl-target-backend.
   TranslateTargetOpt(Args, CmdArgs, options::OPT_Xsycl_backend,
                      options::OPT_Xsycl_backend_EQ, Device);
+  TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_fsycl_targets_EQ);
 }
 
 void SYCLToolChain::TranslateLinkerTargetArgs(
diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h
index fb78d2ab7e270..01f4e9c15271c 100644
--- a/clang/lib/Driver/ToolChains/SYCL.h
+++ b/clang/lib/Driver/ToolChains/SYCL.h
@@ -106,7 +106,21 @@ class LLVM_LIBRARY_VISIBILITY BackendCompiler : public Tool {
 };
 
 StringRef resolveGenDevice(StringRef DeviceName);
-StringRef getGenDeviceMacro(StringRef DeviceName);
+SmallString<64> getGenDeviceMacro(StringRef DeviceName);
+
+// // Prefix for GPU specific targets used for -fsycl-targets
+constexpr char IntelGPU[] = "intel_gpu_";
+constexpr char NvidiaGPU[] = "nvidia_gpu_";
+constexpr char AmdGPU[] = "amd_gpu_";
+
+template <auto GPUArh> llvm::Optional<StringRef> isGPUTarget(StringRef Target) {
+  // Handle target specifications that resemble '(intel, nvidia, amd)_gpu_*'
+  // here.
+  if (Target.startswith(GPUArh)) {
+    return resolveGenDevice(Target);
+  }
+  return llvm::None;
+}
 
 } // end namespace gen
 
@@ -189,6 +203,9 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain {
                           llvm::opt::OptSpecifier Opt,
                           llvm::opt::OptSpecifier Opt_EQ,
                           StringRef Device) const;
+  void TranslateGPUTargetOpt(const llvm::opt::ArgList &Args,
+                             llvm::opt::ArgStringList &CmdArgs,
+                             llvm::opt::OptSpecifier Opt_EQ) const;
 };
 
 } // end namespace toolchains
diff --git a/clang/test/Driver/sycl-intel-gpu.cpp b/clang/test/Driver/sycl-intel-gpu.cpp
deleted file mode 100644
index 40fda7134cbf8..0000000000000
--- a/clang/test/Driver/sycl-intel-gpu.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-/// Tests the behaviors of using -fsycl-targets=intel_gpu*
-
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_bdw -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bdw -DMAC_STR=BDW
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_8_0_0 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bdw -DMAC_STR=BDW
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=skl -DMAC_STR=SKL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_0_9 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=skl -DMAC_STR=SKL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_kbl -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=kbl -DMAC_STR=KBL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_1_9 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=kbl -DMAC_STR=KBL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_cfl -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cfl -DMAC_STR=CFL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_2_9 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cfl -DMAC_STR=CFL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_apl -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=apl -DMAC_STR=APL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_3_0 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=apl -DMAC_STR=APL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_glk -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=glk -DMAC_STR=GLK
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_4_0 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=glk -DMAC_STR=GLK
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_whl -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=whl -DMAC_STR=WHL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_5_0 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=whl -DMAC_STR=WHL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_aml -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=aml -DMAC_STR=AML
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_6_0 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=aml -DMAC_STR=AML
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_cml -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cml -DMAC_STR=CML
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_7_0 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cml -DMAC_STR=CML
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_icllp -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=icllp \
-// RUN:             -DMAC_STR=ICLLP
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_11_0_0 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=icllp \
-// RUN:             -DMAC_STR=ICLLP
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_ehl -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=ehl -DMAC_STR=EHL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_11_2_0 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=ehl -DMAC_STR=EHL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_tgllp -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=tgllp \
-// RUN:             -DMAC_STR=TGLLP
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_0_0 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=tgllp \
-// RUN:             -DMAC_STR=TGLLP
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_rkl -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=rkl -DMAC_STR=RKL
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_adl_s -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=adl_s \
-// RUN:             -DMAC_STR=ADL_S
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_rpl_s -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=rpl_s \
-// RUN:             -DMAC_STR=RPL_S
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_adl_p -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=adl_p \
-// RUN:             -DMAC_STR=ADL_P
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_adl_n -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=adl_n \
-// RUN:             -DMAC_STR=ADL_N
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=dg1 -DMAC_STR=DG1
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_10_0 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=dg1 -DMAC_STR=DG1
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g10 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=acm_g10 \
-// RUN:             -DMAC_STR=ACM_G10
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g11 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=acm_g11 \
-// RUN:             -DMAC_STR=ACM_G11
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g12 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=acm_g12 \
-// RUN:             -DMAC_STR=ACM_G12
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc -DMAC_STR=PVC
-// MACRO: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"
-// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
-// DEVICE: ocloc{{.*}} "-device" "[[DEV_STR]]"
-// MACRO: clang{{.*}} "-fsycl-is-host"
-// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
-
-/// -fsycl-targets=spir64_x86_64 should set a specific macro
-// RUN: %clangxx -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=MACRO_X86_64
-// RUN: %clang_cl -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=MACRO_X86_64
-// MACRO_X86_64: clang{{.*}} "-triple" "spir64_x86_64-unknown-unknown"
-// MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__"
-// MACRO_X86_64: clang{{.*}} "-fsycl-is-host"
-// MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__"
-
-/// test for invalid arch
-// RUN: %clangxx -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=BAD_INPUT
-// RUN: %clang_cl -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=BAD_INPUT
-// BAD_INPUT: error: SYCL target is invalid: 'intel_gpu_bad'
-
-/// Test for proper creation of fat object
-// RUN: %clangxx -c -fsycl -fsycl-targets=intel_gpu_skl \
-// RUN:   -target x86_64-unknown-linux-gnu -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=FATO
-// FATO: clang-offload-bundler{{.*}} "-type=o"
-// FATO: "-targets=sycl-spir64_gen-unknown-unknown-skl,host-x86_64-unknown-linux-gnu"
-
-/// Test for proper consumption of fat object
-// RUN: touch %t.o
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl \
-// RUN:   -target x86_64-unknown-linux-gnu -### %t.o 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=CONSUME_FAT
-// CONSUME_FAT: clang-offload-bundler{{.*}} "-type=o"
-// CONSUME_FAT: "-targets=host-x86_64-unknown-linux-gnu,sycl-spir64_gen-unknown-unknown-skl"
-// CONSUME_FAT: "-unbundle" "-allow-missing-bundles"
-
-/// Test phases, BoundArch settings used for -device target. Additional
-/// offload action used for compilation and backend compilation.
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl -fno-sycl-device-lib=all \
-// RUN:   -fno-sycl-instrument-device-code \
-// RUN:   -target x86_64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=CHECK_PHASES
-// CHECK_PHASES: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl)
-// CHECK_PHASES: 1: append-footer, {0}, c++, (host-sycl)
-// CHECK_PHASES: 2: preprocessor, {1}, c++-cpp-output, (host-sycl)
-// CHECK_PHASES: 3: input, "[[INPUT]]", c++, (device-sycl, skl)
-// CHECK_PHASES: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, skl)
-// CHECK_PHASES: 5: compiler, {4}, ir, (device-sycl, skl)
-// CHECK_PHASES: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (spir64_gen-unknown-unknown:skl)" {5}, c++-cpp-output
-// CHECK_PHASES: 7: compiler, {6}, ir, (host-sycl)
-// CHECK_PHASES: 8: backend, {7}, assembler, (host-sycl)
-// CHECK_PHASES: 9: assembler, {8}, object, (host-sycl)
-// CHECK_PHASES: 10: linker, {9}, image, (host-sycl)
-// CHECK_PHASES: 11: linker, {5}, ir, (device-sycl, skl)
-// CHECK_PHASES: 12: sycl-post-link, {11}, tempfiletable, (device-sycl, skl)
-// CHECK_PHASES: 13: file-table-tform, {12}, tempfilelist, (device-sycl, skl)
-// CHECK_PHASES: 14: llvm-spirv, {13}, tempfilelist, (device-sycl, skl)
-// CHECK_PHASES: 15: backend-compiler, {14}, image, (device-sycl, skl)
-// CHECK_PHASES: 16: file-table-tform, {12, 15}, tempfiletable, (device-sycl, skl)
-// CHECK_PHASES: 17: clang-offload-wrapper, {16}, object, (device-sycl, skl)
-// CHECK_PHASES: 18: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (spir64_gen-unknown-unknown:skl)" {17}, image
-
-/// Check that ocloc and macro settings only occur for the expected toolchains
-/// when mixing spir64_gen and intel_gpu
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64_gen \
-// RUN:   -Xsycl-target-backend=spir64_gen "-device skl" \
-// RUN:   -fno-sycl-device-lib=all -fno-sycl-instrument-device-code \
-// RUN:   -target x86_64-unknown-linux-gnu -### %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=CHECK_TOOLS_MIX
-// CHECK_TOOLS_MIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"
-// CHECK_TOOLS_MIX: "-D__SYCL_TARGET_INTEL_GPU_DG1__"
-// CHECK_TOOLS_MIX: ocloc{{.*}} "-device" "dg1"
-// CHECK_TOOLS_MIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"
-// CHECK_TOOLS_MIX-NOT: "-D__SYCL_TARGET_INTEL_GPU{{.*}}"
-// CHECK_TOOLS_MIX: ocloc{{.*}} "-device" "skl"
-
-/// Test phases when using both spir64_gen and intel_gpu*
-// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl,spir64_gen \
-// RUN:   -fno-sycl-device-lib=all -fno-sycl-instrument-device-code \
-// RUN:   -target x86_64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=CHECK_PHASES_MIX
-// CHECK_PHASES_MIX: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl)
-// CHECK_PHASES_MIX: 1: append-footer, {0}, c++, (host-sycl)
-// CHECK_PHASES_MIX: 2: preprocessor, {1}, c++-cpp-output, (host-sycl)
-// CHECK_PHASES_MIX: 3: input, "[[INPUT]]", c++, (device-sycl)
-// CHECK_PHASES_MIX: 4: preprocessor, {3}, c++-cpp-output, (device-sycl)
-// CHECK_PHASES_MIX: 5: compiler, {4}, ir, (device-sycl)
-// CHECK_PHASES_MIX: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (spir64_gen-unknown-unknown)" {5}, c++-cpp-output
-// CHECK_PHASES_MIX: 7: compiler, {6}, ir, (host-sycl)
-// CHECK_PHASES_MIX: 8: backend, {7}, assembler, (host-sycl)
-// CHECK_PHASES_MIX: 9: assembler, {8}, object, (host-sycl)
-// CHECK_PHASES_MIX: 10: linker, {9}, image, (host-sycl)
-// CHECK_PHASES_MIX: 11: input, "[[INPUT]]", c++, (device-sycl, skl)
-// CHECK_PHASES_MIX: 12: preprocessor, {11}, c++-cpp-output, (device-sycl, skl)
-// CHECK_PHASES_MIX: 13: compiler, {12}, ir, (device-sycl, skl)
-// CHECK_PHASES_MIX: 14: linker, {13}, ir, (device-sycl, skl)
-// CHECK_PHASES_MIX: 15: sycl-post-link, {14}, tempfiletable, (device-sycl, skl)
-// CHECK_PHASES_MIX: 16: file-table-tform, {15}, tempfilelist, (device-sycl, skl)
-// CHECK_PHASES_MIX: 17: llvm-spirv, {16}, tempfilelist, (device-sycl, skl)
-// CHECK_PHASES_MIX: 18: backend-compiler, {17}, image, (device-sycl, skl)
-// CHECK_PHASES_MIX: 19: file-table-tform, {15, 18}, tempfiletable, (device-sycl, skl)
-// CHECK_PHASES_MIX: 20: clang-offload-wrapper, {19}, object, (device-sycl, skl)
-// CHECK_PHASES_MIX: 21: linker, {5}, ir, (device-sycl)
-// CHECK_PHASES_MIX: 22: sycl-post-link, {21}, tempfiletable, (device-sycl)
-// CHECK_PHASES_MIX: 23: file-table-tform, {22}, tempfilelist, (device-sycl)
-// CHECK_PHASES_MIX: 24: llvm-spirv, {23}, tempfilelist, (device-sycl)
-// CHECK_PHASES_MIX: 25: backend-compiler, {24}, image, (device-sycl)
-// CHECK_PHASES_MIX: 26: file-table-tform, {22, 25}, tempfiletable, (device-sycl)
-// CHECK_PHASES_MIX: 27: clang-offload-wrapper, {26}, object, (device-sycl)
-// CHECK_PHASES_MIX: 28: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (spir64_gen-unknown-unknown:skl)" {20}, "device-sycl (spir64_gen-unknown-unknown)" {27}, image
-
diff --git a/clang/test/Driver/sycl-oneapi-gpu.cpp b/clang/test/Driver/sycl-oneapi-gpu.cpp
new file mode 100644
index 0000000000000..c64929873f281
--- /dev/null
+++ b/clang/test/Driver/sycl-oneapi-gpu.cpp
@@ -0,0 +1,384 @@
+/// Tests the behaviors of using -fsycl-targets=intel_gpu*
+
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_bdw -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bdw -DMAC_STR=BDW
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_8_0_0 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bdw -DMAC_STR=BDW
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=skl -DMAC_STR=SKL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_0_9 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=skl -DMAC_STR=SKL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_kbl -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=kbl -DMAC_STR=KBL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_1_9 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=kbl -DMAC_STR=KBL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_cfl -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cfl -DMAC_STR=CFL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_2_9 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cfl -DMAC_STR=CFL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_apl -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=apl -DMAC_STR=APL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_3_0 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=apl -DMAC_STR=APL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_glk -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=glk -DMAC_STR=GLK
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_4_0 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=glk -DMAC_STR=GLK
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_whl -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=whl -DMAC_STR=WHL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_5_0 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=whl -DMAC_STR=WHL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_aml -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=aml -DMAC_STR=AML
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_6_0 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=aml -DMAC_STR=AML
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_cml -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cml -DMAC_STR=CML
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_9_7_0 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=cml -DMAC_STR=CML
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_icllp -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=icllp \
+// RUN:             -DMAC_STR=ICLLP
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_11_0_0 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=icllp \
+// RUN:             -DMAC_STR=ICLLP
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_ehl -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=ehl -DMAC_STR=EHL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_11_2_0 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=ehl -DMAC_STR=EHL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_tgllp -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=tgllp \
+// RUN:             -DMAC_STR=TGLLP
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_0_0 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=tgllp \
+// RUN:             -DMAC_STR=TGLLP
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_rkl -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=rkl -DMAC_STR=RKL
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_adl_s -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=adl_s \
+// RUN:             -DMAC_STR=ADL_S
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_rpl_s -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=rpl_s \
+// RUN:             -DMAC_STR=RPL_S
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_adl_p -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=adl_p \
+// RUN:             -DMAC_STR=ADL_P
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_adl_n -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=adl_n \
+// RUN:             -DMAC_STR=ADL_N
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=dg1 -DMAC_STR=DG1
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_10_0 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=dg1 -DMAC_STR=DG1
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g10 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=acm_g10 \
+// RUN:             -DMAC_STR=ACM_G10
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g11 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=acm_g11 \
+// RUN:             -DMAC_STR=ACM_G11
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g12 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=acm_g12 \
+// RUN:             -DMAC_STR=ACM_G12
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc -DMAC_STR=PVC
+// MACRO: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"
+// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
+// DEVICE: ocloc{{.*}} "-device" "[[DEV_STR]]"
+// MACRO: clang{{.*}} "-fsycl-is-host"
+// MACRO: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
+
+/// Tests the behaviors of using -fsycl-targets=nvidia_gpu*
+
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_50 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_50 -DMAC_STR=SM_50
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_52 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_52 -DMAC_STR=SM_52
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_53 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_53 -DMAC_STR=SM_53
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_60 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_60 -DMAC_STR=SM_60
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_61 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_61 -DMAC_STR=SM_61
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_62 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_62 -DMAC_STR=SM_62
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_70 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_70 -DMAC_STR=SM_70
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_72 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_72 -DMAC_STR=SM_72
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_75 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_75 -DMAC_STR=SM_75
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_80 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_80 -DMAC_STR=SM_80
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_86 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_86 -DMAC_STR=SM_86
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_87 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_87 -DMAC_STR=SM_87
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_89 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_89 -DMAC_STR=SM_89
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_90 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_NVIDIA,MACRO_NVIDIA -DDEV_STR=sm_90 -DMAC_STR=SM_90
+// MACRO_NVIDIA: clang{{.*}} "-triple" "nvptx64-nvidia-cuda"
+// DEVICE_NVIDIA: llvm-foreach{{.*}} "--gpu-name" "[[DEV_STR]]"
+// MACRO_NVIDIA: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu"
+// MACRO_NVIDIA: "-D__SYCL_TARGET_NVIDIA_GPU_[[MAC_STR]]__"
+
+/// Tests the behaviors of using -fsycl-targets=amd_gpu*
+
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx700 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx700 -DMAC_STR=GFX700
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx701 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx701 -DMAC_STR=GFX701
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx702 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx702 -DMAC_STR=GFX702
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx801 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx801 -DMAC_STR=GFX801
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx802 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx802 -DMAC_STR=GFX802
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx803 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx803 -DMAC_STR=GFX803
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx805 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx805 -DMAC_STR=GFX805
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx810 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx810 -DMAC_STR=GFX810
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx900 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx900 -DMAC_STR=GFX900
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx902 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx902 -DMAC_STR=GFX902
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx904 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx904 -DMAC_STR=GFX904
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx906 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx906 -DMAC_STR=GFX906
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx908 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx908 -DMAC_STR=GFX908
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx90a -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx90a -DMAC_STR=GFX90A
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1010 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1010 -DMAC_STR=GFX1010
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1011 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1011 -DMAC_STR=GFX1011
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1012 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1012 -DMAC_STR=GFX1012
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1013 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1013 -DMAC_STR=GFX1013
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1030 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1030 -DMAC_STR=GFX1030
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1031 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1031 -DMAC_STR=GFX1031
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1032 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1032 -DMAC_STR=GFX1032
+// MACRO_AMD: clang{{.*}} "-triple" "amdgcn-amd-amdhsa"
+// MACRO_AMD: "-D__SYCL_TARGET_AMD_GPU_[[MAC_STR]]__"
+// DEVICE_AMD: clang-offload-wrapper{{.*}} "-compile-opts=--offload-arch=[[DEV_STR]]"
+// MACRO_AMD: clang{{.*}} "-fsycl-is-host"
+// MACRO_AMD: "-D__SYCL_TARGET_AMD_GPU_[[MAC_STR]]__"
+
+/// -fsycl-targets=spir64_x86_64 should set a specific macro
+// RUN: %clangxx -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=MACRO_X86_64
+// RUN: %clang_cl -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=MACRO_X86_64
+// MACRO_X86_64: clang{{.*}} "-triple" "spir64_x86_64-unknown-unknown"
+// MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__"
+// MACRO_X86_64: clang{{.*}} "-fsycl-is-host"
+// MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__"
+
+/// test for invalid intel arch
+// RUN: %clangxx -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=BAD_INPUT
+// RUN: %clang_cl -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=BAD_INPUT
+// BAD_INPUT: error: SYCL target is invalid: 'intel_gpu_bad'
+
+/// test for invalid nvidia arch
+// RUN: %clangxx -c -fsycl -fsycl-targets=nvidia_gpu_bad -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=BAD_NVIDIA_INPUT
+// RUN: %clang_cl -c -fsycl -fsycl-targets=nvidia_gpu_bad -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=BAD_NVIDIA_INPUT
+// BAD_NVIDIA_INPUT: error: SYCL target is invalid: 'nvidia_gpu_bad'
+
+/// test for invalid amd arch
+// RUN: %clangxx -c -fsycl -fsycl-targets=amd_gpu_bad -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=BAD_AMD_INPUT
+// RUN: %clang_cl -c -fsycl -fsycl-targets=amd_gpu_bad -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=BAD_AMD_INPUT
+// BAD_AMD_INPUT: error: SYCL target is invalid: 'amd_gpu_bad'
+
+/// Test for proper creation of fat object
+// RUN: %clangxx -c -fsycl -fsycl-targets=intel_gpu_skl \
+// RUN:   -target x86_64-unknown-linux-gnu -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=FATO
+// FATO: clang-offload-bundler{{.*}} "-type=o"
+// FATO: "-targets=sycl-spir64_gen-unknown-unknown-skl,host-x86_64-unknown-linux-gnu"
+
+/// Test for proper creation of fat object
+// RUN: %clangxx -c -fsycl -fsycl-targets=nvidia_gpu_sm_50 \
+// RUN:   -target x86_64-unknown-linux-gnu -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=NVIDIA_FATO
+// NVIDIA_FATO: clang-offload-bundler{{.*}} "-type=o"
+// NVIDIA_FATO: "-targets=sycl-nvptx64-nvidia-cuda-sm_50,host-x86_64-unknown-linux-gnu"
+
+/// Test for proper creation of fat object
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx700 \
+// RUN:   -target x86_64-unknown-linux-gnu -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=AMD_FATO
+// AMD_FATO: clang-offload-bundler{{.*}} "-type=o"
+// AMD_FATO: "-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx700"
+
+/// Test for proper consumption of fat object
+// RUN: touch %t.o
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl \
+// RUN:   -target x86_64-unknown-linux-gnu -### %t.o 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CONSUME_FAT
+// CONSUME_FAT: clang-offload-bundler{{.*}} "-type=o"
+// CONSUME_FAT: "-targets=host-x86_64-unknown-linux-gnu,sycl-spir64_gen-unknown-unknown-skl"
+// CONSUME_FAT: "-unbundle" "-allow-missing-bundles"
+
+/// Test for proper consumption of fat object
+// RUN: touch %t.o
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_50 \
+// RUN:   -target x86_64-unknown-linux-gnu -### %t.o 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=NVIDIA_CONSUME_FAT
+// NVIDIA_CONSUME_FAT: clang-offload-bundler{{.*}} "-type=o"
+// NVIDIA_CONSUME_FAT: "-targets=host-x86_64-unknown-linux-gnu,sycl-nvptx64-nvidia-cuda-sm_50"
+// NVIDIA_CONSUME_FAT: "-unbundle" "-allow-missing-bundles"
+
+/// Test for proper consumption of fat object
+// RUN: touch %t.o
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx700 \
+// RUN:   -target x86_64-unknown-linux-gnu -### %t.o 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=AMD_CONSUME_FAT
+// AMD_CONSUME_FAT: clang-offload-bundler{{.*}} "-type=o"
+// AMD_CONSUME_FAT: "-targets=host-x86_64-unknown-linux-gnu,sycl-amdgcn-amd-amdhsa-gfx700"
+// AMD_CONSUME_FAT: "-unbundle" "-allow-missing-bundles"
+
+/// Test phases, BoundArch settings used for -device target. Additional
+/// offload action used for compilation and backend compilation.
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl -fno-sycl-device-lib=all \
+// RUN:   -fno-sycl-instrument-device-code \
+// RUN:   -target x86_64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CHECK_PHASES
+// CHECK_PHASES: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl)
+// CHECK_PHASES: 1: append-footer, {0}, c++, (host-sycl)
+// CHECK_PHASES: 2: preprocessor, {1}, c++-cpp-output, (host-sycl)
+// CHECK_PHASES: 3: input, "[[INPUT]]", c++, (device-sycl, skl)
+// CHECK_PHASES: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, skl)
+// CHECK_PHASES: 5: compiler, {4}, ir, (device-sycl, skl)
+// CHECK_PHASES: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (spir64_gen-unknown-unknown:skl)" {5}, c++-cpp-output
+// CHECK_PHASES: 7: compiler, {6}, ir, (host-sycl)
+// CHECK_PHASES: 8: backend, {7}, assembler, (host-sycl)
+// CHECK_PHASES: 9: assembler, {8}, object, (host-sycl)
+// CHECK_PHASES: 10: linker, {9}, image, (host-sycl)
+// CHECK_PHASES: 11: linker, {5}, ir, (device-sycl, skl)
+// CHECK_PHASES: 12: sycl-post-link, {11}, tempfiletable, (device-sycl, skl)
+// CHECK_PHASES: 13: file-table-tform, {12}, tempfilelist, (device-sycl, skl)
+// CHECK_PHASES: 14: llvm-spirv, {13}, tempfilelist, (device-sycl, skl)
+// CHECK_PHASES: 15: backend-compiler, {14}, image, (device-sycl, skl)
+// CHECK_PHASES: 16: file-table-tform, {12, 15}, tempfiletable, (device-sycl, skl)
+// CHECK_PHASES: 17: clang-offload-wrapper, {16}, object, (device-sycl, skl)
+// CHECK_PHASES: 18: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (spir64_gen-unknown-unknown:skl)" {17}, image
+
+/// NVIDIA Test phases, BoundArch settings used for -device target. Additional
+/// offload action used for compilation and backend compilation.
+// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_50 -fno-sycl-device-lib=all \
+// RUN:   -fno-sycl-instrument-device-code \
+// RUN:   -target x86_64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=NVIDIA_CHECK_PHASES
+// NVIDIA_CHECK_PHASES: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl)
+// NVIDIA_CHECK_PHASES: 1: append-footer, {0}, c++, (host-sycl)
+// NVIDIA_CHECK_PHASES: 2: preprocessor, {1}, c++-cpp-output, (host-sycl)
+// NVIDIA_CHECK_PHASES: 3: input, "[[INPUT]]", c++, (device-sycl, sm_50)
+// NVIDIA_CHECK_PHASES: 5: compiler, {4}, ir, (device-sycl, sm_50)
+// NVIDIA_CHECK_PHASES: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {5}, c++-cpp-output
+// NVIDIA_CHECK_PHASES: 7: compiler, {6}, ir, (host-sycl)
+// NVIDIA_CHECK_PHASES: 8: backend, {7}, assembler, (host-sycl)
+// NVIDIA_CHECK_PHASES: 9: assembler, {8}, object, (host-sycl)
+// NVIDIA_CHECK_PHASES: 10: linker, {9}, image, (host-sycl)
+// NVIDIA_CHECK_PHASES: 11: linker, {5}, ir, (device-sycl, sm_50)
+// NVIDIA_CHECK_PHASES: 12: sycl-post-link, {11}, ir, (device-sycl, sm_50)
+// NVIDIA_CHECK_PHASES: 13: file-table-tform, {12}, ir, (device-sycl, sm_50)
+// NVIDIA_CHECK_PHASES: 14: backend, {13}, assembler, (device-sycl, sm_50)
+// NVIDIA_CHECK_PHASES: 15: assembler, {14}, object, (device-sycl, sm_50)
+// NVIDIA_CHECK_PHASES: linker, {14, 15}, cuda-fatbin, (device-sycl, sm_50)
+// NVIDIA_CHECK_PHASES: foreach, {13, 16}, cuda-fatbin, (device-sycl, sm_50)
+// NVIDIA_CHECK_PHASES: file-table-tform, {12, 17}, tempfiletable, (device-sycl, sm_50)
+// NVIDIA_CHECK_PHASES: clang-offload-wrapper, {18}, object, (device-sycl, sm_50)
+// NVIDIA_CHECK_PHASES: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {19}, image
+
+/// AMD Test phases, BoundArch settings used for -device target. Additional
+/// offload action used for compilation and backend compilation.
+// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx700 -fno-sycl-device-lib=all \
+// RUN:   -fno-sycl-instrument-device-code \
+// RUN:   -target x86_64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=AMD_CHECK_PHASES
+// AMD_CHECK_PHASES: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl)
+// AMD_CHECK_PHASES: 1: append-footer, {0}, c++, (host-sycl)
+// AMD_CHECK_PHASES: 2: preprocessor, {1}, c++-cpp-output, (host-sycl)
+// AMD_CHECK_PHASES: 3: input, "[[INPUT]]", c++, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 5: compiler, {4}, ir, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (amdgcn-amd-amdhsa:gfx700)" {5}, c++-cpp-output
+// AMD_CHECK_PHASES: 7: compiler, {6}, ir, (host-sycl)
+// AMD_CHECK_PHASES: 8: backend, {7}, assembler, (host-sycl)
+// AMD_CHECK_PHASES: 9: assembler, {8}, object, (host-sycl)
+// AMD_CHECK_PHASES: 10: linker, {9}, image, (host-sycl)
+// AMD_CHECK_PHASES: 11: linker, {5}, ir, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 12: sycl-post-link, {11}, ir, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 13: file-table-tform, {12}, ir, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 14: backend, {13}, assembler, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 15: assembler, {14}, object, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 16: linker, {15}, image, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 17: linker, {16}, hip-fatbin, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 18: foreach, {13, 17}, hip-fatbin, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 19: file-table-tform, {12, 18}, tempfiletable, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 20: clang-offload-wrapper, {19}, object, (device-sycl, gfx700)
+// AMD_CHECK_PHASES: 21: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (amdgcn-amd-amdhsa:gfx700)" {20}, image
+
+/// Check that ocloc and macro settings only occur for the expected toolchains
+/// when mixing spir64_gen and intel_gpu
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64_gen \
+// RUN:   -Xsycl-target-backend=spir64_gen "-device skl" \
+// RUN:   -fno-sycl-device-lib=all -fno-sycl-instrument-device-code \
+// RUN:   -target x86_64-unknown-linux-gnu -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CHECK_TOOLS_MIX
+// CHECK_TOOLS_MIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"
+// CHECK_TOOLS_MIX: "-D__SYCL_TARGET_INTEL_GPU_DG1__"
+// CHECK_TOOLS_MIX: ocloc{{.*}} "-device" "dg1"
+// CHECK_TOOLS_MIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"
+// CHECK_TOOLS_MIX-NOT: "-D__SYCL_TARGET_INTEL_GPU{{.*}}"
+// CHECK_TOOLS_MIX: ocloc{{.*}} "-device" "skl"
+
+/// Test phases when using both spir64_gen and intel_gpu*
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_skl,spir64_gen \
+// RUN:   -fno-sycl-device-lib=all -fno-sycl-instrument-device-code \
+// RUN:   -target x86_64-unknown-linux-gnu -ccc-print-phases %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CHECK_PHASES_MIX
+// CHECK_PHASES_MIX: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl)
+// CHECK_PHASES_MIX: 1: append-footer, {0}, c++, (host-sycl)
+// CHECK_PHASES_MIX: 2: preprocessor, {1}, c++-cpp-output, (host-sycl)
+// CHECK_PHASES_MIX: 3: input, "[[INPUT]]", c++, (device-sycl)
+// CHECK_PHASES_MIX: 4: preprocessor, {3}, c++-cpp-output, (device-sycl)
+// CHECK_PHASES_MIX: 5: compiler, {4}, ir, (device-sycl)
+// CHECK_PHASES_MIX: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (spir64_gen-unknown-unknown)" {5}, c++-cpp-output
+// CHECK_PHASES_MIX: 7: compiler, {6}, ir, (host-sycl)
+// CHECK_PHASES_MIX: 8: backend, {7}, assembler, (host-sycl)
+// CHECK_PHASES_MIX: 9: assembler, {8}, object, (host-sycl)
+// CHECK_PHASES_MIX: 10: linker, {9}, image, (host-sycl)
+// CHECK_PHASES_MIX: 11: input, "[[INPUT]]", c++, (device-sycl, skl)
+// CHECK_PHASES_MIX: 12: preprocessor, {11}, c++-cpp-output, (device-sycl, skl)
+// CHECK_PHASES_MIX: 13: compiler, {12}, ir, (device-sycl, skl)
+// CHECK_PHASES_MIX: 14: linker, {13}, ir, (device-sycl, skl)
+// CHECK_PHASES_MIX: 15: sycl-post-link, {14}, tempfiletable, (device-sycl, skl)
+// CHECK_PHASES_MIX: 16: file-table-tform, {15}, tempfilelist, (device-sycl, skl)
+// CHECK_PHASES_MIX: 17: llvm-spirv, {16}, tempfilelist, (device-sycl, skl)
+// CHECK_PHASES_MIX: 18: backend-compiler, {17}, image, (device-sycl, skl)
+// CHECK_PHASES_MIX: 19: file-table-tform, {15, 18}, tempfiletable, (device-sycl, skl)
+// CHECK_PHASES_MIX: 20: clang-offload-wrapper, {19}, object, (device-sycl, skl)
+// CHECK_PHASES_MIX: 21: linker, {5}, ir, (device-sycl)
+// CHECK_PHASES_MIX: 22: sycl-post-link, {21}, tempfiletable, (device-sycl)
+// CHECK_PHASES_MIX: 23: file-table-tform, {22}, tempfilelist, (device-sycl)
+// CHECK_PHASES_MIX: 24: llvm-spirv, {23}, tempfilelist, (device-sycl)
+// CHECK_PHASES_MIX: 25: backend-compiler, {24}, image, (device-sycl)
+// CHECK_PHASES_MIX: 26: file-table-tform, {22, 25}, tempfiletable, (device-sycl)
+// CHECK_PHASES_MIX: 27: clang-offload-wrapper, {26}, object, (device-sycl)
+// CHECK_PHASES_MIX: 28: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (spir64_gen-unknown-unknown:skl)" {20}, "device-sycl (spir64_gen-unknown-unknown)" {27}, image
+
diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md
index 608e8a5192102..b47b91e4846ea 100644
--- a/sycl/doc/UsersManual.md
+++ b/sycl/doc/UsersManual.md
@@ -32,7 +32,7 @@ and not recommended to use in production environment.
       spir64_fpga-unknown-unknown, spir64_gen-unknown-unknown
     Available in special build configuration:
     * nvptx64-nvidia-cuda - generate code ahead of time for CUDA target;
-    Special target values specific to Intel Processor Graphics support are
+    Special target values specific to Intel, NVIDIA and AMD Processor Graphics support are
     accepted, providing a streamlined interface for AOT. Only one of these
     values at a time is supported.
     * intel_gpu_pvc - Ponte Vecchio Intel graphics architecture
@@ -57,6 +57,41 @@ and not recommended to use in production environment.
     * intel_gpu_kbl, intel_gpu_9_1_9 - Kaby Lake Intel graphics architecture
     * intel_gpu_skl, intel_gpu_9_0_9 - Skylake Intel graphics architecture
     * intel_gpu_bdw, intel_gpu_8_0_0 - Broadwell Intel graphics architecture
+    * nvidia_gpu_sm_50 - NVIDIA Maxwell architecture (compute capability 5.0)
+    * nvidia_gpu_sm_52 - NVIDIA Maxwell architecture (compute capability 5.2)
+    * nvidia_gpu_sm_53 - NVIDIA Maxwell architecture (compute capability 5.3)
+    * nvidia_gpu_sm_60 - NVIDIA Pascal architecture (compute capability 6.0)
+    * nvidia_gpu_sm_61 - NVIDIA Pascal architecture (compute capability 6.1)
+    * nvidia_gpu_sm_62 - NVIDIA Pascal architecture (compute capability 6.2)
+    * nvidia_gpu_sm_70 - NVIDIA Volta architecture (compute capability 7.0)
+    * nvidia_gpu_sm_72 - NVIDIA Volta architecture (compute capability 7.2)
+    * nvidia_gpu_sm_75 - NVIDIA Turing architecture (compute capability 7.5)
+    * nvidia_gpu_sm_80 - NVIDIA Ampere architecture (compute capability 8.0)
+    * nvidia_gpu_sm_86 - NVIDIA Ampere architecture (compute capability 8.6)
+    * nvidia_gpu_sm_87 - NVIDIA Jetson/Drive AGX Orin architecture
+    * nvidia_gpu_sm_89 - NVIDIA Ada Lovelace architecture
+    * nvidia_gpu_sm_90 - NVIDIA Hopper architecture
+    * amd_gpu_gfx700 - AMD GCN GFX7 (Sea Islands (CI)) architecture
+    * amd_gpu_gfx701 - AMD GCN GFX7 (Sea Islands (CI)) architecture
+    * amd_gpu_gfx702 - AMD GCN GFX7 (Sea Islands (CI)) architecture
+    * amd_gpu_gfx801 - AMD GCN GFX8 (Volcanic Islands (VI)) architecture
+    * amd_gpu_gfx802 - AMD GCN GFX8 (Volcanic Islands (VI)) architecture
+    * amd_gpu_gfx803 - AMD GCN GFX8 (Volcanic Islands (VI)) architecture
+    * amd_gpu_gfx805 - AMD GCN GFX8 (Volcanic Islands (VI)) architecture
+    * amd_gpu_gfx810 - AMD GCN GFX8 (Volcanic Islands (VI)) architecture
+    * amd_gpu_gfx900 - AMD GCN GFX9 (Vega) architecture
+    * amd_gpu_gfx902 - AMD GCN GFX9 (Vega) architecture
+    * amd_gpu_gfx904 - AMD GCN GFX9 (Vega) architecture
+    * amd_gpu_gfx906 - AMD GCN GFX9 (Vega) architecture
+    * amd_gpu_gfx908 - AMD GCN GFX9 (Vega) architecture
+    * amd_gpu_gfx90a - AMD GCN GFX9 (Vega) architecture
+    * amd_gpu_gfx1010 - AMD GCN GFX10.1 (RDNA 1) architecture
+    * amd_gpu_gfx1011 - AMD GCN GFX10.1 (RDNA 1) architecture
+    * amd_gpu_gfx1012 - AMD GCN GFX10.1 (RDNA 1) architecture
+    * amd_gpu_gfx1013 - AMD GCN GFX10.1 (RDNA 1) architecture
+    * amd_gpu_gfx1030 - AMD GCN GFX10.3 (RDNA 2) architecture
+    * amd_gpu_gfx1031 - GCN GFX10.3 (RDNA 2) architecture
+    * amd_gpu_gfx1032 - GCN GFX10.3 (RDNA 2) architecture
 
 ## Language options
 
diff --git a/sycl/doc/design/DeviceIf.md b/sycl/doc/design/DeviceIf.md
index 5ef5c681f5513..1c2333e815796 100644
--- a/sycl/doc/design/DeviceIf.md
+++ b/sycl/doc/design/DeviceIf.md
@@ -73,25 +73,20 @@ recognizes:
 * `intel_gpu_11_2_0` (alias for `intel_gpu_ehl`)
 * `intel_gpu_12_0_0` (alias for `intel_gpu_tgllp`)
 * `intel_gpu_12_10_0` (alias for `intel_gpu_dg1`)
-* `nvidia_gpu_sm20`
-* `nvidia_gpu_sm30`
-* `nvidia_gpu_sm32`
-* `nvidia_gpu_sm35`
-* `nvidia_gpu_sm37`
-* `nvidia_gpu_sm50`
-* `nvidia_gpu_sm52`
-* `nvidia_gpu_sm53`
-* `nvidia_gpu_sm60`
-* `nvidia_gpu_sm61`
-* `nvidia_gpu_sm62`
-* `nvidia_gpu_sm70`
-* `nvidia_gpu_sm72`
-* `nvidia_gpu_sm75`
-* `nvidia_gpu_sm80`
-* `nvidia_gpu_sm86`
-* `nvidia_gpu_sm87`
-* `nvidia_gpu_sm89`
-* `nvidia_gpu_sm90`
+* `nvidia_gpu_sm_50`
+* `nvidia_gpu_sm_52`
+* `nvidia_gpu_sm_53`
+* `nvidia_gpu_sm_60`
+* `nvidia_gpu_sm_61`
+* `nvidia_gpu_sm_62`
+* `nvidia_gpu_sm_70`
+* `nvidia_gpu_sm_72`
+* `nvidia_gpu_sm_75`
+* `nvidia_gpu_sm_80`
+* `nvidia_gpu_sm_86`
+* `nvidia_gpu_sm_87`
+* `nvidia_gpu_sm_89`
+* `nvidia_gpu_sm_90`
 * `amd_gpu_gfx700`
 * `amd_gpu_gfx701`
 * `amd_gpu_gfx702`
@@ -160,11 +155,6 @@ one of the following corresponding C++ macro names:
 * `__SYCL_TARGET_INTEL_GPU_ACM_G11__`
 * `__SYCL_TARGET_INTEL_GPU_ACM_G12__`
 * `__SYCL_TARGET_INTEL_GPU_PVC__`
-* `__SYCL_TARGET_NVIDIA_GPU_SM20__`
-* `__SYCL_TARGET_NVIDIA_GPU_SM30__`
-* `__SYCL_TARGET_NVIDIA_GPU_SM32__`
-* `__SYCL_TARGET_NVIDIA_GPU_SM35__`
-* `__SYCL_TARGET_NVIDIA_GPU_SM37__`
 * `__SYCL_TARGET_NVIDIA_GPU_SM50__`
 * `__SYCL_TARGET_NVIDIA_GPU_SM52__`
 * `__SYCL_TARGET_NVIDIA_GPU_SM53__`
diff --git a/sycl/doc/extensions/experimental/sycl_ext_intel_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_intel_device_architecture.asciidoc
deleted file mode 100644
index dc2a1373d3b82..0000000000000
--- a/sycl/doc/extensions/experimental/sycl_ext_intel_device_architecture.asciidoc
+++ /dev/null
@@ -1,429 +0,0 @@
-= sycl_ext_intel_device_architecture
-
-:source-highlighter: coderay
-:coderay-linenums-mode: table
-
-// This section needs to be after the document title.
-:doctype: book
-:toc2:
-:toc: left
-:encoding: utf-8
-:lang: en
-:dpcpp: pass:[DPC++]
-
-// Set the default source code type in this document to C++,
-// for syntax highlighting purposes.  This is needed because
-// docbook uses c++ and html5 uses cpp.
-:language: {basebackend@docbook:c++:cpp}
-
-
-== Notice
-
-[%hardbreaks]
-Copyright (C) 2022-2022 Intel Corporation.  All rights reserved.
-
-Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks
-of The Khronos Group Inc.  OpenCL(TM) is a trademark of Apple Inc. used by
-permission by Khronos.
-
-
-== Contact
-
-To report problems with this extension, please open a new issue at:
-
-https://github.com/intel/llvm/issues
-
-
-== Dependencies
-
-This extension is written against the SYCL 2020 revision 5 specification.  All
-references below to the "core SYCL specification" or to section numbers in the
-SYCL specification refer to that revision.
-
-
-== Status
-
-This is a proposed extension specification, intended to gather community
-feedback.  Interfaces defined in this specification may not be implemented yet
-or may be in a preliminary state.  The specification itself may also change in
-incompatible ways before it is finalized.  *Shipping software products should
-not rely on APIs defined in this specification.*
-
-[comment]
---
-_Add the following paragraph when this specification becomes "experimental"._
-
-There are important limitations with the DPC++ implementation of this
-experimental extension.  In particular, this extension may only be used when
-the application is compiled in AOT mode.  See the section below titled
-"Limitations with the experimental version" for a full description of the
-limitations.
---
-
-
-== Overview
-
-This extension provides a way for device code to query the device architecture
-on which it is running.  This is similar to the
-link:./sycl_ext_oneapi_device_if.asciidoc[sycl_ext_oneapi_device_if] extension
-except the comparison is for the device's architecture not the device's
-aspects.  In some cases, low-level application code can use special features or
-do specific optimizations depending on the device architecture, and this
-extension enables such applications.
-
-
-== Specification
-
-=== Feature test macro
-
-This extension provides a feature-test macro as described in the core SYCL
-specification.  An implementation supporting this extension must predefine the
-macro `SYCL_EXT_INTEL_DEVICE_ARCHITECTURE` to one of the values defined in the
-table below.  Applications can test for the existence of this macro to
-determine if the implementation supports this feature, or applications can test
-the macro's value to determine which of the extension's features the
-implementation supports.
-
-[%header,cols="1,5"]
-|===
-|Value
-|Description
-
-|1
-|Initial version of this extension.
-|===
-
-=== New enumeration of architectures
-
-This extension adds a new enumeration of the architectures that can be tested.
-
-```
-namespace sycl::ext::intel::experimental {
-
-enum class architecture : /* unspecified */ {
-  x86_64,
-  intel_gpu_bdw,
-  intel_gpu_skl,
-  intel_gpu_kbl,
-  intel_gpu_cfl,
-  intel_gpu_apl,
-  intel_gpu_glk,
-  intel_gpu_whl,
-  intel_gpu_aml,
-  intel_gpu_cml,
-  intel_gpu_icllp,
-  intel_gpu_ehl,
-  intel_gpu_tgllp,
-  intel_gpu_rkl,
-  intel_gpu_adl_s,
-  intel_gpu_rpl_s,
-  intel_gpu_adl_p,
-  intel_gpu_adl_n,
-  intel_gpu_dg1,
-  intel_gpu_acm_g10,
-  intel_gpu_acm_g11,
-  intel_gpu_acm_g12,
-  intel_gpu_pvc,
-
-  intel_gpu_8_0_0 = intel_gpu_bdw,
-  intel_gpu_9_0_9 = intel_gpu_skl,
-  intel_gpu_9_1_9 = intel_gpu_kbl
-  intel_gpu_9_2_9 = intel_gpu_cfl,
-  intel_gpu_9_3_0 = intel_gpu_apl,
-  intel_gpu_9_4_0 = intel_gpu_glk,
-  intel_gpu_9_5_0 = intel_gpu_whl,
-  intel_gpu_9_6_0 = intel_gpu_aml,
-  intel_gpu_9_7_0 = intel_gpu_cml,
-  intel_gpu_11_0_0 = intel_gpu_icllp,
-  intel_gpu_11_2_0 = intel_gpu_ehl,
-  intel_gpu_12_0_0 = intel_gpu_tgllp,
-  intel_gpu_12_10_0 = intel_gpu_dg1
-};
-
-} // namespace sycl::ext::intel::experimental
-```
-
-The following table tells which version of this extension first included each
-of these enumerators, and it provides a brief description of their meanings.
-
-[%header,cols="5,1,5"]
-|===
-|Enumerator name
-|Added in version
-|Description
-
-|`x86_64`
-|1
-|Any CPU device with the x86_64 instruction set.
-
-|`intel_gpu_bdw`
-|1
-|Broadwell Intel graphics architecture.
-
-|`intel_gpu_skl`
-|1
-|Broadwell Intel graphics architecture.
-
-|`intel_gpu_kbl`
-|1
-|Kaby Lake Intel graphics architecture.
-
-|`intel_gpu_cfl`
-|1
-|Coffee Lake Intel graphics architecture.
-
-|`intel_gpu_apl`
-|1
-|Apollo Lake Intel graphics architecture.
-
-|`intel_gpu_glk`
-|1
-|Gemini Lake Intel graphics architecture.
-
-|`intel_gpu_whl`
-|1
-|Whiskey Lake Intel graphics architecture.
-
-|`intel_gpu_aml`
-|1
-|Amber Lake Intel graphics architecture.
-
-|`intel_gpu_cml`
-|1
-|Comet Lake Intel graphics architecture.
-
-|`intel_gpu_icllp`
-|1
-|Ice Lake Intel graphics architecture.
-
-|`intel_gpu_ehl`
-|1
-|Elkhart Lake Intel graphics architecture.
-
-|`intel_gpu_tgllp`
-|1
-|Tiger Lake Intel graphics architecture.
-
-|`intel_gpu_rkl`
-|1
-|Rocket Lake Intel graphics architecture.
-
-|`intel_gpu_adl_s`
-|1
-|Alder Lake S Intel graphics architecture.
-
-|`intel_gpu_rpl_s`
-|1
-|Raptor Lake Intel graphics architecture.
-
-|`intel_gpu_adl_p`
-|1
-|Alder Lake P Intel graphics architecture.
-
-|`intel_gpu_adl_n`
-|1
-|Alder Lake N Intel graphics architecture.
-
-|`intel_gpu_dg1`
-|1
-|DG1 Intel graphics architecture.
-
-|`intel_gpu_acm_g10`
-|1
-|Alchemist G10 Intel graphics architecture.
-
-|`intel_gpu_acm_g11`
-|1
-|Alchemist G11 Intel graphics architecture.
-
-|`intel_gpu_acm_g12`
-|1
-|Alchemist G12 Intel graphics architecture.
-
-|`intel_gpu_pvc`
-|1
-|Ponte Vecchio Intel graphics architecture.
-
-|`intel_gpu_8_0_0`
-|1
-|Alias for `intel_gpu_bdw`.
-
-|`intel_gpu_9_0_9`
-|1
-|Alias for `intel_gpu_skl`.
-
-|`intel_gpu_9_1_9`
-|1
-|Alias for `intel_gpu_kbl`.
-
-|`intel_gpu_9_2_9`
-|1
-|Alias for `intel_gpu_cfl`.
-
-|`intel_gpu_9_3_0`
-|1
-|Alias for `intel_gpu_apl`.
-
-|`intel_gpu_9_4_0`
-|1
-|Alias for `intel_gpu_glk`.
-
-|`intel_gpu_9_5_0`
-|1
-|Alias for `intel_gpu_whl`.
-
-|`intel_gpu_9_6_0`
-|1
-|Alias for `intel_gpu_aml`.
-
-|`intel_gpu_9_7_0`
-|1
-|Alias for `intel_gpu_cml`.
-
-|`intel_gpu_11_0_0`
-|1
-|Alias for `intel_gpu_icllp`.
-
-|`intel_gpu_11_2_0`
-|1
-|Alias for `intel_gpu_ehl`.
-
-|`intel_gpu_12_0_0`
-|1
-|Alias for `intel_gpu_tgllp`.
-
-|`intel_gpu_12_10_0`
-|1
-|Alias for `intel_gpu_dg1`.
-|===
-
-[NOTE]
-====
-An "alias" enumerator is generally added for new devices only after hardware
-has finalized and the exact version is known.
-====
-
-
-=== New `if_architecture_is` free function
-
-This extension adds one new free function which may be called from device
-code.  This function is not available in host code.
-
-```
-namespace sycl::ext::intel::experimental {
-
-template<architecture ...Archs, typename ...Args, typename T>
-/* unspecified */ if_architecture_is(T fn, Args ...args);
-
-} // namespace sycl::ext::intel::experimental
-```
-
-This function operates exactly like `if_device_has` from the
-link:./sycl_ext_oneapi_device_if.asciidoc[sycl_ext_oneapi_device_if] extension
-except that the condition gating execution of the callable function `fn` is
-determined by the `Archs` parameter pack.  This condition is `true` if the
-device which executes `if_architecture_is` matches **any** of the architectures
-listed in this pack.
-
-The value returned by `if_architecture_is` is an object _F_ of an unspecified
-type, which provides the following member functions:
-
-```
-class /* unspecified */ {
- public:
-  template<architecture ...Archs, typename ...Args, typename T>
-  /* unspecified */ else_if_architecture_is(T fn, Args ...args);
-
-  template<typename T, typename ...Args>
-  void otherwise(T fn, Args ...args);
-};
-```
-
-The `otherwise` function behaves exactly like the `otherwise` function from the
-link:./sycl_ext_oneapi_device_if.asciidoc[sycl_ext_oneapi_device_if] extension.
-The `else_if_architecture_is` function behaves exactly like
-`else_if_device_has` from that extension except that the condition gating
-execution of the callable object `fn` is determined by the `Archs` parameter
-pack.  This condition is `true` only if the object _F_ comes from a previous
-call to `if_architecture_is` or `else_if_architecture_is` whose condition is
-`false` *and* if the device calling `else_if_architecture_is` has one of the
-architectures in the `Archs` parameter pack.
-
-
-== Future direction
-
-This experimental extension is still evolving.  We expect that future versions
-will include the following:
-
-* An extended member function like:
-+
---
-```
-namespace sycl {
-
-class device {
-  bool ext_intel_architecture_is(
-    ext::intel::experimental::architecture arch);
-};
-
-// namespace sycl
-```
-
-This provides a way to query a device's architecture from host code.
---
-
-* An extended device information descriptor named
-  `sycl::ext::intel::experimental::info::device::architecture`, which returns
-  the architecture of the device.  This allows host code such as:
-+
---
-```
-using namespace sycl::ext::intel::experimental;
-
-architecture arch = dev.get_info<info::device::architecture>();
-switch (arch) {
-case architecture::x86_64:
-  /* ... */
-  break;
-case architecture::intel_gpu_bdw:
-  /* ... */
-  break;
-/* etc. */
-}
-```
---
-
-* A compile-time constant property that can be used to decorate kernels and
-  non-kernel device functions:
-+
---
-```
-namespace sycl::ext::intel::experimental {
-
-struct device_architecture_is_key {
-  template <architecture... Archs>
-  using value_t = property_value<device_architecture_is_key,
-    std::integral_constant<architecture, Archs>...>;
-};
-
-template <architecture... Archs>
-struct property_value<device_architecture_is_key,
-  std::integral_constant<architecture, Archs>...>
-{
-  static constexpr std::array<architecture, sizeof...(Archs)> value;
-};
-
-template <architecture... Archs>
-inline constexpr device_architecture_is_key::value_t<Archs...>
-  device_architecture_is;
-
-} // namespace sycl::ext::intel::experimental
-```
-
-This property indicates that a kernel or non-kernel device function uses
-features that are available on devices with the given architecture list but
-may not be available on devices with other architectures.
---
-
-* Additional enumerators in the `architecture` enumeration.  This could include
-  entries for different x86_64 architectures or entries for CUDA devices.
diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc
similarity index 93%
rename from sycl/doc/extensions/proposed/sycl_ext_oneapi_device_architecture.asciidoc
rename to sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc
index a785c426a59c6..bb5692552796d 100644
--- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_architecture.asciidoc
+++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc
@@ -128,25 +128,20 @@ enum class architecture : /* unspecified */ {
   intel_gpu_acm_g12,
   intel_gpu_pvc,
 
-  nvidia_gpu_sm20,
-  nvidia_gpu_sm30,
-  nvidia_gpu_sm32,
-  nvidia_gpu_sm35,
-  nvidia_gpu_sm37,
-  nvidia_gpu_sm50,
-  nvidia_gpu_sm52,
-  nvidia_gpu_sm53,
-  nvidia_gpu_sm60,
-  nvidia_gpu_sm61,
-  nvidia_gpu_sm62,
-  nvidia_gpu_sm70,
-  nvidia_gpu_sm72,
-  nvidia_gpu_sm75,
-  nvidia_gpu_sm80,
-  nvidia_gpu_sm86,
-  nvidia_gpu_sm87,
-  nvidia_gpu_sm89,
-  nvidia_gpu_sm90,
+  nvidia_gpu_sm_50,
+  nvidia_gpu_sm_52,
+  nvidia_gpu_sm_53,
+  nvidia_gpu_sm_60,
+  nvidia_gpu_sm_61,
+  nvidia_gpu_sm_62,
+  nvidia_gpu_sm_70,
+  nvidia_gpu_sm_72,
+  nvidia_gpu_sm_75,
+  nvidia_gpu_sm_80,
+  nvidia_gpu_sm_86,
+  nvidia_gpu_sm_87,
+  nvidia_gpu_sm_89,
+  nvidia_gpu_sm_90,
 
   amd_gpu_gfx700,
   amd_gpu_gfx701,
@@ -341,79 +336,59 @@ of these enumerators, and it provides a brief description of their meanings.
 |1
 |Alias for `intel_gpu_dg1`.
 
-|`nvidia_gpu_sm20`
-|2
-|NVIDIA Fermi architecture.
-
-|`nvidia_gpu_sm30`
-|2
-|NVIDIA Kepler architecture (compute capability 3.0).
-
-|`nvidia_gpu_sm32`
-|2
-|NVIDIA Kepler architecture (compute capability 3.2).
-
-|`nvidia_gpu_sm35`
-|2
-|NVIDIA Kepler architecture (compute capability 3.5).
-
-|`nvidia_gpu_sm37`
-|2
-|NVIDIA Kepler architecture (compute capability 3.7).
-
-|`nvidia_gpu_sm50`
+|`nvidia_gpu_sm_50`
 |2
 |NVIDIA Maxwell architecture (compute capability 5.0).
 
-|`nvidia_gpu_sm52`
+|`nvidia_gpu_sm_52`
 |2
 |NVIDIA Maxwell architecture (compute capability 5.2).
 
-|`nvidia_gpu_sm53`
+|`nvidia_gpu_sm_53`
 |2
 |NVIDIA Maxwell architecture (compute capability 5.3).
 
-|`nvidia_gpu_sm60`
+|`nvidia_gpu_sm_60`
 |2
 |NVIDIA Pascal architecture (compute capability 6.0).
 
-|`nvidia_gpu_sm61`
+|`nvidia_gpu_sm_61`
 |2
 |NVIDIA Pascal architecture (compute capability 6.1).
 
-|`nvidia_gpu_sm62`
+|`nvidia_gpu_sm_62`
 |2
 |NVIDIA Pascal architecture (compute capability 6.2).
 
-|`nvidia_gpu_sm70`
+|`nvidia_gpu_sm_70`
 |2
 |NVIDIA Volta architecture (compute capability 7.0).
 
-|`nvidia_gpu_sm72`
+|`nvidia_gpu_sm_72`
 |2
 |NVIDIA Volta architecture (compute capability 7.2).
 
-|`nvidia_gpu_sm75`
+|`nvidia_gpu_sm_75`
 |2
 |NVIDIA Turing architecture (compute capability 7.5).
 
-|`nvidia_gpu_sm80`
+|`nvidia_gpu_sm_80`
 |2
 |NVIDIA Ampere architecture (compute capability 8.0).
 
-|`nvidia_gpu_sm86`
+|`nvidia_gpu_sm_86`
 |2
 |NVIDIA Ampere architecture (compute capability 8.6).
 
-|`nvidia_gpu_sm87`
+|`nvidia_gpu_sm_87`
 |2
 |Jetson/Drive AGX Orin architecture.
 
-|`nvidia_gpu_sm89`
+|`nvidia_gpu_sm_89`
 |2
 |NVIDIA Ada Lovelace architecture.
 
-|`nvidia_gpu_sm90`
+|`nvidia_gpu_sm_90`
 |2
 |NVIDIA Hopper architecture.
 
diff --git a/sycl/include/sycl/ext/intel/experimental/device_architecture.hpp b/sycl/include/sycl/ext/intel/experimental/device_architecture.hpp
deleted file mode 100644
index f2f37db5312c5..0000000000000
--- a/sycl/include/sycl/ext/intel/experimental/device_architecture.hpp
+++ /dev/null
@@ -1,278 +0,0 @@
-#pragma once
-
-#include <sycl/detail/defines_elementary.hpp>
-
-namespace sycl {
-__SYCL_INLINE_VER_NAMESPACE(_V1) {
-namespace ext::intel::experimental {
-
-enum class architecture {
-  x86_64,
-  intel_gpu_bdw,
-  intel_gpu_skl,
-  intel_gpu_kbl,
-  intel_gpu_cfl,
-  intel_gpu_apl,
-  intel_gpu_glk,
-  intel_gpu_whl,
-  intel_gpu_aml,
-  intel_gpu_cml,
-  intel_gpu_icllp,
-  intel_gpu_ehl,
-  intel_gpu_tgllp,
-  intel_gpu_rkl,
-  intel_gpu_adl_s,
-  intel_gpu_rpl_s,
-  intel_gpu_adl_p,
-  intel_gpu_adl_n,
-  intel_gpu_dg1,
-  intel_gpu_acm_g10,
-  intel_gpu_acm_g11,
-  intel_gpu_acm_g12,
-  intel_gpu_pvc,
-  // Update "detail::max_architecture" below if you add new elements here!
-  intel_gpu_8_0_0 = intel_gpu_bdw,
-  intel_gpu_9_0_9 = intel_gpu_skl,
-  intel_gpu_9_1_9 = intel_gpu_kbl,
-  intel_gpu_9_2_9 = intel_gpu_cfl,
-  intel_gpu_9_3_0 = intel_gpu_apl,
-  intel_gpu_9_4_0 = intel_gpu_glk,
-  intel_gpu_9_5_0 = intel_gpu_whl,
-  intel_gpu_9_6_0 = intel_gpu_aml,
-  intel_gpu_9_7_0 = intel_gpu_cml,
-  intel_gpu_11_0_0 = intel_gpu_icllp,
-  intel_gpu_11_2_0 = intel_gpu_ehl,
-  intel_gpu_12_0_0 = intel_gpu_tgllp,
-  intel_gpu_12_10_0 = intel_gpu_dg1,
-};
-
-} // namespace ext::intel::experimental
-
-namespace detail {
-
-static constexpr ext::intel::experimental::architecture max_architecture =
-    ext::intel::experimental::architecture::intel_gpu_pvc;
-
-#ifndef __SYCL_TARGET_INTEL_X86_64__
-#define __SYCL_TARGET_INTEL_X86_64__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_BDW__
-#define __SYCL_TARGET_INTEL_GPU_BDW__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_SKL__
-#define __SYCL_TARGET_INTEL_GPU_SKL__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_KBL__
-#define __SYCL_TARGET_INTEL_GPU_KBL__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_CFL__
-#define __SYCL_TARGET_INTEL_GPU_CFL__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_APL__
-#define __SYCL_TARGET_INTEL_GPU_APL__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_GLK__
-#define __SYCL_TARGET_INTEL_GPU_GLK__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_WHL__
-#define __SYCL_TARGET_INTEL_GPU_WHL__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_AML__
-#define __SYCL_TARGET_INTEL_GPU_AML__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_CML__
-#define __SYCL_TARGET_INTEL_GPU_CML__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_ICLLP__
-#define __SYCL_TARGET_INTEL_GPU_ICLLP__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_EHL__
-#define __SYCL_TARGET_INTEL_GPU_EHL__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_TGLLP__
-#define __SYCL_TARGET_INTEL_GPU_TGLLP__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_RKL__
-#define __SYCL_TARGET_INTEL_GPU_RKL__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_ADL_S__
-#define __SYCL_TARGET_INTEL_GPU_ADL_S__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_RPL_S__
-#define __SYCL_TARGET_INTEL_GPU_RPL_S__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_ADL_P__
-#define __SYCL_TARGET_INTEL_GPU_ADL_P__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_ADL_N__
-#define __SYCL_TARGET_INTEL_GPU_ADL_N__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_DG1__
-#define __SYCL_TARGET_INTEL_GPU_DG1__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G10__
-#define __SYCL_TARGET_INTEL_GPU_ACM_G10__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G11__
-#define __SYCL_TARGET_INTEL_GPU_ACM_G11__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G12__
-#define __SYCL_TARGET_INTEL_GPU_ACM_G12__ 0
-#endif
-#ifndef __SYCL_TARGET_INTEL_GPU_PVC__
-#define __SYCL_TARGET_INTEL_GPU_PVC__ 0
-#endif
-
-// This is true when the translation unit is compiled in AOT mode with target
-// names that supports the "if_architecture_is" features.  If an unsupported
-// target name is specified via "-fsycl-targets", the associated invocation of
-// the device compiler will set this variable to false, and that will trigger
-// an error for code that uses "if_architecture_is".
-static constexpr bool is_allowable_aot_mode =
-    (__SYCL_TARGET_INTEL_X86_64__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_BDW__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_SKL__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_KBL__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_CFL__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_APL__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_GLK__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_WHL__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_AML__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_CML__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_ICLLP__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_EHL__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_TGLLP__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_RKL__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_ADL_S__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_RPL_S__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_ADL_P__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_ADL_N__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_DG1__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1) ||
-    (__SYCL_TARGET_INTEL_GPU_PVC__ == 1);
-
-struct IsAOTForArchitectureClass {
-  // Allocate an array of size == size of ext::intel::experimental::architecture
-  // enum.
-  bool arr[static_cast<int>(max_architecture) + 1];
-
-  using arch = ext::intel::experimental::architecture;
-
-  constexpr IsAOTForArchitectureClass() : arr() {
-    arr[static_cast<int>(arch::x86_64)] = __SYCL_TARGET_INTEL_X86_64__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_bdw)] =
-        __SYCL_TARGET_INTEL_GPU_BDW__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_skl)] =
-        __SYCL_TARGET_INTEL_GPU_SKL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_kbl)] =
-        __SYCL_TARGET_INTEL_GPU_KBL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_cfl)] =
-        __SYCL_TARGET_INTEL_GPU_CFL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_apl)] =
-        __SYCL_TARGET_INTEL_GPU_APL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_glk)] =
-        __SYCL_TARGET_INTEL_GPU_GLK__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_whl)] =
-        __SYCL_TARGET_INTEL_GPU_WHL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_aml)] =
-        __SYCL_TARGET_INTEL_GPU_AML__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_cml)] =
-        __SYCL_TARGET_INTEL_GPU_CML__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_icllp)] =
-        __SYCL_TARGET_INTEL_GPU_ICLLP__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_ehl)] =
-        __SYCL_TARGET_INTEL_GPU_EHL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_tgllp)] =
-        __SYCL_TARGET_INTEL_GPU_TGLLP__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_rkl)] =
-        __SYCL_TARGET_INTEL_GPU_RKL__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_adl_s)] =
-        __SYCL_TARGET_INTEL_GPU_ADL_S__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_rpl_s)] =
-        __SYCL_TARGET_INTEL_GPU_RPL_S__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_adl_p)] =
-        __SYCL_TARGET_INTEL_GPU_ADL_P__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_adl_n)] =
-        __SYCL_TARGET_INTEL_GPU_ADL_N__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_dg1)] =
-        __SYCL_TARGET_INTEL_GPU_DG1__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_acm_g10)] =
-        __SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_acm_g11)] =
-        __SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_acm_g12)] =
-        __SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1;
-    arr[static_cast<int>(arch::intel_gpu_pvc)] =
-        __SYCL_TARGET_INTEL_GPU_PVC__ == 1;
-  }
-};
-
-// One entry for each enumerator in "architecture" telling whether the AOT
-// target matches that architecture.
-static constexpr IsAOTForArchitectureClass is_aot_for_architecture;
-
-// Reads the value of "is_allowable_aot_mode" via a template to defer triggering
-// static_assert() until template instantiation time.
-template <ext::intel::experimental::architecture... Archs>
-constexpr static bool allowable_aot_mode() {
-  return is_allowable_aot_mode;
-}
-
-// Tells if the current device has one of the architectures in the parameter
-// pack.
-template <ext::intel::experimental::architecture... Archs>
-constexpr static bool device_architecture_is() {
-  return (is_aot_for_architecture.arr[static_cast<int>(Archs)] || ...);
-}
-
-// Helper object used to implement "else_if_architecture_is" and "otherwise".
-// The "MakeCall" template parameter tells whether a previous clause in the
-// "if-elseif-elseif ..." chain was true.  When "MakeCall" is false, some
-// previous clause was true, so none of the subsequent
-// "else_if_architecture_is" or "otherwise" member functions should call the
-// user's function.
-template <bool MakeCall> class if_architecture_helper {
-public:
-  template <ext::intel::experimental::architecture... Archs, typename T,
-            typename... Args>
-  constexpr auto else_if_architecture_is(T fnTrue, Args... args) {
-    if constexpr (MakeCall && device_architecture_is<Archs...>()) {
-      fnTrue(args...);
-      return if_architecture_helper<false>{};
-    } else {
-      (void)fnTrue;
-      return if_architecture_helper<MakeCall>{};
-    }
-  }
-
-  template <typename T, typename... Args>
-  constexpr void otherwise(T fn, Args... args) {
-    if constexpr (MakeCall) {
-      fn(args...);
-    }
-  }
-};
-} // namespace detail
-
-namespace ext::intel::experimental {
-
-template <architecture... Archs, typename T, typename... Args>
-constexpr static auto if_architecture_is(T fnTrue, Args... args) {
-  static_assert(detail::allowable_aot_mode<Archs...>(),
-                "The if_architecture_is function may only be used when AOT "
-                "compiling with '-fsycl-targets=spir64_x86_64' or "
-                "'-fsycl-targets=intel_gpu_*'");
-  if constexpr (detail::device_architecture_is<Archs...>()) {
-    fnTrue(args...);
-    return detail::if_architecture_helper<false>{};
-  } else {
-    (void)fnTrue;
-    return detail::if_architecture_helper<true>{};
-  }
-}
-
-} // namespace ext::intel::experimental
-} // __SYCL_INLINE_VER_NAMESPACE(_V1)
-} // namespace sycl
diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
new file mode 100644
index 0000000000000..4fcfdb2036b43
--- /dev/null
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -0,0 +1,523 @@
+#pragma once
+
+#include <sycl/detail/defines_elementary.hpp>
+
+namespace sycl {
+__SYCL_INLINE_VER_NAMESPACE(_V1) {
+namespace ext::oneapi::experimental {
+
+enum class architecture {
+  x86_64,
+  intel_gpu_bdw,
+  intel_gpu_skl,
+  intel_gpu_kbl,
+  intel_gpu_cfl,
+  intel_gpu_apl,
+  intel_gpu_glk,
+  intel_gpu_whl,
+  intel_gpu_aml,
+  intel_gpu_cml,
+  intel_gpu_icllp,
+  intel_gpu_ehl,
+  intel_gpu_tgllp,
+  intel_gpu_rkl,
+  intel_gpu_adl_s,
+  intel_gpu_rpl_s,
+  intel_gpu_adl_p,
+  intel_gpu_adl_n,
+  intel_gpu_dg1,
+  intel_gpu_acm_g10,
+  intel_gpu_acm_g11,
+  intel_gpu_acm_g12,
+  intel_gpu_pvc,
+  // NVIDIA architectures
+  nvidia_gpu_sm_50,
+  nvidia_gpu_sm_52,
+  nvidia_gpu_sm_53,
+  nvidia_gpu_sm_60,
+  nvidia_gpu_sm_61,
+  nvidia_gpu_sm_62,
+  nvidia_gpu_sm_70,
+  nvidia_gpu_sm_72,
+  nvidia_gpu_sm_75,
+  nvidia_gpu_sm_80,
+  nvidia_gpu_sm_86,
+  nvidia_gpu_sm_87,
+  nvidia_gpu_sm_89,
+  nvidia_gpu_sm_90,
+  // AMD architectures
+  amd_gpu_gfx700,
+  amd_gpu_gfx701,
+  amd_gpu_gfx702,
+  amd_gpu_gfx801,
+  amd_gpu_gfx802,
+  amd_gpu_gfx803,
+  amd_gpu_gfx805,
+  amd_gpu_gfx810,
+  amd_gpu_gfx900,
+  amd_gpu_gfx902,
+  amd_gpu_gfx904,
+  amd_gpu_gfx906,
+  amd_gpu_gfx908,
+  amd_gpu_gfx90a,
+  amd_gpu_gfx1010,
+  amd_gpu_gfx1011,
+  amd_gpu_gfx1012,
+  amd_gpu_gfx1013,
+  amd_gpu_gfx1030,
+  amd_gpu_gfx1031,
+  amd_gpu_gfx1032,
+  // Update "detail::max_architecture" below if you add new elements here!
+  intel_gpu_8_0_0 = intel_gpu_bdw,
+  intel_gpu_9_0_9 = intel_gpu_skl,
+  intel_gpu_9_1_9 = intel_gpu_kbl,
+  intel_gpu_9_2_9 = intel_gpu_cfl,
+  intel_gpu_9_3_0 = intel_gpu_apl,
+  intel_gpu_9_4_0 = intel_gpu_glk,
+  intel_gpu_9_5_0 = intel_gpu_whl,
+  intel_gpu_9_6_0 = intel_gpu_aml,
+  intel_gpu_9_7_0 = intel_gpu_cml,
+  intel_gpu_11_0_0 = intel_gpu_icllp,
+  intel_gpu_11_2_0 = intel_gpu_ehl,
+  intel_gpu_12_0_0 = intel_gpu_tgllp,
+  intel_gpu_12_10_0 = intel_gpu_dg1,
+};
+
+} // namespace ext::oneapi::experimental
+
+namespace detail {
+
+static constexpr ext::oneapi::experimental::architecture max_architecture =
+    ext::oneapi::experimental::architecture::amd_gpu_gfx1032;
+
+#ifndef __SYCL_TARGET_INTEL_X86_64__
+#define __SYCL_TARGET_INTEL_X86_64__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_BDW__
+#define __SYCL_TARGET_INTEL_GPU_BDW__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_SKL__
+#define __SYCL_TARGET_INTEL_GPU_SKL__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_KBL__
+#define __SYCL_TARGET_INTEL_GPU_KBL__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_CFL__
+#define __SYCL_TARGET_INTEL_GPU_CFL__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_APL__
+#define __SYCL_TARGET_INTEL_GPU_APL__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_GLK__
+#define __SYCL_TARGET_INTEL_GPU_GLK__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_WHL__
+#define __SYCL_TARGET_INTEL_GPU_WHL__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_AML__
+#define __SYCL_TARGET_INTEL_GPU_AML__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_CML__
+#define __SYCL_TARGET_INTEL_GPU_CML__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_ICLLP__
+#define __SYCL_TARGET_INTEL_GPU_ICLLP__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_EHL__
+#define __SYCL_TARGET_INTEL_GPU_EHL__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_TGLLP__
+#define __SYCL_TARGET_INTEL_GPU_TGLLP__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_RKL__
+#define __SYCL_TARGET_INTEL_GPU_RKL__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_ADL_S__
+#define __SYCL_TARGET_INTEL_GPU_ADL_S__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_RPL_S__
+#define __SYCL_TARGET_INTEL_GPU_RPL_S__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_ADL_P__
+#define __SYCL_TARGET_INTEL_GPU_ADL_P__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_ADL_N__
+#define __SYCL_TARGET_INTEL_GPU_ADL_N__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_DG1__
+#define __SYCL_TARGET_INTEL_GPU_DG1__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G10__
+#define __SYCL_TARGET_INTEL_GPU_ACM_G10__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G11__
+#define __SYCL_TARGET_INTEL_GPU_ACM_G11__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G12__
+#define __SYCL_TARGET_INTEL_GPU_ACM_G12__ 0
+#endif
+#ifndef __SYCL_TARGET_INTEL_GPU_PVC__
+#define __SYCL_TARGET_INTEL_GPU_PVC__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM50__
+#define __SYCL_TARGET_NVIDIA_GPU_SM50__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM52__
+#define __SYCL_TARGET_NVIDIA_GPU_SM52__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM53__
+#define __SYCL_TARGET_NVIDIA_GPU_SM53__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM60__
+#define __SYCL_TARGET_NVIDIA_GPU_SM60__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM61__
+#define __SYCL_TARGET_NVIDIA_GPU_SM61__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM62__
+#define __SYCL_TARGET_NVIDIA_GPU_SM62__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM70__
+#define __SYCL_TARGET_NVIDIA_GPU_SM70__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM72__
+#define __SYCL_TARGET_NVIDIA_GPU_SM72__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM75__
+#define __SYCL_TARGET_NVIDIA_GPU_SM75__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM80__
+#define __SYCL_TARGET_NVIDIA_GPU_SM80__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM86__
+#define __SYCL_TARGET_NVIDIA_GPU_SM86__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM87__
+#define __SYCL_TARGET_NVIDIA_GPU_SM87__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM89__
+#define __SYCL_TARGET_NVIDIA_GPU_SM89__ 0
+#endif
+#ifndef __SYCL_TARGET_NVIDIA_GPU_SM90__
+#define __SYCL_TARGET_NVIDIA_GPU_SM90__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX700__
+#define __SYCL_TARGET_AMD_GPU_GFX700__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX701__
+#define __SYCL_TARGET_AMD_GPU_GFX701__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX702__
+#define __SYCL_TARGET_AMD_GPU_GFX702__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX801__
+#define __SYCL_TARGET_AMD_GPU_GFX801__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX802__
+#define __SYCL_TARGET_AMD_GPU_GFX802__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX803__
+#define __SYCL_TARGET_AMD_GPU_GFX803__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX805__
+#define __SYCL_TARGET_AMD_GPU_GFX805__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX810__
+#define __SYCL_TARGET_AMD_GPU_GFX810__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX900__
+#define __SYCL_TARGET_AMD_GPU_GFX900__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX902__
+#define __SYCL_TARGET_AMD_GPU_GFX902__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX904__
+#define __SYCL_TARGET_AMD_GPU_GFX904__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX906__
+#define __SYCL_TARGET_AMD_GPU_GFX906__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX908__
+#define __SYCL_TARGET_AMD_GPU_GFX908__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX90A__
+#define __SYCL_TARGET_AMD_GPU_GFX90A__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX1010__
+#define __SYCL_TARGET_AMD_GPU_GFX1010__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX1011__
+#define __SYCL_TARGET_AMD_GPU_GFX1011__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX1012__
+#define __SYCL_TARGET_AMD_GPU_GFX1012__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX1013__
+#define __SYCL_TARGET_AMD_GPU_GFX1013__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX1030__
+#define __SYCL_TARGET_AMD_GPU_GFX1030__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX1031__
+#define __SYCL_TARGET_AMD_GPU_GFX1031__ 0
+#endif
+#ifndef __SYCL_TARGET_AMD_GPU_GFX1032__
+#define __SYCL_TARGET_AMD_GPU_GFX1032__ 0
+#endif
+
+// This is true when the translation unit is compiled in AOT mode with target
+// names that supports the "if_architecture_is" features.  If an unsupported
+// target name is specified via "-fsycl-targets", the associated invocation of
+// the device compiler will set this variable to false, and that will trigger
+// an error for code that uses "if_architecture_is".
+static constexpr bool is_allowable_aot_mode =
+    (__SYCL_TARGET_INTEL_X86_64__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_BDW__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_SKL__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_KBL__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_CFL__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_APL__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_GLK__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_WHL__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_AML__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_CML__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_ICLLP__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_EHL__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_TGLLP__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_RKL__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_ADL_S__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_RPL_S__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_ADL_P__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_ADL_N__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_DG1__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1) ||
+    (__SYCL_TARGET_INTEL_GPU_PVC__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM50__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM52__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM53__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM60__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM61__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM62__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM70__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM72__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM75__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM80__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM86__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM87__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM89__ == 1) ||
+    (__SYCL_TARGET_NVIDIA_GPU_SM90__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX700__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX701__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX702__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX801__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX802__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX803__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX805__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX810__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX900__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX902__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX904__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX906__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX908__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX90A__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX1010__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX1011__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX1012__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX1013__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX1030__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX1031__ == 1) ||
+    (__SYCL_TARGET_AMD_GPU_GFX1032__ == 1);
+
+struct IsAOTForArchitectureClass {
+  // Allocate an array of size == size of
+  // ext::oneapi::experimental::architecture enum.
+  bool arr[static_cast<int>(max_architecture) + 1];
+
+  using arch = ext::oneapi::experimental::architecture;
+
+  constexpr IsAOTForArchitectureClass() : arr() {
+    arr[static_cast<int>(arch::x86_64)] = __SYCL_TARGET_INTEL_X86_64__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_bdw)] =
+        __SYCL_TARGET_INTEL_GPU_BDW__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_skl)] =
+        __SYCL_TARGET_INTEL_GPU_SKL__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_kbl)] =
+        __SYCL_TARGET_INTEL_GPU_KBL__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_cfl)] =
+        __SYCL_TARGET_INTEL_GPU_CFL__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_apl)] =
+        __SYCL_TARGET_INTEL_GPU_APL__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_glk)] =
+        __SYCL_TARGET_INTEL_GPU_GLK__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_whl)] =
+        __SYCL_TARGET_INTEL_GPU_WHL__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_aml)] =
+        __SYCL_TARGET_INTEL_GPU_AML__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_cml)] =
+        __SYCL_TARGET_INTEL_GPU_CML__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_icllp)] =
+        __SYCL_TARGET_INTEL_GPU_ICLLP__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_ehl)] =
+        __SYCL_TARGET_INTEL_GPU_EHL__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_tgllp)] =
+        __SYCL_TARGET_INTEL_GPU_TGLLP__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_rkl)] =
+        __SYCL_TARGET_INTEL_GPU_RKL__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_adl_s)] =
+        __SYCL_TARGET_INTEL_GPU_ADL_S__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_rpl_s)] =
+        __SYCL_TARGET_INTEL_GPU_RPL_S__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_adl_p)] =
+        __SYCL_TARGET_INTEL_GPU_ADL_P__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_adl_n)] =
+        __SYCL_TARGET_INTEL_GPU_ADL_N__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_dg1)] =
+        __SYCL_TARGET_INTEL_GPU_DG1__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_acm_g10)] =
+        __SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_acm_g11)] =
+        __SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_acm_g12)] =
+        __SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1;
+    arr[static_cast<int>(arch::intel_gpu_pvc)] =
+        __SYCL_TARGET_INTEL_GPU_PVC__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_50)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM50__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_52)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM52__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_53)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM53__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_60)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM60__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_61)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM61__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_62)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM62__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_70)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM70__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_72)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM72__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_75)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM75__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_80)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM80__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_86)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM86__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_87)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM87__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_89)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM89__ == 1;
+    arr[static_cast<int>(arch::nvidia_gpu_sm_90)] =
+        __SYCL_TARGET_NVIDIA_GPU_SM90__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx700)] =
+        __SYCL_TARGET_AMD_GPU_GFX700__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx701)] =
+        __SYCL_TARGET_AMD_GPU_GFX701__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx702)] =
+        __SYCL_TARGET_AMD_GPU_GFX702__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx801)] =
+        __SYCL_TARGET_AMD_GPU_GFX801__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx802)] =
+        __SYCL_TARGET_AMD_GPU_GFX802__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx803)] =
+        __SYCL_TARGET_AMD_GPU_GFX803__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx805)] =
+        __SYCL_TARGET_AMD_GPU_GFX805__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx810)] =
+        __SYCL_TARGET_AMD_GPU_GFX810__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx900)] =
+        __SYCL_TARGET_AMD_GPU_GFX900__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx902)] =
+        __SYCL_TARGET_AMD_GPU_GFX902__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx904)] =
+        __SYCL_TARGET_AMD_GPU_GFX904__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx906)] =
+        __SYCL_TARGET_AMD_GPU_GFX906__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx908)] =
+        __SYCL_TARGET_AMD_GPU_GFX908__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx90a)] =
+        __SYCL_TARGET_AMD_GPU_GFX90A__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx1010)] =
+        __SYCL_TARGET_AMD_GPU_GFX1010__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx1011)] =
+        __SYCL_TARGET_AMD_GPU_GFX1011__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx1012)] =
+        __SYCL_TARGET_AMD_GPU_GFX1012__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx1030)] =
+        __SYCL_TARGET_AMD_GPU_GFX1030__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx1031)] =
+        __SYCL_TARGET_AMD_GPU_GFX1031__ == 1;
+    arr[static_cast<int>(arch::amd_gpu_gfx1032)] =
+        __SYCL_TARGET_AMD_GPU_GFX1032__ == 1;
+  }
+};
+
+// One entry for each enumerator in "architecture" telling whether the AOT
+// target matches that architecture.
+static constexpr IsAOTForArchitectureClass is_aot_for_architecture;
+
+// Reads the value of "is_allowable_aot_mode" via a template to defer triggering
+// static_assert() until template instantiation time.
+template <ext::oneapi::experimental::architecture... Archs>
+constexpr static bool allowable_aot_mode() {
+  return is_allowable_aot_mode;
+}
+
+// Tells if the current device has one of the architectures in the parameter
+// pack.
+template <ext::oneapi::experimental::architecture... Archs>
+constexpr static bool device_architecture_is() {
+  return (is_aot_for_architecture.arr[static_cast<int>(Archs)] || ...);
+}
+
+// Helper object used to implement "else_if_architecture_is" and "otherwise".
+// The "MakeCall" template parameter tells whether a previous clause in the
+// "if-elseif-elseif ..." chain was true.  When "MakeCall" is false, some
+// previous clause was true, so none of the subsequent
+// "else_if_architecture_is" or "otherwise" member functions should call the
+// user's function.
+template <bool MakeCall> class if_architecture_helper {
+public:
+  template <ext::oneapi::experimental::architecture... Archs, typename T,
+            typename... Args>
+  constexpr auto else_if_architecture_is(T fnTrue, Args... args) {
+    if constexpr (MakeCall && device_architecture_is<Archs...>()) {
+      fnTrue(args...);
+      return if_architecture_helper<false>{};
+    } else {
+      (void)fnTrue;
+      return if_architecture_helper<MakeCall>{};
+    }
+  }
+
+  template <typename T, typename... Args>
+  constexpr void otherwise(T fn, Args... args) {
+    if constexpr (MakeCall) {
+      fn(args...);
+    }
+  }
+};
+} // namespace detail
+
+namespace ext::oneapi::experimental {
+
+template <architecture... Archs, typename T, typename... Args>
+constexpr static auto if_architecture_is(T fnTrue, Args... args) {
+  static_assert(detail::allowable_aot_mode<Archs...>(),
+                "The if_architecture_is function may only be used when AOT "
+                "compiling with '-fsycl-targets=spir64_x86_64' or "
+                "'-fsycl-targets=*_gpu_*'");
+  if constexpr (detail::device_architecture_is<Archs...>()) {
+    fnTrue(args...);
+    return detail::if_architecture_helper<false>{};
+  } else {
+    (void)fnTrue;
+    return detail::if_architecture_helper<true>{};
+  }
+}
+
+} // namespace ext::oneapi::experimental
+} // __SYCL_INLINE_VER_NAMESPACE(_V1)
+} // namespace sycl
diff --git a/sycl/unittests/Extensions/DeviceArchitecture.cpp b/sycl/unittests/Extensions/DeviceArchitecture.cpp
index 452d29ba17211..46662dbcefa04 100644
--- a/sycl/unittests/Extensions/DeviceArchitecture.cpp
+++ b/sycl/unittests/Extensions/DeviceArchitecture.cpp
@@ -8,14 +8,14 @@
 
 #include <gtest/gtest.h>
 
-// define one of __SYCL_TARGET_INTEL_*** macro, e.g., the one for SKL
+// define one of __SYCL_TARGET_ macro, e.g., the one for SKL
 #define __SYCL_TARGET_INTEL_GPU_SKL__ 1
 
-#include <sycl/ext/intel/experimental/device_architecture.hpp>
+#include <sycl/ext/oneapi/experimental/device_architecture.hpp>
 
 using namespace sycl;
 using namespace sycl::detail;
-using namespace sycl::ext::intel::experimental;
+using namespace sycl::ext::oneapi::experimental;
 
 TEST(DeviceArchitectureTest, DeviceArchitecture_If) {
   bool res = false;