From 4257d8cd1dafb1ba339a9c707bb746e325a203ab Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Fri, 14 Jun 2024 21:01:43 +0000 Subject: [PATCH 01/10] [SYCL] Add more aspect information for intel_gpu_* in device config file --- .../llvm/SYCLLowerIR/DeviceConfigFile.td | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 38d5f2512a1c4..26783b67ff057 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -160,9 +160,31 @@ def : TargetInfo<"x86_64", [], [], "", "", 1>; // TODO: The aspects listed for the intel_gpu targets right now are incomplete; // only the fp16/fp64/atomic64 aspects are listed. -def : TargetInfo<"intel_gpu_cfl", [AspectFp16, AspectFp64, AspectAtomic64], [8, 16, 32]>; -def : TargetInfo<"intel_gpu_tgllp", [AspectFp16, AspectAtomic64], [8, 16, 32]>; -def : TargetInfo<"intel_gpu_pvc", [AspectFp16, AspectFp64, AspectAtomic64], [16, 32]>; +defvar Fp16Fp64Atomic64 = [AspectFp16, AspectFp64, AspectAtomic64]; +defvar Fp16Atomic64 = [AspectFp16, AspectAtomic64]; +defvar Sg8_16_32 = [8, 16, 32]; +defvar Sg16_32 = [16, 32]; +def : TargetInfo<"intel_gpu_pvc", Fp16Fp64Atomic64, Sg16_32>; +def : TargetInfo<"intel_gpu_acm_g12", Fp16Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_acm_g11", Fp16Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_acm_g10", Fp16Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_dg1", Fp16Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_adl_n", Fp16Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_adl_p", Fp16Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_adl_s", Fp16Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_rkl", Fp16Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_tgllp", Fp16Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_ehl", Fp16Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_icllp", Fp16Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_cml", Fp16Fp64Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_aml", Fp16Fp64Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_whl", Fp16Fp64Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_glk", Fp16Fp64Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_apl", Fp16Fp64Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_cfl", Fp16Fp64Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_kbl", Fp16Fp64Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_skl", Fp16Fp64Atomic64, Sg8_16_32>; +def : TargetInfo<"intel_gpu_bdw", Fp16Fp64Atomic64, Sg8_16_32>; // // CUDA / NVPTX device aspects From f3a52b3d88fb5f039ed48580e66f414c6dfe4d10 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Wed, 26 Jun 2024 14:04:08 -0700 Subject: [PATCH 02/10] Add architecture requirement on e2e test --- sycl/test-e2e/ESIMD/hardware_dispatch.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/test-e2e/ESIMD/hardware_dispatch.cpp b/sycl/test-e2e/ESIMD/hardware_dispatch.cpp index b69229f91b576..9c5ea962dd7c9 100644 --- a/sycl/test-e2e/ESIMD/hardware_dispatch.cpp +++ b/sycl/test-e2e/ESIMD/hardware_dispatch.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// // Add "-options -vc-codegen" explicitly to workaround bug in dev igc package. +// REQUIRES: architecture-intel_gpu_bdw // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_bdw %s -Xs "-options -vc-codegen" -o %t.out // RUN: %t.out // TODO: remove XFAIL when the fix in GPU RT for Windows is updated on CI From 0b1adb15b9d556b138e20f62a03282db467bf2ea Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Wed, 26 Jun 2024 21:05:55 +0000 Subject: [PATCH 03/10] Update filtering logic for ESIMD --- .../llvm/SYCLLowerIR/DeviceConfigFile.td | 45 ++++++++++--------- llvm/tools/sycl-post-link/sycl-post-link.cpp | 6 ++- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 26783b67ff057..1d4ebafd00496 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -164,27 +164,30 @@ defvar Fp16Fp64Atomic64 = [AspectFp16, AspectFp64, AspectAtomic64]; defvar Fp16Atomic64 = [AspectFp16, AspectAtomic64]; defvar Sg8_16_32 = [8, 16, 32]; defvar Sg16_32 = [16, 32]; -def : TargetInfo<"intel_gpu_pvc", Fp16Fp64Atomic64, Sg16_32>; -def : TargetInfo<"intel_gpu_acm_g12", Fp16Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_acm_g11", Fp16Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_acm_g10", Fp16Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_dg1", Fp16Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_adl_n", Fp16Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_adl_p", Fp16Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_adl_s", Fp16Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_rkl", Fp16Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_tgllp", Fp16Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_ehl", Fp16Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_icllp", Fp16Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_cml", Fp16Fp64Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_aml", Fp16Fp64Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_whl", Fp16Fp64Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_glk", Fp16Fp64Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_apl", Fp16Fp64Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_cfl", Fp16Fp64Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_kbl", Fp16Fp64Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_skl", Fp16Fp64Atomic64, Sg8_16_32>; -def : TargetInfo<"intel_gpu_bdw", Fp16Fp64Atomic64, Sg8_16_32>; +defvar IntelBaseAspects = [AspectExt_intel_esimd]; +class IntelTarget Aspects, list subGroupSizesList> +: TargetInfo; +def : IntelTarget<"intel_gpu_pvc", Fp16Fp64Atomic64, Sg16_32>; +def : IntelTarget<"intel_gpu_acm_g12", Fp16Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_acm_g11", Fp16Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_acm_g10", Fp16Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_dg1", Fp16Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_adl_n", Fp16Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_adl_p", Fp16Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_adl_s", Fp16Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_rkl", Fp16Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_tgllp", Fp16Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_ehl", Fp16Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_icllp", Fp16Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_cml", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_aml", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_whl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_glk", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_apl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_cfl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_kbl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_skl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTarget<"intel_gpu_bdw", Fp16Fp64Atomic64, Sg8_16_32>; // // CUDA / NVPTX device aspects diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index d2c29d10aea7f..8f3dc41716635 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -1050,7 +1050,11 @@ bool isTargetCompatibleWithModule(const std::optional &Target, } // Check if module sub group size is compatible with the target. - if (ModuleReqs.SubGroupSize.has_value() && + // For ESIMD, the reqd_sub_group_size will be 1; this is not + // a supported by any backend (e.g. no backend can support a kernel + // with sycl::reqd_sub_group_size(1)), but for ESIMD, this is + // a special case. + if (!IrMD.isESIMD() && ModuleReqs.SubGroupSize.has_value() && !is_contained(TargetInfo.subGroupSizes, *ModuleReqs.SubGroupSize)) return false; From e08a6f58510560547f5510110ba1aa7b2bd271aa Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Fri, 28 Jun 2024 12:47:38 -0700 Subject: [PATCH 04/10] Add aot-esimd.ll test --- llvm/test/tools/sycl-post-link/aot-esimd.ll | 49 +++++++++++++++++++++ sycl/test-e2e/ESIMD/hardware_dispatch.cpp | 1 - 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 llvm/test/tools/sycl-post-link/aot-esimd.ll diff --git a/llvm/test/tools/sycl-post-link/aot-esimd.ll b/llvm/test/tools/sycl-post-link/aot-esimd.ll new file mode 100644 index 0000000000000..b1235a929e9b2 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/aot-esimd.ll @@ -0,0 +1,49 @@ +; With ESIMD, the reqd_sub_group_size of a kernel will be 1. Normally, +; no device can handled compiling for this reqd_sub_group_size, but +; for ESIMD, this is an exception. This test makes sure that +; ESIMD kernels are not filtered out when using filtering +; (e.g. -o intel_gpu_dg1,%t-dg1.table) and also ensures that +; non ESIMD kernels with reqd_sub_group_size=1 are still filtered out. + +; RUN: sycl-post-link %s -symbols -split=auto \ +; RUN: -o intel_gpu_dg1,%t-dg1.table + +; RUN: FileCheck %s -input-file=%t-dg1.table -check-prefix=CHECK-TABLE +; RUN: FileCheck %s -input-file=%t-dg1_esimd_0.sym -check-prefix=CHECK-SYM -implicit-check-not=reqd_sub_group_size_kernel_1 + +; CHECK-TABLE: _esimd_0.sym +; CHECK-SYM: esimd_kernel + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +define spir_kernel void @esimd_kernel(ptr addrspace(1) noundef align 8 %_arg_out) #0 !sycl_explicit_simd !69 !intel_reqd_sub_group_size !68 !sycl_used_aspects !67 { +entry: + ret void +} + +define spir_kernel void @reqd_sub_group_size_kernel_1(ptr addrspace(1) noundef align 8 %_arg_out) #0 !intel_reqd_sub_group_size !68 { +entry: + ret void +} + +attributes #0 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="double.cpp" "sycl-optlevel"="3" "uniform-work-group-size"="true" } + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!llvm.ident = !{!64} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!9 = !{!"ext_intel_esimd", i32 53} +!64 = !{!"clang version 19.0.0git (/ws/llvm/clang a7f3a637bdd6299831f903bbed9e8d069fea5c86)"} +!67 = !{!9} +!68 = !{i32 1} +!69 = !{} +!78 = !{i32 8} +!79 = !{i32 16} +!80 = !{i32 32} +!81 = !{i32 64} diff --git a/sycl/test-e2e/ESIMD/hardware_dispatch.cpp b/sycl/test-e2e/ESIMD/hardware_dispatch.cpp index 9c5ea962dd7c9..b69229f91b576 100644 --- a/sycl/test-e2e/ESIMD/hardware_dispatch.cpp +++ b/sycl/test-e2e/ESIMD/hardware_dispatch.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // Add "-options -vc-codegen" explicitly to workaround bug in dev igc package. -// REQUIRES: architecture-intel_gpu_bdw // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_bdw %s -Xs "-options -vc-codegen" -o %t.out // RUN: %t.out // TODO: remove XFAIL when the fix in GPU RT for Windows is updated on CI From 0684382a8cd5d2005598ed02026569e1d3568168 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Wed, 3 Jul 2024 10:51:24 -0700 Subject: [PATCH 05/10] Rename IntelTarget -> IntelTargetInfo --- .../llvm/SYCLLowerIR/DeviceConfigFile.td | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 1d4ebafd00496..a1746eb1eb4d0 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -165,29 +165,29 @@ defvar Fp16Atomic64 = [AspectFp16, AspectAtomic64]; defvar Sg8_16_32 = [8, 16, 32]; defvar Sg16_32 = [16, 32]; defvar IntelBaseAspects = [AspectExt_intel_esimd]; -class IntelTarget Aspects, list subGroupSizesList> +class IntelTargetInfo Aspects, list subGroupSizesList> : TargetInfo; -def : IntelTarget<"intel_gpu_pvc", Fp16Fp64Atomic64, Sg16_32>; -def : IntelTarget<"intel_gpu_acm_g12", Fp16Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_acm_g11", Fp16Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_acm_g10", Fp16Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_dg1", Fp16Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_adl_n", Fp16Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_adl_p", Fp16Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_adl_s", Fp16Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_rkl", Fp16Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_tgllp", Fp16Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_ehl", Fp16Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_icllp", Fp16Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_cml", Fp16Fp64Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_aml", Fp16Fp64Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_whl", Fp16Fp64Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_glk", Fp16Fp64Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_apl", Fp16Fp64Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_cfl", Fp16Fp64Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_kbl", Fp16Fp64Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_skl", Fp16Fp64Atomic64, Sg8_16_32>; -def : IntelTarget<"intel_gpu_bdw", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_pvc", Fp16Fp64Atomic64, Sg16_32>; +def : IntelTargetInfo<"intel_gpu_acm_g12", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_acm_g11", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_acm_g10", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_dg1", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_adl_n", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_adl_p", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_adl_s", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_rkl", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_tgllp", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_ehl", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_icllp", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_cml", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_aml", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_whl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_glk", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_apl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_cfl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_kbl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_skl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_bdw", Fp16Fp64Atomic64, Sg8_16_32>; // // CUDA / NVPTX device aspects From 18ea9a9dd30c1aed0971fc43415f9005a01d6d31 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Wed, 3 Jul 2024 18:04:45 +0000 Subject: [PATCH 06/10] Update architectures.def comment --- sycl/include/sycl/ext/oneapi/experimental/architectures.def | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/include/sycl/ext/oneapi/experimental/architectures.def b/sycl/include/sycl/ext/oneapi/experimental/architectures.def index 47741b0ba3778..e86e433569e87 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/architectures.def +++ b/sycl/include/sycl/ext/oneapi/experimental/architectures.def @@ -17,6 +17,8 @@ // device::get_info // - alias of architecture if this is Intel GPU architecture in format // intel_gpu_ +// - supported aspects ofarchitecture in +// llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td // // Important note about keeping architecture IDs below unique: // - the architecture ID must be a hex number with 16 digits From fd966aa5fc691bd01babe1e5b6d366a0b50a3d17 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Wed, 3 Jul 2024 16:31:49 -0700 Subject: [PATCH 07/10] Add pvc_vg, mtl_u, mtl_h, and arl_h targets special handling and aspects --- clang/lib/Driver/ToolChains/SYCL.cpp | 9 +++++++++ llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 1db4500bd3b51..37ecdbb623daa 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -1553,6 +1553,15 @@ void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple, getDriver().Diag(diag::err_drv_unsupported_opt_for_target) << "-device" << Target; } + // ocloc has different names for some of the newer architectures; + // translate them to the apropriate value here. + DepInfo = + llvm::StringSwitch(DepInfo) + .Cases("pvc_vg", "12_61_7", "pvc_xt_c0_vg") + .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "12_70_4", "mtl_s") + .Cases("mtl_h", "12_71_4", "mtl_p") + .Cases("arl_h", "12_74_4", "xe_lpgplus_b0") + .Default(DepInfo); CmdArgs.push_back("-device"); CmdArgs.push_back(Args.MakeArgString(DepInfo)); } diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index a1746eb1eb4d0..62bf6dc94d008 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -167,6 +167,10 @@ defvar Sg16_32 = [16, 32]; defvar IntelBaseAspects = [AspectExt_intel_esimd]; class IntelTargetInfo Aspects, list subGroupSizesList> : TargetInfo; +def : IntelTargetInfo<"intel_gpu_arl_h", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_mtl_h", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_mtl_u", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_pvc_vg", Fp16Fp64Atomic64, Sg16_32>; def : IntelTargetInfo<"intel_gpu_pvc", Fp16Fp64Atomic64, Sg16_32>; def : IntelTargetInfo<"intel_gpu_acm_g12", Fp16Atomic64, Sg8_16_32>; def : IntelTargetInfo<"intel_gpu_acm_g11", Fp16Atomic64, Sg8_16_32>; From 39c27203d34d637a84c889857c213b51a037d106 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Wed, 3 Jul 2024 16:33:51 -0700 Subject: [PATCH 08/10] Add comment about canonical target names --- llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 62bf6dc94d008..e71dedada7b5e 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -167,6 +167,8 @@ defvar Sg16_32 = [16, 32]; defvar IntelBaseAspects = [AspectExt_intel_esimd]; class IntelTargetInfo Aspects, list subGroupSizesList> : TargetInfo; +// Note: only the "canonical" target names are listed here - see +// SYCL::gen::resolveGenDevice(). def : IntelTargetInfo<"intel_gpu_arl_h", Fp16Fp64Atomic64, Sg8_16_32>; def : IntelTargetInfo<"intel_gpu_mtl_h", Fp16Fp64Atomic64, Sg8_16_32>; def : IntelTargetInfo<"intel_gpu_mtl_u", Fp16Fp64Atomic64, Sg8_16_32>; From ae3534b5ef0d6a3a302c70147a116d1f840081ff Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Mon, 8 Jul 2024 21:44:33 -0700 Subject: [PATCH 09/10] Update driver test to show different call of ocloc --- .../test/Driver/sycl-oneapi-gpu-intelgpu.cpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp b/clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp index 3647e4245f811..db6ab44b7b723 100644 --- a/clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp +++ b/clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp @@ -123,27 +123,27 @@ // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_60_7 -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc -DMAC_STR=PVC // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc_vg -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc_vg -DMAC_STR=PVC_VG +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc_xt_c0_vg -DMAC_STR=PVC_VG // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_61_7 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc_vg -DMAC_STR=PVC_VG +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc_xt_c0_vg -DMAC_STR=PVC_VG // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_mtl_u -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_mtl_s -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_arl_u -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_arl_s -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_70_4 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_mtl_h -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_h -DMAC_STR=MTL_H +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_p -DMAC_STR=MTL_H // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_71_4 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_h -DMAC_STR=MTL_H +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_p -DMAC_STR=MTL_H // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_arl_h -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=arl_h -DMAC_STR=ARL_H +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=xe_lpgplus_b0 -DMAC_STR=ARL_H // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_74_4 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=arl_h -DMAC_STR=ARL_H +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=xe_lpgplus_b0 -DMAC_STR=ARL_H // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_bmg_g21 -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bmg_g21 -DMAC_STR=BMG_G21 // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_20_1_4 -### %s 2>&1 | \ From 2f3306adf2f097723d3126d7205bee204af6b7f4 Mon Sep 17 00:00:00 2001 From: Justin Cai Date: Tue, 9 Jul 2024 07:43:04 -0700 Subject: [PATCH 10/10] Update sycl/include/sycl/ext/oneapi/experimental/architectures.def Co-authored-by: Dmitry Vodopyanov --- sycl/include/sycl/ext/oneapi/experimental/architectures.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/architectures.def b/sycl/include/sycl/ext/oneapi/experimental/architectures.def index e86e433569e87..b8148f673814a 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/architectures.def +++ b/sycl/include/sycl/ext/oneapi/experimental/architectures.def @@ -17,7 +17,7 @@ // device::get_info // - alias of architecture if this is Intel GPU architecture in format // intel_gpu_ -// - supported aspects ofarchitecture in +// - supported aspects of architecture in // llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td // // Important note about keeping architecture IDs below unique: