diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 39a6d6d5534e8..aa3eaf78a1e1e 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -1554,6 +1554,15 @@ void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple, getDriver().Diag(diag::err_drv_unsupported_opt_for_target) << "-device" << Target; } + // ocloc has different names for some of the newer architectures; + // translate them to the apropriate value here. + DepInfo = + llvm::StringSwitch(DepInfo) + .Cases("pvc_vg", "12_61_7", "pvc_xt_c0_vg") + .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "12_70_4", "mtl_s") + .Cases("mtl_h", "12_71_4", "mtl_p") + .Cases("arl_h", "12_74_4", "xe_lpgplus_b0") + .Default(DepInfo); CmdArgs.push_back("-device"); CmdArgs.push_back(Args.MakeArgString(DepInfo)); } diff --git a/clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp b/clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp index 3647e4245f811..db6ab44b7b723 100644 --- a/clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp +++ b/clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp @@ -123,27 +123,27 @@ // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_60_7 -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc -DMAC_STR=PVC // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc_vg -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc_vg -DMAC_STR=PVC_VG +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc_xt_c0_vg -DMAC_STR=PVC_VG // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_61_7 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc_vg -DMAC_STR=PVC_VG +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc_xt_c0_vg -DMAC_STR=PVC_VG // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_mtl_u -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_mtl_s -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_arl_u -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_arl_s -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_70_4 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_mtl_h -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_h -DMAC_STR=MTL_H +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_p -DMAC_STR=MTL_H // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_71_4 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_h -DMAC_STR=MTL_H +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_p -DMAC_STR=MTL_H // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_arl_h -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=arl_h -DMAC_STR=ARL_H +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=xe_lpgplus_b0 -DMAC_STR=ARL_H // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_74_4 -### %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=arl_h -DMAC_STR=ARL_H +// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=xe_lpgplus_b0 -DMAC_STR=ARL_H // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_bmg_g21 -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bmg_g21 -DMAC_STR=BMG_G21 // RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_20_1_4 -### %s 2>&1 | \ diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 4210a0d150abf..a108cb6dbd916 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -161,9 +161,40 @@ def : TargetInfo<"x86_64", [], [], "", "", 1>; // TODO: The aspects listed for the intel_gpu targets right now are incomplete; // only the fp16/fp64/atomic64 aspects are listed. -def : TargetInfo<"intel_gpu_cfl", [AspectFp16, AspectFp64, AspectAtomic64], [8, 16, 32]>; -def : TargetInfo<"intel_gpu_tgllp", [AspectFp16, AspectAtomic64], [8, 16, 32]>; -def : TargetInfo<"intel_gpu_pvc", [AspectFp16, AspectFp64, AspectAtomic64], [16, 32]>; +defvar Fp16Fp64Atomic64 = [AspectFp16, AspectFp64, AspectAtomic64]; +defvar Fp16Atomic64 = [AspectFp16, AspectAtomic64]; +defvar Sg8_16_32 = [8, 16, 32]; +defvar Sg16_32 = [16, 32]; +defvar IntelBaseAspects = [AspectExt_intel_esimd]; +class IntelTargetInfo Aspects, list subGroupSizesList> +: TargetInfo; +// Note: only the "canonical" target names are listed here - see +// SYCL::gen::resolveGenDevice(). +def : IntelTargetInfo<"intel_gpu_arl_h", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_mtl_h", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_mtl_u", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_pvc_vg", Fp16Fp64Atomic64, Sg16_32>; +def : IntelTargetInfo<"intel_gpu_pvc", Fp16Fp64Atomic64, Sg16_32>; +def : IntelTargetInfo<"intel_gpu_acm_g12", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_acm_g11", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_acm_g10", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_dg1", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_adl_n", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_adl_p", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_adl_s", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_rkl", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_tgllp", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_ehl", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_icllp", Fp16Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_cml", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_aml", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_whl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_glk", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_apl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_cfl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_kbl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_skl", Fp16Fp64Atomic64, Sg8_16_32>; +def : IntelTargetInfo<"intel_gpu_bdw", Fp16Fp64Atomic64, Sg8_16_32>; // // CUDA / NVPTX device aspects diff --git a/llvm/test/tools/sycl-post-link/aot-esimd.ll b/llvm/test/tools/sycl-post-link/aot-esimd.ll new file mode 100644 index 0000000000000..b1235a929e9b2 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/aot-esimd.ll @@ -0,0 +1,49 @@ +; With ESIMD, the reqd_sub_group_size of a kernel will be 1. Normally, +; no device can handled compiling for this reqd_sub_group_size, but +; for ESIMD, this is an exception. This test makes sure that +; ESIMD kernels are not filtered out when using filtering +; (e.g. -o intel_gpu_dg1,%t-dg1.table) and also ensures that +; non ESIMD kernels with reqd_sub_group_size=1 are still filtered out. + +; RUN: sycl-post-link %s -symbols -split=auto \ +; RUN: -o intel_gpu_dg1,%t-dg1.table + +; RUN: FileCheck %s -input-file=%t-dg1.table -check-prefix=CHECK-TABLE +; RUN: FileCheck %s -input-file=%t-dg1_esimd_0.sym -check-prefix=CHECK-SYM -implicit-check-not=reqd_sub_group_size_kernel_1 + +; CHECK-TABLE: _esimd_0.sym +; CHECK-SYM: esimd_kernel + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +define spir_kernel void @esimd_kernel(ptr addrspace(1) noundef align 8 %_arg_out) #0 !sycl_explicit_simd !69 !intel_reqd_sub_group_size !68 !sycl_used_aspects !67 { +entry: + ret void +} + +define spir_kernel void @reqd_sub_group_size_kernel_1(ptr addrspace(1) noundef align 8 %_arg_out) #0 !intel_reqd_sub_group_size !68 { +entry: + ret void +} + +attributes #0 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="double.cpp" "sycl-optlevel"="3" "uniform-work-group-size"="true" } + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!llvm.ident = !{!64} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!9 = !{!"ext_intel_esimd", i32 53} +!64 = !{!"clang version 19.0.0git (/ws/llvm/clang a7f3a637bdd6299831f903bbed9e8d069fea5c86)"} +!67 = !{!9} +!68 = !{i32 1} +!69 = !{} +!78 = !{i32 8} +!79 = !{i32 16} +!80 = !{i32 32} +!81 = !{i32 64} diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 6760b2b1c6f7e..d1b9dc236218b 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -710,7 +710,11 @@ bool isTargetCompatibleWithModule(const std::optional &Target, } // Check if module sub group size is compatible with the target. - if (ModuleReqs.SubGroupSize.has_value() && + // For ESIMD, the reqd_sub_group_size will be 1; this is not + // a supported by any backend (e.g. no backend can support a kernel + // with sycl::reqd_sub_group_size(1)), but for ESIMD, this is + // a special case. + if (!IrMD.isESIMD() && ModuleReqs.SubGroupSize.has_value() && !is_contained(TargetInfo.subGroupSizes, *ModuleReqs.SubGroupSize)) return false; diff --git a/sycl/include/sycl/ext/oneapi/experimental/architectures.def b/sycl/include/sycl/ext/oneapi/experimental/architectures.def index 47741b0ba3778..b8148f673814a 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/architectures.def +++ b/sycl/include/sycl/ext/oneapi/experimental/architectures.def @@ -17,6 +17,8 @@ // device::get_info // - alias of architecture if this is Intel GPU architecture in format // intel_gpu_ +// - supported aspects of architecture in +// llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td // // Important note about keeping architecture IDs below unique: // - the architecture ID must be a hex number with 16 digits