diff --git a/llvm/test/tools/sycl-post-link/spec-constants/SYCL2020-struct-with-undef-padding.ll b/llvm/test/tools/sycl-post-link/spec-constants/SYCL2020-struct-with-undef-padding.ll new file mode 100644 index 0000000000000..2e000c057ed6f --- /dev/null +++ b/llvm/test/tools/sycl-post-link/spec-constants/SYCL2020-struct-with-undef-padding.ll @@ -0,0 +1,89 @@ +; RUN: sycl-post-link --spec-const=rt -S %s -o %t.files.table +; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-IR +; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP +; +; This test is intended to check that SpecConstantsPass is able to handle the +; situation where specialization constants with complex types such as structs +; have an 'undef' value for padding in LLVM IR + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.cl::sycl::specialization_id" = type { %struct.coeff_str_aligned_t } +%"class.cl::sycl::specialization_id.1" = type { %struct.coeff2_str_aligned_t } +%struct.coeff_str_aligned_t = type { %"class.std::array", i64, [8 x i8] } +%struct.coeff2_str_aligned_t = type { %"class.std::array", i64, [7 x i8], i8 } +%"class.std::array" = type { [3 x float] } + +$_ZTSZ4mainEUlN2cl4sycl14kernel_handlerEE_ = comdat any + +@__usid_str = private unnamed_addr constant [32 x i8] c"ef880fa09cf7a9d7____ZL8coeff_id\00", align 1 +@_ZL8coeff_id = internal addrspace(1) constant %"class.cl::sycl::specialization_id" { %struct.coeff_str_aligned_t { %"class.std::array" zeroinitializer, i64 0, [8 x i8] undef } }, align 32 +@__usid_str.0 = private unnamed_addr constant [33 x i8] c"df991fa0adf9bad8____ZL8coeff_id2\00", align 1 +@_ZL8coeff_id2 = internal addrspace(1) constant %"class.cl::sycl::specialization_id.1" { %struct.coeff2_str_aligned_t { %"class.std::array" zeroinitializer, i64 0, [7 x i8] undef, i8 undef } }, align 32 + +; Function Attrs: convergent 
norecurse +define weak_odr dso_local spir_kernel void @_ZTSZ4mainEUlN2cl4sycl14kernel_handlerEE_() local_unnamed_addr #0 comdat !kernel_arg_buffer_location !6 !sycl_kernel_omit_args !7 { + %1 = alloca %struct.coeff_str_aligned_t, align 32 + %2 = addrspacecast %struct.coeff_str_aligned_t* %1 to %struct.coeff_str_aligned_t addrspace(4)* + %3 = bitcast %struct.coeff_str_aligned_t* %1 to i8* + call spir_func void @_Z40__sycl_getComposite2020SpecConstantValueI19coeff_str_aligned_tET_PKcPKvS5_(%struct.coeff_str_aligned_t addrspace(4)* sret(%struct.coeff_str_aligned_t) align 32 %2, i8 addrspace(4)* noundef addrspacecast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @__usid_str, i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)* noundef addrspacecast (i8 addrspace(1)* bitcast (%"class.cl::sycl::specialization_id" addrspace(1)* @_ZL8coeff_id to i8 addrspace(1)*) to i8 addrspace(4)*), i8 addrspace(4)* noundef null) #4 +; CHECK-IR: %[[#NS0:]] = call float @_Z20__spirv_SpecConstantif(i32 [[#SCID0:]], float 0.000000e+00) +; CHECK-IR: %[[#NS1:]] = call float @_Z20__spirv_SpecConstantif(i32 [[#SCID1:]], float 0.000000e+00) +; CHECK-IR: %[[#NS2:]] = call float @_Z20__spirv_SpecConstantif(i32 [[#SCID2:]], float 0.000000e+00) +; CHECK-IR: %[[#NS3:]] = call [3 x float] @_Z29__spirv_SpecConstantCompositefff_RA3_f(float %[[#NS0]], float %[[#NS1]], float %[[#NS2]]) +; CHECK-IR: %[[#NS4:]] = call %"class.std::array" @"_Z29__spirv_SpecConstantCompositeA3_f_Rclass.std::array"([3 x float] %[[#NS3]]) +; CHECK-IR: %[[#NS5:]] = call i64 @_Z20__spirv_SpecConstantix(i32 [[#SCID3:]], i64 0) +; CHECK-IR: %[[#NS6:]] = call %struct.coeff_str_aligned_t @"_Z29__spirv_SpecConstantCompositeclass.std::arrayxA8_a_Rstruct.coeff_str_aligned_t"(%"class.std::array" %[[#NS4]], i64 %[[#NS5]], [8 x i8] undef) + + %4 = alloca %struct.coeff2_str_aligned_t, align 32 + %5 = addrspacecast %struct.coeff2_str_aligned_t* %4 to %struct.coeff2_str_aligned_t addrspace(4)* + %6 = bitcast %struct.coeff2_str_aligned_t* 
%4 to i8* + call spir_func void @_Z40__sycl_getComposite2020SpecConstantValueI19coeff2_str_aligned_tET_PKcPKvS5_(%struct.coeff2_str_aligned_t addrspace(4)* sret(%struct.coeff2_str_aligned_t) align 32 %5, i8 addrspace(4)* noundef addrspacecast (i8* getelementptr inbounds ([33 x i8], [33 x i8]* @__usid_str.0, i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)* noundef addrspacecast (i8 addrspace(1)* bitcast (%"class.cl::sycl::specialization_id.1" addrspace(1)* @_ZL8coeff_id2 to i8 addrspace(1)*) to i8 addrspace(4)*), i8 addrspace(4)* noundef null) #4 +; CHECK-IR: %[[#NS7:]] = call float @_Z20__spirv_SpecConstantif(i32 [[#SCID4:]], float 0.000000e+00) +; CHECK-IR: %[[#NS8:]] = call float @_Z20__spirv_SpecConstantif(i32 [[#SCID5:]], float 0.000000e+00) +; CHECK-IR: %[[#NS9:]] = call float @_Z20__spirv_SpecConstantif(i32 [[#SCID6:]], float 0.000000e+00) +; CHECK-IR: %[[#NS10:]] = call [3 x float] @_Z29__spirv_SpecConstantCompositefff_RA3_f(float %[[#NS7]], float %[[#NS8]], float %[[#NS9]]) +; CHECK-IR: %[[#NS11:]] = call %"class.std::array" @"_Z29__spirv_SpecConstantCompositeA3_f_Rclass.std::array"([3 x float] %[[#NS10]]) +; CHECK-IR: %[[#NS12:]] = call i64 @_Z20__spirv_SpecConstantix(i32 [[#SCID7:]], i64 0) +; CHECK-IR: %[[#NS13:]] = call %struct.coeff2_str_aligned_t @"_Z29__spirv_SpecConstantCompositeclass.std::arrayxA7_aa_Rstruct.coeff2_str_aligned_t"(%"class.std::array" %[[#NS11]], i64 %[[#NS12]], [7 x i8] undef, i8 undef) + + ret void +} +; Function Attrs: convergent +declare dso_local spir_func void @_Z40__sycl_getComposite2020SpecConstantValueI19coeff_str_aligned_tET_PKcPKvS5_(%struct.coeff_str_aligned_t addrspace(4)* sret(%struct.coeff_str_aligned_t) align 32, i8 addrspace(4)* noundef, i8 addrspace(4)* noundef, i8 addrspace(4)* noundef) local_unnamed_addr #2 + +declare dso_local spir_func void @_Z40__sycl_getComposite2020SpecConstantValueI19coeff2_str_aligned_tET_PKcPKvS5_(%struct.coeff2_str_aligned_t addrspace(4)* sret(%struct.coeff2_str_aligned_t) align 32, 
i8 addrspace(4)* noundef, i8 addrspace(4)* noundef, i8 addrspace(4)* noundef) local_unnamed_addr #2 + +attributes #0 = { convergent norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="spec-constant-test.cpp" "uniform-work-group-size"="true" } +attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn } +attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { nounwind } +attributes #4 = { convergent } + +!llvm.dependent-libraries = !{!0} +!llvm.module.flags = !{!1, !2} +!opencl.spir.version = !{!3} +!spirv.Source = !{!4} +!llvm.ident = !{!5} +; CHECK-IR: !sycl.specialization-constants = !{![[#MN0:]], ![[#MN1:]]} +; CHECK-IR: !sycl.specialization-constants-default-values = !{![[#MN2:]], ![[#MN3:]]} + +!0 = !{!"libcpmt"} +!1 = !{i32 1, !"wchar_size", i32 2} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 1, i32 2} +!4 = !{i32 4, i32 100000} +!5 = !{!"clang version 14.0.0"} +!6 = !{i32 -1} +!7 = !{i1 true} +; CHECK-IR: ![[#MN0]] = !{!"ef880fa09cf7a9d7____ZL8coeff_id", i32 0, i32 0, i32 4, i32 1, i32 4, i32 4, i32 2, i32 8, i32 4, i32 3, i32 16, i32 8, i32 -1, i32 24, i32 8} +; CHECK-IR: ![[#MN1]] = !{!"df991fa0adf9bad8____ZL8coeff_id2", i32 5, i32 0, i32 4, i32 6, i32 4, i32 4, i32 7, i32 8, i32 4, i32 8, i32 16, i32 8, i32 -1, i32 31, i32 1} +; CHECK-IR: ![[#MN2]] = !{%struct.coeff_str_aligned_t { %"class.std::array" zeroinitializer, i64 0, [8 x i8] undef }} +; CHECK-IR: ![[#MN3]] = !{%struct.coeff2_str_aligned_t { %"class.std::array" zeroinitializer, i64 0, [7 x i8] undef, i8 undef }} + +; CHECK-PROP: [SYCL/specialization constants] +; CHECK-PROP-NEXT: ef880fa09cf7a9d7____ZL8coeff_id=2| + +; CHECK-PROP: [SYCL/specialization constants default values] +; CHECK-PROP-NEXT: all=2| diff --git a/llvm/tools/sycl-post-link/SpecConstants.cpp b/llvm/tools/sycl-post-link/SpecConstants.cpp index 
fea47cf1b5239..5b0f748d9f443 100644 --- a/llvm/tools/sycl-post-link/SpecConstants.cpp +++ b/llvm/tools/sycl-post-link/SpecConstants.cpp @@ -51,6 +51,14 @@ constexpr char SPEC_CONST_MD_STRING[] = "sycl.specialization-constants"; constexpr char SPEC_CONST_DEFAULT_VAL_MD_STRING[] = "sycl.specialization-constants-default-values"; +/// Spec. Constant ID is a pair of Id and a flag whether this Id belongs to an +/// undefined value. Undefined values ('undef' in the IR) are used to get the +/// required alignment and should be handled in a special manner as padding. +struct ID { + unsigned ID; + bool Undef; +}; + StringRef getStringLiteralArg(const CallInst *CI, unsigned ArgNo, SmallVectorImpl &DelInsts) { Value *V = CI->getArgOperand(ArgNo)->stripPointerCasts(); @@ -236,8 +244,13 @@ MDNode *generateSpecConstDefaultValueMetadata(StringRef SymID, Value *Default) { /// Recursively iterates over a composite type in order to collect information /// about its scalar elements. void collectCompositeElementsInfoRecursive( - const Module &M, Type *Ty, const unsigned *&IDIter, unsigned &Offset, + const Module &M, Type *Ty, const ID *&IDIter, unsigned &Offset, std::vector &Result) { + if (IDIter->Undef) { + // We can just skip undefined values because every such value is just a + // padding and will be handled in a different manner. 
+ return; + } if (auto *ArrTy = dyn_cast(Ty)) { for (size_t I = 0; I < ArrTy->getNumElements(); ++I) { // TODO: this is a spot for potential optimization: for arrays we could @@ -246,7 +259,9 @@ void collectCompositeElementsInfoRecursive( collectCompositeElementsInfoRecursive(M, ArrTy->getElementType(), IDIter, Offset, Result); - } else if (auto *StructTy = dyn_cast(Ty)) { + return; + } + if (auto *StructTy = dyn_cast(Ty)) { const StructLayout *SL = M.getDataLayout().getStructLayout(StructTy); const unsigned BaseOffset = Offset; unsigned LocalOffset = Offset; @@ -267,7 +282,12 @@ void collectCompositeElementsInfoRecursive( BaseOffset + SL->getSizeInBytes() - LocalOffset; if (PostStructPadding > 0) { SpecConstantDescriptor Desc; - // ID of padding descriptors is the max value possible. + // ID of padding descriptors is the max value possible. This value is a + // magic value for the runtime and will just be skipped. Even if there + // are many specialization constants and every constant has padding of + // a different length, everything will still work regardless of rewriting + // the descriptors with Desc.ID equal to the max value: they will simply + // be ignored. Desc.ID = std::numeric_limits::max(); Desc.Offset = LocalOffset; Desc.Size = PostStructPadding; @@ -277,7 +297,9 @@ void collectCompositeElementsInfoRecursive( // Update "global" offset according to the total size of a handled struct // type.
Offset += SL->getSizeInBytes(); - } else if (auto *VecTy = dyn_cast(Ty)) { + return; + } + if (auto *VecTy = dyn_cast(Ty)) { for (size_t I = 0; I < VecTy->getNumElements(); ++I) { // TODO: this is a spot for potential optimization: for vectors we could // just make a single recursive call here and use it to populate Result @@ -285,17 +307,19 @@ void collectCompositeElementsInfoRecursive( collectCompositeElementsInfoRecursive(M, VecTy->getElementType(), IDIter, Offset, Result); } - } else { // Assume that we encountered some scalar element - SpecConstantDescriptor Desc; - Desc.ID = *IDIter; - Desc.Offset = Offset; - Desc.Size = M.getDataLayout().getTypeStoreSize(Ty); - Result.push_back(Desc); - - // Move current ID and offset - ++IDIter; - Offset += Desc.Size; + return; } + + // Assume that we encountered some scalar element + SpecConstantDescriptor Desc; + Desc.ID = IDIter->ID; + Desc.Offset = Offset; + Desc.Size = M.getDataLayout().getTypeStoreSize(Ty); + Result.push_back(Desc); + + // Move current ID and offset + ++IDIter; + Offset += Desc.Size; } /// Recursively iterates over a composite type in order to collect information @@ -306,8 +330,8 @@ void collectCompositeElementsInfoRecursive( void collectCompositeElementsDefaultValuesRecursive( const Module &M, Constant *C, unsigned &Offset, std::vector &DefaultValues) { - if (isa(C)) { - // This code is generic for zeroinitializer for both arrays and structs + if (isa(C) || isa(C)) { + // This code is generic for both arrays and structs size_t NumBytes = M.getDataLayout().getTypeStoreSize(C->getType()); std::fill_n(std::back_inserter(DefaultValues), NumBytes, 0); Offset += NumBytes; @@ -400,7 +424,7 @@ void collectCompositeElementsDefaultValuesRecursive( } MDNode *generateSpecConstantMetadata(const Module &M, StringRef SymbolicID, - Type *SCTy, ArrayRef IDs, + Type *SCTy, ArrayRef IDs, bool IsNativeSpecConstant) { SmallVector MDOps; LLVMContext &Ctx = M.getContext(); @@ -413,7 +437,7 @@ MDNode 
*generateSpecConstantMetadata(const Module &M, StringRef SymbolicID, std::vector Result; Result.reserve(IDs.size()); unsigned Offset = 0; - const unsigned *IDPtr = IDs.data(); + const ID *IDPtr = IDs.data(); collectCompositeElementsInfoRecursive(M, SCTy, IDPtr, Offset, Result); // We may have padding elements so size should be at least the same size as @@ -432,7 +456,7 @@ MDNode *generateSpecConstantMetadata(const Module &M, StringRef SymbolicID, assert(IDs.size() == 1 && "There must be a single ID for emulated spec constant"); MDOps.push_back(ConstantAsMetadata::get( - Constant::getIntegerValue(Int32Ty, APInt(32, IDs[0])))); + Constant::getIntegerValue(Int32Ty, APInt(32, IDs[0].ID)))); // Second element is always zero here MDOps.push_back(ConstantAsMetadata::get( Constant::getIntegerValue(Int32Ty, APInt(32, 0)))); @@ -519,14 +543,9 @@ Instruction *emitSpecConstant(unsigned NumericID, Type *Ty, return emitCall(Ty, SPIRV_GET_SPEC_CONST_VAL, Args, InsertBefore); } -Instruction *emitSpecConstantComposite(Type *Ty, - ArrayRef Elements, +Instruction *emitSpecConstantComposite(Type *Ty, ArrayRef Elements, Instruction *InsertBefore) { - SmallVector Args(Elements.size()); - for (unsigned I = 0; I < Elements.size(); ++I) { - Args[I] = cast(Elements[I]); - } - return emitCall(Ty, SPIRV_GET_SPEC_CONST_COMPOSITE, Args, InsertBefore); + return emitCall(Ty, SPIRV_GET_SPEC_CONST_COMPOSITE, Elements, InsertBefore); } /// For specified specialization constant type emits LLVM IR which is required @@ -553,28 +572,46 @@ Instruction *emitSpecConstantComposite(Type *Ty, /// composite (plus for the top-level composite). Also enumerates all /// encountered scalars and assigns them IDs (or re-uses existing ones). 
Instruction *emitSpecConstantRecursiveImpl(Type *Ty, Instruction *InsertBefore, - SmallVectorImpl &IDs, + SmallVectorImpl &IDs, unsigned &Index, Constant *DefaultValue) { if (!Ty->isArrayTy() && !Ty->isStructTy() && !Ty->isVectorTy()) { // Scalar if (Index >= IDs.size()) { // If it is a new specialization constant, we need to generate IDs for // scalar elements, starting with the second one. - IDs.push_back(IDs.back() + 1); + assert(!isa_and_nonnull(DefaultValue) && + "All scalar values should be defined"); + IDs.push_back({IDs.back().ID + 1, false}); } - return emitSpecConstant(IDs[Index++], Ty, InsertBefore, DefaultValue); + return emitSpecConstant(IDs[Index++].ID, Ty, InsertBefore, DefaultValue); } - SmallVector Elements; + SmallVector Elements; + auto HandleUndef = [&](Constant *Def) { + if (Index >= IDs.size()) { + // If it is a new specialization constant, we need to generate IDs for + // the whole undef value. + IDs.push_back({IDs.back().ID + 1, true}); + } + Elements.push_back(Def); + }; auto LoopIteration = [&](Type *Ty, unsigned LocalIndex) { // Select corresponding element of the default value if it was provided Constant *Def = DefaultValue ? DefaultValue->getAggregateElement(LocalIndex) : nullptr; - Elements.push_back( - emitSpecConstantRecursiveImpl(Ty, InsertBefore, IDs, Index, Def)); + if (isa_and_nonnull(Def)) + HandleUndef(Def); + else + Elements.push_back( + emitSpecConstantRecursiveImpl(Ty, InsertBefore, IDs, Index, Def)); }; - if (auto *ArrTy = dyn_cast(Ty)) { + if (isa_and_nonnull(DefaultValue)) { + // If the default value is a composite and has the value 'undef', we should + // not generate a bunch of __spirv_SpecConstant for its elements but + // pass it into __spirv_SpecConstantComposite as is. 
+ HandleUndef(DefaultValue); + } else if (auto *ArrTy = dyn_cast(Ty)) { for (size_t I = 0; I < ArrTy->getNumElements(); ++I) { LoopIteration(ArrTy->getElementType(), I); } @@ -596,7 +633,7 @@ Instruction *emitSpecConstantRecursiveImpl(Type *Ty, Instruction *InsertBefore, /// Wrapper intended to hide IsFirstElement argument from the caller Instruction *emitSpecConstantRecursive(Type *Ty, Instruction *InsertBefore, - SmallVectorImpl &IDs, + SmallVectorImpl &IDs, Constant *DefaultValue) { unsigned Index = 0; return emitSpecConstantRecursiveImpl(Ty, InsertBefore, IDs, Index, @@ -607,9 +644,9 @@ Instruction *emitSpecConstantRecursive(Type *Ty, Instruction *InsertBefore, PreservedAnalyses SpecConstantsPass::run(Module &M, ModuleAnalysisManager &MAM) { - unsigned NextID = 0; + ID NextID = {0, false}; unsigned NextOffset = 0; - StringMap> IDMap; + StringMap> IDMap; StringMap OffsetMap; MapVector SCMetadata; SmallVector DefaultsMetadata; @@ -690,9 +727,8 @@ PreservedAnalyses SpecConstantsPass::run(Module &M, if (SetValAtRT) { // 2. Spec constant value will be set at run time - then add the literal // to a "spec const string literal ID" -> "vector of integer IDs" map, - // uniquing the integer IDs if this is a new literal - auto Ins = - IDMap.insert(std::make_pair(SymID, SmallVector{})); + // making the integer IDs unique if this is a new literal + auto Ins = IDMap.insert(std::make_pair(SymID, SmallVector{})); IsNewSpecConstant = Ins.second; auto &IDs = Ins.first->second; if (IsNewSpecConstant) { @@ -708,7 +744,7 @@ PreservedAnalyses SpecConstantsPass::run(Module &M, // emitSpecConstantRecursive might emit more than one spec constant // (because of composite types) and therefore, we need to adjust // NextID according to the actual amount of emitted spec constants. 
- NextID += IDs.size(); + NextID.ID += IDs.size(); // Generate necessary metadata which later will be pulled by // sycl-post-link and transformed into device image properties @@ -740,7 +776,7 @@ PreservedAnalyses SpecConstantsPass::run(Module &M, SCMetadata[SymID] = generateSpecConstantMetadata( M, SymID, SCTy, NextID, /* is native spec constant */ false); - ++NextID; + ++NextID.ID; NextOffset += Size; }