diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 24f313c45537f..01ff6d556dfbf 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -114,3 +114,8 @@ SYCLLowerIR/ @kbobrovs @DenisBakhvalov esimd/ @kbobrovs @DenisBakhvalov sycl/include/CL/sycl/INTEL/esimd.hpp @kbobrovs @DenisBakhvalov sycl/doc/extensions/ExplicitSIMD/ @kbobrovs + +# ITT annotations +llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp @MrSidims +llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h @MrSidims + diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 9d53b5b923bb2..d566a5cd7169b 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -417,6 +417,9 @@ CODEGENOPT(PassByValueIsNoAlias, 1, 0) /// according to the field declaring type width. CODEGENOPT(AAPCSBitfieldWidth, 1, 1) +// Whether to instrument SPIR device code with ITT annotations +CODEGENOPT(SPIRITTAnnotations, 1, 0) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 33d292b429fd8..868453b1b8c93 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2418,6 +2418,10 @@ def fsycl_device_code_lower_esimd : Flag<["-"], "fsycl-device-code-lower-esimd"> Flags<[CC1Option, CoreOption]>, HelpText<"Lower ESIMD-specific constructs">; def fno_sycl_device_code_lower_esimd : Flag<["-"], "fno-sycl-device-code-lower-esimd">, Flags<[CC1Option, CoreOption]>, HelpText<"Do not lower ESIMD-specific constructs">; +def fsycl_instrument_device_code : Flag<["-"], "fsycl-instrument-device-code">, + Group, Flags<[CC1Option, CoreOption]>, + HelpText<"Add ITT instrumentation intrinsics calls">, + MarshallingInfoFlag>; defm sycl_id_queries_fit_in_int: OptInFFlag<"sycl-id-queries-fit-in-int", "Assume", "Do not assume", " that SYCL ID queries fit within MAX_INT.", [CC1Option,CoreOption], LangOpts<"SYCLValueFitInMaxInt">>; def fsycl_use_bitcode : Flag<["-"], "fsycl-use-bitcode">, Flags<[CC1Option, CoreOption]>, HelpText<"Use LLVM bitcode instead of SPIR-V in fat objects">; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index e48ad30df130a..17e37df253e8e 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -72,6 +72,7 @@ #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" +#include "llvm/Transforms/Instrumentation/SPIRITTAnnotations.h" #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" @@ -948,6 +949,16 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, LangOpts.EnableDAEInSpirKernels) PerModulePasses.add(createDeadArgEliminationSYCLPass()); + // Add SPIRITTAnnotations pass to the pass manager if + // -fsycl-instrument-device-code option was passed. This option can be + // used only with spir triple. + if (CodeGenOpts.SPIRITTAnnotations) { + if (!llvm::Triple(TheModule->getTargetTriple()).isSPIR()) + llvm::report_fatal_error( + "ITT annotations can only by added to a module with spir target"); + PerModulePasses.add(createSPIRITTAnnotationsPass()); + } + switch (Action) { case Backend_EmitNothing: break; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index e879101651add..f997d4a941dd6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5883,6 +5883,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasFlag(options::OPT_fsycl, options::OPT_fno_sycl, false)) Args.AddLastArg(CmdArgs, options::OPT_sycl_std_EQ); + // Forward -fsycl-instrument-device-code option to cc1. This option can only + // be used with spir triple. + if (Arg *A = Args.getLastArg(options::OPT_fsycl_instrument_device_code)) { + if (!Triple.isSPIR()) + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getAsString(Args) << TripleStr; + CmdArgs.push_back("-fsycl-instrument-device-code"); + } + if (IsHIP) { if (Args.hasFlag(options::OPT_fhip_new_launch_api, options::OPT_fno_hip_new_launch_api, true)) diff --git a/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp b/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp new file mode 100644 index 0000000000000..d88ef1abc3fc7 --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp @@ -0,0 +1,21 @@ +/// Check if start/finish ITT annotations are being added during compilation of +/// SYCL device code + +// RUN: %clang_cc1 -fsycl-is-device -fsycl-instrument-device-code -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s + +// CHECK: kernel_function +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() +// CHECK: call void @__itt_offload_wi_finish_wrapper() +// CHECK-NEXT: ret void + +#include "Inputs/sycl.hpp" + +int main() { + cl::sycl::accessor accessorA; + cl::sycl::kernel_single_task( + [=]() { + accessorA.use(); + }); + return 0; +} diff --git a/clang/test/Driver/sycl-instrumentation.c b/clang/test/Driver/sycl-instrumentation.c new file mode 100644 index 0000000000000..df433c425e0f0 --- /dev/null +++ b/clang/test/Driver/sycl-instrumentation.c @@ -0,0 +1,14 @@ +/// Check that SPIR ITT instrumentation is disabled by default: +// RUN: %clang -### %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s +// CHECK-DEFAULT-NOT: "-fsycl-instrument-device-code" + +/// Check if "fsycl_instrument_device_code" is passed to -cc1: +// RUN: %clang -### -fsycl-instrument-device-code %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-ENABLED %s +// CHECK-ENABLED: "-cc1"{{.*}} "-fsycl-instrument-device-code" + +/// Check if "fsycl_instrument_device_code" usage with a non-spirv target +/// results in an error. +// RUN: %clang -### -fsycl-instrument-device-code --target=x86 %s 2>&1 +// expected-error{{unsupported option '-fsycl-instrument-device-code' for target 'x86_64-unknown-linux-gnu'}} diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 3da961bbfc4ea..8340c62a1c5b7 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -429,6 +429,7 @@ void initializeStripSymbolsPass(PassRegistry&); void initializeStructurizeCFGLegacyPassPass(PassRegistry &); void initializeSYCLLowerWGScopeLegacyPassPass(PassRegistry &); void initializeSYCLLowerESIMDLegacyPassPass(PassRegistry &); +void initializeSPIRITTAnnotationsLegacyPassPass(PassRegistry &); void initializeESIMDLowerLoadStorePass(PassRegistry &); void initializeESIMDLowerVecArgLegacyPassPass(PassRegistry &); void initializeTailCallElimPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index f25604383627e..25a481e541be7 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -48,6 +48,7 @@ #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" +#include "llvm/Transforms/Instrumentation/SPIRITTAnnotations.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" @@ -204,6 +205,7 @@ namespace { (void)llvm::createSYCLLowerESIMDPass(); (void)llvm::createESIMDLowerLoadStorePass(); (void)llvm::createESIMDLowerVecArgPass(); + (void)llvm::createSPIRITTAnnotationsPass(); std::string buf; llvm::raw_string_ostream os(buf); (void) llvm::createPrintModulePass(os); diff --git a/llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h b/llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h new file mode 100644 index 0000000000000..547b3c374abbd --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h @@ -0,0 +1,27 @@ +//===----- SPIRITTAnnotations.h - SPIR Instrumental Annotations Pass ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A transformation pass which adds instrumental calls to annotate SPIR +// synchronization instructions. This can be used for kernel profiling. +//===----------------------------------------------------------------------===// + +#pragma once + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class SPIRITTAnnotationsPass : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; + +ModulePass *createSPIRITTAnnotationsPass(); + +} // namespace llvm diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 3b29c3df64296..85485a2b98d51 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_component_library(LLVMInstrumentation PGOMemOPSizeOpt.cpp PoisonChecking.cpp SanitizerCoverage.cpp + SPIRITTAnnotations.cpp ValueProfileCollector.cpp ThreadSanitizer.cpp HWAddressSanitizer.cpp diff --git a/llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp b/llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp new file mode 100644 index 0000000000000..1c62285f68895 --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp @@ -0,0 +1,294 @@ +//===---- SPIRITTAnnotations.cpp - SPIR Instrumental Annotations Pass -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A transformation pass which adds instrumental calls to annotate SPIR +// synchronization instructions. This can be used for kernel profiling. +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/SPIRITTAnnotations.h" + +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" + +/** Following functions are used for ITT instrumentation: + * * * * * * * * * * * + * Notify tools work-item execution has started + * + * /param[in] group_id Pointer to array of 3 integers that uniquely identify + * group withing a kernel + * /param[in] wi_id Globally unique work-item id + * /param[in] wg_size Number of work-items in given group + * + * void __itt_offload_wi_start(size_t* group_id, size_t wi_id, + * uint32_t wg_size); + * * * * * * * * * * * + * Notify tools work-item execution resumed (e.g. after barrier) + * + * /param[in] group_id Pointer to array of 3 integers that uniquely identify + * group withing a kernel. + * /param[in] wi_id Globally unique work-item id. + * + * void __itt_offload_wi_resume(size_t* group_id, size_t wi_id); + * * * * * * * * * * * + * Notify tools work-item execution has finished + * + * /param[in] group_id Pointer to array of 3 integers that uniquely identify + * group withing a kernel. + * /param[in] wi_id Globally unique work-item id. + * + * void __itt_offload_wi_finish(size_t* group_id, size_t wi_id); + * * * * * * * * * * * + * Notify tools work-item has reached a barrier + * + * /param[in] barrier_id Unique barrier id. If multi-barriers are not supported. + * Pass 0 for barrier_id. Notify tools work-item has reached a barier. + * + * void __itt_offload_wg_barrier(uintptr_t barrier_id); + * * * * * * * * * * * + * Purpose of this pass is to add wrapper calls to these instructions. + * Also this pass adds annotations to atomic instructions: + * * * * * * * * * * * + * Atomic operation markup + * + * /param[in] object Memory location which is used in atomic operation + * /param[in] op_type Operation type + * /param[in] mem_order Memory ordering semantic + * + * void __itt_offload_atomic_op_start(void* object, + * __itt_atomic_mem_op_t op_type, + * __itt_atomic_mem_order_t mem_order); + * * * * * * * * * * * + * Atomic operation markup + * + * /param[in] object Memory location which is used in atomic operation + * /param[in] op_type Operation type + * /param[in] mem_order Memory ordering semantic + * + * void __itt_offload_atomic_op_finish(void* object, + * __itt_atomic_mem_op_t op_type, + * __itt_atomic_mem_order_t mem_order); + **/ + +using namespace llvm; + +namespace { +constexpr char SPIRV_PREFIX[] = "__spirv_"; +constexpr char SPIRV_CONTROL_BARRIER[] = "ControlBarrier"; +constexpr char SPIRV_GROUP_ALL[] = "GroupAll"; +constexpr char SPIRV_GROUP_ANY[] = "GroupAny"; +constexpr char SPIRV_GROUP_BROADCAST[] = "GroupBroadcast"; +constexpr char SPIRV_GROUP_IADD[] = "GroupIAdd"; +constexpr char SPIRV_GROUP_FADD[] = "GroupFAdd"; +constexpr char SPIRV_GROUP_FMIN[] = "GroupFMin"; +constexpr char SPIRV_GROUP_UMIN[] = "GroupUMin"; +constexpr char SPIRV_GROUP_SMIN[] = "GroupSMin"; +constexpr char SPIRV_GROUP_FMAX[] = "GroupFMax"; +constexpr char SPIRV_GROUP_UMAX[] = "GroupUMax"; +constexpr char SPIRV_GROUP_SMAX[] = "GroupSMax"; +constexpr char SPIRV_ATOMIC_INST[] = "Atomic"; +constexpr char SPIRV_ATOMIC_LOAD[] = "AtomicLoad"; +constexpr char SPIRV_ATOMIC_STORE[] = "AtomicStore"; +constexpr char ITT_ANNOTATION_WI_START[] = "__itt_offload_wi_start_wrapper"; +constexpr char ITT_ANNOTATION_WI_RESUME[] = "__itt_offload_wi_resume_wrapper"; +constexpr char ITT_ANNOTATION_WI_FINISH[] = "__itt_offload_wi_finish_wrapper"; +constexpr char ITT_ANNOTATION_WG_BARRIER[] = "__itt_offload_wg_barrier_wrapper"; +constexpr char ITT_ANNOTATION_ATOMIC_START[] = "__itt_offload_atomic_op_start"; +constexpr char ITT_ANNOTATION_ATOMIC_FINISH[] = + "__itt_offload_atomic_op_finish"; + +// Wrapper for the pass to make it working with the old pass manager +class SPIRITTAnnotationsLegacyPass : public ModulePass { +public: + static char ID; + SPIRITTAnnotationsLegacyPass() : ModulePass(ID) { + initializeSPIRITTAnnotationsLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + // run the SPIRITTAnnotations pass on the specified module + bool runOnModule(Module &M) override { + ModuleAnalysisManager MAM; + auto PA = Impl.run(M, MAM); + return !PA.areAllPreserved(); + } + +private: + SPIRITTAnnotationsPass Impl; +}; + +} // namespace + +char SPIRITTAnnotationsLegacyPass::ID = 0; +INITIALIZE_PASS(SPIRITTAnnotationsLegacyPass, "SPIRITTAnnotations", + "Insert ITT annotations in SPIR code", false, false) + +// Public interface to the SPIRITTAnnotationsPass. +ModulePass *llvm::createSPIRITTAnnotationsPass() { + return new SPIRITTAnnotationsLegacyPass(); +} + +namespace { + +// Check for calling convention of a function. +bool isSPIRKernel(Function &F) { + return F.getCallingConv() == CallingConv::SPIR_KERNEL; +} + +Instruction *emitCall(Module &M, Type *RetTy, StringRef FunctionName, + ArrayRef Args, Instruction *InsertBefore) { + SmallVector ArgTys(Args.size()); + for (unsigned I = 0; I < Args.size(); ++I) + ArgTys[I] = Args[I]->getType(); + auto *FT = FunctionType::get(RetTy, ArgTys, false /*isVarArg*/); + FunctionCallee FC = M.getOrInsertFunction(FunctionName, FT); + assert(FC.getCallee() && "Instruction creation failed"); + auto *Call = CallInst::Create(FT, FC.getCallee(), Args, "", InsertBefore); + return Call; +} + +// Insert instrumental annotation calls, that has no arguments (for example +// work items start/finish/resume and barrier annotation. +bool insertSimpleInstrumentationCall(Module &M, StringRef Name, + Instruction *Position) { + Type *VoidTy = Type::getVoidTy(M.getContext()); + ArrayRef Args; + Instruction *InstrumentationCall = emitCall(M, VoidTy, Name, Args, Position); + assert(InstrumentationCall && "Instrumentation call creation failed"); + return true; +} + +// Insert instrumental annotation calls for SPIR-V atomics. +bool insertAtomicInstrumentationCall(Module &M, StringRef Name, + CallInst *AtomicFun, + Instruction *Position) { + LLVMContext &Ctx = M.getContext(); + Type *VoidTy = Type::getVoidTy(Ctx); + Type *Int32Ty = Type::getInt32Ty(Ctx); + // __spirv_Atomic... instructions have following arguments: + // Pointer, Memory Scope, Memory Semantics and others. To construct Atomic + // annotation instructions we need Pointer and Memory Semantic arguments + // taken from the original Atomic instruction. + Value *Ptr = dyn_cast(AtomicFun->getArgOperand(0)); + StringRef AtomicName = AtomicFun->getCalledFunction()->getName(); + Value *AtomicOp; + // Second parameter of Atomic Start/Finish annotation is an Op code of + // the instruction, encoded into a value of enum, defined like this on user's/ + // profiler's side: + // enum __itt_atomic_mem_op_t + // { + // __itt_mem_load = 0, + // __itt_mem_store = 1, + // __itt_mem_update = 2 + // } + if (AtomicName.contains(SPIRV_ATOMIC_LOAD)) + AtomicOp = ConstantInt::get(Int32Ty, 0); + else if (AtomicName.contains(SPIRV_ATOMIC_STORE)) + AtomicOp = ConstantInt::get(Int32Ty, 1); + else + AtomicOp = ConstantInt::get(Int32Ty, 2); + // Third parameter of Atomic Start/Finish annotation is an ordering + // semantic of the instruction, encoded into a value of enum, defined like + // this on user's/profiler's side: + // enum __itt_atomic_mem_order_t + // { + // __itt_mem_order_relaxed = 0, // SPIR-V 0x0 + // __itt_mem_order_acquire = 1, // SPIR-V 0x2 + // __itt_mem_order_release = 2, // SPIR-V 0x4 + // __itt_mem_order_acquire_release = 3 // SPIR-V 0x8 + // } + // which isn't 1:1 mapped on SPIR-V memory ordering mask (aside of a + // differencies in values between SYCL mem order and SPIR-V mem order, SYCL RT + // also applies Memory Semantic mask, like WorkgroupMemory (0x100)), need to + // align it. + uint64_t MemFlag = dyn_cast(AtomicFun->getArgOperand(2)) + ->getValue() + .getZExtValue(); + uint64_t Order; + if (MemFlag & 0x2) + Order = 1; + else if (MemFlag & 0x4) + Order = 2; + else if (MemFlag & 0x8) + Order = 3; + else + Order = 0; + Value *MemOrder = ConstantInt::get(Int32Ty, Order); + Value *Args[] = {Ptr, AtomicOp, MemOrder}; + Instruction *InstrumentationCall = emitCall(M, VoidTy, Name, Args, Position); + assert(InstrumentationCall && "Instrumentation call creation failed"); + return true; +} + +} // namespace + +PreservedAnalyses SPIRITTAnnotationsPass::run(Module &M, + ModuleAnalysisManager &MAM) { + bool IRModified = false; + std::vector SPIRVCrossWGInstuctions = { + SPIRV_CONTROL_BARRIER, SPIRV_GROUP_ALL, SPIRV_GROUP_ANY, + SPIRV_GROUP_BROADCAST, SPIRV_GROUP_IADD, SPIRV_GROUP_FADD, + SPIRV_GROUP_FMIN, SPIRV_GROUP_UMIN, SPIRV_GROUP_SMIN, + SPIRV_GROUP_FMAX, SPIRV_GROUP_UMAX, SPIRV_GROUP_SMAX}; + + for (Function &F : M) { + // Annotate only SPIR kernels + if (F.isDeclaration() || !isSPIRKernel(F)) + continue; + + // At the beggining of a kernel insert work item start annotation + // instruction. + IRModified |= insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_START, + &*inst_begin(F)); + + for (BasicBlock &BB : F) { + // Insert Finish instruction before return instruction + if (ReturnInst *RI = dyn_cast(BB.getTerminator())) + IRModified |= + insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_FINISH, RI); + for (Instruction &I : BB) { + CallInst *CI = dyn_cast(&I); + if (!CI) + continue; + Function *Callee = CI->getCalledFunction(); + if (!Callee) + continue; + StringRef CalleeName = Callee->getName(); + // Process only calls to functions which names starts with __spirv_ + size_t PrefixPosFound = CalleeName.find(SPIRV_PREFIX); + if (PrefixPosFound == StringRef::npos) + continue; + CalleeName = + CalleeName.drop_front(PrefixPosFound + /*len of SPIR-V prefix*/ 8); + // Annotate barrier and other cross WG calls + if (std::any_of(SPIRVCrossWGInstuctions.begin(), + SPIRVCrossWGInstuctions.end(), + [&CalleeName](StringRef Name) { + return CalleeName.startswith(Name); + })) { + Instruction *InstAfterBarrier = CI->getNextNode(); + IRModified |= + insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WG_BARRIER, CI); + IRModified |= insertSimpleInstrumentationCall( + M, ITT_ANNOTATION_WI_RESUME, InstAfterBarrier); + } else if (CalleeName.startswith(SPIRV_ATOMIC_INST)) { + Instruction *InstAfterAtomic = CI->getNextNode(); + IRModified |= insertAtomicInstrumentationCall( + M, ITT_ANNOTATION_ATOMIC_START, CI, CI); + IRModified |= insertAtomicInstrumentationCall( + M, ITT_ANNOTATION_ATOMIC_FINISH, CI, InstAfterAtomic); + } + } + } + } + + return IRModified ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} diff --git a/llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_load.ll b/llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_load.ll new file mode 100644 index 0000000000000..7c901d9973682 --- /dev/null +++ b/llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_load.ll @@ -0,0 +1,135 @@ +;; The test serves a purpose to check if Atomic load instruction is being +;; annotated by SPIRITTAnnotations pass +;; +;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/AtomicRef/load.cpp +;; with following commands: +;; clang++ -fsycl -fsycl-device-only load.cpp -o load.bc + +; RUN: opt < %s --SPIRITTAnnotations -S | FileCheck %s + +; ModuleID = 'load.bc' +source_filename = "llvm-test-suite/SYCL/AtomicRef/load.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } +%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } +%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } + +$_ZTSN2cl4sycl6detail19__pf_kernel_wrapperI11load_kernelIiEEE = comdat any + +$_ZTS11load_kernelIiE = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTSN2cl4sycl6detail19__pf_kernel_wrapperI11load_kernelIiEEE(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_, i32 addrspace(1)* %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_4, i32 addrspace(1)* %_arg_5, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_8, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_9) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { +entry: +; CHECK-LABEL: _ZTSN2cl4sycl6detail19__pf_kernel_wrapperI11load_kernelIiEEE( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5 + %4 = extractelement <3 x i64> %3, i64 0 + %cmp.not.i = icmp ult i64 %4, %2 + br i1 %cmp.not.i, label %if.end.i, label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI11load_kernelIiEZZ9load_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit + +if.end.i: ; preds = %entry + %5 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_9, i64 0, i32 0, i32 0, i64 0 + %6 = addrspacecast i64* %5 to i64 addrspace(4)* + %7 = load i64, i64 addrspace(4)* %6, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_5, i64 %7 + %8 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_4, i64 0, i32 0, i32 0, i64 0 + %9 = addrspacecast i64* %8 to i64 addrspace(4)* + %10 = load i64, i64 addrspace(4)* %9, align 8 + %add.ptr.i34 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %10 +; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 0, i32 0) +; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 896 +; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 0, i32 0) + %call3.i.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i34, i32 1, i32 896) #2 + %ptridx.i.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %4 + %ptridx.ascast.i.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i.i to i32 addrspace(4)* + store i32 %call3.i.i.i.i, i32 addrspace(4)* %ptridx.ascast.i.i.i, align 4, !tbaa !14 + br label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI11load_kernelIiEZZ9load_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit + +_ZZN2cl4sycl7handler24parallel_for_lambda_implI11load_kernelIiEZZ9load_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit: ; preds = %entry, %if.end.i +; CHECK: call void @__itt_offload_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: convergent +declare dso_local spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)*, i32, i32) local_unnamed_addr #1 + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTS11load_kernelIiE(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, i32 addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !18 { +entry: +; CHECK-LABEL: _ZTS11load_kernelIiE( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i32 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %2 + %3 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 + %4 = addrspacecast i64* %3 to i64 addrspace(4)* + %5 = load i64, i64 addrspace(4)* %4, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %5 + %6 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !19 + %7 = extractelement <3 x i64> %6, i64 0 +; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 0, i32 0) +; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 896) +; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 0, i32 0) + %call3.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i32, i32 1, i32 896) #2 + %ptridx.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %7 + %ptridx.ascast.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i to i32 addrspace(4)* + store i32 %call3.i.i.i, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14 +; CHECK: call void @__itt_offload_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; CHECK: declare void @__itt_offload_wi_start_wrapper() +; CHECK: declare void @__itt_offload_atomic_op_start(i32 addrspace(1)*, i32, i32) +; CHECK: declare void @__itt_offload_atomic_op_finish(i32 addrspace(1)*, i32, i32) +; CHECK: declare void @__itt_offload_wi_finish_wrapper() + +attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm-test-suite/SYCL/AtomicRef/load.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent nounwind } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 4, i32 100000} +!3 = !{!"clang version 13.0.0 (https://github.com/intel/llvm.git 51f22c4b69cf01465bdd7b586343f6e19e9ab045)"} +!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!5 = !{!6, !8, !10, !12} +!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} +!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} +!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} +!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} +!14 = !{!15, !15, i64 0} +!15 = !{!"int", !16, i64 0} +!16 = !{!"omnipotent char", !17, i64 0} +!17 = !{!"Simple C++ TBAA"} +!18 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!19 = !{!20, !22, !24, !26} +!20 = distinct !{!20, !21, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!21 = distinct !{!21, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!22 = distinct !{!22, !23, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!23 = distinct !{!23, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!24 = distinct !{!24, !25, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} +!25 = distinct !{!25, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} +!26 = distinct !{!26, !27, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} +!27 = distinct !{!27, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} diff --git a/llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_store.ll b/llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_store.ll new file mode 100644 index 0000000000000..7387ce3c2b8ac --- /dev/null +++ b/llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_store.ll @@ -0,0 +1,119 @@ +;; The test serves a purpose to check if Atomic store instruction is being +;; annotated by SPIRITTAnnotations pass +;; +;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/AtomicRef/load.cpp +;; with following commands: +;; clang++ -fsycl -fsycl-device-only load.cpp -o load.bc + +; RUN: opt < %s --SPIRITTAnnotations -S | FileCheck %s + +; ModuleID = 'store.bc' +source_filename = "llvm-test-suite/SYCL/AtomicRef/store.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } +%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } +%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } + +$_ZTSN2cl4sycl6detail19__pf_kernel_wrapperI12store_kernelIiEEE = comdat any + +$_ZTS12store_kernelIiE = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTSN2cl4sycl6detail19__pf_kernel_wrapperI12store_kernelIiEEE(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_, i32 addrspace(1)* %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_4) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { +entry: +; CHECK-LABEL: _ZTSN2cl4sycl6detail19__pf_kernel_wrapperI12store_kernelIiEEE( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5 + %4 = extractelement <3 x i64> %3, i64 0 + %cmp.not.i = icmp ult i64 %4, %2 + br i1 %cmp.not.i, label %if.end.i, label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit + +if.end.i: ; preds = %entry + %5 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_4, i64 0, i32 0, i32 0, i64 0 + %6 = addrspacecast i64* %5 to i64 addrspace(4)* + %7 = load i64, i64 addrspace(4)* %6, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %7 + %conv.i.i = trunc i64 %4 to i32 +; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 1, i32 0 +; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 896 +; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 1, i32 0 + tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i.i) #2 + br label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit + +_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit: ; preds = %entry, %if.end.i +; CHECK: call void @__itt_offload_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: convergent +declare dso_local spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)*, i32, i32, i32) local_unnamed_addr #1 + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTS12store_kernelIiE(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !14 { +entry: +; CHECK-LABEL: _ZTS12store_kernelIiE( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %2 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !15 + %4 = extractelement <3 x i64> %3, i64 0 + %conv.i = trunc i64 %4 to i32 +; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 1, i32 0) +; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 896 +; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 1, i32 0) + tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i) #2 +; CHECK: call void @__itt_offload_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; CHECK: declare void @__itt_offload_wi_start_wrapper() +; CHECK: declare void @__itt_offload_atomic_op_start(i32 addrspace(1)*, i32, i32) +; CHECK: declare void @__itt_offload_atomic_op_finish(i32 addrspace(1)*, i32, i32) +; CHECK: declare void @__itt_offload_wi_finish_wrapper() + +attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm-test-suite/SYCL/AtomicRef/store.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent nounwind } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 4, i32 100000} +!3 = !{!"clang version 13.0.0 (https://github.com/intel/llvm.git 51f22c4b69cf01465bdd7b586343f6e19e9ab045)"} +!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!5 = !{!6, !8, !10, !12} +!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} +!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} +!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} +!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} +!14 = !{i32 -1, i32 -1, i32 -1, i32 -1} +!15 = !{!16, !18, !20, !22} +!16 = distinct !{!16, !17, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!17 = distinct !{!17, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!18 = distinct !{!18, !19, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!19 = distinct !{!19, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!20 = distinct !{!20, !21, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} +!21 = distinct !{!21, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} +!22 = distinct !{!22, !23, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} +!23 = distinct !{!23, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} diff --git a/llvm/test/Transforms/SPIRITTAnnotations/itt_barrier.ll b/llvm/test/Transforms/SPIRITTAnnotations/itt_barrier.ll new file mode 100644 index 0000000000000..127af4562082a --- /dev/null +++ b/llvm/test/Transforms/SPIRITTAnnotations/itt_barrier.ll @@ -0,0 +1,153 @@ +;; The test serves a purpose to check if barrier instruction is being annotated +;; by SPIRITTAnnotations pass +;; +;; Compiled from https://github.com/intel/llvm-test-suite/blob/intel/SYCL/KernelAndProgram/kernel-and-program.cpp +;; with following commands: +;; clang++ -fsycl -fsycl-device-only kernel-and-program.cpp -o kernel_and_program_optimized.bc + +; RUN: opt < %s --SPIRITTAnnotations -S | FileCheck %s + +; ModuleID = 'kernel_and_program_optimized.bc' +source_filename = "llvm-link" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } +%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } + +$_ZTSZ4mainE10SingleTask = comdat any + +$_ZTSZ4mainE11ParallelFor = comdat any + +$_ZTSZ4mainE13ParallelForND = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalOffset = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: norecurse willreturn +define weak_odr dso_local spir_kernel void @_ZTSZ4mainE10SingleTask(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { +entry: +; CHECK-LABEL: _ZTSZ4mainE10SingleTask( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %2 + %ptridx.ascast.i9.i = addrspacecast i32 addrspace(1)* %add.ptr.i to i32 addrspace(4)* + %3 = load i32, i32 addrspace(4)* %ptridx.ascast.i9.i, align 4, !tbaa !5 + %add.i = add nsw i32 %3, 1 + store i32 %add.i, i32 addrspace(4)* %ptridx.ascast.i9.i, align 4, !tbaa !5 +; CHECK: call void @__itt_offload_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: norecurse willreturn +define weak_odr dso_local spir_kernel void @_ZTSZ4mainE11ParallelFor(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { +entry: +; CHECK-LABEL: _ZTSZ4mainE11ParallelFor( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %2 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !9 + %4 = extractelement <3 x i64> %3, i64 0 + %ptridx.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %4 + %ptridx.ascast.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i to i32 addrspace(4)* + %5 = load i32, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !5 + %add.i = add nsw i32 %5, 1 + store i32 %add.i, i32 addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !5 +; CHECK: call void @__itt_offload_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTSZ4mainE13ParallelForND(i32 addrspace(3)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, i32 addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_8) local_unnamed_addr #1 comdat !kernel_arg_buffer_location !16 { +entry: +; CHECK-LABEL: _ZTSZ4mainE13ParallelForND( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_8, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %2 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !17 + %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalOffset to <3 x i64> addrspace(4)*), align 32, !noalias !24 + %5 = extractelement <3 x i64> %3, i64 0 + %6 = extractelement <3 x i64> %4, i64 0 + %sub.i.i.i.i = sub i64 %5, %6 + %7 = trunc i64 %sub.i.i.i.i to i32 + %conv.i = and i32 %7, 1 + %xor.i = xor i32 %conv.i, 1 + %ptridx.i27.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %sub.i.i.i.i + %ptridx.ascast.i28.i = addrspacecast i32 addrspace(1)* %ptridx.i27.i to i32 addrspace(4)* + %8 = load i32, i32 addrspace(4)* %ptridx.ascast.i28.i, align 4, !tbaa !5 + %9 = zext i32 %conv.i to i64 + %ptridx.i23.i = getelementptr inbounds i32, i32 addrspace(3)* %_arg_, i64 %9 + %ptridx.ascast.i24.i = addrspacecast i32 addrspace(3)* %ptridx.i23.i to i32 addrspace(4)* + store i32 %8, i32 addrspace(4)* %ptridx.ascast.i24.i, align 4, !tbaa !5 +; CHECK: call void @__itt_offload_wg_barrier_wrapper() +; CHECK-NEXT: tail call void @_Z22__spirv_ControlBarrierjjj +; CHECK-NEXT: call void @__itt_offload_wi_resume_wrapper() + tail call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #3 + %conv6.i = zext i32 %xor.i to i64 + %ptridx.i17.i = getelementptr inbounds i32, i32 addrspace(3)* %_arg_, i64 %conv6.i + %ptridx.ascast.i18.i = addrspacecast i32 addrspace(3)* %ptridx.i17.i to i32 addrspace(4)* + %10 = load i32, i32 addrspace(4)* %ptridx.ascast.i18.i, align 4, !tbaa !5 + store i32 %10, i32 addrspace(4)* %ptridx.ascast.i28.i, align 4, !tbaa !5 +; CHECK: call void @__itt_offload_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; Function Attrs: convergent +declare dso_local void @_Z22__spirv_ControlBarrierjjj(i32, i32, i32) local_unnamed_addr #2 + +; CHECK: declare void @__itt_offload_wi_start_wrapper() +; CHECK: declare void @__itt_offload_wi_finish_wrapper() +; CHECK: declare void @__itt_offload_wg_barrier_wrapper() +; CHECK: declare void @__itt_offload_wi_resume_wrapper() + +attributes #0 = { norecurse willreturn "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm-test-suite/SYCL/KernelAndProgram/kernel-and-program.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { convergent } + +!opencl.spir.version = !{!0} +!spirv.Source = !{!1} +!llvm.ident = !{!2} +!llvm.module.flags = !{!3} + +!0 = !{i32 1, i32 2} +!1 = !{i32 4, i32 100000} +!2 = !{!"clang version 13.0.0 (https://github.com/intel/llvm.git 3d2adc7b3ca269708bcabdc4a40352a5cacb4b9d)"} +!3 = !{i32 1, !"wchar_size", i32 4} +!4 = !{i32 -1, i32 -1, i32 -1, i32 -1} +!5 = !{!6, !6, i64 0} +!6 = !{!"int", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} +!9 = !{!10, !12, !14} +!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!12 = distinct !{!12, !13, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!13 = distinct !{!13, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!14 = distinct !{!14, !15, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_2idIXT_EEEPS5_: %agg.result"} +!15 = distinct !{!15, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_2idIXT_EEEPS5_"} +!16 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!17 = !{!18, !20, !22} +!18 = distinct !{!18, !19, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!19 = distinct !{!19, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!20 = distinct !{!20, !21, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!21 = distinct !{!21, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} +!22 = distinct !{!22, !23, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!23 = distinct !{!23, !"_ZN2cl4sycl6detail7Builder10getElementILi1EEEKNS0_7nd_itemIXT_EEEPS5_"} +!24 = !{!25, !27, !22} +!25 = distinct !{!25, !26, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} +!26 = distinct !{!26, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} +!27 = distinct !{!27, !28, !"_ZN7__spirvL16initGlobalOffsetILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} +!28 = distinct !{!28, !"_ZN7__spirvL16initGlobalOffsetILi1EN2cl4sycl2idILi1EEEEET0_v"} diff --git a/llvm/test/Transforms/SPIRITTAnnotations/itt_start_finish.ll b/llvm/test/Transforms/SPIRITTAnnotations/itt_start_finish.ll new file mode 100644 index 0000000000000..0999a31042207 --- /dev/null +++ b/llvm/test/Transforms/SPIRITTAnnotations/itt_start_finish.ll @@ -0,0 +1,55 @@ +;; The test serves a purpose to check if work item start/finish annotations +;; are being added by SPIRITTAnnotations pass + +; RUN: opt < %s --SPIRITTAnnotations -S | FileCheck %s + +; ModuleID = 'synthetic.bc' +source_filename = "synthetic.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +; Function Attrs: convergent norecurse nounwind mustprogress +define dso_local spir_kernel void @_ZTSZ4mainE15kernel_function() local_unnamed_addr #0 !kernel_arg_buffer_location !4 { +entry: +; CHECK: _ZTSZ4mainE15kernel_function( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__itt_offload_wi_start_wrapper() + %call.i = tail call spir_func i32 @_Z3foov() #2 + %cmp.i = icmp eq i32 %call.i, 42 + br i1 %cmp.i, label %"_ZZ4mainENK3$_0clEv.exit", label %if.end.i + +if.end.i: ; preds = %entry + tail call spir_func void @_Z3boov() #2 +; CHECK: call void @__itt_offload_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void + +"_ZZ4mainENK3$_0clEv.exit": ; preds = %entry, %if.end.i +; CHECK: call void @__itt_offload_wi_finish_wrapper() +; CHECK-NEXT: ret void + ret void +} + +; CHECK: declare void @__itt_offload_wi_start_wrapper() +; CHECK: declare void @__itt_offload_wi_finish_wrapper() + +; Function Attrs: convergent +declare spir_func i32 @_Z3foov() local_unnamed_addr #1 + +; Function Attrs: convergent +declare spir_func void @_Z3boov() local_unnamed_addr #1 + +attributes #0 = { convergent norecurse nounwind mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="../../llvm/clang/test/CodeGenSYCL/kernel-simple-instrumentation.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent nounwind } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 4, i32 100000} +!3 = !{!"clang version 13.0.0 (https://github.com/intel/llvm.git f16527331b8cd18b3e45a4a7bc13a2460c8d0d84)"} +!4 = !{} diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 8cf52c1d50e82..3a3cc8857a612 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -582,6 +582,7 @@ int main(int argc, char **argv) { initializeReplaceWithVeclibLegacyPass(Registry); initializeSYCLLowerWGScopeLegacyPassPass(Registry); initializeSYCLLowerESIMDLegacyPassPass(Registry); + initializeSPIRITTAnnotationsLegacyPassPass(Registry); initializeESIMDLowerLoadStorePass(Registry); initializeESIMDLowerVecArgLegacyPassPass(Registry);