Skip to content

Commit 06f7bbf

Browse files
Victor Lomuller and sommerlukas
authored
[SYCL][Fusion] Add HIP support (#11003)
The patch adds support for HIP via libamd_comgr: - Add a build path in sycl-fusion - Add a finalization routine inside the HIP adapter's build program --------- Signed-off-by: Victor Lomuller <[email protected]> Co-authored-by: Lukas Sommer <[email protected]>
1 parent 08febcf commit 06f7bbf

32 files changed

+404
-86
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5559,8 +5559,9 @@ class OffloadingActionBuilder final {
55595559
DA.add(*DeviceWrappingAction, *TC, BoundArch, Action::OFK_SYCL);
55605560
continue;
55615561
}
5562-
if (IsNVPTX && Args.hasArg(options::OPT_fsycl_embed_ir)) {
5563-
// When compiling for Nvidia/CUDA devices and the user requested the
5562+
if ((IsNVPTX || IsAMDGCN) &&
5563+
Args.hasArg(options::OPT_fsycl_embed_ir)) {
5564+
// When compiling for Nvidia/AMD devices and the user requested the
55645565
// IR to be embedded in the application (via option), run the output
55655566
// of sycl-post-link (filetable referencing LLVM Bitcode + symbols)
55665567
// through the offload wrapper and link the resulting object to the
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/// Tests for -fsycl-embed-ir
2+
3+
// UNSUPPORTED: system-windows
4+
5+
// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_80 -fsycl-embed-ir -ccc-print-phases %s 2>&1 | \
6+
// RUN: FileCheck -check-prefix=CHECK-NV %s
7+
8+
// CHECK-NV: [[IR:[0-9]+]]: compiler, {4}, ir, (device-sycl, sm_80)
9+
// CHECK-NV: [[POSTLINK:[0-9]+]]: sycl-post-link, {{{.*}}}, ir, (device-sycl, sm_80)
10+
// CHECK-NV: [[WRAP:[0-9]+]]: clang-offload-wrapper, {[[POSTLINK]]}, object, (device-sycl, sm_80)
11+
// CHECK-NV: offload, "host-sycl (x86_64-unknown-linux-gnu)" {{{.*}}}, "device-sycl (nvptx64-nvidia-cuda:sm_80)" {[[WRAP]]}, "device-sycl (nvptx64-nvidia-cuda:sm_80)" {{{.*}}}, image
12+
13+
// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1010 -fsycl-embed-ir -ccc-print-phases %s 2>&1 | \
14+
// RUN: FileCheck -check-prefix=CHECK-AMD %s
15+
16+
// CHECK-AMD: [[IR:[0-9]+]]: compiler, {4}, ir, (device-sycl, gfx1010)
17+
// CHECK-AMD: [[POSTLINK:[0-9]+]]: sycl-post-link, {{{.*}}}, ir, (device-sycl, gfx1010)
18+
// CHECK-AMD: [[WRAP:[0-9]+]]: clang-offload-wrapper, {[[POSTLINK]]}, object, (device-sycl, gfx1010)
19+
// CHECK-AMD: offload, "host-sycl (x86_64-unknown-linux-gnu)" {{{.*}}}, "device-sycl (amdgcn-amd-amdhsa:gfx1010)" {[[WRAP]]}, "device-sycl (amdgcn-amd-amdhsa:gfx1010)" {{{.*}}}, image

sycl-fusion/common/include/Kernel.h

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,30 @@ namespace jit_compiler {
2020

2121
using BinaryAddress = const uint8_t *;
2222

23+
/// Possible barrier flags
24+
enum class BarrierFlags : uint32_t {
25+
None = 0, // Do not insert barrier
26+
Local = 1, // Ensure correct ordering of memory operations to local memory
27+
Global = 2, // Ensure correct ordering of memory operations to global memory
28+
LocalAndGlobal = Local | Global
29+
};
30+
31+
constexpr BarrierFlags getNoBarrierFlag() { return BarrierFlags::None; }
32+
constexpr BarrierFlags getLocalAndGlobalBarrierFlag() {
33+
return BarrierFlags::LocalAndGlobal;
34+
}
35+
constexpr bool isNoBarrierFlag(BarrierFlags Flag) {
36+
return Flag == BarrierFlags::None;
37+
}
38+
constexpr bool hasLocalBarrierFlag(BarrierFlags Flag) {
39+
return static_cast<uint32_t>(Flag) &
40+
static_cast<uint32_t>(BarrierFlags::Local);
41+
}
42+
constexpr bool hasGlobalBarrierFlag(BarrierFlags Flag) {
43+
return static_cast<uint32_t>(Flag) &
44+
static_cast<uint32_t>(BarrierFlags::Global);
45+
}
46+
2347
///
2448
/// Enumerate possible kinds of parameters.
2549
/// 1:1 correspondence with the definition in kernel_desc.hpp in the DPC++ SYCL
@@ -35,7 +59,7 @@ enum class ParameterKind : uint32_t {
3559
};
3660

3761
/// Different binary formats supported as input to the JIT compiler.
38-
enum class BinaryFormat : uint32_t { INVALID, LLVM, SPIRV, PTX };
62+
enum class BinaryFormat : uint32_t { INVALID, LLVM, SPIRV, PTX, AMDGCN };
3963

4064
/// Information about a device intermediate representation module (e.g., SPIR-V,
4165
/// LLVM IR) from DPC++.

sycl-fusion/common/lib/KernelIO.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ template <> struct ScalarEnumerationTraits<jit_compiler::BinaryFormat> {
4848
IO.enumCase(BF, "LLVM", jit_compiler::BinaryFormat::LLVM);
4949
IO.enumCase(BF, "SPIRV", jit_compiler::BinaryFormat::SPIRV);
5050
IO.enumCase(BF, "PTX", jit_compiler::BinaryFormat::PTX);
51+
IO.enumCase(BF, "AMDGCN", jit_compiler::BinaryFormat::AMDGCN);
5152
IO.enumCase(BF, "INVALID", jit_compiler::BinaryFormat::INVALID);
5253
}
5354
};

sycl-fusion/jit-compiler/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ add_llvm_library(sycl-fusion
99
lib/fusion/ModuleHelper.cpp
1010
lib/helper/ConfigHelper.cpp
1111

12+
DEPENDS
13+
intrinsics_gen
14+
1215
LINK_COMPONENTS
1316
BitReader
1417
Core
@@ -50,6 +53,10 @@ if("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
5053
target_compile_definitions(sycl-fusion PRIVATE FUSION_JIT_SUPPORT_PTX)
5154
endif()
5255

56+
if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)
57+
target_compile_definitions(sycl-fusion PRIVATE FUSION_JIT_SUPPORT_AMDGCN)
58+
endif()
59+
5360
if (BUILD_SHARED_LIBS)
5461
if(NOT MSVC AND NOT APPLE)
5562
# Manage symbol visibility through the linker to make sure no LLVM symbols

sycl-fusion/jit-compiler/include/JITContext.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "Hashing.h"
2121
#include "Kernel.h"
22+
#include "Options.h"
2223
#include "Parameter.h"
2324

2425
namespace llvm {
@@ -28,7 +29,7 @@ class LLVMContext;
2829
namespace jit_compiler {
2930

3031
using CacheKeyT =
31-
std::tuple<std::vector<std::string>, ParamIdentList, int,
32+
std::tuple<std::vector<std::string>, ParamIdentList, BarrierFlags,
3233
std::vector<ParameterInternalization>, std::vector<JITConstant>,
3334
// This field of the cache is optional because, if all of the
3435
// ranges are equal, we will perform no remapping, so that fused

sycl-fusion/jit-compiler/include/KernelFusion.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ class KernelFusion {
6060
const std::vector<SYCLKernelInfo> &KernelInformation,
6161
const std::vector<std::string> &KernelsToFuse,
6262
const std::string &FusedKernelName,
63-
jit_compiler::ParamIdentList &Identities, int BarriersFlags,
63+
jit_compiler::ParamIdentList &Identities,
64+
BarrierFlags BarriersFlags,
6465
const std::vector<jit_compiler::ParameterInternalization>
6566
&Internalization,
6667
const std::vector<jit_compiler::JITConstant> &JITConstants);

sycl-fusion/jit-compiler/lib/KernelFusion.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ static bool isTargetFormatSupported(BinaryFormat TargetFormat) {
5858
#else // FUSION_JIT_SUPPORT_PTX
5959
return false;
6060
#endif // FUSION_JIT_SUPPORT_PTX
61+
}
62+
case BinaryFormat::AMDGCN: {
63+
#ifdef FUSION_JIT_SUPPORT_AMDGCN
64+
return true;
65+
#else // FUSION_JIT_SUPPORT_AMDGCN
66+
return false;
67+
#endif // FUSION_JIT_SUPPORT_AMDGCN
6168
}
6269
default:
6370
return false;
@@ -69,7 +76,7 @@ FusionResult KernelFusion::fuseKernels(
6976
const std::vector<SYCLKernelInfo> &KernelInformation,
7077
const std::vector<std::string> &KernelsToFuse,
7178
const std::string &FusedKernelName, ParamIdentList &Identities,
72-
int BarriersFlags,
79+
BarrierFlags BarriersFlags,
7380
const std::vector<jit_compiler::ParameterInternalization> &Internalization,
7481
const std::vector<jit_compiler::JITConstant> &Constants) {
7582
// Initialize the configuration helper to make the options for this invocation

sycl-fusion/jit-compiler/lib/fusion/FusionPipeline.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ static unsigned getFlatAddressSpace(Module &Mod) {
4040
// Ideally, we could get this information from the TargetTransformInfo, but
4141
// the SPIR-V backend does not yet seem to have an implementation for that.
4242
llvm::Triple Tri(Mod.getTargetTriple());
43-
if (Tri.isNVPTX()) {
43+
if (Tri.isNVPTX() || Tri.isAMDGCN()) {
4444
return 0;
4545
}
4646
if (Tri.isSPIRV() || Tri.isSPIR()) {
@@ -53,7 +53,7 @@ static unsigned getFlatAddressSpace(Module &Mod) {
5353

5454
std::unique_ptr<SYCLModuleInfo>
5555
FusionPipeline::runFusionPasses(Module &Mod, SYCLModuleInfo &InputInfo,
56-
int BarriersFlags) {
56+
BarrierFlags BarriersFlags) {
5757
// Perform the actual kernel fusion, i.e., generate a kernel function for the
5858
// fused kernel from the kernel functions of the input kernels. This is done
5959
// by the SYCLKernelFusion LLVM pass, which is run here through a custom LLVM

sycl-fusion/jit-compiler/lib/fusion/FusionPipeline.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class FusionPipeline {
2525
/// contain an entry for the fused kernel.
2626
static std::unique_ptr<SYCLModuleInfo>
2727
runFusionPasses(llvm::Module &Mod, SYCLModuleInfo &InputInfo,
28-
int BarriersFlags);
28+
BarrierFlags BarriersFlags);
2929
};
3030
} // namespace fusion
3131
} // namespace jit_compiler

0 commit comments

Comments
 (0)