Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vi", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "b", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_prefetch_data, "vvC*Ui", "nc", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_buffer_prefetch_data, "vQbIiUi", "nc", "gfx12-insts")

TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b64_v2i32, "V2iV2i*1", "nc", "gfx12-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b128_v8i16, "V8sV8s*1", "nc", "gfx12-insts,wavefrontsize32")
Expand Down
5 changes: 5 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,8 @@ kernel void builtins_amdgcn_s_barrier_signal_isfirst_err(global int* in, global
__builtin_amdgcn_s_barrier_wait(-1);
*out = *in;
}

// Negative test: the offset (second argument) is the run-time parameter 'off'
// instead of a constant, so the builtin call below has to be diagnosed.
void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int off)
{
__builtin_amdgcn_s_buffer_prefetch_data(rsrc, off, 31); // expected-error {{'__builtin_amdgcn_s_buffer_prefetch_data' must be a constant integer}}
}
19 changes: 19 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,22 @@ void test_s_prefetch_data(int *fp, global float *gp, constant char *cp, unsigned
__builtin_amdgcn_s_prefetch_data(gp, len);
__builtin_amdgcn_s_prefetch_data(cp, 31);
}

// CHECK-LABEL: @test_s_buffer_prefetch_data(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RSRC_ADDR:%.*]] = alloca ptr addrspace(8), align 16, addrspace(5)
// CHECK-NEXT: [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: store ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(5) [[RSRC_ADDR]], align 16
// CHECK-NEXT: store i32 [[LEN:%.*]], ptr addrspace(5) [[LEN_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[RSRC_ADDR]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[LEN_ADDR]], align 4
// CHECK-NEXT: call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP0]], i32 128, i32 [[TMP1]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[RSRC_ADDR]], align 16
// CHECK-NEXT: call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP2]], i32 0, i32 31)
// CHECK-NEXT: ret void
//
void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int len)
{
// Exercises the builtin twice on the same resource: first with a constant
// offset (128) and the run-time length 'len', then with a constant offset (0)
// and a constant length (31).
__builtin_amdgcn_s_buffer_prefetch_data(rsrc, 128, len);
__builtin_amdgcn_s_buffer_prefetch_data(rsrc, 0, 31);
}
9 changes: 9 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1723,6 +1723,15 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_ptr_buffer_load_lds : AMDGPUStructPtrBufferLoadLDS;

// Buffer prefetch intrinsic. It produces no result and takes three arguments:
// a buffer resource, an i32 offset that must be an immediate (ImmArg on
// argument index 1) and an i32 length. The resource is argument 0
// (AMDGPURsrcIntrinsic<0>), and the intrinsic is exposed to Clang as
// __builtin_amdgcn_s_buffer_prefetch_data.
def int_amdgcn_s_buffer_prefetch_data : DefaultAttrsIntrinsic <
[],
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
llvm_i32_ty, // offset (imm)
llvm_i32_ty], // len (SGPR/imm)
[IntrInaccessibleMemOrArgMemOnly, ImmArg<ArgIndex<1>>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>,
ClangBuiltin<"__builtin_amdgcn_s_buffer_prefetch_data">;

} // defset AMDGPUBufferIntrinsics

// Uses that do not set the done bit should set IntrWriteMem on the
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_SBYTE, SIsbuffer_load_byte>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_UBYTE, SIsbuffer_load_ubyte>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_SSHORT, SIsbuffer_load_short>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_USHORT, SIsbuffer_load_ushort>;
// Pairs the generic G_AMDGPU_S_BUFFER_PREFETCH opcode with the
// SIsbuffer_prefetch SDNode (presumably so the SelectionDAG patterns for that
// node are also usable by GlobalISel -- confirm against GINodeEquiv).
def : GINodeEquiv<G_AMDGPU_S_BUFFER_PREFETCH, SIsbuffer_prefetch>;

class GISelSop2Pat <
SDPatternOperator node,
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5545,6 +5545,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SBUFFER_LOAD_UBYTE)
NODE_NAME_CASE(SBUFFER_LOAD_SHORT)
NODE_NAME_CASE(SBUFFER_LOAD_USHORT)
NODE_NAME_CASE(SBUFFER_PREFETCH_DATA)
NODE_NAME_CASE(BUFFER_STORE)
NODE_NAME_CASE(BUFFER_STORE_BYTE)
NODE_NAME_CASE(BUFFER_STORE_SHORT)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ enum NodeType : unsigned {
SBUFFER_LOAD_UBYTE,
SBUFFER_LOAD_SHORT,
SBUFFER_LOAD_USHORT,
SBUFFER_PREFETCH_DATA,
BUFFER_STORE,
BUFFER_STORE_BYTE,
BUFFER_STORE_SHORT,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5237,7 +5237,8 @@ getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
std::optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
std::optional<uint64_t> OffsetVal =
Root.isImm() ? Root.getImm() : getConstantZext32Val(Root.getReg(), *MRI);
if (!OffsetVal)
return {};

Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6797,6 +6797,18 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(LegalizerHelper &Helper,
return true;
}

/// Legalize a call to the s.buffer.prefetch.data intrinsic by mutating \p MI
/// in place into the target-generic G_AMDGPU_S_BUFFER_PREFETCH instruction.
///
/// The instruction is edited between the observer's changingInstr and
/// changedInstr notifications. Always returns true.
bool AMDGPULegalizerInfo::legalizeSBufferPrefetch(LegalizerHelper &Helper,
                                                  MachineInstr &MI) const {
  MachineIRBuilder &Builder = Helper.MIRBuilder;
  const auto &PrefetchDesc =
      Builder.getTII().get(AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH);

  Helper.Observer.changingInstr(MI);

  MI.setDesc(PrefetchDesc);
  // Operand 0 holds the intrinsic ID; once it is dropped the buffer resource
  // becomes operand 0, which is the index handed to the cast helper below.
  MI.removeOperand(0);
  castBufferRsrcArgToV4I32(MI, Builder, 0);

  Helper.Observer.changedInstr(MI);
  return true;
}

// TODO: Move to selection
bool AMDGPULegalizerInfo::legalizeTrap(MachineInstr &MI,
MachineRegisterInfo &MRI,
Expand Down Expand Up @@ -7485,6 +7497,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_permlanex16:
case Intrinsic::amdgcn_permlane64:
return legalizeLaneOp(Helper, MI, IntrID);
case Intrinsic::amdgcn_s_buffer_prefetch_data:
return legalizeSBufferPrefetch(Helper, MI);
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrID))
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {

bool legalizeSBufferLoad(LegalizerHelper &Helper, MachineInstr &MI) const;

bool legalizeSBufferPrefetch(LegalizerHelper &Helper, MachineInstr &MI) const;

bool legalizeTrap(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeTrapEndpgm(MachineInstr &MI, MachineRegisterInfo &MRI,
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3101,6 +3101,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
applyMappingSBufferLoad(B, OpdMapper);
return;
}
case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
constrainOpWithReadfirstlane(B, MI, 0);
constrainOpWithReadfirstlane(B, MI, 2);
return;
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
Expand Down Expand Up @@ -4464,6 +4468,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
break;
}
case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
break;
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
Expand Down
34 changes: 26 additions & 8 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1210,9 +1210,13 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = RsrcArg;
}

auto *Aux = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1));
if (Aux->getZExtValue() & AMDGPU::CPol::VOLATILE)
Info.flags |= MachineMemOperand::MOVolatile;
bool IsSPrefetch = IntrID == Intrinsic::amdgcn_s_buffer_prefetch_data;
if (!IsSPrefetch) {
auto *Aux = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1));
if (Aux->getZExtValue() & AMDGPU::CPol::VOLATILE)
Info.flags |= MachineMemOperand::MOVolatile;
}

Info.flags |= MachineMemOperand::MODereferenceable;
if (ME.onlyReadsMemory()) {
if (RsrcIntr->IsImage) {
Expand Down Expand Up @@ -1251,16 +1255,18 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,

Info.flags |= MachineMemOperand::MOStore;
} else {
// Atomic or NoReturn Sampler
// Atomic, NoReturn Sampler or prefetch
Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID :
ISD::INTRINSIC_W_CHAIN;
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
Info.flags |=
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable;

if (!IsSPrefetch)
Info.flags |= MachineMemOperand::MOStore;

switch (IntrID) {
default:
if (RsrcIntr->IsImage && BaseOpcode->NoReturn) {
if ((RsrcIntr->IsImage && BaseOpcode->NoReturn) || IsSPrefetch) {
// Fake memory access type for no return sampler intrinsics
Info.memVT = MVT::i32;
} else {
Expand Down Expand Up @@ -9934,6 +9940,18 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return Op.getOperand(0);
return Op;
}
case Intrinsic::amdgcn_s_buffer_prefetch_data: {
SDValue Ops[] = {
Chain, bufferRsrcPtrToVector(Op.getOperand(2), DAG),
Op.getOperand(3), // offset
Op.getOperand(4), // length
};

MemSDNode *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(AMDGPUISD::SBUFFER_PREFETCH_DATA, DL,
Op->getVTList(), Ops, M->getMemoryVT(),
M->getMemOperand());
}
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,14 @@ class isIntType<ValueType SrcVT> {
bit ret = !and(SrcVT.isInteger, !ne(SrcVT.Value, i1.Value));
}

// Type profile for the scalar buffer prefetch node: no results and three
// operands with fixed types (v4i32 resource, i32 offset, i32 length).
def SDTSBufferPrefetch : SDTypeProfile<0, 3,
[SDTCisVT<0, v4i32>, // rsrc
SDTCisVT<1, i32>, // offset(imm)
SDTCisVT<2, i32>]>; // length

// SelectionDAG node for AMDGPUISD::SBUFFER_PREFETCH_DATA using the
// SDTSBufferPrefetch profile. It is chained, carries a memory operand, and is
// flagged as both may-load and may-store.
def SIsbuffer_prefetch : SDNode<"AMDGPUISD::SBUFFER_PREFETCH_DATA", SDTSBufferPrefetch,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
// This is for SDNodes and PatFrag for local loads and stores to
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -3978,6 +3978,16 @@ def G_AMDGPU_S_BUFFER_LOAD_UBYTE : SBufferLoadInstruction;
def G_AMDGPU_S_BUFFER_LOAD_SSHORT : SBufferLoadInstruction;
def G_AMDGPU_S_BUFFER_LOAD_USHORT : SBufferLoadInstruction;

// Generic (pre-selection) instruction shape for a scalar buffer prefetch:
// no outputs; inputs are the resource, an untyped immediate offset and the
// length. It is marked as both loading and storing, with no other side
// effects.
class SBufferPrefetchInstruction : AMDGPUGenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins type0:$rsrc, untyped_imm_0:$offset, type1:$len);
let hasSideEffects = 0;
let mayLoad = 1;
let mayStore = 1;
}

// The single generic opcode built from the class above.
def G_AMDGPU_S_BUFFER_PREFETCH : SBufferPrefetchInstruction;

def G_AMDGPU_S_MUL_U64_U32 : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src0, type0:$src1);
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/SMInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,17 @@ let SubtargetPredicate = isGFX12Plus in {
// int_amdgcn_s_prefetch_data with an immediate length: emit S_PREFETCH_DATA
// with offset 0, SGPR_NULL in the register operand and the length passed via
// as_i8timm (presumably an 8-bit target immediate -- confirm).
def : GCNPat <
(int_amdgcn_s_prefetch_data (i64 SReg_64:$sbase), imm:$len),
(S_PREFETCH_DATA $sbase, 0, (i32 SGPR_NULL), (as_i8timm $len))

>;

// Buffer prefetch whose length is in a 32-bit scalar register: the register is
// passed as the third operand of S_BUFFER_PREFETCH_DATA and the trailing
// immediate operand is 0.
def : GCNPat <
(SIsbuffer_prefetch v4i32:$sbase, (SMRDBufferImm i32:$offset), (i32 SReg_32:$len)),
(S_BUFFER_PREFETCH_DATA SReg_128:$sbase, i32imm:$offset, $len, 0)
>;

// Buffer prefetch whose length is an immediate: SGPR_NULL fills the register
// operand of S_BUFFER_PREFETCH_DATA and the length goes into the trailing
// operand via as_i8timm (presumably an 8-bit target immediate -- confirm).
def : GCNPat <
(SIsbuffer_prefetch v4i32:$sbase, (SMRDBufferImm i32:$offset), imm:$len),
(S_BUFFER_PREFETCH_DATA SReg_128:$sbase, i32imm:$offset, (i32 SGPR_NULL), (as_i8timm $len))
>;
} // End let SubtargetPredicate = isGFX12Plus

Expand Down
36 changes: 36 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.prefetch.data.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefix=GCN %s

declare void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) %rsrc, i32 %offset, i32 %len)

; Constant offset (128) with the length supplied as an inreg argument; the
; expected assembly uses the length directly from s4.
define amdgpu_ps void @buffer_prefetch_data_imm_offset_sgpr_len(ptr addrspace(8) inreg %rsrc, i32 inreg %len) {
; GCN-LABEL: buffer_prefetch_data_imm_offset_sgpr_len:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_buffer_prefetch_data s[0:3], 0x80, s4, 0
; GCN-NEXT: s_endpgm
entry:
tail call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) inreg %rsrc, i32 128, i32 %len)
ret void
}

; Constant offset (0) and constant length (31); the expected assembly has
; 'null' in the register operand and the length as the last operand.
define amdgpu_ps void @buffer_prefetch_data_imm_offset_imm_len(ptr addrspace(8) inreg %rsrc) {
; GCN-LABEL: buffer_prefetch_data_imm_offset_imm_len:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_buffer_prefetch_data s[0:3], 0x0, null, 31
; GCN-NEXT: s_endpgm
entry:
tail call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) inreg %rsrc, i32 0, i32 31)
ret void
}

; Constant offset (128) with the length passed without inreg; the expected
; assembly first copies it from v0 into s4 with v_readfirstlane_b32.
define amdgpu_ps void @buffer_prefetch_data_imm_offset_vgpr_len(ptr addrspace(8) inreg %rsrc, i32 %len) {
; GCN-LABEL: buffer_prefetch_data_imm_offset_vgpr_len:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_readfirstlane_b32 s4, v0
; GCN-NEXT: s_buffer_prefetch_data s[0:3], 0x80, s4, 0
; GCN-NEXT: s_endpgm
entry:
tail call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) inreg %rsrc, i32 128, i32 %len)
ret void
}