Merged. Showing changes from 1 commit.
13 changes: 11 additions & 2 deletions llvm/lib/SYCLLowerIR/LowerWGScope.cpp
@@ -237,6 +237,8 @@ static bool mayHaveSideEffects(const Instruction *I) {
   case Instruction::Call:
     assert(!isPFWICall(I) && "pfwi must have been handled separately");
     return true;
+  case Instruction::AddrSpaceCast:
+    return false;
   default:
     return true;
   }
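For illustration, this is the kind of instruction the new AddrSpaceCast case classifies as side-effect-free (the line is taken from the test added below in this PR):

  %tmp2 = addrspacecast %struct.ham addrspace(4)** %tmp to %struct.ham addrspace(4)* addrspace(4)*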
@@ -630,6 +632,11 @@ static void fixupPrivateMemoryPFWILambdaCaptures(CallInst *PFWICall) {
   // whether it is an alloca with "work_item_scope"
   SmallVector<CaptureDesc, 4> PrivMemCaptures;

+  // Look through cast
+  auto *Cast = dyn_cast<AddrSpaceCastInst>(LambdaObj);
+  if (Cast)
+    LambdaObj = Cast->getOperand(0);
+
   for (auto *U : LambdaObj->users()) {
     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U);

Contributor: From https://github.com/intel/llvm/pull/2864/files#r567811589:
Combine into if (auto *Cast = dyn_cast<AddrSpaceCastInst>(LambdaObj))

Contributor Author: Fixed.

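As applied in the follow-up commit (not shown in this diff view), the combined form suggested by the reviewer would presumably read:

  // Look through cast
  if (auto *Cast = dyn_cast<AddrSpaceCastInst>(LambdaObj))
    LambdaObj = Cast->getOperand(0);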
@@ -779,13 +786,15 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, const llvm::Triple &TT,
   // globals.
   Instruction *I = BB.getFirstNonPHI();

-  for (; I->getOpcode() == Instruction::Alloca; I = I->getNextNode()) {
+  for (; I->getOpcode() == Instruction::Alloca ||
+         I->getOpcode() == Instruction::AddrSpaceCast;
+       I = I->getNextNode()) {
     auto *AllocaI = dyn_cast<AllocaInst>(I);
     // Allocas marked with "work_item_scope" are those originating from
     // cl::sycl::private_memory<T> variables, which must be in private memory.
     // No shadows/materialization is needed for them because they can be
     // updated only within PFWIs
-    if (!AllocaI->getMetadata(WI_SCOPE_MD))
+    if (AllocaI && !AllocaI->getMetadata(WI_SCOPE_MD))
       Allocas.insert(AllocaI);
   }
   for (; I && (I != BB.getTerminator()); I = I->getNextNode()) {
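The relaxed loop condition matters because the entry block of the new test interleaves allocas with addrspacecasts, for example:

  %tmp = alloca %struct.ham addrspace(4)*, align 8
  %tmp2 = addrspacecast %struct.ham addrspace(4)** %tmp to %struct.ham addrspace(4)* addrspace(4)*
  %tmp3 = alloca %struct.spam, align 8

Previously the walk over leading allocas stopped at the first addrspacecast; the new condition also steps over casts, and the added null check ensures only genuine allocas are inserted into Allocas.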
83 changes: 83 additions & 0 deletions llvm/test/SYCLLowerIR/addrspacecast_handling.ll
@@ -0,0 +1,83 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -LowerWGScope -S | FileCheck %s

Contributor: Please add a comment describing what this test checks.

Contributor Author: Added comment.
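(The added comment is not visible in this commit's diff; hypothetically, it might read something like the following.)

; This test checks that LowerWGScope handles addrspacecast instructions:
; casts in the entry block are stepped over when collecting allocas, and a
; cast of the PFWI lambda object is looked through. (hypothetical wording)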

%struct.ham = type { i64, i64, i32, i32 }
%struct.bar = type { i64 }
%struct.spam = type { i64, i64, i64, i64, i32 }

; CHECK: @[[SHADOW4:.*]] = internal unnamed_addr addrspace(3) global %struct.ham addrspace(4)*
; CHECK: @[[SHADOW3:.*]] = internal unnamed_addr addrspace(3) global %struct.spam
; CHECK: @[[SHADOW2:.*]] = internal unnamed_addr addrspace(3) global %struct.ham
; CHECK: @[[SHADOW1:.*]] = internal unnamed_addr addrspace(3) global %struct.bar

define linkonce_odr dso_local spir_func void @foo(%struct.ham addrspace(4)* dereferenceable_or_null(56) %arg, %struct.bar* byval(%struct.bar) align 8 %arg1) !work_group_scope !0 {
; CHECK-LABEL: @foo(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_HAM:%.*]] addrspace(4)*, align 8
; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0:#.*]]
; CHECK-NEXT: [[CMPZ3:%.*]] = icmp eq i64 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMPZ3]], label [[LEADER:%.*]], label [[MERGE:%.*]]
; CHECK: leader:
; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.bar* [[ARG1:%.*]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 8 bitcast (%struct.bar addrspace(3)* @[[SHADOW1]] to i8 addrspace(3)*), i8* align 8 [[TMP1]], i64 8, i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [[STRUCT_HAM]] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* align 16 bitcast (%struct.ham addrspace(3)* @[[SHADOW2]] to i8 addrspace(3)*), i8 addrspace(4)* align 8 [[TMP2]], i64 24, i1 false)
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.bar* [[ARG1]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 8 [[TMP3]], i8 addrspace(3)* align 8 bitcast (%struct.bar addrspace(3)* @[[SHADOW1]] to i8 addrspace(3)*), i64 8, i1 false)
; CHECK-NEXT: [[TMP4:%.*]] = bitcast [[STRUCT_HAM]] addrspace(4)* [[ARG]] to i8 addrspace(4)*
; CHECK-NEXT: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 8 [[TMP4]], i8 addrspace(3)* align 16 bitcast (%struct.ham addrspace(3)* @[[SHADOW2]] to i8 addrspace(3)*), i64 24, i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast [[STRUCT_HAM]] addrspace(4)** [[TMP]] to [[STRUCT_HAM]] addrspace(4)* addrspace(4)*
; CHECK-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_SPAM:%.*]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast %struct.spam* [[TMP3]] to [[STRUCT_SPAM]] addrspace(4)*
; CHECK-NEXT: [[TMP5:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]]
; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP5]], 0
; CHECK-NEXT: br i1 [[CMPZ]], label [[WG_LEADER:%.*]], label [[WG_CF:%.*]]
; CHECK: wg_leader:
; CHECK-NEXT: store [[STRUCT_HAM]] addrspace(4)* [[ARG]], [[STRUCT_HAM]] addrspace(4)* addrspace(4)* [[TMP2]], align 8
; CHECK-NEXT: br label [[WG_CF]]
; CHECK: wg_cf:
; CHECK-NEXT: [[TMP6:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]]
; CHECK-NEXT: [[CMPZ2:%.*]] = icmp eq i64 [[TMP6]], 0
; CHECK-NEXT: br i1 [[CMPZ2]], label [[TESTMAT:%.*]], label [[LEADERMAT:%.*]]
; CHECK: TestMat:
; CHECK-NEXT: [[TMP7:%.*]] = bitcast %struct.spam* [[TMP3]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 16 bitcast (%struct.spam addrspace(3)* @[[SHADOW3]] to i8 addrspace(3)*), i8* align 8 [[TMP7]], i64 36, i1 false)
; CHECK-NEXT: [[MAT_LD:%.*]] = load [[STRUCT_HAM]] addrspace(4)*, [[STRUCT_HAM]] addrspace(4)** [[TMP]], align 8
; CHECK-NEXT: store [[STRUCT_HAM]] addrspace(4)* [[MAT_LD]], [[STRUCT_HAM]] addrspace(4)* addrspace(3)* @[[SHADOW4]], align 8
; CHECK-NEXT: br label [[LEADERMAT]]
; CHECK: LeaderMat:
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]]
; CHECK-NEXT: [[MAT_LD1:%.*]] = load [[STRUCT_HAM]] addrspace(4)*, [[STRUCT_HAM]] addrspace(4)* addrspace(3)* @[[SHADOW4]], align 8
; CHECK-NEXT: store [[STRUCT_HAM]] addrspace(4)* [[MAT_LD1]], [[STRUCT_HAM]] addrspace(4)** [[TMP]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = bitcast %struct.spam* [[TMP3]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 8 [[TMP8]], i8 addrspace(3)* align 16 bitcast (%struct.spam addrspace(3)* @[[SHADOW3]] to i8 addrspace(3)*), i64 36, i1 false)
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) [[ATTR0]]
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast %struct.bar* [[ARG1]] to [[STRUCT_BAR:%.*]] addrspace(4)*
; CHECK-NEXT: [[TMP6:%.*]] = addrspacecast [[STRUCT_SPAM]] addrspace(4)* [[TMP4]] to %struct.spam*
; CHECK-NEXT: call spir_func void @widget(%struct.bar addrspace(4)* dereferenceable_or_null(32) [[TMP5]], %struct.spam* byval(%struct.spam) align 8 [[TMP6]])
; CHECK-NEXT: ret void
Contributor: Nit: can the number of checks be reduced to the addrspacecast dependence chain only?

Contributor: I think it's important to validate all the code generated by the pass to ensure correctness. These checks should be easy to maintain as they are auto-generated (see the note at line 1).

Contributor Author: I agree with Alexey here.

Contributor: > I think it's important to validate all the code generated by the pass to ensure correctness.

I think regression tests are written for particular fixes and should be focused on testing the fix, not the entire pass/feature, as there should exist other tests. But anyway, that was a nit, so either way is fine with me. The script for automated maintenance is a good argument too.
;
bb:
%tmp = alloca %struct.ham addrspace(4)*, align 8
%tmp2 = addrspacecast %struct.ham addrspace(4)** %tmp to %struct.ham addrspace(4)* addrspace(4)*
%tmp3 = alloca %struct.spam, align 8
%tmp4 = addrspacecast %struct.spam* %tmp3 to %struct.spam addrspace(4)*
store %struct.ham addrspace(4)* %arg, %struct.ham addrspace(4)* addrspace(4)* %tmp2, align 8
%tmp5 = addrspacecast %struct.bar* %arg1 to %struct.bar addrspace(4)*
%tmp6 = addrspacecast %struct.spam addrspace(4)* %tmp4 to %struct.spam*
call spir_func void @widget(%struct.bar addrspace(4)* dereferenceable_or_null(32) %tmp5, %struct.spam* byval(%struct.spam) align 8 %tmp6)
ret void
}

define linkonce_odr dso_local spir_func void @widget(%struct.bar addrspace(4)* dereferenceable_or_null(32) %arg, %struct.spam* byval(%struct.spam) align 8 %arg1) !work_item_scope !0 !parallel_for_work_item !0 {
bb:
ret void
}

!0 = !{}