-
Notifications
You must be signed in to change notification settings - Fork 16k
x86: fix musttail sibcall miscompilation #168956
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
2482d8f
e12da2e
f2a43bb
08a4817
e5dbe7b
e5b01ef
f51e586
89010ff
bb80502
8faac44
b53002b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2018,6 +2018,49 @@ SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, | |||||||||||||
| return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Returns the type of copying which is required to set up a byval argument to | ||||||||||||||
| // a tail-called function. This isn't needed for non-tail calls, because they | ||||||||||||||
| // always need the equivalent of CopyOnce, but tail-calls sometimes need two to | ||||||||||||||
| // avoid clobbering another argument (CopyViaTemp), and sometimes can be | ||||||||||||||
| // optimised to zero copies when forwarding an argument from the caller's | ||||||||||||||
| // caller (NoCopy). | ||||||||||||||
| X86TargetLowering::ByValCopyKind X86TargetLowering::ByValNeedsCopyForTailCall( | ||||||||||||||
| SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const { | ||||||||||||||
| MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); | ||||||||||||||
|
|
||||||||||||||
| // Globals are always safe to copy from. | ||||||||||||||
| if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src)) | ||||||||||||||
| return CopyOnce; | ||||||||||||||
|
|
||||||||||||||
| // Can only analyse frame index nodes, conservatively assume we need a | ||||||||||||||
| // temporary. | ||||||||||||||
| auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src); | ||||||||||||||
| auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst); | ||||||||||||||
| if (!SrcFrameIdxNode || !DstFrameIdxNode) | ||||||||||||||
| return CopyViaTemp; | ||||||||||||||
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
|
|
||||||||||||||
| int SrcFI = SrcFrameIdxNode->getIndex(); | ||||||||||||||
| int DstFI = DstFrameIdxNode->getIndex(); | ||||||||||||||
| assert(MFI.isFixedObjectIndex(DstFI) && | ||||||||||||||
| "byval passed in non-fixed stack slot"); | ||||||||||||||
|
|
||||||||||||||
| int64_t SrcOffset = MFI.getObjectOffset(SrcFI); | ||||||||||||||
| int64_t DstOffset = MFI.getObjectOffset(DstFI); | ||||||||||||||
|
|
||||||||||||||
| // If the source is in the local frame, then the copy to the argument | ||||||||||||||
| // memory is always valid. | ||||||||||||||
| bool FixedSrc = MFI.isFixedObjectIndex(SrcFI); | ||||||||||||||
| if (!FixedSrc || (FixedSrc && SrcOffset < 0)) | ||||||||||||||
| return CopyOnce; | ||||||||||||||
|
|
||||||||||||||
| // If the value is already in the correct location, then no copying is | ||||||||||||||
| // needed. If not, then we need to copy via a temporary. | ||||||||||||||
| if (SrcOffset == DstOffset) | ||||||||||||||
| return NoCopy; | ||||||||||||||
| else | ||||||||||||||
| return CopyViaTemp; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| SDValue | ||||||||||||||
| X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | ||||||||||||||
| SmallVectorImpl<SDValue> &InVals) const { | ||||||||||||||
|
|
@@ -2037,9 +2080,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| bool Is64Bit = Subtarget.is64Bit(); | ||||||||||||||
| bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); | ||||||||||||||
| bool IsSibcall = false; | ||||||||||||||
folkertdev marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt || | ||||||||||||||
| CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; | ||||||||||||||
| bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget); | ||||||||||||||
| bool ShouldGuaranteeTCO = shouldGuaranteeTCO( | ||||||||||||||
| CallConv, MF.getTarget().Options.GuaranteedTailCallOpt); | ||||||||||||||
| bool IsCalleePopSRet = | ||||||||||||||
| !ShouldGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget); | ||||||||||||||
| X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); | ||||||||||||||
| bool HasNCSR = (CB && isa<CallInst>(CB) && | ||||||||||||||
| CB->hasFnAttr("no_caller_saved_registers")); | ||||||||||||||
|
|
@@ -2086,7 +2130,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall(); | ||||||||||||||
| if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) { | ||||||||||||||
| if (Subtarget.isPICStyleGOT() && !ShouldGuaranteeTCO && !IsMustTail) { | ||||||||||||||
| // If we are using a GOT, disable tail calls to external symbols with | ||||||||||||||
| // default visibility. Tail calling such a symbol requires using a GOT | ||||||||||||||
| // relocation, which forces early binding of the symbol. This breaks code | ||||||||||||||
|
|
@@ -2098,15 +2142,18 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| isTailCall = false; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (isTailCall && !IsMustTail) { | ||||||||||||||
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
| if (isTailCall) { | ||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this block can be simplified to avoid the eligibility check for the "guaranteed" TCO case, and I made some comment suggestions that hopefully explain this better.
Suggested change
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've included some of your comment changes, but the code changes caused regressions. Can you maybe formulate what change you want to see here? The variable names are not the most clear here, but I haven't really been able to come up with better ones. I think
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To elaborate, because this logic is tricky, the regressions appear in cases like this one: define dso_local tailcc void @void_test(i32, i32, i32, i32) {
; X64-LABEL: void_test:
; X64: # %bb.0: # %entry
; X64-NEXT: jmp void_test # TAILCALL
;
; X86-LABEL: void_test:
; X86: # %bb.0: # %entry
; X86-NEXT: jmp void_test # TAILCALL
entry:
musttail call tailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3)
ret void
}With the current logic, both
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Got it, this case is I think more simplifications are possible, but I'll torture the boolean conditionals later. |
||||||||||||||
| // Check if it's really possible to do a tail call. | ||||||||||||||
| isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, | ||||||||||||||
| IsCalleePopSRet); | ||||||||||||||
| IsSibcall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, | ||||||||||||||
| IsCalleePopSRet); | ||||||||||||||
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
|
|
||||||||||||||
| // Sibcalls are automatically detected tailcalls which do not require | ||||||||||||||
| // ABI changes. | ||||||||||||||
| if (!IsGuaranteeTCO && isTailCall) | ||||||||||||||
| IsSibcall = true; | ||||||||||||||
| if (!IsMustTail) { | ||||||||||||||
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
| isTailCall = IsSibcall; | ||||||||||||||
|
|
||||||||||||||
| // Sibcalls are automatically detected tailcalls which do not require | ||||||||||||||
| // ABI changes. | ||||||||||||||
| IsSibcall = IsSibcall && !ShouldGuaranteeTCO; | ||||||||||||||
| } | ||||||||||||||
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
|
|
||||||||||||||
| if (isTailCall) | ||||||||||||||
| ++NumTailCalls; | ||||||||||||||
|
|
@@ -2125,13 +2172,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| // This is a sibcall. The memory operands are available in caller's | ||||||||||||||
| // own caller's stack. | ||||||||||||||
| NumBytes = 0; | ||||||||||||||
| else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv)) | ||||||||||||||
| else if (ShouldGuaranteeTCO && canGuaranteeTCO(CallConv)) | ||||||||||||||
| NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); | ||||||||||||||
|
|
||||||||||||||
| // A sibcall is ABI-compatible and does not need to adjust the stack pointer. | ||||||||||||||
| int FPDiff = 0; | ||||||||||||||
| if (isTailCall && | ||||||||||||||
| shouldGuaranteeTCO(CallConv, | ||||||||||||||
| MF.getTarget().Options.GuaranteedTailCallOpt)) { | ||||||||||||||
| if (isTailCall && ShouldGuaranteeTCO && !IsSibcall) { | ||||||||||||||
| // Lower arguments at fp - stackoffset + fpdiff. | ||||||||||||||
| unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); | ||||||||||||||
|
|
||||||||||||||
|
|
@@ -2146,6 +2192,74 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| unsigned NumBytesToPush = NumBytes; | ||||||||||||||
| unsigned NumBytesToPop = NumBytes; | ||||||||||||||
|
|
||||||||||||||
| SDValue StackPtr; | ||||||||||||||
| const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); | ||||||||||||||
|
|
||||||||||||||
| // If we are doing a tail-call, any byval arguments will be written to stack | ||||||||||||||
| // space which was used for incoming arguments. If any the values being used | ||||||||||||||
| // are incoming byval arguments to this function, then they might be | ||||||||||||||
| // overwritten by the stores of the outgoing arguments. To avoid this, we | ||||||||||||||
| // need to make a temporary copy of them in local stack space, then copy back | ||||||||||||||
| // to the argument area. | ||||||||||||||
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
| DenseMap<unsigned, SDValue> ByValTemporaries; | ||||||||||||||
folkertdev marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| SDValue ByValTempChain; | ||||||||||||||
| if (isTailCall) { | ||||||||||||||
| SmallVector<SDValue, 8> ByValCopyChains; | ||||||||||||||
| for (const CCValAssign &VA : ArgLocs) { | ||||||||||||||
| unsigned ArgIdx = VA.getValNo(); | ||||||||||||||
| SDValue Src = OutVals[ArgIdx]; | ||||||||||||||
| ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags; | ||||||||||||||
|
|
||||||||||||||
| if (!Flags.isByVal()) | ||||||||||||||
| continue; | ||||||||||||||
|
|
||||||||||||||
| auto PtrVT = getPointerTy(DAG.getDataLayout()); | ||||||||||||||
|
|
||||||||||||||
| if (!StackPtr.getNode()) | ||||||||||||||
| StackPtr = | ||||||||||||||
| DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), PtrVT); | ||||||||||||||
|
|
||||||||||||||
| // Destination: where this byval should live in the callee’s frame | ||||||||||||||
| // after the tail call. | ||||||||||||||
| int32_t Offset = VA.getLocMemOffset() + FPDiff; | ||||||||||||||
rnk marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| int Size = VA.getLocVT().getFixedSizeInBits() / 8; | ||||||||||||||
rnk marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true); | ||||||||||||||
rnk marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| SDValue Dst = DAG.getFrameIndex(FI, PtrVT); | ||||||||||||||
|
|
||||||||||||||
| ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags); | ||||||||||||||
|
|
||||||||||||||
| if (Copy == NoCopy) { | ||||||||||||||
| // If the argument is already at the correct offset on the stack | ||||||||||||||
| // (because we are forwarding a byval argument from our caller), we | ||||||||||||||
| // don't need any copying. | ||||||||||||||
| continue; | ||||||||||||||
| } else if (Copy == CopyOnce) { | ||||||||||||||
| // If the argument is in our local stack frame, no other argument | ||||||||||||||
| // preparation can clobber it, so we can copy it to the final location | ||||||||||||||
| // later. | ||||||||||||||
| ByValTemporaries[ArgIdx] = Src; | ||||||||||||||
| } else { | ||||||||||||||
| assert(Copy == CopyViaTemp && "unexpected enum value"); | ||||||||||||||
| // If we might be copying this argument from the outgoing argument | ||||||||||||||
| // stack area, we need to copy via a temporary in the local stack | ||||||||||||||
| // frame. | ||||||||||||||
| MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||||||||||||
| int TempFrameIdx = MFI.CreateStackObject(Flags.getByValSize(), | ||||||||||||||
| Flags.getNonZeroByValAlign(), | ||||||||||||||
| /*isSS=*/false); | ||||||||||||||
| SDValue Temp = | ||||||||||||||
| DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout())); | ||||||||||||||
|
|
||||||||||||||
| SDValue CopyChain = | ||||||||||||||
| CreateCopyOfByValArgument(Src, Temp, Chain, Flags, DAG, dl); | ||||||||||||||
| ByValCopyChains.push_back(CopyChain); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| if (!ByValCopyChains.empty()) | ||||||||||||||
| ByValTempChain = | ||||||||||||||
| DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // If we have an inalloca argument, all stack space has already been allocated | ||||||||||||||
| // for us and be right at the top of the stack. We don't support multiple | ||||||||||||||
| // arguments passed in memory when using inalloca. | ||||||||||||||
|
|
@@ -2186,7 +2300,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
|
|
||||||||||||||
| SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; | ||||||||||||||
| SmallVector<SDValue, 8> MemOpChains; | ||||||||||||||
| SDValue StackPtr; | ||||||||||||||
|
|
||||||||||||||
| // The next loop assumes that the locations are in the same order of the | ||||||||||||||
| // input arguments. | ||||||||||||||
|
|
@@ -2195,7 +2308,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
|
|
||||||||||||||
| // Walk the register/memloc assignments, inserting copies/loads. In the case | ||||||||||||||
| // of tail call optimization arguments are handle later. | ||||||||||||||
| const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); | ||||||||||||||
| for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E; | ||||||||||||||
| ++I, ++OutIndex) { | ||||||||||||||
| assert(OutIndex < Outs.size() && "Invalid Out index"); | ||||||||||||||
|
|
@@ -2285,7 +2397,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| if (ShadowReg) | ||||||||||||||
| RegsToPass.push_back(std::make_pair(ShadowReg, Arg)); | ||||||||||||||
| } | ||||||||||||||
| } else if (!IsSibcall && (!isTailCall || isByVal)) { | ||||||||||||||
| } else if (!IsSibcall && (!isTailCall || (isByVal && !IsMustTail))) { | ||||||||||||||
| assert(VA.isMemLoc()); | ||||||||||||||
| if (!StackPtr.getNode()) | ||||||||||||||
| StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), | ||||||||||||||
|
|
@@ -2362,7 +2474,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| // For tail calls lower the arguments to the 'real' stack slots. Sibcalls | ||||||||||||||
| // don't need this because the eligibility check rejects calls that require | ||||||||||||||
| // shuffling arguments passed in memory. | ||||||||||||||
| if (!IsSibcall && isTailCall) { | ||||||||||||||
| if (isTailCall && !IsSibcall) { | ||||||||||||||
| // Force all the incoming stack arguments to be loaded from the stack | ||||||||||||||
| // before any new outgoing arguments or the return address are stored to the | ||||||||||||||
| // stack, because the outgoing stack slots may alias the incoming argument | ||||||||||||||
|
|
@@ -2372,6 +2484,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| // would clobber. | ||||||||||||||
| Chain = DAG.getStackArgumentTokenFactor(Chain); | ||||||||||||||
|
|
||||||||||||||
| if (ByValTempChain) | ||||||||||||||
| Chain = | ||||||||||||||
| DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain, ByValTempChain); | ||||||||||||||
|
|
||||||||||||||
| SmallVector<SDValue, 8> MemOpChains2; | ||||||||||||||
| SDValue FIN; | ||||||||||||||
| int FI = 0; | ||||||||||||||
|
|
@@ -2404,16 +2520,24 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); | ||||||||||||||
|
|
||||||||||||||
| if (Flags.isByVal()) { | ||||||||||||||
| // Copy relative to framepointer. | ||||||||||||||
| SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); | ||||||||||||||
| if (!StackPtr.getNode()) | ||||||||||||||
| StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), | ||||||||||||||
| getPointerTy(DAG.getDataLayout())); | ||||||||||||||
| Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), | ||||||||||||||
| StackPtr, Source); | ||||||||||||||
|
|
||||||||||||||
| MemOpChains2.push_back( | ||||||||||||||
| CreateCopyOfByValArgument(Source, FIN, Chain, Flags, DAG, dl)); | ||||||||||||||
| SDValue ByValSrc; | ||||||||||||||
| bool NeedsStackCopy; | ||||||||||||||
folkertdev marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| if (auto It = ByValTemporaries.find(OutsIndex); | ||||||||||||||
| It != ByValTemporaries.end()) { | ||||||||||||||
| ByValSrc = It->second; | ||||||||||||||
| NeedsStackCopy = true; | ||||||||||||||
| } else { | ||||||||||||||
| ByValSrc = Arg; | ||||||||||||||
| NeedsStackCopy = false; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (NeedsStackCopy) { | ||||||||||||||
| auto PtrVT = getPointerTy(DAG.getDataLayout()); | ||||||||||||||
| SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT); | ||||||||||||||
|
|
||||||||||||||
| MemOpChains2.push_back(CreateCopyOfByValArgument( | ||||||||||||||
| ByValSrc, DstAddr, Chain, Flags, DAG, dl)); | ||||||||||||||
| } | ||||||||||||||
| } else { | ||||||||||||||
| // Store relative to framepointer. | ||||||||||||||
| MemOpChains2.push_back(DAG.getStore( | ||||||||||||||
|
|
@@ -2846,16 +2970,16 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( | |||||||||||||
| bool CCMatch = CallerCC == CalleeCC; | ||||||||||||||
| bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); | ||||||||||||||
| bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); | ||||||||||||||
| bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt || | ||||||||||||||
| CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail; | ||||||||||||||
| bool ShouldGuaranteeTCO = shouldGuaranteeTCO( | ||||||||||||||
| CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt); | ||||||||||||||
|
|
||||||||||||||
| // Win64 functions have extra shadow space for argument homing. Don't do the | ||||||||||||||
| // sibcall if the caller and callee have mismatched expectations for this | ||||||||||||||
| // space. | ||||||||||||||
| if (IsCalleeWin64 != IsCallerWin64) | ||||||||||||||
| return false; | ||||||||||||||
|
|
||||||||||||||
| if (IsGuaranteeTCO) { | ||||||||||||||
| if (ShouldGuaranteeTCO) { | ||||||||||||||
| if (canGuaranteeTCO(CalleeCC) && CCMatch) | ||||||||||||||
| return true; | ||||||||||||||
| return false; | ||||||||||||||
|
|
||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,14 +21,13 @@ define void @zap(i64 %a, i64 %b) nounwind { | |
| ; CHECK-NEXT: movl $2, %ecx | ||
| ; CHECK-NEXT: movl $3, %r8d | ||
| ; CHECK-NEXT: movq %rax, %r9 | ||
| ; CHECK-NEXT: callq foo@PLT | ||
| ; CHECK-NEXT: popq %rbx | ||
| ; CHECK-NEXT: popq %r12 | ||
| ; CHECK-NEXT: popq %r13 | ||
| ; CHECK-NEXT: popq %r14 | ||
| ; CHECK-NEXT: popq %r15 | ||
| ; CHECK-NEXT: popq %rbp | ||
| ; CHECK-NEXT: retq | ||
| ; CHECK-NEXT: jmp foo@PLT # TAILCALL | ||
|
Comment on lines
24
to
30
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is due to using /// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
} |
||
| entry: | ||
| %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9) | ||
| %res = extractvalue {i64, i64, i64} %0, 2 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When I read this, I asked myself: should/could we handle this at the IR level? I believe, but am not 100% confident, that byval for all targets models large objects passed in memory. We could establish the invariant that all LLVM IR going into codegen either a) forwards a byval parameter in place or b) reads from a local stack object.
After reading the rest of the PR, I don't think it's worth the extra complexity of adding a new CodeGenPrepare invariant, but I felt like it was worth rasing the option.