-
Notifications
You must be signed in to change notification settings - Fork 15.9k
x86: fix musttail sibcall miscompilation #168956
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
2482d8f
e12da2e
f2a43bb
08a4817
e5dbe7b
e5b01ef
f51e586
89010ff
bb80502
8faac44
b53002b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2018,6 +2018,49 @@ SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, | |||||||||||||
| return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Returns the type of copying which is required to set up a byval argument to | ||||||||||||||
| // a tail-called function. This isn't needed for non-tail calls, because they | ||||||||||||||
| // always need the equivalent of CopyOnce, but tail-calls sometimes need two to | ||||||||||||||
| // avoid clobbering another argument (CopyViaTemp), and sometimes can be | ||||||||||||||
| // optimised to zero copies when forwarding an argument from the caller's | ||||||||||||||
| // caller (NoCopy). | ||||||||||||||
| X86TargetLowering::ByValCopyKind X86TargetLowering::ByValNeedsCopyForTailCall( | ||||||||||||||
| SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const { | ||||||||||||||
| MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); | ||||||||||||||
|
|
||||||||||||||
| // Globals are always safe to copy from. | ||||||||||||||
| if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src)) | ||||||||||||||
| return CopyOnce; | ||||||||||||||
|
|
||||||||||||||
| // Can only analyse frame index nodes, conservatively assume we need a | ||||||||||||||
| // temporary. | ||||||||||||||
| auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src); | ||||||||||||||
| auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst); | ||||||||||||||
| if (!SrcFrameIdxNode || !DstFrameIdxNode) | ||||||||||||||
| return CopyViaTemp; | ||||||||||||||
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
|
|
||||||||||||||
| int SrcFI = SrcFrameIdxNode->getIndex(); | ||||||||||||||
| int DstFI = DstFrameIdxNode->getIndex(); | ||||||||||||||
| assert(MFI.isFixedObjectIndex(DstFI) && | ||||||||||||||
| "byval passed in non-fixed stack slot"); | ||||||||||||||
|
|
||||||||||||||
| int64_t SrcOffset = MFI.getObjectOffset(SrcFI); | ||||||||||||||
| int64_t DstOffset = MFI.getObjectOffset(DstFI); | ||||||||||||||
|
|
||||||||||||||
| // If the source is in the local frame, then the copy to the argument | ||||||||||||||
| // memory is always valid. | ||||||||||||||
| bool FixedSrc = MFI.isFixedObjectIndex(SrcFI); | ||||||||||||||
| if (!FixedSrc || (FixedSrc && SrcOffset < 0)) | ||||||||||||||
| return CopyOnce; | ||||||||||||||
|
|
||||||||||||||
| // If the value is already in the correct location, then no copying is | ||||||||||||||
| // needed. If not, then we need to copy via a temporary. | ||||||||||||||
| if (SrcOffset == DstOffset) | ||||||||||||||
| return NoCopy; | ||||||||||||||
| else | ||||||||||||||
| return CopyViaTemp; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| SDValue | ||||||||||||||
| X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | ||||||||||||||
| SmallVectorImpl<SDValue> &InVals) const { | ||||||||||||||
|
|
@@ -2098,15 +2141,18 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| isTailCall = false; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (isTailCall && !IsMustTail) { | ||||||||||||||
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
| if (isTailCall) { | ||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this block can be simplified to avoid the eligibility check for the "guaranteed" TCO case, and I made some comment suggestions that hopefully explain this better.
Suggested change
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've included some of your comment changes, but the code changes caused regressions. Can you maybe formulate what change you want to see here? The variable names are not the most clear here, but I haven't really been able to come up with better ones. I think
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To elaborate, because this logic is tricky, the regressions appear in cases like this one: define dso_local tailcc void @void_test(i32, i32, i32, i32) {
; X64-LABEL: void_test:
; X64: # %bb.0: # %entry
; X64-NEXT: jmp void_test # TAILCALL
;
; X86-LABEL: void_test:
; X86: # %bb.0: # %entry
; X86-NEXT: jmp void_test # TAILCALL
entry:
musttail call tailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3)
ret void
}With the current logic, both
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Got it, this case is I think more simplifications are possible, but I'll torture the boolean conditionals later. |
||||||||||||||
| // Check if it's really possible to do a tail call. | ||||||||||||||
| isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, | ||||||||||||||
| IsCalleePopSRet); | ||||||||||||||
| IsSibcall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, | ||||||||||||||
| IsCalleePopSRet); | ||||||||||||||
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
|
|
||||||||||||||
| // Sibcalls are automatically detected tailcalls which do not require | ||||||||||||||
| // ABI changes. | ||||||||||||||
| if (!IsGuaranteeTCO && isTailCall) | ||||||||||||||
| IsSibcall = true; | ||||||||||||||
| if (!IsMustTail) { | ||||||||||||||
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
| isTailCall = IsSibcall; | ||||||||||||||
|
|
||||||||||||||
| // Sibcalls are automatically detected tailcalls which do not require | ||||||||||||||
| // ABI changes. | ||||||||||||||
| IsSibcall = IsSibcall && !IsGuaranteeTCO; | ||||||||||||||
|
||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (isTailCall) | ||||||||||||||
| ++NumTailCalls; | ||||||||||||||
|
|
@@ -2128,8 +2174,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv)) | ||||||||||||||
| NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); | ||||||||||||||
|
|
||||||||||||||
| // A sibcall is ABI-compatible and does not need to adjust the stack pointer. | ||||||||||||||
| int FPDiff = 0; | ||||||||||||||
| if (isTailCall && | ||||||||||||||
| if (isTailCall && !IsSibcall && | ||||||||||||||
| shouldGuaranteeTCO(CallConv, | ||||||||||||||
| MF.getTarget().Options.GuaranteedTailCallOpt)) { | ||||||||||||||
| // Lower arguments at fp - stackoffset + fpdiff. | ||||||||||||||
|
|
@@ -2146,6 +2193,74 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| unsigned NumBytesToPush = NumBytes; | ||||||||||||||
| unsigned NumBytesToPop = NumBytes; | ||||||||||||||
|
|
||||||||||||||
| SDValue StackPtr; | ||||||||||||||
| const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); | ||||||||||||||
|
|
||||||||||||||
| // If we are doing a tail-call, any byval arguments will be written to stack | ||||||||||||||
| // space which was used for incoming arguments. If any the values being used | ||||||||||||||
| // are incoming byval arguments to this function, then they might be | ||||||||||||||
| // overwritten by the stores of the outgoing arguments. To avoid this, we | ||||||||||||||
| // need to make a temporary copy of them in local stack space, then copy back | ||||||||||||||
| // to the argument area. | ||||||||||||||
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
folkertdev marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
| DenseMap<unsigned, SDValue> ByValTemporaries; | ||||||||||||||
folkertdev marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| SDValue ByValTempChain; | ||||||||||||||
| if (isTailCall) { | ||||||||||||||
| SmallVector<SDValue, 8> ByValCopyChains; | ||||||||||||||
| for (const CCValAssign &VA : ArgLocs) { | ||||||||||||||
| unsigned ArgIdx = VA.getValNo(); | ||||||||||||||
| SDValue Src = OutVals[ArgIdx]; | ||||||||||||||
| ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags; | ||||||||||||||
|
|
||||||||||||||
| if (!Flags.isByVal()) | ||||||||||||||
| continue; | ||||||||||||||
|
|
||||||||||||||
| auto PtrVT = getPointerTy(DAG.getDataLayout()); | ||||||||||||||
|
|
||||||||||||||
| if (!StackPtr.getNode()) | ||||||||||||||
| StackPtr = | ||||||||||||||
| DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), PtrVT); | ||||||||||||||
|
|
||||||||||||||
| // Destination: where this byval should live in the callee’s frame | ||||||||||||||
| // after the tail call. | ||||||||||||||
| int32_t Offset = VA.getLocMemOffset() + FPDiff; | ||||||||||||||
rnk marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| int Size = VA.getLocVT().getFixedSizeInBits() / 8; | ||||||||||||||
rnk marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true); | ||||||||||||||
rnk marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| SDValue Dst = DAG.getFrameIndex(FI, PtrVT); | ||||||||||||||
|
|
||||||||||||||
| ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags); | ||||||||||||||
|
|
||||||||||||||
| if (Copy == NoCopy) { | ||||||||||||||
| // If the argument is already at the correct offset on the stack | ||||||||||||||
| // (because we are forwarding a byval argument from our caller), we | ||||||||||||||
| // don't need any copying. | ||||||||||||||
| continue; | ||||||||||||||
| } else if (Copy == CopyOnce) { | ||||||||||||||
| // If the argument is in our local stack frame, no other argument | ||||||||||||||
| // preparation can clobber it, so we can copy it to the final location | ||||||||||||||
| // later. | ||||||||||||||
| ByValTemporaries[ArgIdx] = Src; | ||||||||||||||
| } else { | ||||||||||||||
| assert(Copy == CopyViaTemp && "unexpected enum value"); | ||||||||||||||
| // If we might be copying this argument from the outgoing argument | ||||||||||||||
| // stack area, we need to copy via a temporary in the local stack | ||||||||||||||
| // frame. | ||||||||||||||
| MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||||||||||||
| int TempFrameIdx = MFI.CreateStackObject(Flags.getByValSize(), | ||||||||||||||
| Flags.getNonZeroByValAlign(), | ||||||||||||||
| /*isSS=*/false); | ||||||||||||||
| SDValue Temp = | ||||||||||||||
| DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout())); | ||||||||||||||
|
|
||||||||||||||
| SDValue CopyChain = | ||||||||||||||
| CreateCopyOfByValArgument(Src, Temp, Chain, Flags, DAG, dl); | ||||||||||||||
| ByValCopyChains.push_back(CopyChain); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| if (!ByValCopyChains.empty()) | ||||||||||||||
| ByValTempChain = | ||||||||||||||
| DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // If we have an inalloca argument, all stack space has already been allocated | ||||||||||||||
| // for us and be right at the top of the stack. We don't support multiple | ||||||||||||||
| // arguments passed in memory when using inalloca. | ||||||||||||||
|
|
@@ -2186,7 +2301,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
|
|
||||||||||||||
| SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; | ||||||||||||||
| SmallVector<SDValue, 8> MemOpChains; | ||||||||||||||
| SDValue StackPtr; | ||||||||||||||
|
|
||||||||||||||
| // The next loop assumes that the locations are in the same order of the | ||||||||||||||
| // input arguments. | ||||||||||||||
|
|
@@ -2195,7 +2309,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
|
|
||||||||||||||
| // Walk the register/memloc assignments, inserting copies/loads. In the case | ||||||||||||||
| // of tail call optimization arguments are handle later. | ||||||||||||||
| const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); | ||||||||||||||
| for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E; | ||||||||||||||
| ++I, ++OutIndex) { | ||||||||||||||
| assert(OutIndex < Outs.size() && "Invalid Out index"); | ||||||||||||||
|
|
@@ -2285,7 +2398,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| if (ShadowReg) | ||||||||||||||
| RegsToPass.push_back(std::make_pair(ShadowReg, Arg)); | ||||||||||||||
| } | ||||||||||||||
| } else if (!IsSibcall && (!isTailCall || isByVal)) { | ||||||||||||||
| } else if (!IsSibcall && (!isTailCall || (isByVal && !IsMustTail))) { | ||||||||||||||
| assert(VA.isMemLoc()); | ||||||||||||||
| if (!StackPtr.getNode()) | ||||||||||||||
| StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), | ||||||||||||||
|
|
@@ -2372,6 +2485,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| // would clobber. | ||||||||||||||
| Chain = DAG.getStackArgumentTokenFactor(Chain); | ||||||||||||||
|
|
||||||||||||||
| if (ByValTempChain) | ||||||||||||||
| Chain = | ||||||||||||||
| DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain, ByValTempChain); | ||||||||||||||
|
|
||||||||||||||
| SmallVector<SDValue, 8> MemOpChains2; | ||||||||||||||
| SDValue FIN; | ||||||||||||||
| int FI = 0; | ||||||||||||||
|
|
@@ -2404,16 +2521,24 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | |||||||||||||
| FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); | ||||||||||||||
|
|
||||||||||||||
| if (Flags.isByVal()) { | ||||||||||||||
| // Copy relative to framepointer. | ||||||||||||||
| SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); | ||||||||||||||
| if (!StackPtr.getNode()) | ||||||||||||||
| StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), | ||||||||||||||
| getPointerTy(DAG.getDataLayout())); | ||||||||||||||
| Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), | ||||||||||||||
| StackPtr, Source); | ||||||||||||||
|
|
||||||||||||||
| MemOpChains2.push_back( | ||||||||||||||
| CreateCopyOfByValArgument(Source, FIN, Chain, Flags, DAG, dl)); | ||||||||||||||
| SDValue ByValSrc; | ||||||||||||||
| bool NeedsStackCopy; | ||||||||||||||
folkertdev marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| if (auto It = ByValTemporaries.find(OutsIndex); | ||||||||||||||
| It != ByValTemporaries.end()) { | ||||||||||||||
| ByValSrc = It->second; | ||||||||||||||
| NeedsStackCopy = true; | ||||||||||||||
| } else { | ||||||||||||||
| ByValSrc = Arg; | ||||||||||||||
| NeedsStackCopy = false; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (NeedsStackCopy) { | ||||||||||||||
| auto PtrVT = getPointerTy(DAG.getDataLayout()); | ||||||||||||||
| SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT); | ||||||||||||||
|
|
||||||||||||||
| MemOpChains2.push_back(CreateCopyOfByValArgument( | ||||||||||||||
| ByValSrc, DstAddr, Chain, Flags, DAG, dl)); | ||||||||||||||
| } | ||||||||||||||
| } else { | ||||||||||||||
| // Store relative to framepointer. | ||||||||||||||
| MemOpChains2.push_back(DAG.getStore( | ||||||||||||||
|
|
||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When I read this, I asked myself: should/could we handle this at the IR level? I believe, but am not 100% confident, that byval for all targets models large objects passed in memory. We could establish the invariant that all LLVM IR going into codegen either a) forwards a byval parameter in place or b) reads from a local stack object.
After reading the rest of the PR, I don't think it's worth the extra complexity of adding a new CodeGenPrepare invariant, but I felt like it was worth rasing the option.