diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 50ce93104ab53..f4959efeab543 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -335,9 +335,16 @@ class MachineFrameInfo { /// stack objects like arguments so we can't treat them as immutable. bool HasTailCall = false; - /// Not empty, if shrink-wrapping found a better place for the prologue. + /// Not null, if shrink-wrapping found a better place for the prologue. + SmallVector PrologPoints; + /// Not null, if shrink-wrapping found a better place for the epilogue. + SmallVector EpilogPoints; + + /// Not empty, if shrink-wrapping found a better place for saving callee + /// saves. SaveRestorePoints SavePoints; - /// Not empty, if shrink-wrapping found a better place for the epilogue. + /// Not empty, if shrink-wrapping found a better place for restoring callee + /// saves. SaveRestorePoints RestorePoints; /// Size of the UnsafeStack Frame @@ -828,6 +835,39 @@ class MachineFrameInfo { /// \copydoc getCalleeSavedInfo() std::vector &getCalleeSavedInfo() { return CSInfo; } + /// Returns callee saved info vector for provided save point in + /// the current function. + std::vector getSaveCSInfo(MachineBasicBlock *MBB) const { + return SavePoints.lookup(MBB); + } + + /// Returns callee saved info vector for provided restore point + /// in the current function. 
+ const std::vector + getRestoreCSInfo(MachineBasicBlock *MBB) const { + return RestorePoints.lookup(MBB); + } + + MachineBasicBlock *findSpilledIn(const CalleeSavedInfo &Match) const { + for (auto [BB, CSIV] : SavePoints) { + for (auto &CSI : CSIV) { + if (CSI.getReg() == Match.getReg()) + return BB; + } + } + return nullptr; + } + + MachineBasicBlock *findRestoredIn(const CalleeSavedInfo &Match) const { + for (auto [BB, CSIV] : RestorePoints) { + for (auto &CSI : CSIV) { + if (CSI.getReg() == Match.getReg()) + return BB; + } + } + return nullptr; + } + /// Used by prolog/epilog inserter to set the function's callee saved /// information. void setCalleeSavedInfo(std::vector CSI) { @@ -851,6 +891,19 @@ class MachineFrameInfo { RestorePoints = std::move(NewRestorePoints); } + const SmallVector &getPrologPoints() const { + return PrologPoints; + } + void setPrologPoints(SmallVector Points) { + PrologPoints = Points; + } + const SmallVector &getEpilogPoints() const { + return EpilogPoints; + } + void setEpilogPoints(SmallVector Points) { + EpilogPoints = Points; + } + void clearSavePoints() { SavePoints.clear(); } void clearRestorePoints() { RestorePoints.clear(); } diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index 75696faf114cc..a0cd7df6a4bde 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -201,6 +201,10 @@ class LLVM_ABI TargetFrameLowering { return false; } + /// enableCSRSaveRestorePointsSplit - Returns true if the target support + /// multiple save/restore points in shrink wrapping. + virtual bool enableCSRSaveRestorePointsSplit() const { return false; } + /// Returns true if the stack slot holes in the fixed and callee-save stack /// area should be used when allocating other stack locations to reduce stack /// size. 
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index 1d54d72336860..a62bd23824eb5 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -152,6 +152,7 @@ convertSRPoints(ModuleSlotTracker &MST, std::vector &YamlSRPoints, const llvm::SaveRestorePoints &SRPoints, const TargetRegisterInfo *TRI); + static void convertStackObjects(yaml::MachineFunction &YMF, const MachineFunction &MF, ModuleSlotTracker &MST, MFPrintState &State); diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp index aed68afb4eb1b..c6658d2e9eba8 100644 --- a/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -244,6 +244,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ } OS << "\n"; } + OS << "save/restore points:\n"; if (!SavePoints.empty()) { diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 41efe622417c8..83d1cca205650 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -85,8 +85,12 @@ class PEIImpl { unsigned MinCSFrameIndex = std::numeric_limits::max(); unsigned MaxCSFrameIndex = 0; - // Save and Restore blocks of the current function. Typically there is a - // single save block, unless Windows EH funclets are involved. + // Prolog and Epilog blocks of the current function. Typically there is a + // single Prolog block, unless Windows EH funclets are involved. + MBBVector PrologBlocks; + MBBVector EpilogBlocks; + + // Save and Restore blocks of the current function. 
MBBVector SaveBlocks; MBBVector RestoreBlocks; @@ -104,6 +108,7 @@ class PEIImpl { void calculateCallFrameInfo(MachineFunction &MF); void calculateSaveRestoreBlocks(MachineFunction &MF); + void calculatePrologEpilogBlocks(MachineFunction &MF); void spillCalleeSavedRegs(MachineFunction &MF); void calculateFrameObjectOffsets(MachineFunction &MF); @@ -236,14 +241,17 @@ bool PEIImpl::run(MachineFunction &MF) { // information. Also eliminates call frame pseudo instructions. calculateCallFrameInfo(MF); - // Determine placement of CSR spill/restore code and prolog/epilog code: + // Determine placement of CSR spill/restore code: // place all spills in the entry block, all restores in return blocks. calculateSaveRestoreBlocks(MF); + // Determine placement of prolog/epilog code. + calculatePrologEpilogBlocks(MF); + // Stash away DBG_VALUEs that should not be moved by insertion of prolog code. SavedDbgValuesMap EntryDbgValues; - for (MachineBasicBlock *SaveBlock : SaveBlocks) - stashEntryDbgValues(*SaveBlock, EntryDbgValues); + for (MachineBasicBlock *PrologBlock : PrologBlocks) + stashEntryDbgValues(*PrologBlock, EntryDbgValues); // Handle CSR spilling and restoring, for targets that need it. if (MF.getTarget().usesPhysRegsForValues()) @@ -351,6 +359,8 @@ bool PEIImpl::run(MachineFunction &MF) { delete RS; SaveBlocks.clear(); RestoreBlocks.clear(); + PrologBlocks.clear(); + EpilogBlocks.clear(); MFI.clearSavePoints(); MFI.clearRestorePoints(); return true; @@ -419,6 +429,25 @@ void PEIImpl::calculateCallFrameInfo(MachineFunction &MF) { } } +/// Compute two sets of blocks for placing prolog and epilog code respectively. 
+void PEIImpl::calculatePrologEpilogBlocks(MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + PrologBlocks = MFI.getPrologPoints(); + EpilogBlocks = MFI.getEpilogPoints(); + + if (PrologBlocks.empty()) { + assert(EpilogBlocks.empty() && + "Both PrologBlocks and EpilogBlocks should be empty"); + PrologBlocks.push_back(&MF.front()); + for (MachineBasicBlock &MBB : MF) { + if (MBB.isEHFuncletEntry()) + PrologBlocks.push_back(&MBB); + if (MBB.isReturnBlock()) + EpilogBlocks.push_back(&MBB); + } + } +} + /// Compute the sets of entry and return blocks for saving and restoring /// callee-saved registers, and placing prolog and epilog code. void PEIImpl::calculateSaveRestoreBlocks(MachineFunction &MF) { @@ -429,19 +458,19 @@ void PEIImpl::calculateSaveRestoreBlocks(MachineFunction &MF) { // Use the points found by shrink-wrapping, if any. if (!MFI.getSavePoints().empty()) { - assert(MFI.getSavePoints().size() == 1 && - "Multiple save points are not yet supported!"); - const auto &SavePoint = *MFI.getSavePoints().begin(); - SaveBlocks.push_back(SavePoint.first); - assert(MFI.getRestorePoints().size() == 1 && - "Multiple restore points are not yet supported!"); - const auto &RestorePoint = *MFI.getRestorePoints().begin(); - MachineBasicBlock *RestoreBlock = RestorePoint.first; - // If RestoreBlock does not have any successor and is not a return block - // then the end point is unreachable and we do not need to insert any - // epilogue. 
- if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) - RestoreBlocks.push_back(RestoreBlock); + assert(!MFI.getRestorePoints().empty() && + "Both restore and save must be set"); + for (auto &item : MFI.getSavePoints()) + SaveBlocks.push_back(item.first); + + for (auto &item : MFI.getRestorePoints()) { + MachineBasicBlock *RestoreBlock = item.first; + // If RestoreBlock does not have any successor and is not a return block + // then the end point is unreachable and we do not need to insert any + // epilogue. + if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) + RestoreBlocks.push_back(RestoreBlock); + } return; } @@ -550,8 +579,8 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, /// Helper function to update the liveness information for the callee-saved /// registers. -static void updateLiveness(MachineFunction &MF) { - MachineFrameInfo &MFI = MF.getFrameInfo(); +static void updateLiveness(MachineFunction &MF, MachineBasicBlock *Save, + MachineBasicBlock *Restore, CalleeSavedInfo &Info) { // Visited will contain all the basic blocks that are in the region // where the callee saved registers are alive: // - Anything that is not Save or Restore -> LiveThrough. @@ -563,12 +592,6 @@ static void updateLiveness(MachineFunction &MF) { SmallVector WorkList; MachineBasicBlock *Entry = &MF.front(); - assert(MFI.getSavePoints().size() < 2 && - "Multiple save points not yet supported!"); - MachineBasicBlock *Save = MFI.getSavePoints().empty() - ? nullptr - : (*MFI.getSavePoints().begin()).first; - if (!Save) Save = Entry; @@ -578,11 +601,6 @@ static void updateLiveness(MachineFunction &MF) { } Visited.insert(Save); - assert(MFI.getRestorePoints().size() < 2 && - "Multiple restore points not yet supported!"); - MachineBasicBlock *Restore = MFI.getRestorePoints().empty() - ? 
nullptr - : (*MFI.getRestorePoints().begin()).first; if (Restore) // By construction Restore cannot be visited, otherwise it // means there exists a path to Restore that does not go @@ -602,30 +620,26 @@ static void updateLiveness(MachineFunction &MF) { WorkList.push_back(SuccBB); } - const std::vector &CSI = MFI.getCalleeSavedInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const CalleeSavedInfo &I : CSI) { - for (MachineBasicBlock *MBB : Visited) { - MCRegister Reg = I.getReg(); - // Add the callee-saved register as live-in. - // It's killed at the spill. - if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg)) - MBB->addLiveIn(Reg); - } - // If callee-saved register is spilled to another register rather than - // spilling to stack, the destination register has to be marked as live for - // each MBB between the prologue and epilogue so that it is not clobbered - // before it is reloaded in the epilogue. The Visited set contains all - // blocks outside of the region delimited by prologue/epilogue. - if (I.isSpilledToReg()) { - for (MachineBasicBlock &MBB : MF) { - if (Visited.count(&MBB)) - continue; - MCRegister DstReg = I.getDstReg(); - if (!MBB.isLiveIn(DstReg)) - MBB.addLiveIn(DstReg); - } + for (MachineBasicBlock *MBB : Visited) { + MCPhysReg Reg = Info.getReg(); + // Add the callee-saved register as live-in. + // It's killed at the spill. + if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg)) + MBB->addLiveIn(Reg); + } + // If callee-saved register is spilled to another register rather than + // spilling to stack, the destination register has to be marked as live for + // each MBB between the save and restore point so that it is not clobbered + // before it is reloaded in the restore point. The Visited set contains all + // blocks outside of the region delimited by save/restore. 
+ if (Info.isSpilledToReg()) { + for (MachineBasicBlock &MBB : MF) { + if (Visited.count(&MBB)) + continue; + MCPhysReg DstReg = Info.getDstReg(); + if (!MBB.isLiveIn(DstReg)) + MBB.addLiveIn(DstReg); } } } @@ -648,7 +662,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, /// Insert restore code for the callee-saved registers used in the function. static void insertCSRRestores(MachineBasicBlock &RestoreBlock, - std::vector &CSI) { + std::vector CSI) { MachineFunction &MF = *RestoreBlock.getParent(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); @@ -665,6 +679,64 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, } } +static void fillCSInfoPerBB(MachineFrameInfo &MFI, + DenseMap &RegToInfo, + MBBVector &PrologEpilogBlocks, bool isSave) { + // Global CalleeSavedInfo list aggregating CSIVs for all points + std::vector GCSIV; + const SaveRestorePoints &SRPoints = + isSave ? MFI.getSavePoints() : MFI.getRestorePoints(); + SaveRestorePoints Inner; + for (auto [BB, Regs] : SRPoints) { + // CalleeSavedInfo list for each point + std::vector CSIV; + for (auto &Reg : Regs) { + auto It = RegToInfo.find(Reg.getReg()); + if (It == RegToInfo.end()) + continue; + CSIV.push_back(*RegToInfo.at(Reg.getReg())); + GCSIV.push_back(*RegToInfo.at(Reg.getReg())); + } + // We need to sort CSIV, because Aarch64 expect CSI list to come sorted by + // frame index + sort(CSIV, [](const CalleeSavedInfo &Lhs, const CalleeSavedInfo &Rhs) { + return Lhs.getFrameIdx() < Rhs.getFrameIdx(); + }); + Inner.try_emplace(BB, std::move(CSIV)); + } + + // If in any case not all CSRs listed in MFI.getCalleeSavedInfo are in the + // list of spilled/restored registers (for example AArch64 backend add VG + // registers in the list of CalleeSavedRegs during spill slot assignment), we + // should add them to this list and spill/restore them in Prolog/Epilog. 
+ if (GCSIV.size() < RegToInfo.size()) { + for (auto &RTI : RegToInfo) { + if (count_if(GCSIV, [&RTI](const CalleeSavedInfo &CSI) { + return CSI.getReg() == RTI.first; + })) + continue; + for (MachineBasicBlock *BB : PrologEpilogBlocks) { + if (auto Entry = Inner.find(BB); Entry != Inner.end()) { + auto &CSI = Entry->second; + CSI.push_back(*RTI.second); + sort(CSI, [](const CalleeSavedInfo &Lhs, const CalleeSavedInfo &Rhs) { + return Lhs.getFrameIdx() < Rhs.getFrameIdx(); + }); + continue; + } + // CalleeSavedInfo list for each point + Inner.try_emplace(BB, + std::initializer_list{*RTI.second}); + } + } + } + + if (isSave) + MFI.setSavePoints(Inner); + else + MFI.setRestorePoints(Inner); +} + void PEIImpl::spillCalleeSavedRegs(MachineFunction &MF) { // We can't list this requirement in getRequiredProperties because some // targets (WebAssembly) use virtual registers past this point, and the pass @@ -691,18 +763,18 @@ void PEIImpl::spillCalleeSavedRegs(MachineFunction &MF) { MFI.setCalleeSavedInfoValid(true); std::vector &CSI = MFI.getCalleeSavedInfo(); + DenseMap RegToInfo; + for (auto &CS : CSI) + RegToInfo.insert({CS.getReg(), &CS}); - // Fill SavePoints and RestorePoints with CalleeSavedRegisters if (!MFI.getSavePoints().empty()) { - SaveRestorePoints SaveRestorePts; - for (const auto &SavePoint : MFI.getSavePoints()) - SaveRestorePts.insert({SavePoint.first, CSI}); - MFI.setSavePoints(std::move(SaveRestorePts)); - - SaveRestorePts.clear(); - for (const auto &RestorePoint : MFI.getRestorePoints()) - SaveRestorePts.insert({RestorePoint.first, CSI}); - MFI.setRestorePoints(std::move(SaveRestorePts)); + fillCSInfoPerBB(MFI, RegToInfo, PrologBlocks, /*isSave=*/true); + fillCSInfoPerBB(MFI, RegToInfo, EpilogBlocks, /*isSave=*/false); + } else { + SaveRestorePoints SavePts; + for (MachineBasicBlock *PrologBlock : PrologBlocks) + SavePts.insert({PrologBlock, MFI.getCalleeSavedInfo()}); + MFI.setSavePoints(std::move(SavePts)); } if (!CSI.empty()) { @@ -710,13 +782,38 
@@ void PEIImpl::spillCalleeSavedRegs(MachineFunction &MF) { NumLeafFuncWithSpills++; for (MachineBasicBlock *SaveBlock : SaveBlocks) - insertCSRSaves(*SaveBlock, CSI); + insertCSRSaves(*SaveBlock, MFI.getSavePoints().empty() + ? CSI + : MFI.getSaveCSInfo(SaveBlock)); + + MachineBasicBlock *Save = nullptr; + MachineBasicBlock *Restore = nullptr; + for (auto &CS : CSI) { + if (!MFI.getSavePoints().empty()) { + if (auto BB = MFI.findSpilledIn(CS)) + Save = BB; + + if (auto BB = MFI.findRestoredIn(CS)) + Restore = BB; + } + // Update the live-in information of all the blocks up to the save + // point. + updateLiveness(MF, Save, Restore, CS); + } - // Update the live-in information of all the blocks up to the save point. - updateLiveness(MF); + if (MFI.getRestorePoints().empty()) { + SaveRestorePoints RestorePts; + for (MachineBasicBlock *EpilogBlock : EpilogBlocks) + RestorePts.insert({EpilogBlock, MFI.getCalleeSavedInfo()}); + MFI.setRestorePoints(std::move(RestorePts)); + } - for (MachineBasicBlock *RestoreBlock : RestoreBlocks) - insertCSRRestores(*RestoreBlock, CSI); + for (MachineBasicBlock *RestoreBlock : RestoreBlocks) { + insertCSRRestores(*RestoreBlock, + MFI.getRestorePoints().empty() + ? CSI + : MFI.getRestoreCSInfo(RestoreBlock)); + } } } } @@ -1189,26 +1286,26 @@ void PEIImpl::insertPrologEpilogCode(MachineFunction &MF) { const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); // Add prologue to the function... - for (MachineBasicBlock *SaveBlock : SaveBlocks) - TFI.emitPrologue(MF, *SaveBlock); + for (MachineBasicBlock *PrologBlock : PrologBlocks) + TFI.emitPrologue(MF, *PrologBlock); // Add epilogue to restore the callee-save registers in each exiting block. - for (MachineBasicBlock *RestoreBlock : RestoreBlocks) - TFI.emitEpilogue(MF, *RestoreBlock); + for (MachineBasicBlock *EpilogBlock : EpilogBlocks) + TFI.emitEpilogue(MF, *EpilogBlock); // Zero call used registers before restoring callee-saved registers. 
insertZeroCallUsedRegs(MF); - for (MachineBasicBlock *SaveBlock : SaveBlocks) - TFI.inlineStackProbe(MF, *SaveBlock); + for (MachineBasicBlock *PrologBlock : PrologBlocks) + TFI.inlineStackProbe(MF, *PrologBlock); // Emit additional code that is required to support segmented stacks, if // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. if (MF.shouldSplitStack()) { - for (MachineBasicBlock *SaveBlock : SaveBlocks) - TFI.adjustForSegmentedStacks(MF, *SaveBlock); + for (MachineBasicBlock *PrologBlock : PrologBlocks) + TFI.adjustForSegmentedStacks(MF, *PrologBlock); } // Emit additional code that is required to explicitly handle the stack in @@ -1217,8 +1314,8 @@ void PEIImpl::insertPrologEpilogCode(MachineFunction &MF) { // different conditional check and another BIF for allocating more stack // space. if (MF.getFunction().getCallingConv() == CallingConv::HiPE) - for (MachineBasicBlock *SaveBlock : SaveBlocks) - TFI.adjustForHiPEPrologue(MF, *SaveBlock); + for (MachineBasicBlock *PrologBlock : PrologBlocks) + TFI.adjustForHiPEPrologue(MF, *PrologBlock); } /// insertZeroCallUsedRegs - Zero out call used registers. 
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index 83581052560cb..33081a4016cc1 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -94,13 +94,20 @@ STATISTIC(NumFunc, "Number of functions"); STATISTIC(NumCandidates, "Number of shrink-wrapping candidates"); STATISTIC(NumCandidatesDropped, "Number of shrink-wrapping candidates dropped because of frequency"); +STATISTIC( + NumFuncWithSplitting, + "Number of functions, for which we managed to split Save/Restore points"); static cl::opt EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, cl::desc("enable the shrink-wrapping pass")); static cl::opt EnablePostShrinkWrapOpt( - "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden, - cl::desc("enable splitting of the restore block if possible")); + "enable-post-shrink-wrap-restore-split", cl::init(true), cl::Hidden, + cl::desc( + "enable after-shrink-wrap splitting of the restore block if possible")); +static cl::opt EnableShrinkWrapSplitOpt( + "enable-shrink-wrap-into-multiple-points", cl::init(false), cl::Hidden, + cl::desc("enable splitting of the save and restore blocks if possible")); namespace { @@ -117,15 +124,87 @@ class ShrinkWrapImpl { MachineDominatorTree *MDT = nullptr; MachinePostDominatorTree *MPDT = nullptr; - /// Current safe point found for the prologue. - /// The prologue will be inserted before the first instruction - /// in this basic block. - MachineBasicBlock *Save = nullptr; + /// Hash table, mapping register with its corresponding spill and restore + /// basic block. + DenseMap> + SavedRegs; - /// Current safe point found for the epilogue. - /// The epilogue will be inserted before the first terminator instruction - /// in this basic block. 
- MachineBasicBlock *Restore = nullptr; + class SaveRestorePoints { + llvm::SaveRestorePoints SRPoints; + + public: + llvm::SaveRestorePoints &get() { return SRPoints; } + + void set(llvm::SaveRestorePoints &Rhs) { SRPoints = std::move(Rhs); } + + void clear() { SRPoints.clear(); } + + bool areMultiple() const { return SRPoints.size() > 1; } + + MachineBasicBlock *getFirst() { + return SRPoints.empty() ? nullptr : SRPoints.begin()->first; + } + + void insert(const std::pair> &Point) { + SRPoints.insert(Point); + } + + void insert( + std::pair> &&Point) { + SRPoints.insert(Point); + } + + std::set + insertReg(Register Reg, MachineBasicBlock *MBB, + std::optional> SaveRestoreBlockSet = + std::nullopt) { + assert(MBB && "MBB is nullptr"); + if (SRPoints.contains(MBB)) { + SRPoints[MBB].push_back(CalleeSavedInfo(Reg)); + if (SaveRestoreBlockSet.has_value()) + return SaveRestoreBlockSet.value(); + return std::set(); + } + std::vector CSInfos{}; + CSInfos.push_back(CalleeSavedInfo(Reg)); + SRPoints.insert(std::make_pair(MBB, CSInfos)); + if (SaveRestoreBlockSet.has_value()) { + SaveRestoreBlockSet->insert(MBB); + return SaveRestoreBlockSet.value(); + } + return std::set(); + } + + void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { + for (auto [BB, CSIV] : SRPoints) { + OS << printMBBReference(*BB) << ": "; + for (auto &CSI : CSIV) { + OS << printReg(CSI.getReg(), TRI) << " "; + } + OS << "\n"; + } + } + + void dump(const TargetRegisterInfo *TRI) const { print(dbgs(), TRI); } + }; + + /// Class, wrapping hash table contained safe points, found for register spill + /// mapped to the list of corresponding registers. Register spill will be + /// inserted before the first instruction in this basic block. + SaveRestorePoints SavePoints; + + /// Class, wrapping hash table contained safe points, found for register + /// restore mapped to the list of corresponding registers. 
Register restore + /// will be inserted before the first terminator instruction in this basic + /// block. + SaveRestorePoints RestorePoints; + + std::set SaveBlocks; + std::set RestoreBlocks; + + SmallVector PrologPoints; + SmallVector EpilogPoints; /// Hold the information of the basic block frequency. /// Use to check the profitability of the new points. @@ -168,11 +247,17 @@ class ShrinkWrapImpl { /// therefore this approach is fair. BitVector StackAddressUsedBlockInfo; - /// Check if \p MI uses or defines a callee-saved register or - /// a frame index. If this is the case, this means \p MI must happen + /// Check if \p MI uses or defines a frame index. + /// If this is the case, this means \p MI must happen /// after Save and before Restore. - bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS, - bool StackAddressUsed) const; + bool useOrDefFI(const MachineInstr &MI, RegScavenger *RS, + bool StackAddressUsed) const; + + /// Check if \p MI uses or defines a callee-saved register. + /// If this is the case, this means \p MI must happen + /// after Save and before Restore. 
+ bool useOrDefCSR(const MachineInstr &MI, RegScavenger *RS, + std::set *RegsToSave) const; const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const { if (CurrentCSRs.empty()) { @@ -189,12 +274,32 @@ class ShrinkWrapImpl { return CurrentCSRs; } + std::vector getTargetCSIList(MachineFunction &MF) { + const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); + std::vector TargetCSRs; + for (unsigned i = 0; CSRegs[i]; ++i) + TargetCSRs.push_back(CalleeSavedInfo(CSRegs[i])); + return TargetCSRs; + } + + void setupSaveRestorePoints(RegScavenger *RS); + + void placeAdditionalCSRs(); + + void performSimpleShrinkWrap(RegScavenger *RS, MachineBasicBlock &SavePoint); + + bool canSplitSaveRestorePoints( + MachineFrameInfo &MFI, + const ReversePostOrderTraversal &RPOT, + RegScavenger *RS); + /// Update the Save and Restore points such that \p MBB is in /// the region that is dominated by Save and post-dominated by Restore /// and Save and Restore still match the safe point definition. /// Such point may not exist and Save and/or Restore may be null after /// this call. - void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS); + void updateSaveRestorePoints(MachineBasicBlock &MBB, Register Reg, + RegScavenger *RS); // Try to find safe point based on dominance and block frequency without // any change in IR. @@ -205,7 +310,10 @@ class ShrinkWrapImpl { /// This function tries to split the restore point if doing so can shrink the /// save point further. \return True if restore point is split. bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF, - RegScavenger *RS); + RegScavenger *RS, MachineBasicBlock *Save, + MachineBasicBlock *Restore); + + void setupPrologEpilog(); /// This function analyzes if the restore point can split to create a new /// restore point. This function collects @@ -225,8 +333,13 @@ class ShrinkWrapImpl { /// Initialize the pass for \p MF. 
void init(MachineFunction &MF) { RCI.runOnMachineFunction(MF); - Save = nullptr; - Restore = nullptr; + SavedRegs.clear(); + SavePoints.clear(); + RestorePoints.clear(); + PrologPoints.clear(); + EpilogPoints.clear(); + SaveBlocks.clear(); + RestoreBlocks.clear(); EntryFreq = MBFI->getEntryFreq(); const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); @@ -242,7 +355,22 @@ class ShrinkWrapImpl { /// Check whether or not Save and Restore points are still interesting for /// shrink-wrapping. - bool ArePointsInteresting() const { return Save != Entry && Save && Restore; } + bool AreCandidatesFound(bool splitEnabled) const { + if (SavedRegs.empty()) + return false; + + auto Cond = [splitEnabled, this](auto &RegEntry) { + auto [Save, Restore] = RegEntry.second; + return (Save && Restore && Save != Entry) == splitEnabled; + }; + + auto It = std::find_if(begin(SavedRegs), end(SavedRegs), Cond); + + if (It == SavedRegs.end()) + return !splitEnabled; + + return splitEnabled; + } public: ShrinkWrapImpl(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT, @@ -301,8 +429,8 @@ INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_END(ShrinkWrapLegacy, DEBUG_TYPE, "Shrink Wrap Pass", false, false) -bool ShrinkWrapImpl::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS, - bool StackAddressUsed) const { +bool ShrinkWrapImpl::useOrDefFI(const MachineInstr &MI, RegScavenger *RS, + bool StackAddressUsed) const { /// Check if \p Op is known to access an address not on the function's stack . /// At the moment, accesses where the underlying object is a global, function /// argument, or jump table are considered non-stack accesses. 
Note that the @@ -334,10 +462,28 @@ bool ShrinkWrapImpl::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS, LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); return true; } + + if (MI.isDebugValue()) + return false; + + const auto &Ops = MI.operands(); + + auto FIOpIt = std::find_if(Ops.begin(), Ops.end(), + [](const auto &MO) { return MO.isFI(); }); + if (FIOpIt == Ops.end()) + return false; + + LLVM_DEBUG(dbgs() << "Use or define FI( " << FIOpIt->isFI() << "): " << MI + << '\n'); + + return true; +} + +bool ShrinkWrapImpl::useOrDefCSR(const MachineInstr &MI, RegScavenger *RS, + std::set *RegsToSave) const { const MachineFunction *MF = MI.getParent()->getParent(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); for (const MachineOperand &MO : MI.operands()) { - bool UseOrDefCSR = false; if (MO.isReg()) { // Ignore instructions like DBG_VALUE which don't read/def the register. if (!MO.isDef() && !MO.readsReg()) @@ -358,26 +504,33 @@ bool ShrinkWrapImpl::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS, // PPC's Frame pointer (FP) is also not described as a callee-saved // register. Until the FP is assigned a Physical Register PPC's FP needs // to be checked separately. - UseOrDefCSR = (!MI.isCall() && PhysReg == SP) || - RCI.getLastCalleeSavedAlias(PhysReg) || - (!MI.isReturn() && - TRI->isNonallocatableRegisterCalleeSave(PhysReg)) || - TRI->isVirtualFrameRegister(PhysReg); + if ((!MI.isCall() && PhysReg == SP) || + RCI.getLastCalleeSavedAlias(PhysReg) || + (!MI.isReturn() && + TRI->isNonallocatableRegisterCalleeSave(PhysReg)) || + TRI->isVirtualFrameRegister(PhysReg)) { + LLVM_DEBUG(dbgs() << MI << " uses or defines CSR: " + << RCI.getLastCalleeSavedAlias(PhysReg) << "\n"); + if (!RegsToSave) + return true; + + RegsToSave->insert(PhysReg); + } } else if (MO.isRegMask()) { // Check if this regmask clobbers any of the CSRs. 
for (unsigned Reg : getCurrentCSRs(RS)) { if (MO.clobbersPhysReg(Reg)) { - UseOrDefCSR = true; - break; + if (!RegsToSave) + return true; + RegsToSave->insert(Reg); } } } - // Skip FrameIndex operands in DBG_VALUE instructions. - if (UseOrDefCSR || (MO.isFI() && !MI.isDebugValue())) { - LLVM_DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" - << MO.isFI() << "): " << MI << '\n'); - return true; - } + } + + // Skip FrameIndex operands in DBG_VALUE instructions. + if (RegsToSave && !RegsToSave->empty()) { + return true; } return false; } @@ -563,7 +716,8 @@ bool ShrinkWrapImpl::checkIfRestoreSplittable( SmallVectorImpl &CleanPreds, const TargetInstrInfo *TII, RegScavenger *RS) { for (const MachineInstr &MI : *CurRestore) - if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true)) + if (useOrDefFI(MI, RS, /*StackAddressUsed=*/true) || + useOrDefCSR(MI, RS, nullptr)) return false; for (MachineBasicBlock *PredBB : CurRestore->predecessors()) { @@ -580,7 +734,9 @@ bool ShrinkWrapImpl::checkIfRestoreSplittable( } bool ShrinkWrapImpl::postShrinkWrapping(bool HasCandidate, MachineFunction &MF, - RegScavenger *RS) { + RegScavenger *RS, + MachineBasicBlock *Save, + MachineBasicBlock *Restore) { if (!EnablePostShrinkWrapOpt) return false; @@ -623,7 +779,8 @@ bool ShrinkWrapImpl::postShrinkWrapping(bool HasCandidate, MachineFunction &MF, continue; } for (const MachineInstr &MI : MBB) - if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true)) { + if (useOrDefFI(MI, RS, /*StackAddressUsed=*/true) || + useOrDefCSR(MI, RS, nullptr)) { DirtyBBs.insert(&MBB); break; } @@ -683,34 +840,53 @@ bool ShrinkWrapImpl::postShrinkWrapping(bool HasCandidate, MachineFunction &MF, assert((EntryFreq >= MBFI->getBlockFreq(Save) && EntryFreq >= MBFI->getBlockFreq(Restore)) && "Incorrect save or restore point based on block frequency"); + + SavePoints.clear(); + RestorePoints.clear(); + + std::vector CSIV = getTargetCSIList(MF); + SavePoints.insert(std::make_pair(Save, CSIV)); + 
RestorePoints.insert(std::make_pair(Restore, CSIV)); + PrologPoints.clear(); + EpilogPoints.clear(); + PrologPoints.push_back(Save); + EpilogPoints.push_back(Restore); return true; } void ShrinkWrapImpl::updateSaveRestorePoints(MachineBasicBlock &MBB, - RegScavenger *RS) { + Register Reg, RegScavenger *RS) { + MachineBasicBlock *Save = nullptr; + MachineBasicBlock *Restore = nullptr; + // Get rid of the easy cases first. - if (!Save) - Save = &MBB; - else + if (SavedRegs.contains(Reg) && (Save = SavedRegs.at(Reg).first)) Save = MDT->findNearestCommonDominator(Save, &MBB); - assert(Save); + else { + auto Pos = + SavedRegs.insert(std::make_pair(Reg, std::make_pair(&MBB, nullptr))); + Save = Pos.first->second.first; + } + + assert(SavedRegs.contains(Reg) && Save); + Restore = SavedRegs.at(Reg).second; if (!Restore) Restore = &MBB; - else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree, it - // means the block never returns. If that's the - // case, we don't want to call + else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree, + // it means the block never returns. If + // that's the case, we don't want to call // `findNearestCommonDominator`, which will - // return `Restore`. + // return `Restore` and RestoreBlock for + // this register will be null. Restore = MPDT->findNearestCommonDominator(Restore, &MBB); - else - Restore = nullptr; // Abort, we can't find a restore point in this case. // Make sure we would be able to insert the restore code before the // terminator. if (Restore == &MBB) { for (const MachineInstr &Terminator : MBB.terminators()) { - if (!useOrDefCSROrFI(Terminator, RS, /*StackAddressUsed=*/true)) + if (!useOrDefFI(Terminator, RS, /*StackAddressUsed=*/true) && + !useOrDefCSR(Terminator, RS, nullptr)) continue; // One of the terminator needs to happen before the restore point. 
if (MBB.succ_empty()) { @@ -725,8 +901,10 @@ void ShrinkWrapImpl::updateSaveRestorePoints(MachineBasicBlock &MBB, } if (!Restore) { - LLVM_DEBUG( - dbgs() << "Restore point needs to be spanned on several blocks\n"); + SavedRegs[Reg].first = Save; + SavedRegs[Reg].second = nullptr; + LLVM_DEBUG(dbgs() << "Restore point needs to be spanned on several blocks " + << Reg << "\n"); return; } @@ -802,6 +980,9 @@ void ShrinkWrapImpl::updateSaveRestorePoints(MachineBasicBlock &MBB, } } } + + SavedRegs[Reg].first = Save; + SavedRegs[Reg].second = Restore; } static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE, @@ -817,9 +998,78 @@ static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE, return false; } +void ShrinkWrapImpl::setupSaveRestorePoints(RegScavenger *RS) { + for (unsigned Reg : getCurrentCSRs(RS)) { + auto [Save, Restore] = SavedRegs[Reg]; + if (SavedRegs.contains(Reg) && Save && Restore) + continue; + + SaveBlocks = SavePoints.insertReg(Reg, &MachineFunc->front(), SaveBlocks); + for (MachineBasicBlock &MBB : *MachineFunc) { + if (MBB.isEHFuncletEntry()) + SaveBlocks = SavePoints.insertReg(Reg, &MBB, SaveBlocks); + if (MBB.isReturnBlock()) + RestoreBlocks = RestorePoints.insertReg(Reg, &MBB, RestoreBlocks); + } + } + + for (auto [Reg, SaveRestoreBlocks] : SavedRegs) { + auto [Save, Restore] = SaveRestoreBlocks; + if (Save && Restore) { + SaveBlocks = SavePoints.insertReg(Reg, Save, SaveBlocks); + // if (!Restore->succ_empty() || Restore->isReturnBlock()) + RestoreBlocks = RestorePoints.insertReg(Reg, Restore, RestoreBlocks); + // else { + // dbgs() << "Here\n"; + // RestorePoints.insertReg(Reg, Restore); + //} + } + } +} + +bool ShrinkWrapImpl::canSplitSaveRestorePoints( + MachineFrameInfo &MFI, + const ReversePostOrderTraversal &RPOT, + RegScavenger *RS) { + if (MFI.hasVarSizedObjects()) { + LLVM_DEBUG(dbgs() << "Can't split save/restore points, because frame " + "contains var sized objects\n"); + return false; + } + + for 
(MachineBasicBlock *MBB : RPOT) { + if (MBB->isEHPad() || MBB->isInlineAsmBrIndirectTarget()) + return false; + } + + return true; +} + +void ShrinkWrapImpl::performSimpleShrinkWrap(RegScavenger *RS, + MachineBasicBlock &SavePoint) { + auto CSIV = getTargetCSIList(*MachineFunc); + if (!CSIV.empty()) { + for (CalleeSavedInfo CSI : CSIV) { + auto Reg = CSI.getReg(); + if (SavedRegs.contains(Reg) && + (!SavedRegs[Reg].first || !SavedRegs[Reg].second)) + continue; + updateSaveRestorePoints(SavePoint, Reg, RS); + } + } else + updateSaveRestorePoints(SavePoint, MCRegister::NoRegister, RS); +} + bool ShrinkWrapImpl::performShrinkWrapping( const ReversePostOrderTraversal &RPOT, RegScavenger *RS) { + const TargetFrameLowering *TFI = + MachineFunc->getSubtarget().getFrameLowering(); + + MachineFrameInfo &MFI = MachineFunc->getFrameInfo(); + bool canSplit = canSplitSaveRestorePoints(MFI, RPOT, RS); + StackAddressUsedBlockInfo.set(); + for (MachineBasicBlock *MBB : RPOT) { LLVM_DEBUG(dbgs() << "Look into: " << printMBBReference(*MBB) << '\n'); @@ -834,8 +1084,9 @@ bool ShrinkWrapImpl::performShrinkWrapping( // are at least at the boundary of the save and restore points. The // problem is that a basic block can jump out from the middle in these // cases, which we do not handle. - updateSaveRestorePoints(*MBB, RS); - if (!ArePointsInteresting()) { + performSimpleShrinkWrap(RS, *MBB); + + if (!AreCandidatesFound(false /* splitEnabled */)) { LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n"); return false; } @@ -855,30 +1106,46 @@ bool ShrinkWrapImpl::performShrinkWrapping( } } + std::set RegsToSave; for (const MachineInstr &MI : *MBB) { - if (useOrDefCSROrFI(MI, RS, StackAddressUsed)) { - // Save (resp. restore) point must dominate (resp. post dominate) - // MI. Look for the proper basic block for those. - updateSaveRestorePoints(*MBB, RS); - // If we are at a point where we cannot improve the placement of - // save/restore instructions, just give up. 
- if (!ArePointsInteresting()) { - LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n"); - return false; + RegsToSave.clear(); + bool UseOrDefCSR = useOrDefCSR(MI, RS, &RegsToSave); + bool UseOrDefFI = useOrDefFI(MI, RS, StackAddressUsed); + if (UseOrDefCSR || UseOrDefFI) { + if (!EnableShrinkWrapSplitOpt || + !TFI->enableCSRSaveRestorePointsSplit() || !canSplit) { + performSimpleShrinkWrap(RS, *MBB); + if (!AreCandidatesFound(false /* splitEnabled */)) { + LLVM_DEBUG( + dbgs() + << "No candidates in simple case prevents shrink-wrapping\n"); + return false; + } + } else { + if (UseOrDefCSR) { + for (auto Reg : RegsToSave) { + // Save (resp. restore) point must dominate (resp. post dominate) + // MI. Look for the proper basic block for those. + updateSaveRestorePoints(*MBB, Reg, RS); + } + } + if (UseOrDefFI) { + performSimpleShrinkWrap(RS, *MBB); + if (!AreCandidatesFound(true /* splitEnabled */)) { + LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found!\n"); + return false; + } + } } - // No need to look for other instructions, this basic block - // will already be part of the handled region. StackAddressUsed = true; - break; } } StackAddressUsedBlockInfo[MBB->getNumber()] = StackAddressUsed; } - if (!ArePointsInteresting()) { + if (!AreCandidatesFound(canSplit /* splitEnabled */)) { // If the points are not interesting at this point, then they must be null // because it means we did not encounter any frame/CSR related code. // Otherwise, we would have returned from the previous loop. 
- assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!"); LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n"); return false; } @@ -886,46 +1153,74 @@ bool ShrinkWrapImpl::performShrinkWrapping( LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq.getFrequency() << '\n'); - const TargetFrameLowering *TFI = - MachineFunc->getSubtarget().getFrameLowering(); - do { - LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " - << printMBBReference(*Save) << ' ' - << printBlockFreq(*MBFI, *Save) - << "\nRestore: " << printMBBReference(*Restore) << ' ' - << printBlockFreq(*MBFI, *Restore) << '\n'); - - bool IsSaveCheap, TargetCanUseSaveAsPrologue = false; - if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save)) && - EntryFreq >= MBFI->getBlockFreq(Restore)) && - ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) && - TFI->canUseAsEpilogue(*Restore))) - break; - LLVM_DEBUG( - dbgs() << "New points are too expensive or invalid for the target\n"); - MachineBasicBlock *NewBB; - if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) { - Save = FindIDom<>(*Save, Save->predecessors(), *MDT); - if (!Save) - break; - NewBB = Save; - } else { - // Restore is expensive. 
- Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); - if (!Restore) + for (auto [Reg, SaveRestoreBlocks] : SavedRegs) { + auto [Save, Restore] = SaveRestoreBlocks; + if (!Save || !Restore) + continue; + + do { + LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " + << printMBBReference(*Save) << ' ' + << printBlockFreq(*MBFI, *Save) + << "\nRestore: " << printMBBReference(*Restore) << ' ' + << printBlockFreq(*MBFI, *Restore) << '\n'); + + bool IsSaveCheap, TargetCanUseSaveAsPrologue = false; + if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save)) && + EntryFreq >= MBFI->getBlockFreq(Restore)) && + ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) && + TFI->canUseAsEpilogue(*Restore))) break; - NewBB = Restore; - } - updateSaveRestorePoints(*NewBB, RS); - } while (Save && Restore); + LLVM_DEBUG( + dbgs() << "New points are too expensive or invalid for the target\n"); + MachineBasicBlock *NewBB; + if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) { + Save = FindIDom<>(*Save, Save->predecessors(), *MDT); + if (!Save) + break; + NewBB = Save; + } else { + // Restore is expensive. + Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + if (!Restore) + break; + NewBB = Restore; + } + updateSaveRestorePoints(*NewBB, Reg, RS); + } while (Save && Restore); + } - if (!ArePointsInteresting()) { + if (!AreCandidatesFound(true /* splitEnabled */)) { ++NumCandidatesDropped; return false; } return true; } +void ShrinkWrapImpl::setupPrologEpilog() { + MachineBasicBlock *Prolog = + SaveBlocks.empty() + ? nullptr + : MDT->findNearestCommonDominator(iterator_range(SaveBlocks)); + MachineBasicBlock *Epilog = + RestoreBlocks.empty() + ? 
nullptr + : MPDT->findNearestCommonDominator(iterator_range(RestoreBlocks)); + if (Prolog && Epilog) { + PrologPoints.push_back(Prolog); + if (Epilog->isReturnBlock() || !Epilog->succ_empty()) + EpilogPoints.push_back(Epilog); + } else { + PrologPoints.push_back(&MachineFunc->front()); + for (MachineBasicBlock &MBB : *MachineFunc) { + if (MBB.isEHFuncletEntry()) + PrologPoints.push_back(&MBB); + if (MBB.isReturnBlock()) + EpilogPoints.push_back(&MBB); + } + } +} + bool ShrinkWrapImpl::run(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); @@ -954,27 +1249,47 @@ bool ShrinkWrapImpl::run(MachineFunction &MF) { // basic block and change the state only for those basic blocks for which we // were able to prove the opposite. StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true); - bool HasCandidate = performShrinkWrapping(RPOT, RS.get()); + bool HasCandidates = performShrinkWrapping(RPOT, RS.get()); StackAddressUsedBlockInfo.clear(); - Changed = postShrinkWrapping(HasCandidate, MF, RS.get()); - if (!HasCandidate && !Changed) - return false; - if (!ArePointsInteresting()) - return Changed; - LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " - << printMBBReference(*Save) << ' ' - << "\nRestore: " << printMBBReference(*Restore) << '\n'); + if (HasCandidates) { + /* Fill SavePoints and RestorePoints with CSRs, for which both Save and + * Restore are found */ + setupSaveRestorePoints(RS.get()); - MachineFrameInfo &MFI = MF.getFrameInfo(); + setupPrologEpilog(); + } + + if (!HasCandidates || + (!SavePoints.areMultiple() && !RestorePoints.areMultiple())) { + Changed = + postShrinkWrapping(HasCandidates, MF, RS.get(), SavePoints.getFirst(), + RestorePoints.getFirst()); + if (!HasCandidates && !Changed) + return false; + + if ((!SavePoints.getFirst()) || (!RestorePoints.getFirst()) || + (SavePoints.getFirst() == Entry)) + return Changed; + } + + if (SavePoints.areMultiple() || RestorePoints.areMultiple()) { + 
++NumFuncWithSplitting; + } + + LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\n"); - // List of CalleeSavedInfo for registers will be added during prologepilog - // pass - SaveRestorePoints SavePoints({{Save, {}}}); - SaveRestorePoints RestorePoints({{Restore, {}}}); + LLVM_DEBUG(dbgs() << "SavePoints:\n"); + LLVM_DEBUG(SavePoints.dump(TRI)); - MFI.setSavePoints(SavePoints); - MFI.setRestorePoints(RestorePoints); + LLVM_DEBUG(dbgs() << "RestorePoints:\n"); + LLVM_DEBUG(RestorePoints.dump(TRI)); + + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setPrologPoints(PrologPoints); + MFI.setEpilogPoints(EpilogPoints); + MFI.setSavePoints(SavePoints.get()); + MFI.setRestorePoints(RestorePoints.get()); ++NumCandidates; return Changed; } diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index cbd08f0fb5dff..0b9b9910e9dc7 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -209,20 +209,21 @@ void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { // So set the save points for those. // Use the points found by shrink-wrapping, if any. + if (!MFI.getSavePoints().empty()) { - assert(MFI.getSavePoints().size() == 1 && - "Multiple save points not yet supported!"); - const auto &SavePoint = *MFI.getSavePoints().begin(); - SaveBlocks.push_back(SavePoint.first); - assert(MFI.getRestorePoints().size() == 1 && - "Multiple restore points not yet supported!"); - const auto &RestorePoint = *MFI.getRestorePoints().begin(); - MachineBasicBlock *RestoreBlock = RestorePoint.first; - // If RestoreBlock does not have any successor and is not a return block - // then the end point is unreachable and we do not need to insert any - // epilogue. 
- if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) - RestoreBlocks.push_back(RestoreBlock); + assert(!MFI.getRestorePoints().empty() && + "Both restores and saves must be set"); + for (auto &item : MFI.getSavePoints()) + SaveBlocks.push_back(item.first); + + for (auto &item : MFI.getRestorePoints()) { + MachineBasicBlock *RestoreBlock = item.first; + // If RestoreBlock does not have any successor and is not a return block + // then the end point is unreachable and we do not need to insert any + // epilogue. + if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) + RestoreBlocks.push_back(RestoreBlock); + } return; } diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index aae3e49f6c70b..0fa9a1037744b 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -2079,8 +2079,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, // won't be generated by emitEpilogue(), because shrink-wrap has chosen new // RestoreBlock. So we handle this case here. 
if (!MFI.getSavePoints().empty() && MFI.hasTailCall()) { - assert(MFI.getRestorePoints().size() < 2 && - "MFI can't contain multiple restore points!"); for (MachineBasicBlock &MBB : MF) { if (MBB.isReturnBlock() && (!MFI.getRestorePoints().contains(&MBB))) createTailCallBranchInstr(MBB); diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 75e7cf347e461..58307fe55163e 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -547,9 +547,8 @@ uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding( return alignTo(MFI.getStackSize() + RVFI->getRVVPadding(), getStackAlign()); } -static SmallVector -getUnmanagedCSI(const MachineFunction &MF, - const std::vector &CSI) { +SmallVector RISCVFrameLowering::getUnmanagedCSI( + const MachineFunction &MF, const std::vector &CSI) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); SmallVector NonLibcallCSI; @@ -567,7 +566,6 @@ getRVVCalleeSavedInfo(const MachineFunction &MF, const std::vector &CSI) { const MachineFrameInfo &MFI = MF.getFrameInfo(); SmallVector RVVCSI; - for (auto &CS : CSI) { int FI = CS.getFrameIdx(); if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector) @@ -948,15 +946,22 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // Determine the correct frame layout determineFrameLayout(MF); - const auto &CSI = MFI.getCalleeSavedInfo(); + const auto &CSI = MFI.getSaveCSInfo(&MBB); // Skip to before the spills of scalar callee-saved registers // FIXME: assumes exactly one instruction is used to restore each // callee-saved register. - MBBI = std::prev(MBBI, getRVVCalleeSavedInfo(MF, CSI).size() + - getUnmanagedCSI(MF, CSI).size()); CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); bool NeedsDwarfCFI = needsDwarfCFI(MF); + // For scalar register spills we skip 2 instrs at once, because right after + // spills there are cfi instructions. 
At the moment of prolog emission they + // are already inserted for scalar instructions, but not for vector + // instructions. + int ScalarDistance = getUnmanagedCSI(MF, CSI).size(); + if (NeedsDwarfCFI) + ScalarDistance *= 2; + int VectorDistance = getRVVCalleeSavedInfo(MF, CSI).size(); + MBBI = std::prev(MBBI, VectorDistance + ScalarDistance); // If libcalls are used to spill and restore callee-saved registers, the frame // has two sections; the opaque section managed by the libcalls, and the @@ -1045,6 +1050,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, StackSize -= StackAdj; if (NeedsDwarfCFI) { + CFIBuilder.setInsertPoint(MBBI); CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize); for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI)) CFIBuilder.buildOffset(CS.getReg(), @@ -1073,15 +1079,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // to the stack, not before. // FIXME: assumes exactly one instruction is used to save each callee-saved // register. - std::advance(MBBI, getUnmanagedCSI(MF, CSI).size()); - CFIBuilder.setInsertPoint(MBBI); - - // Iterate over list of callee-saved registers and emit .cfi_offset - // directives. - if (NeedsDwarfCFI) - for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI)) - CFIBuilder.buildOffset(CS.getReg(), - MFI.getObjectOffset(CS.getFrameIdx())); + std::advance(MBBI, ScalarDistance); // Generate new FP. if (hasFP(MF)) { @@ -1100,8 +1098,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, MachineInstr::FrameSetup, getStackAlign()); } - if (NeedsDwarfCFI) + if (NeedsDwarfCFI) { + CFIBuilder.setInsertPoint(MBBI); CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize()); + } } uint64_t SecondSPAdjustAmount = 0; @@ -1131,6 +1131,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, } if (NeedsDwarfCFI && !hasFP(MF)) { + CFIBuilder.setInsertPoint(MBBI); // Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb". 
CFIBuilder.insertCFIInst(createDefCFAExpression( *RI, SPReg, getStackSizeWithRVVPadding(MF), RVVStackSize / 8)); @@ -1238,7 +1239,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, --MBBI; } - const auto &CSI = MFI.getCalleeSavedInfo(); + const auto &CSI = MFI.getRestoreCSInfo(&MBB); // Skip to before the restores of scalar callee-saved registers // FIXME: assumes exactly one instruction is used to restore each @@ -1315,8 +1316,11 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, // Skip to after the restores of scalar callee-saved registers // FIXME: assumes exactly one instruction is used to restore each // callee-saved register. - MBBI = std::next(FirstScalarCSRRestoreInsn, getUnmanagedCSI(MF, CSI).size()); - CFIBuilder.setInsertPoint(MBBI); + // Skip CSR restore instructions + corresponding cfi restore instructions + int ScalarDistance = getUnmanagedCSI(MF, CSI).size(); + if (NeedsDwarfCFI) + ScalarDistance *= 2; + MBBI = std::next(FirstScalarCSRRestoreInsn, ScalarDistance); if (getLibCallID(MF, CSI) != -1) { // tail __riscv_restore_[0-12] instruction is considered as a terminator, @@ -1331,11 +1335,6 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, return; } - // Recover callee-saved registers. - if (NeedsDwarfCFI) - for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI)) - CFIBuilder.buildRestore(CS.getReg()); - if (RVFI->isPushable(MF) && MBBI != MBB.end() && isPop(MBBI->getOpcode())) { // Use available stack adjustment in pop instruction to deallocate stack // space. Align the stack size down to a multiple of 16. 
This is needed for @@ -2110,6 +2109,34 @@ bool RISCVFrameLowering::assignCalleeSavedSpillSlots( return true; } +static int64_t calculateCSRSpillOffsets(MachineFrameInfo &MFI, + const TargetFrameLowering *TFI, + int MinCSFI, int FrameIdx) { + int LocalAreaOffset = -TFI->getOffsetOfLocalArea(); + Align MaxAlign = MFI.getMaxAlign(); + Align Alignment = MFI.getObjectAlign(FrameIdx); + MaxAlign = std::max(MaxAlign, Alignment); + int64_t Offset = LocalAreaOffset; + + for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) { + // Only allocate objects on the default stack. + if (MFI.getStackID(i) != TargetStackID::Default) + continue; + + int64_t FixedOff; + FixedOff = -MFI.getObjectOffset(i); + if (FixedOff > Offset) + Offset = FixedOff; + } + + for (int i = MinCSFI; i <= FrameIdx; ++i) { + Offset += MFI.getObjectSize(i); + } + + Offset = alignTo(Offset, Alignment); + return -Offset; +} + bool RISCVFrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef CSI, const TargetRegisterInfo *TRI) const { @@ -2137,6 +2164,7 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( MBB.addLiveIn(Reg); } + // Emit CM.PUSH with base SPimm & evaluate Push stack if (RVFI->isPushable(*MF)) { // Emit CM.PUSH with base StackAdj & evaluate Push stack unsigned PushedRegNum = RVFI->getRVPushRegs(); @@ -2179,7 +2207,54 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( MachineInstr::FrameSetup); } }; + storeRegsToStackSlots(UnmanagedCSI); + + bool NeedsDwarfCFI = needsDwarfCFI(*MF); + // Iterate over list of callee-saved registers and emit .cfi_offset + // directives. 
+ if (NeedsDwarfCFI) { + CFIInstBuilder CFIBuilder(MBB, MI, MachineInstr::FrameSetup); + MachineFrameInfo &MFI = MF->getFrameInfo(); + + for (const CalleeSavedInfo &CS : UnmanagedCSI) { + int FrameIdx = CS.getFrameIdx(); + if (FrameIdx < 0 || + MFI.getStackID(FrameIdx) != TargetStackID::ScalableVector) { + int64_t Offset = 0; + + auto *RVFI = MF->getInfo(); + std::vector GCSI = MFI.getCalleeSavedInfo(); + unsigned MinCSFI = std::numeric_limits::max(); + for (auto CS : GCSI) { + unsigned NonNegCSFI = + CS.getFrameIdx() >= 0 ? CS.getFrameIdx() : MinCSFI; + if (NonNegCSFI < MinCSFI) + MinCSFI = NonNegCSFI; + } + if (MinCSFI == std::numeric_limits::max()) + MinCSFI = 0; + + if (RVFI->isSiFivePreemptibleInterrupt(*MF)) { + for (int I = 0; I < 2; ++I) { + int FI = RVFI->getInterruptCSRFrameIndex(I); + MinCSFI = std::min(MinCSFI, FI); + } + } + + if (FrameIdx < 0 && + (RVFI->isPushable(*MF) || RVFI->useSaveRestoreLibCalls(*MF))) { + Offset = MFI.getObjectOffset(FrameIdx); + } else { + const TargetFrameLowering *TFI = + MF->getSubtarget().getFrameLowering(); + Offset = calculateCSRSpillOffsets(MFI, TFI, MinCSFI, FrameIdx); + } + CFIBuilder.buildOffset(CS.getReg(), Offset); + } + } + } + storeRegsToStackSlots(RVVCSI); return true; @@ -2272,8 +2347,17 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters( } }; loadRegFromStackSlot(RVVCSI); + loadRegFromStackSlot(UnmanagedCSI); + bool NeedsDwarfCFI = needsDwarfCFI(*MF); + // Recover callee-saved registers. 
+ if (NeedsDwarfCFI) { + CFIInstBuilder CFIBuilder(MBB, MI, MachineInstr::FrameDestroy); + for (const CalleeSavedInfo &CS : UnmanagedCSI) + CFIBuilder.buildRestore(CS.getReg()); + } + + RISCVMachineFunctionInfo *RVFI = MF->getInfo(); if (RVFI->useQCIInterrupt(*MF)) { // Don't emit anything here because restoration is handled by @@ -2516,3 +2600,14 @@ Register RISCVFrameLowering::getInitialCFARegister(const MachineFunction &MF) const { return RISCV::X2; } + +bool RISCVFrameLowering::enableCSRSaveRestorePointsSplit() const { + // Zcmp extension introduces cm.push and cm.pop instructions, which allow to + // perform all spills and restores in one corresponding instruction. This + // contradicts the idea of splitting Save Restore points. "-msave-restore" + // does the same, not via new instructions but via save/restore libcalls. + if (!STI.hasStdExtZcmp() && !STI.enableSaveRestore() && + !STI.hasVendorXqccmp()) + return true; + return false; +} diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h index 87980dfb09f96..d4ce5b152a402 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -31,6 +31,10 @@ class RISCVFrameLowering : public TargetFrameLowering { uint64_t getStackSizeWithRVVPadding(const MachineFunction &MF) const; + SmallVector + getUnmanagedCSI(const MachineFunction &MF, + const std::vector &CSI) const; + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override; @@ -87,6 +91,8 @@ class RISCVFrameLowering : public TargetFrameLowering { uint64_t ProbeSize, bool DynAllocation, MachineInstr::MIFlag Flag) const; + bool enableCSRSaveRestorePointsSplit() const override; + protected: const RISCVSubtarget &STI; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 263d6a1fc2220..6e66a705e6fc2 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ 
b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -511,12 +511,69 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); DebugLoc DL = MI.getDebugLoc(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); Register FrameReg; StackOffset Offset = getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg); + + const auto &CSI = + getFrameLowering(MF)->getUnmanagedCSI(MF, MFI.getCalleeSavedInfo()); + + if (!CSI.empty()) { + int MinCSFI = CSI.front().getFrameIdx(); + int MaxCSFI = CSI.back().getFrameIdx(); + + // If our FrameIndex is a CSI FrameIndex, we in some cases need additional + // adjustment + if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) { + MachineBasicBlock *SpilledIn = nullptr; + MachineBasicBlock *RestoredIn = nullptr; + auto It = std::find_if(CSI.begin(), CSI.end(), [FrameIndex](auto &CS) { + return CS.getFrameIdx() == FrameIndex; + }); + + assert(It != CSI.end() && + "Didn't find CalleeSavedInfo for CalleeSaved FrameIndex"); + + assert(!(MI.mayLoad() && MI.mayStore()) && + "Instruction with frame index operand may load and store " + "simultaneously!"); + + if (MI.mayStore()) + SpilledIn = MFI.findSpilledIn(*It); + else if (MI.mayLoad()) + RestoredIn = MFI.findRestoredIn(*It); + else + llvm_unreachable( + "Instruction with frame index operand neither loads nor stores!"); + + bool SpilledRestoredInPrologEpilog = true; + // If we didn't manage to find NCD (NCPD) for the list of Save (Restore) + // blocks, spill (restore) will be unconditionally in Prolog (Epilog) + if (MI.mayStore() && !MFI.getPrologPoints().empty()) { + SpilledRestoredInPrologEpilog = + (llvm::count(MFI.getPrologPoints(), SpilledIn) > 0); + } else if (MI.mayLoad() && !MFI.getEpilogPoints().empty()) { + SpilledRestoredInPrologEpilog = + (llvm::count(MFI.getEpilogPoints(), 
RestoredIn) > 0); + } + + // For spills/restores performed not in Prolog/Epilog we need to add full + // SP offset, despite SPAdjusment optimization, because at the end of + // Prolog or at the start of Epilog SP has maximum offset + uint64_t FirstSPAdjustAmount = + getFrameLowering(MF)->getFirstSPAdjustAmount(MF); + if (FirstSPAdjustAmount && !SpilledRestoredInPrologEpilog) { + Offset += StackOffset::getFixed( + getFrameLowering(MF)->getStackSizeWithRVVPadding(MF) - + FirstSPAdjustAmount); + } + } + } + bool IsRVVSpill = RISCV::isRVVSpill(MI); if (!IsRVVSpill) Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); diff --git a/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir b/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir index 1e14e7149817a..89f4e8eb161ce 100644 --- a/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir +++ b/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir @@ -121,9 +121,17 @@ frameInfo: hasCalls: true maxCallFrameSize: 0 savePoint: - - point: '%bb.1' + - point: '%bb.1' + registers: + - '$fp' + - '$lr' + - '$x28' restorePoint: - - point: '%bb.1' + - point: '%bb.1' + registers: + - '$fp' + - '$lr' + - '$x28' stack: - { id: 0, size: 16, alignment: 16, stack-id: scalable-vector } machineFunctionInfo: {} diff --git a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll index 35ddcfd9ba6d6..a61d669b014b5 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll @@ -49,9 +49,9 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64 ; CHECK-NEXT: slwi r8, r4, 1 ; CHECK-NEXT: li r10, 0 ; CHECK-NEXT: li r11, 0 -; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill -; CHECK-NEXT: add r8, r4, r8 ; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill +; CHECK-NEXT: add r8, r4, r8 +; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill ; CHECK-NEXT: add r9, r5, r8 ; CHECK-NEXT: add r5, r5, r4 ; CHECK-NEXT: add 
r8, r3, r5 @@ -84,8 +84,8 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64 ; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt ; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: lwz r31, -4(r1) # 4-byte Folded Reload ; CHECK-NEXT: lwz r30, -8(r1) # 4-byte Folded Reload +; CHECK-NEXT: lwz r31, -4(r1) # 4-byte Folded Reload ; CHECK-NEXT: mr r4, r5 ; CHECK-NEXT: blr ; CHECK-NEXT: L..BB0_6: diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll index 8283e7bac3457..0be04d4efc263 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain.ll @@ -426,11 +426,11 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-NEXT: cmpdi r6, 0 ; CHECK-NEXT: ble cr0, .LBB4_4 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: add r5, r3, r5 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: mtctr r6 ; CHECK-NEXT: mulli r11, r4, 10 ; CHECK-NEXT: sldi r8, r4, 2 @@ -455,9 +455,9 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-NEXT: maddld r3, r6, r28, r3 ; CHECK-NEXT: bdnz .LBB4_2 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB4_4: ; CHECK-NEXT: li r3, 0 @@ -728,23 +728,23 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64 ; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r26, r10 ; CHECK-NEXT: 
cmpdi r6, 1 -; CHECK-NEXT: std r14, -144(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r15, -136(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r16, -128(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r17, -120(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r18, -112(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r19, -104(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r20, -96(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r21, -88(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r22, -80(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r23, -72(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r24, -64(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r31, -8(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, -64(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r23, -72(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r22, -80(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r21, -88(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r20, -96(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r19, -104(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r18, -112(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r17, -120(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r16, -128(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r15, -136(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r14, -144(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r2, -152(r1) # 8-byte Folded Spill ; CHECK-NEXT: iselgt r11, r6, r11 ; CHECK-NEXT: addi r12, r11, -1 @@ -963,24 +963,24 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr 
%input2, ptr %output, i64 ; CHECK-NEXT: bne cr0, .LBB7_7 ; CHECK-NEXT: .LBB7_8: ; CHECK-NEXT: ld r2, -152(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r31, -8(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r24, -64(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r23, -72(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r22, -80(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r21, -88(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r20, -96(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r19, -104(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r18, -112(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r17, -120(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r16, -128(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r15, -136(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r14, -144(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r15, -136(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r16, -128(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r17, -120(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r18, -112(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r19, -104(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r20, -96(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r21, -88(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r22, -80(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r23, -72(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, -64(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld 
r31, -8(r1) # 8-byte Folded Reload ; CHECK-NEXT: .LBB7_9: # %for.cond.cleanup ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll index cc38e250f183f..00627b8434beb 100644 --- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll @@ -189,8 +189,8 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: cmplwi r4, 0 ; CHECK-NEXT: beq cr0, .LBB2_4 ; CHECK-NEXT: # %bb.1: # %bb3.preheader -; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: addi r10, r3, 4002 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: li r5, -1 @@ -198,10 +198,10 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: li r7, 3 ; CHECK-NEXT: li r8, 5 ; CHECK-NEXT: li r9, 9 -; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB2_2: # %bb3 @@ -226,13 +226,13 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: maddld r3, r11, r25, r3 ; CHECK-NEXT: bdnz .LBB2_2 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; 
CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: add r3, r3, r4 -; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB2_4: ; CHECK-NEXT: addi r3, r4, 0 @@ -583,10 +583,10 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: beq cr0, .LBB6_9 ; CHECK-NEXT: # %bb.1: # %bb3 ; CHECK-NEXT: addis r5, r2, .LC0@toc@ha -; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: ld r5, .LC0@toc@l(r5) -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: addi r6, r3, 4009 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: li r7, -7 @@ -649,9 +649,9 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: add r4, r30, r4 ; CHECK-NEXT: b .LBB6_3 ; CHECK-NEXT: .LBB6_8: -; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB6_9: ; CHECK-NEXT: li r3, 0 diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll index 79f2ef3e3746a..3de0fe239021c 100644 --- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll @@ -8,22 +8,22 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-NEXT: cmpd 5, 7 ; CHECK-NEXT: bgelr 0 ; CHECK-NEXT: # %bb.1: # %.preheader -; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 27, 5, 2 -; CHECK-NEXT: std 28, 
-32(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 28, 5, 3 ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-NEXT: addi 30, 5, 1 +; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill +; CHECK-NEXT: addi 28, 5, 3 +; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill +; CHECK-NEXT: addi 27, 5, 2 ; CHECK-NEXT: mulld 12, 8, 5 ; CHECK-NEXT: mulld 0, 9, 8 ; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill ; CHECK-NEXT: addi 29, 3, 16 ; CHECK-NEXT: sldi 11, 10, 3 -; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill ; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill +; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill ; CHECK-NEXT: mulld 30, 8, 30 ; CHECK-NEXT: mulld 28, 8, 28 ; CHECK-NEXT: mulld 8, 8, 27 @@ -104,15 +104,15 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-NEXT: blt 0, .LBB0_5 ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_6: -; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 24, -64(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 23, -72(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 22, -80(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 23, -72(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 24, -64(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 25, -56(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 26, -48(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 
29, -24(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; CHECK-NEXT: blr %9 = icmp slt i64 %2, %4 br i1 %9, label %10, label %97 diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll index af0942e99182d..f22f89a22816a 100644 --- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll @@ -56,28 +56,28 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. ; CHECK-NEXT: .cfi_offset v29, -240 ; CHECK-NEXT: .cfi_offset v30, -224 ; CHECK-NEXT: .cfi_offset v31, -208 -; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 2, 728(1) -; CHECK-NEXT: ld 14, 688(1) -; CHECK-NEXT: ld 11, 704(1) -; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill ; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill +; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill ; CHECK-NEXT: mr 21, 5 ; CHECK-NEXT: lwa 5, 0(7) +; CHECK-NEXT: ld 2, 728(1) +; CHECK-NEXT: ld 11, 704(1) +; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill +; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 14, 688(1) ; CHECK-NEXT: ld 7, 720(1) -; CHECK-NEXT: std 22, 464(1) # 8-byte Folded Spill ; CHECK-NEXT: std 23, 472(1) # 8-byte Folded Spill +; CHECK-NEXT: std 22, 464(1) # 8-byte Folded Spill ; CHECK-NEXT: mr 22, 6 ; CHECK-NEXT: ld 6, 848(1) ; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: ld 15, 736(1) -; CHECK-NEXT: std 18, 432(1) # 8-byte Folded Spill ; CHECK-NEXT: std 19, 440(1) # 8-byte Folded Spill +; CHECK-NEXT: std 18, 432(1) # 8-byte Folded Spill ; CHECK-NEXT: ld 19, 768(1) ; CHECK-NEXT: ld 18, 760(1) -; CHECK-NEXT: std 30, 528(1) # 8-byte Folded Spill ; CHECK-NEXT: std 31, 536(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, 528(1) # 8-byte Folded Spill ; CHECK-NEXT: ld 12, 696(1) ; CHECK-NEXT: lxv 0, 0(9) ; CHECK-NEXT: std 9, 64(1) # 8-byte Folded Spill @@ -85,24 +85,24 @@ 
define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. ; CHECK-NEXT: lxv 1, 0(8) ; CHECK-NEXT: cmpldi 3, 9 ; CHECK-NEXT: ld 30, 824(1) -; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill ; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill ; CHECK-NEXT: ld 29, 840(1) ; CHECK-NEXT: ld 28, 832(1) -; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 23, 784(1) -; CHECK-NEXT: ld 20, 776(1) -; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 25, 800(1) -; CHECK-NEXT: ld 24, 792(1) -; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill ; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill ; CHECK-NEXT: ld 27, 816(1) ; CHECK-NEXT: ld 26, 808(1) -; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 25, 800(1) +; CHECK-NEXT: ld 24, 792(1) +; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill +; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 23, 784(1) +; CHECK-NEXT: ld 20, 776(1) +; CHECK-NEXT: stxv 63, 384(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 62, 368(1) # 16-byte Folded Spill ; CHECK-NEXT: ld 17, 752(1) ; CHECK-NEXT: extswsli 9, 5, 3 ; CHECK-NEXT: lxv 4, 0(14) @@ -154,37 +154,37 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: lxv 12, 0(20) ; CHECK-NEXT: lxv 11, 0(23) ; CHECK-NEXT: add 20, 21, 9 -; CHECK-NEXT: stfd 28, 560(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 29, 568(1) # 8-byte Folded Spill +; CHECK-NEXT: stxv 61, 352(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 60, 336(1) # 16-byte Folded Spill ; CHECK-NEXT: lxv 10, 0(24) ; CHECK-NEXT: lxv 9, 0(25) -; CHECK-NEXT: stfd 30, 576(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 31, 584(1) # 8-byte Folded Spill +; CHECK-NEXT: stxv 59, 320(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 58, 304(1) # 16-byte Folded Spill ; CHECK-NEXT: lxv 8, 0(26) ; CHECK-NEXT: lxv 7, 0(27) ; CHECK-NEXT: addi 12, 12, 32 ; CHECK-NEXT: li 27, 0 ; CHECK-NEXT: mr 26, 21 -; CHECK-NEXT: stxv 52, 208(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 53, 224(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 57, 288(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 56, 272(1) # 16-byte Folded Spill ; CHECK-NEXT: lxv 6, 0(30) ; CHECK-NEXT: lxv 41, 0(28) ; CHECK-NEXT: addi 7, 11, 1 ; CHECK-NEXT: add 11, 0, 21 ; CHECK-NEXT: li 28, 1 -; CHECK-NEXT: stxv 54, 240(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 55, 256(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 54, 240(1) # 16-byte Folded Spill ; CHECK-NEXT: lxv 43, 0(29) ; CHECK-NEXT: lxv 42, 0(5) -; CHECK-NEXT: stxv 56, 272(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 57, 288(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 53, 224(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 52, 208(1) # 16-byte Folded Spill ; CHECK-NEXT: addi 11, 11, 32 -; CHECK-NEXT: stxv 58, 304(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 59, 320(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 60, 336(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 61, 352(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 62, 368(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 63, 384(1) # 16-byte Folded Spill +; CHECK-NEXT: stfd 31, 584(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 30, 576(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 29, 568(1) # 8-byte Folded Spill +; 
CHECK-NEXT: stfd 28, 560(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill ; CHECK-NEXT: std 16, 96(1) # 8-byte Folded Spill ; CHECK-NEXT: std 17, 104(1) # 8-byte Folded Spill ; CHECK-NEXT: std 24, 144(1) # 8-byte Folded Spill @@ -268,50 +268,50 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. ; CHECK-NEXT: ble 0, .LBB0_3 ; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit ; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload +; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 1, 0(3) ; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 60, 336(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 59, 320(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 58, 304(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 57, 288(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 56, 272(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 55, 256(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload +; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 19, 440(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 20, 448(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 21, 456(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 22, 464(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 23, 472(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 0, 0(3) ; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload -; 
CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 29, 568(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 28, 560(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 27, 552(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 26, 544(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 31, 536(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 30, 528(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 27, 504(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 30, 528(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 31, 536(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 26, 544(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 27, 552(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 28, 560(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 29, 568(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 5, 0(3) ; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 23, 472(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 22, 464(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, 456(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, 448(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, 440(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload +; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 55, 256(1) # 
16-byte Folded Reload +; CHECK-NEXT: lxv 56, 272(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 57, 288(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 58, 304(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 59, 320(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 60, 336(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload ; CHECK-NEXT: stxv 4, 0(3) ; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload +; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload ; CHECK-NEXT: stxv 3, 0(3) ; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 2, 0(8) diff --git a/llvm/test/CodeGen/PowerPC/pr43527.ll b/llvm/test/CodeGen/PowerPC/pr43527.ll index adfea51077a0b..e46ff748f0b14 100644 --- a/llvm/test/CodeGen/PowerPC/pr43527.ll +++ b/llvm/test/CodeGen/PowerPC/pr43527.ll @@ -15,9 +15,9 @@ define dso_local void @test(i64 %arg, i64 %arg1, ptr %arg2) { ; CHECK-NEXT: .cfi_offset r28, -32 ; CHECK-NEXT: .cfi_offset r29, -24 ; CHECK-NEXT: .cfi_offset r30, -16 -; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: stdu r1, -64(r1) ; CHECK-NEXT: mr r30, r5 ; CHECK-NEXT: sub r29, r4, r3 @@ -36,9 +36,9 @@ define dso_local void @test(i64 %arg, i64 %arg1, ptr %arg2) { ; CHECK-NEXT: # %bb.4: # %bb15 ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB0_5: # 
%bb2 diff --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll index 12d0b056ca886..44215ce8c4dff 100644 --- a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll +++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll @@ -10,26 +10,26 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-NEXT: ble 0, .LBB0_4 ; POWERPC64-NEXT: # %bb.1: # %for.body.preheader ; POWERPC64-NEXT: addi 4, 4, -1 -; POWERPC64-NEXT: std 14, -144(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 15, -136(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 16, -128(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 17, -120(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 18, -112(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 19, -104(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 20, -96(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 21, -88(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 22, -80(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 23, -72(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 24, -64(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 25, -56(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 26, -48(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 27, -40(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 29, -24(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 28, -32(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 27, -40(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 26, -48(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 25, -56(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 24, -64(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 23, -72(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 22, -80(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 21, -88(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 20, -96(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 19, -104(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 18, -112(1) # 
8-byte Folded Spill +; POWERPC64-NEXT: std 17, -120(1) # 8-byte Folded Spill ; POWERPC64-NEXT: clrldi 4, 4, 32 ; POWERPC64-NEXT: addi 4, 4, 1 -; POWERPC64-NEXT: std 29, -24(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill -; POWERPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 16, -128(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 15, -136(1) # 8-byte Folded Spill +; POWERPC64-NEXT: std 14, -144(1) # 8-byte Folded Spill ; POWERPC64-NEXT: mtctr 4 ; POWERPC64-NEXT: li 4, 0 ; POWERPC64-NEXT: .p2align 4 @@ -40,25 +40,25 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-NEXT: #NO_APP ; POWERPC64-NEXT: bdnz .LBB0_2 ; POWERPC64-NEXT: # %bb.3: -; POWERPC64-NEXT: ld 31, -8(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 14, -144(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 15, -136(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 16, -128(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 17, -120(1) # 8-byte Folded Reload ; POWERPC64-NEXT: extsw 3, 4 -; POWERPC64-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 24, -64(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 23, -72(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 20, -96(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 19, -104(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 18, -112(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 17, -120(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 16, -128(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 15, -136(1) # 8-byte Folded Reload -; POWERPC64-NEXT: ld 14, -144(1) # 8-byte Folded 
Reload +; POWERPC64-NEXT: ld 19, -104(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 20, -96(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 21, -88(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 22, -80(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 23, -72(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 24, -64(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 25, -56(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 26, -48(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; POWERPC64-NEXT: ld 31, -8(1) # 8-byte Folded Reload ; POWERPC64-NEXT: blr ; POWERPC64-NEXT: .LBB0_4: ; POWERPC64-NEXT: li 4, 0 @@ -70,24 +70,24 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC32-AIX-NEXT: cmpwi 4, 0 ; POWERPC32-AIX-NEXT: ble 0, L..BB0_4 ; POWERPC32-AIX-NEXT: # %bb.1: # %for.body.preheader -; POWERPC32-AIX-NEXT: stw 14, -72(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 15, -68(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 16, -64(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 17, -60(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 18, -56(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 19, -52(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 20, -48(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 21, -44(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 22, -40(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 23, -36(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 24, -32(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 25, -28(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 26, -24(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 27, -20(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 28, -16(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: stw 29, -12(1) # 4-byte Folded Spill -; 
POWERPC32-AIX-NEXT: stw 30, -8(1) # 4-byte Folded Spill ; POWERPC32-AIX-NEXT: stw 31, -4(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 30, -8(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 29, -12(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 28, -16(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 27, -20(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 26, -24(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 25, -28(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 24, -32(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 23, -36(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 22, -40(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 21, -44(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 20, -48(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 19, -52(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 18, -56(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 17, -60(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 16, -64(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 15, -68(1) # 4-byte Folded Spill +; POWERPC32-AIX-NEXT: stw 14, -72(1) # 4-byte Folded Spill ; POWERPC32-AIX-NEXT: mtctr 4 ; POWERPC32-AIX-NEXT: li 4, 0 ; POWERPC32-AIX-NEXT: .align 4 @@ -98,25 +98,25 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC32-AIX-NEXT: #NO_APP ; POWERPC32-AIX-NEXT: bdnz L..BB0_2 ; POWERPC32-AIX-NEXT: # %bb.3: -; POWERPC32-AIX-NEXT: lwz 31, -4(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 30, -8(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 29, -12(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 28, -16(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 14, -72(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 15, -68(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 16, -64(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 17, -60(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: mr 3, 4 -; POWERPC32-AIX-NEXT: lwz 27, -20(1) # 4-byte Folded Reload -; 
POWERPC32-AIX-NEXT: lwz 26, -24(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 25, -28(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 24, -32(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 23, -36(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 22, -40(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 21, -44(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 20, -48(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 19, -52(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 18, -56(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 17, -60(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 16, -64(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 15, -68(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: lwz 14, -72(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 19, -52(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 20, -48(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 21, -44(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 22, -40(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 23, -36(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 24, -32(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 25, -28(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 26, -24(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 27, -20(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 28, -16(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 29, -12(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 30, -8(1) # 4-byte Folded Reload +; POWERPC32-AIX-NEXT: lwz 31, -4(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: blr ; POWERPC32-AIX-NEXT: L..BB0_4: ; POWERPC32-AIX-NEXT: li 3, 0 @@ -128,26 +128,26 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-AIX-NEXT: blt 0, L..BB0_4 ; POWERPC64-AIX-NEXT: # %bb.1: # %for.body.preheader ; POWERPC64-AIX-NEXT: addi 4, 4, -1 -; POWERPC64-AIX-NEXT: std 14, -144(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 15, -136(1) # 8-byte 
Folded Spill -; POWERPC64-AIX-NEXT: std 16, -128(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 17, -120(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 18, -112(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 19, -104(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 20, -96(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 21, -88(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 22, -80(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 23, -72(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 24, -64(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 25, -56(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 26, -48(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 27, -40(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 31, -8(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 30, -16(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 29, -24(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 28, -32(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 27, -40(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 26, -48(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 25, -56(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 24, -64(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 23, -72(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 22, -80(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 21, -88(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 20, -96(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 19, -104(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 18, -112(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 17, -120(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: clrldi 4, 4, 32 ; POWERPC64-AIX-NEXT: addi 4, 4, 1 -; POWERPC64-AIX-NEXT: std 29, -24(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 30, -16(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: std 31, -8(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 16, -128(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: std 15, -136(1) # 8-byte 
Folded Spill +; POWERPC64-AIX-NEXT: std 14, -144(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: mtctr 4 ; POWERPC64-AIX-NEXT: li 4, 0 ; POWERPC64-AIX-NEXT: .align 4 @@ -158,25 +158,25 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-AIX-NEXT: #NO_APP ; POWERPC64-AIX-NEXT: bdnz L..BB0_2 ; POWERPC64-AIX-NEXT: # %bb.3: -; POWERPC64-AIX-NEXT: ld 31, -8(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 14, -144(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 15, -136(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 16, -128(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 17, -120(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: extsw 3, 4 -; POWERPC64-AIX-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 24, -64(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 23, -72(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 20, -96(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 19, -104(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 18, -112(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 17, -120(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 16, -128(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 15, -136(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: ld 14, -144(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 19, -104(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 20, -96(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 21, -88(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 22, -80(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 23, -72(1) # 
8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 24, -64(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 25, -56(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 26, -48(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; POWERPC64-AIX-NEXT: ld 31, -8(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: blr ; POWERPC64-AIX-NEXT: L..BB0_4: ; POWERPC64-AIX-NEXT: li 4, 0 diff --git a/llvm/test/CodeGen/RISCV/shrinkwrap-split.mir b/llvm/test/CodeGen/RISCV/shrinkwrap-split.mir new file mode 100644 index 0000000000000..60e0a3fab16e1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/shrinkwrap-split.mir @@ -0,0 +1,290 @@ +# RUN: llc -march=riscv64 -run-pass shrink-wrap -enable-shrink-wrap-into-multiple-points=true %s -o - | FileCheck %s + +# CHECK: savePoint: +# CHECK-NEXT: - point: '%bb.0' +# CHECK-NEXT: registers: +# CHECK-NEXT: - '$x1' +# CHECK-NEXT: - '$x26' +# CHECK-NEXT: - '$x27' +# CHECK-NEXT: - '$x9' +# CHECK-NEXT: - point: '%bb.2' +# CHECK-NEXT: registers: +# CHECK-NEXT: - '$x18' +# CHECK-NEXT: - '$x19' +# CHECK-NEXT: - '$x20' +# CHECK-NEXT: - '$x21' +# CHECK-NEXT: - '$x22' +# CHECK-NEXT: - '$x23' +# CHECK-NEXT: - '$x24' +# CHECK-NEXT: - '$x25' +# CHECK-NEXT: - '$x8' +# CHECK-NEXT: restorePoint: +# CHECK-NEXT: - point: '%bb.7' +# CHECK-NEXT: registers: +# CHECK-NEXT: - '$x18' +# CHECK-NEXT: - '$x19' +# CHECK-NEXT: - '$x20' +# CHECK-NEXT: - '$x21' +# CHECK-NEXT: - '$x22' +# CHECK-NEXT: - '$x23' +# CHECK-NEXT: - '$x24' +# CHECK-NEXT: - '$x25' +# CHECK-NEXT: - '$x8' +# CHECK-NEXT: - point: '%bb.8' +# CHECK-NEXT: registers: +# CHECK-NEXT: - '$x1' +# CHECK-NEXT: - '$x26' +# CHECK-NEXT: - '$x27' +# CHECK-NEXT: - '$x9' + +--- | + ; ModuleID = 'shrinkwrap-split.ll' + %struct.task = type { i32, i32, [20 x i32] } + + define i32 @test(ptr %t, i32 %i, i1 %cond) { + 
entry: + %arr = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2 + %0 = load i32, ptr %arr, align 4 + %arrayidx2 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 1 + %1 = load i32, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 2 + %2 = load i32, ptr %arrayidx4, align 4 + %arrayidx6 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 3 + %3 = load i32, ptr %arrayidx6, align 4 + %arrayidx8 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 4 + %4 = load i32, ptr %arrayidx8, align 4 + %arrayidx10 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 5 + %5 = load i32, ptr %arrayidx10, align 4 + %arrayidx12 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 6 + %6 = load i32, ptr %arrayidx12, align 4 + %arrayidx14 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 7 + %7 = load i32, ptr %arrayidx14, align 4 + %arrayidx16 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 8 + %8 = load i32, ptr %arrayidx16, align 4 + %arrayidx18 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 9 + %9 = load i32, ptr %arrayidx18, align 4 + %arrayidx20 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 10 + %10 = load i32, ptr %arrayidx20, align 4 + %arrayidx22 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 11 + %11 = load i32, ptr %arrayidx22, align 4 + %arrayidx24 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 12 + %12 = load i32, ptr %arrayidx24, align 4 + %arrayidx26 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 13 + %13 = load i32, ptr %arrayidx26, align 4 + %arrayidx28 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 14 + %14 = load i32, ptr %arrayidx28, align 4 + %arrayidx30 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 15 + %15 = load i32, ptr %arrayidx30, align 4 + %arrayidx32 = getelementptr inbounds 
%struct.task, ptr %t, i64 0, i32 2, i64 16 + %16 = load i32, ptr %arrayidx32, align 4 + %arrayidx34 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 17 + %17 = load i32, ptr %arrayidx34, align 4 + %arrayidx36 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 18 + %18 = load i32, ptr %arrayidx36, align 4 + %arrayidx38 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 19 + %19 = load i32, ptr %arrayidx38, align 4 + %20 = load i32, ptr %t, align 4 + %add = add i32 %10, %0 + %add39 = add i32 %add, %20 + %cmp = icmp slt i32 %add39, %i + br i1 %cmp, label %for.cond.preheader, label %cleanup + + for.cond.preheader: ; preds = %entry + %y = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 1 + %21 = load i32, ptr %y, align 4 + %cmp40.not119 = icmp eq i32 %21, 0 + br i1 %cmp40.not119, label %for.cond.cleanup, label %for.body.preheader + + for.body.preheader: ; preds = %for.cond.preheader + %22 = add i32 %21, -1 + %cond41 = select i1 %cond, i32 %22, i32 %i + store i32 %cond41, ptr %t, align 4 + br label %for.cond.cleanup + + for.cond.cleanup: ; preds = %for.body.preheader, %for.cond.preheader + %23 = phi i32 [ %cond41, %for.body.preheader ], [ %20, %for.cond.preheader ] + %tobool44 = icmp ne i32 %21, 0 + %conv = zext i1 %tobool44 to i32 + %add48 = add i32 %1, %0 + %add49 = add i32 %add48, %2 + %add50 = add i32 %add49, %3 + %add51 = add i32 %add50, %4 + %add52 = add i32 %add51, %5 + %add53 = add i32 %add52, %6 + %add54 = add i32 %add53, %7 + %add55 = add i32 %add54, %8 + %add56 = add i32 %add55, %9 + %add57 = add i32 %add56, %10 + %add58 = add i32 %add57, %11 + %add59 = add i32 %add58, %12 + %add60 = add i32 %add59, %13 + %add61 = add i32 %add60, %14 + %add62 = add i32 %add61, %15 + %add63 = add i32 %add62, %16 + %add64 = add i32 %add63, %17 + %add65 = add i32 %add64, %18 + %add66 = add i32 %add65, %19 + %add67 = add i32 %add66, %conv + %add68 = add i32 %add67, %23 + br label %cleanup + + cleanup: ; preds = %for.cond.cleanup, 
%entry + %retval.0 = phi i32 [ %add68, %for.cond.cleanup ], [ %i, %entry ] + ret i32 %retval.0 + } +... +--- +name: test +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: + - { reg: '$x10', virtual-reg: '' } + - { reg: '$x11', virtual-reg: '' } + - { reg: '$x12', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + varArgsFrameIndex: 0 + varArgsSaveSize: 0 +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.8(0x40000000) + liveins: $x10, $x11, $x12 + + renamable $x13 = COPY $x10 + renamable $x9 = LW $x10, 8 :: (load (s32) from %ir.arr) + renamable $x16 = LW $x10, 48 :: (load (s32) from %ir.arrayidx20) + renamable $x15 = LW $x10, 0 :: (load (s32) from %ir.t) + renamable $x14 = ADD renamable $x16, renamable $x9 + renamable $x10 = COPY $x11 + renamable $x14 = ADDW killed renamable $x14, renamable $x15 + BLT killed renamable $x14, $x11, %bb.1 + + bb.8: + successors: %bb.7(0x80000000) + liveins: $x10 + + PseudoBR %bb.7 + + bb.1.for.cond.preheader: + successors: %bb.6(0x30000000), %bb.2(0x50000000) + liveins: $x9, $x10, $x12, $x13, $x15, $x16 + + renamable $x20 = LW renamable $x13, 12 :: (load (s32) from %ir.arrayidx2) + 
renamable $x30 = LW renamable $x13, 16 :: (load (s32) from %ir.arrayidx4) + renamable $x24 = LW renamable $x13, 20 :: (load (s32) from %ir.arrayidx6) + renamable $x31 = LW renamable $x13, 24 :: (load (s32) from %ir.arrayidx8) + renamable $x25 = LW renamable $x13, 28 :: (load (s32) from %ir.arrayidx10) + renamable $x7 = LW renamable $x13, 32 :: (load (s32) from %ir.arrayidx12) + renamable $x21 = LW renamable $x13, 36 :: (load (s32) from %ir.arrayidx14) + renamable $x14 = LW renamable $x13, 40 :: (load (s32) from %ir.arrayidx16) + renamable $x23 = LW renamable $x13, 44 :: (load (s32) from %ir.arrayidx18) + renamable $x29 = LW renamable $x13, 52 :: (load (s32) from %ir.arrayidx22) + renamable $x22 = LW renamable $x13, 56 :: (load (s32) from %ir.arrayidx24) + renamable $x6 = LW renamable $x13, 60 :: (load (s32) from %ir.arrayidx26) + renamable $x17 = LW renamable $x13, 64 :: (load (s32) from %ir.arrayidx28) + renamable $x5 = LW renamable $x13, 68 :: (load (s32) from %ir.arrayidx30) + renamable $x28 = LW renamable $x13, 72 :: (load (s32) from %ir.arrayidx32) + renamable $x18 = LW renamable $x13, 76 :: (load (s32) from %ir.arrayidx34) + renamable $x19 = LW renamable $x13, 80 :: (load (s32) from %ir.arrayidx36) + renamable $x8 = LW renamable $x13, 4 :: (load (s32) from %ir.y) + renamable $x11 = LW renamable $x13, 84 :: (load (s32) from %ir.arrayidx38) + BEQ renamable $x8, $x0, %bb.6 + PseudoBR %bb.2 + + bb.2.for.body.preheader: + successors: %bb.4(0x40000000), %bb.3(0x40000000) + liveins: $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x28, $x29, $x30, $x31 + + BEQ killed renamable $x12, $x0, %bb.4 + + bb.3: + successors: %bb.5(0x80000000) + liveins: $x5, $x6, $x7, $x8, $x9, $x11, $x13, $x14, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x28, $x29, $x30, $x31 + + renamable $x10 = ADDIW renamable $x8, -1 + PseudoBR %bb.5 + + bb.4.for.body.preheader: + successors: %bb.5(0x80000000) + liveins: 
$x5, $x6, $x7, $x8, $x9, $x10, $x11, $x13, $x14, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x28, $x29, $x30, $x31 + + + bb.5.for.body.preheader: + successors: %bb.6(0x80000000) + liveins: $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x13, $x14, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x28, $x29, $x30, $x31 + + SW renamable $x10, killed renamable $x13, 0 :: (store (s32) into %ir.t) + renamable $x15 = COPY killed renamable $x10 + + bb.6.for.cond.cleanup: + successors: %bb.7(0x80000000) + liveins: $x5, $x6, $x7, $x8, $x9, $x11, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x28, $x29, $x30, $x31 + + renamable $x9 = ADD killed renamable $x20, killed renamable $x9 + renamable $x30 = ADD killed renamable $x30, killed renamable $x24 + renamable $x30 = ADD killed renamable $x9, killed renamable $x30 + renamable $x31 = ADD killed renamable $x31, killed renamable $x25 + renamable $x14 = ADD killed renamable $x21, killed renamable $x14 + renamable $x7 = ADD killed renamable $x31, killed renamable $x7 + renamable $x14 = ADD killed renamable $x14, killed renamable $x23 + renamable $x7 = ADD killed renamable $x30, killed renamable $x7 + renamable $x14 = ADD killed renamable $x14, killed renamable $x16 + renamable $x10 = SLTU $x0, killed renamable $x8 + renamable $x14 = ADD killed renamable $x7, killed renamable $x14 + renamable $x29 = ADD killed renamable $x29, killed renamable $x22 + renamable $x28 = ADD killed renamable $x28, killed renamable $x18 + renamable $x6 = ADD killed renamable $x29, killed renamable $x6 + renamable $x28 = ADD killed renamable $x28, killed renamable $x19 + renamable $x17 = ADD killed renamable $x6, killed renamable $x17 + renamable $x11 = ADD killed renamable $x28, killed renamable $x11 + renamable $x17 = ADD killed renamable $x17, killed renamable $x5 + renamable $x10 = ADD killed renamable $x11, killed renamable $x10 + renamable $x14 = ADD killed renamable $x14, killed renamable $x17 + renamable 
$x10 = ADD killed renamable $x10, killed renamable $x15 + renamable $x10 = ADDW killed renamable $x14, killed renamable $x10 + + bb.7.cleanup: + liveins: $x10 + + PseudoRET implicit $x10 + +... diff --git a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp index c479233a712e7..548e5071405f0 100644 --- a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp +++ b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp @@ -101,15 +101,9 @@ static void cloneFrameInfo( DstMFI.setCVBytesOfCalleeSavedRegisters( SrcMFI.getCVBytesOfCalleeSavedRegisters()); - assert(SrcMFI.getSavePoints().size() < 2 && - "Multiple restore points not yet supported!"); - DstMFI.setSavePoints( constructSaveRestorePoints(SrcMFI.getSavePoints(), Src2DstMBB)); - assert(SrcMFI.getRestorePoints().size() < 2 && - "Multiple restore points not yet supported!"); - DstMFI.setRestorePoints( constructSaveRestorePoints(SrcMFI.getRestorePoints(), Src2DstMBB));