@@ -1114,6 +1114,47 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef<int> PHINumbers, State &S)
11141114 }
11151115}
11161116
1117+ // Look through instructions to find all possible allocas that might become the sret argument
1118+ static SmallSetVector<AllocaInst *, 8 > FindSretAllocas (Value* SRetArg) {
1119+ SmallSetVector<AllocaInst *, 8 > allocas;
1120+ if (AllocaInst *OneSRet = dyn_cast<AllocaInst>(SRetArg)) {
1121+ allocas.insert (OneSRet); // Found it directly
1122+ } else {
1123+ SmallSetVector<Value *, 8 > worklist;
1124+ worklist.insert (SRetArg);
1125+ while (!worklist.empty ()) {
1126+ Value *V = worklist.pop_back_val ();
1127+ if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets ())) {
1128+ allocas.insert (Alloca); // Found a candidate
1129+ } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
1130+ for (Value *Incoming : Phi->incoming_values ()) {
1131+ worklist.insert (Incoming);
1132+ }
1133+ } else if (SelectInst *SI = dyn_cast<SelectInst>(SRetArg)) {
1134+ auto TrueBranch = SI->getTrueValue ();
1135+ auto FalseBranch = SI->getFalseValue ();
1136+ if (TrueBranch && FalseBranch) {
1137+ worklist.insert (TrueBranch);
1138+ worklist.insert (FalseBranch);
1139+ } else {
1140+ llvm_dump (SI);
1141+ assert (false && " Malformed Select" );
1142+ }
1143+ } else {
1144+ llvm_dump (V);
1145+ assert (false && " Unexpected SRet argument" );
1146+ }
1147+ }
1148+ }
1149+ assert (allocas.size () > 0 );
1150+ assert (std::all_of (allocas.begin (), allocas.end (), [&] (AllocaInst* SRetAlloca) JL_NOTSAFEPOINT {
1151+ return (SRetAlloca->getArraySize () == allocas[0 ]->getArraySize () &&
1152+ SRetAlloca->getAllocatedType () == allocas[0 ]->getAllocatedType ());
1153+ }
1154+ ));
1155+ return allocas;
1156+ }
1157+
11171158State LateLowerGCFrame::LocalScan (Function &F) {
11181159 State S (F);
11191160 SmallVector<int , 8 > PHINumbers;
@@ -1165,46 +1206,35 @@ State LateLowerGCFrame::LocalScan(Function &F) {
11651206 Type *ElT = getAttributeAtIndex (CI->getAttributes (), 1 , Attribute::StructRet).getValueAsType ();
11661207 auto tracked = CountTrackedPointers (ElT, true );
11671208 if (tracked.count ) {
1168- AllocaInst *SRet = dyn_cast<AllocaInst>((CI->arg_begin ()[0 ])->stripInBoundsOffsets ());
1169- assert (SRet);
1170- {
1209+ SmallSetVector<AllocaInst *, 8 > allocas = FindSretAllocas ((CI->arg_begin ()[0 ])->stripInBoundsOffsets ());
1210+ // We know that with the right optimizations we can forward a sret directly from an argument
1211+ // This hasn't been seen without adding IPO effects to julia functions but it's possible we need to handle that too
1212+ // If they are tracked.all we can just pass through but if they have a roots bundle it's possible we need to emit some copies ¯\_(ツ)_/¯
1213+ for (AllocaInst *SRet : allocas) {
11711214 if (!(SRet->isStaticAlloca () && isa<PointerType>(ElT) && ElT->getPointerAddressSpace () == AddressSpace::Tracked)) {
11721215 assert (!tracked.derived );
11731216 if (tracked.all ) {
11741217 S.ArrayAllocas [SRet] = tracked.count * cast<ConstantInt>(SRet->getArraySize ())->getZExtValue ();
11751218 }
11761219 else {
11771220 Value *arg1 = (CI->arg_begin ()[1 ])->stripInBoundsOffsets ();
1221+ SmallSetVector<AllocaInst *, 8 > gc_allocas = FindSretAllocas (arg1);
11781222 AllocaInst *SRet_gc = nullptr ;
1179- if (PHINode *Phi = dyn_cast<PHINode>(arg1)) {
1180- for (Value *V : Phi->incoming_values ()) {
1181- if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets ())) {
1182- if (SRet_gc == nullptr ) {
1183- SRet_gc = Alloca;
1184- } else if (SRet_gc == Alloca) {
1185- continue ;
1186- } else {
1187- llvm_dump (Alloca);
1188- llvm_dump (SRet_gc);
1189- assert (false && " Allocas in Phi node should match" );
1190- }
1191- } else {
1192- llvm_dump (V->stripInBoundsOffsets ());
1193- assert (false && " Expected alloca" );
1194- }
1195- }
1196- } else {
1197- SRet_gc = dyn_cast<AllocaInst>(arg1);
1223+ if (gc_allocas.size () == 1 ) {
1224+ SRet_gc = gc_allocas.pop_back_val ();
11981225 }
1199- if (!SRet_gc) {
1226+ else {
12001227 llvm_dump (CI);
1201- llvm_dump (arg1);
1202- assert (false && " Expected alloca" );
1228+ for (AllocaInst *Alloca : gc_allocas) {
1229+ llvm_dump (Alloca);
1230+ }
1231+ assert (false && " Expected single alloca" );
12031232 }
12041233 Type *ElT = SRet_gc->getAllocatedType ();
12051234 if (!(SRet_gc->isStaticAlloca () && isa<PointerType>(ElT) && ElT->getPointerAddressSpace () == AddressSpace::Tracked)) {
12061235 S.ArrayAllocas [SRet_gc] = tracked.count * cast<ConstantInt>(SRet_gc->getArraySize ())->getZExtValue ();
12071236 }
1237+ break ; // Found our gc roots
12081238 }
12091239 }
12101240 }
@@ -1401,6 +1431,8 @@ State LateLowerGCFrame::LocalScan(Function &F) {
14011431 return S;
14021432}
14031433
1434+
1435+
14041436static Value *ExtractScalar (Value *V, Type *VTy, bool isptr, ArrayRef<unsigned > Idxs, IRBuilder<> &irbuilder) {
14051437 Type *T_int32 = Type::getInt32Ty (V->getContext ());
14061438 if (isptr) {
0 commit comments