3535#include < llvm/Passes/PassPlugin.h>
3636
3737// NewPM needs to manually include all the pass headers
38+ #include < llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h>
3839#include < llvm/Transforms/IPO/AlwaysInliner.h>
3940#include < llvm/Transforms/IPO/Annotation2Metadata.h>
4041#include < llvm/Transforms/IPO/ConstantMerge.h>
4647#include < llvm/Transforms/Instrumentation/ThreadSanitizer.h>
4748#include < llvm/Transforms/Scalar/ADCE.h>
4849#include < llvm/Transforms/Scalar/AnnotationRemarks.h>
50+ #include < llvm/Transforms/Scalar/BDCE.h>
4951#include < llvm/Transforms/Scalar/CorrelatedValuePropagation.h>
5052#include < llvm/Transforms/Scalar/DCE.h>
5153#include < llvm/Transforms/Scalar/DeadStoreElimination.h>
7577#include < llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
7678#include < llvm/Transforms/Scalar/SimplifyCFG.h>
7779#include < llvm/Transforms/Scalar/WarnMissedTransforms.h>
80+ #include < llvm/Transforms/Utils/LibCallsShrinkWrap.h>
7881#include < llvm/Transforms/Utils/InjectTLIMappings.h>
82+ #include < llvm/Transforms/Utils/RelLookupTableConverter.h>
7983#include < llvm/Transforms/Vectorize/LoopVectorize.h>
8084#include < llvm/Transforms/Vectorize/SLPVectorizer.h>
8185#include < llvm/Transforms/Vectorize/VectorCombine.h>
@@ -209,10 +213,10 @@ namespace {
209213 .convertSwitchRangeToICmp (true )
210214 .convertSwitchToLookupTable (true )
211215 .forwardSwitchCondToPhi (true )
216+ .needCanonicalLoops (false )
212217 // These mess with loop rotation, so only do them after that
213218 .hoistCommonInsts (true )
214- // Causes an SRET assertion error in late-gc-lowering
215- // .sinkCommonInsts(true)
219+ .sinkCommonInsts (true )
216220 ;
217221 }
218222#if JL_LLVM_VERSION < 150000
@@ -357,7 +361,7 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder
357361 if (O.getSpeedupLevel () >= 1 ) {
358362#if JL_LLVM_VERSION >= 160000
359363 // TODO check the LLVM 15 default.
360- FPM.addPass (SROAPass (SROAOptions::PreserveCFG ));
364+ FPM.addPass (SROAPass (SROAOptions::ModifyCFG ));
361365#else
362366 FPM.addPass (SROAPass ());
363367#endif
@@ -397,20 +401,23 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB,
397401 if (O.getSpeedupLevel () >= 2 ) {
398402#if JL_LLVM_VERSION >= 160000
399403 // TODO check the LLVM 15 default.
400- FPM.addPass (SROAPass (SROAOptions::PreserveCFG ));
404+ FPM.addPass (SROAPass (SROAOptions::ModifyCFG ));
401405#else
402406 FPM.addPass (SROAPass ());
403407#endif
404408 // SROA can duplicate PHI nodes which can block LowerSIMD
405- FPM.addPass (InstCombinePass ());
409+ FPM.addPass (EarlyCSEPass ());
406410 FPM.addPass (JumpThreadingPass ());
407411 FPM.addPass (CorrelatedValuePropagationPass ());
412+ FPM.addPass (InstCombinePass ());
413+ FPM.addPass (AggressiveInstCombinePass ());
414+ FPM.addPass (LibCallsShrinkWrapPass ());
415+
408416 FPM.addPass (ReassociatePass ());
409- FPM.addPass (EarlyCSEPass ());
410417 JULIA_PASS (FPM.addPass (AllocOptPass ()));
411418 } else { // if (O.getSpeedupLevel() >= 1) (exactly)
412- FPM.addPass (InstCombinePass ());
413419 FPM.addPass (EarlyCSEPass ());
420+ FPM.addPass (InstCombinePass ());
414421 }
415422 invokePeepholeEPCallbacks (FPM, PB, O);
416423 MPM.addPass (createModuleToFunctionPassAdaptor (std::move (FPM)));
@@ -472,16 +479,18 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
472479 JULIA_PASS (FPM.addPass (AllocOptPass ()));
473480#if JL_LLVM_VERSION >= 160000
474481 // TODO check the LLVM 15 default.
475- FPM.addPass (SROAPass (SROAOptions::PreserveCFG ));
482+ FPM.addPass (SROAPass (SROAOptions::ModifyCFG ));
476483#else
477484 FPM.addPass (SROAPass ());
478485#endif
486+ FPM.addPass (VectorCombinePass (/* TryEarlyFoldsOnly=*/ true ));
479487 FPM.addPass (InstSimplifyPass ());
480488 FPM.addPass (GVNPass ());
481489 FPM.addPass (MemCpyOptPass ());
482490 FPM.addPass (SCCPPass ());
491+ FPM.addPass (BDCEPass ());
483492 FPM.addPass (CorrelatedValuePropagationPass ());
484- FPM.addPass (DCEPass ());
493+ FPM.addPass (ADCEPass ());
485494 FPM.addPass (IRCEPass ());
486495 FPM.addPass (InstCombinePass ());
487496 FPM.addPass (JumpThreadingPass ());
@@ -496,11 +505,12 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
496505 JULIA_PASS (FPM.addPass (AllocOptPass ()));
497506 {
498507 LoopPassManager LPM;
499- LPM.addPass (LoopDeletionPass ( ));
500- LPM.addPass (LoopInstSimplifyPass ());
501- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM)));
508+ LPM.addPass (LICMPass ( LICMOptions () ));
509+ LPM.addPass (JuliaLICMPass ());
510+ FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA = */ true ));
502511 }
503- FPM.addPass (LoopDistributePass ());
512+ FPM.addPass (SimplifyCFGPass (aggressiveSimplifyCFGOptions ()));
513+ FPM.addPass (InstCombinePass ());
504514 }
505515 invokeScalarOptimizerCallbacks (FPM, PB, O);
506516 FPM.addPass (AfterScalarOptimizationMarkerPass ());
@@ -509,6 +519,13 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
509519static void buildVectorPipeline (FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
510520 FPM.addPass (BeforeVectorizationMarkerPass ());
511521 // TODO look into loop vectorize options
522+ // Rerotate loops that might have been unrotated in the simplification
523+ LoopPassManager LPM;
524+ LPM.addPass (LoopRotatePass ());
525+ LPM.addPass (LoopDeletionPass ());
526+ FPM.addPass (createFunctionToLoopPassAdaptor (
527+ std::move (LPM), /* UseMemorySSA=*/ false , /* UseBlockFrequencyInfo=*/ false ));
528+ FPM.addPass (LoopDistributePass ());
512529 FPM.addPass (InjectTLIMappings ());
513530 FPM.addPass (LoopVectorizePass ());
514531 FPM.addPass (LoopLoadEliminationPass ());
@@ -517,11 +534,13 @@ static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, Optim
517534 FPM.addPass (SLPVectorizerPass ());
518535 invokeVectorizerCallbacks (FPM, PB, O);
519536 FPM.addPass (VectorCombinePass ());
520- FPM.addPass (ADCEPass ());
537+ FPM.addPass (InstCombinePass ());
521538 // TODO add BDCEPass here?
522539 // This unroll will unroll vectorized loops
523540 // as well as loops that we tried but failed to vectorize
524541 FPM.addPass (LoopUnrollPass (LoopUnrollOptions (O.getSpeedupLevel (), /* OnlyWhenForced = */ false , /* ForgetSCEV = */ false )));
542+ FPM.addPass (SROAPass (SROAOptions::PreserveCFG));
543+ FPM.addPass (createFunctionToLoopPassAdaptor (LICMPass (LICMOptions ()), /* UseMemorySSA=*/ true , /* UseBlockFrequencyInfo=*/ false ));
525544 FPM.addPass (AfterVectorizationMarkerPass ());
526545}
527546
0 commit comments