@@ -725,13 +725,29 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
725725 llvm:: LLVMSetVolatile ( store, llvm:: True ) ;
726726 }
727727 if flags. contains ( MemFlags :: NONTEMPORAL ) {
728- // According to LLVM [1] building a nontemporal store must
729- // *always* point to a metadata value of the integer 1.
730- //
731- // [1]: https://llvm.org/docs/LangRef.html#store-instruction
732- let one = self . cx . const_i32 ( 1 ) ;
733- let node = llvm:: LLVMMDNodeInContext ( self . cx . llcx , & one, 1 ) ;
734- llvm:: LLVMSetMetadata ( store, llvm:: MD_nontemporal as c_uint , node) ;
728+ // Make sure that the current target architecture supports "sane" non-temporal
729+ // stores, i.e., non-temporal stores that are equivalent to regular stores except
730+ // for performance. LLVM doesn't seem to care about this, and will happily treat
731+ // `!nontemporal` stores as if they were normal stores (for reordering optimizations
732+ // etc.) even on x86, despite later lowering them to MOVNT instructions, which do
733+ // *not* behave like regular stores but require special fences.
734+ // So we keep a list of architectures where `!nontemporal` is known to be truly just
735+ // a hint, and use regular stores everywhere else.
736+ // (In the future, we could alternatively ensure that an sfence gets emitted after a sequence of movnt
737+ // before any kind of synchronizing operation. But it's not clear how to do that with LLVM.)
738+ const WELL_BEHAVED_NONTEMPORAL_ARCHS : & [ & str ] = & [ "aarch64" , "arm" ] ;
739+
740+ let use_nontemporal =
741+ WELL_BEHAVED_NONTEMPORAL_ARCHS . contains ( & & * self . cx . tcx . sess . target . arch ) ;
742+ if use_nontemporal {
743+ // According to LLVM [1] building a nontemporal store must
744+ // *always* point to a metadata value of the integer 1.
745+ //
746+ // [1]: https://llvm.org/docs/LangRef.html#store-instruction
747+ let one = self . cx . const_i32 ( 1 ) ;
748+ let node = llvm:: LLVMMDNodeInContext ( self . cx . llcx , & one, 1 ) ;
749+ llvm:: LLVMSetMetadata ( store, llvm:: MD_nontemporal as c_uint , node) ;
750+ }
735751 }
736752 store
737753 }
0 commit comments