Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86ISelLoweringCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ EVT X86TargetLowering::getOptimalMemOpType(
if (Op.size() >= 16 &&
(!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
// FIXME: Check if unaligned 64-byte accesses are slow.
if (Op.size() >= 64 && Subtarget.hasAVX512() &&
if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
(Subtarget.getPreferVectorWidth() >= 512)) {
return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
}
Expand Down Expand Up @@ -395,7 +395,7 @@ bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
return true;
return false;
case 512:
if (Subtarget.hasAVX512())
if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
return true;
return false;
default:
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ X86TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(ST->is64Bit() ? 64 : 32);
case TargetTransformInfo::RGK_FixedWidthVector:
if (ST->hasAVX512() && PreferVectorWidth >= 512)
if (ST->hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
return TypeSize::getFixed(512);
if (ST->hasAVX() && PreferVectorWidth >= 256)
return TypeSize::getFixed(256);
Expand Down Expand Up @@ -6131,7 +6131,8 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
// Only enable vector loads for equality comparison. Right now the vector
// version is not as fast for three way compare (see #33329).
const unsigned PreferredWidth = ST->getPreferVectorWidth();
if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64);
if (PreferredWidth >= 512 && ST->hasAVX512() && ST->hasEVEX512())
Options.LoadSizes.push_back(64);
if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32);
if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
}
Expand Down
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/X86/evex512-mem.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl,-evex512 < %s | FileCheck %s --check-prefix=AVX256

define void @test1() {
; AVX512-LABEL: test1:
; AVX512: # %bb.0:
; AVX512-NEXT: movq 64, %rax
; AVX512-NEXT: movq %rax, (%rax)
; AVX512-NEXT: vmovups 0, %zmm0
; AVX512-NEXT: vmovups %zmm0, (%rax)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX256-LABEL: test1:
; AVX256: # %bb.0:
; AVX256-NEXT: movq 64, %rax
; AVX256-NEXT: movq %rax, (%rax)
; AVX256-NEXT: vmovups 0, %ymm0
; AVX256-NEXT: vmovups 32, %ymm1
; AVX256-NEXT: vmovups %ymm1, (%rax)
; AVX256-NEXT: vmovups %ymm0, (%rax)
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 8 null, i64 72, i1 false)
ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)