diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index 911ebd41afc5b..22f8248f01851 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -307,19 +307,20 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType, if (!Rhs) Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign); - // Swap bytes if required. - if (NeedsBSwap) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); - Lhs = Builder.CreateCall(Bswap, Lhs); - Rhs = Builder.CreateCall(Bswap, Rhs); - } - // Zero extend if required. if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) { Lhs = Builder.CreateZExt(Lhs, CmpSizeType); Rhs = Builder.CreateZExt(Rhs, CmpSizeType); } + + // Swap bytes if required. + if (NeedsBSwap) { + Type *BSwapType = CmpSizeType ? CmpSizeType : LoadSizeType; + Function *Bswap = + Intrinsic::getDeclaration(CI->getModule(), Intrinsic::bswap, BSwapType); + Lhs = Builder.CreateCall(Bswap, Lhs); + Rhs = Builder.CreateCall(Bswap, Rhs); + } return {Lhs, Rhs}; } @@ -694,10 +695,10 @@ Value *MemCmpExpansion::getMemCmpExpansion() { /// %17 = getelementptr i32, i32* %15, i32 2 /// %18 = load i32, i32* %16 /// %19 = load i32, i32* %17 -/// %20 = call i32 @llvm.bswap.i32(i32 %18) -/// %21 = call i32 @llvm.bswap.i32(i32 %19) -/// %22 = zext i32 %20 to i64 -/// %23 = zext i32 %21 to i64 +/// %20 = zext i32 %18 to i64 +/// %21 = zext i32 %19 to i64 +/// %22 = call i64 @llvm.bswap.i64(i64 %20) +/// %23 = call i64 @llvm.bswap.i64(i64 %21) /// %24 = sub i64 %22, %23 /// %25 = icmp ne i64 %24, 0 /// br i1 %25, label %res_block, label %loadbb2 @@ -710,10 +711,10 @@ Value *MemCmpExpansion::getMemCmpExpansion() { /// %31 = getelementptr i16, i16* %29, i16 6 /// %32 = load i16, i16* %30 /// %33 = load i16, i16* %31 -/// %34 = call i16 @llvm.bswap.i16(i16 %32) -/// %35 = call i16 @llvm.bswap.i16(i16 %33) -/// %36 = zext i16 %34 to i64 -/// %37 = zext i16 
%35 to i64 +/// %34 = zext i16 %32 to i64 +/// %35 = zext i16 %33 to i64 +/// %36 = call i64 @llvm.bswap.i64(i64 %34) +/// %37 = call i64 @llvm.bswap.i64(i64 %35) /// %38 = sub i64 %36, %37 /// %39 = icmp ne i64 %38, 0 /// br i1 %39, label %res_block, label %loadbb3 diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll index c0f8f86e6e8b1..a89571656e469 100644 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll @@ -44,14 +44,12 @@ define i1 @length0_lt(ptr %X, ptr %Y) nounwind { define i32 @length2(ptr %X, ptr %Y) nounwind { ; X86-LABEL: length2: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: bswapl %eax +; X86-NEXT: bswapl %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind @@ -75,14 +73,12 @@ define i1 @length2_eq(ptr %X, ptr %Y) nounwind { define i1 @length2_lt(ptr %X, ptr %Y) nounwind { ; X86-LABEL: length2_lt: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: bswapl %eax +; X86-NEXT: bswapl %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -99,10 +95,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzwl (%ecx), %ecx ; X86-NEXT: movzwl (%eax), 
%eax -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %ax -; X86-NEXT: movzwl %cx, %ecx -; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: bswapl %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: subl %eax, %ecx ; X86-NEXT: testl %ecx, %ecx ; X86-NEXT: setg %al diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll index 56d06021867fa..1f07ba39ecef9 100644 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll @@ -52,10 +52,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind { ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: bswapl %eax +; X64-NEXT: bswapl %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind @@ -79,10 +77,8 @@ define i1 @length2_lt(ptr %X, ptr %Y) nounwind { ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: bswapl %eax +; X64-NEXT: bswapl %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -97,10 +93,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind { ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: bswapl %eax +; X64-NEXT: bswapl %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al @@ -511,8 +505,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind { ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movl 8(%rdi), %ecx ; X64-NEXT: movl 8(%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx ; X64-NEXT: 
xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: je .LBB29_3 diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll index 762691151f4bd..8efd4fca91a99 100644 --- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll @@ -13,14 +13,12 @@ declare dso_local i32 @bcmp(ptr, ptr, i32) define i32 @length2(ptr %X, ptr %Y) nounwind optsize { ; X86-LABEL: length2: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: bswapl %eax +; X86-NEXT: bswapl %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll index c0c7b98d471cd..a8df0ac1354f8 100644 --- a/llvm/test/CodeGen/X86/memcmp-optsize.ll +++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll @@ -16,10 +16,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind optsize { ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: bswapl %eax +; X64-NEXT: bswapl %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind @@ -251,8 +249,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind optsize { ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movl 8(%rdi), %ecx ; X64-NEXT: movl 8(%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: je .LBB15_3 diff --git 
a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll index cb45fd3ebb906..b486eebd54b4a 100644 --- a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll @@ -13,14 +13,12 @@ declare dso_local i32 @bcmp(ptr, ptr, i32) define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 { ; X86-LABEL: length2: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: bswapl %eax +; X86-NEXT: bswapl %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll index 720344a22e43b..afb57c8101b82 100644 --- a/llvm/test/CodeGen/X86/memcmp-pgso.ll +++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll @@ -16,10 +16,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 { ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: bswapl %eax +; X64-NEXT: bswapl %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind @@ -251,8 +249,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 { ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movl 8(%rdi), %ecx ; X64-NEXT: movl 8(%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: je .LBB15_3 diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll index ab439b32f2f1b..f5b67ab45f725 100644 --- 
a/llvm/test/CodeGen/X86/memcmp-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-x32.ll @@ -43,14 +43,12 @@ define i1 @length0_lt(ptr %X, ptr %Y) nounwind { define i32 @length2(ptr %X, ptr %Y) nounwind { ; X86-LABEL: length2: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: bswapl %eax +; X86-NEXT: bswapl %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl %m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 2) nounwind @@ -62,9 +60,8 @@ define i32 @length2_const(ptr %X, ptr %Y) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: rolw $8, %ax -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: addl $-12594, %eax # imm = 0xCECE +; X86-NEXT: bswapl %eax +; X86-NEXT: addl $-825360384, %eax # imm = 0xCECE0000 ; X86-NEXT: retl %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind ret i32 %m @@ -75,9 +72,8 @@ define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: rolw $8, %ax -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: addl $-12594, %eax # imm = 0xCECE +; X86-NEXT: bswapl %eax +; X86-NEXT: addl $-825360384, %eax # imm = 0xCECE0000 ; X86-NEXT: testl %eax, %eax ; X86-NEXT: setg %al ; X86-NEXT: retl @@ -103,14 +99,12 @@ define i1 @length2_eq(ptr %X, ptr %Y) nounwind { define i1 @length2_lt(ptr %X, ptr %Y) nounwind { ; X86-LABEL: length2_lt: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx 
-; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: bswapl %eax +; X86-NEXT: bswapl %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -127,10 +121,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzwl (%ecx), %ecx ; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %ax -; X86-NEXT: movzwl %cx, %ecx -; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: bswapl %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: subl %eax, %ecx ; X86-NEXT: testl %ecx, %ecx ; X86-NEXT: setg %al diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll index 1330f3a241a5c..b8c0f509f1d08 100644 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -51,10 +51,8 @@ define i32 @length2(ptr %X, ptr %Y) nounwind { ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: bswapl %eax +; X64-NEXT: bswapl %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind @@ -65,9 +63,8 @@ define i32 @length2_const(ptr %X, ptr %Y) nounwind { ; X64-LABEL: length2_const: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: rolw $8, %ax -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: addl $-12594, %eax # imm = 0xCECE +; X64-NEXT: bswapl %eax +; X64-NEXT: addl $-825360384, %eax # imm = 0xCECE0000 ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind ret i32 %m @@ -77,9 +74,8 @@ define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind { ; X64-LABEL: length2_gt_const: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: rolw $8, %ax -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: addl 
$-12594, %eax # imm = 0xCECE +; X64-NEXT: bswapl %eax +; X64-NEXT: addl $-825360384, %eax # imm = 0xCECE0000 ; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al ; X64-NEXT: retq @@ -105,10 +101,8 @@ define i1 @length2_lt(ptr %X, ptr %Y) nounwind { ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: bswapl %eax +; X64-NEXT: bswapl %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -123,10 +117,8 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind { ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: movzwl (%rsi), %ecx -; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: bswapl %eax +; X64-NEXT: bswapl %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al @@ -537,8 +529,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind { ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movl 8(%rdi), %ecx ; X64-NEXT: movl 8(%rsi), %edx -; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx +; X64-NEXT: bswapq %rcx +; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: je .LBB31_3 diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll index f56d9688a01e1..bd42d5f8d5085 100644 --- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll @@ -5,14 +5,14 @@ declare i32 @memcmp(ptr nocapture, ptr nocapture, i32) define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X32-LABEL: @cmp2( -; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = call i16 
@llvm.bswap.i16(i16 [[TMP4]]) -; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32 -; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 -; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] -; X32-NEXT: ret i32 [[TMP9]] +; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32 +; X32-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 +; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X32-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X32-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X32-NEXT: ret i32 [[TMP7]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 2) ret i32 %call @@ -20,14 +20,14 @@ define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonly align 2 %y) { ; X32-LABEL: @cmp2_align2( -; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 2 -; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 2 -; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) -; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32 -; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 -; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] -; X32-NEXT: ret i32 [[TMP9]] +; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 2 +; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 2 +; X32-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32 +; X32-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 +; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X32-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X32-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X32-NEXT: ret i32 [[TMP7]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 2) ret i32 %call @@ -37,27 +37,27 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture 
readonly %y) { ; X32-LABEL: @cmp3( ; X32-NEXT: br label [[LOADBB:%.*]] ; X32: res_block: -; X32-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]] +; X32-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] ; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X32-NEXT: br label [[ENDBLOCK:%.*]] ; X32: loadbb: -; X32-NEXT: [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]]) -; X32-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]]) -; X32-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]] -; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X32-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X32-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2 -; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2 -; X32-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 -; X32-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1 -; X32-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; X32-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 -; X32-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X32-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X32-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X32-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X32-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X32-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] ; X32-NEXT: br label [[ENDBLOCK]] ; X32: endblock: 
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X32-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 3) @@ -66,16 +66,16 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp4(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X32-LABEL: @cmp4( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) -; X32-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] -; X32-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]] -; X32-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 -; X32-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 -; X32-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] -; X32-NEXT: ret i32 [[TMP11]] +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X32-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X32-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X32-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X32-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X32-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X32-NEXT: ret i32 [[TMP9]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i32 4) ret i32 %call @@ -85,27 +85,27 @@ define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X32-LABEL: @cmp5( ; X32-NEXT: br label [[LOADBB:%.*]] ; X32: res_block: -; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP7:%.*]], [[TMP8:%.*]] +; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] ; X32-NEXT: [[TMP2:%.*]] = 
select i1 [[TMP1]], i32 -1, i32 1 ; X32-NEXT: br label [[ENDBLOCK:%.*]] ; X32: loadbb: -; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] -; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X32-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 -; X32-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1 -; X32-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; X32-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 -; X32-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X32-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X32-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X32-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X32-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] ; X32-NEXT: br label [[ENDBLOCK]] ; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X32-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 
@memcmp(ptr %x, ptr %y, i32 5) @@ -116,29 +116,29 @@ define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X32-LABEL: @cmp6( ; X32-NEXT: br label [[LOADBB:%.*]] ; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ] +; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] ; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] ; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X32-NEXT: br label [[ENDBLOCK:%.*]] ; X32: loadbb: -; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] -; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X32-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP10]], align 1 -; X32-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP11]], align 1 -; X32-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]]) -; X32-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]]) -; X32-NEXT: [[TMP18]] = zext i16 
[[TMP16]] to i32 -; X32-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32 -; X32-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]] -; X32-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1 +; X32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32 +; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 +; X32-NEXT: [[TMP14]] = call i32 @llvm.bswap.i32(i32 [[TMP12]]) +; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) +; X32-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP14]], [[TMP15]] +; X32-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X32: endblock: ; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X32-NEXT: ret i32 [[PHI_RES]] @@ -151,27 +151,27 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X32-LABEL: @cmp7( ; X32-NEXT: br label [[LOADBB:%.*]] ; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] ; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X32-NEXT: br label [[ENDBLOCK:%.*]] ; X32: loadbb: -; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X32-NEXT: [[TMP9:%.*]] = icmp 
eq i32 [[TMP7]], [[TMP8]] -; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X32-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1 -; X32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1 -; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) -; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]] -; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X32-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X32-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X32-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X32-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X32: endblock: ; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X32-NEXT: ret i32 [[PHI_RES]] @@ -184,27 +184,27 @@ define i32 @cmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X32-LABEL: @cmp8( ; X32-NEXT: br label [[LOADBB:%.*]] ; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ 
[[TMP17:%.*]], [[LOADBB1]] ] +; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] ; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X32-NEXT: br label [[ENDBLOCK:%.*]] ; X32: loadbb: -; X32-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] -; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X32-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X32-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X32-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1 -; X32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1 -; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) -; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]] -; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X32-NEXT: [[TMP12]] = call i32 
@llvm.bswap.i32(i32 [[TMP10]]) +; X32-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X32-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X32-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X32: endblock: ; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X32-NEXT: ret i32 [[PHI_RES]] @@ -287,11 +287,11 @@ define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X32-LABEL: @cmp_eq2( -; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 +; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -303,20 +303,20 @@ define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X32-LABEL: @cmp_eq3( -; X32-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2 -; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2 -; X32-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X32-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16 -; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16 -; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]] -; 
X32-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]] -; X32-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0 -; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X32-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X32-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X32-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X32-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X32-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X32-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X32-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X32-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -328,11 +328,11 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X32-LABEL: @cmp_eq4( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -344,20 +344,20 @@ define i32 @cmp_eq4(ptr nocapture 
readonly %x, ptr nocapture readonly %y) { define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X32-LABEL: @cmp_eq5( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X32-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X32-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32 -; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32 -; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]] -; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] -; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X32-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X32-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X32-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X32-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X32-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -369,20 +369,20 @@ define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; 
X32-LABEL: @cmp_eq6( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X32-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1 -; X32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1 -; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32 -; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 -; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]] -; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]] -; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X32-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 +; X32-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32 +; X32-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X32-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -394,20 +394,20 @@ define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture readonly align 4 %y) { ; X32-LABEL: @cmp_eq6_align4( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 4 
-; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 4 -; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X32-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 4 -; X32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 4 -; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32 -; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 -; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]] -; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]] -; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 4 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 4 +; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4 +; X32-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4 +; X32-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 +; X32-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32 +; X32-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X32-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X32-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -419,18 +419,18 @@ define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture read define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X32-LABEL: @cmp_eq7( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], 
[[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1 -; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1 -; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]] -; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] -; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X32-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X32-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X32-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X32-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -442,18 +442,18 @@ define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X32-LABEL: @cmp_eq8( -; X32-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X32-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1 -; X32-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1 -; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]] -; X32-NEXT: 
[[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] -; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X32-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X32-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X32-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X32-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X32-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X32-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]] +; X32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X32-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll index 2594f53971393..57a242a77509a 100644 --- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll @@ -6,14 +6,14 @@ declare i32 @memcmp(ptr nocapture, ptr nocapture, i64) define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp2( -; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) -; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32 -; X64-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 -; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] -; X64-NEXT: ret i32 [[TMP9]] +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP3:%.*]] = 
zext i16 [[TMP1]] to i32 +; X64-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 +; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-NEXT: ret i32 [[TMP7]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2) ret i32 %call @@ -21,14 +21,14 @@ define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonly align 2 %y) { ; X64-LABEL: @cmp2_align2( -; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 2 -; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 2 -; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) -; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32 -; X64-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 -; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] -; X64-NEXT: ret i32 [[TMP9]] +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 2 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 2 +; X64-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32 +; X64-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 +; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; X64-NEXT: ret i32 [[TMP7]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2) ret i32 %call @@ -38,27 +38,27 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp3( ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i16, 
ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2 -; X64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 -; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] ; X64-NEXT: br label [[ENDBLOCK]] ; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 3) @@ -67,16 +67,16 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) { define 
i32 @cmp4(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp4( -; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) -; X64-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] -; X64-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]] -; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 -; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 -; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] -; X64-NEXT: ret i32 [[TMP11]] +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-NEXT: ret i32 [[TMP9]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4) ret i32 %call @@ -86,27 +86,27 @@ define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp5( ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP7:%.*]], [[TMP8:%.*]] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i32 
[[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 -; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] ; X64-NEXT: br label [[ENDBLOCK]] ; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 5) @@ -117,29 +117,29 @@ define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp6( ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ 
[[TMP19:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]]) -; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]]) -; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32 -; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32 -; X64-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]] -; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 
1 +; X64-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 +; X64-NEXT: [[TMP14]] = call i32 @llvm.bswap.i32(i32 [[TMP12]]) +; X64-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) +; X64-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP14]], [[TMP15]] +; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -152,27 +152,27 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp7( ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i32 
[[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X64-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X64-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -183,16 +183,16 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp8( -; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) -; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]] -; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]] -; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 -; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 -; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] -; 
X64-NEXT: ret i32 [[TMP11]] +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; X64-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; X64-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; X64-NEXT: ret i32 [[TMP9]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 8) ret i32 %call @@ -202,27 +202,27 @@ define i32 @cmp9(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp9( ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP7:%.*]], [[TMP8:%.*]] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP5:%.*]], [[TMP6:%.*]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; 
X64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 -; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] ; X64-NEXT: br label [[ENDBLOCK]] ; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 9) @@ -233,29 +233,29 @@ define i32 @cmp10(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp10( ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = 
call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]]) -; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]]) -; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64 -; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64 -; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]] -; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64 +; X64-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64 +; X64-NEXT: [[TMP14]] = call i64 @llvm.bswap.i64(i64 [[TMP12]]) +; X64-NEXT: [[TMP15]] = call i64 @llvm.bswap.i64(i64 [[TMP13]]) +; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -268,27 +268,27 @@ define i32 @cmp11(ptr nocapture 
readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp11( ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) -; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] 
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -301,29 +301,29 @@ define i32 @cmp12(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp12( ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = 
call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) -; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64 -; X64-NEXT: [[TMP19]] = zext i32 [[TMP17]] to i64 -; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]] -; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64 +; X64-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64 +; X64-NEXT: [[TMP14]] = call i64 @llvm.bswap.i64(i64 [[TMP12]]) +; X64-NEXT: [[TMP15]] = call i64 @llvm.bswap.i64(i64 [[TMP13]]) +; X64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; X64-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -336,27 +336,27 @@ define i32 @cmp13(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp13( ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], 
[[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 5 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 5 -; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) -; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; 
X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -369,27 +369,27 @@ define i32 @cmp14(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp14( ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 6 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 6 -; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1 -; 
X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) -; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -402,27 +402,27 @@ define i32 @cmp15(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp15( ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: 
[[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 7 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 7 -; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) -; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -435,27 +435,27 @@ define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp16( ; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ 
[[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] ; X64: loadbb: -; X64-NEXT: [[TMP5:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) -; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) -; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; X64-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; X64-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 1 -; X64-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 1 -; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) -; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; X64-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; X64-NEXT: 
[[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; X64-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; X64-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] @@ -466,11 +466,11 @@ define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp_eq2( -; X64-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]] -; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 +; X64-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 ; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64-NEXT: ret i32 [[CONV]] ; @@ -486,17 +486,17 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: 
[[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2 -; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]] -; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -504,20 +504,20 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD-NEXT: ret i32 [[CONV]] ; ; X64_2LD-LABEL: @cmp_eq3( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2 -; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16 -; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[TMP1:%.*]] = load i16, ptr 
[[X:%.*]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y:%.*]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i16 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i16 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i16 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i16 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i16 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -529,11 +529,11 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp_eq4( -; X64-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] -; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 +; X64-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 ; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64-NEXT: ret i32 [[CONV]] ; @@ -549,17 +549,17 @@ define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; 
X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]] -; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -567,20 +567,20 @@ define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD-NEXT: ret i32 [[CONV]] ; ; X64_2LD-LABEL: @cmp_eq5( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; 
X64_2LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32 -; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i32 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i32 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -596,17 +596,17 @@ define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; 
X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -614,20 +614,20 @@ define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD-NEXT: ret i32 [[CONV]] ; ; X64_2LD-LABEL: @cmp_eq6( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32 -; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 -; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]] -; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]] -; X64_2LD-NEXT: 
[[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -643,17 +643,17 @@ define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture read ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 4 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 4 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 4 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 4 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; 
X64_1LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 4 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 4 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -661,20 +661,20 @@ define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture read ; X64_1LD-NEXT: ret i32 [[CONV]] ; ; X64_2LD-LABEL: @cmp_eq6_align4( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 4 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 4 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 4 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 4 -; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32 -; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32 -; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]] -; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]] -; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 4 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 4 +; 
X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i32 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i32 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -690,17 +690,17 @@ define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = 
getelementptr i8, ptr [[X]], i64 3 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -708,18 +708,18 @@ define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD-NEXT: ret i32 [[CONV]] ; ; X64_2LD-LABEL: @cmp_eq7( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y:%.*]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP7]] +; X64_2LD-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], 
[[TMP8]] +; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -731,11 +731,11 @@ define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp_eq8( -; X64-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 +; X64-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 ; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64-NEXT: ret i32 [[CONV]] ; @@ -751,17 +751,17 @@ define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; 
X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]] -; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -769,20 +769,20 @@ define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD-NEXT: ret i32 [[CONV]] ; ; X64_2LD-LABEL: @cmp_eq9( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i64 -; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i64 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor 
i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i8 [[TMP7]] to i64 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -798,17 +798,17 @@ define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; 
X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -816,20 +816,20 @@ define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD-NEXT: ret i32 [[CONV]] ; ; X64_2LD-LABEL: @cmp_eq10( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64 -; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64 -; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]] -; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]] -; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0 -; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = 
zext i16 [[TMP6]] to i64 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP7]] to i64 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -845,17 +845,17 @@ define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label 
[[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -863,18 +863,18 @@ define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD-NEXT: ret i32 [[CONV]] ; ; X64_2LD-LABEL: @cmp_eq11( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 3 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 3 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -890,17 +890,17 @@ define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture 
readonly %y) { ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -908,20 +908,20 @@ define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD-NEXT: ret i32 [[CONV]] ; ; X64_2LD-LABEL: @cmp_eq12( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: 
[[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 8 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 8 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64 -; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64 -; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]] -; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]] -; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0 -; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0 +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 +; X64_2LD-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 +; X64_2LD-NEXT: [[TMP10:%.*]] = xor i64 [[TMP8]], [[TMP9]] +; X64_2LD-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0 +; X64_2LD-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -937,17 +937,17 @@ define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br 
i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 5 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 5 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -955,18 +955,18 @@ define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD-NEXT: ret i32 [[CONV]] ; ; X64_2LD-LABEL: @cmp_eq13( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 5 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 5 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] -; 
X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -982,17 +982,17 @@ define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 6 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 6 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], 
align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -1000,18 +1000,18 @@ define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD-NEXT: ret i32 [[CONV]] ; ; X64_2LD-LABEL: @cmp_eq14( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 6 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 6 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], 
i64 6 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -1027,17 +1027,17 @@ define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD: res_block: ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64_1LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_1LD-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; X64_1LD-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: -; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 -; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 7 -; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] -; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64_1LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64_1LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64_1LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_1LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_1LD-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; 
X64_1LD-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -1045,18 +1045,18 @@ define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64_1LD-NEXT: ret i32 [[CONV]] ; ; X64_2LD-LABEL: @cmp_eq15( -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, ptr [[X:%.*]], align 1 -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y:%.*]], align 1 -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] -; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 7 -; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 7 -; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 1 -; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 1 -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] -; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] -; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 -; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 -; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0 +; X64_2LD-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 1 +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y:%.*]], align 1 +; X64_2LD-NEXT: [[TMP3:%.*]] = xor i64 [[TMP1]], [[TMP2]] +; X64_2LD-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; X64_2LD-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; X64_2LD-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; X64_2LD-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; X64_2LD-NEXT: [[TMP8:%.*]] = xor i64 [[TMP6]], [[TMP7]] +; X64_2LD-NEXT: [[TMP9:%.*]] = or i64 [[TMP3]], [[TMP8]] +; X64_2LD-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; X64_2LD-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32 +; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP11]], 0 ; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64_2LD-NEXT: ret i32 [[CONV]] ; @@ -1068,11 +1068,11 @@ 
define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; X64-LABEL: @cmp_eq16( -; X64-NEXT: [[TMP3:%.*]] = load i128, ptr [[X:%.*]], align 1 -; X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[Y:%.*]], align 1 -; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]] -; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 +; X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[X:%.*]], align 1 +; X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[Y:%.*]], align 1 +; X64-NEXT: [[TMP3:%.*]] = icmp ne i128 [[TMP1]], [[TMP2]] +; X64-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 ; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64-NEXT: ret i32 [[CONV]] ;