From 23c1ed570df50f2724982b815a286f5c55265708 Mon Sep 17 00:00:00 2001 From: AZero13 Date: Thu, 11 Sep 2025 09:10:33 -0400 Subject: [PATCH 1/2] Pre-commit test (NFC) --- llvm/test/CodeGen/X86/and-mask-variable.ll | 450 +++++++++++++++++++++ 1 file changed, 450 insertions(+) create mode 100644 llvm/test/CodeGen/X86/and-mask-variable.ll diff --git a/llvm/test/CodeGen/X86/and-mask-variable.ll b/llvm/test/CodeGen/X86/and-mask-variable.ll new file mode 100644 index 0000000000000..54daa86dc6f36 --- /dev/null +++ b/llvm/test/CodeGen/X86/and-mask-variable.ll @@ -0,0 +1,450 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI1 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI1 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI1 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI1 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI2 + +define i32 @mask_pair(i32 %x, i32 %y) { +; X86-NOBMI-LABEL: mask_pair: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: shrl %cl, %eax +; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: retl +; +; X86-BMI1-LABEL: mask_pair: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: mask_pair: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI-LABEL: mask_pair: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: shrl %cl, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: shll %cl, %eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI1-LABEL: mask_pair: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %eax +; X64-BMI1-NEXT: shrl %cl, %eax +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shll %cl, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: mask_pair: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq + %shl = shl nsw i32 -1, %y + %and = and i32 %shl, %x + ret i32 %and +} + +define i64 @mask_pair_64(i64 %x, i64 %y) { +; X86-NOBMI-LABEL: mask_pair_64: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl $-1, %edx +; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: testb $32, %cl +; X86-NOBMI-NEXT: je .LBB1_2 +; X86-NOBMI-NEXT: # %bb.1: +; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: .LBB1_2: +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: retl +; +; X86-BMI1-LABEL: mask_pair_64: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movl $-1, %edx +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB1_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB1_2: +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: mask_pair_64: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $-1, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB1_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB1_2: +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: retl +; +; X64-NOBMI-LABEL: mask_pair_64: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rcx +; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: shrq %cl, %rax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI-NEXT: shlq %cl, %rax +; X64-NOBMI-NEXT: retq +; +; X64-BMI1-LABEL: mask_pair_64: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq %rdi, %rax +; X64-BMI1-NEXT: shrq %cl, %rax +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shlq %cl, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: mask_pair_64: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq + %shl = shl nsw i64 -1, %y + %and = and i64 %shl, %x + ret i64 %and +} + +define i128 @mask_pair_128(i128 %x, i128 %y) { +; X86-NOBMI-LABEL: mask_pair_128: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebp +; X86-NOBMI-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI-NEXT: pushl %ebx +; X86-NOBMI-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI-NEXT: pushl %edi +; X86-NOBMI-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI-NEXT: subl $76, %esp +; X86-NOBMI-NEXT: .cfi_def_cfa_offset 96 +; X86-NOBMI-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl %ecx, %edx +; X86-NOBMI-NEXT: shrb $3, %dl +; X86-NOBMI-NEXT: andb $12, %dl +; X86-NOBMI-NEXT: movzbl %dl, %esi +; X86-NOBMI-NEXT: movl 44(%esp,%esi), %edi +; X86-NOBMI-NEXT: movl %edi, %ebx +; X86-NOBMI-NEXT: shrl %cl, %ebx +; X86-NOBMI-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl 40(%esp,%esi), %ebx +; X86-NOBMI-NEXT: movl %ebx, %ebp +; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebp +; X86-NOBMI-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl 32(%esp,%esi), %edi +; X86-NOBMI-NEXT: movl 36(%esp,%esi), %esi +; X86-NOBMI-NEXT: movl %esi, %ebp +; X86-NOBMI-NEXT: shrdl %cl, %ebx, %ebp +; X86-NOBMI-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: shrdl %cl, %esi, %edi +; X86-NOBMI-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, (%esp) +; X86-NOBMI-NEXT: negb %dl +; X86-NOBMI-NEXT: movsbl %dl, %edi +; X86-NOBMI-NEXT: movl 16(%esp,%edi), %edx +; X86-NOBMI-NEXT: movl 20(%esp,%edi), %esi +; X86-NOBMI-NEXT: movl 24(%esp,%edi), %ebx +; X86-NOBMI-NEXT: movl %ebx, %ebp +; X86-NOBMI-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI-NEXT: movl 28(%esp,%edi), %edi +; X86-NOBMI-NEXT: shldl %cl, %ebx, %edi +; X86-NOBMI-NEXT: movl %edi, 12(%eax) +; X86-NOBMI-NEXT: movl %ebp, 8(%eax) +; X86-NOBMI-NEXT: movl %edx, %edi +; X86-NOBMI-NEXT: shll %cl, %edi +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI-NEXT: shldl %cl, %edx, %esi +; X86-NOBMI-NEXT: movl %esi, 4(%eax) +; X86-NOBMI-NEXT: movl %edi, (%eax) +; X86-NOBMI-NEXT: addl $76, %esp +; X86-NOBMI-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI-NEXT: popl %ebx +; X86-NOBMI-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI-NEXT: popl %ebp +; X86-NOBMI-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI-NEXT: retl $4 +; +; X86-BMI1-LABEL: mask_pair_128: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI1-NEXT: subl $76, %esp +; X86-BMI1-NEXT: .cfi_def_cfa_offset 96 +; X86-BMI1-NEXT: .cfi_offset %esi, -20 +; X86-BMI1-NEXT: .cfi_offset %edi, -16 +; X86-BMI1-NEXT: .cfi_offset %ebx, -12 +; X86-BMI1-NEXT: .cfi_offset %ebp, -8 +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI1-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl %ecx, %edx +; X86-BMI1-NEXT: shrb $3, %dl +; X86-BMI1-NEXT: andb $12, %dl +; X86-BMI1-NEXT: movzbl %dl, %esi +; X86-BMI1-NEXT: movl 44(%esp,%esi), %edi +; X86-BMI1-NEXT: movl %edi, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl 40(%esp,%esi), %ebx +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: shrdl %cl, %edi, %ebp +; X86-BMI1-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl 32(%esp,%esi), %edi +; X86-BMI1-NEXT: movl 36(%esp,%esi), %esi +; X86-BMI1-NEXT: movl %esi, %ebp +; X86-BMI1-NEXT: shrdl %cl, %ebx, %ebp +; X86-BMI1-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: shrdl %cl, %esi, %edi +; X86-BMI1-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl $0, (%esp) +; X86-BMI1-NEXT: negb %dl +; X86-BMI1-NEXT: movsbl %dl, %edi +; X86-BMI1-NEXT: movl 16(%esp,%edi), %edx +; X86-BMI1-NEXT: movl 20(%esp,%edi), %esi +; X86-BMI1-NEXT: movl 24(%esp,%edi), %ebx +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: shldl %cl, %esi, %ebp +; X86-BMI1-NEXT: movl 28(%esp,%edi), %edi +; X86-BMI1-NEXT: shldl %cl, %ebx, %edi +; X86-BMI1-NEXT: movl %edi, 12(%eax) +; X86-BMI1-NEXT: movl %ebp, 8(%eax) +; X86-BMI1-NEXT: movl %edx, %edi +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shldl %cl, %edx, %esi +; X86-BMI1-NEXT: movl %esi, 4(%eax) +; X86-BMI1-NEXT: movl %edi, (%eax) +; X86-BMI1-NEXT: addl $76, %esp +; X86-BMI1-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI1-NEXT: retl $4 +; +; X86-BMI2-LABEL: mask_pair_128: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $76, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 96 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl %ecx, %edx +; X86-BMI2-NEXT: shrb $3, %dl +; X86-BMI2-NEXT: andb $12, %dl +; X86-BMI2-NEXT: movzbl %dl, %esi +; X86-BMI2-NEXT: movl 44(%esp,%esi), %edi +; X86-BMI2-NEXT: shrxl %ecx, %edi, %ebx +; X86-BMI2-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl 40(%esp,%esi), %ebx +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: shrdl %cl, %edi, %ebp +; X86-BMI2-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl 32(%esp,%esi), %edi +; X86-BMI2-NEXT: movl 36(%esp,%esi), %esi +; X86-BMI2-NEXT: movl %esi, %ebp +; X86-BMI2-NEXT: shrdl %cl, %ebx, %ebp +; X86-BMI2-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, (%esp) +; X86-BMI2-NEXT: negb %dl +; X86-BMI2-NEXT: movsbl %dl, %edi +; X86-BMI2-NEXT: movl 16(%esp,%edi), %edx +; X86-BMI2-NEXT: movl 20(%esp,%edi), %esi +; X86-BMI2-NEXT: movl 24(%esp,%edi), %ebx +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: shldl %cl, %esi, %ebp +; X86-BMI2-NEXT: movl 28(%esp,%edi), %edi +; X86-BMI2-NEXT: shldl %cl, %ebx, %edi +; X86-BMI2-NEXT: movl %edi, 12(%eax) +; X86-BMI2-NEXT: movl %ebp, 8(%eax) +; X86-BMI2-NEXT: shlxl %ecx, %edx, %edi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: shldl %cl, %edx, %esi +; X86-BMI2-NEXT: movl %esi, 4(%eax) +; X86-BMI2-NEXT: movl %edi, (%eax) +; X86-BMI2-NEXT: addl $76, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl $4 +; +; X64-NOBMI-LABEL: mask_pair_128: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdx, %rcx +; X64-NOBMI-NEXT: shrdq %cl, %rsi, %rdi +; X64-NOBMI-NEXT: shrq %cl, %rsi +; X64-NOBMI-NEXT: xorl %eax, %eax +; X64-NOBMI-NEXT: testb $64, %cl +; X64-NOBMI-NEXT: cmovneq %rsi, %rdi +; X64-NOBMI-NEXT: cmovneq %rax, %rsi +; X64-NOBMI-NEXT: movq %rdi, %rdx +; X64-NOBMI-NEXT: shlq %cl, %rdx +; X64-NOBMI-NEXT: testb $64, %cl +; X64-NOBMI-NEXT: cmoveq %rdx, %rax +; X64-NOBMI-NEXT: shldq %cl, %rdi, %rsi +; X64-NOBMI-NEXT: testb $64, %cl +; X64-NOBMI-NEXT: cmoveq %rsi, %rdx +; X64-NOBMI-NEXT: retq +; +; X64-BMI1-LABEL: mask_pair_128: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: movq %rdx, %rcx +; X64-BMI1-NEXT: shrdq %cl, %rsi, %rdi +; X64-BMI1-NEXT: shrq %cl, %rsi +; X64-BMI1-NEXT: xorl %eax, %eax +; X64-BMI1-NEXT: testb $64, %cl +; X64-BMI1-NEXT: cmovneq %rsi, %rdi +; X64-BMI1-NEXT: cmovneq %rax, %rsi +; X64-BMI1-NEXT: movq %rdi, %rdx +; X64-BMI1-NEXT: shlq %cl, %rdx +; X64-BMI1-NEXT: testb $64, %cl +; X64-BMI1-NEXT: cmoveq %rdx, %rax +; X64-BMI1-NEXT: shldq %cl, %rdi, %rsi +; X64-BMI1-NEXT: testb $64, %cl +; X64-BMI1-NEXT: cmoveq %rsi, %rdx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: mask_pair_128: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movq %rdx, %rcx +; X64-BMI2-NEXT: shrdq %cl, %rsi, %rdi +; X64-BMI2-NEXT: shrxq %rdx, %rsi, %rdx +; X64-BMI2-NEXT: xorl %esi, %esi +; X64-BMI2-NEXT: testb $64, %cl +; X64-BMI2-NEXT: cmovneq %rdx, %rdi +; X64-BMI2-NEXT: shlxq %rcx, %rdi, %r8 +; X64-BMI2-NEXT: movq %r8, %rax +; X64-BMI2-NEXT: cmovneq %rsi, %rax +; X64-BMI2-NEXT: cmovneq %rsi, %rdx +; X64-BMI2-NEXT: shldq %cl, %rdi, %rdx +; X64-BMI2-NEXT: testb $64, %cl +; X64-BMI2-NEXT: cmovneq %r8, %rdx +; X64-BMI2-NEXT: retq + %shl = shl nsw i128 -1, %y + %and = and i128 %shl, %x + ret i128 %and +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} +; X64: {{.*}} +; X64-BMINOTBM: {{.*}} +; X64-BMITBM: {{.*}} +; X86: {{.*}} +; X86-BMINOTBM: {{.*}} +; X86-BMITBM: {{.*}} From 7bd2b3d3b04317920e45753f9b9834cc863848ec Mon Sep 17 00:00:00 2001 From: AZero13 Date: Thu, 11 Sep 2025 09:12:11 -0400 Subject: [PATCH 2/2] [X86] shouldFoldMaskToVariableShiftPair should be true for scalars up to the biggest legal type For X86, we want to do this for scalars up to the biggest legal type. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +- llvm/test/CodeGen/X86/and-mask-variable.ll | 378 ++++----------------- 2 files changed, 72 insertions(+), 313 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 931a10b700c87..9580adebba712 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3659,11 +3659,8 @@ bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const { if (VT.isVector()) return false; - // 64-bit shifts on 32-bit targets produce really bad bloated code. - if (VT == MVT::i64 && !Subtarget.is64Bit()) - return false; - - return true; + unsigned MaxWidth = Subtarget.is64Bit() ? 64 : 32; + return VT.getScalarSizeInBits() <= MaxWidth; } TargetLowering::ShiftLegalizationStrategy diff --git a/llvm/test/CodeGen/X86/and-mask-variable.ll b/llvm/test/CodeGen/X86/and-mask-variable.ll index 54daa86dc6f36..d89f0db6a0c5b 100644 --- a/llvm/test/CodeGen/X86/and-mask-variable.ll +++ b/llvm/test/CodeGen/X86/and-mask-variable.ll @@ -1,16 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI1 -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI1 -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI2 -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI1 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI1 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86-NOBMI +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86-BMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86-BMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64-BMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64-BMI2 -define i32 @mask_pair(i32 %x, i32 %y) { +define i32 @mask_pair(i32 %x, i32 %y) nounwind { ; X86-NOBMI-LABEL: mask_pair: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx @@ -19,14 +15,6 @@ define i32 @mask_pair(i32 %x, i32 %y) { ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1-LABEL: mask_pair: -; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: shll %cl, %eax -; X86-BMI1-NEXT: retl -; ; X86-BMI2-LABEL: mask_pair: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax @@ -43,15 +31,6 @@ define i32 @mask_pair(i32 %x, i32 %y) { ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1-LABEL: mask_pair: -; X64-BMI1: # %bb.0: -; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl %edi, %eax -; X64-BMI1-NEXT: shrl %cl, %eax -; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shll %cl, %eax -; X64-BMI1-NEXT: retq -; ; X64-BMI2-LABEL: mask_pair: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: shrxl %esi, %edi, %eax @@ -62,7 +41,7 @@ define i32 @mask_pair(i32 %x, i32 %y) { ret i32 %and } -define i64 @mask_pair_64(i64 %x, i64 %y) { +define i64 @mask_pair_64(i64 %x, i64 %y) nounwind { ; X86-NOBMI-LABEL: mask_pair_64: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx @@ -79,22 +58,6 @@ define i64 @mask_pair_64(i64 %x, i64 %y) { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1-LABEL: mask_pair_64: -; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1-NEXT: movl $-1, %edx -; X86-BMI1-NEXT: movl $-1, %eax -; X86-BMI1-NEXT: shll %cl, %eax -; X86-BMI1-NEXT: testb $32, %cl -; X86-BMI1-NEXT: je .LBB1_2 -; X86-BMI1-NEXT: # %bb.1: -; X86-BMI1-NEXT: movl %eax, %edx -; X86-BMI1-NEXT: xorl %eax, %eax -; X86-BMI1-NEXT: .LBB1_2: -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1-NEXT: retl -; ; X86-BMI2-LABEL: mask_pair_64: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx @@ -119,15 +82,6 @@ define i64 @mask_pair_64(i64 %x, i64 %y) { ; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1-LABEL: mask_pair_64: -; X64-BMI1: # %bb.0: -; X64-BMI1-NEXT: movq %rsi, %rcx -; X64-BMI1-NEXT: movq %rdi, %rax -; X64-BMI1-NEXT: shrq %cl, %rax -; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1-NEXT: shlq %cl, %rax -; X64-BMI1-NEXT: retq -; ; X64-BMI2-LABEL: mask_pair_64: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax @@ -138,313 +92,121 @@ define i64 @mask_pair_64(i64 %x, i64 %y) { ret i64 %and } -define i128 @mask_pair_128(i128 %x, i128 %y) { +define i128 @mask_pair_128(i128 %x, i128 %y) nounwind { ; X86-NOBMI-LABEL: mask_pair_128: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: pushl %ebp -; X86-NOBMI-NEXT: .cfi_def_cfa_offset 8 ; X86-NOBMI-NEXT: pushl %ebx -; X86-NOBMI-NEXT: .cfi_def_cfa_offset 12 ; X86-NOBMI-NEXT: pushl %edi -; X86-NOBMI-NEXT: .cfi_def_cfa_offset 16 ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: .cfi_def_cfa_offset 20 -; X86-NOBMI-NEXT: subl $76, %esp -; X86-NOBMI-NEXT: .cfi_def_cfa_offset 96 -; X86-NOBMI-NEXT: .cfi_offset %esi, -20 -; X86-NOBMI-NEXT: .cfi_offset %edi, -16 -; X86-NOBMI-NEXT: .cfi_offset %ebx, -12 -; X86-NOBMI-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI-NEXT: subl $32, %esp ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NOBMI-NEXT: movl %ebx, {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $-1, {{[0-9]+}}(%esp) ; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, (%esp) ; X86-NOBMI-NEXT: movl %ecx, %edx ; X86-NOBMI-NEXT: shrb $3, %dl ; X86-NOBMI-NEXT: andb $12, %dl -; X86-NOBMI-NEXT: movzbl %dl, %esi -; X86-NOBMI-NEXT: movl 44(%esp,%esi), %edi -; X86-NOBMI-NEXT: movl %edi, %ebx -; X86-NOBMI-NEXT: shrl %cl, %ebx -; X86-NOBMI-NEXT: movl %ebx, {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl 40(%esp,%esi), %ebx -; X86-NOBMI-NEXT: movl %ebx, %ebp -; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebp -; X86-NOBMI-NEXT: movl %ebp, {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl 32(%esp,%esi), %edi -; X86-NOBMI-NEXT: movl 36(%esp,%esi), %esi -; X86-NOBMI-NEXT: movl %esi, %ebp -; X86-NOBMI-NEXT: shrdl %cl, %ebx, %ebp -; X86-NOBMI-NEXT: movl %ebp, {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: shrdl %cl, %esi, %edi -; X86-NOBMI-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: movl $0, (%esp) ; X86-NOBMI-NEXT: negb %dl -; X86-NOBMI-NEXT: movsbl %dl, %edi -; X86-NOBMI-NEXT: movl 16(%esp,%edi), %edx -; X86-NOBMI-NEXT: movl 20(%esp,%edi), %esi -; X86-NOBMI-NEXT: movl 24(%esp,%edi), %ebx -; X86-NOBMI-NEXT: movl %ebx, %ebp -; X86-NOBMI-NEXT: shldl %cl, %esi, %ebp -; X86-NOBMI-NEXT: movl 28(%esp,%edi), %edi -; X86-NOBMI-NEXT: shldl %cl, %ebx, %edi -; X86-NOBMI-NEXT: movl %edi, 12(%eax) -; X86-NOBMI-NEXT: movl %ebp, 8(%eax) -; X86-NOBMI-NEXT: movl %edx, %edi -; X86-NOBMI-NEXT: shll %cl, %edi -; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI-NEXT: movsbl %dl, %ebx +; X86-NOBMI-NEXT: movl 24(%esp,%ebx), %edx +; X86-NOBMI-NEXT: movl 28(%esp,%ebx), %esi ; X86-NOBMI-NEXT: shldl %cl, %edx, %esi -; X86-NOBMI-NEXT: movl %esi, 4(%eax) +; X86-NOBMI-NEXT: movl 16(%esp,%ebx), %edi +; X86-NOBMI-NEXT: movl 20(%esp,%ebx), %ebx +; X86-NOBMI-NEXT: shldl %cl, %ebx, %edx +; X86-NOBMI-NEXT: shldl %cl, %edi, %ebx +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI-NEXT: shll %cl, %edi +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI-NEXT: movl %esi, 12(%eax) +; X86-NOBMI-NEXT: movl %edx, 8(%eax) +; X86-NOBMI-NEXT: movl %ebx, 4(%eax) ; X86-NOBMI-NEXT: movl %edi, (%eax) -; X86-NOBMI-NEXT: addl $76, %esp -; X86-NOBMI-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI-NEXT: addl $32, %esp ; X86-NOBMI-NEXT: popl %esi -; X86-NOBMI-NEXT: .cfi_def_cfa_offset 16 ; X86-NOBMI-NEXT: popl %edi -; X86-NOBMI-NEXT: .cfi_def_cfa_offset 12 ; X86-NOBMI-NEXT: popl %ebx -; X86-NOBMI-NEXT: .cfi_def_cfa_offset 8 -; X86-NOBMI-NEXT: popl %ebp -; X86-NOBMI-NEXT: .cfi_def_cfa_offset 4 ; X86-NOBMI-NEXT: retl $4 ; -; X86-BMI1-LABEL: mask_pair_128: -; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: pushl %ebp -; X86-BMI1-NEXT: .cfi_def_cfa_offset 8 -; X86-BMI1-NEXT: pushl %ebx -; X86-BMI1-NEXT: .cfi_def_cfa_offset 12 -; X86-BMI1-NEXT: pushl %edi -; X86-BMI1-NEXT: .cfi_def_cfa_offset 16 -; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: .cfi_def_cfa_offset 20 -; X86-BMI1-NEXT: subl $76, %esp -; X86-BMI1-NEXT: .cfi_def_cfa_offset 96 -; X86-BMI1-NEXT: .cfi_offset %esi, -20 -; X86-BMI1-NEXT: .cfi_offset %edi, -16 -; X86-BMI1-NEXT: .cfi_offset %ebx, -12 -; X86-BMI1-NEXT: .cfi_offset %ebp, -8 -; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1-NEXT: movl %ebx, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl %ecx, %edx -; X86-BMI1-NEXT: shrb $3, %dl -; X86-BMI1-NEXT: andb $12, %dl -; X86-BMI1-NEXT: movzbl %dl, %esi -; X86-BMI1-NEXT: movl 44(%esp,%esi), %edi -; X86-BMI1-NEXT: movl %edi, %ebx -; X86-BMI1-NEXT: shrl %cl, %ebx -; X86-BMI1-NEXT: movl %ebx, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl 40(%esp,%esi), %ebx -; X86-BMI1-NEXT: movl %ebx, %ebp -; X86-BMI1-NEXT: shrdl %cl, %edi, %ebp -; X86-BMI1-NEXT: movl %ebp, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl 32(%esp,%esi), %edi -; X86-BMI1-NEXT: movl 36(%esp,%esi), %esi -; X86-BMI1-NEXT: movl %esi, %ebp -; X86-BMI1-NEXT: shrdl %cl, %ebx, %ebp -; X86-BMI1-NEXT: movl %ebp, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-BMI1-NEXT: movl $0, (%esp) -; X86-BMI1-NEXT: negb %dl -; X86-BMI1-NEXT: movsbl %dl, %edi -; X86-BMI1-NEXT: movl 16(%esp,%edi), %edx -; X86-BMI1-NEXT: movl 20(%esp,%edi), %esi -; X86-BMI1-NEXT: movl 24(%esp,%edi), %ebx -; X86-BMI1-NEXT: movl %ebx, %ebp -; X86-BMI1-NEXT: shldl %cl, %esi, %ebp -; X86-BMI1-NEXT: movl 28(%esp,%edi), %edi -; X86-BMI1-NEXT: shldl %cl, %ebx, %edi -; X86-BMI1-NEXT: movl %edi, 12(%eax) -; X86-BMI1-NEXT: movl %ebp, 8(%eax) -; X86-BMI1-NEXT: movl %edx, %edi -; X86-BMI1-NEXT: shll %cl, %edi -; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1-NEXT: shldl %cl, %edx, %esi -; X86-BMI1-NEXT: movl %esi, 4(%eax) -; X86-BMI1-NEXT: movl %edi, (%eax) -; X86-BMI1-NEXT: addl $76, %esp -; X86-BMI1-NEXT: .cfi_def_cfa_offset 20 -; X86-BMI1-NEXT: popl %esi -; X86-BMI1-NEXT: .cfi_def_cfa_offset 16 -; X86-BMI1-NEXT: popl %edi -; X86-BMI1-NEXT: .cfi_def_cfa_offset 12 -; X86-BMI1-NEXT: popl %ebx -; X86-BMI1-NEXT: .cfi_def_cfa_offset 8 -; X86-BMI1-NEXT: popl %ebp -; X86-BMI1-NEXT: .cfi_def_cfa_offset 4 -; X86-BMI1-NEXT: retl $4 -; ; X86-BMI2-LABEL: mask_pair_128: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: pushl %ebp -; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 ; X86-BMI2-NEXT: pushl %ebx -; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 ; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 -; X86-BMI2-NEXT: subl $76, %esp -; X86-BMI2-NEXT: .cfi_def_cfa_offset 96 -; X86-BMI2-NEXT: .cfi_offset %esi, -20 -; X86-BMI2-NEXT: .cfi_offset %edi, -16 -; X86-BMI2-NEXT: .cfi_offset %ebx, -12 -; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: subl $32, %esp ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movl %ebx, {{[0-9]+}}(%esp) -; X86-BMI2-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-BMI2-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $-1, {{[0-9]+}}(%esp) ; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, (%esp) ; X86-BMI2-NEXT: movl %ecx, %edx ; X86-BMI2-NEXT: shrb $3, %dl ; X86-BMI2-NEXT: andb $12, %dl -; X86-BMI2-NEXT: movzbl %dl, %esi -; X86-BMI2-NEXT: movl 44(%esp,%esi), %edi -; X86-BMI2-NEXT: shrxl %ecx, %edi, %ebx -; X86-BMI2-NEXT: movl %ebx, {{[0-9]+}}(%esp) -; X86-BMI2-NEXT: movl 40(%esp,%esi), %ebx -; X86-BMI2-NEXT: movl %ebx, %ebp -; X86-BMI2-NEXT: shrdl %cl, %edi, %ebp -; X86-BMI2-NEXT: movl %ebp, {{[0-9]+}}(%esp) -; X86-BMI2-NEXT: movl 32(%esp,%esi), %edi -; X86-BMI2-NEXT: movl 36(%esp,%esi), %esi -; X86-BMI2-NEXT: movl %esi, %ebp -; X86-BMI2-NEXT: shrdl %cl, %ebx, %ebp -; X86-BMI2-NEXT: movl %ebp, {{[0-9]+}}(%esp) -; X86-BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI2-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-BMI2-NEXT: movl $0, (%esp) ; X86-BMI2-NEXT: negb %dl ; X86-BMI2-NEXT: movsbl %dl, %edi -; X86-BMI2-NEXT: movl 16(%esp,%edi), %edx -; X86-BMI2-NEXT: movl 20(%esp,%edi), %esi -; X86-BMI2-NEXT: movl 24(%esp,%edi), %ebx -; X86-BMI2-NEXT: movl %ebx, %ebp -; X86-BMI2-NEXT: shldl %cl, %esi, %ebp -; X86-BMI2-NEXT: movl 28(%esp,%edi), %edi -; X86-BMI2-NEXT: shldl %cl, %ebx, %edi -; X86-BMI2-NEXT: movl %edi, 12(%eax) -; X86-BMI2-NEXT: movl %ebp, 8(%eax) -; X86-BMI2-NEXT: shlxl %ecx, %edx, %edi -; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: movl 24(%esp,%edi), %edx +; X86-BMI2-NEXT: movl 28(%esp,%edi), %esi ; X86-BMI2-NEXT: shldl %cl, %edx, %esi -; X86-BMI2-NEXT: movl %esi, 4(%eax) -; X86-BMI2-NEXT: movl %edi, (%eax) -; X86-BMI2-NEXT: addl $76, %esp -; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: movl 16(%esp,%edi), %ebx +; X86-BMI2-NEXT: movl 20(%esp,%edi), %edi +; X86-BMI2-NEXT: shldl %cl, %edi, %edx +; X86-BMI2-NEXT: shldl %cl, %ebx, %edi +; X86-BMI2-NEXT: shlxl %ecx, %ebx, %ecx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl %esi, 12(%eax) +; X86-BMI2-NEXT: movl %edx, 8(%eax) +; X86-BMI2-NEXT: movl %edi, 4(%eax) +; X86-BMI2-NEXT: movl %ecx, (%eax) +; X86-BMI2-NEXT: addl $32, %esp ; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 ; X86-BMI2-NEXT: popl %edi -; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 ; X86-BMI2-NEXT: popl %ebx -; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 -; X86-BMI2-NEXT: popl %ebp -; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 ; X86-BMI2-NEXT: retl $4 ; ; X64-NOBMI-LABEL: mask_pair_128: ; X64-NOBMI: # %bb.0: ; X64-NOBMI-NEXT: movq %rdx, %rcx -; X64-NOBMI-NEXT: shrdq %cl, %rsi, %rdi -; X64-NOBMI-NEXT: shrq %cl, %rsi +; X64-NOBMI-NEXT: movq $-1, %rdx +; X64-NOBMI-NEXT: movq $-1, %r8 +; X64-NOBMI-NEXT: shlq %cl, %r8 ; X64-NOBMI-NEXT: xorl %eax, %eax ; X64-NOBMI-NEXT: testb $64, %cl -; X64-NOBMI-NEXT: cmovneq %rsi, %rdi -; X64-NOBMI-NEXT: cmovneq %rax, %rsi -; X64-NOBMI-NEXT: movq %rdi, %rdx -; X64-NOBMI-NEXT: shlq %cl, %rdx -; X64-NOBMI-NEXT: testb $64, %cl -; X64-NOBMI-NEXT: cmoveq %rdx, %rax -; X64-NOBMI-NEXT: shldq %cl, %rdi, %rsi -; X64-NOBMI-NEXT: testb $64, %cl -; X64-NOBMI-NEXT: cmoveq %rsi, %rdx +; X64-NOBMI-NEXT: cmovneq %r8, %rdx +; X64-NOBMI-NEXT: cmoveq %r8, %rax +; X64-NOBMI-NEXT: andq %rdi, %rax +; X64-NOBMI-NEXT: andq %rsi, %rdx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1-LABEL: mask_pair_128: -; X64-BMI1: # %bb.0: -; X64-BMI1-NEXT: movq %rdx, %rcx -; X64-BMI1-NEXT: shrdq %cl, %rsi, %rdi -; X64-BMI1-NEXT: shrq %cl, %rsi -; X64-BMI1-NEXT: xorl %eax, %eax -; X64-BMI1-NEXT: testb $64, %cl -; X64-BMI1-NEXT: cmovneq %rsi, %rdi -; X64-BMI1-NEXT: cmovneq %rax, %rsi -; X64-BMI1-NEXT: movq %rdi, %rdx -; X64-BMI1-NEXT: shlq %cl, %rdx -; X64-BMI1-NEXT: testb $64, %cl -; X64-BMI1-NEXT: cmoveq %rdx, %rax -; X64-BMI1-NEXT: shldq %cl, %rdi, %rsi -; X64-BMI1-NEXT: testb $64, %cl -; X64-BMI1-NEXT: cmoveq %rsi, %rdx -; X64-BMI1-NEXT: retq -; ; X64-BMI2-LABEL: mask_pair_128: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movq %rdx, %rcx -; X64-BMI2-NEXT: shrdq %cl, %rsi, %rdi -; X64-BMI2-NEXT: shrxq %rdx, %rsi, %rdx -; X64-BMI2-NEXT: xorl %esi, %esi -; X64-BMI2-NEXT: testb $64, %cl -; X64-BMI2-NEXT: cmovneq %rdx, %rdi -; X64-BMI2-NEXT: shlxq %rcx, %rdi, %r8 -; X64-BMI2-NEXT: movq %r8, %rax -; X64-BMI2-NEXT: cmovneq %rsi, %rax -; X64-BMI2-NEXT: cmovneq %rsi, %rdx -; X64-BMI2-NEXT: shldq %cl, %rdi, %rdx -; X64-BMI2-NEXT: testb $64, %cl -; X64-BMI2-NEXT: cmovneq %r8, %rdx +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shlxq %rdx, %rcx, %r8 +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testb $64, %dl +; X64-BMI2-NEXT: cmovneq %r8, %rcx +; X64-BMI2-NEXT: cmoveq %r8, %rax +; X64-BMI2-NEXT: andq %rdi, %rax +; X64-BMI2-NEXT: andq %rsi, %rcx +; X64-BMI2-NEXT: movq %rcx, %rdx ; X64-BMI2-NEXT: retq %shl = shl nsw i128 -1, %y %and = and i128 %shl, %x ret i128 %and } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-GI: {{.*}} -; CHECK-SD: {{.*}} -; X64: {{.*}} -; X64-BMINOTBM: {{.*}} -; X64-BMITBM: {{.*}} -; X86: {{.*}} -; X86-BMINOTBM: {{.*}} -; X86-BMITBM: {{.*}}