11; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2- ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK
3- ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK
2+ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
3+ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
44
55declare void @use (float )
66
7+ ; TODO: The insert is costed as free, so creating a shuffle appears to be a loss.
8+
79define <4 x float > @ext0_v4f32 (<4 x float > %x , <4 x float > %y ) {
810; CHECK-LABEL: @ext0_v4f32(
911; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
@@ -21,9 +23,8 @@ define <4 x float> @ext0_v4f32(<4 x float> %x, <4 x float> %y) {
2123
2224define <4 x float > @ext2_v4f32 (<4 x float > %x , <4 x float > %y ) {
2325; CHECK-LABEL: @ext2_v4f32(
24- ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
25- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
26- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 2
26+ ; CHECK-NEXT: [[TMP1:%.*]] = fneg <4 x float> [[X:%.*]]
27+ ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
2728; CHECK-NEXT: ret <4 x float> [[R]]
2829;
2930 %e = extractelement <4 x float > %x , i32 2
@@ -36,9 +37,8 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
3637
3738define <2 x double > @ext1_v2f64 (<2 x double > %x , <2 x double > %y ) {
3839; CHECK-LABEL: @ext1_v2f64(
39- ; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
40- ; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
41- ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 1
40+ ; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
41+ ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
4242; CHECK-NEXT: ret <2 x double> [[R]]
4343;
4444 %e = extractelement <2 x double > %x , i32 1
@@ -47,26 +47,43 @@ define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
4747 ret <2 x double > %r
4848}
4949
50+ ; The vector fneg would cost twice as much as the scalar op with SSE,
51+ ; so we don't transform there (the shuffle would also be more expensive).
52+
5053define <8 x float > @ext7_v8f32 (<8 x float > %x , <8 x float > %y ) {
51- ; CHECK-LABEL: @ext7_v8f32(
52- ; CHECK-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7
53- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
54- ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7
55- ; CHECK-NEXT: ret <8 x float> [[R]]
54+ ; SSE-LABEL: @ext7_v8f32(
55+ ; SSE-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7
56+ ; SSE-NEXT: [[N:%.*]] = fneg float [[E]]
57+ ; SSE-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7
58+ ; SSE-NEXT: ret <8 x float> [[R]]
59+ ;
60+ ; AVX-LABEL: @ext7_v8f32(
61+ ; AVX-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X:%.*]]
62+ ; AVX-NEXT: [[R:%.*]] = shufflevector <8 x float> [[Y:%.*]], <8 x float> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
63+ ; AVX-NEXT: ret <8 x float> [[R]]
5664;
5765 %e = extractelement <8 x float > %x , i32 7
5866 %n = fneg float %e
5967 %r = insertelement <8 x float > %y , float %n , i32 7
6068 ret <8 x float > %r
6169}
6270
71+ ; Same as above (but at index 5, despite the function name) with an extra use of the extracted element.
72+
6373define <8 x float > @ext7_v8f32_use1 (<8 x float > %x , <8 x float > %y ) {
64- ; CHECK-LABEL: @ext7_v8f32_use1(
65- ; CHECK-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 5
66- ; CHECK-NEXT: call void @use(float [[E]])
67- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
68- ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 5
69- ; CHECK-NEXT: ret <8 x float> [[R]]
74+ ; SSE-LABEL: @ext7_v8f32_use1(
75+ ; SSE-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 5
76+ ; SSE-NEXT: call void @use(float [[E]])
77+ ; SSE-NEXT: [[N:%.*]] = fneg float [[E]]
78+ ; SSE-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 5
79+ ; SSE-NEXT: ret <8 x float> [[R]]
80+ ;
81+ ; AVX-LABEL: @ext7_v8f32_use1(
82+ ; AVX-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 5
83+ ; AVX-NEXT: call void @use(float [[E]])
84+ ; AVX-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X]]
85+ ; AVX-NEXT: [[R:%.*]] = shufflevector <8 x float> [[Y:%.*]], <8 x float> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 6, i32 7>
86+ ; AVX-NEXT: ret <8 x float> [[R]]
7087;
7188 %e = extractelement <8 x float > %x , i32 5
7289 call void @use (float %e )
@@ -75,6 +92,8 @@ define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
7592 ret <8 x float > %r
7693}
7794
95+ ; Negative test - the transform is likely not profitable if the fneg has another use.
96+
7897define <8 x float > @ext7_v8f32_use2 (<8 x float > %x , <8 x float > %y ) {
7998; CHECK-LABEL: @ext7_v8f32_use2(
8099; CHECK-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 3
@@ -90,6 +109,8 @@ define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
90109 ret <8 x float > %r
91110}
92111
112+ ; Negative test - can't convert variable index to a shuffle.
113+
93114define <2 x double > @ext_index_var_v2f64 (<2 x double > %x , <2 x double > %y , i32 %index ) {
94115; CHECK-LABEL: @ext_index_var_v2f64(
95116; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 [[INDEX:%.*]]
@@ -103,6 +124,9 @@ define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %
103124 ret <2 x double > %r
104125}
105126
127+ ; Negative test - require same extract/insert index for simple shuffle.
128+ ; TODO: We could handle this by adjusting the cost calculation.
129+
106130define <2 x double > @ext1_v2f64_ins0 (<2 x double > %x , <2 x double > %y ) {
107131; CHECK-LABEL: @ext1_v2f64_ins0(
108132; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
@@ -116,6 +140,8 @@ define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
116140 ret <2 x double > %r
117141}
118142
143+ ; Negative test - avoid changing poison ops (index 12 is out of bounds for <4 x float>).
144+
119145define <4 x float > @ext12_v4f32 (<4 x float > %x , <4 x float > %y ) {
120146; CHECK-LABEL: @ext12_v4f32(
121147; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 12
0 commit comments