@@ -236,22 +236,20 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
236236; CHECK-NEXT: sunpklo z4.d, z2.s
237237; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
238238; CHECK-NEXT: sunpklo z0.s, z0.h
239- ; CHECK-NEXT: mov z7.d, z1.d
240- ; CHECK-NEXT: sunpklo z2.d, z2.s
239+ ; CHECK-NEXT: sunpklo z7.d, z1.s
240+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
241241; CHECK-NEXT: sunpklo z5.d, z3.s
242242; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
243- ; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
243+ ; CHECK-NEXT: sunpklo z2.d, z2.s
244244; CHECK-NEXT: sunpklo z1.d, z1.s
245- ; CHECK-NEXT: mov z6.d, z0.d
245+ ; CHECK-NEXT: sunpklo z6.d, z0.s
246+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
246247; CHECK-NEXT: sunpklo z3.d, z3.s
247248; CHECK-NEXT: stp q4, q2, [x0]
248- ; CHECK-NEXT: sunpklo z4.d, z7.s
249- ; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
250249; CHECK-NEXT: sunpklo z0.d, z0.s
250+ ; CHECK-NEXT: stp q7, q1, [x0, #32]
251251; CHECK-NEXT: stp q5, q3, [x0, #64]
252- ; CHECK-NEXT: sunpklo z2.d, z6.s
253- ; CHECK-NEXT: stp q1, q4, [x0, #32]
254- ; CHECK-NEXT: stp q0, q2, [x0, #96]
252+ ; CHECK-NEXT: stp q6, q0, [x0, #96]
255253; CHECK-NEXT: ret
256254 %b = sext <16 x i8 > %a to <16 x i64 >
257255 store <16 x i64 > %b , ptr %out
@@ -263,63 +261,57 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
263261; CHECK: // %bb.0:
264262; CHECK-NEXT: ldp q1, q0, [x0]
265263; CHECK-NEXT: add z0.b, z0.b, z0.b
266- ; CHECK-NEXT: add z1.b, z1.b, z1.b
267- ; CHECK-NEXT: mov z2.d, z0.d
268- ; CHECK-NEXT: sunpklo z0.h, z0.b
269- ; CHECK-NEXT: mov z3.d, z1.d
270- ; CHECK-NEXT: sunpklo z1.h, z1.b
264+ ; CHECK-NEXT: add z2.b, z1.b, z1.b
265+ ; CHECK-NEXT: sunpklo z3.h, z0.b
266+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
267+ ; CHECK-NEXT: sunpklo z1.h, z2.b
271268; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
269+ ; CHECK-NEXT: sunpklo z0.h, z0.b
270+ ; CHECK-NEXT: sunpklo z4.s, z3.h
272271; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
273- ; CHECK-NEXT: sunpklo z4.s, z0.h
274- ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
275272; CHECK-NEXT: sunpklo z5.s, z1.h
276- ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
277273; CHECK-NEXT: sunpklo z2.h, z2.b
278- ; CHECK-NEXT: sunpklo z3.h, z3.b
279- ; CHECK-NEXT: sunpklo z0.s, z0.h
280- ; CHECK-NEXT: sunpklo z16.d, z4.s
274+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
275+ ; CHECK-NEXT: sunpklo z6.s, z0.h
276+ ; CHECK-NEXT: sunpklo z3.s, z3.h
277+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
278+ ; CHECK-NEXT: sunpklo z7.d, z4.s
281279; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
282- ; CHECK-NEXT: sunpklo z1.s, z1.h
283- ; CHECK-NEXT: sunpklo z17.d, z5.s
280+ ; CHECK-NEXT: sunpklo z16.d, z5.s
284281; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
285- ; CHECK-NEXT: sunpklo z6.s, z2.h
286- ; CHECK-NEXT: sunpklo z7.s, z3.h
282+ ; CHECK-NEXT: sunpklo z17.s, z2.h
287283; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
288- ; CHECK-NEXT: sunpklo z4.d, z4.s
284+ ; CHECK-NEXT: sunpklo z1.s, z1.h
285+ ; CHECK-NEXT: sunpklo z0.s, z0.h
286+ ; CHECK-NEXT: sunpklo z18.d, z6.s
287+ ; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
288+ ; CHECK-NEXT: sunpklo z19.d, z3.s
289289; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
290- ; CHECK-NEXT: sunpklo z19 .d, z0 .s
290+ ; CHECK-NEXT: sunpklo z4 .d, z4 .s
291291; CHECK-NEXT: sunpklo z5.d, z5.s
292- ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
293292; CHECK-NEXT: sunpklo z2.s, z2.h
294- ; CHECK-NEXT: sunpklo z18.d, z6.s
295- ; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
296- ; CHECK-NEXT: sunpklo z3.s, z3.h
297- ; CHECK-NEXT: stp q16, q4, [x1, #128]
298- ; CHECK-NEXT: mov z16.d, z7.d
299- ; CHECK-NEXT: sunpklo z0.d, z0.s
300- ; CHECK-NEXT: stp q17, q5, [x1]
301- ; CHECK-NEXT: sunpklo z5.d, z7.s
302- ; CHECK-NEXT: sunpklo z4.d, z6.s
303- ; CHECK-NEXT: mov z6.d, z1.d
304- ; CHECK-NEXT: ext z16.b, z16.b, z7.b, #8
305- ; CHECK-NEXT: mov z7.d, z2.d
306- ; CHECK-NEXT: stp q19, q0, [x1, #160]
307- ; CHECK-NEXT: sunpklo z0.d, z2.s
308- ; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
309- ; CHECK-NEXT: sunpklo z1.d, z1.s
310- ; CHECK-NEXT: stp q18, q4, [x1, #192]
311- ; CHECK-NEXT: mov z4.d, z3.d
312- ; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8
313- ; CHECK-NEXT: sunpklo z16.d, z16.s
314293; CHECK-NEXT: sunpklo z6.d, z6.s
315- ; CHECK-NEXT: ext z4.b, z4.b, z3.b, #8
316- ; CHECK-NEXT: sunpklo z2.d, z7.s
317294; CHECK-NEXT: sunpklo z3.d, z3.s
318- ; CHECK-NEXT: stp q5, q16, [x1, #64]
319- ; CHECK-NEXT: stp q1, q6, [x1, #32]
320- ; CHECK-NEXT: sunpklo z1.d, z4.s
321- ; CHECK-NEXT: stp q0, q2, [x1, #224]
322- ; CHECK-NEXT: stp q3, q1, [x1, #96]
295+ ; CHECK-NEXT: stp q16, q5, [x1]
296+ ; CHECK-NEXT: sunpklo z5.d, z1.s
297+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
298+ ; CHECK-NEXT: stp q7, q4, [x1, #128]
299+ ; CHECK-NEXT: sunpklo z4.d, z17.s
300+ ; CHECK-NEXT: ext z17.b, z17.b, z17.b, #8
301+ ; CHECK-NEXT: stp q18, q6, [x1, #192]
302+ ; CHECK-NEXT: sunpklo z6.d, z0.s
303+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
304+ ; CHECK-NEXT: stp q19, q3, [x1, #160]
305+ ; CHECK-NEXT: sunpklo z3.d, z2.s
306+ ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
307+ ; CHECK-NEXT: sunpklo z7.d, z17.s
308+ ; CHECK-NEXT: sunpklo z1.d, z1.s
309+ ; CHECK-NEXT: sunpklo z0.d, z0.s
310+ ; CHECK-NEXT: sunpklo z2.d, z2.s
311+ ; CHECK-NEXT: stp q5, q1, [x1, #32]
312+ ; CHECK-NEXT: stp q4, q7, [x1, #64]
313+ ; CHECK-NEXT: stp q3, q2, [x1, #96]
314+ ; CHECK-NEXT: stp q6, q0, [x1, #224]
323315; CHECK-NEXT: ret
324316 %a = load <32 x i8 >, ptr %in
325317 %b = add <32 x i8 > %a , %a
@@ -661,22 +653,20 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
661653; CHECK-NEXT: uunpklo z4.d, z2.s
662654; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
663655; CHECK-NEXT: uunpklo z0.s, z0.h
664- ; CHECK-NEXT: mov z7.d, z1.d
665- ; CHECK-NEXT: uunpklo z2.d, z2.s
656+ ; CHECK-NEXT: uunpklo z7.d, z1.s
657+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
666658; CHECK-NEXT: uunpklo z5.d, z3.s
667659; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
668- ; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
660+ ; CHECK-NEXT: uunpklo z2.d, z2.s
669661; CHECK-NEXT: uunpklo z1.d, z1.s
670- ; CHECK-NEXT: mov z6.d, z0.d
662+ ; CHECK-NEXT: uunpklo z6.d, z0.s
663+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
671664; CHECK-NEXT: uunpklo z3.d, z3.s
672665; CHECK-NEXT: stp q4, q2, [x0]
673- ; CHECK-NEXT: uunpklo z4.d, z7.s
674- ; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
675666; CHECK-NEXT: uunpklo z0.d, z0.s
667+ ; CHECK-NEXT: stp q7, q1, [x0, #32]
676668; CHECK-NEXT: stp q5, q3, [x0, #64]
677- ; CHECK-NEXT: uunpklo z2.d, z6.s
678- ; CHECK-NEXT: stp q1, q4, [x0, #32]
679- ; CHECK-NEXT: stp q0, q2, [x0, #96]
669+ ; CHECK-NEXT: stp q6, q0, [x0, #96]
680670; CHECK-NEXT: ret
681671 %b = zext <16 x i8 > %a to <16 x i64 >
682672 store <16 x i64 > %b , ptr %out
@@ -688,63 +678,57 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
688678; CHECK: // %bb.0:
689679; CHECK-NEXT: ldp q1, q0, [x0]
690680; CHECK-NEXT: add z0.b, z0.b, z0.b
691- ; CHECK-NEXT: add z1.b, z1.b, z1.b
692- ; CHECK-NEXT: mov z2.d, z0.d
693- ; CHECK-NEXT: uunpklo z0.h, z0.b
694- ; CHECK-NEXT: mov z3.d, z1.d
695- ; CHECK-NEXT: uunpklo z1.h, z1.b
681+ ; CHECK-NEXT: add z2.b, z1.b, z1.b
682+ ; CHECK-NEXT: uunpklo z3.h, z0.b
683+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
684+ ; CHECK-NEXT: uunpklo z1.h, z2.b
696685; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
686+ ; CHECK-NEXT: uunpklo z0.h, z0.b
687+ ; CHECK-NEXT: uunpklo z4.s, z3.h
697688; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
698- ; CHECK-NEXT: uunpklo z4.s, z0.h
699- ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
700689; CHECK-NEXT: uunpklo z5.s, z1.h
701- ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
702690; CHECK-NEXT: uunpklo z2.h, z2.b
703- ; CHECK-NEXT: uunpklo z3.h, z3.b
704- ; CHECK-NEXT: uunpklo z0.s, z0.h
705- ; CHECK-NEXT: uunpklo z16.d, z4.s
691+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
692+ ; CHECK-NEXT: uunpklo z6.s, z0.h
693+ ; CHECK-NEXT: uunpklo z3.s, z3.h
694+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
695+ ; CHECK-NEXT: uunpklo z7.d, z4.s
706696; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
707- ; CHECK-NEXT: uunpklo z1.s, z1.h
708- ; CHECK-NEXT: uunpklo z17.d, z5.s
697+ ; CHECK-NEXT: uunpklo z16.d, z5.s
709698; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
710- ; CHECK-NEXT: uunpklo z6.s, z2.h
711- ; CHECK-NEXT: uunpklo z7.s, z3.h
699+ ; CHECK-NEXT: uunpklo z17.s, z2.h
712700; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
713- ; CHECK-NEXT: uunpklo z4.d, z4.s
701+ ; CHECK-NEXT: uunpklo z1.s, z1.h
702+ ; CHECK-NEXT: uunpklo z0.s, z0.h
703+ ; CHECK-NEXT: uunpklo z18.d, z6.s
704+ ; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
705+ ; CHECK-NEXT: uunpklo z19.d, z3.s
714706; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
715- ; CHECK-NEXT: uunpklo z19 .d, z0 .s
707+ ; CHECK-NEXT: uunpklo z4 .d, z4 .s
716708; CHECK-NEXT: uunpklo z5.d, z5.s
717- ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
718709; CHECK-NEXT: uunpklo z2.s, z2.h
719- ; CHECK-NEXT: uunpklo z18.d, z6.s
720- ; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
721- ; CHECK-NEXT: uunpklo z3.s, z3.h
722- ; CHECK-NEXT: stp q16, q4, [x1, #128]
723- ; CHECK-NEXT: mov z16.d, z7.d
724- ; CHECK-NEXT: uunpklo z0.d, z0.s
725- ; CHECK-NEXT: stp q17, q5, [x1]
726- ; CHECK-NEXT: uunpklo z5.d, z7.s
727- ; CHECK-NEXT: uunpklo z4.d, z6.s
728- ; CHECK-NEXT: mov z6.d, z1.d
729- ; CHECK-NEXT: ext z16.b, z16.b, z7.b, #8
730- ; CHECK-NEXT: mov z7.d, z2.d
731- ; CHECK-NEXT: stp q19, q0, [x1, #160]
732- ; CHECK-NEXT: uunpklo z0.d, z2.s
733- ; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
734- ; CHECK-NEXT: uunpklo z1.d, z1.s
735- ; CHECK-NEXT: stp q18, q4, [x1, #192]
736- ; CHECK-NEXT: mov z4.d, z3.d
737- ; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8
738- ; CHECK-NEXT: uunpklo z16.d, z16.s
739710; CHECK-NEXT: uunpklo z6.d, z6.s
740- ; CHECK-NEXT: ext z4.b, z4.b, z3.b, #8
741- ; CHECK-NEXT: uunpklo z2.d, z7.s
742711; CHECK-NEXT: uunpklo z3.d, z3.s
743- ; CHECK-NEXT: stp q5, q16, [x1, #64]
744- ; CHECK-NEXT: stp q1, q6, [x1, #32]
745- ; CHECK-NEXT: uunpklo z1.d, z4.s
746- ; CHECK-NEXT: stp q0, q2, [x1, #224]
747- ; CHECK-NEXT: stp q3, q1, [x1, #96]
712+ ; CHECK-NEXT: stp q16, q5, [x1]
713+ ; CHECK-NEXT: uunpklo z5.d, z1.s
714+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
715+ ; CHECK-NEXT: stp q7, q4, [x1, #128]
716+ ; CHECK-NEXT: uunpklo z4.d, z17.s
717+ ; CHECK-NEXT: ext z17.b, z17.b, z17.b, #8
718+ ; CHECK-NEXT: stp q18, q6, [x1, #192]
719+ ; CHECK-NEXT: uunpklo z6.d, z0.s
720+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
721+ ; CHECK-NEXT: stp q19, q3, [x1, #160]
722+ ; CHECK-NEXT: uunpklo z3.d, z2.s
723+ ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
724+ ; CHECK-NEXT: uunpklo z7.d, z17.s
725+ ; CHECK-NEXT: uunpklo z1.d, z1.s
726+ ; CHECK-NEXT: uunpklo z0.d, z0.s
727+ ; CHECK-NEXT: uunpklo z2.d, z2.s
728+ ; CHECK-NEXT: stp q5, q1, [x1, #32]
729+ ; CHECK-NEXT: stp q4, q7, [x1, #64]
730+ ; CHECK-NEXT: stp q3, q2, [x1, #96]
731+ ; CHECK-NEXT: stp q6, q0, [x1, #224]
748732; CHECK-NEXT: ret
749733 %a = load <32 x i8 >, ptr %in
750734 %b = add <32 x i8 > %a , %a
0 commit comments