@@ -312,3 +312,107 @@ define i64 @fabs_v2f32_2() {
 }
 
 declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)
+
+; PR70947 - TODO remove duplicate xmm/ymm constant loads
+define void @PR70947(ptr %src, ptr %dst) {
+; X86-AVX1-LABEL: PR70947:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX1-NEXT: vmovups (%ecx), %ymm0
+; X86-AVX1-NEXT: vmovups 32(%ecx), %xmm1
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vmovups %ymm0, (%eax)
+; X86-AVX1-NEXT: vmovups %xmm1, 16(%eax)
+; X86-AVX1-NEXT: vzeroupper
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: PR70947:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX2-NEXT: vmovups 32(%ecx), %xmm0
+; X86-AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
+; X86-AVX2-NEXT: vandps (%ecx), %ymm1, %ymm1
+; X86-AVX2-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovups %ymm1, (%eax)
+; X86-AVX2-NEXT: vmovups %xmm0, 16(%eax)
+; X86-AVX2-NEXT: vzeroupper
+; X86-AVX2-NEXT: retl
+;
+; X86-AVX512VL-LABEL: PR70947:
+; X86-AVX512VL: # %bb.0:
+; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512VL-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
+; X86-AVX512VL-NEXT: vandps (%ecx), %ymm0, %ymm1
+; X86-AVX512VL-NEXT: vandps 32(%ecx), %xmm0, %xmm0
+; X86-AVX512VL-NEXT: vmovups %ymm1, (%eax)
+; X86-AVX512VL-NEXT: vmovups %xmm0, 16(%eax)
+; X86-AVX512VL-NEXT: vzeroupper
+; X86-AVX512VL-NEXT: retl
+;
+; X86-AVX512VLDQ-LABEL: PR70947:
+; X86-AVX512VLDQ: # %bb.0:
+; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512VLDQ-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
+; X86-AVX512VLDQ-NEXT: vandps (%ecx), %ymm0, %ymm1
+; X86-AVX512VLDQ-NEXT: vandps 32(%ecx), %xmm0, %xmm0
+; X86-AVX512VLDQ-NEXT: vmovups %ymm1, (%eax)
+; X86-AVX512VLDQ-NEXT: vmovups %xmm0, 16(%eax)
+; X86-AVX512VLDQ-NEXT: vzeroupper
+; X86-AVX512VLDQ-NEXT: retl
+;
+; X64-AVX1-LABEL: PR70947:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
+; X64-AVX1-NEXT: vmovups 32(%rdi), %xmm1
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64-AVX1-NEXT: vmovups %ymm0, (%rsi)
+; X64-AVX1-NEXT: vmovups %xmm1, 16(%rsi)
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: PR70947:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vmovups 32(%rdi), %xmm0
+; X64-AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
+; X64-AVX2-NEXT: vandps (%rdi), %ymm1, %ymm1
+; X64-AVX2-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovups %ymm1, (%rsi)
+; X64-AVX2-NEXT: vmovups %xmm0, 16(%rsi)
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512VL-LABEL: PR70947:
+; X64-AVX512VL: # %bb.0:
+; X64-AVX512VL-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
+; X64-AVX512VL-NEXT: vandps (%rdi), %ymm0, %ymm1
+; X64-AVX512VL-NEXT: vandps 32(%rdi), %xmm0, %xmm0
+; X64-AVX512VL-NEXT: vmovups %ymm1, (%rsi)
+; X64-AVX512VL-NEXT: vmovups %xmm0, 16(%rsi)
+; X64-AVX512VL-NEXT: vzeroupper
+; X64-AVX512VL-NEXT: retq
+;
+; X64-AVX512VLDQ-LABEL: PR70947:
+; X64-AVX512VLDQ: # %bb.0:
+; X64-AVX512VLDQ-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
+; X64-AVX512VLDQ-NEXT: vandps (%rdi), %ymm0, %ymm1
+; X64-AVX512VLDQ-NEXT: vandps 32(%rdi), %xmm0, %xmm0
+; X64-AVX512VLDQ-NEXT: vmovups %ymm1, (%rsi)
+; X64-AVX512VLDQ-NEXT: vmovups %xmm0, 16(%rsi)
+; X64-AVX512VLDQ-NEXT: vzeroupper
+; X64-AVX512VLDQ-NEXT: retq
+  %src4 = getelementptr inbounds double, ptr %src, i64 4
+  %dst4 = getelementptr inbounds i32, ptr %dst, i64 4
+  %ld0 = load <4 x double>, ptr %src, align 8
+  %ld4 = load <2 x double>, ptr %src4, align 8
+  %fabs0 = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %ld0)
+  %fabs4 = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %ld4)
+  store <4 x double> %fabs0, ptr %dst, align 4
+  store <2 x double> %fabs4, ptr %dst4, align 4
+  ret void
+}
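
For context on the TODO above: the [NaN,NaN,NaN,NaN] constant is the per-element 0x7FFFFFFFFFFFFFFF sign-clearing mask (the asm printer renders that bit pattern as NaN), and in the AVX1/AVX2 runs it is loaded twice, once for the 256-bit fabs and again from the constant pool for the 128-bit fabs. The AVX512VL runs already show the desired single-load form, where the 128-bit vandps reuses %xmm0, the low half of the broadcast mask in %ymm0. A hypothetical sketch of the same shape for the X64-AVX2 run, assuming the backend learns to reuse that register instead of emitting a second LCPI load (this mirrors the X64-AVX512VL checks above and is illustrative only, not current output):

  vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]  # single broadcast load of the sign mask
  vandps (%rdi), %ymm0, %ymm1                     # 256-bit fabs
  vandps 32(%rdi), %xmm0, %xmm0                   # 128-bit fabs reusing the low half of ymm0
  vmovups %ymm1, (%rsi)
  vmovups %xmm0, 16(%rsi)
  vzeroupper
  retq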