@@ -93,3 +93,34 @@ define internal void @caller_not_avx4() {
9393}
9494
9595declare i64 @caller_unknown_simple (i64 )
96+
97+ ; FIXME: This call should get inlined, because the callee only contains
98+ ; inline ASM, not real calls. Caller (#0) and callee (#1) differ only in +avx512dq, which only the caller enables; the autogenerated assertions below record that the call is currently kept.
99+ define <8 x i64 > @caller_inline_asm (ptr %p0 , i64 %k , ptr %p1 , ptr %p2 ) #0 {
100+ ; CHECK-LABEL: define {{[^@]+}}@caller_inline_asm
101+ ; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR2:[0-9]+]] {
102+ ; CHECK-NEXT: [[CALL:%.*]] = call <8 x i64> @callee_inline_asm(ptr [[P0]], i64 [[K]], ptr [[P1]], ptr [[P2]])
103+ ; CHECK-NEXT: ret <8 x i64> [[CALL]]
104+ ;
105+ %call = call <8 x i64 > @callee_inline_asm (ptr %p0 , i64 %k , ptr %p1 , ptr %p2 ) ; forwards all four arguments unchanged to the internal callee
106+ ret <8 x i64 > %call ; the callee's result is returned as-is
107+ }
108+
109+ define internal <8 x i64 > @callee_inline_asm (ptr %p0 , i64 %k , ptr %p1 , ptr %p2 ) #1 { ; internal callee: three vector loads feeding one inline-asm vpaddb; no call to any other function
110+ ; CHECK-LABEL: define {{[^@]+}}@callee_inline_asm
111+ ; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR3:[0-9]+]] {
112+ ; CHECK-NEXT: [[SRC:%.*]] = load <8 x i64>, ptr [[P0]], align 64
113+ ; CHECK-NEXT: [[A:%.*]] = load <8 x i64>, ptr [[P1]], align 64
114+ ; CHECK-NEXT: [[B:%.*]] = load <8 x i64>, ptr [[P2]], align 64
115+ ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 [[K]], <8 x i64> [[A]], <8 x i64> [[B]], <8 x i64> [[SRC]])
116+ ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
117+ ;
118+ %src = load <8 x i64 >, ptr %p0 , align 64 ; last asm input; its "0" constraint ties it to the output operand
119+ %a = load <8 x i64 >, ptr %p1 , align 64 ; asm operand $2
120+ %b = load <8 x i64 >, ptr %p2 , align 64 ; asm operand $3
121+ %3 = tail call <8 x i64 > asm "vpaddb\09 $($3, $2, $0 {$1}" , "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}" (i64 %k , <8 x i64 > %a , <8 x i64 > %b , <8 x i64 > %src ) #2 ; $0 = result, $1 = %k (constraint ^Yk, presumably an AVX-512 mask register - TODO confirm), $2 = %a, $3 = %b; attribute group #2 is not defined in the visible part of the file
122+ ret <8 x i64 > %3 ; returns the inline-asm result
123+ }
124+
125+ attributes #0 = { "min-legal-vector-width" ="512" "target-features" ="+avx,+avx2,+avx512bw,+avx512dq,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu" ="generic" } ; used by @caller_inline_asm; the only one of these two groups that lists +avx512dq
126+ attributes #1 = { "min-legal-vector-width" ="512" "target-features" ="+avx,+avx2,+avx512bw,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu" ="generic" } ; used by @callee_inline_asm; same feature list as #0 minus +avx512dq
0 commit comments