[AArch64] Add tests for vector rounding + float-to-int conversions (#173516)
Merged
valadaptive merged 2 commits into llvm:main on Jan 23, 2026
Merged
[AArch64] Add tests for vector rounding + float-to-int conversions (#173516) — valadaptive merged 2 commits into llvm:main
valadaptive merged 2 commits into llvm:main
Conversation
Member
|
@llvm/pr-subscribers-backend-aarch64 Author: None (valadaptive) Changes: Right now we only have tests for the scalar versions of these intrinsics. Patch is 55.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/173516.diff 1 Files Affected:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt-fptoi.ll
new file mode 100644
index 0000000000000..98c328c6ae9e2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt-fptoi.ll
@@ -0,0 +1,1711 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-NO16
+; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+;
+; Tests for fused round + convert to int patterns (FCVTAS, FCVTAU, FCVTMS, FCVTMU, etc.)
+;
+
+;
+; round + signed -> fcvtas
+;
+
+define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtas_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
+ %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtas_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtas_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
+ %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtas_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
+ %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtas_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtas_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
+ %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtas_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
+ %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtas_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtas_2d_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
+ %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+
+;
+; round + unsigned -> fcvtau
+;
+
+define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtau_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.2s, v0.2s
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
+ %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtau_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtau_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.2s, v0.2s
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
+ %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtau_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.4s, v0.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
+ %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtau_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtau_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.4s, v0.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
+ %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtau_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.2d, v0.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
+ %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtau_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtau_2d_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frinta v0.2d, v0.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
+ %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+
+;
+; roundeven + signed -> fcvtns
+;
+
+define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtns_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
+ %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtns_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtns_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
+ %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtns_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
+ %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtns_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtns_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
+ %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtns_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
+ %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtns_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtns_2d_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
+ %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+
+;
+; roundeven + unsigned -> fcvtnu
+;
+
+define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtnu_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.2s, v0.2s
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
+ %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtnu_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtnu_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.2s, v0.2s
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
+ %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtnu_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.4s, v0.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
+ %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtnu_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtnu_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.4s, v0.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
+ %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtnu_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.2d, v0.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
+ %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtnu_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtnu_2d_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.2d, v0.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
+ %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+
+;
+; floor + signed -> fcvtms
+;
+
+define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtms_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
+ %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtms_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtms_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
+ %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtms_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
+ %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtms_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtms_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
+ %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtms_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A)
+ %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtms_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtms_2d_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A)
+ %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+
+;
+; floor + unsigned -> fcvtmu
+;
+
+define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtmu_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.2s, v0.2s
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
+ %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtmu_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtmu_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.2s, v0.2s
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
+ %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtmu_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.4s, v0.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
+ %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtmu_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtmu_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.4s, v0.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
+ %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtmu_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.2d, v0.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A)
+ %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtmu_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtmu_2d_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintm v0.2d, v0.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A)
+ %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+
+;
+; ceil + signed -> fcvtps
+;
+
+define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtps_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+ %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtps_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtps_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+ %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtps_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+ %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtps_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtps_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+ %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtps_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+ %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtps_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtps_2d_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+ %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+
+;
+; ceil + unsigned -> fcvtpu
+;
+
+define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtpu_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.2s, v0.2s
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+ %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtpu_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtpu_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.2s, v0.2s
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+ %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtpu_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.4s, v0.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+ %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtpu_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtpu_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.4s, v0.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+ %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtpu_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.2d, v0.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+ %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtpu_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtpu_2d_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintp v0.2d, v0.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+ %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+
+;
+; trunc + signed -> fcvtzs (already the default, but test the fusion)
+;
+
+define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzs_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A)
+ %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtzs_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzs_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A)
+ %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtzs_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzs_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A)
+ %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtzs_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzs_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A)
+ %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtzs_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A)
+ %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %t...
[truncated]
|
davemgreen
approved these changes
Jan 7, 2026
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-NO16 | ||
| ; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 | ||
| ; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 |
Collaborator
There was a problem hiding this comment.
Check gisel without fullfp16 too?
Contributor
Author
There was a problem hiding this comment.
Added to the RUN lines.
davemgreen
approved these changes
Jan 12, 2026
Harrish92
pushed a commit
to Harrish92/llvm-project
that referenced
this pull request
Jan 23, 2026
(llvm#173516) Right now we only have tests for the scalar versions of these intrinsics.
Harrish92
pushed a commit
to Harrish92/llvm-project
that referenced
this pull request
Jan 24, 2026
(llvm#173516) Right now we only have tests for the scalar versions of these intrinsics.
Icohedron
pushed a commit
to Icohedron/llvm-project
that referenced
this pull request
Jan 29, 2026
(llvm#173516) Right now we only have tests for the scalar versions of these intrinsics.
sshrestha-aa
pushed a commit
to sshrestha-aa/llvm-project
that referenced
this pull request
Feb 4, 2026
(llvm#173516) Right now we only have tests for the scalar versions of these intrinsics.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Right now we only have tests for the scalar versions of these intrinsics.