Skip to content

Comments

[AArch64] Add tests for vector rounding + float-to-int conversions#173516

Merged
valadaptive merged 2 commits intollvm:mainfrom
valadaptive:aarch64-vector-round-convert-tests
Jan 23, 2026
Merged

[AArch64] Add tests for vector rounding + float-to-int conversions#173516
valadaptive merged 2 commits intollvm:mainfrom
valadaptive:aarch64-vector-round-convert-tests

Conversation

@valadaptive
Copy link
Contributor

Right now we only have tests for the scalar versions of these intrinsics.

@llvmbot
Copy link
Member

llvmbot commented Dec 25, 2025

@llvm/pr-subscribers-backend-aarch64

Author: None (valadaptive)

Changes

Right now we only have tests for the scalar versions of these intrinsics.


Patch is 55.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/173516.diff

1 Files Affected:

  • (added) llvm/test/CodeGen/AArch64/arm64-vcvt-fptoi.ll (+1711)
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt-fptoi.ll
new file mode 100644
index 0000000000000..98c328c6ae9e2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt-fptoi.ll
@@ -0,0 +1,1711 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-NO16
+; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+;
+; Tests for fused round + convert to int patterns (FCVTAS, FCVTAU, FCVTMS, FCVTMU, etc.)
+;
+
+;
+; round + signed -> fcvtas
+;
+
+define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtas_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
+  %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtas_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtas_2s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtas_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
+  %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtas_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtas_4s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtas_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
+  %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtas_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtas_2d_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
+  %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1)
+  ret <2 x i64> %tmp2
+}
+
+
+;
+; round + unsigned -> fcvtau
+;
+
+define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtau_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
+  %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtau_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtau_2s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtau_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
+  %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtau_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtau_4s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtau_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
+  %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtau_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtau_2d_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
+  %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1)
+  ret <2 x i64> %tmp2
+}
+
+
+;
+; roundeven + signed -> fcvtns
+;
+
+define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtns_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
+  %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtns_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtns_2s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtns_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
+  %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtns_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtns_4s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtns_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
+  %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtns_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtns_2d_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
+  %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1)
+  ret <2 x i64> %tmp2
+}
+
+
+;
+; roundeven + unsigned -> fcvtnu
+;
+
+define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtnu_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
+  %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtnu_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtnu_2s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtnu_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
+  %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtnu_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtnu_4s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtnu_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
+  %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtnu_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtnu_2d_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
+  %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1)
+  ret <2 x i64> %tmp2
+}
+
+
+;
+; floor + signed -> fcvtms
+;
+
+define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtms_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
+  %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtms_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtms_2s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtms_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
+  %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtms_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtms_4s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtms_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A)
+  %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtms_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtms_2d_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A)
+  %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1)
+  ret <2 x i64> %tmp2
+}
+
+
+;
+; floor + unsigned -> fcvtmu
+;
+
+define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtmu_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
+  %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtmu_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtmu_2s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtmu_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
+  %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtmu_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtmu_4s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtmu_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A)
+  %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtmu_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtmu_2d_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A)
+  %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1)
+  ret <2 x i64> %tmp2
+}
+
+
+;
+; ceil + signed -> fcvtps
+;
+
+define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtps_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+  %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtps_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtps_2s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtps_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+  %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtps_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtps_4s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtps_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+  %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtps_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtps_2d_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+  %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1)
+  ret <2 x i64> %tmp2
+}
+
+
+;
+; ceil + unsigned -> fcvtpu
+;
+
+define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtpu_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+  %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtpu_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtpu_2s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzu v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtpu_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+  %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtpu_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtpu_4s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtpu_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+  %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @fcvtpu_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtpu_2d_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+  %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1)
+  ret <2 x i64> %tmp2
+}
+
+
+;
+; trunc + signed -> fcvtzs (already the default, but test the fusion)
+;
+
+define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzs_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A)
+  %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @fcvtzs_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzs_2s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz v0.2s, v0.2s
+; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+define <4 x i32> @fcvtzs_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzs_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A)
+  %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtzs_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzs_4s_sat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz v0.4s, v0.4s
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtzs_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz v0.2d, v0.2d
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    ret
+  %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A)
+  %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %t...
[truncated]

Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-NO16
; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check gisel without fullfp16 too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added to the RUN lines.

@valadaptive valadaptive requested a review from davemgreen January 8, 2026 11:43
@valadaptive valadaptive merged commit e028b21 into llvm:main Jan 23, 2026
10 checks passed
Harrish92 pushed a commit to Harrish92/llvm-project that referenced this pull request Jan 23, 2026
…lvm#173516)

Right now we only have tests for the scalar versions of these
intrinsics.
Harrish92 pushed a commit to Harrish92/llvm-project that referenced this pull request Jan 24, 2026
…lvm#173516)

Right now we only have tests for the scalar versions of these
intrinsics.
Icohedron pushed a commit to Icohedron/llvm-project that referenced this pull request Jan 29, 2026
…lvm#173516)

Right now we only have tests for the scalar versions of these
intrinsics.
sshrestha-aa pushed a commit to sshrestha-aa/llvm-project that referenced this pull request Feb 4, 2026
…lvm#173516)

Right now we only have tests for the scalar versions of these
intrinsics.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants