Skip to content

Commit 5055eee

Browse files
[Clang][AArch64] Add missing SME functions to header file. (#75791)
This includes:
* __arm_in_streaming_mode()
* __arm_has_sme()
* __arm_za_disable()
* svundef_za()
1 parent d714be9 commit 5055eee

File tree

5 files changed: 127 additions & 9 deletions

clang/include/clang/Basic/BuiltinsAArch64.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ TARGET_BUILTIN(__builtin_arm_ldg, "v*v*", "t", "mte")
6868
TARGET_BUILTIN(__builtin_arm_stg, "vv*", "t", "mte")
6969
TARGET_BUILTIN(__builtin_arm_subp, "Uiv*v*", "t", "mte")
7070

71+
// SME state function
72+
BUILTIN(__builtin_arm_get_sme_state, "vULi*ULi*", "n")
73+
7174
// Memory Operations
7275
TARGET_BUILTIN(__builtin_arm_mops_memset_tag, "v*v*iz", "", "mte,mops")
7376

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10430,6 +10430,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1043010430
return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
1043110431
}
1043210432

10433+
if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
10434+
// Create call to __arm_sme_state and store the results to the two pointers.
10435+
CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
10436+
llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
10437+
false),
10438+
"__arm_sme_state"));
10439+
auto Attrs =
10440+
AttributeList()
10441+
.addFnAttribute(getLLVMContext(), "aarch64_pstate_sm_compatible")
10442+
.addFnAttribute(getLLVMContext(), "aarch64_pstate_za_preserved");
10443+
CI->setAttributes(Attrs);
10444+
CI->setCallingConv(
10445+
llvm::CallingConv::
10446+
AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
10447+
Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
10448+
EmitPointerWithAlignment(E->getArg(0)));
10449+
return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
10450+
EmitPointerWithAlignment(E->getArg(1)));
10451+
}
10452+
1043310453
if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
1043410454
assert((getContext().getTypeSize(E->getType()) == 32) &&
1043510455
"rbit of unusual size!");
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// REQUIRES: aarch64-registered-target
3+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
4+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
6+
7+
#include <arm_sme_draft_spec_subject_to_change.h>
8+
9+
// CHECK-LABEL: @test_in_streaming_mode(
10+
// CHECK-NEXT: entry:
11+
// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]]
12+
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
13+
// CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP1]], 1
14+
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0
15+
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
16+
//
17+
// CPP-CHECK-LABEL: @_Z22test_in_streaming_modev(
18+
// CPP-CHECK-NEXT: entry:
19+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]]
20+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
21+
// CPP-CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP1]], 1
22+
// CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0
23+
// CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]]
24+
//
25+
bool test_in_streaming_mode(void) __arm_streaming_compatible {
26+
return __arm_in_streaming_mode();
27+
}
28+
29+
// CHECK-LABEL: @test_za_disable(
30+
// CHECK-NEXT: entry:
31+
// CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR4:[0-9]+]]
32+
// CHECK-NEXT: ret void
33+
//
34+
// CPP-CHECK-LABEL: @_Z15test_za_disablev(
35+
// CPP-CHECK-NEXT: entry:
36+
// CPP-CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR4:[0-9]+]]
37+
// CPP-CHECK-NEXT: ret void
38+
//
39+
void test_za_disable(void) __arm_streaming_compatible {
40+
__arm_za_disable();
41+
}
42+
43+
// CHECK-LABEL: @test_has_sme(
44+
// CHECK-NEXT: entry:
45+
// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]]
46+
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
47+
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
48+
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
49+
//
50+
// CPP-CHECK-LABEL: @_Z12test_has_smev(
51+
// CPP-CHECK-NEXT: entry:
52+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]]
53+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
54+
// CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
55+
// CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]]
56+
//
57+
bool test_has_sme(void) __arm_streaming_compatible {
58+
return __arm_has_sme();
59+
}
60+
61+
// CHECK-LABEL: @test_svundef_za(
62+
// CHECK-NEXT: entry:
63+
// CHECK-NEXT: ret void
64+
//
65+
// CPP-CHECK-LABEL: @_Z15test_svundef_zav(
66+
// CPP-CHECK-NEXT: entry:
67+
// CPP-CHECK-NEXT: ret void
68+
//
69+
void test_svundef_za(void) __arm_streaming_compatible __arm_shared_za {
70+
svundef_za();
71+
}
72+

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qcvtn.c

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,11 @@
88
// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
99
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
1010

11-
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
12-
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
13-
// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
14-
// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
15-
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
11+
// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
12+
// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
13+
// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
1614

17-
#include <arm_sme_draft_spec_subject_to_change.h>
15+
#include <arm_sve.h>
1816

1917
#ifdef SVE_OVERLOADED_FORMS
2018
// A simple used,unused... macro, long enough to represent any SVE builtin.
@@ -23,6 +21,12 @@
2321
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
2422
#endif
2523

24+
#ifndef TEST_SME2
25+
#define ATTR
26+
#else
27+
#define ATTR __arm_streaming
28+
#endif
29+
2630
// CHECK-LABEL: @test_qcvtn_s16_s32_x2(
2731
// CHECK-NEXT: entry:
2832
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
@@ -37,7 +41,7 @@
3741
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
3842
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
3943
//
40-
svint16_t test_qcvtn_s16_s32_x2(svint32x2_t zn) __arm_streaming_compatible {
44+
svint16_t test_qcvtn_s16_s32_x2(svint32x2_t zn) ATTR {
4145
return SVE_ACLE_FUNC(svqcvtn_s16,_s32_x2,,)(zn);
4246
}
4347

@@ -55,7 +59,7 @@ svint16_t test_qcvtn_s16_s32_x2(svint32x2_t zn) __arm_streaming_compatible {
5559
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
5660
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
5761
//
58-
svuint16_t test_qcvtn_u16_u32_x2(svuint32x2_t zn) __arm_streaming_compatible {
62+
svuint16_t test_qcvtn_u16_u32_x2(svuint32x2_t zn) ATTR {
5963
return SVE_ACLE_FUNC(svqcvtn_u16,_u32_x2,,)(zn);
6064
}
6165

@@ -73,6 +77,6 @@ svuint16_t test_qcvtn_u16_u32_x2(svuint32x2_t zn) __arm_streaming_compatible {
7377
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
7478
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
7579
//
76-
svuint16_t test_qcvtn_u16_s32_x2(svint32x2_t zn) __arm_streaming_compatible {
80+
svuint16_t test_qcvtn_u16_s32_x2(svint32x2_t zn) ATTR {
7781
return SVE_ACLE_FUNC(svqcvtn_u16,_s32_x2,,)(zn);
7882
}

clang/utils/TableGen/SveEmitter.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1603,6 +1603,25 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) {
16031603
OS << "extern \"C\" {\n";
16041604
OS << "#endif\n\n";
16051605

1606+
OS << "void __arm_za_disable(void) __arm_streaming_compatible;\n\n";
1607+
1608+
OS << "__ai bool __arm_has_sme(void) __arm_streaming_compatible {\n";
1609+
OS << " uint64_t x0, x1;\n";
1610+
OS << " __builtin_arm_get_sme_state(&x0, &x1);\n";
1611+
OS << " return x0 & (1ULL << 63);\n";
1612+
OS << "}\n\n";
1613+
1614+
OS << "__ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible "
1615+
"{\n";
1616+
OS << " uint64_t x0, x1;\n";
1617+
OS << " __builtin_arm_get_sme_state(&x0, &x1);\n";
1618+
OS << " return x0 & 1;\n";
1619+
OS << "}\n\n";
1620+
1621+
OS << "__ai __attribute__((target(\"sme\"))) void svundef_za(void) "
1622+
"__arm_streaming_compatible __arm_shared_za "
1623+
"{ }\n\n";
1624+
16061625
createCoreHeaderIntrinsics(OS, *this, ACLEKind::SME);
16071626

16081627
OS << "#ifdef __cplusplus\n";

0 commit comments

Comments
 (0)