diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e7094c21..a4993a0e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: os: - ubuntu-latest - macos-15-intel # x86_64 - - macos-14 # arm64 + - macos-15 # arm64 assembler: - nasm runs-on: ${{ matrix.os }} diff --git a/erasure_code/aarch64/ec_aarch64_dispatcher.c b/erasure_code/aarch64/ec_aarch64_dispatcher.c index f15f65bb..544ba480 100644 --- a/erasure_code/aarch64/ec_aarch64_dispatcher.c +++ b/erasure_code/aarch64/ec_aarch64_dispatcher.c @@ -79,8 +79,6 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod) if (auxval & HWCAP_ASIMD) return gf_vect_dot_prod_neon; #elif defined(__APPLE__) - if (sysctlEnabled(SYSCTL_SVE_KEY)) - return gf_vect_dot_prod_sve; return gf_vect_dot_prod_neon; #endif return gf_vect_dot_prod_base; @@ -96,8 +94,6 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mad) if (auxval & HWCAP_ASIMD) return gf_vect_mad_neon; #elif defined(__APPLE__) - if (sysctlEnabled(SYSCTL_SVE_KEY)) - return gf_vect_mad_sve; return gf_vect_mad_neon; #endif return gf_vect_mad_base; @@ -117,7 +113,9 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data) if (auxval & HWCAP_ASIMD) return ec_encode_data_neon; #elif defined(__APPLE__) - if (sysctlEnabled(SYSCTL_SVE_KEY)) + if (sysctlEnabled(SYSCTL_SME2_KEY)) + return ec_encode_data_sve2; + if (sysctlEnabled(SYSCTL_SME_KEY)) return ec_encode_data_sve; return ec_encode_data_neon; #endif @@ -134,7 +132,7 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update) if (auxval & HWCAP_ASIMD) return ec_encode_data_update_neon; #elif defined(__APPLE__) - if (sysctlEnabled(SYSCTL_SVE_KEY)) + if (sysctlEnabled(SYSCTL_SME_KEY)) return ec_encode_data_update_sve; return ec_encode_data_update_neon; #endif @@ -151,8 +149,6 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mul) if (auxval & HWCAP_ASIMD) return gf_vect_mul_neon; #elif defined(__APPLE__) - if (sysctlEnabled(SYSCTL_SVE_KEY)) - return gf_vect_mul_sve; return gf_vect_mul_neon; #endif return gf_vect_mul_base; 
diff --git a/erasure_code/aarch64/ec_aarch64_highlevel_func.c b/erasure_code/aarch64/ec_aarch64_highlevel_func.c index 882ed1f3..c47ed44b 100644 --- a/erasure_code/aarch64/ec_aarch64_highlevel_func.c +++ b/erasure_code/aarch64/ec_aarch64_highlevel_func.c @@ -139,81 +139,85 @@ ec_encode_data_update_neon(int len, int k, int rows, int vec_i, unsigned char *g } } +#ifdef __APPLE__ +#define ARM_STREAMING __arm_streaming +#else +#define ARM_STREAMING +#endif + /* SVE */ extern void gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char *dest); + unsigned char *dest) ARM_STREAMING; extern void gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; /* SVE2 */ extern void gf_vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char *dest); + unsigned char *dest) ARM_STREAMING; extern void gf_2vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_3vect_dot_prod_sve2(int len, int vlen, unsigned char 
*gftbls, unsigned char **src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_4vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_5vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_6vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_7vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char *dest); + unsigned char *dest) ARM_STREAMING; extern void gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; extern void gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); + unsigned char **dest) ARM_STREAMING; -void -ec_encode_data_sve(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, - unsigned char **coding) +#ifdef __APPLE__ +__arm_locally_streaming __attribute__((target("+sme"))) +#endif +static void +ec_encode_data_sve_impl(int len, int k, int rows, 
unsigned char *g_tbls, unsigned char **data, + unsigned char **coding) { - if (len < 16) { - ec_encode_data_base(len, k, rows, g_tbls, data, coding); - return; - } - while (rows > 7) { gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding); g_tbls += 4 * k * 32; @@ -257,14 +261,23 @@ ec_encode_data_sve(int len, int k, int rows, unsigned char *g_tbls, unsigned cha } void -ec_encode_data_sve2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, - unsigned char **coding) +ec_encode_data_sve(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, + unsigned char **coding) { if (len < 16) { ec_encode_data_base(len, k, rows, g_tbls, data, coding); return; } + ec_encode_data_sve_impl(len, k, rows, g_tbls, data, coding); +} +#ifdef __APPLE__ +__arm_locally_streaming __attribute__((target("+sme"))) +#endif +static void +ec_encode_data_sve2_impl(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, + unsigned char **coding) +{ while (rows > 7) { gf_4vect_dot_prod_sve2(len, k, g_tbls, data, coding); g_tbls += 4 * k * 32; @@ -308,13 +321,23 @@ ec_encode_data_sve2(int len, int k, int rows, unsigned char *g_tbls, unsigned ch } void -ec_encode_data_update_sve(int len, int k, int rows, int vec_i, unsigned char *g_tbls, - unsigned char *data, unsigned char **coding) +ec_encode_data_sve2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, + unsigned char **coding) { if (len < 16) { - ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); + ec_encode_data_base(len, k, rows, g_tbls, data, coding); return; } + ec_encode_data_sve2_impl(len, k, rows, g_tbls, data, coding); +} + +#ifdef __APPLE__ +__arm_locally_streaming __attribute__((target("+sme"))) +#endif +static void +ec_encode_data_update_sve_impl(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding) +{ while (rows > 6) { gf_6vect_mad_sve(len, k, vec_i, g_tbls, data, coding); g_tbls += 6 * k * 32; 
@@ -344,3 +367,14 @@ ec_encode_data_update_sve(int len, int k, int rows, int vec_i, unsigned char *g_ break; } } + +void +ec_encode_data_update_sve(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding) +{ + if (len < 16) { + ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); + return; + } + ec_encode_data_update_sve_impl(len, k, rows, vec_i, g_tbls, data, coding); +} diff --git a/erasure_code/aarch64/gf_2vect_mad_sve.S b/erasure_code/aarch64/gf_2vect_mad_sve.S index f0ddf011..0152f481 100644 --- a/erasure_code/aarch64/gf_2vect_mad_sve.S +++ b/erasure_code/aarch64/gf_2vect_mad_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -104,7 +108,7 @@ cdecl(gf_2vect_mad_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass /* prefetch dest data */ prfb pldl2strm, p0, [x_dest1, x_pos] diff --git a/erasure_code/aarch64/gf_3vect_mad_sve.S b/erasure_code/aarch64/gf_3vect_mad_sve.S index 9e0ca5c4..4664ce07 100644 --- a/erasure_code/aarch64/gf_3vect_mad_sve.S +++ b/erasure_code/aarch64/gf_3vect_mad_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -115,7 +119,7 @@ cdecl(gf_3vect_mad_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass /* dest data prefetch */ prfb pldl2strm, p0, [x_dest1, x_pos] diff --git a/erasure_code/aarch64/gf_4vect_mad_sve.S b/erasure_code/aarch64/gf_4vect_mad_sve.S index 89ec89f5..40a2bd9b 100644 --- a/erasure_code/aarch64/gf_4vect_mad_sve.S +++ b/erasure_code/aarch64/gf_4vect_mad_sve.S @@ -28,7 +28,11 @@ 
**********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -126,7 +130,7 @@ cdecl(gf_4vect_mad_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass prfb pldl2strm, p0, [x_dest1, x_pos] prfb pldl2strm, p0, [x_dest2, x_pos] diff --git a/erasure_code/aarch64/gf_5vect_mad_sve.S b/erasure_code/aarch64/gf_5vect_mad_sve.S index ab374d36..136ae079 100644 --- a/erasure_code/aarch64/gf_5vect_mad_sve.S +++ b/erasure_code/aarch64/gf_5vect_mad_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -137,7 +141,7 @@ cdecl(gf_5vect_mad_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass prfb pldl2strm, p0, [x_dest1, x_pos] prfb pldl2strm, p0, [x_dest2, x_pos] diff --git a/erasure_code/aarch64/gf_6vect_mad_sve.S b/erasure_code/aarch64/gf_6vect_mad_sve.S index c4f372cd..34983658 100644 --- a/erasure_code/aarch64/gf_6vect_mad_sve.S +++ b/erasure_code/aarch64/gf_6vect_mad_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -148,7 +152,7 @@ cdecl(gf_6vect_mad_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass prfb pldl2strm, p0, [x_dest1, x_pos] prfb pldl2strm, p0, [x_dest2, x_pos] diff --git a/erasure_code/aarch64/gf_nvect_dot_prod_sve.c b/erasure_code/aarch64/gf_nvect_dot_prod_sve.c index ebf31399..ab1fcd1f 100644 --- a/erasure_code/aarch64/gf_nvect_dot_prod_sve.c +++ b/erasure_code/aarch64/gf_nvect_dot_prod_sve.c @@ 
-26,7 +26,13 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ +#ifdef __APPLE__ +#include <arm_sme.h> +#define ARM_STREAMING __arm_streaming +#else #include <arm_sve.h> +#define ARM_STREAMING +#endif #include <stdint.h> // This implementation of the nvect_dot_prod uses several techniques for optimization: @@ -51,9 +57,14 @@ // can do this automatically in optimization so a separate implementation isn't required. // We simply allow the compiler to generate SVE2 versions as well. -__attribute__((target("+sve"), always_inline)) static inline void +#ifdef __APPLE__ +__attribute__((target("+sme"), always_inline)) +#else +__attribute__((target("+sve"), always_inline)) +#endif +static inline void gf_nvect_dot_prod_sve_unrolled(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest, int nvect) + unsigned char **dest, int nvect) ARM_STREAMING { if (len < 16) return; @@ -254,103 +265,173 @@ gf_nvect_dot_prod_sve_unrolled(int len, int vlen, unsigned char *gftbls, unsigne } // Optimized wrapper functions -__attribute__((target("+sve"))) void +#ifdef __APPLE__ +__attribute__((target("+sme"))) +#else +__attribute__((target("+sve"))) +#endif +void gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char *dest) + unsigned char *dest) ARM_STREAMING { unsigned char *dest_array[1] = { dest }; gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest_array, 1); } -__attribute__((target("+sve"))) void +#ifdef __APPLE__ +__attribute__((target("+sme"))) +#else +__attribute__((target("+sve"))) +#endif +void gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest) + unsigned char **dest) ARM_STREAMING { gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest, 2); } -__attribute__((target("+sve"))) void +#ifdef __APPLE__ 
+__attribute__((target("+sme"))) +#else +__attribute__((target("+sve"))) +#endif +void gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest) + unsigned char **dest) ARM_STREAMING { gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest, 3); } -__attribute__((target("+sve"))) void +#ifdef __APPLE__ +__attribute__((target("+sme"))) +#else +__attribute__((target("+sve"))) +#endif +void gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest) + unsigned char **dest) ARM_STREAMING { gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest, 4); } -__attribute__((target("+sve"))) void +#ifdef __APPLE__ +__attribute__((target("+sme"))) +#else +__attribute__((target("+sve"))) +#endif +void gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest) + unsigned char **dest) ARM_STREAMING { gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest, 5); } -__attribute__((target("+sve"))) void +#ifdef __APPLE__ +__attribute__((target("+sme"))) +#else +__attribute__((target("+sve"))) +#endif +void gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest) + unsigned char **dest) ARM_STREAMING { gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest, 6); } -__attribute__((target("+sve"))) void +#ifdef __APPLE__ +__attribute__((target("+sme"))) +#else +__attribute__((target("+sve"))) +#endif +void gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest) + unsigned char **dest) ARM_STREAMING { gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest, 7); } // SVE2 wrapper functions - compiler will optimize eor to eor3 automatically -__attribute__((target("+sve+sve2"))) void +#ifdef __APPLE__ +__attribute__((target("+sme+sme2"))) +#else +__attribute__((target("+sve+sve2"))) +#endif +void gf_vect_dot_prod_sve2(int 
len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char *dest) + unsigned char *dest) ARM_STREAMING { unsigned char *dest_array[1] = { dest }; gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest_array, 1); } -__attribute__((target("+sve+sve2"))) void +#ifdef __APPLE__ +__attribute__((target("+sme+sme2"))) +#else +__attribute__((target("+sve+sve2"))) +#endif +void gf_2vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest) + unsigned char **dest) ARM_STREAMING { gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest, 2); } -__attribute__((target("+sve+sve2"))) void +#ifdef __APPLE__ +__attribute__((target("+sme+sme2"))) +#else +__attribute__((target("+sve+sve2"))) +#endif +void gf_3vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest) + unsigned char **dest) ARM_STREAMING { gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest, 3); } -__attribute__((target("+sve+sve2"))) void +#ifdef __APPLE__ +__attribute__((target("+sme+sme2"))) +#else +__attribute__((target("+sve+sve2"))) +#endif +void gf_4vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest) + unsigned char **dest) ARM_STREAMING { gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest, 4); } -__attribute__((target("+sve+sve2"))) void +#ifdef __APPLE__ +__attribute__((target("+sme+sme2"))) +#else +__attribute__((target("+sve+sve2"))) +#endif +void gf_5vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest) + unsigned char **dest) ARM_STREAMING { gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest, 5); } -__attribute__((target("+sve+sve2"))) void +#ifdef __APPLE__ +__attribute__((target("+sme+sme2"))) +#else +__attribute__((target("+sve+sve2"))) +#endif +void gf_6vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char 
**dest) + unsigned char **dest) ARM_STREAMING { gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest, 6); } -__attribute__((target("+sve+sve2"))) void +#ifdef __APPLE__ +__attribute__((target("+sme+sme2"))) +#else +__attribute__((target("+sve+sve2"))) +#endif +void gf_7vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src, - unsigned char **dest) + unsigned char **dest) ARM_STREAMING { gf_nvect_dot_prod_sve_unrolled(len, vlen, gftbls, src, dest, 7); } diff --git a/erasure_code/aarch64/gf_vect_mad_sve.S b/erasure_code/aarch64/gf_vect_mad_sve.S index 41d6da9d..e254f8c4 100644 --- a/erasure_code/aarch64/gf_vect_mad_sve.S +++ b/erasure_code/aarch64/gf_vect_mad_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -87,7 +91,7 @@ cdecl(gf_vect_mad_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass /* prefetch dest data */ prfb pldl2strm, p0, [x_dest, x_pos] diff --git a/erasure_code/aarch64/gf_vect_mul_sve.S b/erasure_code/aarch64/gf_vect_mul_sve.S index d2219bf5..486eb136 100644 --- a/erasure_code/aarch64/gf_vect_mul_sve.S +++ b/erasure_code/aarch64/gf_vect_mul_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -92,7 +96,7 @@ cdecl(gf_vect_mul_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass /* load src data, governed by p0 */ ld1b z_src.b, p0/z, [x_src, x_pos] diff --git a/include/aarch64_multibinary.h b/include/aarch64_multibinary.h index c444f2bb..fedde1b0 100644 --- a/include/aarch64_multibinary.h +++ b/include/aarch64_multibinary.h @@ -218,7 +218,8 @@ #elif 
defined(__APPLE__) #define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL" // from macOS 12 FEAT_* sysctl infos are available #define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32" -#define SYSCTL_SVE_KEY "hw.optional.arm.FEAT_SVE" // this one is just a guess and need to check macOS update +#define SYSCTL_SME_KEY "hw.optional.arm.FEAT_SME" +#define SYSCTL_SME2_KEY "hw.optional.arm.FEAT_SME2" #include <sys/types.h> #include <sys/sysctl.h> static inline int sysctlEnabled(const char* name){