Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
os:
- ubuntu-latest
- macos-15-intel # x86_64
- macos-14 # arm64
- macos-15 # arm64
assembler:
- nasm
runs-on: ${{ matrix.os }}
Expand Down
12 changes: 4 additions & 8 deletions erasure_code/aarch64/ec_aarch64_dispatcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,6 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
if (auxval & HWCAP_ASIMD)
return gf_vect_dot_prod_neon;
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
return gf_vect_dot_prod_sve;
return gf_vect_dot_prod_neon;
#endif
return gf_vect_dot_prod_base;
Expand All @@ -96,8 +94,6 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
if (auxval & HWCAP_ASIMD)
return gf_vect_mad_neon;
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
return gf_vect_mad_sve;
return gf_vect_mad_neon;
#endif
return gf_vect_mad_base;
Expand All @@ -117,7 +113,9 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
if (auxval & HWCAP_ASIMD)
return ec_encode_data_neon;
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
if (sysctlEnabled(SYSCTL_SME2_KEY))
return ec_encode_data_sve2;
if (sysctlEnabled(SYSCTL_SME_KEY))
return ec_encode_data_sve;
return ec_encode_data_neon;
#endif
Expand All @@ -134,7 +132,7 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
if (auxval & HWCAP_ASIMD)
return ec_encode_data_update_neon;
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
if (sysctlEnabled(SYSCTL_SME_KEY))
return ec_encode_data_update_sve;
return ec_encode_data_update_neon;
#endif
Expand All @@ -151,8 +149,6 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
if (auxval & HWCAP_ASIMD)
return gf_vect_mul_neon;
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
return gf_vect_mul_sve;
return gf_vect_mul_neon;
#endif
return gf_vect_mul_base;
Expand Down
100 changes: 67 additions & 33 deletions erasure_code/aarch64/ec_aarch64_highlevel_func.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,81 +139,85 @@ ec_encode_data_update_neon(int len, int k, int rows, int vec_i, unsigned char *g
}
}

/*
 * ARM_STREAMING is appended to the prototypes of the external SVE/SVE2
 * routines declared in this file. On Apple builds it expands to the
 * `__arm_streaming` keyword; on every other target it expands to nothing,
 * so those prototypes stay plain declarations.
 */
#ifdef __APPLE__
#define ARM_STREAMING __arm_streaming
#else
#define ARM_STREAMING
#endif

/* SVE */
extern void
gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char *dest);
unsigned char *dest) ARM_STREAMING;
extern void
gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;

/* SVE2 */
extern void
gf_vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char *dest);
unsigned char *dest) ARM_STREAMING;
extern void
gf_2vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_3vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_4vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_5vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_6vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_7vect_dot_prod_sve2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;

extern void
gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest);
unsigned char *dest) ARM_STREAMING;

extern void
gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;
extern void
gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
unsigned char **dest) ARM_STREAMING;

void
ec_encode_data_sve(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding)
#ifdef __APPLE__
__arm_locally_streaming __attribute__((target("+sme")))
#endif
static void
ec_encode_data_sve_impl(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding)
{
if (len < 16) {
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
return;
}

while (rows > 7) {
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32;
Expand Down Expand Up @@ -257,14 +261,23 @@ ec_encode_data_sve(int len, int k, int rows, unsigned char *g_tbls, unsigned cha
}

void
ec_encode_data_sve2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding)
ec_encode_data_sve(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding)
{
if (len < 16) {
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
return;
}
ec_encode_data_sve_impl(len, k, rows, g_tbls, data, coding);
}

#ifdef __APPLE__
__arm_locally_streaming __attribute__((target("+sme")))
#endif
static void
ec_encode_data_sve2_impl(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding)
{
while (rows > 7) {
gf_4vect_dot_prod_sve2(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32;
Expand Down Expand Up @@ -308,13 +321,23 @@ ec_encode_data_sve2(int len, int k, int rows, unsigned char *g_tbls, unsigned ch
}

void
ec_encode_data_update_sve(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
ec_encode_data_sve2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding)
{
if (len < 16) {
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
return;
}
ec_encode_data_sve2_impl(len, k, rows, g_tbls, data, coding);
}

#ifdef __APPLE__
__arm_locally_streaming __attribute__((target("+sme")))
#endif
static void
ec_encode_data_update_sve_impl(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
{
while (rows > 6) {
gf_6vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
g_tbls += 6 * k * 32;
Expand Down Expand Up @@ -344,3 +367,14 @@ ec_encode_data_update_sve(int len, int k, int rows, int vec_i, unsigned char *g_
break;
}
}

/*
 * Public entry point for the SVE encode-update path.
 *
 * Lengths below 16 bytes are handed to ec_encode_data_update_base(); any
 * other length is forwarded, with all arguments unchanged, to
 * ec_encode_data_update_sve_impl(). The length check is done here, in the
 * plain wrapper, rather than inside the _impl helper (which on Apple builds
 * carries the __arm_locally_streaming attribute).
 */
void
ec_encode_data_update_sve(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                          unsigned char *data, unsigned char **coding)
{
        if (len >= 16) {
                ec_encode_data_update_sve_impl(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        /* Short buffers: use the base implementation. */
        ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
}
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_2vect_mad_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -104,7 +108,7 @@ cdecl(gf_2vect_mad_sve):
/* vector length agnostic */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

/* prefetch dest data */
prfb pldl2strm, p0, [x_dest1, x_pos]
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_3vect_mad_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -115,7 +119,7 @@ cdecl(gf_3vect_mad_sve):
/* vector length agnostic */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

/* dest data prefetch */
prfb pldl2strm, p0, [x_dest1, x_pos]
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_4vect_mad_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -126,7 +130,7 @@ cdecl(gf_4vect_mad_sve):
/* vector length agnostic */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

prfb pldl2strm, p0, [x_dest1, x_pos]
prfb pldl2strm, p0, [x_dest2, x_pos]
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_5vect_mad_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -137,7 +141,7 @@ cdecl(gf_5vect_mad_sve):
/* vector length agnostic */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

prfb pldl2strm, p0, [x_dest1, x_pos]
prfb pldl2strm, p0, [x_dest2, x_pos]
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_6vect_mad_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -148,7 +152,7 @@ cdecl(gf_6vect_mad_sve):
/* vector length agnostic */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

prfb pldl2strm, p0, [x_dest1, x_pos]
prfb pldl2strm, p0, [x_dest2, x_pos]
Expand Down
Loading
Loading