Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ jobs:
compiler: clang
cmake-args: -D DEACTIVATE_AVX2=ON

- name: Ubuntu Clang No AVX512
os: ubuntu-latest
compiler: clang
cmake-args: -D DEACTIVATE_AVX512=ON

- name: Ubuntu Clang No ZLIB
os: ubuntu-latest
compiler: clang
Expand Down
33 changes: 28 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
# build a lite version (only with BloscLZ and LZ4/LZ4HC) of the blosc library
# DEACTIVATE_AVX2: default OFF
# do not attempt to build with AVX2 instructions
# DEACTIVATE_AVX512: default OFF
# do not attempt to build with AVX512 instructions
# DEACTIVATE_ZLIB: default OFF
# do not include support for the Zlib library
# DEACTIVATE_ZSTD: default OFF
Expand Down Expand Up @@ -115,6 +117,8 @@ option(BUILD_LITE
"Build a lite version (only with BloscLZ and LZ4/LZ4HC) of the blosc library." OFF)
option(DEACTIVATE_AVX2
"Do not attempt to build with AVX2 instructions" OFF)
option(DEACTIVATE_AVX512
"Do not attempt to build with AVX512 instructions" OFF)
option(DEACTIVATE_ZLIB
"Do not include support for the Zlib library." OFF)
option(DEACTIVATE_ZSTD
Expand Down Expand Up @@ -281,30 +285,42 @@ if(CMAKE_SYSTEM_PROCESSOR STREQUAL i386 OR
else()
set(COMPILER_SUPPORT_AVX2 FALSE)
endif()
# The AVX512 bitshuffle source is compiled with both -mavx512f and
# -mavx512bw.  GCC 4.9 only knows the foundation subset (-mavx512f);
# -mavx512bw is accepted starting with GCC 5.0, so that is the minimum
# version for which AVX512 support can be enabled.
# NOTE(review): assumes this is the GNU branch of the compiler-ID chain
# (the opening if() is outside this view) -- confirm.
if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 5.0 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 5.0)
  set(COMPILER_SUPPORT_AVX512 TRUE)
else()
  set(COMPILER_SUPPORT_AVX512 FALSE)
endif()
elseif(CMAKE_C_COMPILER_ID STREQUAL Clang OR CMAKE_C_COMPILER_ID STREQUAL AppleClang)
set(COMPILER_SUPPORT_SSE2 TRUE)
if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 3.2 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 3.2)
set(COMPILER_SUPPORT_AVX2 TRUE)
else()
set(COMPILER_SUPPORT_AVX2 FALSE)
endif()
elseif(CMAKE_C_COMPILER_ID STREQUAL Intel)
set(COMPILER_SUPPORT_SSE2 TRUE)
if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 14.0 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 14.0)
set(COMPILER_SUPPORT_AVX2 TRUE)
if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 10.0 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 10.0)
set(COMPILER_SUPPORT_AVX512 TRUE)
else()
set(COMPILER_SUPPORT_AVX2 FALSE)
set(COMPILER_SUPPORT_AVX512 FALSE)
endif()
elseif(CMAKE_C_COMPILER_ID STREQUAL Intel)
# All Intel compilers since the introduction of AVX512 in 2016 should support it, so activate all SIMD flavors
set(COMPILER_SUPPORT_SSE2 TRUE)
set(COMPILER_SUPPORT_AVX2 TRUE)
set(COMPILER_SUPPORT_AVX512 TRUE)
elseif(MSVC)
set(COMPILER_SUPPORT_SSE2 TRUE)
# AVX2 and AVX512 are probed independently: a compiler new enough for
# AVX512 is also new enough for AVX2, so chaining the two tests with
# elseif() would make the AVX512 branch unreachable.
if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 18.00.30501 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 18.00.30501)
  set(COMPILER_SUPPORT_AVX2 TRUE)
else()
  set(COMPILER_SUPPORT_AVX2 FALSE)
endif()
# AVX512 starts to be supported with Visual Studio 2017 (version 15.0).
if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 19.10.25017 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 19.10.25017)
  set(COMPILER_SUPPORT_AVX512 TRUE)
else()
  # Always give the variable an explicit value so later checks never
  # see it undefined.
  set(COMPILER_SUPPORT_AVX512 FALSE)
endif()
else()
set(COMPILER_SUPPORT_SSE2 FALSE)
set(COMPILER_SUPPORT_AVX2 FALSE)
set(COMPILER_SUPPORT_AVX512 FALSE)
# Unrecognized compiler. Emit a warning message to let the user know hardware-acceleration won't be available.
message(WARNING "Unable to determine which ${CMAKE_SYSTEM_PROCESSOR} hardware features are supported by the C compiler (${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}).")
endif()
Expand Down Expand Up @@ -345,6 +361,13 @@ endif()
# Honor the user switches that turn SIMD flavors off.
if(DEACTIVATE_AVX2)
  set(COMPILER_SUPPORT_AVX2 FALSE)
endif()

# AVX512 goes away when it is deactivated explicitly, and also whenever
# AVX2 is deactivated, because the AVX512 functions in bitshuffle depend
# on AVX2 too.
if(DEACTIVATE_AVX2 OR DEACTIVATE_AVX512)
  set(COMPILER_SUPPORT_AVX512 FALSE)
endif()

# flags
Expand Down
30 changes: 29 additions & 1 deletion blosc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ list(APPEND SOURCES
blosc/directories.c
blosc/blosc2-stdio.c
blosc/b2nd.c
blosc/b2nd_utils.c
blosc/b2nd_utils.c
)
if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL arm64)
if(COMPILER_SUPPORT_SSE2)
Expand All @@ -273,6 +273,10 @@ if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL arm64)
message(STATUS "Adding run-time support for AVX2")
list(APPEND SOURCES blosc/shuffle-avx2.c blosc/bitshuffle-avx2.c)
endif()
# When the compiler can target AVX512, build the AVX512 bitshuffle
# implementation as well and report it in the configure log.
if(COMPILER_SUPPORT_AVX512)
  list(APPEND SOURCES blosc/bitshuffle-avx512.c)
  message(STATUS "Adding run-time support for AVX512")
endif()
endif()
if(COMPILER_SUPPORT_NEON)
message(STATUS "Adding run-time support for NEON")
Expand Down Expand Up @@ -343,6 +347,30 @@ if(COMPILER_SUPPORT_AVX2)
SOURCE shuffle.c
APPEND PROPERTY COMPILE_DEFINITIONS SHUFFLE_AVX2_ENABLED)
endif()
if(COMPILER_SUPPORT_AVX512)
  # Enable AVX512 code generation for the AVX512 implementation file
  # only.  The dispatcher (shuffle.c) must NOT receive these flags:
  # it runs on every CPU, and compiling it with AVX512 enabled would
  # let the compiler emit AVX512 instructions outside the guarded
  # code path.
  if(MSVC)
    set_source_files_properties(
      bitshuffle-avx512.c
      PROPERTIES COMPILE_OPTIONS "/arch:AVX512")
  else()
    set_source_files_properties(
      bitshuffle-avx512.c
      PROPERTIES COMPILE_OPTIONS "-mavx512f;-mavx512bw")
  endif()

  # Define a symbol for the shuffle-dispatch implementation
  # so it knows AVX512 is supported even though that file is
  # compiled without AVX512 support (for portability).
  set_property(
    SOURCE shuffle.c
    APPEND PROPERTY COMPILE_DEFINITIONS SHUFFLE_AVX512_ENABLED)
endif()
if(COMPILER_SUPPORT_NEON)
set_source_files_properties(
shuffle-neon.c bitshuffle-neon.c
Expand Down
26 changes: 12 additions & 14 deletions blosc/bitshuffle-altivec.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,18 +258,6 @@ int64_t bshuf_trans_byte_elem_128(void* in, void* out, const size_t size) {
}


/* Memory copy with bshuf call signature. */
int64_t bshuf_copy(void* in, void* out, const size_t size,
const size_t elem_size) {

char* in_b = (char*)in;
char* out_b = (char*)out;

memcpy(out_b, in_b, size * elem_size);
return size * elem_size;
}


/* Transpose bytes within elements using best SSE algorithm available. */
int64_t bshuf_trans_byte_elem_altivec(void* in, void* out, const size_t size,
const size_t elem_size, void* tmp_buf) {
Expand Down Expand Up @@ -373,18 +361,24 @@ int64_t bshuf_trans_bit_byte_altivec(void* in, void* out, const size_t size,

/* Transpose bits within elements.
 *
 * Chains three passes over the data: bshuf_trans_byte_elem_altivec,
 * bshuf_trans_bit_byte_altivec and bshuf_trans_bitrow_eight, using a
 * temporary buffer of size * elem_size bytes that is allocated and
 * released inside this function.
 *
 * in         input buffer
 * out        output buffer
 * size       number of elements; validated with CHECK_MULT_EIGHT
 * elem_size  size of one element in bytes
 *
 * Returns the value reported by the last pass, -1 if the temporary
 * buffer cannot be allocated, or the failing pass's result when an
 * intermediate pass reports an error.
 */
int64_t bshuf_trans_bit_elem_altivec(void* in, void* out, const size_t size,
                                     const size_t elem_size) {

  int64_t count;

  CHECK_MULT_EIGHT(size);

  void* tmp_buf = malloc(size * elem_size);
  if (tmp_buf == NULL) return -1;

  /* Error exits below release tmp_buf before returning; a bare early
   * return would leak it.
   * NOTE(review): assumes CHECK_ERR treats a negative count as the
   * error condition -- confirm against the macro definition. */
  count = bshuf_trans_byte_elem_altivec(in, out, size, elem_size, tmp_buf);
  if (count < 0) {
    free(tmp_buf);
    return count;
  }
  // bshuf_trans_bit_byte_altivec / bitshuffle1_altivec
  count = bshuf_trans_bit_byte_altivec(out, tmp_buf, size, elem_size);
  if (count < 0) {
    free(tmp_buf);
    return count;
  }
  count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);

  free(tmp_buf);

  return count;
}

Expand Down Expand Up @@ -580,16 +574,20 @@ int64_t bshuf_shuffle_bit_eightelem_altivec(void* in, void* out, const size_t si

/* Untranspose bits within elements.
 *
 * Chains two passes over the data: bshuf_trans_byte_bitrow_altivec and
 * bshuf_shuffle_bit_eightelem_altivec, using a temporary buffer of
 * size * elem_size bytes that is allocated and released inside this
 * function.
 *
 * in         input buffer
 * out        output buffer
 * size       number of elements; validated with CHECK_MULT_EIGHT
 * elem_size  size of one element in bytes
 *
 * Returns the value reported by the last pass, -1 if the temporary
 * buffer cannot be allocated, or the first pass's result when it
 * reports an error.
 */
int64_t bshuf_untrans_bit_elem_altivec(void* in, void* out, const size_t size,
                                       const size_t elem_size) {

  int64_t count;

  CHECK_MULT_EIGHT(size);

  void* tmp_buf = malloc(size * elem_size);
  if (tmp_buf == NULL) return -1;

  count = bshuf_trans_byte_bitrow_altivec(in, tmp_buf, size, elem_size);
  /* Release tmp_buf on the error exit as well; a bare early return
   * would leak it.
   * NOTE(review): assumes CHECK_ERR treats a negative count as the
   * error condition -- confirm against the macro definition. */
  if (count < 0) {
    free(tmp_buf);
    return count;
  }
  count = bshuf_shuffle_bit_eightelem_altivec(tmp_buf, out, size, elem_size);

  free(tmp_buf);
  return count;
}

Expand Down
Loading