Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ lite_option(CUDA_WITH_FP16 "Compile with cuda half support"
lite_option(LITE_WITH_ARM_CLANG "when arm lang is clang, its ON." OFF)
lite_option(LITE_WITH_XCODE "when debug in xcode, its ON." OFF)
lite_option(LITE_WITH_ARM82_FP16 "when compile with arm v8.2 fp16, it's ON." OFF)
lite_option(LITE_WITH_ARM8_SVE2 "Enable SVE2 instructions in ARMv8." OFF)
lite_option(LITE_WITH_ARM82_INT8_SDOT "when compile with arm v8.2 int8, it's ON." OFF)
lite_option(LITE_WITH_CODE_META_INFO "include git version in the header file." ON)
lite_option(WITH_NODE_RAW_FS "(Only available when compiling by Emscripten) Whether build with NODERAWFS" OFF)
Expand Down
4 changes: 4 additions & 0 deletions cmake/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -305,3 +305,7 @@ if (EMSCRIPTEN)
add_compile_options("-pthread")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread")
endif()

if (LITE_WITH_ARM8_SVE2)
add_definitions("-DLITE_WITH_ARM8_SVE2")
endif()
15 changes: 14 additions & 1 deletion cmake/os/android.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,20 @@ if(ARM_TARGET_LANG STREQUAL "clang")
set(triple aarch64-v8a-linux-android)
if(ANDROID_STL_TYPE MATCHES "^c\\+\\+_")
# Use CMAKE_CXX_STANDARD_LIBRARIES_INIT to ensure libunwind and libc++ is linked in the right order
set(CMAKE_CXX_STANDARD_LIBRARIES_INIT "${CMAKE_CXX_STANDARD_LIBRARIES_INIT} ${ANDROID_NDK}/sources/cxx-stl/llvm-libc++/libs/${ANDROID_ARCH_ABI}/libunwind.a")
set(LIBUNWIND_PATH "${CMAKE_CXX_STANDARD_LIBRARIES_INIT} ${ANDROID_NDK}/sources/cxx-stl/llvm-libc++/libs/${ANDROID_ARCH_ABI}/libunwind.a")
if(EXISTS ${LIBUNWIND_PATH})
message(STATUS "libunwind is in ${LIBUNWIND_PATH}")
else()
# happened when NDK >= 23
file(GLOB_RECURSE WIND_PATH "${CMAKE_ANDROID_NDK}/*/libunwind.a")
foreach(loop_path ${WIND_PATH})
string(FIND ${loop_path} "aarch64" STR_END)
string(SUBSTRING ${loop_path} 0 ${STR_END} REAL_LIBUNWIND_PATH)
break()
endforeach()
set(LIBUNWIND_PATH "${REAL_LIBUNWIND_PATH}aarch64/libunwind.a")
endif()
set(CMAKE_CXX_STANDARD_LIBRARIES_INIT "${CMAKE_CXX_STANDARD_LIBRARIES_INIT} ${LIBUNWIND_PATH}")
if (ANDROID_NATIVE_API_LEVEL LESS 21)
set(CMAKE_CXX_STANDARD_LIBRARIES_INIT "${CMAKE_CXX_STANDARD_LIBRARIES_INIT} ${ANDROID_NDK}/sources/cxx-stl/llvm-libc++/libs/${ANDROID_ARCH_ABI}/libandroid_support.a")
endif()
Expand Down
12 changes: 12 additions & 0 deletions cmake/postproject.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,18 @@ if(ANDROID)
endif()
endif()

if(LITE_WITH_ARM8_SVE2)
if(${ANDROID_NDK_MAJOR})
if(${ANDROID_NDK_MAJOR} GREATER_EQUAL "23")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+sve2")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+sve2")
else()
message(FATAL_ERROR "NDK VERSION: ${ANDROID_NDK_MAJOR}, however it must be greater equal 23 when sve2 is ON")
endif()
endif()
endif()


if(LITE_WITH_ARM82_INT8_SDOT)
if(${ANDROID_NDK_MAJOR})
if(${ANDROID_NDK_MAJOR} GREATER "17")
Expand Down
48 changes: 48 additions & 0 deletions lite/backends/arm/math/pooling.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/parallel_defines.h"

#if defined(__aarch64__) && defined(LITE_WITH_ARM8_SVE2)
#include <arm_sve.h>
#endif

namespace paddle {
namespace lite {
namespace arm {
Expand Down Expand Up @@ -980,6 +984,50 @@ void pooling_global_avg(const float* din,
}
}

#if defined(__aarch64__) && defined(LITE_WITH_ARM8_SVE2)
void pooling_global_avg_sve2(const float* din,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SVE 实现建议在arm/math/ 目录下新建一个 sve 目录,将实现加到这个目录下,提高代码的可读性

float* dout,
int num,
int chout,
int hout,
int wout,
int chin,
int hin,
int win) {
int size_channel_in = win * hin;
auto data_out = static_cast<float*>(dout);
auto data_in = static_cast<const float*>(din);
std::vector<float> vec_tmp;
for (int n = 0; n < num; ++n) {
float* data_out_batch = data_out + n * chout;
const float* data_in_batch = data_in + n * chin * size_channel_in;
LITE_PARALLEL_BEGIN(c, tid, chout) {
const float* data_in_channel =
data_in_batch + c * size_channel_in; // in address
float* data_out_channel = data_out_batch + c;
vec_tmp.clear();
for (int i = 0; i < size_channel_in; i += svcntw()) {
svbool_t pg = svwhilelt_b32(i, size_channel_in);
svfloat32_t vec_x = svld1(pg, &data_in_channel[i]);
float psum = svaddv(pg, vec_x);
vec_tmp.emplace_back(psum);
}
float sum = 0.f;
int size = vec_tmp.size();
float* tmp_data = vec_tmp.data();
for (int i = 0; i < size; i += svcntw()) {
svbool_t pg = svwhilelt_b32(i, size);
svfloat32_t vec_x = svld1(pg, &tmp_data[i]);
float psum = svaddv(pg, vec_x);
sum += psum;
}
data_out_channel[0] = sum / size_channel_in;
}
LITE_PARALLEL_END();
}
}
#endif

void pooling1x1s2p0_max(const float* din,
float* dout,
int num,
Expand Down
12 changes: 12 additions & 0 deletions lite/backends/arm/math/pooling.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,18 @@ void pooling3x3s2p0_avg(const float* din,
int pad_bottom,
int pad_right);

#if defined(__aarch64__) && defined(LITE_WITH_ARM8_SVE2)
void pooling_global_avg_sve2(const float* din,
float* dout,
int num,
int chout,
int hout,
int wout,
int chin,
int hin,
int win);
#endif

} // namespace math
} // namespace arm
} // namespace lite
Expand Down
1 change: 1 addition & 0 deletions lite/core/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ class Context<TargetType::kARM> {
bool has_dot() const { return DeviceInfo::Global().has_dot(); }
bool has_fp16() const { return DeviceInfo::Global().has_fp16(); }
bool has_a53_valid() const { return DeviceInfo::Global().set_a53_valid(); }
bool has_sve2() const { return DeviceInfo::Global().has_sve2(); }

template <typename T>
T* workspace_data() {
Expand Down
63 changes: 63 additions & 0 deletions lite/core/device_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,32 @@

namespace paddle {
namespace lite {
// http://elixir.free-electrons.com/linux/latest/source/arch/arm64/include/uapi/asm/hwcap.h
#if defined(LITE_WITH_ANDROID) && defined(__aarch64__)
#include <asm/hwcap.h> /* Get HWCAP bits from asm/hwcap.h */
#include <sys/auxv.h>
#define AARCH64_HWCAP_SVE (1UL << 22)
#define AARCH64_HWCAP2_SVE2 (1UL << 1)
#define AARCH64_HWCAP2_SVEAES (1UL << 2)
#define AARCH64_HWCAP2_SVEPMULL (1UL << 3)
#define AARCH64_HWCAP2_SVEBITPERM (1UL << 4)
#define AARCH64_HWCAP2_SVESHA3 (1UL << 5)
#define AARCH64_HWCAP2_SVESM4 (1UL << 6)
#define AARCH64_HWCAP2_SVEI8MM (1UL << 9)
#define AARCH64_HWCAP2_SVEF32MM (1UL << 10)
#define AARCH64_HWCAP2_SVEF64MM (1UL << 11)
#define AARCH64_HWCAP2_SVEBF16 (1UL << 12)
#define AARCH64_HWCAP2_I8MM (1UL << 13)
#define AARCH64_HWCAP2_BF16 (1UL << 14)
#define AT_HWCAP 16
#define AT_HWCAP2 26

bool check_sve2_valid() {
auto mask = static_cast<uint32_t>(getauxval(AT_HWCAP2)); // Android API >= 18
if (mask & AARCH64_HWCAP2_SVE2) return true;
return false;
}
#endif

#if ((defined LITE_WITH_ARM) || (defined LITE_WITH_MLU))
LITE_THREAD_LOCAL lite_api::PowerMode DeviceInfo::mode_;
Expand Down Expand Up @@ -225,6 +251,15 @@ void get_cpu_arch(std::vector<ARMArch>* archs, const int cpu_num) {
// 888
arch_type = kX1;
break;
case 0xd46:
arch_type = kA510;
break;
case 0xd47:
arch_type = kA710;
break;
case 0xd48:
arch_type = kX2;
break;
default:
LOG(ERROR) << "Unknow cpu arch: " << arch_id;
}
Expand Down Expand Up @@ -1138,6 +1173,8 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) {

bool DeviceInfo::set_a53_valid() { return has_a53_valid_; }

bool DeviceInfo::has_sve2() { return has_sve2_; }

int DeviceInfo::Setup() {
core_num_ = get_cpu_num();
mem_size_ = get_mem_size();
Expand Down Expand Up @@ -1192,6 +1229,13 @@ int DeviceInfo::Setup() {
} else {
has_a53_valid_ = true;
}

// SVE2
has_sve2_ = false;
#if defined(LITE_WITH_ANDROID) && defined(__aarch64__)
has_sve2_ = check_sve2_valid();
#endif

// output info
LOG(INFO) << "ARM multiprocessors name: " << dev_name_;
LOG(INFO) << "ARM multiprocessors number: " << core_num_;
Expand All @@ -1215,6 +1259,7 @@ int DeviceInfo::Setup() {
LOG(INFO) << L3_cache_[i] / 1024 << " KB";
}
LOG(INFO) << "Total memory: " << mem_size_ << "KB";
LOG(INFO) << "SVE2 support: " << has_sve2_;
// set default run mode
SetRunMode(lite_api::PowerMode::LITE_POWER_NO_BIND,
1); // use single thread by default
Expand Down Expand Up @@ -1528,5 +1573,23 @@ FMAType device_fma_level() {

#endif

#if defined(LITE_WITH_ANDROID) && defined(__aarch64__)
#undef AARCH64_HWCAP_SVE
#undef AARCH64_HWCAP2_SVE2
#undef AARCH64_HWCAP2_SVEAES
#undef AARCH64_HWCAP2_SVEPMULL
#undef AARCH64_HWCAP2_SVEBITPERM
#undef AARCH64_HWCAP2_SVESHA3
#undef AARCH64_HWCAP2_SVESM4
#undef AARCH64_HWCAP2_SVEI8MM
#undef AARCH64_HWCAP2_SVEF32MM
#undef AARCH64_HWCAP2_SVEF64MM
#undef AARCH64_HWCAP2_SVEBF16
#undef AARCH64_HWCAP2_I8MM
#undef AARCH64_HWCAP2_BF16
#undef AT_HWCAP
#undef AT_HWCAP2
#endif

} // namespace lite
} // namespace paddle
5 changes: 5 additions & 0 deletions lite/core/device_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@ using L3CacheSetMethod = lite_api::L3CacheSetMethod;
typedef enum {
kAPPLE = 0,
kX1 = 1,
kX2 = 2,
kA35 = 35,
kA53 = 53,
kA55 = 55,
kA57 = 57,
kA510 = 60,
kA72 = 72,
kA73 = 73,
kA75 = 75,
Expand All @@ -52,6 +54,7 @@ typedef enum {
kGold = 79,
kGold_Prime = 80,
kSilver = 81,
kA710 = 82,
kARMArch_UNKOWN = -1
} ARMArch;

Expand All @@ -69,6 +72,7 @@ class DeviceInfo {

int Setup();
bool set_a53_valid();
bool has_sve2();

void SetRunMode(lite_api::PowerMode mode, int thread_num);
void SetCache(int l1size, int l2size, int l3size);
Expand Down Expand Up @@ -151,6 +155,7 @@ class DeviceInfo {
std::vector<bool> fp16_;
std::vector<bool> dot_;
bool has_a53_valid_;
bool has_sve2_;

// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
Expand Down
7 changes: 7 additions & 0 deletions lite/kernels/arm/pool_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ void PoolCompute<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
auto& param = Param<operators::PoolParam>();
auto& in_dims = param.x->dims();
auto& out_dims = param.output->dims();
auto& ctx = this->ctx_->As<ARMContext>();

const float* din = param.x->data<float>();
float* dout = param.output->mutable_data<float>();
Expand Down Expand Up @@ -75,6 +76,12 @@ void PoolCompute<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
lite::arm::math::pooling_global_max(POOL_IN_PARAM);
return;
} else if (pooling_type == "avg") {
#if defined(__aarch64__) && defined(LITE_WITH_ARM8_SVE2)
if (ctx.has_sve2()) {
lite::arm::math::pooling_global_avg_sve2(POOL_IN_PARAM);
return;
}
#endif
lite::arm::math::pooling_global_avg(POOL_IN_PARAM);
return;
}
Expand Down
12 changes: 11 additions & 1 deletion lite/tools/build_android.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ WITH_LOG=ON
WITH_EXCEPTION=OFF
# controls whether to include FP16 kernels, default is OFF
BUILD_ARM82_FP16=OFF
# controls whether to support SVE2 instructions, default is OFF
WITH_ARM8_SVE2=OFF
# options of striping lib according to input model.
OPTMODEL_DIR=""
WITH_STRIP=OFF
Expand Down Expand Up @@ -268,6 +270,7 @@ function make_tiny_publish_so {
-DARM_TARGET_ARCH_ABI=$ARCH \
-DARM_TARGET_LANG=$TOOLCHAIN \
-DLITE_WITH_ARM82_FP16=$BUILD_ARM82_FP16 \
-DLITE_WITH_ARM8_SVE2=$WITH_ARM8_SVE2 \
-DANDROID_STL_TYPE=$ANDROID_STL \
-DLITE_THREAD_POOL=$WITH_THREAD_POOL \
-DWITH_CONVERT_TO_SSA=$WITH_CONVERT_TO_SSA"
Expand Down Expand Up @@ -361,6 +364,7 @@ function make_full_publish_so {
-DLITE_WITH_TRAIN=$WITH_TRAIN \
-DLITE_WITH_PROFILE=$WITH_PROFILE \
-DLITE_WITH_ARM82_FP16=$BUILD_ARM82_FP16 \
-DLITE_WITH_ARM8_SVE2=$WITH_ARM8_SVE2 \
-DLITE_WITH_PRECISION_PROFILE=$WITH_PRECISION_PROFILE \
-DANDROID_STL_TYPE=$ANDROID_STL \
-DWITH_CONVERT_TO_SSA=$WITH_CONVERT_TO_SSA"
Expand Down Expand Up @@ -397,13 +401,15 @@ function print_usage {
echo -e "| --with_static_lib: (OFF|ON); controls whether to publish c++ api static lib, default is OFF |"
echo -e "| --with_cv: (OFF|ON); controls whether to compile cv functions into lib, default is OFF |"
echo -e "| --with_log: (OFF|ON); controls whether to print log information, default is ON |"
echo -e "| --with_convert_to_ssa: (OFF|ON); controls whether to modify input model graph which is not DAG to SSA graph, default is OFF |"
echo -e "| --with_convert_to_ssa: (OFF|ON); controls whether to modify input model graph which is not DAG to SSA graph, default is OFF |"
echo -e "| --with_exception: (OFF|ON); controls whether to throw the exception when error occurs, default is OFF |"
echo -e "| --with_extra: (OFF|ON); controls whether to publish extra operators and kernels for (sequence-related model such as OCR or NLP) |"
echo -e "| --with_profile: (OFF|ON); controls whether to support time profile, default is OFF |"
echo -e "| --with_precision_profile: (OFF|ON); controls whether to support precision profile, default is OFF |"
echo -e "| --with_arm82_fp16: (OFF|ON); controls whether to include FP16 kernels, default is OFF |"
echo -e "| warning: when --with_arm82_fp16=ON, toolchain will be set as clang, arch will be set as armv8. |"
echo -e "| --with_arm8_sve2: (OFF|ON); controls whether to include SVE2 kernels, default is OFF |"
echo -e "| warning: when --with_arm8_sve2=ON, NDK version need >= r23, arch will be set as armv8. |"
echo -e "| --android_api_level: (16~27); control android api level, default is 16 on armv7 and 21 on armv8. You could set a specific |"
echo -e "| android_api_level as you need. |"
echo -e "| | Paddle-Lite Requird / ARM ABI | armv7 | armv8 | |"
Expand Down Expand Up @@ -651,6 +657,10 @@ function main {
WITH_CONVERT_TO_SSA="${i#*=}"
shift
;;
--with_arm8_sve2=*)
WITH_ARM8_SVE2="${i#*=}"
shift
;;
help)
# print help info
print_usage
Expand Down