
Commit ca95763

Merge pull request #17 from PaddlePaddle/develop
update
2 parents 0da14c9 + 480b284 commit ca95763


303 files changed: +10242 additions, -3475 deletions


CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -216,7 +216,7 @@ option(WITH_STRIP "Strip so files of Whl packages" OFF)
 
 # PY_VERSION
 if(NOT PY_VERSION)
-  set(PY_VERSION 2.7)
+  set(PY_VERSION 3.6)
 endif()
 set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
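Because the assignment is guarded by if(NOT PY_VERSION), this hunk only changes the fallback: an explicit -DPY_VERSION=... passed at configure time still takes precedence. A minimal standalone sketch of the guarded-default idiom (not the real CMakeLists.txt):

    # Guarded default: used only when the command line or cache did not set PY_VERSION,
    # e.g. `cmake -DPY_VERSION=3.8 ..` keeps 3.8, while a plain `cmake ..` falls back to 3.6.
    if(NOT PY_VERSION)
      set(PY_VERSION 3.6)
    endif()
    set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
    message(STATUS "Building against Python ${PY_VERSION}")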

README.md

Lines changed: 3 additions & 4 deletions
@@ -1,4 +1,4 @@
-<p align="center">
+<p align="center">
 <img align="center" src="doc/imgs/logo.png", width=1600>
 <p>
 
@@ -50,10 +50,9 @@ Now our developers can acquire Tesla V100 online computing resources for free. I
 [Click here to learn more](https://github.com/PaddlePaddle/Fleet)
 
 
-- **Accelerated High-Performance Inference over Ubiquitous Deployments**
+- **High-Performance Inference Engines for Comprehensive Deployment Environments**
 
-PaddlePaddle is not only compatible with other open-source frameworks for models training, but also works well on the ubiquitous developments, varying from platforms to devices. More specifically, PaddlePaddle accelerates the inference procedure with the fastest speed-up. Note that, a recent breakthrough of inference speed has been made by PaddlePaddle on Huawei's Kirin NPU, through the hardware/software co-optimization.
-[Click here to learn more](https://github.com/PaddlePaddle/Paddle-Lite)
+PaddlePaddle is not only compatible with models trained in third-party open-source frameworks, but also offers complete inference products for various production scenarios. Our inference product line includes [Paddle Inference](https://paddle-inference.readthedocs.io/en/latest/product_introduction/summary.html): a native inference library for high-performance server and cloud inference; [Paddle Serving](https://github.com/PaddlePaddle/Serving): a service-oriented framework suited to distributed and pipelined production environments; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite): an ultra-lightweight inference engine for mobile and IoT environments; and [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs): a frontend inference engine for browsers and mini apps. Furthermore, through extensive optimization for the leading hardware in each scenario, Paddle's inference engines outperform most other mainstream frameworks.
 
 
 - **Industry-Oriented Models and Libraries with Open Source Repositories**

README_cn.md

Lines changed: 3 additions & 4 deletions
@@ -1,4 +1,4 @@
-
+
 <p align="center">
 <img align="center" src="doc/imgs/logo.png", width=1600>
 <p>
@@ -47,10 +47,9 @@ PaddlePaddle users can claim **free Tesla V100 online compute resources** to train models
 [See details](https://github.com/PaddlePaddle/Fleet)
 
 
-- **High-performance inference engines for multi-device, multi-platform deployment**
+- **High-performance inference and deployment tools for multiple devices and platforms**
 
-PaddlePaddle is not only compatible with models trained in other open-source frameworks, but also deploys easily to platforms and devices of different architectures, with inference speed that leads across the board. In particular, hardware/software co-optimization with Huawei's Kirin NPU has pushed PaddlePaddle's inference speed on the NPU even further.
-[See details](https://github.com/PaddlePaddle/Paddle-Lite)
+PaddlePaddle not only broadly supports deploying models trained in third-party open-source frameworks, but also provides complete inference engines for production environments in different scenarios: [Paddle Inference](https://paddle-inference.readthedocs.io/en/latest/product_introduction/summary.html), the native inference library for high-performance server and cloud inference; [Paddle Serving](https://github.com/PaddlePaddle/Serving), a service-oriented inference framework for distributed and pipelined production environments with advanced features such as automatic cloud deployment and A/B testing; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite), an ultra-lightweight inference engine for mobile and IoT scenarios; and [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs), a frontend inference engine for browsers and mini apps. In addition, through deep adaptation and optimization for the mainstream hardware of each scenario and support for heterogeneous computing, PaddlePaddle's inference performance leads most mainstream implementations.
 
 
 - **Industry-oriented, open-source, industrial-grade model libraries covering multiple domains.**

cmake/cblas.cmake

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ if(NOT DEFINED CBLAS_PROVIDER)
   string(REGEX MATCH "OpenBLAS ([0-9]+\.[0-9]+\.[0-9]+)" tmp ${config_file})
   string(REGEX MATCH "([0-9]+\.[0-9]+\.[0-9]+)" ver ${tmp})
 
-  if (${ver} VERSION_EQUAL "0.3.7")
+  if (${ver} VERSION_GREATER_EQUAL "0.3.7")
     set(CBLAS_PROVIDER OPENBLAS)
     set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR} ${OPENBLAS_LAPACKE_INC_DIR})
     set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
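For reference, VERSION_GREATER_EQUAL widens the check from exactly 0.3.7 to 0.3.7-or-newer. A small standalone sketch with a made-up version value (runnable with cmake -P):

    set(ver "0.3.13")                         # hypothetical OpenBLAS version string
    if(ver VERSION_EQUAL "0.3.7")
      message(STATUS "exact 0.3.7")           # not printed for 0.3.13
    endif()
    if(ver VERSION_GREATER_EQUAL "0.3.7")
      message(STATUS "0.3.7 or newer")        # printed for 0.3.13
    endif()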

cmake/external/lite.cmake

Lines changed: 22 additions & 8 deletions
@@ -18,13 +18,21 @@ if(NOT LINUX)
   return()
 endif()
 
-if(XPU_SDK_ROOT)
-  set(LITE_WITH_XPU ON)
-  include_directories("${XPU_SDK_ROOT}/XTDK/include")
-  include_directories("${XPU_SDK_ROOT}/XTCL/include")
+if (LITE_WITH_XPU)
   add_definitions(-DLITE_SUBGRAPH_WITH_XPU)
-  LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/shlib/")
-  LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/runtime/shlib/")
+  IF(WITH_AARCH64)
+    SET(XPU_SDK_ENV "kylin_aarch64")
+  ELSEIF(WITH_SUNWAY)
+    SET(XPU_SDK_ENV "deepin_sw6_64")
+  ELSEIF(WITH_BDCENTOS)
+    SET(XPU_SDK_ENV "bdcentos_x86_64")
+  ELSEIF(WITH_UBUNTU)
+    SET(XPU_SDK_ENV "ubuntu_x86_64")
+  ELSEIF(WITH_CENTOS)
+    SET(XPU_SDK_ENV "centos7_x86_64")
+  ELSE ()
+    SET(XPU_SDK_ENV "ubuntu_x86_64")
+  ENDIF()
 endif()
 
 if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
@@ -57,7 +65,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
     -DWITH_TESTING=OFF
     -DLITE_BUILD_EXTRA=ON
     -DLITE_WITH_XPU=${LITE_WITH_XPU}
-    -DXPU_SDK_ROOT=${XPU_SDK_ROOT}
+    -DXPU_SDK_URL=${XPU_BASE_URL}
+    -DXPU_SDK_ENV=${XPU_SDK_ENV}
     -DLITE_WITH_CODE_META_INFO=OFF
     -DLITE_WITH_ARM=ON)
   ExternalProject_Add(
@@ -99,7 +108,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
     -DLITE_WITH_STATIC_CUDA=OFF
     -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
     -DLITE_WITH_XPU=${LITE_WITH_XPU}
-    -DXPU_SDK_ROOT=${XPU_SDK_ROOT}
+    -DXPU_SDK_URL=${XPU_BASE_URL}
+    -DXPU_SDK_ENV=${XPU_SDK_ENV}
     -DLITE_WITH_CODE_META_INFO=OFF
     -DLITE_WITH_ARM=OFF)
 
@@ -147,6 +157,10 @@ message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}")
 message(STATUS "Paddle-lite SOURCE_DIR: ${LITE_SOURCE_DIR}")
 include_directories(${LITE_SOURCE_DIR})
 include_directories(${LITE_BINARY_DIR})
+if(LITE_WITH_XPU)
+  include_directories(${LITE_BINARY_DIR}/third_party/install/xpu/xdnn/include/)
+  include_directories(${LITE_BINARY_DIR}/third_party/install/xpu/xre/include/)
+endif()
 
 function(external_lite_libs alias path)
   add_library(${alias} SHARED IMPORTED GLOBAL)
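The net effect of these hunks is that the XPU SDK is no longer located through a local XPU_SDK_ROOT; the download URL and target environment are forwarded to the Paddle-Lite sub-build as ordinary cache arguments. A rough, self-contained sketch of that pass-through (hypothetical target name; the real call is the ExternalProject_Add shown in the hunks above):

    include(ExternalProject)
    # Hypothetical stand-in for the Lite external project; the XPU options travel
    # into the sub-build through CMAKE_ARGS exactly like any other -D definition.
    ExternalProject_Add(extern_lite_demo
      GIT_REPOSITORY  https://github.com/PaddlePaddle/Paddle-Lite.git
      GIT_TAG         develop
      CMAKE_ARGS      -DLITE_WITH_XPU=${LITE_WITH_XPU}
                      -DXPU_SDK_URL=${XPU_BASE_URL}
                      -DXPU_SDK_ENV=${XPU_SDK_ENV}
      INSTALL_COMMAND "")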

cmake/external/mkldnn.cmake

Lines changed: 4 additions & 1 deletion
@@ -101,8 +101,11 @@ ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
 # it can be directly contained in wheel or capi
 if(WIN32)
   SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll)
+
+  file(TO_NATIVE_PATH ${MKLDNN_INSTALL_DIR} NATIVE_MKLDNN_INSTALL_DIR)
+  file(TO_NATIVE_PATH ${MKLDNN_SHARED_LIB} NATIVE_MKLDNN_SHARED_LIB)
   ADD_CUSTOM_COMMAND(TARGET ${MKLDNN_PROJECT} POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_INSTALL_DIR}/bin/dnnl.dll ${MKLDNN_SHARED_LIB})
+    COMMAND (copy ${NATIVE_MKLDNN_INSTALL_DIR}\\bin\\dnnl.dll ${NATIVE_MKLDNN_SHARED_LIB} /Y))
   add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
     COMMAND dumpbin /exports ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll > ${MKLDNN_INSTALL_DIR}/bin/exports.txt)
   add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
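The native copy command needs backslash-separated paths, which is what the two new file(TO_NATIVE_PATH ...) calls provide. A tiny sketch of the conversion with a hypothetical install directory (runnable with cmake -P on Windows):

    set(MKLDNN_INSTALL_DIR "C:/paddle/build/third_party/install/mkldnn")   # hypothetical path
    file(TO_NATIVE_PATH "${MKLDNN_INSTALL_DIR}" NATIVE_MKLDNN_INSTALL_DIR)
    # On Windows this prints: C:\paddle\build\third_party\install\mkldnn
    message(STATUS "${NATIVE_MKLDNN_INSTALL_DIR}")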

cmake/external/xpu.cmake

Lines changed: 5 additions & 6 deletions
@@ -33,7 +33,10 @@ ELSE ()
   SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
 ENDIF()
 
-SET(XPU_BASE_URL "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev/20210527")
+IF(NOT XPU_BASE_URL)
+  SET(XPU_BASE_URL "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev/20210527")
+ENDIF()
+
 SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XCCL_URL "${XPU_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
@@ -93,11 +96,7 @@ ELSE(WITH_XPU_BKCL)
   TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
 ENDIF(WITH_XPU_BKCL)
 
-if(NOT XPU_SDK_ROOT)
-  ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
-else()
-  ADD_CUSTOM_TARGET(extern_xpu DEPENDS xpulib)
-endif()
+ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
 
 # Ensure that xpu/api.h can be included without dependency errors.
 file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc CONTENT "")

cmake/inference_lib.cmake

Lines changed: 8 additions & 1 deletion
@@ -154,6 +154,13 @@ IF(WITH_GPU)
         DSTS ${dst_dir})
 ENDIF()
 
+IF(WITH_XPU)
+    set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/install/xpu")
+    copy(inference_lib_dist
+        SRCS ${XPU_INC_DIR} ${XPU_LIB_DIR}
+        DSTS ${dst_dir} ${dst_dir})
+ENDIF()
+
 # CMakeCache Info
 copy(inference_lib_dist
     SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt
@@ -335,7 +342,7 @@ function(version version_file)
   file(APPEND ${version_file} "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n")
   if(TENSORRT_FOUND)
     file(APPEND ${version_file}
-      "WITH_TENSORRT: ${TENSORRT_FOUND}\n" "TensorRT version: v${TENSORRT_MAJOR_VERSION}\n")
+      "WITH_TENSORRT: ${TENSORRT_FOUND}\n" "TensorRT version: v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION}\n")
   endif()
   if(WITH_LITE)
     file(APPEND ${version_file} "WITH_LITE: ${WITH_LITE}\n" "LITE_GIT_TAG: ${LITE_GIT_TAG}\n")

cmake/operators.cmake

Lines changed: 1 addition & 1 deletion
@@ -208,7 +208,7 @@ function(op_library TARGET)
   endif()
 
   # Define operators that don't need pybind here.
-  foreach(manual_pybind_op "compare_all_op" "compare_op" "logical_op" "nccl_op"
+  foreach(manual_pybind_op "compare_all_op" "compare_op" "logical_op" "bitwise_op" "nccl_op"
      "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op"
      "fusion_transpose_flatten_concat_op" "fusion_conv_inception_op"
      "sync_batch_norm_op" "dgc_op" "fused_fc_elementwise_layernorm_op"

cmake/tensorrt.cmake

Lines changed: 19 additions & 1 deletion
@@ -47,11 +47,23 @@ if(TENSORRT_FOUND)
   file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS)
   string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
     "${TENSORRT_VERSION_FILE_CONTENTS}")
+  string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
+    "${TENSORRT_VERSION_FILE_CONTENTS}")
+  string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
+    "${TENSORRT_VERSION_FILE_CONTENTS}")
+  string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
+    "${TENSORRT_VERSION_FILE_CONTENTS}")
 
   if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
     file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS)
     string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
       "${TENSORRT_VERSION_FILE_CONTENTS}")
+    string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
+      "${TENSORRT_VERSION_FILE_CONTENTS}")
+    string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
+      "${TENSORRT_VERSION_FILE_CONTENTS}")
+    string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
+      "${TENSORRT_VERSION_FILE_CONTENTS}")
   endif()
 
   if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
@@ -60,9 +72,15 @@ if(TENSORRT_FOUND)
 
   string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1"
     TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}")
+  string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1"
+    TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}")
+  string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1"
+    TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}")
+  string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1"
+    TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}")
 
   message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
-    "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
+    "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} ")
   include_directories(${TENSORRT_INCLUDE_DIR})
   link_directories(${TENSORRT_LIBRARY})
   add_definitions(-DPADDLE_WITH_TENSORRT)
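Each added component follows the same two-step extraction already used for the major version: REGEX MATCH keeps the whole "define NV_TENSORRT_<part> <n>" fragment, then REGEX REPLACE reduces it to the captured number. A standalone sketch with made-up header contents (runnable with cmake -P):

    # Made-up NvInfer.h contents, for illustration only.
    set(TENSORRT_VERSION_FILE_CONTENTS "#define NV_TENSORRT_MAJOR 7\n#define NV_TENSORRT_MINOR 2")
    string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
      "${TENSORRT_VERSION_FILE_CONTENTS}")                      # -> "define NV_TENSORRT_MINOR 2"
    string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1"
      TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}")       # -> "2"
    message(STATUS "minor = ${TENSORRT_MINOR_VERSION}")         # prints: minor = 2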
