
Commit ca95763

Merge pull request #17 from PaddlePaddle/develop
update
2 parents 0da14c9 + 480b284 commit ca95763


303 files changed: +10242 additions, -3475 deletions


CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -216,7 +216,7 @@ option(WITH_STRIP "Strip so files of Whl packages" OFF)
 
 # PY_VERSION
 if(NOT PY_VERSION)
-  set(PY_VERSION 2.7)
+  set(PY_VERSION 3.6)
 endif()
 set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
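Because the assignment is guarded by if(NOT PY_VERSION), this hunk only changes the fallback: an explicit -DPY_VERSION=... passed at configure time still takes precedence. A minimal standalone sketch of the guarded-default idiom (not the real CMakeLists.txt):

    # Guarded default: used only when the command line or cache did not set PY_VERSION,
    # e.g. `cmake -DPY_VERSION=3.8 ..` keeps 3.8, while a plain `cmake ..` falls back to 3.6.
    if(NOT PY_VERSION)
      set(PY_VERSION 3.6)
    endif()
    set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
    message(STATUS "Building against Python ${PY_VERSION}")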

README.md

Lines changed: 3 additions & 4 deletions
@@ -1,4 +1,4 @@
-<p align="center">
+<p align="center">
 <img align="center" src="doc/imgs/logo.png", width=1600>
 <p>
 
@@ -50,10 +50,9 @@ Now our developers can acquire Tesla V100 online computing resources for free. I
 [Click here to learn more](https://github.com/PaddlePaddle/Fleet)
 
 
-- **Accelerated High-Performance Inference over Ubiquitous Deployments**
+- **High-Performance Inference Engines for Comprehensive Deployment Environments**
 
-PaddlePaddle is not only compatible with other open-source frameworks for models training, but also works well on the ubiquitous developments, varying from platforms to devices. More specifically, PaddlePaddle accelerates the inference procedure with the fastest speed-up. Note that, a recent breakthrough of inference speed has been made by PaddlePaddle on Huawei's Kirin NPU, through the hardware/software co-optimization.
-[Click here to learn more](https://github.com/PaddlePaddle/Paddle-Lite)
+PaddlePaddle is not only compatible with models trained in third-party open-source frameworks, but also offers complete inference products for various production scenarios. Our inference product line includes [Paddle Inference](https://paddle-inference.readthedocs.io/en/latest/product_introduction/summary.html): a native inference library for high-performance server and cloud inference; [Paddle Serving](https://github.com/PaddlePaddle/Serving): a service-oriented framework suited to distributed and pipelined production environments; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite): an ultra-lightweight inference engine for mobile and IoT environments; and [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs): a frontend inference engine for browsers and mini apps. Furthermore, through extensive optimization for the leading hardware in each scenario, Paddle's inference engines outperform most other mainstream frameworks.
 
 
 - **Industry-Oriented Models and Libraries with Open Source Repositories**

README_cn.md

Lines changed: 3 additions & 4 deletions
@@ -1,4 +1,4 @@
-
+
 <p align="center">
 <img align="center" src="doc/imgs/logo.png", width=1600>
 <p>
@@ -47,10 +47,9 @@ PaddlePaddle users can claim **free Tesla V100 online compute resources** to train models
 [See details](https://github.com/PaddlePaddle/Fleet)
 
 
-- **High-performance inference engines for multi-device, multi-platform deployment**
+- **High-performance inference and deployment tools for multiple devices and platforms**
 
-PaddlePaddle is not only compatible with models trained in other open-source frameworks, but also deploys easily to platforms and devices of different architectures, with inference speed that leads across the board. In particular, hardware/software co-optimization with Huawei's Kirin NPU has pushed PaddlePaddle's inference speed on the NPU even further.
-[See details](https://github.com/PaddlePaddle/Paddle-Lite)
+PaddlePaddle not only broadly supports deploying models trained in third-party open-source frameworks, but also provides complete inference engines for production environments in different scenarios: [Paddle Inference](https://paddle-inference.readthedocs.io/en/latest/product_introduction/summary.html), the native inference library for high-performance server and cloud inference; [Paddle Serving](https://github.com/PaddlePaddle/Serving), a service-oriented inference framework for distributed and pipelined production environments with advanced features such as automatic cloud deployment and A/B testing; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite), an ultra-lightweight inference engine for mobile and IoT scenarios; and [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs), a frontend inference engine for browsers and mini apps. In addition, through deep adaptation and optimization for the mainstream hardware of each scenario and support for heterogeneous computing, PaddlePaddle's inference performance leads most mainstream implementations.
 
 
 - **Industry-oriented, open-source, industrial-grade model libraries covering multiple domains.**

cmake/cblas.cmake

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ if(NOT DEFINED CBLAS_PROVIDER)
   string(REGEX MATCH "OpenBLAS ([0-9]+\.[0-9]+\.[0-9]+)" tmp ${config_file})
   string(REGEX MATCH "([0-9]+\.[0-9]+\.[0-9]+)" ver ${tmp})
 
-  if (${ver} VERSION_EQUAL "0.3.7")
+  if (${ver} VERSION_GREATER_EQUAL "0.3.7")
     set(CBLAS_PROVIDER OPENBLAS)
     set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR} ${OPENBLAS_LAPACKE_INC_DIR})
     set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
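For reference, VERSION_GREATER_EQUAL widens the check from exactly 0.3.7 to 0.3.7-or-newer. A small standalone sketch with a made-up version value (runnable with cmake -P):

    set(ver "0.3.13")                         # hypothetical OpenBLAS version string
    if(ver VERSION_EQUAL "0.3.7")
      message(STATUS "exact 0.3.7")           # not printed for 0.3.13
    endif()
    if(ver VERSION_GREATER_EQUAL "0.3.7")
      message(STATUS "0.3.7 or newer")        # printed for 0.3.13
    endif()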

cmake/external/lite.cmake

Lines changed: 22 additions & 8 deletions
@@ -18,13 +18,21 @@ if(NOT LINUX)
   return()
 endif()
 
-if(XPU_SDK_ROOT)
-  set(LITE_WITH_XPU ON)
-  include_directories("${XPU_SDK_ROOT}/XTDK/include")
-  include_directories("${XPU_SDK_ROOT}/XTCL/include")
+if (LITE_WITH_XPU)
   add_definitions(-DLITE_SUBGRAPH_WITH_XPU)
-  LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/shlib/")
-  LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/runtime/shlib/")
+  IF(WITH_AARCH64)
+    SET(XPU_SDK_ENV "kylin_aarch64")
+  ELSEIF(WITH_SUNWAY)
+    SET(XPU_SDK_ENV "deepin_sw6_64")
+  ELSEIF(WITH_BDCENTOS)
+    SET(XPU_SDK_ENV "bdcentos_x86_64")
+  ELSEIF(WITH_UBUNTU)
+    SET(XPU_SDK_ENV "ubuntu_x86_64")
+  ELSEIF(WITH_CENTOS)
+    SET(XPU_SDK_ENV "centos7_x86_64")
+  ELSE ()
+    SET(XPU_SDK_ENV "ubuntu_x86_64")
+  ENDIF()
 endif()
 
 if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
@@ -57,7 +65,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
     -DWITH_TESTING=OFF
     -DLITE_BUILD_EXTRA=ON
     -DLITE_WITH_XPU=${LITE_WITH_XPU}
-    -DXPU_SDK_ROOT=${XPU_SDK_ROOT}
+    -DXPU_SDK_URL=${XPU_BASE_URL}
+    -DXPU_SDK_ENV=${XPU_SDK_ENV}
     -DLITE_WITH_CODE_META_INFO=OFF
     -DLITE_WITH_ARM=ON)
   ExternalProject_Add(
@@ -99,7 +108,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
     -DLITE_WITH_STATIC_CUDA=OFF
     -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
     -DLITE_WITH_XPU=${LITE_WITH_XPU}
-    -DXPU_SDK_ROOT=${XPU_SDK_ROOT}
+    -DXPU_SDK_URL=${XPU_BASE_URL}
+    -DXPU_SDK_ENV=${XPU_SDK_ENV}
     -DLITE_WITH_CODE_META_INFO=OFF
     -DLITE_WITH_ARM=OFF)
 
@@ -147,6 +157,10 @@ message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}")
 message(STATUS "Paddle-lite SOURCE_DIR: ${LITE_SOURCE_DIR}")
 include_directories(${LITE_SOURCE_DIR})
 include_directories(${LITE_BINARY_DIR})
+if(LITE_WITH_XPU)
+  include_directories(${LITE_BINARY_DIR}/third_party/install/xpu/xdnn/include/)
+  include_directories(${LITE_BINARY_DIR}/third_party/install/xpu/xre/include/)
+endif()
 
 function(external_lite_libs alias path)
   add_library(${alias} SHARED IMPORTED GLOBAL)
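The net effect of these hunks is that the XPU SDK is no longer located through a local XPU_SDK_ROOT; the download URL and target environment are forwarded to the Paddle-Lite sub-build as ordinary cache arguments. A rough, self-contained sketch of that pass-through (hypothetical target name; the real call is the ExternalProject_Add shown in the hunks above):

    include(ExternalProject)
    # Hypothetical stand-in for the Lite external project; the XPU options travel
    # into the sub-build through CMAKE_ARGS exactly like any other -D definition.
    ExternalProject_Add(extern_lite_demo
      GIT_REPOSITORY  https://github.com/PaddlePaddle/Paddle-Lite.git
      GIT_TAG         develop
      CMAKE_ARGS      -DLITE_WITH_XPU=${LITE_WITH_XPU}
                      -DXPU_SDK_URL=${XPU_BASE_URL}
                      -DXPU_SDK_ENV=${XPU_SDK_ENV}
      INSTALL_COMMAND "")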

cmake/external/mkldnn.cmake

Lines changed: 4 additions & 1 deletion
@@ -101,8 +101,11 @@ ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
 # it can be directly contained in wheel or capi
 if(WIN32)
   SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll)
+
+  file(TO_NATIVE_PATH ${MKLDNN_INSTALL_DIR} NATIVE_MKLDNN_INSTALL_DIR)
+  file(TO_NATIVE_PATH ${MKLDNN_SHARED_LIB} NATIVE_MKLDNN_SHARED_LIB)
   ADD_CUSTOM_COMMAND(TARGET ${MKLDNN_PROJECT} POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_INSTALL_DIR}/bin/dnnl.dll ${MKLDNN_SHARED_LIB})
+    COMMAND (copy ${NATIVE_MKLDNN_INSTALL_DIR}\\bin\\dnnl.dll ${NATIVE_MKLDNN_SHARED_LIB} /Y))
   add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
     COMMAND dumpbin /exports ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll > ${MKLDNN_INSTALL_DIR}/bin/exports.txt)
   add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
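The native copy command needs backslash-separated paths, which is what the two new file(TO_NATIVE_PATH ...) calls provide. A tiny sketch of the conversion with a hypothetical install directory (runnable with cmake -P on Windows):

    set(MKLDNN_INSTALL_DIR "C:/paddle/build/third_party/install/mkldnn")   # hypothetical path
    file(TO_NATIVE_PATH "${MKLDNN_INSTALL_DIR}" NATIVE_MKLDNN_INSTALL_DIR)
    # On Windows this prints: C:\paddle\build\third_party\install\mkldnn
    message(STATUS "${NATIVE_MKLDNN_INSTALL_DIR}")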

cmake/external/xpu.cmake

Lines changed: 5 additions & 6 deletions
@@ -33,7 +33,10 @@ ELSE ()
   SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
 ENDIF()
 
-SET(XPU_BASE_URL "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev/20210527")
+IF(NOT XPU_BASE_URL)
+  SET(XPU_BASE_URL "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev/20210527")
+ENDIF()
+
 SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XCCL_URL "${XPU_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
@@ -93,11 +96,7 @@ ELSE(WITH_XPU_BKCL)
   TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
 ENDIF(WITH_XPU_BKCL)
 
-if(NOT XPU_SDK_ROOT)
-  ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
-else()
-  ADD_CUSTOM_TARGET(extern_xpu DEPENDS xpulib)
-endif()
+ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
 
 # Ensure that xpu/api.h can be included without dependency errors.
 file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc CONTENT "")

cmake/inference_lib.cmake

Lines changed: 8 additions & 1 deletion
@@ -154,6 +154,13 @@ IF(WITH_GPU)
         DSTS ${dst_dir})
 ENDIF()
 
+IF(WITH_XPU)
+    set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/install/xpu")
+    copy(inference_lib_dist
+        SRCS ${XPU_INC_DIR} ${XPU_LIB_DIR}
+        DSTS ${dst_dir} ${dst_dir})
+ENDIF()
+
 # CMakeCache Info
 copy(inference_lib_dist
     SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt
@@ -335,7 +342,7 @@ function(version version_file)
   file(APPEND ${version_file} "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n")
   if(TENSORRT_FOUND)
     file(APPEND ${version_file}
-      "WITH_TENSORRT: ${TENSORRT_FOUND}\n" "TensorRT version: v${TENSORRT_MAJOR_VERSION}\n")
+      "WITH_TENSORRT: ${TENSORRT_FOUND}\n" "TensorRT version: v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION}\n")
   endif()
   if(WITH_LITE)
     file(APPEND ${version_file} "WITH_LITE: ${WITH_LITE}\n" "LITE_GIT_TAG: ${LITE_GIT_TAG}\n")

cmake/operators.cmake

Lines changed: 1 addition & 1 deletion
@@ -208,7 +208,7 @@ function(op_library TARGET)
   endif()
 
   # Define operators that don't need pybind here.
-  foreach(manual_pybind_op "compare_all_op" "compare_op" "logical_op" "nccl_op"
+  foreach(manual_pybind_op "compare_all_op" "compare_op" "logical_op" "bitwise_op" "nccl_op"
      "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op"
      "fusion_transpose_flatten_concat_op" "fusion_conv_inception_op"
      "sync_batch_norm_op" "dgc_op" "fused_fc_elementwise_layernorm_op"

cmake/tensorrt.cmake

Lines changed: 19 additions & 1 deletion
@@ -47,11 +47,23 @@ if(TENSORRT_FOUND)
   file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS)
   string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
     "${TENSORRT_VERSION_FILE_CONTENTS}")
+  string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
+    "${TENSORRT_VERSION_FILE_CONTENTS}")
+  string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
+    "${TENSORRT_VERSION_FILE_CONTENTS}")
+  string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
+    "${TENSORRT_VERSION_FILE_CONTENTS}")
 
   if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
     file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS)
     string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
       "${TENSORRT_VERSION_FILE_CONTENTS}")
+    string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
+      "${TENSORRT_VERSION_FILE_CONTENTS}")
+    string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
+      "${TENSORRT_VERSION_FILE_CONTENTS}")
+    string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
+      "${TENSORRT_VERSION_FILE_CONTENTS}")
   endif()
 
   if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
@@ -60,9 +72,15 @@ if(TENSORRT_FOUND)
 
   string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1"
     TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}")
+  string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1"
+    TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}")
+  string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1"
+    TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}")
+  string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1"
+    TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}")
 
   message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
-    "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
+    "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} ")
   include_directories(${TENSORRT_INCLUDE_DIR})
   link_directories(${TENSORRT_LIBRARY})
   add_definitions(-DPADDLE_WITH_TENSORRT)
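Each added component follows the same two-step extraction already used for the major version: REGEX MATCH keeps the whole "define NV_TENSORRT_<part> <n>" fragment, then REGEX REPLACE reduces it to the captured number. A standalone sketch with made-up header contents (runnable with cmake -P):

    # Made-up NvInfer.h contents, for illustration only.
    set(TENSORRT_VERSION_FILE_CONTENTS "#define NV_TENSORRT_MAJOR 7\n#define NV_TENSORRT_MINOR 2")
    string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
      "${TENSORRT_VERSION_FILE_CONTENTS}")                      # -> "define NV_TENSORRT_MINOR 2"
    string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1"
      TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}")       # -> "2"
    message(STATUS "minor = ${TENSORRT_MINOR_VERSION}")         # prints: minor = 2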
