[Serving]: add ipu support for serving. (#10) (#470)

czr-gc · web-flow · commit da84da74486a · 2022-11-02T09:50:58.000+08:00
* feat(ipu): add ipu docker for serving. (#10) * feat(ipu): add ipu docker for serving. * feat(ipu): enable ipu docker in serving. * fix(): fix typo and issues in IPU. * remove unused env path. * doc(ipu): add ipu docker build doc and fix typo. * fix(): clean apt cache in docker. * fix(ipu): fix typo.
diff --git a/serving/Dockerfile_ipu b/serving/Dockerfile_ipu
@@ -0,0 +1,64 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM graphcore/poplar:3.0.0
+
+# Install the build dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends curl wget vim git patchelf python3-dev python3-pip \
+    python3-setuptools build-essential libgl1-mesa-glx libglib2.0-dev ca-certificates \
+    libssl-dev zlib1g-dev rapidjson-dev libboost-dev libre2-dev librdmacm-dev libnuma-dev libarchive-dev unzip && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN ln -s /usr/bin/python3 /usr/bin/python;
+RUN pip3 install --upgrade pip
+
+# install cmake
+WORKDIR /home
+RUN wget -q https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz && tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
+ENV PATH=/home/cmake-3.18.6-Linux-x86_64/bin:$PATH
+
+
+# Build Triton Inference Server from source (gRPC endpoint, python backend)
+ENV TAG=r21.10
+RUN git clone https://github.com/triton-inference-server/server.git -b $TAG && \
+    cd server && \
+    mkdir -p build/tritonserver/install && \
+    python3 build.py \
+      --build-dir `pwd`/build \
+      --no-container-build \
+      --endpoint=grpc \
+      --enable-logging \
+      --enable-stats \
+      --cmake-dir `pwd`/build \
+      --repo-tag=common:$TAG \
+      --repo-tag=core:$TAG \
+      --repo-tag=backend:$TAG \
+      --repo-tag=thirdparty:$TAG \
+      --backend=python:$TAG
+
+COPY python/dist/*.whl /opt/fastdeploy/
+RUN python3 -m pip install  /opt/fastdeploy/*.whl \
+    && rm -rf /opt/fastdeploy/*.whl
+
+# triton server
+RUN mkdir -p /opt/tritonserver && cp -r /home/server/build/tritonserver/install/* /opt/tritonserver
+# python backend
+RUN mkdir -p /opt/tritonserver/backends/python && cp -r /home/server/build/python/install/backends/python /opt/tritonserver/backends/
+# FastDeploy backend for Triton
+COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
+
+COPY build/fastdeploy-0.0.3 /opt/fastdeploy/
+RUN mv /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
+ENV LD_LIBRARY_PATH="/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:$LD_LIBRARY_PATH"
+ENV PATH="/opt/tritonserver/bin:$PATH"
diff --git a/serving/docs/zh_CN/compile.md b/serving/docs/zh_CN/compile.md
@@ -7,7 +7,7 @@
 FastDeploy发布的GPU镜像基于[Triton Inference Server](https://github.com/triton-inference-server/server)的21.10版本进行制作，如果有其他CUDA版本需求，可以参照[NVIDIA 官网](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)中展示的版本信息修改Dockerfile和scripts中的脚本.
 
 ```
-# 进入serving目录执行脚本编译fastdeply和服务化的backend
+# 进入serving目录执行脚本编译fastdeploy和服务化的backend
 cd serving
 bash scripts/build.sh
 
@@ -19,11 +19,23 @@ docker build -t paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 -f servi
 ## 制作CPU镜像
 
 ```
-# 进入serving目录执行脚本编译fastdeply和服务化的backend
+# 进入serving目录执行脚本编译fastdeploy和服务化的backend
 cd serving
 bash scripts/build.sh OFF
 
 # 退出到FastDeploy主目录，制作镜像
 cd ../
 docker build -t paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10 -f serving/Dockerfile_cpu .
 ```
+
+## 制作IPU镜像
+
+```
+# 进入serving目录执行脚本编译fastdeploy和服务化的backend
+cd serving
+bash scripts/build_fd_ipu.sh
+
+# 退出到FastDeploy主目录，制作镜像
+cd ../
+docker build -t paddlepaddle/fastdeploy:0.3.0-ipu-only-21.10 -f serving/Dockerfile_ipu .
+```
diff --git a/serving/scripts/build_fd_ipu.sh b/serving/scripts/build_fd_ipu.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
+    wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
+    tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
+    rm -rf cmake-3.18.6-Linux-x86_64.tar.gz
+fi
+
+# build vision
+docker run -it --rm --name build_fd_vison \
+           -v`pwd`/..:/workspace/fastdeploy \
+           graphcore/poplar:3.0.0 \
+           bash -c \
+           'cd /workspace/fastdeploy/python;
+            rm -rf .setuptools-cmake-build dist;
+            apt-get update;
+            apt-get install -y --no-install-recommends patchelf python3-dev python3-pip python3-setuptools build-essential;
+            ln -s /usr/bin/python3 /usr/bin/python;
+            pip3 install wheel;
+            export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
+            export WITH_GPU=OFF;
+            export WITH_IPU=ON;
+            export ENABLE_PADDLE_BACKEND=ON;
+            export ENABLE_VISION=ON;
+            python setup.py build;
+            python setup.py bdist_wheel'
+
+# build the FastDeploy C++ runtime library (fastdeploy_runtime) with the Paddle backend and IPU enabled
+docker run -it --rm --name build_fd_runtime \
+           -v`pwd`/..:/workspace/fastdeploy \
+           graphcore/poplar:3.0.0 \
+           bash -c \
+           'cd /workspace/fastdeploy;
+            rm -rf build; mkdir build; cd build;
+            apt-get update;
+            apt-get install -y --no-install-recommends python3-dev python3-pip build-essential;
+            ln -s /usr/bin/python3 /usr/bin/python;
+            export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
+            cmake .. -DENABLE_ORT_BACKEND=OFF -DENABLE_TEXT=OFF -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_BACKEND=ON -DWITH_IPU=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DLIBRARY_NAME=fastdeploy_runtime;
+            make -j`nproc`;
+            make install'
+
+# build the Triton FastDeploy backend using the FastDeploy install in build/fastdeploy-0.0.3
+docker run -it --rm --name build_fd_backend \
+           -v`pwd`/..:/workspace/fastdeploy \
+           graphcore/poplar:3.0.0 \
+           bash -c \
+           'cd /workspace/fastdeploy/serving;
+            rm -rf build; mkdir build; cd build;
+            apt-get update; apt-get install -y --no-install-recommends rapidjson-dev build-essential git ca-certificates;
+            export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
+            cmake .. -DTRITON_ENABLE_GPU=OFF -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10; make -j`nproc`'
diff --git a/serving/src/fastdeploy_runtime.cc b/serving/src/fastdeploy_runtime.cc
@@ -238,6 +238,8 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
               runtime_options_->SetPaddleMKLDNN(pd_enable_mkldnn);
             } else if (param_key == "use_paddle_log") {
                 runtime_options_->EnablePaddleLogInfo();
+            } else if (param_key == "use_ipu") {
+              runtime_options_->UseIpu();
             }
           }
         }
@@ -384,7 +386,10 @@ TRITONSERVER_Error* ModelState::LoadModel(
     runtime_options_->UseCpu();
   }
 #else
-  runtime_options_->UseCpu();
+  if (runtime_options_->device != fastdeploy::Device::IPU) {
+    // If Device is set to IPU, just skip CPU setting.
+    runtime_options_->UseCpu();
+  }
 #endif  // TRITON_ENABLE_GPU
 
   *runtime = new fastdeploy::Runtime();

Original file line number	Diff line number	Diff line change
`@@ -238,6 +238,8 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)`
`238`	`238`	`runtime_options_->SetPaddleMKLDNN(pd_enable_mkldnn);`
`239`	`239`	`} else if (param_key == "use_paddle_log") {`
`240`	`240`	`runtime_options_->EnablePaddleLogInfo();`
	`241`	`+ } else if (param_key == "use_ipu") {`
	`242`	`+ runtime_options_->UseIpu();`
`241`	`243`	`}`
`242`	`244`	`}`
`243`	`245`	`}`
`@@ -384,7 +386,10 @@ TRITONSERVER_Error* ModelState::LoadModel(`
`384`	`386`	`runtime_options_->UseCpu();`
`385`	`387`	`}`
`386`	`388`	`#else`
`387`		`- runtime_options_->UseCpu();`
	`389`	`+ if (runtime_options_->device != fastdeploy::Device::IPU) {`
	`390`	`+ // If Device is set to IPU, just skip CPU setting.`
	`391`	`+ runtime_options_->UseCpu();`
	`392`	`+ }`
`388`	`393`	`#endif // TRITON_ENABLE_GPU`
`389`	`394`
`390`	`395`	`*runtime = new fastdeploy::Runtime();`