From a86f38d5d032682971a1975d17ad12fb87c329de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniele=20Trifir=C3=B2?=
Date: Thu, 8 Aug 2024 11:56:20 +0200
Subject: [PATCH] [CI/Build] Dockerfile.cpu improvements

- use `requirements-build.txt` to install build requirements
- use cache bind mounts to speed up builds
- use ccache
- build wheel and install it instead of using `setup.py install`
---
 .dockerignore  |  3 +++
 Dockerfile.cpu | 30 +++++++++++++++++++++--------
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index 5cfe0dcb065d..79fa088fa809 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1 +1,4 @@
 vllm/*.so
+/.venv
+/build
+dist
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
index 78730f39721c..35ce5dde99d2 100644
--- a/Dockerfile.cpu
+++ b/Dockerfile.cpu
@@ -2,14 +2,16 @@
 
 FROM ubuntu:22.04 AS cpu-test-1
 
-RUN apt-get update -y \
-    && apt-get install -y curl git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
+RUN --mount=type=cache,target=/var/cache/apt \
+    apt-get update -y \
+    && apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
     && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
 
 # https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
 # intel-openmp provides additional performance improvement vs. openmp
 # tcmalloc provides better memory allocation efficiency, e.g, holding memory in caches to speed up access of commonly-used objects.
-RUN pip install intel-openmp
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install intel-openmp
 
 ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so:$LD_PRELOAD"
 
@@ -17,22 +19,32 @@ RUN echo 'ulimit -c 0' >> ~/.bashrc
 
 RUN pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.4.0%2Bgitfbaa4bc-cp310-cp310-linux_x86_64.whl
 
-RUN pip install --upgrade pip \
-    && pip install wheel packaging ninja "setuptools>=49.4.0" numpy
+ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,src=requirements-build.txt,target=requirements-build.txt \
+    pip install --upgrade pip && \
+    pip install -r requirements-build.txt
 
 FROM cpu-test-1 AS build
 
-COPY ./ /workspace/vllm
-
 WORKDIR /workspace/vllm
 
-RUN pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,src=requirements-common.txt,target=requirements-common.txt \
+    --mount=type=bind,src=requirements-cpu.txt,target=requirements-cpu.txt \
+    pip install -v -r requirements-cpu.txt
+
+COPY ./ ./
 
 # Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
 ARG VLLM_CPU_DISABLE_AVX512
 ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
 
-RUN VLLM_TARGET_DEVICE=cpu python3 setup.py install
+ENV CCACHE_DIR=/root/.cache/ccache
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/ccache \
+    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
+    pip install dist/*.whl
 
 WORKDIR /workspace/