opea-project · lvliang-intel · Apr 8, 2025 · Feb 26, 2025 · Feb 26, 2025 · Feb 27, 2025
@@ -45,3 +45,8 @@ services:
       dockerfile: Dockerfile.hpu
     shm_size: '128g'
     image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
+  # Build definition for the ipex-llm image; the resulting tag follows the
+  # same ${REGISTRY}/${TAG} pattern as the other images in this file.
+  # NOTE(review): `dockerfile` is resolved relative to `context`, and that
+  # Dockerfile COPYs ./comps/third_parties/ipex/src/*.py, so the `ipex-llm`
+  # directory presumably has to be laid out like this repository's root — confirm.
+  ipex-llm:
+    build:
+      context: ipex-llm
+      dockerfile: comps/third_parties/ipex/src/Dockerfile
+    image: ${REGISTRY:-opea}/ipex-llm:${TAG:-latest}
@@ -0,0 +1,22 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+
+  # Single serving container started from the ipex-llm image.
+  ipex:
+    container_name: ipex-llm-server
+    image: ${REGISTRY:-opea}/ipex-llm:${TAG:-latest}
+    restart: unless-stopped
+    # Use the host's IPC namespace instead of a private one.
+    ipc: host
+    ports:
+      # Host port is overridable through IPEX_LLM_PORT (default 8688);
+      # the container-side port is fixed at 8688.
+      - "${IPEX_LLM_PORT:-8688}:8688"
+    environment:
+      # Values below are taken from the environment of the shell running compose.
+      MODEL_ID: ${MODEL_ID}
+      HF_TOKEN: ${HF_TOKEN}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      no_proxy: ${no_proxy}
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/third_parties/ipex/deployment/kubernetes/README.md b/comps/third_parties/ipex/deployment/kubernetes/README.md
@@ -0,0 +1,89 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+ARG BASE_IMAGE=ubuntu:22.04
+
+# Stage "base": shared by the "dev" and "deploy" stages. Provides the OS
+# packages, GCC/G++ 12 as the default compilers, and Miniforge installed
+# under /root/miniforge3.
+FROM ${BASE_IMAGE} AS base
+
+# Rebuild the apt proxy configuration from the proxy build args. Both the
+# upper-case and lower-case spellings are accepted (upper case wins), so a
+# build invoked with `--build-arg http_proxy=...` is honoured as well.
+RUN rm -f /etc/apt/apt.conf.d/proxy.conf && \
+    APT_HTTP_PROXY="${HTTP_PROXY:-${http_proxy}}" && \
+    APT_HTTPS_PROXY="${HTTPS_PROXY:-${https_proxy}}" && \
+    if [ -n "${APT_HTTP_PROXY}" ]; then echo "Acquire::http::Proxy \"${APT_HTTP_PROXY}\";" >> /etc/apt/apt.conf.d/proxy.conf; fi && \
+    if [ -n "${APT_HTTPS_PROXY}" ]; then echo "Acquire::https::Proxy \"${APT_HTTPS_PROXY}\";" >> /etc/apt/apt.conf.d/proxy.conf; fi
+
+# apt-get is used instead of apt because apt's command-line interface is not
+# intended for scripts. The package index is left in place here; the "deploy"
+# stage removes it after its own installs.
+RUN apt-get update && \
+    apt-get dist-upgrade -y && \
+    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+    ca-certificates \
+    curl \
+    g++-12 \
+    gcc-12 \
+    git \
+    make \
+    numactl \
+    vim \
+    wget
+
+# Make GCC/G++ 12 the targets of the generic gcc, g++, cc and c++ commands.
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 && \
+    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 && \
+    update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \
+    update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100
+
+WORKDIR /root
+
+# Install Miniforge 24.7.1-2 into /root/miniforge3 and drop the installer in
+# the same layer. A single -o names the output file; -f makes HTTP errors fail
+# the build instead of saving an error page as the installer.
+RUN curl -fsSL -o miniforge.sh https://github.com/conda-forge/miniforge/releases/download/24.7.1-2/Miniforge3-24.7.1-2-Linux-x86_64.sh && \
+    bash miniforge.sh -b -p ./miniforge3 && \
+    rm miniforge.sh
+
+# Stage "dev": clones intel-extension-for-pytorch and runs its LLM
+# tools/env_setup.sh inside a dedicated conda env (compile_py310).
+# The COMPILE build arg picks the env_setup.sh mode:
+#   unset or empty              -> env_setup.sh 14
+#   any non-empty value         -> env_setup.sh 10
+# Pass --build-arg COMPILE=ON to compile from source.
+FROM base AS dev
+ARG COMPILE
+RUN git clone https://github.com/intel/intel-extension-for-pytorch.git
+# CC/CXX are exported only for the duration of env_setup.sh. The COMPILE test
+# is quoted so that it is a real two-argument emptiness check rather than
+# depending on how `[` treats a lone "-z".
+RUN . ~/miniforge3/bin/activate && conda create -y -n compile_py310 python=3.10 && conda activate compile_py310 && \
+    cd intel-extension-for-pytorch/examples/cpu/llm && \
+    export CC=gcc CXX=g++ && \
+    if [ -z "${COMPILE}" ]; then bash tools/env_setup.sh 14; else bash tools/env_setup.sh 10; fi && \
+    unset CC CXX
+
+# Stage "deploy": the runtime image. Adds runtime OS packages, copies the LLM
+# example tree produced by the "dev" stage, and builds the py310 conda env.
+FROM base AS deploy
+
+# Runtime packages. The apt index, package cache and the build-time apt proxy
+# configuration are all removed in this same layer.
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+    google-perftools \
+    net-tools \
+    openssh-server && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    rm -f /etc/apt/apt.conf.d/proxy.conf
+
+# WORKDIR is still /root (inherited from "base"), so these land in /root/llm.
+COPY --from=dev /root/intel-extension-for-pytorch/examples/cpu/llm ./llm
+COPY --from=dev /root/intel-extension-for-pytorch/tools/get_libstdcpp_lib.sh ./llm/tools
+
+# Create the py310 env and populate it:
+#   - expose libtcmalloc.so.4 under the unversioned name (ln -sf so an
+#     existing link does not fail the build),
+#   - run env_setup.sh in mode 9,
+#   - move the oneCCL release to /opt/oneCCL and point env_activate.sh at it,
+#   - install the pinned nightly torchvision/torchaudio CPU wheels, deleting
+#     each downloaded wheel right after it is installed so it is not kept in
+#     the image,
+#   - install the web-serving dependencies.
+# --no-cache-dir keeps the pip cache empty after the explicit purge.
+RUN . ~/miniforge3/bin/activate && conda create -y -n py310 python=3.10 && conda activate py310 && \
+    cd /usr/lib/x86_64-linux-gnu/ && ln -sf libtcmalloc.so.4 libtcmalloc.so && cd && \
+    cd ./llm && \
+    bash tools/env_setup.sh 9 && \
+    python -m pip cache purge && \
+    mv ./oneCCL_release /opt/oneCCL && \
+    chown -R root:root /opt/oneCCL && \
+    sed -i "s|ONECCL_PATH=.*|ONECCL_PATH=/opt/oneCCL|" ./tools/env_activate.sh && \
+    wget https://download.pytorch.org/whl/nightly/cpu/torchvision-0.22.0.dev20250218%2Bcpu-cp310-cp310-linux_x86_64.whl && \
+    pip install --no-cache-dir torchvision-0.22.0.dev20250218+cpu-cp310-cp310-linux_x86_64.whl && \
+    rm -f torchvision-0.22.0.dev20250218+cpu-cp310-cp310-linux_x86_64.whl && \
+    wget https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.6.0.dev20250218%2Bcpu-cp310-cp310-linux_x86_64.whl && \
+    pip install --no-cache-dir torchaudio-2.6.0.dev20250218+cpu-cp310-cp310-linux_x86_64.whl && \
+    rm -f torchaudio-2.6.0.dev20250218+cpu-cp310-cp310-linux_x86_64.whl && \
+    pip install --no-cache-dir backoff fastapi uvicorn
+
+# SSH setup. PORT_SSH (default 22) is written into sshd_config, into the
+# client config for all hosts, and is declared via EXPOSE.
+# An RSA key pair is generated without a passphrase; the public half becomes
+# root's authorized_keys, so the image can log in to itself as root.
+# NOTE(review): the private key is created at build time and stored in the
+# image, so every container started from the same image shares it, and
+# StrictHostKeyChecking is disabled — confirm this is acceptable for the
+# environments the image is published to.
+ARG PORT_SSH=22
+# printf is used for the client config so the embedded newlines do not depend
+# on the build shell's echo interpreting "\n".
+RUN mkdir -p /var/run/sshd && \
+    sed -i "s/#Port.*/Port ${PORT_SSH}/" /etc/ssh/sshd_config && \
+    echo "service ssh start" >> /root/.bashrc && \
+    ssh-keygen -b 4096 -f /root/.ssh/id_rsa -N "" && \
+    mv /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys && \
+    printf 'Host *\n    Port %s\n    IdentityFile /root/.ssh/id_rsa\n    StrictHostKeyChecking no\n' "${PORT_SSH}" > /root/.ssh/config
+EXPOSE ${PORT_SSH}
+# Copy the inference server sources into /root. The source paths are relative
+# to the build context, which therefore has to contain
+# comps/third_parties/ipex/src/.
+COPY ./comps/third_parties/ipex/src/ipex_inference.py /root
+COPY ./comps/third_parties/ipex/src/openai_protocol.py /root
+# Generate /usr/local/bin/entrypoint.sh one line at a time and make it
+# executable. The generated bash script:
+#   1. collects its arguments into the CMDS array,
+#   2. activates the py310 conda env from ~/miniforge3,
+#   3. sources oneccl_bindings_for_pytorch/env/setvars.sh, located relative to
+#      the directory of the installed torch package,
+#   4. prints a hint about the 'ipexrun' launcher,
+#   5. runs /root/ipex_inference.py in the foreground,
+#   6. finally runs the collected arguments as a command.
+# The \$ and \" escapes keep those expansions and quotes literal in the script
+# so they are evaluated when the container starts, not during the build.
+# NOTE(review): step 6 is reached only after the Python process exits, and the
+# Python process is not exec'ed, so bash (not the server) is PID 1 — confirm
+# this is intended.
+RUN ENTRYPOINT=/usr/local/bin/entrypoint.sh && \
+    echo "#!/bin/bash" > ${ENTRYPOINT} && \
+    echo "CMDS=(); while [ \$# -gt 0 ]; do CMDS+=(\"\$1\"); shift; done;" >> ${ENTRYPOINT} && \
+    echo ". ~/miniforge3/bin/activate" >> ${ENTRYPOINT} && \
+    echo "conda activate py310" >> ${ENTRYPOINT} && \
+    echo "TMP=\$(python -c \"import torch; import os; print(os.path.abspath(os.path.dirname(torch.__file__)))\")" >> ${ENTRYPOINT} && \
+    echo ". \${TMP}/../oneccl_bindings_for_pytorch/env/setvars.sh" >> ${ENTRYPOINT} && \
+    echo "echo \"**Note:** For better performance, please consider to launch workloads with command 'ipexrun'.\"" >> ${ENTRYPOINT} && \
+    echo "python /root/ipex_inference.py" >> ${ENTRYPOINT} && \
+    echo "\"\${CMDS[@]}\"" >> ${ENTRYPOINT} && \
+    chmod +x ${ENTRYPOINT}
+ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
+
@@ -0,0 +1,29 @@
+# IPEX Serving microservice
+
+[Intel® Extension for PyTorch](https://github.com/intel/intel-extension-for-pytorch) delivers advanced optimizations to accelerate Large Language Model (LLM) inference on Intel hardware. It enhances performance through techniques such as paged attention and ROPE fusion, while also supporting a range of precision formats, including FP32, BF16, Smooth Quantization INT8, and prototype weight-only quantization in INT8/INT4.
+
+For more details, refer to the [Intel® Extension for PyTorch LLM README](https://github.com/intel/intel-extension-for-pytorch/blob/main/examples/cpu/llm/README.md).
+
+## 🚀1. Build the Docker Image
+
+```bash
+cd ../../../../
+docker build -f comps/third_parties/ipex/src/Dockerfile --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg COMPILE=ON --build-arg PORT_SSH=2345 -t opea/ipex-llm:latest .
+```
+
+## 🚀2. Start the microservice
+
+```bash
+export MODEL_ID="microsoft/phi-4"
+
+cd comps/third_parties/ipex/deployment/docker_compose
+docker compose -f compose.yaml up -d
+```
+
+## 🚀3. Access the service
+
+Once the container is running, test the service with the following command:
+
+```bash
+http_proxy="" curl -X POST -H "Content-Type: application/json" -d '{"model": "microsoft/phi-4", "messages": [{"role": "user", "content": "Hello! What is your name?"}], "max_tokens": 128}' http://localhost:8688/v1/chat/completions
+```