Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
a6685a6
Add ipex microservice for CPU optimization
lvliang-intel Feb 26, 2025
c54f80a
Add ipex microservice for CPU optimization
lvliang-intel Feb 26, 2025
d2727b4
Merge branch 'ipex_llm' of https://github.com/lvliang-intel/GenAIComp…
lvliang-intel Feb 27, 2025
5e3fba0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 27, 2025
b7e8be1
add docker build
lvliang-intel Feb 27, 2025
e45fc90
Merge branch 'ipex_llm' of https://github.com/lvliang-intel/GenAIComp…
lvliang-intel Feb 27, 2025
b06fb8a
Merge branch 'main' of https://github.com/lvliang-intel/GenAIComps in…
lvliang-intel Feb 27, 2025
de64cf2
fix typo
lvliang-intel Feb 27, 2025
1ee6279
fix typo
lvliang-intel Feb 27, 2025
f8325ef
Merge branch 'ipex_llm' of https://github.com/lvliang-intel/GenAIComp…
lvliang-intel Feb 27, 2025
cc3a36e
Merge branch 'main' of https://github.com/lvliang-intel/GenAIComps in…
lvliang-intel Mar 14, 2025
6331234
update code according to comments
lvliang-intel Mar 14, 2025
ec731fb
update readme for comments
lvliang-intel Apr 8, 2025
adee7b0
Merge branch 'main' of https://github.com/lvliang-intel/GenAIComps in…
lvliang-intel Apr 8, 2025
b841ba2
Merge branch 'main' of https://github.com/lvliang-intel/GenAIComps in…
lvliang-intel Apr 8, 2025
9193a44
use local entrypoint.sh
lvliang-intel Apr 8, 2025
72adb1a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/docker/compose/third_parties-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,8 @@ services:
dockerfile: Dockerfile.hpu
shm_size: '128g'
image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
# ipex-llm image built from the IPEX serving Dockerfile added in this PR.
ipex-llm:
build:
# NOTE(review): the Dockerfile referenced below COPYs files using repo-rooted
# paths (comps/third_parties/ipex/src/...), which only resolve when the build
# context is the GenAIComps repository root. A context of 'ipex-llm' looks
# inconsistent with that — confirm against how CI checks out the repo.
context: ipex-llm
dockerfile: comps/third_parties/ipex/src/Dockerfile
image: ${REGISTRY:-opea}/ipex-llm:${TAG:-latest}
22 changes: 22 additions & 0 deletions comps/third_parties/ipex/deployment/docker_compose/compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# NOTE(review): sibling Dockerfile in this PR uses 2025 — confirm intended year.

services:
  # IPEX LLM serving container; listens on 8688 inside the container and
  # exposes it on ${IPEX_LLM_PORT} (default 8688) on the host.
  ipex:
    image: ${REGISTRY:-opea}/ipex-llm:${TAG:-latest}
    container_name: ipex-llm-server
    ports:
      # Quoted to avoid YAML's sexagesimal/number parsing of HOST:CONTAINER
      # mappings (Docker Compose documented best practice).
      - "${IPEX_LLM_PORT:-8688}:8688"
    # Host IPC namespace: required for the shared-memory usage of the
    # oneCCL / IPEX runtime inside the image.
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      # HF model to serve, e.g. "microsoft/phi-4" (see README).
      MODEL_ID: ${MODEL_ID}
      # Hugging Face token for gated models; may be empty for public models.
      HF_TOKEN: ${HF_TOKEN}
    restart: unless-stopped

networks:
  default:
    driver: bridge
Empty file.
89 changes: 89 additions & 0 deletions comps/third_parties/ipex/src/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Multi-stage build:
#   base   - Ubuntu + toolchain (gcc-12) + Miniforge (conda)
#   dev    - clones intel-extension-for-pytorch and runs its env_setup build
#   deploy - runtime image; copies the built llm example tree + oneCCL from dev,
#            installs nightly torchvision/torchaudio wheels, sets up sshd and
#            a generated entrypoint that activates conda and starts the server.
ARG BASE_IMAGE=ubuntu:22.04
FROM ${BASE_IMAGE} AS base
# Write apt proxy config from the Docker predefined build args HTTP_PROXY /
# HTTPS_PROXY (predefined args need no ARG declaration). Any stale proxy.conf
# is removed first so the file only reflects the current build's settings.
RUN if [ -f /etc/apt/apt.conf.d/proxy.conf ]; then rm /etc/apt/apt.conf.d/proxy.conf; fi && \
if [ ! -z ${HTTP_PROXY} ]; then echo "Acquire::http::Proxy \"${HTTP_PROXY}\";" >> /etc/apt/apt.conf.d/proxy.conf; fi && \
if [ ! -z ${HTTPS_PROXY} ]; then echo "Acquire::https::Proxy \"${HTTPS_PROXY}\";" >> /etc/apt/apt.conf.d/proxy.conf; fi
# Toolchain and utilities needed to build IPEX from source in the dev stage.
RUN apt update && \
apt full-upgrade -y && \
DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends -y \
ca-certificates \
git \
curl \
wget \
vim \
numactl \
gcc-12 \
g++-12 \
make
# Make gcc-12/g++-12 the default gcc/g++/cc/c++ for the IPEX build.
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 && \
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 && \
update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \
update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100

WORKDIR /root

# Install Miniforge (pinned release 24.7.1-2) into /root/miniforge3.
# NOTE(review): curl is given both '-o miniforge.sh' and '-O' for a single
# URL — the extra -O is redundant and curl warns about unmatched output
# options; one of the two should be dropped (confirm, harmless as-is).
RUN curl -fsSL -v -o miniforge.sh -O https://github.com/conda-forge/miniforge/releases/download/24.7.1-2/Miniforge3-24.7.1-2-Linux-x86_64.sh && \
bash miniforge.sh -b -p ./miniforge3 && \
rm miniforge.sh

# --build-arg COMPILE=ON to compile from source
FROM base AS dev
ARG COMPILE
RUN git clone https://github.com/intel/intel-extension-for-pytorch.git
# Build the IPEX llm example environment in a dedicated conda env.
# env_setup.sh mode 14 is used when COMPILE is unset, mode 10 when it is set
# (per the comment above, the set/COMPILE=ON path builds from source —
# the meaning of the numeric modes is defined by env_setup.sh; TODO confirm).
# CC/CXX are pinned to the gcc/g++ alternatives installed in base.
RUN . ~/miniforge3/bin/activate && conda create -y -n compile_py310 python=3.10 && conda activate compile_py310 && \
cd intel-extension-for-pytorch/examples/cpu/llm && \
export CC=gcc && export CXX=g++ && \
if [ -z ${COMPILE} ]; then bash tools/env_setup.sh 14; else bash tools/env_setup.sh 10; fi && \
unset CC && unset CXX

FROM base AS deploy
# Runtime-only packages; apt caches and the build-time proxy config are
# removed to keep the final image clean.
RUN apt update && \
DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends -y \
google-perftools \
openssh-server \
net-tools && \
apt clean && \
rm -rf /var/lib/apt/lists/* && \
if [ -f /etc/apt/apt.conf.d/proxy.conf ]; then rm /etc/apt/apt.conf.d/proxy.conf; fi
# WORKDIR is /root (inherited from base), so ./llm resolves to /root/llm.
COPY --from=dev /root/intel-extension-for-pytorch/examples/cpu/llm ./llm
COPY --from=dev /root/intel-extension-for-pytorch/tools/get_libstdcpp_lib.sh ./llm/tools
# Create the runtime conda env (py310), link tcmalloc for preloading,
# run env_setup.sh mode 9 (deploy setup — defined by env_setup.sh), relocate
# oneCCL to /opt/oneCCL and patch env_activate.sh accordingly, then install
# pinned nightly CPU wheels of torchvision/torchaudio plus the serving deps.
RUN . ~/miniforge3/bin/activate && conda create -y -n py310 python=3.10 && conda activate py310 && \
cd /usr/lib/x86_64-linux-gnu/ && ln -s libtcmalloc.so.4 libtcmalloc.so && cd && \
cd ./llm && \
bash tools/env_setup.sh 9 && \
python -m pip cache purge && \
mv ./oneCCL_release /opt/oneCCL && \
chown -R root:root /opt/oneCCL && \
sed -i "s|ONECCL_PATH=.*|ONECCL_PATH=/opt/oneCCL|" ./tools/env_activate.sh && \
wget https://download.pytorch.org/whl/nightly/cpu/torchvision-0.22.0.dev20250218%2Bcpu-cp310-cp310-linux_x86_64.whl && \
pip install torchvision-0.22.0.dev20250218+cpu-cp310-cp310-linux_x86_64.whl && \
wget https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.6.0.dev20250218%2Bcpu-cp310-cp310-linux_x86_64.whl && \
pip install torchaudio-2.6.0.dev20250218+cpu-cp310-cp310-linux_x86_64.whl && \
pip install backoff fastapi uvicorn

# SSH setup (presumably for multi-node distributed runs — confirm): sshd on
# PORT_SSH, a passwordless keypair self-authorized for root, and a client
# config that disables strict host key checking.
ARG PORT_SSH=22
RUN mkdir /var/run/sshd && \
sed -i "s/#Port.*/Port ${PORT_SSH}/" /etc/ssh/sshd_config && \
echo "service ssh start" >> /root/.bashrc && \
ssh-keygen -b 4096 -f /root/.ssh/id_rsa -N "" && \
mv /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys && \
echo "Host *\n Port ${PORT_SSH}\n IdentityFile /root/.ssh/id_rsa\n StrictHostKeyChecking no" > /root/.ssh/config
EXPOSE ${PORT_SSH}
# Serving app; paths are relative to the build context, which must therefore
# be the GenAIComps repo root (see README build command).
COPY ./comps/third_parties/ipex/src/ipex_inference.py /root
COPY ./comps/third_parties/ipex/src/openai_protocol.py /root
# Generate the entrypoint at build time. At container start it: captures any
# CLI args, activates the py310 conda env, sources the oneCCL bindings'
# setvars.sh (located relative to the installed torch package), prints a
# performance hint, launches the inference server, then execs the captured
# args (no-op when none are given).
RUN ENTRYPOINT=/usr/local/bin/entrypoint.sh && \
echo "#!/bin/bash" > ${ENTRYPOINT} && \
echo "CMDS=(); while [ \$# -gt 0 ]; do CMDS+=(\"\$1\"); shift; done;" >> ${ENTRYPOINT} && \
echo ". ~/miniforge3/bin/activate" >> ${ENTRYPOINT} && \
echo "conda activate py310" >> ${ENTRYPOINT} && \
echo "TMP=\$(python -c \"import torch; import os; print(os.path.abspath(os.path.dirname(torch.__file__)))\")" >> ${ENTRYPOINT} && \
echo ". \${TMP}/../oneccl_bindings_for_pytorch/env/setvars.sh" >> ${ENTRYPOINT} && \
echo "echo \"**Note:** For better performance, please consider to launch workloads with command 'ipexrun'.\"" >> ${ENTRYPOINT} && \
echo "python /root/ipex_inference.py" >> ${ENTRYPOINT} && \
echo "\"\${CMDS[@]}\"" >> ${ENTRYPOINT} && \
chmod +x ${ENTRYPOINT}
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]

29 changes: 29 additions & 0 deletions comps/third_parties/ipex/src/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# IPEX Serving microservice

[Intel® Extension for PyTorch](https://github.com/intel/intel-extension-for-pytorch) delivers advanced optimizations to accelerate Large Language Model (LLM) inference on Intel hardware. It enhances performance through techniques such as paged attention and ROPE fusion, while also supporting a range of precision formats, including FP32, BF16, Smooth Quantization INT8, and prototype weight-only quantization in INT8/INT4.

For more details, refer to the [Intel® Extension for PyTorch LLM README](https://github.com/intel/intel-extension-for-pytorch/blob/main/examples/cpu/llm/README.md).

## 🚀1. Build the Docker Image

```bash
cd ../../../../
docker build -f comps/third_parties/ipex/src/Dockerfile --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg COMPILE=ON --build-arg PORT_SSH=2345 -t opea/ipex-llm:latest .
```

## 🚀2. Start the microservice

```bash
export MODEL_ID="microsoft/phi-4"

cd comps/third_parties/ipex/deployment/docker_compose
docker compose -f compose.yaml up -d
```

## 🚀3. Access the service

Once the service is up, verify it with the following command:

```bash
http_proxy="" curl -X POST -H "Content-Type: application/json" -d '{"model": "microsoft/phi-4", "messages": [{"role": "user", "content": "Hello! What is your name?"}], "max_tokens": 128}' http://localhost:8688/v1/chat/completions
```
Loading