diff --git a/comps/llms/deployment/docker_compose/compose_text-generation.yaml b/comps/llms/deployment/docker_compose/compose_text-generation.yaml
index 3bedf759e5..abf02b5e1a 100644
--- a/comps/llms/deployment/docker_compose/compose_text-generation.yaml
+++ b/comps/llms/deployment/docker_compose/compose_text-generation.yaml
@@ -186,7 +186,7 @@ services:
       ZE_AFFINITY_MASK: ${ZE_AFFINITY_MASK}
     shm_size: 128g
     entrypoint: /bin/bash -c "\
-      chmod +x /llm/vllm_ipex_entrypoint.sh && \
+      source /opt/intel/oneapi/setvars.sh --force && \
       bash /llm/vllm_ipex_entrypoint.sh"
 
 networks:
diff --git a/comps/llms/src/text-generation/README_vllm_ipex.md b/comps/llms/src/text-generation/README_vllm_ipex.md
index 3823bc28ea..827a2ffa67 100644
--- a/comps/llms/src/text-generation/README_vllm_ipex.md
+++ b/comps/llms/src/text-generation/README_vllm_ipex.md
@@ -19,7 +19,7 @@ This service provides high-throughput, low-latency LLM serving accelerated by vL
 You must download the official docker image from [Docker Hub](https://hub.docker.com/r/intel/llm-scaler-vllm) first.
 
 ```bash
-docker pull intel/llm-scaler-vllm:1.0
+docker pull intel/llm-scaler-vllm:0.10.0-b4
 ```
 
 ## Start Microservice
@@ -31,9 +31,9 @@ Deploy the vLLM-IPEX model serving using Docker Compose.
 1. Export the required environment variables:
 
    ```bash
-   # Use image: intel/llm-scaler-vllm:1.0
+   # Use image: intel/llm-scaler-vllm:0.10.0-b4
    export REGISTRY=intel
-   export TAG=1.0
+   export TAG=0.10.0-b4
 
    export VIDEO_GROUP_ID=$(getent group video | awk -F: '{printf "%s\n", $3}')
    export RENDER_GROUP_ID=$(getent group render | awk -F: '{printf "%s\n", $3}')
diff --git a/comps/lvms/deployment/docker_compose/compose.yaml b/comps/lvms/deployment/docker_compose/compose.yaml
index a8c775224c..ea45e4088d 100644
--- a/comps/lvms/deployment/docker_compose/compose.yaml
+++ b/comps/lvms/deployment/docker_compose/compose.yaml
@@ -188,7 +188,7 @@ services:
       ZE_AFFINITY_MASK: ${ZE_AFFINITY_MASK}
     shm_size: 128g
     entrypoint: /bin/bash -c "\
-      chmod +x /llm/vllm_ipex_entrypoint.sh && \
+      source /opt/intel/oneapi/setvars.sh --force && \
       bash /llm/vllm_ipex_entrypoint.sh"
 
 networks:
diff --git a/comps/lvms/src/README_vllm_ipex.md b/comps/lvms/src/README_vllm_ipex.md
index 0cea57e9b7..87112b2a3f 100644
--- a/comps/lvms/src/README_vllm_ipex.md
+++ b/comps/lvms/src/README_vllm_ipex.md
@@ -19,7 +19,7 @@ This service provides high-throughput, low-latency LVM serving accelerated by vL
 You must download the official docker image from [Docker Hub](https://hub.docker.com/r/intel/llm-scaler-vllm) first.
 
 ```bash
-docker pull intel/llm-scaler-vllm:1.0
+docker pull intel/llm-scaler-vllm:0.10.0-b4
 ```
 
 ## Start Microservice
@@ -31,9 +31,9 @@ Deploy the vLLM-IPEX model serving using Docker Compose.
 1. Export the required environment variables:
 
    ```bash
-   # Use image: intel/llm-scaler-vllm:1.0
+   # Use image: intel/llm-scaler-vllm:0.10.0-b4
    export REGISTRY=intel
-   export TAG=1.0
+   export TAG=0.10.0-b4
    export ip_address=$(hostname -I | awk '{print $1}')
    export VIDEO_GROUP_ID=$(getent group video | awk -F: '{printf "%s\n", $3}')
    export RENDER_GROUP_ID=$(getent group render | awk -F: '{printf "%s\n", $3}')