Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
cf60682
DocSum - add files for deploy app with ROCm vLLM
Feb 13, 2025
1fd1de1
DocSum - fix main
Feb 13, 2025
bd2d47e
DocSum - add files for deploy app with ROCm vLLM
Feb 13, 2025
2459ecb
DocSum - fix main
Feb 13, 2025
4d35065
Merge remote-tracking branch 'origin/main'
Feb 19, 2025
6d5049d
DocSum - add files for deploy app with ROCm vLLM
Feb 13, 2025
9dfbdc5
DocSum - fix main
Feb 13, 2025
a8857ae
DocSum - add files for deploy app with ROCm vLLM
Feb 13, 2025
5a38b26
DocSum - fix main
Feb 13, 2025
0e2ef94
Merge remote-tracking branch 'origin/main'
Feb 25, 2025
30071db
Merge branch 'main' of https://github.com/opea-project/GenAIExamples
Mar 11, 2025
00e4397
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
7542064
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
7ad03f7
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
c46c617
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
714a125
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
f91ce3f
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
b7f9693
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
8d45d78
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
2157be9
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
f47055a
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
e57ab2c
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
07f78b3
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
36e9340
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
6a0a17f
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
b229b6e
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
db51c0a
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
321669c
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
df7c258
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 11, 2025
bf427cb
Translation - add files for deploy with ROCm vLLM
Mar 11, 2025
750fbf1
Merge remote-tracking branch 'origin/feature/Translation_vLLM' into f…
Mar 11, 2025
7bb41cc
Merge branch 'main' into feature/Translation_vLLM
artem-astafev Mar 26, 2025
d10bb3f
Change Readme.md change vllm-rocm build from comps
artem-astafev Mar 26, 2025
6b57981
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 26, 2025
391637c
Added response file examples
artem-astafev Mar 26, 2025
0ecf19c
Merge branch 'feature/Translation_vLLM' of https://github.com/chyundu…
artem-astafev Mar 26, 2025
33fc184
Change vllm-rocm image name to image from comps
artem-astafev Mar 27, 2025
dec230c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 27, 2025
e2f654f
Merge branch 'main' into feature/Translation_vLLM
artem-astafev Mar 27, 2025
9cce521
Added Delay before starting validation tests
artem-astafev Mar 27, 2025
34aabb7
Check Nginx Downgrade
artem-astafev Mar 27, 2025
38944d3
Fix for compose_vllm
artem-astafev Mar 27, 2025
6915cb9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 27, 2025
64acfe3
fix nginx start issue
artem-astafev Mar 27, 2025
7b4e0c1
Merge branch 'feature/Translation_vLLM' of https://github.com/chyundu…
artem-astafev Mar 27, 2025
e7bf6c6
fix nginx test issues
artem-astafev Mar 27, 2025
3826aeb
update Readme.md
artem-astafev Mar 27, 2025
5168304
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 27, 2025
77e3950
minor fixes
artem-astafev Mar 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
422 changes: 370 additions & 52 deletions Translation/docker_compose/amd/gpu/rocm/README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Translation/docker_compose/amd/gpu/rocm/compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ services:
- translation-backend-server
- translation-ui-server
ports:
- "${TRANSLATION_NGINX_PORT:-80}:80"
- "${TRANSLATION_NGINX_PORT:-80}:8080"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
Expand Down
107 changes: 107 additions & 0 deletions Translation/docker_compose/amd/gpu/rocm/compose_vllm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Translation megaservice on AMD ROCm GPUs, with vLLM as the LLM backend.
# Host-side ports and the model are selected via TRANSLATION_* environment
# variables (see set_env_vllm.sh); defaults below are fallbacks only.
services:
  translation-vllm-service:
    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
    container_name: translation-vllm-service
    ports:
      # Host port is configurable; vLLM listens on 8011 inside the container.
      - "${TRANSLATION_VLLM_SERVICE_PORT:-8081}:8011"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${TRANSLATION_HUGGINGFACEHUB_API_TOKEN}
      HF_TOKEN: ${TRANSLATION_HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      # NOTE(review): name looks like a typo of VLLM_USE_TRITON_FLASH_ATTENTION —
      # as written this variable is ignored by vLLM; confirm and fix upstream.
      WILM_USE_TRITON_FLASH_ATTENTION: 0
      PYTORCH_JIT: 0
    volumes:
      # Model cache on the host so downloads survive container restarts.
      - "./data:/data"
    shm_size: 20G
    # ROCm GPU access requires the kfd and dri device nodes plus the video group.
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri/:/dev/dri/
    cap_add:
      - SYS_PTRACE
    group_add:
      - video
    security_opt:
      - seccomp:unconfined
      - apparmor=unconfined
    command: "--model ${TRANSLATION_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 1 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
    ipc: host
  translation-llm:
    # Text-generation wrapper microservice that fronts the vLLM endpoint.
    image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
    container_name: translation-llm-textgen-server
    depends_on:
      - translation-vllm-service
    ports:
      - "${TRANSLATION_LLM_PORT:-9000}:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${TRANSLATION_LLM_ENDPOINT}
      LLM_MODEL_ID: ${TRANSLATION_LLM_MODEL_ID}
      HUGGINGFACEHUB_API_TOKEN: ${TRANSLATION_HUGGINGFACEHUB_API_TOKEN}
      HF_TOKEN: ${TRANSLATION_HUGGINGFACEHUB_API_TOKEN}
      LLM_COMPONENT_NAME: "OpeaTextGenService"
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    restart: unless-stopped
  translation-backend-server:
    # Megaservice orchestrator exposing the /v1/translation API.
    image: ${REGISTRY:-opea}/translation:${TAG:-latest}
    container_name: translation-backend-server
    depends_on:
      - translation-vllm-service
      - translation-llm
    ports:
      - "${TRANSLATION_BACKEND_SERVICE_PORT:-8888}:8888"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      MEGA_SERVICE_HOST_IP: ${TRANSLATION_MEGA_SERVICE_HOST_IP}
      LLM_SERVICE_HOST_IP: ${TRANSLATION_LLM_SERVICE_HOST_IP}
      LLM_SERVICE_PORT: ${TRANSLATION_LLM_PORT}
    ipc: host
    restart: always
  translation-ui-server:
    # Svelte web UI.
    image: ${REGISTRY:-opea}/translation-ui:${TAG:-latest}
    container_name: translation-ui-server
    depends_on:
      - translation-backend-server
    ports:
      - "${TRANSLATION_FRONTEND_SERVICE_PORT:-5173}:5173"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      BASE_URL: ${TRANSLATION_BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always
  translation-nginx-server:
    # Reverse proxy in front of UI and backend; listens on 8080 in-container.
    image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
    container_name: translation-nginx-server
    depends_on:
      - translation-backend-server
      - translation-ui-server
    ports:
      - "${TRANSLATION_NGINX_PORT:-80}:8080"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      FRONTEND_SERVICE_IP: ${TRANSLATION_FRONTEND_SERVICE_IP}
      FRONTEND_SERVICE_PORT: ${TRANSLATION_FRONTEND_SERVICE_PORT}
      BACKEND_SERVICE_NAME: ${TRANSLATION_BACKEND_SERVICE_NAME}
      BACKEND_SERVICE_IP: ${TRANSLATION_BACKEND_SERVICE_IP}
      BACKEND_SERVICE_PORT: ${TRANSLATION_BACKEND_SERVICE_PORT}
    ipc: host
    restart: always
networks:
  default:
    driver: bridge
23 changes: 23 additions & 0 deletions Translation/docker_compose/amd/gpu/rocm/set_env_vllm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Environment for the Translation app on AMD ROCm with the vLLM backend.
# Source this file before `docker compose -f compose_vllm.yaml up`.
# Fill in HOST_IP / EXTERNAL_HOST_IP with this machine's addresses first.

export HOST_IP=''
export EXTERNAL_HOST_IP=''
# Model served by vLLM and consumed by the llm-textgen wrapper.
export TRANSLATION_LLM_MODEL_ID="haoranxu/ALMA-13B"
export TRANSLATION_VLLM_SERVICE_PORT=8088
export TRANSLATION_LLM_ENDPOINT="http://${HOST_IP}:${TRANSLATION_VLLM_SERVICE_PORT}"
export TRANSLATION_LLM_PORT=9088
# Re-export the Hugging Face token under the name the compose file expects.
export TRANSLATION_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export TRANSLATION_MEGA_SERVICE_HOST_IP=${HOST_IP}
export TRANSLATION_LLM_SERVICE_HOST_IP=${HOST_IP}
export TRANSLATION_FRONTEND_SERVICE_IP=${HOST_IP}
export TRANSLATION_FRONTEND_SERVICE_PORT=18122
export TRANSLATION_BACKEND_SERVICE_NAME=translation
export TRANSLATION_BACKEND_SERVICE_IP=${HOST_IP}
export TRANSLATION_BACKEND_SERVICE_PORT=18121
# Externally reachable megaservice endpoint (used by the UI's BASE_URL).
export TRANSLATION_BACKEND_SERVICE_ENDPOINT="http://${EXTERNAL_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation"
export TRANSLATION_NGINX_PORT=18123
5 changes: 5 additions & 0 deletions Translation/docker_image_build/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,8 @@ services:
dockerfile: comps/third_parties/nginx/src/Dockerfile
extends: translation
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
vllm-rocm:
build:
context: GenAIComps
dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
193 changes: 193 additions & 0 deletions Translation/tests/test_compose_vllm_on_rocm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Trace every command and abort on the first failure.
set -xe

# Image coordinates: overridable from the environment, defaulting to opea/latest.
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
printf 'REGISTRY=IMAGE_REPO=%s\n' "${IMAGE_REPO}"
printf 'TAG=IMAGE_TAG=%s\n' "${IMAGE_TAG}"
export REGISTRY="${IMAGE_REPO}"
export TAG="${IMAGE_TAG}"

# Repo root (parent of the tests directory) and the log drop location.
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
# First IPv4 address of this host.
ip_address=$(hostname -I | awk '{print $1}')

#######################################
# Build all required images from docker_image_build/build.yaml.
# Globals:   opea_branch (read), WORKPATH, LOG_PATH, REGISTRY, TAG
# Outputs:   build log in ${LOG_PATH}/docker_image_build.log
#######################################
function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
    if [[ "${opea_branch}" != "main" ]]; then
        cd "$WORKPATH"
        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
        find . -type f -name "Dockerfile*" | while read -r file; do
            echo "Processing file: $file"
            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
        done
    fi

    cd "$WORKPATH/docker_image_build"
    git clone --depth 1 --branch "${opea_branch}" https://github.com/opea-project/GenAIComps.git

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="translation translation-ui llm-textgen nginx vllm-rocm"
    # Capture stderr too: `docker compose build` reports progress and errors
    # there, and without 2>&1 the log was missing the failure details.
    docker compose -f build.yaml build ${service_list} --no-cache > "${LOG_PATH}/docker_image_build.log" 2>&1
    docker images && sleep 3s
}

#######################################
# Export the deployment environment, start the vLLM compose stack, and wait
# for the vLLM service to finish model download + startup.
# Globals:   ip_address, WORKPATH, LOG_PATH (read); exports TRANSLATION_* vars
# Returns:   exits 1 if vLLM does not become ready within the wait budget
#######################################
function start_services() {
    cd "$WORKPATH/docker_compose/amd/gpu/rocm/"

    export HOST_IP=${ip_address}
    export EXTERNAL_HOST_IP=${ip_address}
    export TRANSLATION_LLM_MODEL_ID="haoranxu/ALMA-13B"
    export TRANSLATION_VLLM_SERVICE_PORT=8088
    export TRANSLATION_LLM_ENDPOINT="http://${HOST_IP}:${TRANSLATION_VLLM_SERVICE_PORT}"
    export TRANSLATION_LLM_PORT=9088
    export TRANSLATION_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export TRANSLATION_MEGA_SERVICE_HOST_IP=${HOST_IP}
    export TRANSLATION_LLM_SERVICE_HOST_IP=${HOST_IP}
    export TRANSLATION_FRONTEND_SERVICE_IP=${HOST_IP}
    export TRANSLATION_FRONTEND_SERVICE_PORT=5173
    export TRANSLATION_BACKEND_SERVICE_NAME=translation
    export TRANSLATION_BACKEND_SERVICE_IP=${HOST_IP}
    export TRANSLATION_BACKEND_SERVICE_PORT=8089
    export TRANSLATION_BACKEND_SERVICE_ENDPOINT="http://${EXTERNAL_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation"
    export TRANSLATION_NGINX_PORT=8090

    # Point the UI at this host.
    sed -i "s/backend_address/$ip_address/g" "$WORKPATH/ui/svelte/.env"

    # Start Docker Containers; keep stderr in the log so failures are visible.
    docker compose -f compose_vllm.yaml up -d > "${LOG_PATH}/start_services_with_compose.log" 2>&1

    # Wait long (up to 500 * 10s) for the LLM model download and vLLM startup.
    n=0
    until [[ "$n" -ge 500 ]]; do
        docker logs translation-vllm-service >& "${LOG_PATH}/translation-vllm-service_start.log"
        if grep -q "Application startup complete" "${LOG_PATH}/translation-vllm-service_start.log"; then
            echo "vLLM check successful"
            break
        fi
        sleep 10s
        n=$((n+1))
    done

    # Fail fast on timeout instead of letting later validations fail confusingly.
    if [[ "$n" -ge 500 ]]; then
        echo "translation-vllm-service did not become ready in time" >&2
        exit 1
    fi
}

#######################################
# POST INPUT_DATA to URL once and validate both the HTTP status and that the
# response body contains EXPECTED_RESULT. On failure, dump the container logs
# and exit 1.
# Arguments: $1 URL, $2 expected substring, $3 service name (log file name),
#            $4 docker container name, $5 JSON request body
# Outputs:   response body in ${LOG_PATH}/<service>.log
#######################################
function validate_services() {
    local URL="$1"
    local EXPECTED_RESULT="$2"
    local SERVICE_NAME="$3"
    local DOCKER_NAME="$4"
    local INPUT_DATA="$5"

    # Issue the request ONCE: body goes to the service log, status to stdout.
    # (Previously the same POST was sent twice — once for the status and once
    # for the content — doubling the load and risking divergent responses
    # from a nondeterministic LLM.)
    local HTTP_STATUS
    HTTP_STATUS=$(curl -s -o "${LOG_PATH}/${SERVICE_NAME}.log" -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    if [ "$HTTP_STATUS" -eq 200 ]; then
        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."

        local CONTENT
        CONTENT=$(cat "${LOG_PATH}/${SERVICE_NAME}.log")

        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
            echo "[ $SERVICE_NAME ] Content is as expected."
        else
            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
            docker logs "${DOCKER_NAME}" >> "${LOG_PATH}/${SERVICE_NAME}.log"
            exit 1
        fi
    else
        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
        docker logs "${DOCKER_NAME}" >> "${LOG_PATH}/${SERVICE_NAME}.log"
        exit 1
    fi
    sleep 1s
}

#######################################
# Probe each microservice in the stack directly, bottom-up.
# Globals: ip_address, HOST_IP, TRANSLATION_VLLM_SERVICE_PORT,
#          TRANSLATION_LLM_PORT (read)
#######################################
function validate_microservices() {
    # 1) vLLM serving endpoint (OpenAI-style completions API).
    local vllm_payload='{"model": "haoranxu/ALMA-13B", "prompt": "What is Deep Learning?", "max_tokens": 100, "temperature": 0}'
    validate_services \
        "${ip_address}:${TRANSLATION_VLLM_SERVICE_PORT}/v1/completions" \
        "choices" \
        "translation-vllm-service" \
        "translation-vllm-service" \
        "$vllm_payload"

    # 2) llm-textgen wrapper microservice (streams "data: " chunks).
    local llm_payload='{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}'
    validate_services \
        "${HOST_IP}:${TRANSLATION_LLM_PORT}/v1/chat/completions" \
        "data: " \
        "translation-llm" \
        "translation-llm-textgen-server" \
        "$llm_payload"
}

#######################################
# End-to-end check: hit the megaservice API directly, then the same endpoint
# routed through the nginx proxy.
# Globals: HOST_IP, TRANSLATION_BACKEND_SERVICE_PORT, TRANSLATION_NGINX_PORT
#######################################
function validate_megaservice() {
    local payload='{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'

    # Direct request to the backend megaservice.
    validate_services \
        "${HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation" \
        "translation" \
        "translation-backend-server" \
        "translation-backend-server" \
        "$payload"

    # Same request via the nginx front door.
    validate_services \
        "${HOST_IP}:${TRANSLATION_NGINX_PORT}/v1/translation" \
        "translation" \
        "translation-nginx-server" \
        "translation-nginx-server" \
        "$payload"
}

#######################################
# Run the svelte UI's Playwright end-to-end suite inside a dedicated conda
# environment. Assumes miniconda3 is installed under ${HOME} — TODO confirm
# this holds on all CI runners.
# Globals:   WORKPATH, ip_address (read)
# Returns:   exits with Playwright's status on test failure
#######################################
function validate_frontend() {
    cd $WORKPATH/ui/svelte
    local conda_env_name="OPEA_e2e"
    export PATH=${HOME}/miniconda3/bin/:$PATH
    # Create the test environment only on the first run.
    if conda info --envs | grep -q "$conda_env_name"; then
        echo "$conda_env_name exist!"
    else
        conda create -n ${conda_env_name} python=3.12 -y
    fi
    source activate ${conda_env_name}

    # Point the Playwright config at this host instead of localhost.
    sed -i "s/localhost/$ip_address/g" playwright.config.ts

    conda install -c conda-forge nodejs=22.6.0 -y
    # NOTE(review): `npm ci` removes node_modules and reinstalls from the
    # lockfile, which appears to make the preceding `npm install` redundant —
    # confirm and drop one of the two.
    npm install && npm ci && npx playwright install --with-deps
    node -v && npm -v && pip list

    # Capture the test status explicitly so `set -e` doesn't abort before the
    # pass/fail message is printed.
    exit_status=0
    npx playwright test || exit_status=$?

    if [ $exit_status -ne 0 ]; then
        echo "[TEST INFO]: ---------frontend test failed---------"
        exit $exit_status
    else
        echo "[TEST INFO]: ---------frontend test passed---------"
    fi
}

#######################################
# Tear down the vLLM compose stack and remove its stopped containers.
# Globals: WORKPATH (read)
#######################################
function stop_docker() {
    local compose_dir="$WORKPATH/docker_compose/amd/gpu/rocm/"
    cd "$compose_dir"
    docker compose -f compose_vllm.yaml stop && docker compose -f compose_vllm.yaml rm -f
}

#######################################
# Test entry point: clean slate, optional local build, deploy, validate each
# layer, then tear down and reclaim disk.
#######################################
function main() {

    # Ensure a clean slate even if a previous run left containers behind.
    stop_docker

    # Only build images locally when targeting the default "opea" registry.
    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services

    validate_microservices
    validate_megaservice
    validate_frontend

    stop_docker
    # -f answers the confirmation prompt non-interactively; more robust than
    # the previous `echo y | docker system prune`.
    docker system prune -f

}

main