From cf60682c8290f1191c5d3e4609a8ad3b8d1b162a Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:02:03 +0700 Subject: [PATCH 01/44] DocSum - add files for deploy app with ROCm vLLM Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 ++ .../amd/gpu/rocm-vllm/README.md | 175 ++++++++++++ .../amd/gpu/rocm-vllm/compose.yaml | 107 ++++++++ .../amd/gpu/rocm-vllm/set_env.sh | 16 ++ DocSum/docker_image_build/build.yaml | 9 + DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ++++++++++++++++++ 6 files changed, 574 insertions(+) create mode 100644 DocSum/Dockerfile-vllm-rocm create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh create mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm new file mode 100644 index 0000000000..f0e8a8743a --- /dev/null +++ b/DocSum/Dockerfile-vllm-rocm @@ -0,0 +1,18 @@ +FROM rocm/vllm-dev:main + +# Set the working directory +WORKDIR /workspace + +# Copy the api_server.py into the image +ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py + +# Expose the port used by the API server +EXPOSE 8011 + +# Set environment variables +ENV HUGGINGFACE_HUB_CACHE=/workspace +ENV WILM_USE_TRITON_FLASH_ATTENTION=0 +ENV PYTORCH_JIT=0 + +# Set the entrypoint to the api_server.py script +ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md new file mode 100644 index 0000000000..4d41a5cd31 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md @@ -0,0 +1,175 @@ +# Build and deploy DocSum Application on AMD GPU (ROCm) + +## Build images + +## 🚀 Build Docker Images + +First of all, you need to 
build Docker Images locally and install the python package of it. + +### 1. Build LLM Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . +``` + +Then run the command `docker images`, you will have the following four Docker Images: + +### 2. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples +cd GenAIExamples/DocSum/ +docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 3. Build UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` + +### 4. Build React UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . + +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . 
+```
+
+Then run the command `docker images`, you will have the following Docker Images:
+
+1. `opea/llm-docsum-tgi:latest`
+2. `opea/docsum:latest`
+3. `opea/docsum-ui:latest`
+4. `opea/docsum-react-ui:latest`
+
+## 🚀 Start Microservices and MegaService
+
+### Required Models
+
+Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model.
+For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable.
+
+### Setup Environment Variables
+
+Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as below.
+
+```bash
+export HOST_IP=${host_ip}
+export DOCSUM_MAX_INPUT_TOKENS=2048
+export DOCSUM_MAX_TOTAL_TOKENS=4096
+export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export DOCSUM_VLLM_SERVICE_PORT="8008"
+export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export DOCSUM_LLM_SERVER_PORT="9000"
+export DOCSUM_BACKEND_SERVER_PORT="8888"
+export DOCSUM_FRONTEND_PORT="5173"
+export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
+```
+
+Note: Please replace `host_ip` with your external IP address; do not use localhost.
+
+Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more `--device /dev/dri/renderD<node>` options, where `<node>` is the card index, starting from 128. 
(https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +Example for set isolation for 1 GPU + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +``` + +Example for set isolation for 2 GPUs + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 + - /dev/dri/card1:/dev/dri/card1 + - /dev/dri/renderD129:/dev/dri/renderD129 +``` + +Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +### Start Microservice Docker Containers + +```bash +cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm +docker compose up -d +``` + +### Validate Microservices + +1. TGI Service + + ```bash + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +2. LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/docsum \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -H 'Content-Type: application/json' + ``` + +3. MegaService + + ```bash + curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false + }' + ``` + +## 🚀 Launch the Svelte UI + +Open this URL `http://{host_ip}:5173` in your browser to access the frontend. 
+
+![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b)
+
+Here is an example for summarizing an article.
+
+![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55)
+
+## 🚀 Launch the React UI (Optional)
+
+To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace the `docsum-gradio-ui` service with the `docsum-rocm-react-ui-server` service as per the config below:
+
+```yaml
+docsum-rocm-react-ui-server:
+  image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest}
+  container_name: docsum-rocm-react-ui-server
+  depends_on:
+    - docsum-backend-server
+  ports:
+    - "5174:80"
+  environment:
+    - no_proxy=${no_proxy}
+    - https_proxy=${https_proxy}
+    - http_proxy=${http_proxy}
+    - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
+```
+
+Open this URL `http://{host_ip}:5174` in your browser to access the frontend.
+
+![project-screenshot](../../../../assets/img/docsum-ui-react.png)
diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml
new file mode 100644
index 0000000000..037aa06395
--- /dev/null
+++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml
@@ -0,0 +1,107 @@
+# Copyright (C) 2024 Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  docsum-vllm-service:
+    image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest}
+    container_name: docsum-vllm-service
+    ports:
+      - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011"
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      VLLM_USE_TRITON_FLASH_ATTENTION: 0
+      PYTORCH_JIT: 0
+    volumes:
+      - "./data:/data"
+    shm_size: 20G
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri/:/dev/dri/
+    cap_add:
+      - SYS_PTRACE
+    group_add:
+      - video
+    security_opt:
+      - seccomp:unconfined
+      - apparmor=unconfined
+    command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
+    ipc: host
+
+  docsum-llm-server:
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
+    container_name: docsum-llm-server
+    depends_on:
+      - docsum-vllm-service
+    ports:
+      - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000"
+    ipc: host
+    cap_add:
+      - SYS_PTRACE
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}"
+      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+      LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
+      LOGFLAG: ${DOCSUM_LOGFLAG:-False}
+      MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS}
+      MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS}
+    restart: unless-stopped
+
+  whisper-service:
+    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
+    container_name: whisper-service
+    ports:
+      - "${DOCSUM_WHISPER_PORT:-7066}:7066"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+
+  docsum-backend-server:
+    image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
+    container_name: docsum-backend-server
+    depends_on:
+      - docsum-vllm-service
+      - docsum-llm-server
+    ports:
+      - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      MEGA_SERVICE_HOST_IP: ${HOST_IP}
+      LLM_SERVICE_HOST_IP: ${HOST_IP}
+      ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP}
+    ipc: host
+    restart: always
+
+  docsum-gradio-ui:
+    image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest}
+    container_name: docsum-ui-server
+    depends_on:
+      - docsum-backend-server
+    ports:
+      - "${DOCSUM_FRONTEND_PORT:-5173}:5173"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT}
+      DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT}
+    ipc: host
+    restart: always
+
+networks:
+  default:
+    driver: bridge
diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh
new file mode 100644
index 0000000000..43e71e0fbf
--- /dev/null
+++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2024 Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP="" +export DOCSUM_MAX_INPUT_TOKENS=2048 +export DOCSUM_MAX_TOTAL_TOKENS=4096 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index 095fd28c93..dc0d546189 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,3 +47,12 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + vllm_rocm: + build: + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile-vllm-rocm + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh new file mode 100644 index 0000000000..d0919a019a --- /dev/null +++ b/DocSum/tests/test_compose_on_rocm_vllm.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${ip_address} +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export LLM_SERVICE_HOST_IP=${HOST_IP} +export ASR_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
+ service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm + sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env + # Start Docker Containers + docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "CONTENT==> $CONTENT" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +get_base64_str() { + local file_name=$1 + base64 -w 0 "$file_name" +} + +# Function to generate input data for testing based on the document type +input_data_for_test() { + local document_type=$1 + case $document_type in + ("text") + echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." + ;; + ("audio") + get_base64_str "$WORKPATH/tests/data/test.wav" + ;; + ("video") + get_base64_str "$WORKPATH/tests/data/test.mp4" + ;; + (*) + echo "Invalid document type" >&2 + exit 1 + ;; + esac +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
+
+    # whisper microservice
+    ulimit -s 65536
+    validate_services \
+        "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \
+        '{"asr_result":"well"}' \
+        "whisper-service" \
+        "whisper-service" \
+        "{\"audio\": \"$(input_data_for_test "audio")\"}"
+
+    # vLLM service
+    validate_services \
+        "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \
+        "choices" \
+        "docsum-vllm-service" \
+        "docsum-vllm-service" \
+        '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
+
+    # llm microservice
+    validate_services \
+        "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \
+        "text" \
+        "docsum-llm-server" \
+        "docsum-llm-server" \
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+
+}
+
+function validate_megaservice() {
+    local SERVICE_NAME="docsum-backend-server"
+    local DOCKER_NAME="docsum-backend-server"
+    local EXPECTED_RESULT="[DONE]"
+    local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
+    local URL="${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
+    local DATA_TYPE="type=text"
+
+    local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL")
+
+    if [ "$HTTP_STATUS" -eq 200 ]; then
+        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
+ + local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_megaservice_json() { + # Curl the Mega Service + echo "" + echo ">>> Checking text data with Content-Type: application/json" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + + echo ">>> Checking audio data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" + + echo ">>> Checking video data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ + docker compose stop && docker compose rm -f +} + +function main() { + echo "===========================================" + echo ">>>> Stopping any running Docker containers..." 
+ stop_docker + + echo "===========================================" + if [[ "$IMAGE_REPO" == "opea" ]]; then + echo ">>>> Building Docker images..." + build_docker_images + fi + + echo "===========================================" + echo ">>>> Starting Docker services..." + start_services + + echo "===========================================" + echo ">>>> Validating microservices..." + validate_microservices + + echo "===========================================" + echo ">>>> Validating megaservice..." + validate_megaservice + echo ">>>> Validating validate_megaservice_json..." + validate_megaservice_json + + echo "===========================================" + echo ">>>> Stopping Docker containers..." + stop_docker + + echo "===========================================" + echo ">>>> Pruning Docker system..." + echo y | docker system prune + echo ">>>> Docker system pruned successfully." + echo "===========================================" +} + +main From 1fd1de1530328321d28aa6d9db85fffeb876574c Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:07:05 +0700 Subject: [PATCH 02/44] DocSum - fix main Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 -- .../amd/gpu/rocm-vllm/README.md | 175 ------------ .../amd/gpu/rocm-vllm/compose.yaml | 107 -------- .../amd/gpu/rocm-vllm/set_env.sh | 16 -- DocSum/docker_image_build/build.yaml | 9 - DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ------------------ 6 files changed, 574 deletions(-) delete mode 100644 DocSum/Dockerfile-vllm-rocm delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh delete mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm deleted file mode 100644 index f0e8a8743a..0000000000 --- a/DocSum/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 
+0,0 @@ -FROM rocm/vllm-dev:main - -# Set the working directory -WORKDIR /workspace - -# Copy the api_server.py into the image -ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV WILM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md deleted file mode 100644 index 4d41a5cd31..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Build and deploy DocSum Application on AMD GPU (ROCm) - -## Build images - -## 🚀 Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LLM Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . -``` - -Then run the command `docker images`, you will have the following four Docker Images: - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/ -docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. 
Build UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` - -### 4. Build React UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . - -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` -4. `opea/docsum-react-ui:latest` - -## 🚀 Start Microservices and MegaService - -### Required Models - -Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. -For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. 
- -```bash -export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${host_ip} -export DOCSUM_TGI_SERVICE_PORT="18882" -export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export DOCSUM_LLM_SERVER_PORT="8008" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" -``` - -Note: Please replace with `host_ip` with your external IP address, do not use localhost. - -Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -### Start Microservice Docker Containers - -```bash -cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm -docker compose up -d -``` - -### Validate Microservices - -1. TGI Service - - ```bash - curl http://${host_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ - -H 'Content-Type: application/json' - ``` - -2. 
LLM Microservice - - ```bash - curl http://${host_ip}:9000/v1/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ - -H 'Content-Type: application/json' - ``` - -3. MegaService - - ```bash - curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false - }' - ``` - -## 🚀 Launch the Svelte UI - -Open this URL `http://{host_ip}:5173` in your browser to access the frontend. - -![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) - -Here is an example for summarizing a article. - -![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) - -## 🚀 Launch the React UI (Optional) - -To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: - -```yaml -docsum-rocm-react-ui-server: - image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} - container_name: docsum-rocm-react-ui-server - depends_on: - - docsum-rocm-backend-server - ports: - - "5174:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} -``` - -Open this URL `http://{host_ip}:5175` in your browser to access the frontend. 
- -![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml deleted file mode 100644 index 037aa06395..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -services: - docsum-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service - ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - WILM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - volumes: - - "./data:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host - - docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - container_name: docsum-llm-server - depends_on: - - docsum-vllm-service - ports: - - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" - ipc: host - cap_add: - - SYS_PTRACE - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} - LOGFLAG: ${DOCSUM_LOGFLAG:-False} - MAX_INPUT_TOKENS: 
${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - restart: unless-stopped - - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "${DOCSUM_WHISPER_PORT:-7066}:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - - docsum-backend-server: - image: ${REGISTRY:-opea}/docsum:${TAG:-latest} - container_name: docsum-backend-server - depends_on: - - docsum-tgi-service - - docsum-llm-server - ports: - - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - MEGA_SERVICE_HOST_IP: ${HOST_IP} - LLM_SERVICE_HOST_IP: ${HOST_IP} - ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} - ipc: host - restart: always - - docsum-gradio-ui: - image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} - container_name: docsum-ui-server - depends_on: - - docsum-backend-server - ports: - - "${DOCSUM_FRONTEND_PORT:-5173}:5173" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh deleted file mode 100644 index 43e71e0fbf..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP="" -export DOCSUM_MAX_INPUT_TOKENS=2048 -export DOCSUM_MAX_TOTAL_TOKENS=4096 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index dc0d546189..095fd28c93 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,12 +47,3 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - vllm_rocm: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - context: ../ - dockerfile: ./Dockerfile-vllm-rocm - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh deleted file mode 100644 index d0919a019a..0000000000 --- a/DocSum/tests/test_compose_on_rocm_vllm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -set -xe -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -export MAX_INPUT_TOKENS=1024 -export MAX_TOTAL_TOKENS=2048 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${ip_address} -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LLM_SERVICE_HOST_IP=${HOST_IP} -export ASR_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - -function start_services() { - cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm - sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env - # Start Docker Containers - docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - sleep 1m -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "CONTENT==> $CONTENT" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -get_base64_str() { - local file_name=$1 - base64 -w 0 "$file_name" -} - -# Function to generate input data for testing based on the document type -input_data_for_test() { - local document_type=$1 - case $document_type in - ("text") - echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." - ;; - ("audio") - get_base64_str "$WORKPATH/tests/data/test.wav" - ;; - ("video") - get_base64_str "$WORKPATH/tests/data/test.mp4" - ;; - (*) - echo "Invalid document type" >&2 - exit 1 - ;; - esac -} - -function validate_microservices() { - # Check if the microservices are running correctly. 
- - # whisper microservice - ulimit -s 65536 - validate_services \ - "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ - '{"asr_result":"well"}' \ - "whisper-service" \ - "whisper-service" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # vLLM service - validate_services \ - "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ - "generated_text" \ - "docsum-vllm-service" \ - "docsum-vllm-service" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' - - # llm microservice - validate_services \ - "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ - "text" \ - "docsum-llm-server" \ - "docsum-llm-server" \ - '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - -} - -function validate_megaservice() { - local SERVICE_NAME="docsum-backend-server" - local DOCKER_NAME="docsum-backend-server" - local EXPECTED_RESULT="[DONE]" - local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - local URL="${host_ip}:8888/v1/docsum" - local DATA_TYPE="type=text" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
- - local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_megaservice_json() { - # Curl the Mega Service - echo "" - echo ">>> Checking text data with Content-Type: application/json" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - - echo ">>> Checking audio data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" - - echo ">>> Checking video data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" - -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ - docker compose stop && docker compose rm -f -} - -function main() { - echo "===========================================" - echo ">>>> Stopping any running Docker containers..." 
- stop_docker - - echo "===========================================" - if [[ "$IMAGE_REPO" == "opea" ]]; then - echo ">>>> Building Docker images..." - build_docker_images - fi - - echo "===========================================" - echo ">>>> Starting Docker services..." - start_services - - echo "===========================================" - echo ">>>> Validating microservices..." - validate_microservices - - echo "===========================================" - echo ">>>> Validating megaservice..." - validate_megaservice - echo ">>>> Validating validate_megaservice_json..." - validate_megaservice_json - - echo "===========================================" - echo ">>>> Stopping Docker containers..." - stop_docker - - echo "===========================================" - echo ">>>> Pruning Docker system..." - echo y | docker system prune - echo ">>>> Docker system pruned successfully." - echo "===========================================" -} - -main From bd2d47e7e53e1241c27aed0f823fa680d8ecf4e2 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:02:03 +0700 Subject: [PATCH 03/44] DocSum - add files for deploy app with ROCm vLLM Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 ++ .../amd/gpu/rocm-vllm/README.md | 175 ++++++++++++ .../amd/gpu/rocm-vllm/compose.yaml | 107 ++++++++ .../amd/gpu/rocm-vllm/set_env.sh | 16 ++ DocSum/docker_image_build/build.yaml | 9 + DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ++++++++++++++++++ 6 files changed, 574 insertions(+) create mode 100644 DocSum/Dockerfile-vllm-rocm create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh create mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm new file mode 100644 index 0000000000..f0e8a8743a --- /dev/null +++ 
b/DocSum/Dockerfile-vllm-rocm @@ -0,0 +1,18 @@ +FROM rocm/vllm-dev:main + +# Set the working directory +WORKDIR /workspace + +# Copy the api_server.py into the image +ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py + +# Expose the port used by the API server +EXPOSE 8011 + +# Set environment variables +ENV HUGGINGFACE_HUB_CACHE=/workspace +ENV WILM_USE_TRITON_FLASH_ATTENTION=0 +ENV PYTORCH_JIT=0 + +# Set the entrypoint to the api_server.py script +ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md new file mode 100644 index 0000000000..4d41a5cd31 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md @@ -0,0 +1,175 @@ +# Build and deploy DocSum Application on AMD GPU (ROCm) + +## Build images + +## 🚀 Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +### 1. Build LLM Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . +``` + +Then run the command `docker images`, you will have the following four Docker Images: + +### 2. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples +cd GenAIExamples/DocSum/ +docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 3. 
Build UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` + +### 4. Build React UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . + +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` +4. `opea/docsum-react-ui:latest` + +## 🚀 Start Microservices and MegaService + +### Required Models + +Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. +For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. 
+ +```bash +export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${host_ip} +export DOCSUM_TGI_SERVICE_PORT="18882" +export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export DOCSUM_LLM_SERVER_PORT="8008" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DocSum_COMPONENT_NAME="OpeaDocSumTgi" +``` + +Note: Please replace with `host_ip` with your external IP address, do not use localhost. + +Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +Example for set isolation for 1 GPU + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +``` + +Example for set isolation for 2 GPUs + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 + - /dev/dri/card1:/dev/dri/card1 + - /dev/dri/renderD129:/dev/dri/renderD129 +``` + +Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +### Start Microservice Docker Containers + +```bash +cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm +docker compose up -d +``` + +### Validate Microservices + +1. TGI Service + + ```bash + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +2. 
LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/docsum \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -H 'Content-Type: application/json' + ``` + +3. MegaService + + ```bash + curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false + }' + ``` + +## 🚀 Launch the Svelte UI + +Open this URL `http://{host_ip}:5173` in your browser to access the frontend. + +![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) + +Here is an example for summarizing a article. + +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) + +## 🚀 Launch the React UI (Optional) + +To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: + +```yaml +docsum-rocm-react-ui-server: + image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} + container_name: docsum-rocm-react-ui-server + depends_on: + - docsum-rocm-backend-server + ports: + - "5174:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} +``` + +Open this URL `http://{host_ip}:5175` in your browser to access the frontend. 
+ +![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml new file mode 100644 index 0000000000..037aa06395 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +services: + docsum-vllm-service: + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + WILM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + volumes: + - "./data:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + docsum-llm-server: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-llm-server + depends_on: + - docsum-vllm-service + ports: + - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" + ipc: host + cap_add: + - SYS_PTRACE + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + LOGFLAG: ${DOCSUM_LOGFLAG:-False} + MAX_INPUT_TOKENS: 
${DOCSUM_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} + restart: unless-stopped + + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-service + ports: + - "${DOCSUM_WHISPER_PORT:-7066}:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + + docsum-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-backend-server + depends_on: + - docsum-tgi-service + - docsum-llm-server + ports: + - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} + ipc: host + restart: always + + docsum-gradio-ui: + image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} + container_name: docsum-ui-server + depends_on: + - docsum-backend-server + ports: + - "${DOCSUM_FRONTEND_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh new file mode 100644 index 0000000000..43e71e0fbf --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP="" +export DOCSUM_MAX_INPUT_TOKENS=2048 +export DOCSUM_MAX_TOTAL_TOKENS=4096 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index 095fd28c93..dc0d546189 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,3 +47,12 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + vllm_rocm: + build: + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile-vllm-rocm + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh new file mode 100644 index 0000000000..d0919a019a --- /dev/null +++ b/DocSum/tests/test_compose_on_rocm_vllm.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${ip_address} +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export LLM_SERVICE_HOST_IP=${HOST_IP} +export ASR_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
+ service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm + sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env + # Start Docker Containers + docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "CONTENT==> $CONTENT" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +get_base64_str() { + local file_name=$1 + base64 -w 0 "$file_name" +} + +# Function to generate input data for testing based on the document type +input_data_for_test() { + local document_type=$1 + case $document_type in + ("text") + echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." + ;; + ("audio") + get_base64_str "$WORKPATH/tests/data/test.wav" + ;; + ("video") + get_base64_str "$WORKPATH/tests/data/test.mp4" + ;; + (*) + echo "Invalid document type" >&2 + exit 1 + ;; + esac +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
+ + # whisper microservice + ulimit -s 65536 + validate_services \ + "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ + '{"asr_result":"well"}' \ + "whisper-service" \ + "whisper-service" \ + "{\"audio\": \"$(input_data_for_test "audio")\"}" + + # vLLM service + validate_services \ + "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ + "generated_text" \ + "docsum-vllm-service" \ + "docsum-vllm-service" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' + + # llm microservice + validate_services \ + "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ + "text" \ + "docsum-llm-server" \ + "docsum-llm-server" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + +} + +function validate_megaservice() { + local SERVICE_NAME="docsum-backend-server" + local DOCKER_NAME="docsum-backend-server" + local EXPECTED_RESULT="[DONE]" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${host_ip}:8888/v1/docsum" + local DATA_TYPE="type=text" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
+ + local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_megaservice_json() { + # Curl the Mega Service + echo "" + echo ">>> Checking text data with Content-Type: application/json" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + + echo ">>> Checking audio data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" + + echo ">>> Checking video data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ + docker compose stop && docker compose rm -f +} + +function main() { + echo "===========================================" + echo ">>>> Stopping any running Docker containers..." 
+ stop_docker + + echo "===========================================" + if [[ "$IMAGE_REPO" == "opea" ]]; then + echo ">>>> Building Docker images..." + build_docker_images + fi + + echo "===========================================" + echo ">>>> Starting Docker services..." + start_services + + echo "===========================================" + echo ">>>> Validating microservices..." + validate_microservices + + echo "===========================================" + echo ">>>> Validating megaservice..." + validate_megaservice + echo ">>>> Validating validate_megaservice_json..." + validate_megaservice_json + + echo "===========================================" + echo ">>>> Stopping Docker containers..." + stop_docker + + echo "===========================================" + echo ">>>> Pruning Docker system..." + echo y | docker system prune + echo ">>>> Docker system pruned successfully." + echo "===========================================" +} + +main From 2459ecbc53fdb7c9c449930700cff290de15c152 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:07:05 +0700 Subject: [PATCH 04/44] DocSum - fix main Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 -- .../amd/gpu/rocm-vllm/README.md | 175 ------------ .../amd/gpu/rocm-vllm/compose.yaml | 107 -------- .../amd/gpu/rocm-vllm/set_env.sh | 16 -- DocSum/docker_image_build/build.yaml | 9 - DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ------------------ 6 files changed, 574 deletions(-) delete mode 100644 DocSum/Dockerfile-vllm-rocm delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh delete mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm deleted file mode 100644 index f0e8a8743a..0000000000 --- a/DocSum/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 
+0,0 @@ -FROM rocm/vllm-dev:main - -# Set the working directory -WORKDIR /workspace - -# Copy the api_server.py into the image -ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV WILM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md deleted file mode 100644 index 4d41a5cd31..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Build and deploy DocSum Application on AMD GPU (ROCm) - -## Build images - -## 🚀 Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LLM Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . -``` - -Then run the command `docker images`, you will have the following four Docker Images: - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/ -docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. 
Build UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` - -### 4. Build React UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . - -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` -4. `opea/docsum-react-ui:latest` - -## 🚀 Start Microservices and MegaService - -### Required Models - -Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. -For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. 
- -```bash -export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${host_ip} -export DOCSUM_TGI_SERVICE_PORT="18882" -export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export DOCSUM_LLM_SERVER_PORT="8008" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" -``` - -Note: Please replace with `host_ip` with your external IP address, do not use localhost. - -Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -### Start Microservice Docker Containers - -```bash -cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm -docker compose up -d -``` - -### Validate Microservices - -1. TGI Service - - ```bash - curl http://${host_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ - -H 'Content-Type: application/json' - ``` - -2. 
LLM Microservice - - ```bash - curl http://${host_ip}:9000/v1/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ - -H 'Content-Type: application/json' - ``` - -3. MegaService - - ```bash - curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false - }' - ``` - -## 🚀 Launch the Svelte UI - -Open this URL `http://{host_ip}:5173` in your browser to access the frontend. - -![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) - -Here is an example for summarizing a article. - -![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) - -## 🚀 Launch the React UI (Optional) - -To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: - -```yaml -docsum-rocm-react-ui-server: - image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} - container_name: docsum-rocm-react-ui-server - depends_on: - - docsum-rocm-backend-server - ports: - - "5174:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} -``` - -Open this URL `http://{host_ip}:5175` in your browser to access the frontend. 
- -![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml deleted file mode 100644 index 037aa06395..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -services: - docsum-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service - ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - WILM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - volumes: - - "./data:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host - - docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - container_name: docsum-llm-server - depends_on: - - docsum-vllm-service - ports: - - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" - ipc: host - cap_add: - - SYS_PTRACE - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} - LOGFLAG: ${DOCSUM_LOGFLAG:-False} - MAX_INPUT_TOKENS: 
${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - restart: unless-stopped - - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "${DOCSUM_WHISPER_PORT:-7066}:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - - docsum-backend-server: - image: ${REGISTRY:-opea}/docsum:${TAG:-latest} - container_name: docsum-backend-server - depends_on: - - docsum-tgi-service - - docsum-llm-server - ports: - - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - MEGA_SERVICE_HOST_IP: ${HOST_IP} - LLM_SERVICE_HOST_IP: ${HOST_IP} - ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} - ipc: host - restart: always - - docsum-gradio-ui: - image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} - container_name: docsum-ui-server - depends_on: - - docsum-backend-server - ports: - - "${DOCSUM_FRONTEND_PORT:-5173}:5173" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh deleted file mode 100644 index 43e71e0fbf..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP="" -export DOCSUM_MAX_INPUT_TOKENS=2048 -export DOCSUM_MAX_TOTAL_TOKENS=4096 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index dc0d546189..095fd28c93 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,12 +47,3 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - vllm_rocm: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - context: ../ - dockerfile: ./Dockerfile-vllm-rocm - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh deleted file mode 100644 index d0919a019a..0000000000 --- a/DocSum/tests/test_compose_on_rocm_vllm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -set -xe -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -export MAX_INPUT_TOKENS=1024 -export MAX_TOTAL_TOKENS=2048 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${ip_address} -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LLM_SERVICE_HOST_IP=${HOST_IP} -export ASR_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - -function start_services() { - cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm - sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env - # Start Docker Containers - docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - sleep 1m -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "CONTENT==> $CONTENT" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -get_base64_str() { - local file_name=$1 - base64 -w 0 "$file_name" -} - -# Function to generate input data for testing based on the document type -input_data_for_test() { - local document_type=$1 - case $document_type in - ("text") - echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." - ;; - ("audio") - get_base64_str "$WORKPATH/tests/data/test.wav" - ;; - ("video") - get_base64_str "$WORKPATH/tests/data/test.mp4" - ;; - (*) - echo "Invalid document type" >&2 - exit 1 - ;; - esac -} - -function validate_microservices() { - # Check if the microservices are running correctly. 
- - # whisper microservice - ulimit -s 65536 - validate_services \ - "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ - '{"asr_result":"well"}' \ - "whisper-service" \ - "whisper-service" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # vLLM service - validate_services \ - "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ - "generated_text" \ - "docsum-vllm-service" \ - "docsum-vllm-service" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' - - # llm microservice - validate_services \ - "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ - "text" \ - "docsum-llm-server" \ - "docsum-llm-server" \ - '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - -} - -function validate_megaservice() { - local SERVICE_NAME="docsum-backend-server" - local DOCKER_NAME="docsum-backend-server" - local EXPECTED_RESULT="[DONE]" - local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - local URL="${host_ip}:8888/v1/docsum" - local DATA_TYPE="type=text" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
- - local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_megaservice_json() { - # Curl the Mega Service - echo "" - echo ">>> Checking text data with Content-Type: application/json" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - - echo ">>> Checking audio data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" - - echo ">>> Checking video data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" - -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ - docker compose stop && docker compose rm -f -} - -function main() { - echo "===========================================" - echo ">>>> Stopping any running Docker containers..." 
- stop_docker - - echo "===========================================" - if [[ "$IMAGE_REPO" == "opea" ]]; then - echo ">>>> Building Docker images..." - build_docker_images - fi - - echo "===========================================" - echo ">>>> Starting Docker services..." - start_services - - echo "===========================================" - echo ">>>> Validating microservices..." - validate_microservices - - echo "===========================================" - echo ">>>> Validating megaservice..." - validate_megaservice - echo ">>>> Validating validate_megaservice_json..." - validate_megaservice_json - - echo "===========================================" - echo ">>>> Stopping Docker containers..." - stop_docker - - echo "===========================================" - echo ">>>> Pruning Docker system..." - echo y | docker system prune - echo ">>>> Docker system pruned successfully." - echo "===========================================" -} - -main From 6d5049dd1c6bb3e201c4ca807da6950e0ab4b9d2 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:02:03 +0700 Subject: [PATCH 05/44] DocSum - add files for deploy app with ROCm vLLM Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 ++ .../amd/gpu/rocm-vllm/README.md | 175 ++++++++++++ .../amd/gpu/rocm-vllm/compose.yaml | 107 ++++++++ .../amd/gpu/rocm-vllm/set_env.sh | 16 ++ DocSum/docker_image_build/build.yaml | 9 + DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ++++++++++++++++++ 6 files changed, 574 insertions(+) create mode 100644 DocSum/Dockerfile-vllm-rocm create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh create mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm new file mode 100644 index 0000000000..f0e8a8743a --- /dev/null +++ 
b/DocSum/Dockerfile-vllm-rocm @@ -0,0 +1,18 @@ +FROM rocm/vllm-dev:main + +# Set the working directory +WORKDIR /workspace + +# Copy the api_server.py into the image +ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py + +# Expose the port used by the API server +EXPOSE 8011 + +# Set environment variables +ENV HUGGINGFACE_HUB_CACHE=/workspace +ENV WILM_USE_TRITON_FLASH_ATTENTION=0 +ENV PYTORCH_JIT=0 + +# Set the entrypoint to the api_server.py script +ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md new file mode 100644 index 0000000000..4d41a5cd31 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md @@ -0,0 +1,175 @@ +# Build and deploy DocSum Application on AMD GPU (ROCm) + +## Build images + +## 🚀 Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +### 1. Build LLM Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . +``` + +Then run the command `docker images`, you will have the following four Docker Images: + +### 2. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples +cd GenAIExamples/DocSum/ +docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 3. 
Build UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` + +### 4. Build React UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . + +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` +4. `opea/docsum-react-ui:latest` + +## 🚀 Start Microservices and MegaService + +### Required Models + +Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. +For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. 
+ +```bash +export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${host_ip} +export DOCSUM_TGI_SERVICE_PORT="18882" +export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export DOCSUM_LLM_SERVER_PORT="8008" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DocSum_COMPONENT_NAME="OpeaDocSumTgi" +``` + +Note: Please replace with `host_ip` with your external IP address, do not use localhost. + +Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +Example for set isolation for 1 GPU + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +``` + +Example for set isolation for 2 GPUs + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 + - /dev/dri/card1:/dev/dri/card1 + - /dev/dri/renderD129:/dev/dri/renderD129 +``` + +Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +### Start Microservice Docker Containers + +```bash +cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm +docker compose up -d +``` + +### Validate Microservices + +1. TGI Service + + ```bash + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +2. 
LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/docsum \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -H 'Content-Type: application/json' + ``` + +3. MegaService + + ```bash + curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false + }' + ``` + +## 🚀 Launch the Svelte UI + +Open this URL `http://{host_ip}:5173` in your browser to access the frontend. + +![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) + +Here is an example for summarizing a article. + +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) + +## 🚀 Launch the React UI (Optional) + +To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: + +```yaml +docsum-rocm-react-ui-server: + image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} + container_name: docsum-rocm-react-ui-server + depends_on: + - docsum-rocm-backend-server + ports: + - "5174:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} +``` + +Open this URL `http://{host_ip}:5175` in your browser to access the frontend. 
+ +![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml new file mode 100644 index 0000000000..037aa06395 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +services: + docsum-vllm-service: + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + WILM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + volumes: + - "./data:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + docsum-llm-server: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-llm-server + depends_on: + - docsum-vllm-service + ports: + - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" + ipc: host + cap_add: + - SYS_PTRACE + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + LOGFLAG: ${DOCSUM_LOGFLAG:-False} + MAX_INPUT_TOKENS: 
${DOCSUM_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} + restart: unless-stopped + + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-service + ports: + - "${DOCSUM_WHISPER_PORT:-7066}:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + + docsum-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-backend-server + depends_on: + - docsum-tgi-service + - docsum-llm-server + ports: + - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} + ipc: host + restart: always + + docsum-gradio-ui: + image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} + container_name: docsum-ui-server + depends_on: + - docsum-backend-server + ports: + - "${DOCSUM_FRONTEND_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh new file mode 100644 index 0000000000..43e71e0fbf --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP="" +export DOCSUM_MAX_INPUT_TOKENS=2048 +export DOCSUM_MAX_TOTAL_TOKENS=4096 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index 095fd28c93..dc0d546189 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,3 +47,12 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + vllm_rocm: + build: + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile-vllm-rocm + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh new file mode 100644 index 0000000000..d0919a019a --- /dev/null +++ b/DocSum/tests/test_compose_on_rocm_vllm.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${ip_address} +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export LLM_SERVICE_HOST_IP=${HOST_IP} +export ASR_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
+ service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm + sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env + # Start Docker Containers + docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "CONTENT==> $CONTENT" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +get_base64_str() { + local file_name=$1 + base64 -w 0 "$file_name" +} + +# Function to generate input data for testing based on the document type +input_data_for_test() { + local document_type=$1 + case $document_type in + ("text") + echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." + ;; + ("audio") + get_base64_str "$WORKPATH/tests/data/test.wav" + ;; + ("video") + get_base64_str "$WORKPATH/tests/data/test.mp4" + ;; + (*) + echo "Invalid document type" >&2 + exit 1 + ;; + esac +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
+ + # whisper microservice + ulimit -s 65536 + validate_services \ + "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ + '{"asr_result":"well"}' \ + "whisper-service" \ + "whisper-service" \ + "{\"audio\": \"$(input_data_for_test "audio")\"}" + + # vLLM service + validate_services \ + "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ + "generated_text" \ + "docsum-vllm-service" \ + "docsum-vllm-service" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' + + # llm microservice + validate_services \ + "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ + "text" \ + "docsum-llm-server" \ + "docsum-llm-server" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + +} + +function validate_megaservice() { + local SERVICE_NAME="docsum-backend-server" + local DOCKER_NAME="docsum-backend-server" + local EXPECTED_RESULT="[DONE]" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${host_ip}:8888/v1/docsum" + local DATA_TYPE="type=text" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
+ + local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_megaservice_json() { + # Curl the Mega Service + echo "" + echo ">>> Checking text data with Content-Type: application/json" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + + echo ">>> Checking audio data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" + + echo ">>> Checking video data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ + docker compose stop && docker compose rm -f +} + +function main() { + echo "===========================================" + echo ">>>> Stopping any running Docker containers..." 
+ stop_docker + + echo "===========================================" + if [[ "$IMAGE_REPO" == "opea" ]]; then + echo ">>>> Building Docker images..." + build_docker_images + fi + + echo "===========================================" + echo ">>>> Starting Docker services..." + start_services + + echo "===========================================" + echo ">>>> Validating microservices..." + validate_microservices + + echo "===========================================" + echo ">>>> Validating megaservice..." + validate_megaservice + echo ">>>> Validating validate_megaservice_json..." + validate_megaservice_json + + echo "===========================================" + echo ">>>> Stopping Docker containers..." + stop_docker + + echo "===========================================" + echo ">>>> Pruning Docker system..." + echo y | docker system prune + echo ">>>> Docker system pruned successfully." + echo "===========================================" +} + +main From 9dfbdc5cffe708b084e7367d6df2910908f5e76a Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:07:05 +0700 Subject: [PATCH 06/44] DocSum - fix main Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 -- .../amd/gpu/rocm-vllm/README.md | 175 ------------ .../amd/gpu/rocm-vllm/compose.yaml | 107 -------- .../amd/gpu/rocm-vllm/set_env.sh | 16 -- DocSum/docker_image_build/build.yaml | 9 - DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ------------------ 6 files changed, 574 deletions(-) delete mode 100644 DocSum/Dockerfile-vllm-rocm delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh delete mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm deleted file mode 100644 index f0e8a8743a..0000000000 --- a/DocSum/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 
+0,0 @@ -FROM rocm/vllm-dev:main - -# Set the working directory -WORKDIR /workspace - -# Copy the api_server.py into the image -ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV WILM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md deleted file mode 100644 index 4d41a5cd31..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Build and deploy DocSum Application on AMD GPU (ROCm) - -## Build images - -## 🚀 Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LLM Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . -``` - -Then run the command `docker images`, you will have the following four Docker Images: - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/ -docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. 
Build UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` - -### 4. Build React UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . - -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` -4. `opea/docsum-react-ui:latest` - -## 🚀 Start Microservices and MegaService - -### Required Models - -Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. -For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. 
- -```bash -export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${host_ip} -export DOCSUM_TGI_SERVICE_PORT="18882" -export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export DOCSUM_LLM_SERVER_PORT="8008" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" -``` - -Note: Please replace with `host_ip` with your external IP address, do not use localhost. - -Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -### Start Microservice Docker Containers - -```bash -cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm -docker compose up -d -``` - -### Validate Microservices - -1. TGI Service - - ```bash - curl http://${host_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ - -H 'Content-Type: application/json' - ``` - -2. 
LLM Microservice - - ```bash - curl http://${host_ip}:9000/v1/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ - -H 'Content-Type: application/json' - ``` - -3. MegaService - - ```bash - curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false - }' - ``` - -## 🚀 Launch the Svelte UI - -Open this URL `http://{host_ip}:5173` in your browser to access the frontend. - -![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) - -Here is an example for summarizing a article. - -![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) - -## 🚀 Launch the React UI (Optional) - -To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: - -```yaml -docsum-rocm-react-ui-server: - image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} - container_name: docsum-rocm-react-ui-server - depends_on: - - docsum-rocm-backend-server - ports: - - "5174:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} -``` - -Open this URL `http://{host_ip}:5175` in your browser to access the frontend. 
- -![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml deleted file mode 100644 index 037aa06395..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -services: - docsum-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service - ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - WILM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - volumes: - - "./data:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host - - docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - container_name: docsum-llm-server - depends_on: - - docsum-vllm-service - ports: - - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" - ipc: host - cap_add: - - SYS_PTRACE - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} - LOGFLAG: ${DOCSUM_LOGFLAG:-False} - MAX_INPUT_TOKENS: 
${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - restart: unless-stopped - - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "${DOCSUM_WHISPER_PORT:-7066}:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - - docsum-backend-server: - image: ${REGISTRY:-opea}/docsum:${TAG:-latest} - container_name: docsum-backend-server - depends_on: - - docsum-tgi-service - - docsum-llm-server - ports: - - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - MEGA_SERVICE_HOST_IP: ${HOST_IP} - LLM_SERVICE_HOST_IP: ${HOST_IP} - ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} - ipc: host - restart: always - - docsum-gradio-ui: - image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} - container_name: docsum-ui-server - depends_on: - - docsum-backend-server - ports: - - "${DOCSUM_FRONTEND_PORT:-5173}:5173" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh deleted file mode 100644 index 43e71e0fbf..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP="" -export DOCSUM_MAX_INPUT_TOKENS=2048 -export DOCSUM_MAX_TOTAL_TOKENS=4096 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index dc0d546189..095fd28c93 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,12 +47,3 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - vllm_rocm: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - context: ../ - dockerfile: ./Dockerfile-vllm-rocm - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh deleted file mode 100644 index d0919a019a..0000000000 --- a/DocSum/tests/test_compose_on_rocm_vllm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -set -xe -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -export MAX_INPUT_TOKENS=1024 -export MAX_TOTAL_TOKENS=2048 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${ip_address} -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LLM_SERVICE_HOST_IP=${HOST_IP} -export ASR_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - -function start_services() { - cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm - sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env - # Start Docker Containers - docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - sleep 1m -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "CONTENT==> $CONTENT" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -get_base64_str() { - local file_name=$1 - base64 -w 0 "$file_name" -} - -# Function to generate input data for testing based on the document type -input_data_for_test() { - local document_type=$1 - case $document_type in - ("text") - echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." - ;; - ("audio") - get_base64_str "$WORKPATH/tests/data/test.wav" - ;; - ("video") - get_base64_str "$WORKPATH/tests/data/test.mp4" - ;; - (*) - echo "Invalid document type" >&2 - exit 1 - ;; - esac -} - -function validate_microservices() { - # Check if the microservices are running correctly. 
- - # whisper microservice - ulimit -s 65536 - validate_services \ - "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ - '{"asr_result":"well"}' \ - "whisper-service" \ - "whisper-service" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # vLLM service - validate_services \ - "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ - "generated_text" \ - "docsum-vllm-service" \ - "docsum-vllm-service" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' - - # llm microservice - validate_services \ - "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ - "text" \ - "docsum-llm-server" \ - "docsum-llm-server" \ - '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - -} - -function validate_megaservice() { - local SERVICE_NAME="docsum-backend-server" - local DOCKER_NAME="docsum-backend-server" - local EXPECTED_RESULT="[DONE]" - local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - local URL="${host_ip}:8888/v1/docsum" - local DATA_TYPE="type=text" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
- - local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_megaservice_json() { - # Curl the Mega Service - echo "" - echo ">>> Checking text data with Content-Type: application/json" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - - echo ">>> Checking audio data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" - - echo ">>> Checking video data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" - -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ - docker compose stop && docker compose rm -f -} - -function main() { - echo "===========================================" - echo ">>>> Stopping any running Docker containers..." 
- stop_docker - - echo "===========================================" - if [[ "$IMAGE_REPO" == "opea" ]]; then - echo ">>>> Building Docker images..." - build_docker_images - fi - - echo "===========================================" - echo ">>>> Starting Docker services..." - start_services - - echo "===========================================" - echo ">>>> Validating microservices..." - validate_microservices - - echo "===========================================" - echo ">>>> Validating megaservice..." - validate_megaservice - echo ">>>> Validating validate_megaservice_json..." - validate_megaservice_json - - echo "===========================================" - echo ">>>> Stopping Docker containers..." - stop_docker - - echo "===========================================" - echo ">>>> Pruning Docker system..." - echo y | docker system prune - echo ">>>> Docker system pruned successfully." - echo "===========================================" -} - -main From a8857ae326b2d71ca66bc6f86715ac9ab467ac85 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:02:03 +0700 Subject: [PATCH 07/44] DocSum - add files for deploy app with ROCm vLLM Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 ++ .../amd/gpu/rocm-vllm/README.md | 175 ++++++++++++ .../amd/gpu/rocm-vllm/compose.yaml | 107 ++++++++ .../amd/gpu/rocm-vllm/set_env.sh | 16 ++ DocSum/docker_image_build/build.yaml | 9 + DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ++++++++++++++++++ 6 files changed, 574 insertions(+) create mode 100644 DocSum/Dockerfile-vllm-rocm create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh create mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm new file mode 100644 index 0000000000..f0e8a8743a --- /dev/null +++ 
b/DocSum/Dockerfile-vllm-rocm @@ -0,0 +1,18 @@ +FROM rocm/vllm-dev:main + +# Set the working directory +WORKDIR /workspace + +# Copy the api_server.py into the image +ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py + +# Expose the port used by the API server +EXPOSE 8011 + +# Set environment variables +ENV HUGGINGFACE_HUB_CACHE=/workspace +ENV WILM_USE_TRITON_FLASH_ATTENTION=0 +ENV PYTORCH_JIT=0 + +# Set the entrypoint to the api_server.py script +ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md new file mode 100644 index 0000000000..4d41a5cd31 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md @@ -0,0 +1,175 @@ +# Build and deploy DocSum Application on AMD GPU (ROCm) + +## Build images + +## 🚀 Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +### 1. Build LLM Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . +``` + +Then run the command `docker images`, you will have the following four Docker Images: + +### 2. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples +cd GenAIExamples/DocSum/ +docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 3. 
Build UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` + +### 4. Build React UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . + +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` +4. `opea/docsum-react-ui:latest` + +## 🚀 Start Microservices and MegaService + +### Required Models + +Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. +For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. 
+ +```bash +export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${host_ip} +export DOCSUM_TGI_SERVICE_PORT="18882" +export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export DOCSUM_LLM_SERVER_PORT="8008" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DocSum_COMPONENT_NAME="OpeaDocSumTgi" +``` + +Note: Please replace with `host_ip` with your external IP address, do not use localhost. + +Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +Example for set isolation for 1 GPU + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +``` + +Example for set isolation for 2 GPUs + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 + - /dev/dri/card1:/dev/dri/card1 + - /dev/dri/renderD129:/dev/dri/renderD129 +``` + +Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +### Start Microservice Docker Containers + +```bash +cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm +docker compose up -d +``` + +### Validate Microservices + +1. TGI Service + + ```bash + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +2. 
LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/docsum \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -H 'Content-Type: application/json' + ``` + +3. MegaService + + ```bash + curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false + }' + ``` + +## 🚀 Launch the Svelte UI + +Open this URL `http://{host_ip}:5173` in your browser to access the frontend. + +![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) + +Here is an example for summarizing a article. + +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) + +## 🚀 Launch the React UI (Optional) + +To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: + +```yaml +docsum-rocm-react-ui-server: + image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} + container_name: docsum-rocm-react-ui-server + depends_on: + - docsum-rocm-backend-server + ports: + - "5174:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} +``` + +Open this URL `http://{host_ip}:5175` in your browser to access the frontend. 
+ +![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml new file mode 100644 index 0000000000..037aa06395 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +services: + docsum-vllm-service: + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + WILM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + volumes: + - "./data:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + docsum-llm-server: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-llm-server + depends_on: + - docsum-vllm-service + ports: + - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" + ipc: host + cap_add: + - SYS_PTRACE + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + LOGFLAG: ${DOCSUM_LOGFLAG:-False} + MAX_INPUT_TOKENS: 
${DOCSUM_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} + restart: unless-stopped + + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-service + ports: + - "${DOCSUM_WHISPER_PORT:-7066}:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + + docsum-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-backend-server + depends_on: + - docsum-tgi-service + - docsum-llm-server + ports: + - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} + ipc: host + restart: always + + docsum-gradio-ui: + image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} + container_name: docsum-ui-server + depends_on: + - docsum-backend-server + ports: + - "${DOCSUM_FRONTEND_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh new file mode 100644 index 0000000000..43e71e0fbf --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP="" +export DOCSUM_MAX_INPUT_TOKENS=2048 +export DOCSUM_MAX_TOTAL_TOKENS=4096 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index 095fd28c93..dc0d546189 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,3 +47,12 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + vllm_rocm: + build: + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile-vllm-rocm + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh new file mode 100644 index 0000000000..d0919a019a --- /dev/null +++ b/DocSum/tests/test_compose_on_rocm_vllm.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${ip_address} +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export LLM_SERVICE_HOST_IP=${HOST_IP} +export ASR_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
+ service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm + sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env + # Start Docker Containers + docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "CONTENT==> $CONTENT" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +get_base64_str() { + local file_name=$1 + base64 -w 0 "$file_name" +} + +# Function to generate input data for testing based on the document type +input_data_for_test() { + local document_type=$1 + case $document_type in + ("text") + echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." + ;; + ("audio") + get_base64_str "$WORKPATH/tests/data/test.wav" + ;; + ("video") + get_base64_str "$WORKPATH/tests/data/test.mp4" + ;; + (*) + echo "Invalid document type" >&2 + exit 1 + ;; + esac +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
+ + # whisper microservice + ulimit -s 65536 + validate_services \ + "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ + '{"asr_result":"well"}' \ + "whisper-service" \ + "whisper-service" \ + "{\"audio\": \"$(input_data_for_test "audio")\"}" + + # vLLM service + validate_services \ + "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ + "generated_text" \ + "docsum-vllm-service" \ + "docsum-vllm-service" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' + + # llm microservice + validate_services \ + "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ + "text" \ + "docsum-llm-server" \ + "docsum-llm-server" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + +} + +function validate_megaservice() { + local SERVICE_NAME="docsum-backend-server" + local DOCKER_NAME="docsum-backend-server" + local EXPECTED_RESULT="[DONE]" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${host_ip}:8888/v1/docsum" + local DATA_TYPE="type=text" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
+ + local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_megaservice_json() { + # Curl the Mega Service + echo "" + echo ">>> Checking text data with Content-Type: application/json" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + + echo ">>> Checking audio data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" + + echo ">>> Checking video data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ + docker compose stop && docker compose rm -f +} + +function main() { + echo "===========================================" + echo ">>>> Stopping any running Docker containers..." 
+ stop_docker + + echo "===========================================" + if [[ "$IMAGE_REPO" == "opea" ]]; then + echo ">>>> Building Docker images..." + build_docker_images + fi + + echo "===========================================" + echo ">>>> Starting Docker services..." + start_services + + echo "===========================================" + echo ">>>> Validating microservices..." + validate_microservices + + echo "===========================================" + echo ">>>> Validating megaservice..." + validate_megaservice + echo ">>>> Validating validate_megaservice_json..." + validate_megaservice_json + + echo "===========================================" + echo ">>>> Stopping Docker containers..." + stop_docker + + echo "===========================================" + echo ">>>> Pruning Docker system..." + echo y | docker system prune + echo ">>>> Docker system pruned successfully." + echo "===========================================" +} + +main From 5a38b266ac77a2bf0766cefab14ec62f28633a8d Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:07:05 +0700 Subject: [PATCH 08/44] DocSum - fix main Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 -- .../amd/gpu/rocm-vllm/README.md | 175 ------------ .../amd/gpu/rocm-vllm/compose.yaml | 107 -------- .../amd/gpu/rocm-vllm/set_env.sh | 16 -- DocSum/docker_image_build/build.yaml | 9 - DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ------------------ 6 files changed, 574 deletions(-) delete mode 100644 DocSum/Dockerfile-vllm-rocm delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh delete mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm deleted file mode 100644 index f0e8a8743a..0000000000 --- a/DocSum/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 
+0,0 @@ -FROM rocm/vllm-dev:main - -# Set the working directory -WORKDIR /workspace - -# Copy the api_server.py into the image -ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV WILM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md deleted file mode 100644 index 4d41a5cd31..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Build and deploy DocSum Application on AMD GPU (ROCm) - -## Build images - -## 🚀 Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LLM Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . -``` - -Then run the command `docker images`, you will have the following four Docker Images: - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/ -docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. 
Build UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` - -### 4. Build React UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . - -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` -4. `opea/docsum-react-ui:latest` - -## 🚀 Start Microservices and MegaService - -### Required Models - -Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. -For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. 
- -```bash -export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${host_ip} -export DOCSUM_TGI_SERVICE_PORT="18882" -export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export DOCSUM_LLM_SERVER_PORT="8008" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" -``` - -Note: Please replace with `host_ip` with your external IP address, do not use localhost. - -Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -### Start Microservice Docker Containers - -```bash -cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm -docker compose up -d -``` - -### Validate Microservices - -1. TGI Service - - ```bash - curl http://${host_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ - -H 'Content-Type: application/json' - ``` - -2. 
LLM Microservice - - ```bash - curl http://${host_ip}:9000/v1/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ - -H 'Content-Type: application/json' - ``` - -3. MegaService - - ```bash - curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false - }' - ``` - -## 🚀 Launch the Svelte UI - -Open this URL `http://{host_ip}:5173` in your browser to access the frontend. - -![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) - -Here is an example for summarizing a article. - -![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) - -## 🚀 Launch the React UI (Optional) - -To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: - -```yaml -docsum-rocm-react-ui-server: - image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} - container_name: docsum-rocm-react-ui-server - depends_on: - - docsum-rocm-backend-server - ports: - - "5174:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} -``` - -Open this URL `http://{host_ip}:5175` in your browser to access the frontend. 
- -![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml deleted file mode 100644 index 037aa06395..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -services: - docsum-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service - ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - WILM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - volumes: - - "./data:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host - - docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - container_name: docsum-llm-server - depends_on: - - docsum-vllm-service - ports: - - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" - ipc: host - cap_add: - - SYS_PTRACE - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} - LOGFLAG: ${DOCSUM_LOGFLAG:-False} - MAX_INPUT_TOKENS: 
${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - restart: unless-stopped - - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "${DOCSUM_WHISPER_PORT:-7066}:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - - docsum-backend-server: - image: ${REGISTRY:-opea}/docsum:${TAG:-latest} - container_name: docsum-backend-server - depends_on: - - docsum-tgi-service - - docsum-llm-server - ports: - - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - MEGA_SERVICE_HOST_IP: ${HOST_IP} - LLM_SERVICE_HOST_IP: ${HOST_IP} - ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} - ipc: host - restart: always - - docsum-gradio-ui: - image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} - container_name: docsum-ui-server - depends_on: - - docsum-backend-server - ports: - - "${DOCSUM_FRONTEND_PORT:-5173}:5173" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh deleted file mode 100644 index 43e71e0fbf..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP="" -export DOCSUM_MAX_INPUT_TOKENS=2048 -export DOCSUM_MAX_TOTAL_TOKENS=4096 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index dc0d546189..095fd28c93 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,12 +47,3 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - vllm_rocm: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - context: ../ - dockerfile: ./Dockerfile-vllm-rocm - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh deleted file mode 100644 index d0919a019a..0000000000 --- a/DocSum/tests/test_compose_on_rocm_vllm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -set -xe -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -export MAX_INPUT_TOKENS=1024 -export MAX_TOTAL_TOKENS=2048 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${ip_address} -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LLM_SERVICE_HOST_IP=${HOST_IP} -export ASR_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - -function start_services() { - cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm - sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env - # Start Docker Containers - docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - sleep 1m -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "CONTENT==> $CONTENT" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -get_base64_str() { - local file_name=$1 - base64 -w 0 "$file_name" -} - -# Function to generate input data for testing based on the document type -input_data_for_test() { - local document_type=$1 - case $document_type in - ("text") - echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." - ;; - ("audio") - get_base64_str "$WORKPATH/tests/data/test.wav" - ;; - ("video") - get_base64_str "$WORKPATH/tests/data/test.mp4" - ;; - (*) - echo "Invalid document type" >&2 - exit 1 - ;; - esac -} - -function validate_microservices() { - # Check if the microservices are running correctly. 
- - # whisper microservice - ulimit -s 65536 - validate_services \ - "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ - '{"asr_result":"well"}' \ - "whisper-service" \ - "whisper-service" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # vLLM service - validate_services \ - "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ - "generated_text" \ - "docsum-vllm-service" \ - "docsum-vllm-service" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' - - # llm microservice - validate_services \ - "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ - "text" \ - "docsum-llm-server" \ - "docsum-llm-server" \ - '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - -} - -function validate_megaservice() { - local SERVICE_NAME="docsum-backend-server" - local DOCKER_NAME="docsum-backend-server" - local EXPECTED_RESULT="[DONE]" - local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - local URL="${host_ip}:8888/v1/docsum" - local DATA_TYPE="type=text" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
- - local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_megaservice_json() { - # Curl the Mega Service - echo "" - echo ">>> Checking text data with Content-Type: application/json" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - - echo ">>> Checking audio data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" - - echo ">>> Checking video data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" - -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ - docker compose stop && docker compose rm -f -} - -function main() { - echo "===========================================" - echo ">>>> Stopping any running Docker containers..." 
- stop_docker - - echo "===========================================" - if [[ "$IMAGE_REPO" == "opea" ]]; then - echo ">>>> Building Docker images..." - build_docker_images - fi - - echo "===========================================" - echo ">>>> Starting Docker services..." - start_services - - echo "===========================================" - echo ">>>> Validating microservices..." - validate_microservices - - echo "===========================================" - echo ">>>> Validating megaservice..." - validate_megaservice - echo ">>>> Validating validate_megaservice_json..." - validate_megaservice_json - - echo "===========================================" - echo ">>>> Stopping Docker containers..." - stop_docker - - echo "===========================================" - echo ">>>> Pruning Docker system..." - echo y | docker system prune - echo ">>>> Docker system pruned successfully." - echo "===========================================" -} - -main From c37963be4601540da764a89286df570385bb0c60 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Tue, 11 Mar 2025 15:54:22 +0700 Subject: [PATCH 09/44] SearchQnA - add files for deploy with ROCm vLLM Signed-off-by: Chingis Yundunov --- .../amd/gpu/rocm/README_vllm.md | 179 ++++++++++++++++++ .../amd/gpu/rocm/compose_vllm.yaml | 176 +++++++++++++++++ .../amd/gpu/rocm/set_env_vllm.sh | 40 ++++ 3 files changed, 395 insertions(+) create mode 100644 SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md create mode 100644 SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml create mode 100644 SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md b/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md new file mode 100644 index 0000000000..f527f81dbf --- /dev/null +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md @@ -0,0 +1,179 @@ +# Build and deploy SearchQnA Application on AMD GPU (ROCm) + +## Build images + +### Build Embedding Image + +```bash 
+git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build --no-cache -t opea/embedding:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/Dockerfile . +``` + +### Build Retriever Image + +```bash +docker build --no-cache -t opea/web-retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/src/Dockerfile . +``` + +### Build Rerank Image + +```bash +docker build --no-cache -t opea/reranking:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/rerankings/src/Dockerfile . +``` + +### Build the LLM Docker Image + +```bash +docker build -t opea/llm-textgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile . +``` + +### Build the MegaService Docker Image + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/SearchQnA +docker build --no-cache -t opea/searchqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### Build the UI Docker Image + +```bash +cd GenAIExamples/SearchQnA/ui +docker build --no-cache -t opea/searchqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . +``` + +## Deploy SearchQnA Application + +### Features of Docker compose for AMD GPUs + +1. Added forwarding of GPU devices to the container vLLM service with instructions: + +```yaml +shm_size: 1g +devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ +cap_add: + - SYS_PTRACE +group_add: + - video +security_opt: + - seccomp:unconfined +``` + +In this case, all GPUs are passed through to the container. To restrict the container to specific GPUs, you need to use the specific device names cardN and renderDN. 
+ +For example: + +```yaml +shm_size: 1g +devices: + - /dev/kfd:/dev/kfd + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +cap_add: + - SYS_PTRACE +group_add: + - video +security_opt: + - seccomp:unconfined +``` + +To find out which GPU device IDs cardN and renderDN correspond to the same GPU, use the AMD GPU driver utility. + +### Go to the directory with the Docker compose file + +```bash +cd GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm +``` + +### Set environments + +In the file "GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh" it is necessary to set the required values. Parameter assignments are specified in the comments for each variable setting command. + +```bash +chmod +x set_env_vllm.sh +. set_env_vllm.sh +``` + +### Run services + +``` +docker compose -f compose_vllm.yaml up -d +``` + +# Validate the MicroServices and MegaService + +## Validate TEI service + +```bash +curl http://${SEARCH_HOST_IP}:3001/embed \ + -X POST \ + -d '{"inputs":"What is Deep Learning?"}' \ + -H 'Content-Type: application/json' +``` + +## Validate Embedding service + +```bash +curl http://${SEARCH_HOST_IP}:3002/v1/embeddings\ + -X POST \ + -d '{"text":"hello"}' \ + -H 'Content-Type: application/json' +``` + +## Validate Web Retriever service + +```bash +export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") +curl http://${SEARCH_HOST_IP}:3003/v1/web_retrieval \ + -X POST \ + -d "{\"text\":\"What is the 2024 holiday schedule?\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' +``` + +## Validate TEI Reranking service + +```bash +curl http://${SEARCH_HOST_IP}:3004/rerank \ + -X POST \ + -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \ + -H 'Content-Type: application/json' +``` + +## Validate Reranking service + +```bash +curl http://${SEARCH_HOST_IP}:3005/v1/reranking\ + -X POST \ + -d '{"initial_query":"What is Deep 
Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -H 'Content-Type: application/json' +``` + +## Validate TGI service + +```bash +curl http://${SEARCH_HOST_IP}:3006/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' +``` + +## Validate LLM service + +```bash +curl http://${SEARCH_HOST_IP}:3007/v1/chat/completions\ + -X POST \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -H 'Content-Type: application/json' +``` + +## Validate MegaService + +```bash +curl http://${SEARCH_HOST_IP}:3008/v1/searchqna -H "Content-Type: application/json" -d '{ + "messages": "What is the latest news? Give me also the source link.", + "stream": "True" + }' +``` diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml new file mode 100644 index 0000000000..8fa5327806 --- /dev/null +++ b/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -0,0 +1,176 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-License-Identifier: Apache-2.0 + +services: + search-tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: search-tei-embedding-server + ports: + - "${SEARCH_TEI_EMBEDDING_PORT:-3001}:80" + volumes: + - "${MODEL_PATH:-./data}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} + command: --model-id ${SEARCH_EMBEDDING_MODEL_ID} --auto-truncate + search-embedding: + image: ${REGISTRY:-opea}/embedding:${TAG:-latest} + 
container_name: search-embedding-server + depends_on: + - search-tei-embedding-service + ports: + - "${SEARCH_EMBEDDING_SERVICE_PORT:-3002}:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_HOST_IP: ${SEARCH_HOST_IP} + TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT} + HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} + restart: unless-stopped + search-web-retriever: + image: ${REGISTRY:-opea}/web-retriever:${TAG:-latest} + container_name: search-web-retriever-server + ports: + - "${SEARCH_WEB_RETRIEVER_SERVICE_PORT:-3003}:7077" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT} + GOOGLE_API_KEY: ${SEARCH_GOOGLE_API_KEY} + GOOGLE_CSE_ID: ${SEARCH_GOOGLE_CSE_ID} + restart: unless-stopped + search-tei-reranking-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: search-tei-reranking-server + ports: + - "${SEARCH_TEI_RERANKING_PORT:-3004}:80" + volumes: + - "${MODEL_PATH:-./data}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${SEARCH_RERANK_MODEL_ID} --auto-truncate + search-reranking: + image: ${REGISTRY:-opea}/reranking:${TAG:-latest} + container_name: search-reranking-server + depends_on: + - search-tei-reranking-service + ports: + - "${SEARCH_RERANK_SERVICE_PORT:-3005}:8000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_RERANKING_ENDPOINT: ${SEARCH_TEI_RERANKING_ENDPOINT} + HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} + restart: unless-stopped + search-vllm-service: + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + container_name: 
search-vllm-service + ports: + - "${SEARCH_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + WILM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + volumes: + - "./data:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${SEARCH_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + search-llm: + image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} + container_name: search-llm-server + depends_on: + - search-vllm-service + ports: + - "${SEARCH_LLM_SERVICE_PORT:-3007}:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${SEARCH_LLM_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} + LLM_MODEL_ID: ${SEARCH_LLM_MODEL_ID} + HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} + LLM_COMPONENT_NAME: "OpeaTextGenService" + restart: unless-stopped + search-backend-server: + image: ${REGISTRY:-opea}/searchqna:${TAG:-latest} + container_name: search-backend-server + depends_on: + - search-tei-embedding-service + - search-embedding + - search-web-retriever + - search-tei-reranking-service + - search-reranking + - search-vllm-service + - search-llm + ports: + - "${SEARCH_BACKEND_SERVICE_PORT:-3008}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${SEARCH_MEGA_SERVICE_HOST_IP} + EMBEDDING_SERVICE_HOST_IP: ${SEARCH_EMBEDDING_SERVICE_HOST_IP} + 
EMBEDDING_SERVICE_PORT: ${SEARCH_EMBEDDING_SERVICE_PORT} + WEB_RETRIEVER_SERVICE_HOST_IP: ${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP} + WEB_RETRIEVER_SERVICE_PORT: ${SEARCH_WEB_RETRIEVER_SERVICE_PORT} + RERANK_SERVICE_HOST_IP: ${SEARCH_RERANK_SERVICE_HOST_IP} + RERANK_SERVICE_PORT: ${SEARCH_RERANK_SERVICE_PORT} + LLM_SERVICE_HOST_IP: ${SEARCH_LLM_SERVICE_HOST_IP} + LLM_SERVICE_PORT: ${SEARCH_LLM_SERVICE_PORT} + ipc: host + restart: always + search-ui-server: + image: ${REGISTRY:-opea}/searchqna-ui:${TAG:-latest} + container_name: search-ui-server + depends_on: + - search-backend-server + ports: + - "${SEARCH_FRONTEND_SERVICE_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_BASE_URL: ${SEARCH_BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh new file mode 100644 index 0000000000..7bb2e10253 --- /dev/null +++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP='' +export EXTERNAL_HOST_IP='' +export MODEL_PATH="./data" +export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' +export SEARCH_TEI_EMBEDDING_PORT=3001 +export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT} +export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' +export SEARCH_TEI_RERANKING_PORT=3004 +export SEARCH_TEI_RERANKING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT} +export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY} + +export SEARCH_VLLM_SERVICE_PORT=3080 +export SEARCH_LLM_ENDPOINT=http://${HOST_IP}:${SEARCH_VLLM_SERVICE_PORT} +export 
SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3' + +export SEARCH_MEGA_SERVICE_HOST_IP=${EXTERNAL_HOST_IP} +export SEARCH_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} +export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} +export SEARCH_RERANK_SERVICE_HOST_IP=${HOST_IP} +export SEARCH_LLM_SERVICE_HOST_IP=${HOST_IP} + +export SEARCH_EMBEDDING_SERVICE_PORT=3002 +export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003 +export SEARCH_RERANK_SERVICE_PORT=3005 +export SEARCH_LLM_SERVICE_PORT=3007 + +export SEARCH_FRONTEND_SERVICE_PORT=18143 +export SEARCH_BACKEND_SERVICE_PORT=18142 +export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${EXTERNAL_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna + +export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} +export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} From 76115dea86358dcd46f5869f8dedef71ca019bf0 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Tue, 11 Mar 2025 15:58:20 +0700 Subject: [PATCH 10/44] SearchQnA - add files for deploy with ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/Dockerfile-vllm-rocm | 18 ++++++++++++++++++ SearchQnA/docker_image_build/build.yaml | 9 +++++++++ 2 files changed, 27 insertions(+) create mode 100644 SearchQnA/Dockerfile-vllm-rocm diff --git a/SearchQnA/Dockerfile-vllm-rocm b/SearchQnA/Dockerfile-vllm-rocm new file mode 100644 index 0000000000..ca68154db7 --- /dev/null +++ b/SearchQnA/Dockerfile-vllm-rocm @@ -0,0 +1,18 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. 
+ +FROM rocm/vllm:rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6 + +# Set the working directory +WORKDIR /workspace + +# Expose the port used by the API server +EXPOSE 8011 + +# Set environment variables +ENV HUGGINGFACE_HUB_CACHE=/workspace +ENV VLLM_USE_TRITON_FLASH_ATTENTION=0 +ENV PYTORCH_JIT=0 + +# Set the entrypoint to the api_server.py script +RUN cp /usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py /workspace/api_server.py +ENTRYPOINT ["python3", "/workspace/api_server.py"] \ No newline at end of file diff --git a/SearchQnA/docker_image_build/build.yaml b/SearchQnA/docker_image_build/build.yaml index 179ab564b0..254a860e9c 100644 --- a/SearchQnA/docker_image_build/build.yaml +++ b/SearchQnA/docker_image_build/build.yaml @@ -41,3 +41,12 @@ services: dockerfile: comps/llms/src/text-generation/Dockerfile extends: searchqna image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} + vllm-rocm: + build: + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile-vllm-rocm + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} From cf9cb238dfec93d92c4de43ca061aa4392ff72a3 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Tue, 11 Mar 2025 17:49:44 +0700 Subject: [PATCH 11/44] SearchQnA - add files for deploy with ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 1 + SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 8fa5327806..907a02ea58 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -33,6 +33,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TEI_EMBEDDING_HOST_IP: ${SEARCH_HOST_IP} + TEI_EMBEDDING_PORT: 
${SEARCH_TEI_EMBEDDING_PORT} TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT} HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 7bb2e10253..ba44036f1c 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -10,7 +10,7 @@ export EXTERNAL_HOST_IP='' export MODEL_PATH="./data" export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' export SEARCH_TEI_EMBEDDING_PORT=3001 -export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SERACH_TEI_EMBEDDING_PORT} +export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT} export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' export SEARCH_TEI_RERANKING_PORT=3004 export SEARCH_TEI_RERANKING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT} From f40cc25cc8b4ff01c117049734ff777b15a0b90d Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Tue, 11 Mar 2025 18:00:24 +0700 Subject: [PATCH 12/44] SearchQnA - add files for deploy with ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/tests/test_compose_vllm_on_rocm.sh | 141 +++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 SearchQnA/tests/test_compose_vllm_on_rocm.sh diff --git a/SearchQnA/tests/test_compose_vllm_on_rocm.sh b/SearchQnA/tests/test_compose_vllm_on_rocm.sh new file mode 100644 index 0000000000..b54ad5e66a --- /dev/null +++ b/SearchQnA/tests/test_compose_vllm_on_rocm.sh @@ -0,0 +1,141 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_PATH=${model_cache:-"./data"} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" 
+ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH/docker_image_build + git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="searchqna searchqna-ui embedding web-retriever reranking llm-textgen vllm-rocm" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + docker images && sleep 3s +} + +function start_services() { + cd $WORKPATH/docker_compose/amd/gpu/rocm/ + export HOST_IP=${ip_address} + export EXTERNAL_HOST_IP=${ip_address} + export MODEL_PATH="./data" + export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' + export SEARCH_TEI_EMBEDDING_PORT=3001 + export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT} + export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' + export SEARCH_TEI_RERANKING_PORT=3004 + export SEARCH_TEI_RERANKING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT} + export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY} + export SEARCH_VLLM_SERVICE_PORT=3080 + export SEARCH_LLM_ENDPOINT=http://${HOST_IP}:${SEARCH_VLLM_SERVICE_PORT} + export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3' + export SEARCH_MEGA_SERVICE_HOST_IP=${EXTERNAL_HOST_IP} + export SEARCH_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_RERANK_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_LLM_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_EMBEDDING_SERVICE_PORT=3002 + export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003 + export SEARCH_RERANK_SERVICE_PORT=3005 + export SEARCH_LLM_SERVICE_PORT=3007 + export SEARCH_FRONTEND_SERVICE_PORT=5173 + export 
SEARCH_BACKEND_SERVICE_PORT=3008 + export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${EXTERNAL_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna + export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} + export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} + + sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env + + # Start Docker Containers + docker compose -f compose_vllm.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs search-vllm-service > $LOG_PATH/search-vllm-service_start.log + if grep -q "Application startup complete" $LOG_PATH/search-vllm-service_start.log; then + break + fi + sleep 10s + n=$((n+1)) + done +} + + +function validate_megaservice() { + result=$(http_proxy="" curl http://${HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna -XPOST -d '{"messages": "What is black myth wukong?", "stream": "False"}' -H 'Content-Type: application/json') + echo $result + + if [[ $result == *"the"* ]]; then + docker logs search-web-retriever-server + docker logs search-backend-server + echo "Result correct." + else + docker logs search-web-retriever-server + docker logs search-backend-server + echo "Result wrong." + exit 1 + fi + +} + +function validate_frontend() { + cd $WORKPATH/ui/svelte + local conda_env_name="OPEA_e2e" + export PATH=${HOME}/miniconda3/bin/:$PATH + if conda info --envs | grep -q "$conda_env_name"; then + echo "$conda_env_name exist!" + else + conda create -n ${conda_env_name} python=3.12 -y + fi + source activate ${conda_env_name} + + sed -i "s/localhost/$ip_address/g" playwright.config.ts + + conda install -c conda-forge nodejs=22.6.0 -y + npm install && npm ci && npx playwright install --with-deps + node -v && npm -v && pip list + + exit_status=0 + npx playwright test || exit_status=$? 
+ + if [ $exit_status -ne 0 ]; then + echo "[TEST INFO]: ---------frontend test failed---------" + exit $exit_status + else + echo "[TEST INFO]: ---------frontend test passed---------" + fi +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm/ + docker compose -f compose_vllm.yaml stop && docker compose -f compose_vllm.yaml rm -f +} + +function main() { + + stop_docker + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + start_services + + validate_megaservice + validate_frontend + + stop_docker + echo y | docker system prune + +} + +main From 39dc7fd2b78cd1df61f71aeb3d22c8c1ea9927e9 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Tue, 11 Mar 2025 18:06:18 +0700 Subject: [PATCH 13/44] SearchQnA - add files for deploy with ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/tests/test_compose_vllm_on_rocm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SearchQnA/tests/test_compose_vllm_on_rocm.sh b/SearchQnA/tests/test_compose_vllm_on_rocm.sh index b54ad5e66a..7d71a1d8bd 100644 --- a/SearchQnA/tests/test_compose_vllm_on_rocm.sh +++ b/SearchQnA/tests/test_compose_vllm_on_rocm.sh @@ -64,7 +64,7 @@ function start_services() { docker compose -f compose_vllm.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 100 ]]; do - docker logs search-vllm-service > $LOG_PATH/search-vllm-service_start.log + docker logs search-vllm-service >& $LOG_PATH/search-vllm-service_start.log if grep -q "Application startup complete" $LOG_PATH/search-vllm-service_start.log; then break fi From 3b2e5f2b78ac67c6dbc194cd3887f5bf22f52df0 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 12 Mar 2025 01:13:50 +0700 Subject: [PATCH 14/44] SearchQnA - add files for deploy with ROCm vLLM Signed-off-by: Chingis Yundunov --- .../amd/gpu/rocm/README_vllm.md | 385 ++++++++++++++---- 1 file changed, 301 insertions(+), 84 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md 
b/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md index f527f81dbf..e5f38bea3f 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md @@ -1,179 +1,396 @@ -# Build and deploy SearchQnA Application on AMD GPU (ROCm) +Copyright (C) 2024 Advanced Micro Devices, Inc. -## Build images +# Deploy SearchQnA application -### Build Embedding Image +## 1. Clone repo and build Docker images + +### 1.1. Cloning repo + +Create an empty directory in home directory and navigate to it: ```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/Dockerfile . +mkdir -p ~/searchqna-test && cd ~/searchqna-test ``` -### Build Retriever Image +Cloning GenAIExamples repo for build Docker images: ```bash -docker build --no-cache -t opea/web-retriever-chroma:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/src/Dockerfile . +git clone https://github.com/opea-project/GenAIExamples.git ``` -### Build Rerank Image +### 1.2. Navigate to repo directory and switching to the desired version of the code: + +If you are using the main branch, then you do not need to make the transition, the main branch is used by default ```bash -docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/rerankings/src/Dockerfile . +cd ~/searchqna-test/GenAIExamples/SearchQnA/docker_image_build +git clone https://github.com/opea-project/GenAIComps.git ``` -### Build the LLM Docker Image +If you are using a specific branch or tag, then we perform git checkout to the desired version. 
```bash -docker build -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile . +### Replace "v1.2" with the code version you need (branch or tag) +cd ~/searchqna-test/GenAIExamples/SearchQnA/docker_image_build && git checkout v1.2 +git clone https://github.com/opea-project/GenAIComps.git ``` -### Build the MegaService Docker Image + +### 1.3. Build Docker images repo + +#### Build Docker image: ```bash -git clone https://github.com/opea-project/GenAIExamples.git -cd GenAIExamples/SearchQnA -docker build --no-cache -t opea/searchqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +docker compose -f build.yaml build --no-cache ``` -### Build the UI Docker Image + +### 1.4. Pull Docker images from Docker Hub ```bash -cd GenAIExamples/SearchQnA/ui -docker build --no-cache -t opea/opea/searchqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . +docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 ``` -## Deploy SearchQnA Application +### 1.5. Checking for the necessary Docker images -### Features of Docker compose for AMD GPUs +After assembling the images, you can check their presence in the list of available images using the command: -1. Added forwarding of GPU devices to the container TGI service with instructions: - -```yaml -shm_size: 1g -devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ -cap_add: - - SYS_PTRACE -group_add: - - video -security_opt: - - seccomp:unconfined +```bash +docker image ls ``` -In this case, all GPUs are thrown. To reset a specific GPU, you need to use specific device names cardN and renderN. 
+The output of the command should contain images: -For example: +- ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 +- opea/embedding:latest +- opea/web-retriever:latest +- opea/reranking:latest +- opea/llm-vllm-rocm:latest +- opea/llm-textgen:latest +- opea/searchqna:latest +- opea/searchqna-ui:latest -```yaml -shm_size: 1g -devices: - - /dev/kfd:/dev/kfd - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/render128:/dev/dri/render128 -cap_add: - - SYS_PTRACE -group_add: - - video -security_opt: - - seccomp:unconfined +## 2. Set deploy environment variables + +### Setting variables in the operating system environment + +#### Set variables: + +```bash +### Replace the string 'your_huggingfacehub_token' with your HuggingFacehub repository access token. +export HUGGINGFACEHUB_API_TOKEN='your_huggingfacehub_token' +### Replace the string 'your_google_api_key' with your GOOGLE API KEY. +export GOOGLE_API_KEY='your_google_api_key' +### Replace the string 'your_google_cse_id' with your GOOGLE CSE identifier. +export GOOGLE_CSE_ID='your_google_cse_id' +``` + +### Setting variables in the file set_env_vllm.sh + +```bash +cd cd cd ~/searchqna-test/GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm +### The example uses the Nano text editor. You can use any convenient text editor +nano set_env_vllm.sh ``` -To find out which GPU device IDs cardN and renderN correspond to the same GPU, use the GPU driver utility +Set the values of the variables: -### Go to the directory with the Docker compose file +- **HOST_IP, HOST_IP_EXTERNAL** - These variables are used to configure the name/address of the service in the operating system environment for the application services to interact with each other and with the outside world. + + If your server uses only an internal address and is not accessible from the Internet, then the values for these two variables will be the same and the value will be equal to the server's internal name/address. 
+ + If your server uses only an external, Internet-accessible address, then the values for these two variables will be the same and the value will be equal to the server's external name/address. + + If your server is located on an internal network, has an internal address, but is accessible from the Internet via a proxy/firewall/load balancer, then the HOST_IP variable will have a value equal to the internal name/address of the server, and the EXTERNAL_HOST_IP variable will have a value equal to the external name/address of the proxy/firewall/load balancer behind which the server is located. + + We set these values in the file set_env_vllm.sh + +- **Variables with names like "%%%%\_PORT"** - These variables set the IP port numbers for establishing network connections to the application services. + The values shown in the file set_env_vllm.sh they are the values used for the development and testing of the application, as well as configured for the environment in which the development is performed. These values must be configured in accordance with the rules of network access to your environment's server, and must not overlap with the IP ports of other applications that are already in use. + +If you are in a proxy environment, also set the proxy-related environment variables: ```bash -cd GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm +export http_proxy="Your_HTTP_Proxy" +export https_proxy="Your_HTTPs_Proxy" ``` -### Set environments +- **Variables with names like "%%%%\_PORT"** - These variables set the IP port numbers for establishing network connections to the application services. + The values shown in the file **launch_agent_service_vllm_rocm.sh** they are the values used for the development and testing of the application, as well as configured for the environment in which the development is performed. 
These values must be configured in accordance with the rules of network access to your environment's server, and must not overlap with the IP ports of other applications that are already in use. -In the file "GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh " it is necessary to set the required values. Parameter assignments are specified in the comments for each variable setting command +## 3. Deploy application + +### 3.1. Deploying applications using Docker Compose ```bash -chmod +x set_env.sh -. set_env.sh +cd ~/searchqna-test/GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm/ +docker compose -f compose_vllm.yaml up -d ``` -### Run services +After starting the containers, you need to view their status with the command: +```bash +docker ps ``` -docker compose up -d + +The following containers should be running: + +- search-web-retriever-server +- search-vllm-service +- search-tei-embedding-server +- search-tei-reranking-server +- search-reranking-server +- search-embedding-server +- search-llm-server +- search-backend-server +- search-ui-server + +Containers should not restart. + +#### 3.1.1. Configuring GPU forwarding + +By default, in the Docker Compose file, compose_vllm.yaml is configured to forward all GPUs to the search-vllm-service container. +To use certain GPUs, you need to configure the forwarding of certain devices from the host system to the container. +The configuration must be done in: + +```yaml +services: + ####### + vllm-service: + devices: +``` + +Example for set isolation for 1 GPU + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +``` + +Example for set isolation for 2 GPUs + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 + - /dev/dri/card1:/dev/dri/card1 + - /dev/dri/renderD129:/dev/dri/renderD129 +``` + +### 3.2. Checking the application services + +#### 3.2.1. 
Checking vllm-service + +Verification is performed in two ways: + +- Checking the container logs + + ```bash + docker logs search-vllm-service + ``` + + A message like this should appear in the logs: + + ```commandline + INFO: Started server process [1] + INFO: Waiting for application startup. + INFO: Application startup complete. + INFO: Uvicorn running on http://0.0.0.0:8011 (Press CTRL+C to quit) + ``` + +- Сhecking the response from the service + ```bash + ### curl request + ### Replace 18110 with the value set in the startup script in the variable VLLM_SERVICE_PORT + curl http://${HOST_IP}:${SEARCH_VLLM_SERVICE_PORT}/v1/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Intel/neural-chat-7b-v3-3", + "prompt": "What is a Deep Learning?", + "max_tokens": 30, + "temperature": 0 + }' + ``` + The response from the service must be in the form of JSON: + ```json + { + "id": "cmpl-1d7d175d36d0491cba3abaa8b5bd6991", + "object": "text_completion", + "created": 1740411135, + "model": "Intel/neural-chat-7b-v3-3", + "choices": [ + { + "index": 0, + "text": " Deep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is called \"deep\" because it", + "logprobs": null, + "finish_reason": "length", + "stop_reason": null, + "prompt_logprobs": null + } + ], + "usage": { "prompt_tokens": 7, "total_tokens": 37, "completion_tokens": 30, "prompt_tokens_details": null } + } + ``` + The value of "choice.text" must contain a response from the service that makes sense. + If such a response is present, then the search-vllm-service is considered verified. + +#### 3.2.2. 
Checking search-llm + +Сhecking the response from the service + +```bash +curl http://${HOST_IP}:${SEARCH_LLM_SERVICE_PORT}/v1/chat/completions\ + -X POST \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"stream":true}' \ + -H 'Content-Type: application/json' +``` + +The response from the service must be in the form of JSON: + +```textmate +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"\n","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"\n","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"Deep","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" Learning","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" is","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" 
a","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" subset","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" of","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" Machine","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" Learning","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" that","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" is","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" 
concerned","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" with","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" algorithms","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" inspired","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":"length","index":0,"logprobs":null,"text":" by","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: [DONE] +``` + +The value of "choices.text" must contain a response from the service that makes sense. +If such a response is present, then the search-llm is considered verified. + +#### 3.2.3. 
Checking search-tei-embedding-service + +Сhecking the response from the service ```bash -curl http://${SEARCH_HOST_IP}:3001/embed \ +curl http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT}/embed \ -X POST \ -d '{"inputs":"What is Deep Learning?"}' \ -H 'Content-Type: application/json' ``` -## Validate Embedding service +The response from the service must be in the form of text: + +```textmate +[[0.00037115702,-0.06356819,0.0024758505,........-0.08894698,0.045917906,-0.00475913,0.034920968,-0.0064531155,-0.00689886,-0.06119457,0.021173967,-0.027787622,-0.02472986,0.03998034,0.03737826,-0.0067949123,0.022558564,-0.04570635,-0.033072025,0.022725677,0.016026087,-0.02125421,-0.02984927,-0.0049473033]] +``` + +If the output value is similar to the example given, we consider the service to be successfully launched. + +#### 3.2.4. Checking search-embedding + +Сhecking the response from the service ```bash -curl http://${SEARCH_HOST_IP}:3002/v1/embeddings\ +curl http://${HOST_IP}:${SEARCH_EMBEDDING_SERVICE_PORT}/v1/embeddings\ -X POST \ - -d '{"text":"hello"}' \ + -d '{"input":"hello"}' \ -H 'Content-Type: application/json' ``` -## Validate Web Retriever service +The response from the service must be in the form of text: + +```json +{"object":"list","model":"BAAI/bge-base-en-v1.5","data":[{"index":0,"object":"embedding","embedding":[0.0007791813,0.042613804,0.020304274,-0.0070378557,0.059366036,-0.0044034636]}],"usage":{"prompt_tokens":3,"total_tokens":3,"completion_tokens":0}} +``` + +If the output value is similar to the example given, we consider the service to be successfully launched. + +#### 3.2.5. 
Checking search-web-retriever + +Сhecking the response from the service ```bash export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") -curl http://${SEARCH_HOST_IP}:3003/v1/web_retrieval \ +curl http://${HOST_IP}:${SEARCH_WEB_RETRIEVER_SERVICE_PORT}/v1/web_retrieval \ -X POST \ -d "{\"text\":\"What is the 2024 holiday schedule?\",\"embedding\":${your_embedding}}" \ -H 'Content-Type: application/json' ``` -## Validate TEI Reranking service +The response from the service must be in the form of JSON: + +```json +{"id":"67cace517e36aff3f10a756b87a9125b","retrieved_docs":[{"downstream_black_list":[],"id":"4ba6bc05cff877011da321bbd03c05a8","text":"* Message from the Director\n * Introduction\n * Workforce Planning and AnalysisToggle submenu\n * Early Career Talent\n * Evaluation System Development\n * Innovation\n * Leading Practices\n * Resources\n * Career Paths for Federal Program and Project Management GuideToggle submenu\n * Introduction\n * Purpose and Objectives\n * Data and Methodology\n * Differentiating Job Titles for Program and Project Managers\n * Understanding the Career Path\n * Success Factors\n * Competency Development Framework Part I\n * Competency Development Framework Part II\n * Credentials and Certifications\n * Appendix A: Key Terminology\n * Appendix B: Subject Matter Expert (SME) Workshops List of Participating Agencies\n * Appendix C: List of Designated Title & Number for Each Job Series\n * Appendix D: Program and Project Competency Model and Competency Definitions\n * Appendix E: Program and Project Management Competency Model Proficiency Level\n * FY 2024 Human Capital ReviewsToggle submenu\n * Message from the Director\n * Introduction\n * Data Driven Decision Making\n * Strategic Planning\n * Artificial Intelligence\n * Resources \n description: Welcome to opm.gov \n \n title: Federal Holidays \n \n source: 
https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/ \n"},{"downstream_black_list":[],"id":"b3908b0a74cb115a09a0928beda79bc5","text":"If you have a question whether a particular State office is open or closed,\nplease contact that office.\n\nBack to Top\n\nCommissioner Beth Fastiggi \n120 State Street, Montpelier, VT 05620 \n(802) 828-3491 \nDHR.General@vermont.gov\n\n_Contact Us \nHR Field Representative Locator_ \n_Alerts/Closings/Delays_ \nCurrent Road & Driving Conditions \nPublic Records Officer Contact Information and Public Records Database \nTransparent and Open Government Information​\n\n### Connect with Us\n\nTwitter\n\nFacebook\n\nInstagram\n\nYouTube\n\nRSS\n\n## Need Assistance?\n\nClick here for a list of Department contacts.\n\n\n\n## How Do I?\n\n * Learn about benefit plans\n * Contact EAP\n * Answer payroll questions\n * See pay periods & pay dates\n * Find wellness programs\n * Learn about leave benefits\n * Make a public records request\n * Find info about classification\n * OWA Email Login\n * Find workforce reports\n * View Workforce Dashboard\n * Employment Verification\n\n## Popular Links\n\n * Classroom/Online Training\n * Collective Bargaining Agreements\n * Employee/Manager Self Service\n * Job Specifications\n * Pay Charts\n * Pay Dates\n * Policy & Procedure Manual\n * Retirement Planning\n * Retirement Office \n * State Holiday Schedule\n * Time Entry & Approval\n * VTHR Login\n\n__\n\nReturn to top\n\nCopyright (C) 2025 State of Vermont All rights reserved. 
| \n\n * Policies\n * Accessibility Policy\n * Privacy Policy\n * Feedback Survey \n title: State Holiday Schedule | Department of Human Resources \n \n source: https://humanresources.vermont.gov/benefits-wellness/holiday-schedule \n"},{"downstream_black_list":[],"id":"b52e0a8865ebfc6f93cc5e366e9b57b0","text":"## Revenue and Spending\n\n * Contracts \n * Monthly Revenue Watch\n * Severance Taxes\n * Sources of Revenue Guide\n * Taxes of Texas: A Field Guide (PDF)\n\n## Budget and \nFinance\n\n * Financial Reports and Forecasts\n * Budget Process Primer\n * Texas Investments\n * Understanding the ACFR\n * Cash Report and ACFR (PDF)\n\n## Open Data Tools and Information\n\n * Analysis and Reports\n * Dashboards\n * Pension Search Tool\n * Search Datasets\n * Secure Information and File Transfer (SIFT)\n\n## Local Governments\n\n * Eminent Domain\n * Hotel Occupancy Tax\n * Local Government Debt\n * Sheriffs' and Constables' Fees\n * SPD Financials and Taxes \n * Tax Allocation Summaries\n * Transparency Stars\n * Reports and Tools\n\n * Economy\n\n## __ Economy Home\n\n## Fiscal Notes\n\n * Latest Articles\n * Archives\n * About _Fiscal Notes_\n * Republish\n\n## In Depth\n\n * Regional Reports\n * Forecasts\n * Key Economic Indicators\n * Economic Data(Good for Texas Tours)\n * Special Reports\n\n## Economic Development Programs\n\n * Property Tax Programs\n * Sales Tax Programs\n * Grants and Special Assessments\n * Search Tools and Data\n\n * Purchasing\n\n## __ Purchasing Home\n\n## Statewide Contracts\n\n * Search Statewide Contracts\n * Contract Development\n * Contract Management\n * Procurement Oversight & Delegation\n * Texas Multiple Award Schedule (TXMAS)\n * txsmartbuy.gov\n * DIR Contracts \n description: Office holiday schedule for fiscal 2024. 
\n \n title: State of Texas Holiday Schedule - Fiscal 2025 \n \n source: https://comptroller.texas.gov/about/holidays.php \n"},{"downstream_black_list":[],"id":"ee75f07d60742868abfae486bbc1849d","text":"Skip to page navigation\n\nAn official website of the United States government\n\nHere's how you know\n\nHere's how you know\n\n**Official websites use .gov** \nA **.gov** website belongs to an official government organization in the\nUnited States.\n\n**Secure .gov websites use HTTPS** \nA **lock** ( Lock A locked padlock ) or **https://** means you’ve safely\nconnected to the .gov website. Share sensitive information only on official,\nsecure websites.\n\nMenu\n\nSearch all of OPM Submit\n\nSections\n\n * About Toggle submenu\n\n * Our Agency\n * Who We Are\n * Our Work\n * Mission & History\n * Careers at OPM\n * Doing Business with OPM\n * Reports & Publications\n * Open Government\n * Get Help\n * Contact Us\n * News\n * Data\n * 2023 Agency Financial Report\n * Combined Federal Campaign\n * 2023 Annual Performance Report\n * FY 2025 Congressional Budget Justification\n * 2024 Agency Financial Report\n * 2024 Annual Performance Report\n\n * Policy Toggle submenu \n description: Welcome to opm.gov \n \n title: Federal Holidays \n \n source: https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/ \n"}],"initial_query":"What is the 2024 holiday schedule?","top_n":1} +``` + +The value of "retrieved_docs.text" must contain a response from the service that makes sense. +If such a response is present, then the search-web-retriever is considered verified. + +#### 3.2.6. 
Checking search-tei-reranking-service
+
+Checking the response from the service

```bash
-curl http://${SEARCH_HOST_IP}:3004/rerank \
+curl http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT}/rerank \
 -X POST \
 -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
 -H 'Content-Type: application/json'
```

-## Validate TEI Reranking service
+The response from the service must be in the form of JSON:

-```bash
-curl http://${SEARCH_HOST_IP}:3005/v1/reranking\
- -X POST \
- -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
- -H 'Content-Type: application/json'
+```json
+[{"index":1,"score":0.94238955},{"index":0,"score":0.120219156}]
```

-## Validate TGI service
+If the output value is similar to the example given, we consider the service to be successfully launched.
+
+#### 3.2.7. Checking search-reranking
+
+Checking the response from the service

```bash
-curl http://${SEARCH_HOST_IP}:3006/generate \
+curl http://${HOST_IP}:${SEARCH_RERANK_SERVICE_PORT}/v1/reranking\
 -X POST \
- -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+ -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
 -H 'Content-Type: application/json'
```

-## Validate LLM service
+The response from the service must be in the form of JSON:

-```bash
-curl http://${SEARCH_HOST_IP}:3007/v1/chat/completions\
- -X POST \
- -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
- -H 'Content-Type: application/json'
+```json
+{"id":"26e5d7f6259b8a184387f13fc9c54038","model":null,"query":"What is Deep 
Learning?","max_tokens":1024,"max_new_tokens":1024,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"frequency_penalty":0.0,"presence_penalty":0.0,"repetition_penalty":1.03,"stream":true,"language":"auto","chat_template":null,"documents":["Deep learning is..."]}
```

-## Validate MegaService
+If the output value is similar to the example given, we consider the service to be successfully launched.
+
+#### 3.2.8. Checking search-backend-server
+
+Checking the response from the service

```bash
-curl http://${SEARCH_HOST_IP}:3008/v1/searchqna -H "Content-Type: application/json" -d '{
+curl http://${HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna -H "Content-Type: application/json" -d '{
 "messages": "What is the latest news? Give me also the source link.",
 "stream": "True"
}'
```
+
+The response from the service must be in the form of JSON:
+
+```textmate
+data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"\n","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null}
+data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" ","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null}
+data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" View","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null}
+data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" the","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null}
+data: 
{"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" latest","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" news","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":".","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +....... +data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"com","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"/","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":"stop","index":0,"logprobs":null,"text":"","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: [DONE] +``` + +If the output value is similar to the example given, we consider the service to be successfully launched. 
From 78179d203c5fbe95b0181008cba39f2f4de38763 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 11 Mar 2025 18:22:56 +0000 Subject: [PATCH 15/44] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../amd/gpu/rocm/README_vllm.md | 65 +++++++++++++++++-- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md b/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md index e5f38bea3f..6f18350100 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md @@ -301,7 +301,18 @@ curl http://${HOST_IP}:${SEARCH_EMBEDDING_SERVICE_PORT}/v1/embeddings\ The response from the service must be in the form of text: ```json -{"object":"list","model":"BAAI/bge-base-en-v1.5","data":[{"index":0,"object":"embedding","embedding":[0.0007791813,0.042613804,0.020304274,-0.0070378557,0.059366036,-0.0044034636]}],"usage":{"prompt_tokens":3,"total_tokens":3,"completion_tokens":0}} +{ + "object": "list", + "model": "BAAI/bge-base-en-v1.5", + "data": [ + { + "index": 0, + "object": "embedding", + "embedding": [0.0007791813, 0.042613804, 0.020304274, -0.0070378557, 0.059366036, -0.0044034636] + } + ], + "usage": { "prompt_tokens": 3, "total_tokens": 3, "completion_tokens": 0 } +} ``` If the output value is similar to the example given, we consider the service to be successfully launched. 
@@ -321,7 +332,33 @@ curl http://${HOST_IP}:${SEARCH_WEB_RETRIEVER_SERVICE_PORT}/v1/web_retrieval \ The response from the service must be in the form of JSON: ```json -{"id":"67cace517e36aff3f10a756b87a9125b","retrieved_docs":[{"downstream_black_list":[],"id":"4ba6bc05cff877011da321bbd03c05a8","text":"* Message from the Director\n * Introduction\n * Workforce Planning and AnalysisToggle submenu\n * Early Career Talent\n * Evaluation System Development\n * Innovation\n * Leading Practices\n * Resources\n * Career Paths for Federal Program and Project Management GuideToggle submenu\n * Introduction\n * Purpose and Objectives\n * Data and Methodology\n * Differentiating Job Titles for Program and Project Managers\n * Understanding the Career Path\n * Success Factors\n * Competency Development Framework Part I\n * Competency Development Framework Part II\n * Credentials and Certifications\n * Appendix A: Key Terminology\n * Appendix B: Subject Matter Expert (SME) Workshops List of Participating Agencies\n * Appendix C: List of Designated Title & Number for Each Job Series\n * Appendix D: Program and Project Competency Model and Competency Definitions\n * Appendix E: Program and Project Management Competency Model Proficiency Level\n * FY 2024 Human Capital ReviewsToggle submenu\n * Message from the Director\n * Introduction\n * Data Driven Decision Making\n * Strategic Planning\n * Artificial Intelligence\n * Resources \n description: Welcome to opm.gov \n \n title: Federal Holidays \n \n source: https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/ \n"},{"downstream_black_list":[],"id":"b3908b0a74cb115a09a0928beda79bc5","text":"If you have a question whether a particular State office is open or closed,\nplease contact that office.\n\nBack to Top\n\nCommissioner Beth Fastiggi \n120 State Street, Montpelier, VT 05620 \n(802) 828-3491 \nDHR.General@vermont.gov\n\n_Contact Us \nHR Field Representative Locator_ \n_Alerts/Closings/Delays_ \nCurrent Road & 
Driving Conditions \nPublic Records Officer Contact Information and Public Records Database \nTransparent and Open Government Information​\n\n### Connect with Us\n\nTwitter\n\nFacebook\n\nInstagram\n\nYouTube\n\nRSS\n\n## Need Assistance?\n\nClick here for a list of Department contacts.\n\n\n\n## How Do I?\n\n * Learn about benefit plans\n * Contact EAP\n * Answer payroll questions\n * See pay periods & pay dates\n * Find wellness programs\n * Learn about leave benefits\n * Make a public records request\n * Find info about classification\n * OWA Email Login\n * Find workforce reports\n * View Workforce Dashboard\n * Employment Verification\n\n## Popular Links\n\n * Classroom/Online Training\n * Collective Bargaining Agreements\n * Employee/Manager Self Service\n * Job Specifications\n * Pay Charts\n * Pay Dates\n * Policy & Procedure Manual\n * Retirement Planning\n * Retirement Office \n * State Holiday Schedule\n * Time Entry & Approval\n * VTHR Login\n\n__\n\nReturn to top\n\nCopyright (C) 2025 State of Vermont All rights reserved. 
| \n\n * Policies\n * Accessibility Policy\n * Privacy Policy\n * Feedback Survey \n title: State Holiday Schedule | Department of Human Resources \n \n source: https://humanresources.vermont.gov/benefits-wellness/holiday-schedule \n"},{"downstream_black_list":[],"id":"b52e0a8865ebfc6f93cc5e366e9b57b0","text":"## Revenue and Spending\n\n * Contracts \n * Monthly Revenue Watch\n * Severance Taxes\n * Sources of Revenue Guide\n * Taxes of Texas: A Field Guide (PDF)\n\n## Budget and \nFinance\n\n * Financial Reports and Forecasts\n * Budget Process Primer\n * Texas Investments\n * Understanding the ACFR\n * Cash Report and ACFR (PDF)\n\n## Open Data Tools and Information\n\n * Analysis and Reports\n * Dashboards\n * Pension Search Tool\n * Search Datasets\n * Secure Information and File Transfer (SIFT)\n\n## Local Governments\n\n * Eminent Domain\n * Hotel Occupancy Tax\n * Local Government Debt\n * Sheriffs' and Constables' Fees\n * SPD Financials and Taxes \n * Tax Allocation Summaries\n * Transparency Stars\n * Reports and Tools\n\n * Economy\n\n## __ Economy Home\n\n## Fiscal Notes\n\n * Latest Articles\n * Archives\n * About _Fiscal Notes_\n * Republish\n\n## In Depth\n\n * Regional Reports\n * Forecasts\n * Key Economic Indicators\n * Economic Data(Good for Texas Tours)\n * Special Reports\n\n## Economic Development Programs\n\n * Property Tax Programs\n * Sales Tax Programs\n * Grants and Special Assessments\n * Search Tools and Data\n\n * Purchasing\n\n## __ Purchasing Home\n\n## Statewide Contracts\n\n * Search Statewide Contracts\n * Contract Development\n * Contract Management\n * Procurement Oversight & Delegation\n * Texas Multiple Award Schedule (TXMAS)\n * txsmartbuy.gov\n * DIR Contracts \n description: Office holiday schedule for fiscal 2024. 
\n \n title: State of Texas Holiday Schedule - Fiscal 2025 \n \n source: https://comptroller.texas.gov/about/holidays.php \n"},{"downstream_black_list":[],"id":"ee75f07d60742868abfae486bbc1849d","text":"Skip to page navigation\n\nAn official website of the United States government\n\nHere's how you know\n\nHere's how you know\n\n**Official websites use .gov** \nA **.gov** website belongs to an official government organization in the\nUnited States.\n\n**Secure .gov websites use HTTPS** \nA **lock** ( Lock A locked padlock ) or **https://** means you’ve safely\nconnected to the .gov website. Share sensitive information only on official,\nsecure websites.\n\nMenu\n\nSearch all of OPM Submit\n\nSections\n\n * About Toggle submenu\n\n * Our Agency\n * Who We Are\n * Our Work\n * Mission & History\n * Careers at OPM\n * Doing Business with OPM\n * Reports & Publications\n * Open Government\n * Get Help\n * Contact Us\n * News\n * Data\n * 2023 Agency Financial Report\n * Combined Federal Campaign\n * 2023 Annual Performance Report\n * FY 2025 Congressional Budget Justification\n * 2024 Agency Financial Report\n * 2024 Annual Performance Report\n\n * Policy Toggle submenu \n description: Welcome to opm.gov \n \n title: Federal Holidays \n \n source: https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/ \n"}],"initial_query":"What is the 2024 holiday schedule?","top_n":1} +{ + "id": "67cace517e36aff3f10a756b87a9125b", + "retrieved_docs": [ + { + "downstream_black_list": [], + "id": "4ba6bc05cff877011da321bbd03c05a8", + "text": "* Message from the Director\n * Introduction\n * Workforce Planning and AnalysisToggle submenu\n * Early Career Talent\n * Evaluation System Development\n * Innovation\n * Leading Practices\n * Resources\n * Career Paths for Federal Program and Project Management GuideToggle submenu\n * Introduction\n * Purpose and Objectives\n * Data and Methodology\n * Differentiating Job Titles for Program and Project Managers\n * Understanding 
the Career Path\n * Success Factors\n * Competency Development Framework Part I\n * Competency Development Framework Part II\n * Credentials and Certifications\n * Appendix A: Key Terminology\n * Appendix B: Subject Matter Expert (SME) Workshops List of Participating Agencies\n * Appendix C: List of Designated Title & Number for Each Job Series\n * Appendix D: Program and Project Competency Model and Competency Definitions\n * Appendix E: Program and Project Management Competency Model Proficiency Level\n * FY 2024 Human Capital ReviewsToggle submenu\n * Message from the Director\n * Introduction\n * Data Driven Decision Making\n * Strategic Planning\n * Artificial Intelligence\n * Resources \n description: Welcome to opm.gov \n \n title: Federal Holidays \n \n source: https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/ \n" + }, + { + "downstream_black_list": [], + "id": "b3908b0a74cb115a09a0928beda79bc5", + "text": "If you have a question whether a particular State office is open or closed,\nplease contact that office.\n\nBack to Top\n\nCommissioner Beth Fastiggi \n120 State Street, Montpelier, VT 05620 \n(802) 828-3491 \nDHR.General@vermont.gov\n\n_Contact Us \nHR Field Representative Locator_ \n_Alerts/Closings/Delays_ \nCurrent Road & Driving Conditions \nPublic Records Officer Contact Information and Public Records Database \nTransparent and Open Government Information​\n\n### Connect with Us\n\nTwitter\n\nFacebook\n\nInstagram\n\nYouTube\n\nRSS\n\n## Need Assistance?\n\nClick here for a list of Department contacts.\n\n\n\n## How Do I?\n\n * Learn about benefit plans\n * Contact EAP\n * Answer payroll questions\n * See pay periods & pay dates\n * Find wellness programs\n * Learn about leave benefits\n * Make a public records request\n * Find info about classification\n * OWA Email Login\n * Find workforce reports\n * View Workforce Dashboard\n * Employment Verification\n\n## Popular Links\n\n * Classroom/Online Training\n * Collective 
Bargaining Agreements\n * Employee/Manager Self Service\n * Job Specifications\n * Pay Charts\n * Pay Dates\n * Policy & Procedure Manual\n * Retirement Planning\n * Retirement Office \n * State Holiday Schedule\n * Time Entry & Approval\n * VTHR Login\n\n__\n\nReturn to top\n\nCopyright (C) 2025 State of Vermont All rights reserved. | \n\n * Policies\n * Accessibility Policy\n * Privacy Policy\n * Feedback Survey \n title: State Holiday Schedule | Department of Human Resources \n \n source: https://humanresources.vermont.gov/benefits-wellness/holiday-schedule \n" + }, + { + "downstream_black_list": [], + "id": "b52e0a8865ebfc6f93cc5e366e9b57b0", + "text": "## Revenue and Spending\n\n * Contracts \n * Monthly Revenue Watch\n * Severance Taxes\n * Sources of Revenue Guide\n * Taxes of Texas: A Field Guide (PDF)\n\n## Budget and \nFinance\n\n * Financial Reports and Forecasts\n * Budget Process Primer\n * Texas Investments\n * Understanding the ACFR\n * Cash Report and ACFR (PDF)\n\n## Open Data Tools and Information\n\n * Analysis and Reports\n * Dashboards\n * Pension Search Tool\n * Search Datasets\n * Secure Information and File Transfer (SIFT)\n\n## Local Governments\n\n * Eminent Domain\n * Hotel Occupancy Tax\n * Local Government Debt\n * Sheriffs' and Constables' Fees\n * SPD Financials and Taxes \n * Tax Allocation Summaries\n * Transparency Stars\n * Reports and Tools\n\n * Economy\n\n## __ Economy Home\n\n## Fiscal Notes\n\n * Latest Articles\n * Archives\n * About _Fiscal Notes_\n * Republish\n\n## In Depth\n\n * Regional Reports\n * Forecasts\n * Key Economic Indicators\n * Economic Data(Good for Texas Tours)\n * Special Reports\n\n## Economic Development Programs\n\n * Property Tax Programs\n * Sales Tax Programs\n * Grants and Special Assessments\n * Search Tools and Data\n\n * Purchasing\n\n## __ Purchasing Home\n\n## Statewide Contracts\n\n * Search Statewide Contracts\n * Contract Development\n * Contract Management\n * Procurement Oversight & 
Delegation\n * Texas Multiple Award Schedule (TXMAS)\n * txsmartbuy.gov\n * DIR Contracts \n description: Office holiday schedule for fiscal 2024. \n \n title: State of Texas Holiday Schedule - Fiscal 2025 \n \n source: https://comptroller.texas.gov/about/holidays.php \n" + }, + { + "downstream_black_list": [], + "id": "ee75f07d60742868abfae486bbc1849d", + "text": "Skip to page navigation\n\nAn official website of the United States government\n\nHere's how you know\n\nHere's how you know\n\n**Official websites use .gov** \nA **.gov** website belongs to an official government organization in the\nUnited States.\n\n**Secure .gov websites use HTTPS** \nA **lock** ( Lock A locked padlock ) or **https://** means you’ve safely\nconnected to the .gov website. Share sensitive information only on official,\nsecure websites.\n\nMenu\n\nSearch all of OPM Submit\n\nSections\n\n * About Toggle submenu\n\n * Our Agency\n * Who We Are\n * Our Work\n * Mission & History\n * Careers at OPM\n * Doing Business with OPM\n * Reports & Publications\n * Open Government\n * Get Help\n * Contact Us\n * News\n * Data\n * 2023 Agency Financial Report\n * Combined Federal Campaign\n * 2023 Annual Performance Report\n * FY 2025 Congressional Budget Justification\n * 2024 Agency Financial Report\n * 2024 Annual Performance Report\n\n * Policy Toggle submenu \n description: Welcome to opm.gov \n \n title: Federal Holidays \n \n source: https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/ \n" + } + ], + "initial_query": "What is the 2024 holiday schedule?", + "top_n": 1 +} ``` The value of "retrieved_docs.text" must contain a response from the service that makes sense. 
@@ -341,7 +378,10 @@ curl http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT}/rerank \ The response from the service must be in the form of JSON: ```json -[{"index":1,"score":0.94238955},{"index":0,"score":0.120219156}] +[ + { "index": 1, "score": 0.94238955 }, + { "index": 0, "score": 0.120219156 } +] ``` If the output value is similar to the example given, we consider the service to be successfully launched. @@ -360,7 +400,24 @@ curl http://${HOST_IP}:${SEARCH_RERANK_SERVICE_PORT}/v1/reranking\ The response from the service must be in the form of JSON: ```json -{"id":"26e5d7f6259b8a184387f13fc9c54038","model":null,"query":"What is Deep Learning?","max_tokens":1024,"max_new_tokens":1024,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"frequency_penalty":0.0,"presence_penalty":0.0,"repetition_penalty":1.03,"stream":true,"language":"auto","chat_template":null,"documents":["Deep learning is..."]} +{ + "id": "26e5d7f6259b8a184387f13fc9c54038", + "model": null, + "query": "What is Deep Learning?", + "max_tokens": 1024, + "max_new_tokens": 1024, + "top_k": 10, + "top_p": 0.95, + "typical_p": 0.95, + "temperature": 0.01, + "frequency_penalty": 0.0, + "presence_penalty": 0.0, + "repetition_penalty": 1.03, + "stream": true, + "language": "auto", + "chat_template": null, + "documents": ["Deep learning is..."] +} ``` If the output value is similar to the example given, we consider the service to be successfully launched. 
From a5fd7738a392035e445044354622fb32048e7c44 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 12 Mar 2025 01:24:09 +0700 Subject: [PATCH 16/44] SearchQnA - add files for deploy with ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md b/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md index e5f38bea3f..0a08fed447 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md @@ -188,7 +188,7 @@ Verification is performed in two ways: A message like this should appear in the logs: - ```commandline + ```textmate INFO: Started server process [1] INFO: Waiting for application startup. INFO: Application startup complete. From ea3eb0d2e284bafb3fc678ee2c28db908eca7f75 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 12 Mar 2025 01:40:00 +0700 Subject: [PATCH 17/44] SearchQnA - add files for deploy with ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md b/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md index c046bc8952..213dac7c62 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md @@ -331,7 +331,7 @@ curl http://${HOST_IP}:${SEARCH_WEB_RETRIEVER_SERVICE_PORT}/v1/web_retrieval \ The response from the service must be in the form of JSON: -```json +```textmate { "id": "67cace517e36aff3f10a756b87a9125b", "retrieved_docs": [ From 6684f776fa9584f0d74d67c67963f2228c78f44a Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Mon, 24 Mar 2025 17:56:21 +0700 Subject: [PATCH 18/44] Update Readme.md and add MODEL_PATH var Signed-off-by: Artem Astafev --- .../docker_compose/amd/gpu/rocm/README.md | 
386 +++++++++++++-- .../amd/gpu/rocm/README_vllm.md | 453 ------------------ .../amd/gpu/rocm/compose_vllm.yaml | 2 +- 3 files changed, 335 insertions(+), 506 deletions(-) delete mode 100644 SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md index f527f81dbf..5d688c682e 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md @@ -1,53 +1,121 @@ # Build and deploy SearchQnA Application on AMD GPU (ROCm) -## Build images +## Build Docker Images -### Build Embedding Image +### 1. Build Docker Image -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/src/Dockerfile . -``` +- #### Create application install directory and go to it: -### Build Retriever Image + ```bash + mkdir ~/searchqna-install && cd searchqna-install + ``` -```bash -docker build --no-cache -t opea/web-retriever-chroma:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/src/Dockerfile . -``` +- #### Clone the repository GenAIExamples (the default repository branch "main" is used here): -### Build Rerank Image + ```bash + git clone https://github.com/opea-project/GenAIExamples.git + ``` -```bash -docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/rerankings/src/Dockerfile . 
-``` + If you need to use a specific branch/tag of the GenAIExamples repository, then (v1.3 replace with its own value): -### Build the LLM Docker Image + ```bash + git clone https://github.com/opea-project/GenAIExamples.git && cd GenAIExamples && git checkout v1.3 + ``` -```bash -docker build -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile . -``` + We remind you that when using a specific version of the code, you need to use the README from this version: -### Build the MegaService Docker Image +- #### Go to build directory: -```bash -git clone https://github.com/opea-project/GenAIExamples.git -cd GenAIExamples/SearchQnA -docker build --no-cache -t opea/searchqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` + ```bash + cd ~/searchqna-install/GenAIExamples/SearchQnA/docker_image_build + ``` -### Build the UI Docker Image +- Cleaning up the GenAIComps repository if it was previously cloned in this directory. + This is necessary if the build was performed earlier and the GenAIComps folder exists and is not empty: -```bash -cd GenAIExamples/SearchQnA/ui -docker build --no-cache -t opea/opea/searchqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . -``` + ```bash + echo Y | rm -R GenAIComps + ``` + +- #### Clone the repository GenAIComps (the default repository branch "main" is used here): + + ```bash + git clone https://github.com/opea-project/GenAIComps.git + ``` + + If you use a specific tag of the GenAIExamples repository, + then you should also use the corresponding tag for GenAIComps. 
(v1.3 replace with its own value): + + ```bash + git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout v1.3 + ``` + + We remind you that when using a specific version of the code, you need to use the README from this version. + +- #### Setting the list of images for the build (from the build file.yaml) + + If you want to deploy a vLLM-based or TGI-based application, then the set of services is installed as follows: + + #### vLLM-based application + + ```bash + service_list="vllm-rocm llm-textgen reranking web-retriever embedding searchqna-ui searchqna" + ``` + + #### TGI-based application + + ```bash + service_list="llm-textgen reranking web-retriever embedding searchqna-ui searchqna" + ``` + +- #### Optional. Pull TGI Docker Image (Do this if you want to use TGI) + + ```bash + docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm + ``` + +- #### Build Docker Images + + ```bash + docker compose -f build.yaml build ${service_list} --no-cache + ``` + + After the build, we check the list of images with the command: -## Deploy SearchQnA Application + ```bash + docker image ls + ``` -### Features of Docker compose for AMD GPUs + The list of images should include: -1. 
Added forwarding of GPU devices to the container TGI service with instructions: + ##### vLLM-based application: + + - opea/vllm-rocm:latest + - opea/llm-textgen:latest + - opea/reranking:latest + - opea/searchqna:latest + - opea/searchqna-ui:latest + - opea/web-retriever:latest + + ##### TGI-based application: + + - ghcr.io/huggingface/text-generation-inference:2.3.1-rocm + - opea/llm-textgen:latest + - opea/reranking:latest + - opea/searchqna:latest + - opea/searchqna-ui:latest + - opea/web-retriever:latest + +--- + +## Deploy the SearchQnA Application + +### Docker Compose Configuration for AMD GPUs + +To enable GPU support for AMD GPUs, the following configuration is added to the Docker Compose file: + +- compose_vllm.yaml - for vLLM-based application +- compose.yaml - for TGI-based ```yaml shm_size: 1g @@ -62,16 +130,14 @@ security_opt: - seccomp:unconfined ``` -In this case, all GPUs are thrown. To reset a specific GPU, you need to use specific device names cardN and renderN. - -For example: +This configuration forwards all available GPUs to the container. To use a specific GPU, specify its `cardN` and `renderN` device IDs. For example: ```yaml shm_size: 1g devices: - /dev/kfd:/dev/kfd - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/render128:/dev/dri/render128 + - /dev/dri/renderD128:/dev/dri/renderD128 cap_add: - SYS_PTRACE group_add: @@ -80,41 +146,239 @@ security_opt: - seccomp:unconfined ``` -To find out which GPU device IDs cardN and renderN correspond to the same GPU, use the GPU driver utility +**How to Identify GPU Device IDs:** +Use AMD GPU driver utilities to determine the correct `cardN` and `renderN` IDs for your GPU. + +### Set deploy environment variables + +#### Setting variables in the operating system environment: + +##### Set variable HUGGINGFACEHUB_API_TOKEN: + +```bash +### Replace the string 'your_huggingfacehub_token' with your HuggingFacehub repository access token. 
+export HUGGINGFACEHUB_API_TOKEN='your_huggingfacehub_token' +``` + +#### Set variables value in set_env\*\*\*\*.sh file: -### Go to the directory with the Docker compose file +Go to Docker Compose directory: ```bash -cd GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm +cd ~/searchqna-install/GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm ``` -### Set environments +The example uses the Nano text editor. You can use any convenient text editor: + +#### If you use vLLM + +```bash +nano set_env_vllm.sh +``` + +#### If you use TGI + +```bash +nano set_env.sh +``` -In the file "GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh " it is necessary to set the required values. Parameter assignments are specified in the comments for each variable setting command +If you are in a proxy environment, also set the proxy-related environment variables: + +```bash +export http_proxy="Your_HTTP_Proxy" +export https_proxy="Your_HTTPs_Proxy" +``` + +Set the values of the variables: + +- **HOST_IP, HOST_IP_EXTERNAL** - These variables are used to configure the name/address of the service in the operating system environment for the application services to interact with each other and with the outside world. + + If your server uses only an internal address and is not accessible from the Internet, then the values for these two variables will be the same and the value will be equal to the server's internal name/address. + + If your server uses only an external, Internet-accessible address, then the values for these two variables will be the same and the value will be equal to the server's external name/address. 
+
+  If your server is located on an internal network, has an internal address, but is accessible from the Internet via a proxy/firewall/load balancer, then the HOST_IP variable will have a value equal to the internal name/address of the server, and the HOST_IP_EXTERNAL variable will have a value equal to the external name/address of the proxy/firewall/load balancer behind which the server is located.
+
+  We set these values in the file set_env\*\*\*\*.sh
+
+- **Variables with names like "**\*\*\*\*\*\*\_PORT"\*\* - These variables set the IP port numbers for establishing network connections to the application services.
+  The values shown in the set_env.sh or set_env_vllm.sh file are the values used for the development and testing of the application, as well as configured for the environment in which the development is performed. These values must be configured in accordance with the rules of network access to your environment's server, and must not overlap with the IP ports of other applications that are already in use.
+
+#### Set variables with script set_env\*\*\*\*.sh
+
+#### If you use vLLM
+
+```bash
+. set_env_vllm.sh
+```
+
+#### If you use TGI

```bash
-chmod +x set_env.sh
. 
set_env.sh
```

-### Run services
+### Start the services:

+#### If you use vLLM
+
+```bash
+docker compose -f compose_vllm.yaml up -d
```
-docker compose up -d
+
+#### If you use TGI
+
+```bash
+docker compose -f compose.yaml up -d
```

-# Validate the MicroServices and MegaService
+All containers should be running and should not restart:
+
+##### If you use vLLM:
+
+- search-vllm-service
+- search-llm-server
+- search-web-retriever-server
+- search-tei-embedding-server
+- search-tei-reranking-server
+- search-reranking-server
+- search-embedding-server
+- search-backend-server
+- search-ui-server
+
+##### If you use TGI:
+
+- search-tgi-service
+- search-llm-server
+- search-web-retriever-server
+- search-tei-embedding-server
+- search-tei-reranking-server
+- search-reranking-server
+- search-embedding-server
+- search-backend-server
+- search-ui-server
+
+---

-## Validate TEI service
+## Validate the Services
+
+### 1. Validate the vLLM/TGI Service
+
+#### If you use vLLM:

```bash
-curl http://${SEARCH_HOST_IP}:3001/embed \
-  -X POST \
-  -d '{"inputs":"What is Deep Learning?"}' \
-  -H 'Content-Type: application/json'
+DATA='{"model": "Intel/neural-chat-7b-v3-3", '\
+'"messages": [{"role": "user", "content": "Implement a high-level API for a TODO list application. '\
+'The API takes as input an operation request and updates the TODO list in place. '\
+'If the request is invalid, raise an exception."}], "max_tokens": 256}'
+
+curl http://${HOST_IP}:${SEARCH_VLLM_SERVICE_PORT}/v1/chat/completions \
+  -X POST \
+  -d "$DATA" \
+  -H 'Content-Type: application/json'
```

-## Validate Embedding service
+Checking the response from the service.
The response should be similar to JSON: + +````json +{ + "id": "chatcmpl-142f34ef35b64a8db3deedd170fed951", + "object": "chat.completion", + "created": 1742270316, + "model": "Intel/neural-chat-7b-v3-3", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "```python\nfrom typing import Optional, List, Dict, Union\nfrom pydantic import BaseModel, validator\n\nclass OperationRequest(BaseModel):\n # Assuming OperationRequest is already defined as per the given text\n pass\n\nclass UpdateOperation(OperationRequest):\n new_items: List[str]\n\n def apply_and_maybe_raise(self, updatable_item: \"Updatable todo list\") -> None:\n # Assuming updatable_item is an instance of Updatable todo list\n self.validate()\n updatable_item.add_items(self.new_items)\n\nclass Updatable:\n # Abstract class for items that can be updated\n pass\n\nclass TodoList(Updatable):\n # Class that represents a todo list\n items: List[str]\n\n def add_items(self, new_items: List[str]) -> None:\n self.items.extend(new_items)\n\ndef handle_request(operation_request: OperationRequest) -> None:\n # Function to handle an operation request\n if isinstance(operation_request, UpdateOperation):\n operation_request.apply_and_maybe_raise(get_todo_list_for_update())\n else:\n raise ValueError(\"Invalid operation request\")\n\ndef get_todo_list_for_update() -> TodoList:\n # Function to get the todo list for update\n # Assuming this function returns the", + "tool_calls": [] + }, + "logprobs": null, + "finish_reason": "length", + "stop_reason": null + } + ], + "usage": { "prompt_tokens": 66, "total_tokens": 322, "completion_tokens": 256, "prompt_tokens_details": null }, + "prompt_logprobs": null +} +```` + +If the service response has a meaningful response in the value of the "choices.message.content" key, +then we consider the vLLM service to be successfully launched + +#### If you use TGI: + +```bash +DATA='{"inputs":"Implement a high-level API for a TODO list application. 
'\ +'The API takes as input an operation request and updates the TODO list in place. '\ +'If the request is invalid, raise an exception.",'\ +'"parameters":{"max_new_tokens":256,"do_sample": true}}' + +curl http://${HOST_IP}:${CODEGEN_TGI_SERVICE_PORT}/generate \ + -X POST \ + -d "$DATA" \ + -H 'Content-Type: application/json' +``` + +Checking the response from the service. The response should be similar to JSON: + +````json +{ + "generated_text": " The supported operations are \"add_task\", \"complete_task\", and \"remove_task\". Each operation can be defined with a corresponding function in the API.\n\nAdd your API in the following format:\n\n```\nTODO App API\n\nsupported operations:\n\noperation name description\n----------------------- ------------------------------------------------\n \n```\n\nUse type hints for function parameters and return values. Specify a text description of the API's supported operations.\n\nUse the following code snippet as a starting point for your high-level API function:\n\n```\nclass TodoAPI:\n def __init__(self, tasks: List[str]):\n self.tasks = tasks # List of tasks to manage\n\n def add_task(self, task: str) -> None:\n self.tasks.append(task)\n\n def complete_task(self, task: str) -> None:\n self.tasks = [t for t in self.tasks if t != task]\n\n def remove_task(self, task: str) -> None:\n self.tasks = [t for t in self.tasks if t != task]\n\n def handle_request(self, request: Dict[str, str]) -> None:\n operation = request.get('operation')\n if operation == 'add_task':\n self.add_task(request.get('task'))\n elif" +} +```` + +If the service response has a meaningful response in the value of the "generated_text" key, +then we consider the TGI service to be successfully launched + +### 2. Validate the LLM Service + +```bash +DATA='{"query":"Implement a high-level API for a TODO list application. '\ +'The API takes as input an operation request and updates the TODO list in place. 
'\ +'If the request is invalid, raise an exception.",'\ +'"max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,'\ +'"repetition_penalty":1.03,"stream":false}' + +curl http://${HOST_IP}:${CODEGEN_LLM_SERVICE_PORT}/v1/chat/completions \ + -X POST \ + -d "$DATA" \ + -H 'Content-Type: application/json' +``` + +Checking the response from the service. The response should be similar to JSON: + +````json +{ + "id": "cmpl-4e89a590b1af46bfb37ce8f12b2996f8", + "choices": [ + { + "finish_reason": "length", + "index": 0, + "logprobs": null, + "text": " The API should support the following operations:\n\n1. Add a new task to the TODO list.\n2. Remove a task from the TODO list.\n3. Mark a task as completed.\n4. Retrieve the list of all tasks.\n\nThe API should also support the following features:\n\n1. The ability to filter tasks based on their completion status.\n2. The ability to sort tasks based on their priority.\n3. The ability to search for tasks based on their description.\n\nHere is an example of how the API can be used:\n\n```python\ntodo_list = []\napi = TodoListAPI(todo_list)\n\n# Add tasks\napi.add_task(\"Buy groceries\")\napi.add_task(\"Finish homework\")\n\n# Mark a task as completed\napi.mark_task_completed(\"Buy groceries\")\n\n# Retrieve the list of all tasks\nprint(api.get_all_tasks())\n\n# Filter tasks based on completion status\nprint(api.filter_tasks(completed=True))\n\n# Sort tasks based on priority\napi.sort_tasks(priority=\"high\")\n\n# Search for tasks based on description\nprint(api.search_tasks(description=\"homework\"))\n```\n\nIn this example, the `TodoListAPI` class is used to manage the TODO list. 
The `add_task` method adds a new task to the list, the `mark_task_completed` method", + "stop_reason": null, + "prompt_logprobs": null + } + ], + "created": 1742270567, + "model": "Qwen/Qwen2.5-Coder-7B-Instruct", + "object": "text_completion", + "system_fingerprint": null, + "usage": { + "completion_tokens": 256, + "prompt_tokens": 37, + "total_tokens": 293, + "completion_tokens_details": null, + "prompt_tokens_details": null + } +} +```` + +If the service response has a meaningful response in the value of the "choices.text" key, +then we consider the vLLM service to be successfully launched + +### 2. Validate Embedding service ```bash curl http://${SEARCH_HOST_IP}:3002/v1/embeddings\ @@ -177,3 +441,21 @@ curl http://${SEARCH_HOST_IP}:3008/v1/searchqna -H "Content-Type: application/js "stream": "True" }' ``` + + + +### 5. Stop application + +#### If you use vLLM + +```bash +cd ~/searchqna-install/GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm +docker compose -f compose_vllm.yaml down +``` + +#### If you use TGI + +```bash +cd ~/searchqna-install/GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm +docker compose -f compose.yaml down +``` diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md b/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md deleted file mode 100644 index 213dac7c62..0000000000 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README_vllm.md +++ /dev/null @@ -1,453 +0,0 @@ -Copyright (C) 2024 Advanced Micro Devices, Inc. - -# Deploy SearchQnA application - -## 1. Clone repo and build Docker images - -### 1.1. Cloning repo - -Create an empty directory in home directory and navigate to it: - -```bash -mkdir -p ~/searchqna-test && cd ~/searchqna-test -``` - -Cloning GenAIExamples repo for build Docker images: - -```bash -git clone https://github.com/opea-project/GenAIExamples.git -``` - -### 1.2. 
Navigate to repo directory and switching to the desired version of the code: - -If you are using the main branch, then you do not need to make the transition, the main branch is used by default - -```bash -cd ~/searchqna-test/GenAIExamples/SearchQnA/docker_image_build -git clone https://github.com/opea-project/GenAIComps.git -``` - -If you are using a specific branch or tag, then we perform git checkout to the desired version. - -```bash -### Replace "v1.2" with the code version you need (branch or tag) -cd cd ~/searchqna-test/GenAIExamples/SearchQnA/docker_image_build && git checkout v1.2 -git clone https://github.com/opea-project/GenAIComps.git -``` - -### 1.3. Build Docker images repo - -#### Build Docker image: - -```bash -docker compose -f build.yaml build --no-cache -``` - -### 1.4. Pull Docker images from Docker Hub - -```bash -docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 -``` - -### 1.5. Checking for the necessary Docker images - -After assembling the images, you can check their presence in the list of available images using the command: - -```bash -docker image ls -``` - -The output of the command should contain images: - -- ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 -- opea/embedding:latest -- opea/web-retriever:latest -- opea/reranking:latest -- opea/llm-vllm-rocm:latest -- opea/llm-textgen:latest -- opea/searchqna:latest -- opea/searchqna-ui:latest - -## 2. Set deploy environment variables - -### Setting variables in the operating system environment - -#### Set variables: - -```bash -### Replace the string 'your_huggingfacehub_token' with your HuggingFacehub repository access token. -export HUGGINGFACEHUB_API_TOKEN='your_huggingfacehub_token' -### Replace the string 'your_google_api_key' with your GOOGLE API KEY. -export GOOGLE_API_KEY='your_google_api_key' -### Replace the string 'your_google_cse_id' with your GOOGLE CSE identifier. 
-export GOOGLE_CSE_ID='your_google_cse_id' -``` - -### Setting variables in the file set_env_vllm.sh - -```bash -cd cd cd ~/searchqna-test/GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm -### The example uses the Nano text editor. You can use any convenient text editor -nano set_env_vllm.sh -``` - -Set the values of the variables: - -- **HOST_IP, HOST_IP_EXTERNAL** - These variables are used to configure the name/address of the service in the operating system environment for the application services to interact with each other and with the outside world. - - If your server uses only an internal address and is not accessible from the Internet, then the values for these two variables will be the same and the value will be equal to the server's internal name/address. - - If your server uses only an external, Internet-accessible address, then the values for these two variables will be the same and the value will be equal to the server's external name/address. - - If your server is located on an internal network, has an internal address, but is accessible from the Internet via a proxy/firewall/load balancer, then the HOST_IP variable will have a value equal to the internal name/address of the server, and the EXTERNAL_HOST_IP variable will have a value equal to the external name/address of the proxy/firewall/load balancer behind which the server is located. - - We set these values in the file set_env_vllm.sh - -- **Variables with names like "%%%%\_PORT"** - These variables set the IP port numbers for establishing network connections to the application services. - The values shown in the file set_env_vllm.sh they are the values used for the development and testing of the application, as well as configured for the environment in which the development is performed. These values must be configured in accordance with the rules of network access to your environment's server, and must not overlap with the IP ports of other applications that are already in use. 
- -If you are in a proxy environment, also set the proxy-related environment variables: - -```bash -export http_proxy="Your_HTTP_Proxy" -export https_proxy="Your_HTTPs_Proxy" -``` - -- **Variables with names like "%%%%\_PORT"** - These variables set the IP port numbers for establishing network connections to the application services. - The values shown in the file **launch_agent_service_vllm_rocm.sh** they are the values used for the development and testing of the application, as well as configured for the environment in which the development is performed. These values must be configured in accordance with the rules of network access to your environment's server, and must not overlap with the IP ports of other applications that are already in use. - -## 3. Deploy application - -### 3.1. Deploying applications using Docker Compose - -```bash -cd cd ~/searchqna-test/GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm/ -docker compose -f compose_vllm up -d -``` - -After starting the containers, you need to view their status with the command: - -```bash -docker ps -``` - -The following containers should be running: - -- search-web-retriever-server -- search-vllm-service -- search-tei-embedding-server -- search-tei-reranking-server -- search-reranking-server -- search-embedding-server -- search-llm-server -- search-backend-server -- search-ui-server - -Containers should not restart. - -#### 3.1.1. Configuring GPU forwarding - -By default, in the Docker Compose file, compose_vllm.yaml is configured to forward all GPUs to the search-vllm-service container. -To use certain GPUs, you need to configure the forwarding of certain devices from the host system to the container. 
-The configuration must be done in: - -```yaml -services: - ####### - vllm-service: - devices: -``` - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -### 3.2. Checking the application services - -#### 3.2.1. Checking vllm-service - -Verification is performed in two ways: - -- Checking the container logs - - ```bash - docker logs search-vllm-service - ``` - - A message like this should appear in the logs: - - ```textmate - INFO: Started server process [1] - INFO: Waiting for application startup. - INFO: Application startup complete. - INFO: Uvicorn running on http://0.0.0.0:8011 (Press CTRL+C to quit) - ``` - -- Сhecking the response from the service - ```bash - ### curl request - ### Replace 18110 with the value set in the startup script in the variable VLLM_SERVICE_PORT - curl http://${HOST_IP}:${SEARCH_VLLM_SERVICE_PORT}/v1/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "Intel/neural-chat-7b-v3-3", - "prompt": "What is a Deep Learning?", - "max_tokens": 30, - "temperature": 0 - }' - ``` - The response from the service must be in the form of JSON: - ```json - { - "id": "cmpl-1d7d175d36d0491cba3abaa8b5bd6991", - "object": "text_completion", - "created": 1740411135, - "model": "Intel/neural-chat-7b-v3-3", - "choices": [ - { - "index": 0, - "text": " Deep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. 
It is called \"deep\" because it", - "logprobs": null, - "finish_reason": "length", - "stop_reason": null, - "prompt_logprobs": null - } - ], - "usage": { "prompt_tokens": 7, "total_tokens": 37, "completion_tokens": 30, "prompt_tokens_details": null } - } - ``` - The value of "choice.text" must contain a response from the service that makes sense. - If such a response is present, then the search-vllm-service is considered verified. - -#### 3.2.2. Checking search-llm - -Сhecking the response from the service - -```bash -curl http://${HOST_IP}:${SEARCH_LLM_SERVICE_PORT}/v1/chat/completions\ - -X POST \ - -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"stream":true}' \ - -H 'Content-Type: application/json' -``` - -The response from the service must be in the form of JSON: - -```textmate -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"\n","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"\n","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"Deep","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" Learning","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: 
{"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" is","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" a","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" subset","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" of","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" Machine","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" Learning","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" that","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: 
{"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" is","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" concerned","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" with","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" algorithms","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" inspired","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-ee61c243a172423d836f78cfddb63b93","choices":[{"finish_reason":"length","index":0,"logprobs":null,"text":" by","stop_reason":null}],"created":1741715027,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: [DONE] -``` - -The value of "choices.text" must contain a response from the service that makes sense. -If such a response is present, then the search-llm is considered verified. - -#### 3.2.3. 
Checking search-tei-embedding-service - -Сhecking the response from the service - -```bash -curl http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT}/embed \ - -X POST \ - -d '{"inputs":"What is Deep Learning?"}' \ - -H 'Content-Type: application/json' -``` - -The response from the service must be in the form of text: - -```textmate -[[0.00037115702,-0.06356819,0.0024758505,........-0.08894698,0.045917906,-0.00475913,0.034920968,-0.0064531155,-0.00689886,-0.06119457,0.021173967,-0.027787622,-0.02472986,0.03998034,0.03737826,-0.0067949123,0.022558564,-0.04570635,-0.033072025,0.022725677,0.016026087,-0.02125421,-0.02984927,-0.0049473033]] -``` - -If the output value is similar to the example given, we consider the service to be successfully launched. - -#### 3.2.4. Checking search-embedding - -Сhecking the response from the service - -```bash -curl http://${HOST_IP}:${SEARCH_EMBEDDING_SERVICE_PORT}/v1/embeddings\ - -X POST \ - -d '{"input":"hello"}' \ - -H 'Content-Type: application/json' -``` - -The response from the service must be in the form of text: - -```json -{ - "object": "list", - "model": "BAAI/bge-base-en-v1.5", - "data": [ - { - "index": 0, - "object": "embedding", - "embedding": [0.0007791813, 0.042613804, 0.020304274, -0.0070378557, 0.059366036, -0.0044034636] - } - ], - "usage": { "prompt_tokens": 3, "total_tokens": 3, "completion_tokens": 0 } -} -``` - -If the output value is similar to the example given, we consider the service to be successfully launched. - -#### 3.2.5. 
Checking search-web-retriever - -Сhecking the response from the service - -```bash -export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") -curl http://${HOST_IP}:${SEARCH_WEB_RETRIEVER_SERVICE_PORT}/v1/web_retrieval \ - -X POST \ - -d "{\"text\":\"What is the 2024 holiday schedule?\",\"embedding\":${your_embedding}}" \ - -H 'Content-Type: application/json' -``` - -The response from the service must be in the form of JSON: - -```textmate -{ - "id": "67cace517e36aff3f10a756b87a9125b", - "retrieved_docs": [ - { - "downstream_black_list": [], - "id": "4ba6bc05cff877011da321bbd03c05a8", - "text": "* Message from the Director\n * Introduction\n * Workforce Planning and AnalysisToggle submenu\n * Early Career Talent\n * Evaluation System Development\n * Innovation\n * Leading Practices\n * Resources\n * Career Paths for Federal Program and Project Management GuideToggle submenu\n * Introduction\n * Purpose and Objectives\n * Data and Methodology\n * Differentiating Job Titles for Program and Project Managers\n * Understanding the Career Path\n * Success Factors\n * Competency Development Framework Part I\n * Competency Development Framework Part II\n * Credentials and Certifications\n * Appendix A: Key Terminology\n * Appendix B: Subject Matter Expert (SME) Workshops List of Participating Agencies\n * Appendix C: List of Designated Title & Number for Each Job Series\n * Appendix D: Program and Project Competency Model and Competency Definitions\n * Appendix E: Program and Project Management Competency Model Proficiency Level\n * FY 2024 Human Capital ReviewsToggle submenu\n * Message from the Director\n * Introduction\n * Data Driven Decision Making\n * Strategic Planning\n * Artificial Intelligence\n * Resources \n description: Welcome to opm.gov \n \n title: Federal Holidays \n \n source: https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/ \n" - }, - { - "downstream_black_list": 
[], - "id": "b3908b0a74cb115a09a0928beda79bc5", - "text": "If you have a question whether a particular State office is open or closed,\nplease contact that office.\n\nBack to Top\n\nCommissioner Beth Fastiggi \n120 State Street, Montpelier, VT 05620 \n(802) 828-3491 \nDHR.General@vermont.gov\n\n_Contact Us \nHR Field Representative Locator_ \n_Alerts/Closings/Delays_ \nCurrent Road & Driving Conditions \nPublic Records Officer Contact Information and Public Records Database \nTransparent and Open Government Information​\n\n### Connect with Us\n\nTwitter\n\nFacebook\n\nInstagram\n\nYouTube\n\nRSS\n\n## Need Assistance?\n\nClick here for a list of Department contacts.\n\n\n\n## How Do I?\n\n * Learn about benefit plans\n * Contact EAP\n * Answer payroll questions\n * See pay periods & pay dates\n * Find wellness programs\n * Learn about leave benefits\n * Make a public records request\n * Find info about classification\n * OWA Email Login\n * Find workforce reports\n * View Workforce Dashboard\n * Employment Verification\n\n## Popular Links\n\n * Classroom/Online Training\n * Collective Bargaining Agreements\n * Employee/Manager Self Service\n * Job Specifications\n * Pay Charts\n * Pay Dates\n * Policy & Procedure Manual\n * Retirement Planning\n * Retirement Office \n * State Holiday Schedule\n * Time Entry & Approval\n * VTHR Login\n\n__\n\nReturn to top\n\nCopyright (C) 2025 State of Vermont All rights reserved. 
| \n\n * Policies\n * Accessibility Policy\n * Privacy Policy\n * Feedback Survey \n title: State Holiday Schedule | Department of Human Resources \n \n source: https://humanresources.vermont.gov/benefits-wellness/holiday-schedule \n" - }, - { - "downstream_black_list": [], - "id": "b52e0a8865ebfc6f93cc5e366e9b57b0", - "text": "## Revenue and Spending\n\n * Contracts \n * Monthly Revenue Watch\n * Severance Taxes\n * Sources of Revenue Guide\n * Taxes of Texas: A Field Guide (PDF)\n\n## Budget and \nFinance\n\n * Financial Reports and Forecasts\n * Budget Process Primer\n * Texas Investments\n * Understanding the ACFR\n * Cash Report and ACFR (PDF)\n\n## Open Data Tools and Information\n\n * Analysis and Reports\n * Dashboards\n * Pension Search Tool\n * Search Datasets\n * Secure Information and File Transfer (SIFT)\n\n## Local Governments\n\n * Eminent Domain\n * Hotel Occupancy Tax\n * Local Government Debt\n * Sheriffs' and Constables' Fees\n * SPD Financials and Taxes \n * Tax Allocation Summaries\n * Transparency Stars\n * Reports and Tools\n\n * Economy\n\n## __ Economy Home\n\n## Fiscal Notes\n\n * Latest Articles\n * Archives\n * About _Fiscal Notes_\n * Republish\n\n## In Depth\n\n * Regional Reports\n * Forecasts\n * Key Economic Indicators\n * Economic Data(Good for Texas Tours)\n * Special Reports\n\n## Economic Development Programs\n\n * Property Tax Programs\n * Sales Tax Programs\n * Grants and Special Assessments\n * Search Tools and Data\n\n * Purchasing\n\n## __ Purchasing Home\n\n## Statewide Contracts\n\n * Search Statewide Contracts\n * Contract Development\n * Contract Management\n * Procurement Oversight & Delegation\n * Texas Multiple Award Schedule (TXMAS)\n * txsmartbuy.gov\n * DIR Contracts \n description: Office holiday schedule for fiscal 2024. 
\n \n title: State of Texas Holiday Schedule - Fiscal 2025 \n \n source: https://comptroller.texas.gov/about/holidays.php \n" - }, - { - "downstream_black_list": [], - "id": "ee75f07d60742868abfae486bbc1849d", - "text": "Skip to page navigation\n\nAn official website of the United States government\n\nHere's how you know\n\nHere's how you know\n\n**Official websites use .gov** \nA **.gov** website belongs to an official government organization in the\nUnited States.\n\n**Secure .gov websites use HTTPS** \nA **lock** ( Lock A locked padlock ) or **https://** means you’ve safely\nconnected to the .gov website. Share sensitive information only on official,\nsecure websites.\n\nMenu\n\nSearch all of OPM Submit\n\nSections\n\n * About Toggle submenu\n\n * Our Agency\n * Who We Are\n * Our Work\n * Mission & History\n * Careers at OPM\n * Doing Business with OPM\n * Reports & Publications\n * Open Government\n * Get Help\n * Contact Us\n * News\n * Data\n * 2023 Agency Financial Report\n * Combined Federal Campaign\n * 2023 Annual Performance Report\n * FY 2025 Congressional Budget Justification\n * 2024 Agency Financial Report\n * 2024 Annual Performance Report\n\n * Policy Toggle submenu \n description: Welcome to opm.gov \n \n title: Federal Holidays \n \n source: https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/ \n" - } - ], - "initial_query": "What is the 2024 holiday schedule?", - "top_n": 1 -} -``` - -The value of "retrieved_docs.text" must contain a response from the service that makes sense. -If such a response is present, then the search-web-retriever is considered verified. - -#### 3.2.6. 
Checking search-tei-reranking-service - -Сhecking the response from the service - -```bash -curl http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT}/rerank \ - -X POST \ - -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \ - -H 'Content-Type: application/json' -``` - -The response from the service must be in the form of JSON: - -```json -[ - { "index": 1, "score": 0.94238955 }, - { "index": 0, "score": 0.120219156 } -] -``` - -If the output value is similar to the example given, we consider the service to be successfully launched. - -#### 3.2.7. Checking search-reranking - -Сhecking the response from the service - -```bash -curl http://${HOST_IP}:${SEARCH_RERANK_SERVICE_PORT}/v1/reranking\ - -X POST \ - -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ - -H 'Content-Type: application/json' -``` - -The response from the service must be in the form of JSON: - -```json -{ - "id": "26e5d7f6259b8a184387f13fc9c54038", - "model": null, - "query": "What is Deep Learning?", - "max_tokens": 1024, - "max_new_tokens": 1024, - "top_k": 10, - "top_p": 0.95, - "typical_p": 0.95, - "temperature": 0.01, - "frequency_penalty": 0.0, - "presence_penalty": 0.0, - "repetition_penalty": 1.03, - "stream": true, - "language": "auto", - "chat_template": null, - "documents": ["Deep learning is..."] -} -``` - -If the output value is similar to the example given, we consider the service to be successfully launched. - -#### 3.2.8. Checking search-backend-server - -Сhecking the response from the service - -```bash -curl http://${HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna -H "Content-Type: application/json" -d '{ - "messages": "What is the latest news? 
Give me also the source link.", - "stream": "True" - }' -``` - -The response from the service must be in the form of JSON: - -```textmate -data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"\n","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" ","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" View","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" the","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" latest","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" news","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: 
{"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":".","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -....... -data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"com","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"/","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: {"id":"cmpl-602c2cd2745c4095ad8957f7e5ed8ca7","choices":[{"finish_reason":"stop","index":0,"logprobs":null,"text":"","stop_reason":null}],"created":1741716737,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} -data: [DONE] -``` - -If the output value is similar to the example given, we consider the service to be successfully launched. 
diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 907a02ea58..eee4f25701 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -97,7 +97,7 @@ services: WILM_USE_TRITON_FLASH_ATTENTION: 0 PYTORCH_JIT: 0 volumes: - - "./data:/data" + - "${MODEL_PATH:-./data}:/data" shm_size: 20G devices: - /dev/kfd:/dev/kfd From 0cb3cee02b721334d816f5d99fccb858b7950fae Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Mar 2025 10:57:50 +0000 Subject: [PATCH 19/44] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- SearchQnA/docker_compose/amd/gpu/rocm/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md index 5d688c682e..b358153251 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md @@ -105,7 +105,7 @@ - opea/searchqna:latest - opea/searchqna-ui:latest - opea/web-retriever:latest - + --- ## Deploy the SearchQnA Application @@ -259,6 +259,7 @@ All containers should be running and should not restart: - search-ui-server --- + ## Validate the Services ### 1. Validate the vLLM/TGI Service @@ -442,8 +443,6 @@ curl http://${SEARCH_HOST_IP}:3008/v1/searchqna -H "Content-Type: application/js }' ``` - - ### 5. 
Stop application #### If you use vLLM From c27fb91098c8701ea1c1d89a6a5c16be16b9338e Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Mon, 24 Mar 2025 20:01:32 +0700 Subject: [PATCH 20/44] Update README.md Signed-off-by: Artem Astafev --- .../docker_compose/amd/gpu/rocm/README.md | 97 ++++++++----------- 1 file changed, 43 insertions(+), 54 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md index 5d688c682e..7a1ee1f774 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md @@ -267,11 +267,9 @@ All containers should be running and should not restart: ```bash DATA='{"model": "Intel/neural-chat-7b-v3-3", '\ -'"messages": [{"role": "user", "content": "Implement a high-level API for a TODO list application. '\ -'The API takes as input an operation request and updates the TODO list in place. '\ -'If the request is invalid, raise an exception."}], "max_tokens": 256}' +'"messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}' -curl http://${HOST_IP}:${A_VLLM_SERVICE_PORT}/v1/chat/completions \ +curl http://${HOST_IP}:${SEARCHQNA_VLLM_SERVICE_PORT}/v1/chat/completions \ -X POST \ -d "$DATA" \ -H 'Content-Type: application/json' @@ -281,25 +279,24 @@ Checking the response from the service. 
The response should be similar to JSON: ````json { - "id": "chatcmpl-142f34ef35b64a8db3deedd170fed951", - "object": "chat.completion", - "created": 1742270316, - "model": "Intel/neural-chat-7b-v3-3", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "```python\nfrom typing import Optional, List, Dict, Union\nfrom pydantic import BaseModel, validator\n\nclass OperationRequest(BaseModel):\n # Assuming OperationRequest is already defined as per the given text\n pass\n\nclass UpdateOperation(OperationRequest):\n new_items: List[str]\n\n def apply_and_maybe_raise(self, updatable_item: \"Updatable todo list\") -> None:\n # Assuming updatable_item is an instance of Updatable todo list\n self.validate()\n updatable_item.add_items(self.new_items)\n\nclass Updatable:\n # Abstract class for items that can be updated\n pass\n\nclass TodoList(Updatable):\n # Class that represents a todo list\n items: List[str]\n\n def add_items(self, new_items: List[str]) -> None:\n self.items.extend(new_items)\n\ndef handle_request(operation_request: OperationRequest) -> None:\n # Function to handle an operation request\n if isinstance(operation_request, UpdateOperation):\n operation_request.apply_and_maybe_raise(get_todo_list_for_update())\n else:\n raise ValueError(\"Invalid operation request\")\n\ndef get_todo_list_for_update() -> TodoList:\n # Function to get the todo list for update\n # Assuming this function returns the", - "tool_calls": [] + "id":"chatcmpl-512d16e876774d13a323514e96122cbc", + "object":"chat.completion", + "created":1742819098, + "model":"Intel/neural-chat-7b-v3-3", + "choices":[ + { + "index":0, + "message":{ + "role":"assistant", + "content":" Deep Learning is a subset of Machine Learning that relies on Artificial Neural Networks to perform tasks like image recognition, natural language processing, and predictive analytics. It aims to model the inner workings of the human brain through the intelligent analysis of big data. 
Essentially, Deep Learning algorithms try to process and learn from raw data in multiple layers to extract patterns, ultimately enabling systems to recognize complex patterns and make predictions more accurately. Although thoughest it involves much data and computation power, recently it is often referred through increasingly simple approaches thanks to advances in computation hardware. Deep Learning developed from previous techniques like neural networks, but has proven more powerful and effective for various tasks that involve massive data volumes and complex decisions.",
+        "tool_calls":[]
       },
-      "logprobs": null,
-      "finish_reason": "length",
-      "stop_reason": null
-    }
-  ],
-  "usage": { "prompt_tokens": 66, "total_tokens": 322, "completion_tokens": 256, "prompt_tokens_details": null },
-  "prompt_logprobs": null
+      "logprobs":null,
+      "finish_reason":"stop",
+      "stop_reason":null}
+    ],
+    "usage":{"prompt_tokens":15,"total_tokens":161,"completion_tokens":146,"prompt_tokens_details":null},
+    "prompt_logprobs":null
 }
 ````
 
@@ -309,12 +306,10 @@ then we consider the vLLM service to be successfully launched
 #### If you use TGI:
 
 ```bash
-DATA='{"inputs":"Implement a high-level API for a TODO list application. '\
-'The API takes as input an operation request and updates the TODO list in place. '\
-'If the request is invalid, raise an exception.",'\
+DATA='{"inputs":"What is Deep Learning?",'\
 '"parameters":{"max_new_tokens":256,"do_sample": true}}'
 
-curl http://${HOST_IP}:${CODEGEN_TGI_SERVICE_PORT}/generate \
+curl http://${HOST_IP}:${SEARCHQNA_TGI_SERVICE_PORT}/generate \
   -X POST \
   -d "$DATA" \
   -H 'Content-Type: application/json'
@@ -324,7 +319,7 @@ Checking the response from the service. The response should be similar to JSON:
 
 ````json
 {
-  "generated_text": " The supported operations are \"add_task\", \"complete_task\", and \"remove_task\". 
Each operation can be defined with a corresponding function in the API.\n\nAdd your API in the following format:\n\n```\nTODO App API\n\nsupported operations:\n\noperation name description\n----------------------- ------------------------------------------------\n \n```\n\nUse type hints for function parameters and return values. Specify a text description of the API's supported operations.\n\nUse the following code snippet as a starting point for your high-level API function:\n\n```\nclass TodoAPI:\n def __init__(self, tasks: List[str]):\n self.tasks = tasks # List of tasks to manage\n\n def add_task(self, task: str) -> None:\n self.tasks.append(task)\n\n def complete_task(self, task: str) -> None:\n self.tasks = [t for t in self.tasks if t != task]\n\n def remove_task(self, task: str) -> None:\n self.tasks = [t for t in self.tasks if t != task]\n\n def handle_request(self, request: Dict[str, str]) -> None:\n operation = request.get('operation')\n if operation == 'add_task':\n self.add_task(request.get('task'))\n elif" + "generated_text":"\n\nDeep Learning is a subset of machine learning, which focuses on developing methods inspired by the functioning of the human brain; more specifically, the way it processes and acquires various types of knowledge and information. To enable deep learning, the networks are composed of multiple processing layers that form a hierarchy, with each layer learning more complex and abstraction levels of data representation.\n\nThe principle of Deep Learning is to emulate the structure of neurons in the human brain to construct artificial neural networks capable to accomplish complicated pattern recognition tasks more effectively and accurately. Therefore, these neural networks contain a series of hierarchical components, where units in earlier layers receive simple inputs and are activated by these inputs. 
The activation of the units in later layers are the results of multiple nonlinear transformations generated from reconstructing and integrating the information in previous layers. In other words, by combining various pieces of information at each layer, a Deep Learning network can extract the input features that best represent the structure of data, providing their outputs at the last layer or final level of abstraction.\n\nThe main idea of using these 'deep' networks in contrast to regular algorithms is that they are capable of representing hierarchical relationships that exist within the data and learn these representations by" } ```` @@ -334,13 +329,11 @@ then we consider the TGI service to be successfully launched ### 2. Validate the LLM Service ```bash -DATA='{"query":"Implement a high-level API for a TODO list application. '\ -'The API takes as input an operation request and updates the TODO list in place. '\ -'If the request is invalid, raise an exception.",'\ +DATA='{"query":"What is Deep Learning?",'\ '"max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,'\ '"repetition_penalty":1.03,"stream":false}' -curl http://${HOST_IP}:${CODEGEN_LLM_SERVICE_PORT}/v1/chat/completions \ +curl http://${HOST_IP}:${SEARCHQNA}/v1/chat/completions \ -X POST \ -d "$DATA" \ -H 'Content-Type: application/json' @@ -350,35 +343,31 @@ Checking the response from the service. The response should be similar to JSON: ````json { - "id": "cmpl-4e89a590b1af46bfb37ce8f12b2996f8", - "choices": [ - { - "finish_reason": "length", - "index": 0, - "logprobs": null, - "text": " The API should support the following operations:\n\n1. Add a new task to the TODO list.\n2. Remove a task from the TODO list.\n3. Mark a task as completed.\n4. Retrieve the list of all tasks.\n\nThe API should also support the following features:\n\n1. The ability to filter tasks based on their completion status.\n2. The ability to sort tasks based on their priority.\n3. 
The ability to search for tasks based on their description.\n\nHere is an example of how the API can be used:\n\n```python\ntodo_list = []\napi = TodoListAPI(todo_list)\n\n# Add tasks\napi.add_task(\"Buy groceries\")\napi.add_task(\"Finish homework\")\n\n# Mark a task as completed\napi.mark_task_completed(\"Buy groceries\")\n\n# Retrieve the list of all tasks\nprint(api.get_all_tasks())\n\n# Filter tasks based on completion status\nprint(api.filter_tasks(completed=True))\n\n# Sort tasks based on priority\napi.sort_tasks(priority=\"high\")\n\n# Search for tasks based on description\nprint(api.search_tasks(description=\"homework\"))\n```\n\nIn this example, the `TodoListAPI` class is used to manage the TODO list. The `add_task` method adds a new task to the list, the `mark_task_completed` method", - "stop_reason": null, - "prompt_logprobs": null - } - ], - "created": 1742270567, - "model": "Qwen/Qwen2.5-Coder-7B-Instruct", - "object": "text_completion", - "system_fingerprint": null, - "usage": { - "completion_tokens": 256, - "prompt_tokens": 37, - "total_tokens": 293, - "completion_tokens_details": null, - "prompt_tokens_details": null - } + "id":"chatcmpl-512d16e876774d13a323514e96122cbc", + "object":"chat.completion", + "created":1742819098, + "model":"Intel/neural-chat-7b-v3-3", + "choices":[ + { + "index":0, + "message":{ + "role":"assistant", + "content":" Deep Learning is a subset of Machine Learning that relies on Artificial Neural Networks to perform tasks like image recognition, natural language processing, and predictive analytics. It aims to model the inner workings of the human brain through the intelligent analysis of big data. Essentially, Deep Learning algorithms try to process and learn from raw data in multiple layers to extract patterns, ultimately enabling systems to recognize complex patterns and make predictions more accurately. 
Although thoughest it involves much data and computation power, recently it is often referred through increasingly simple approaches thanks to advances in computation hardware. Deep Learning developed from previous techniques like neural networks, but has proven more powerful and effective for various tasks that involve massive data volumes and complex decisions.", + "tool_calls":[] + }, + "logprobs":null, + "finish_reason":"stop", + "stop_reason":null} + ], + "usage":{"prompt_tokens":15,"total_tokens":161,"completion_tokens":146,"prompt_tokens_details":null}, + "prompt_logprobs":null } ```` If the service response has a meaningful response in the value of the "choices.text" key, then we consider the vLLM service to be successfully launched -### 2. Validate Embedding service +### 3. Validate Embedding service ```bash curl http://${SEARCH_HOST_IP}:3002/v1/embeddings\ From 73fc5224e19b14d4b3b692d6ee58e4d16d399a18 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Mar 2025 13:03:11 +0000 Subject: [PATCH 21/44] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../docker_compose/amd/gpu/rocm/README.md | 84 ++++++++++--------- 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md index d2f476483a..a4e124eb80 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md @@ -278,28 +278,29 @@ curl http://${HOST_IP}:${SEARCHQNA_VLLM_SERVICE_PORT}/v1/chat/completions \ Checking the response from the service. 
The response should be similar to JSON: -````json +```json { - "id":"chatcmpl-512d16e876774d13a323514e96122cbc", - "object":"chat.completion", - "created":1742819098, - "model":"Intel/neural-chat-7b-v3-3", - "choices":[ - { - "index":0, - "message":{ - "role":"assistant", - "content":" Deep Learning is a subset of Machine Learning that relies on Artificial Neural Networks to perform tasks like image recognition, natural language processing, and predictive analytics. It aims to model the inner workings of the human brain through the intelligent analysis of big data. Essentially, Deep Learning algorithms try to process and learn from raw data in multiple layers to extract patterns, ultimately enabling systems to recognize complex patterns and make predictions more accurately. Although thoughest it involves much data and computation power, recently it is often referred through increasingly simple approaches thanks to advances in computation hardware. Deep Learning developed from previous techniques like neural networks, but has proven more powerful and effective for various tasks that involve massive data volumes and complex decisions.", - "tool_calls":[] + "id": "chatcmpl-512d16e876774d13a323514e96122cbc", + "object": "chat.completion", + "created": 1742819098, + "model": "Intel/neural-chat-7b-v3-3", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": " Deep Learning is a subset of Machine Learning that relies on Artificial Neural Networks to perform tasks like image recognition, natural language processing, and predictive analytics. It aims to model the inner workings of the human brain through the intelligent analysis of big data. Essentially, Deep Learning algorithms try to process and learn from raw data in multiple layers to extract patterns, ultimately enabling systems to recognize complex patterns and make predictions more accurately. 
Although thoughest it involves much data and computation power, recently it is often referred through increasingly simple approaches thanks to advances in computation hardware. Deep Learning developed from previous techniques like neural networks, but has proven more powerful and effective for various tasks that involve massive data volumes and complex decisions.", + "tool_calls": [] }, - "logprobs":null, - "finish_reason":"stop", - "stop_reason":null} - ], - "usage":{"prompt_tokens":15,"total_tokens":161,"completion_tokens":146,"prompt_tokens_details":null}, - "prompt_logprobs":null + "logprobs": null, + "finish_reason": "stop", + "stop_reason": null + } + ], + "usage": { "prompt_tokens": 15, "total_tokens": 161, "completion_tokens": 146, "prompt_tokens_details": null }, + "prompt_logprobs": null } -```` +``` If the service response has a meaningful response in the value of the "choices.message.content" key, then we consider the vLLM service to be successfully launched @@ -318,11 +319,11 @@ curl http://${HOST_IP}:${SEARCHQNA_TGI_SERVICE_PORT}/generate \ Checking the response from the service. The response should be similar to JSON: -````json +```json { - "generated_text":"\n\nDeep Learning is a subset of machine learning, which focuses on developing methods inspired by the functioning of the human brain; more specifically, the way it processes and acquires various types of knowledge and information. To enable deep learning, the networks are composed of multiple processing layers that form a hierarchy, with each layer learning more complex and abstraction levels of data representation.\n\nThe principle of Deep Learning is to emulate the structure of neurons in the human brain to construct artificial neural networks capable to accomplish complicated pattern recognition tasks more effectively and accurately. 
Therefore, these neural networks contain a series of hierarchical components, where units in earlier layers receive simple inputs and are activated by these inputs. The activation of the units in later layers are the results of multiple nonlinear transformations generated from reconstructing and integrating the information in previous layers. In other words, by combining various pieces of information at each layer, a Deep Learning network can extract the input features that best represent the structure of data, providing their outputs at the last layer or final level of abstraction.\n\nThe main idea of using these 'deep' networks in contrast to regular algorithms is that they are capable of representing hierarchical relationships that exist within the data and learn these representations by" + "generated_text": "\n\nDeep Learning is a subset of machine learning, which focuses on developing methods inspired by the functioning of the human brain; more specifically, the way it processes and acquires various types of knowledge and information. To enable deep learning, the networks are composed of multiple processing layers that form a hierarchy, with each layer learning more complex and abstraction levels of data representation.\n\nThe principle of Deep Learning is to emulate the structure of neurons in the human brain to construct artificial neural networks capable to accomplish complicated pattern recognition tasks more effectively and accurately. Therefore, these neural networks contain a series of hierarchical components, where units in earlier layers receive simple inputs and are activated by these inputs. The activation of the units in later layers are the results of multiple nonlinear transformations generated from reconstructing and integrating the information in previous layers. 
In other words, by combining various pieces of information at each layer, a Deep Learning network can extract the input features that best represent the structure of data, providing their outputs at the last layer or final level of abstraction.\n\nThe main idea of using these 'deep' networks in contrast to regular algorithms is that they are capable of representing hierarchical relationships that exist within the data and learn these representations by" } -```` +``` If the service response has a meaningful response in the value of the "generated_text" key, then we consider the TGI service to be successfully launched @@ -342,28 +343,29 @@ curl http://${HOST_IP}:${SEARCHQNA}/v1/chat/completions \ Checking the response from the service. The response should be similar to JSON: -````json +```json { - "id":"chatcmpl-512d16e876774d13a323514e96122cbc", - "object":"chat.completion", - "created":1742819098, - "model":"Intel/neural-chat-7b-v3-3", - "choices":[ - { - "index":0, - "message":{ - "role":"assistant", - "content":" Deep Learning is a subset of Machine Learning that relies on Artificial Neural Networks to perform tasks like image recognition, natural language processing, and predictive analytics. It aims to model the inner workings of the human brain through the intelligent analysis of big data. Essentially, Deep Learning algorithms try to process and learn from raw data in multiple layers to extract patterns, ultimately enabling systems to recognize complex patterns and make predictions more accurately. Although thoughest it involves much data and computation power, recently it is often referred through increasingly simple approaches thanks to advances in computation hardware. 
Deep Learning developed from previous techniques like neural networks, but has proven more powerful and effective for various tasks that involve massive data volumes and complex decisions.", - "tool_calls":[] + "id": "chatcmpl-512d16e876774d13a323514e96122cbc", + "object": "chat.completion", + "created": 1742819098, + "model": "Intel/neural-chat-7b-v3-3", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": " Deep Learning is a subset of Machine Learning that relies on Artificial Neural Networks to perform tasks like image recognition, natural language processing, and predictive analytics. It aims to model the inner workings of the human brain through the intelligent analysis of big data. Essentially, Deep Learning algorithms try to process and learn from raw data in multiple layers to extract patterns, ultimately enabling systems to recognize complex patterns and make predictions more accurately. Although thoughest it involves much data and computation power, recently it is often referred through increasingly simple approaches thanks to advances in computation hardware. 
Deep Learning developed from previous techniques like neural networks, but has proven more powerful and effective for various tasks that involve massive data volumes and complex decisions.", + "tool_calls": [] }, - "logprobs":null, - "finish_reason":"stop", - "stop_reason":null} - ], - "usage":{"prompt_tokens":15,"total_tokens":161,"completion_tokens":146,"prompt_tokens_details":null}, - "prompt_logprobs":null + "logprobs": null, + "finish_reason": "stop", + "stop_reason": null + } + ], + "usage": { "prompt_tokens": 15, "total_tokens": 161, "completion_tokens": 146, "prompt_tokens_details": null }, + "prompt_logprobs": null } -```` +``` If the service response has a meaningful response in the value of the "choices.text" key, then we consider the vLLM service to be successfully launched From 74621ab7471926673e3d1eb16e827b8fdd8ceea7 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Tue, 25 Mar 2025 11:07:34 +0700 Subject: [PATCH 22/44] Update Readme.md and set_env files Signed-off-by: Artem Astafev --- .../docker_compose/amd/gpu/rocm/README.md | 57 ++++++++++--------- .../docker_compose/amd/gpu/rocm/set_env.sh | 6 +- .../amd/gpu/rocm/set_env_vllm.sh | 4 +- 3 files changed, 36 insertions(+), 31 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md index d2f476483a..8730a64ccc 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md @@ -334,7 +334,7 @@ DATA='{"query":"What is Deep Learning?",'\ '"max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,'\ '"repetition_penalty":1.03,"stream":false}' -curl http://${HOST_IP}:${SEARCHQNA}/v1/chat/completions \ +curl http://${HOST_IP}:${SEARCHQNA_TGI_SERVICE_PORT}/v1/chat/completions \ -X POST \ -d "$DATA" \ -H 'Content-Type: application/json' @@ -371,13 +371,28 @@ then we consider the vLLM service to be successfully launched ### 3. 
Validate Embedding service
 
 ```bash
-curl http://${SEARCH_HOST_IP}:3002/v1/embeddings\
+curl http://${SEARCH_HOST_IP}:${SEARCHQNA_TGI_SERVICE_PORT}/v1/embeddings\
   -X POST \
   -d '{"text":"hello"}' \
   -H 'Content-Type: application/json'
 ```
 
-## Validate Web Retriever service
+Checking the response from the service. The response should be similar to JSON:
+
+````json
+{
+  "detail":[
+    {
+      "type":"missing",
+      "loc":["body","input"],
+      "msg":"Field required",
+      "input":{"text":"hello"}
+    }
+  ]
+}
+````
+
+### 4. Validate Web Retriever service
 
 ```bash
 export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
@@ -387,43 +402,33 @@ curl http://${SEARCH_HOST_IP}:3003/v1/web_retrieval \
   -H 'Content-Type: application/json'
 ```
 
-## Validate TEI Reranking service
+### 5. Validate the TEI Service
 
 ```bash
-curl http://${SEARCH_HOST_IP}:3004/rerank \
-  -X POST \
-  -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
-  -H 'Content-Type: application/json'
-```
-
-## Validate Reranking service
+DATA='{"inputs":"What is Deep Learning?"}'
 
-```bash
-curl http://${SEARCH_HOST_IP}:3005/v1/reranking\
+curl http://${HOST_IP}:${SEARCHQNA_TEI_SERVICE_PORT}/embed \
   -X POST \
-  -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
+  -d "$DATA" \
   -H 'Content-Type: application/json'
 ```
 
-## Validate TGI service
+Checking the response from the service. 
The response should be similar to Array of Data: -```bash -curl http://${SEARCH_HOST_IP}:3006/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ - -H 'Content-Type: application/json' -``` +````json +[[0.00037115702,-0.06356819,0.0024758505,-0.012360337,0.050739925,0.023380278,0.022216318,0.0008076447,-0.0003412891,-0.033479452,-0.024872458,0.0064700204,-0.00731641,0.06648339,0.0013361155,0.047004532,0.062838696,-0.021005465,0.011151533,0.044124223,-0.050683793,-0.062093593,-0.03992629,0.017758112,-0.0013727234,0.0022603935,-0.04363493,0.012822347,-0.02408198,0.011079012,0.028809752,-0.008898206,0.037047423,-0.027456092,0.016162485,0.04173568,-0.039396558,-0.035203997,-0.022387454,-0.019808905,-0.01864915,-0.042313505,-0.0120891025,0.048949677,-0.08100209,0.017953783,-0.12084276,0.0024097406,-0.022705944,-0.012279724,-0.07547717,0.051262986,0.03203861,-0.019056482,0.04271625,0.015248945,0.004222296,-0.08073051,0.010240495,-0.05635268,0.052041706,0.03712775,-0.01854724,-0.02750096,-0.00096631586,-0.026202224,0.024124105,0.042904165,-0.023528703,-0.0034956702,-0.028778492,0.029217377,-0.020601744,-0.0049860086,-0.05246627,-0.011162583,0.012888553,0.014507065,0.08219481,-0.008273658,0.0036607939,0.062248874,0.042562004,0.03170365,0.0046070544,0.00065274147,-0.019365542,-0.004698561,-0.0449053,0.02275239,0.01039843,-0.053169794,0.060175993,0.051545423,0.014204941,0.0076600607,0.013906856,-0.035385784,-0.011683805,-0.014732695,-0.02331647,-0.059045117,-0.016870823,-0.014698294,-0.048483565,0.026726946,0.05227064,-0.013973138,0.014551645,-0.019573484,-0.0013427412,-0.008475066,-0.0025058866,-0.048502546,-0.043069497,-0.0077841803,-0.016379999,0.0037450534,-0.025010578,-0.04592572,0.034388185,0.03836159,0.0019682923,0.021373231,-0.03391387,0.015393363,0.003937917,0.01832765,0.0045520393,-0.02696203,0.020696502,0.016930614,-0.007926859,0.021834886,-0.014779224,0.00073025556,-0.020250296,0.006635754,0.0257
85012,0.009847587,-0.002533611,-0.057919327,0.03010091,-0.03554674,0.054443054,-0.015446536,-0.0079982905,-0.0042982297,-0.018884834,0.0027541735,-0.044417977,0.05555447,-0.018901609,-0.049503766,0.008309782,0.039867956,-0.0004423662,0.0059798234,0.03447887,0.023205558,0.058959927,-0.019526886,-0.054637823,-0.009800092,-0.024515655,-0.05426387,0.05535355,0.024482403,-0.020081121,0.024965372,-0.002176406,-0.011429285,0.02036594,-0.011996402,0.011601014,0.04732072,0.028819714,0.03407571,0.0430521,0.05145868,-0.065615594,0.046596047,-0.008815781,-0.0063788523,-0.044762302,-0.03171996,0.04966251,-0.010887125,0.036779672,0.014379601,-0.06393863,-0.036413074,-0.033719108,-0.037734028,0.033251368,-0.01693572,-0.015116194,0.082118206,-0.011095621,0.046565905,0.054315507,-0.051471975,0.0153609,-0.016379755,-0.02725454,0.029903106,0.01588181,-0.043773234,-0.0034126595,0.0034703915,0.0074963053,-0.049301904,-0.005326988,0.0014556781,0.043266784,0.03043187,-0.008008064,-0.0047954894,0.0065719066,-0.018209687,0.00520577,-0.04222329,0.024618099,0.0030018033,0.008215917,0.088026844,0.041226704,-0.05174175,0.035067245,-0.037319127,0.0037409177,0.024523623,-0.0126059465,0.019197112,0.013823613,-0.02756309,0.014537172,0.010373209,0.045283005,-0.033583794,-0.07042238,0.0071703074,-0.047405772,0.052970607,0.01187145,0.009470498,0.033309255,-0.014022496,-0.01466476,-0.016799983,-0.004560339,-0.00007741032,0.016623817,0.02886948,-0.023846539,-0.05926324,0.0019861246,-0.0097210035,0.10283416,0.027582858,-0.050722197,0.051445477,-0.027595742,0.022260211,-0.025540655,-0.09528184,-0.028447622,-0.020006616,0.08766454,-0.014110661,0.04828308,0.0074301455,0.03928737,-0.0000046884684,-0.026885474,0.005424345,0.054999787,0.055203326,-0.012640017,-0.0435913,-0.024285164,0.06663095,0.005627971,-0.015168387,0.027197381,-0.026075814,-0.003045215,-0.008655605,-0.009072627,0.004339306,0.03589536,0.061759293,-0.04240408,0.04873947,0.021134883,0.053518154,0.045864865,-0.027563328,-0.01566489,0.0001812510
5,-0.007070503,0.039647527,-0.021650534,0.038786504,0.02006178,-0.013114097,0.07950984,-0.014730525,-0.19681875,-0.013000412,0.018087342,-0.0073786196,0.038186155,-0.059353005,-0.0058362517,-0.009970051,0.0016716863,-0.023077143,-0.02714242,-0.006529649,0.037998736,0.025349554,0.019855456,-0.016530242,0.00880591,-0.016678277,-0.03673031,0.045423195,-0.03146899,-0.029318942,-0.012635296,0.071473934,-0.02904274,0.027330637,-0.084734075,-0.05050938,-0.0030655882,-0.0022098075,-0.02383695,-0.028460467,-0.03240081,0.048773084,0.023262978,0.016216593,0.027833678,-0.039854486,-0.002443358,0.01758309,-0.033520985,-0.04862155,0.0030191801,-0.040858116,0.045017388,0.01576234,-0.09301789,-0.04828378,-0.014886363,0.0012595668,-0.010673225,-0.02463904,-0.06783802,-0.0012545382,0.015514673,-0.004911741,0.0025960177,-0.012014308,-0.024893451,0.036577918,-0.003223495,-0.020390507,-0.022805423,-0.059310623,-0.02081245,-0.023387661,-0.061122973,-0.06244,0.017364288,0.033477243,-0.010211365,0.04805492,-0.0644543,-0.048770227,0.0068986556,-0.025725175,-0.029574871,-0.00949049,0.05490974,0.027187059,0.00826158,-0.06282722,0.035274204,0.012130771,-0.009545266,-0.048487406,0.04640102,-0.037075754,-0.020248186,-0.02851919,0.064635284,-0.0064534973,-0.026640853,-0.026290758,0.035040796,0.020074066,0.0032996435,0.02883776,-0.012944289,0.019450067,-0.02121465,-0.024558635,-0.04377821,-0.016631315,-0.04083968,-0.021962307,-0.010120014,0.02998998,0.10129919,-0.0025703132,-0.03771752,0.01426784,0.025374308,0.00082124525,0.00029568642,-0.030749727,0.016260363,0.0014756168,0.018676473,-0.03861688,-0.032052398,0.056064054,0.005533946,0.04515451,0.015364342,-0.02965325,0.0009782034,0.01524649,0.019077078,-0.025799321,0.020865263,-0.00037949806,0.012502633,0.0090223905,-0.0015367466,-0.012833919,-0.011109666,-0.006981191,-0.009670439,0.009430074,-0.007729517,0.0016868497,0.016697595,-0.015892748,-0.020780738,0.049529854,-0.07344469,0.0607613,-0.0068755895,-0.014736902,0.014770749,-0.028858911,0.02524
9828,-0.058469485,0.030096894,-0.007117604,0.010155325,-0.0065526864,-0.028654601,-0.04420291,0.009965181,0.030222228,-0.010007972,0.0104629295,0.05589087,0.05443477,-0.02641796,-0.061689503,0.03118466,0.012150501,0.03404673,-0.029666431,-0.008654386,-0.031682808,-0.014843155,0.036703967,0.026411135,-0.005715008,0.024990784,0.058862202,0.017355891,0.039204415,-0.0034798204,0.033091135,0.050439566,0.032798093,-0.029705318,0.005968363,-0.055048566,0.028009748,-0.03823961,0.024362633,-0.017294712,-0.019563003,-0.019944556,-0.027790153,-0.01866823,0.047109686,-0.0033735516,-0.020653522,-0.039765686,-0.019055683,-0.0263571,-0.023188936,0.049641415,-0.077975206,0.030659853,0.048734687,0.044718176,0.036765084,-0.011803315,-0.027699227,-0.07258002,-0.08741319,-0.0392474,-0.042096145,-0.0040325304,0.01667375,0.026754893,-0.030304687,0.029919326,0.024295082,0.011638254,-0.012232291,-0.047564257,-0.036413006,0.026577674,0.036411874,0.00057670544,0.017877145,0.009268524,-0.006965588,0.011874776,-0.005112591,-0.034651127,0.03160231,-0.052825063,0.014719321,-0.0139615545,-0.016238235,0.002020219,0.02526055,-0.07056756,0.010022732,-0.014104433,-0.005984697,-0.00897443,0.021115793,-0.043804843,-0.027990978,0.060727082,0.0040618493,-0.038511537,-0.048857935,0.024104802,-0.059829835,-0.029107396,-0.05538522,-0.06930553,-0.0057559577,-0.022053827,-0.00876388,-0.0056931996,0.029746206,0.0224666,0.008767829,-0.03966822,-0.006478918,0.06567699,-0.01581077,-0.03742192,-0.06186453,-0.028619587,0.08638498,0.031267703,-0.0008673075,0.003113204,0.012213491,0.020067157,-0.02849485,0.0018909829,0.02714576,0.0026566028,-0.03609787,0.0060567204,-0.047545094,-0.0046444787,-0.021402694,-0.023118727,-0.015218381,-0.043136228,-0.0438743,-0.005564044,-0.009355076,-0.028500054,0.009921202,0.027966693,0.06036647,0.06929019,0.007004997,-0.024255225,0.04914266,0.0032520234,0.0044063884,-0.029372599,0.038042217,-0.035385627,-0.04905816,0.047601648,0.0071805464,-0.008339494,-0.035425205,0.036915354,0.024695
326,-0.038979523,0.01886513,0.013804558,-0.04848749,-0.04819779,0.022526458,-0.029244151,0.041152976,0.04666112,0.020387372,0.037857335,0.060002513,0.011064769,-0.032094717,0.070615225,0.04814509,0.017521046,0.074162334,-0.04956284,0.07335939,-0.009453019,-0.06289444,0.024246441,0.021851622,0.01857824,0.02037353,-0.017273203,0.021301785,0.05051385,0.053983003,-0.01588495,0.054096334,0.05107405,0.0720548,-0.029601721,0.04816011,0.006444874,-0.02505102,0.013238045,-0.021370836,0.025479412,-0.048463117,0.03514722,0.08079718,0.00369719,-0.015530819,0.0021374116,0.03247959,0.11611161,-0.021934662,-0.029833768,0.016046036,-0.00634777,-0.06037879,-0.005574648,0.028324481,-0.021840915,0.03284168,-0.022047363,-0.03463407,0.011823492,-0.03520137,-0.014746701,-0.03972389,-0.02124471,0.026924072,-0.0022506462,0.04452787,-0.015707701,-0.0065392647,0.0066317394,-0.005149294,-0.07763598,0.054278333,0.027830306,-0.03989325,-0.026995605,-0.024925973,-0.0024197767,0.07852477,-0.034251966,0.03694585,0.044244047,0.012739273,0.0037145729,0.008245091,0.013920077,-0.010570776,-0.021823786,0.057918977,-0.075884886,-0.054011993,0.0039594076,0.003970741,-0.038295034,-0.03029311,0.063210145,-0.08822839,-0.061069354,0.08516593,0.020341832,0.08075477,0.03257605,0.0039170105,0.029395742,0.012290831,-0.06368765,0.023519376,-0.0173505,-0.001395915,0.017215127,0.043243848,0.04967547,0.028518617,0.021273924,-0.0023932487,-0.030911915,-0.05524172,-0.045551147,0.042072143,-0.027773965,-0.03693362,0.028450156,0.06675585,-0.061626967,-0.08894698,0.045917906,-0.00475913,0.034920968,-0.0064531155,-0.00689886,-0.06119457,0.021173967,-0.027787622,-0.02472986,0.03998034,0.03737826,-0.0067949123,0.022558564,-0.04570635,-0.033072025,0.022725677,0.016026087,-0.02125421,-0.02984927,-0.0049473033]] +```` -## Validate LLM service +### 6. 
Validate Reranking service ```bash -curl http://${SEARCH_HOST_IP}:3007/v1/chat/completions\ +curl http://${SEARCH_HOST_IP}:3005/v1/reranking\ -X POST \ - -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ -H 'Content-Type: application/json' ``` -## Validate MegaService +### 7 Validate MegaService ```bash curl http://${SEARCH_HOST_IP}:3008/v1/searchqna -H "Content-Type: application/json" -d '{ @@ -432,7 +437,7 @@ curl http://${SEARCH_HOST_IP}:3008/v1/searchqna -H "Content-Type: application/js }' ``` -### 5. Stop application +### 8. Stop application #### If you use vLLM diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh index ca8fd3a82f..6c6f398a33 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -5,8 +5,8 @@ # SPDX-License-Identifier: Apache-2.0 -export SEARCH_HOST_IP=10.53.22.29 -export SEARCH_EXTERNAL_HOST_IP=68.69.180.77 +export SEARCH_HOST_IP=${host_ip} +export SEARCH_EXTERNAL_HOST_IP="" export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${SEARCH_HOST_IP}:3001 export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' @@ -30,7 +30,7 @@ export SEARCH_LLM_SERVICE_PORT=3007 export SEARCH_FRONTEND_SERVICE_PORT=18143 export SEARCH_BACKEND_SERVICE_PORT=18142 -export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${SEARCH_EXTERNAL_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna +export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh 
b/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh index ba44036f1c..4023683e8c 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -5,7 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 -export HOST_IP='' +export HOST_IP=${host_ip} export EXTERNAL_HOST_IP='' export MODEL_PATH="./data" export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' @@ -34,7 +34,7 @@ export SEARCH_LLM_SERVICE_PORT=3007 export SEARCH_FRONTEND_SERVICE_PORT=18143 export SEARCH_BACKEND_SERVICE_PORT=18142 -export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${EXTERNAL_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna +export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} From ccefa76e51a5258e0980e806ec6324730dfb51b2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Mar 2025 04:08:42 +0000 Subject: [PATCH 23/44] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../docker_compose/amd/gpu/rocm/README.md | 122 ++++++++++++++++-- 1 file changed, 110 insertions(+), 12 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md index 7c92fd7c8e..3d407cb3b3 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md @@ -381,18 +381,18 @@ curl http://${SEARCH_HOST_IP}:${SEARCHQNA_TGI_SERVICE_PORT}/v1/embeddings\ Checking the response from the service. 
The response should be similar to JSON: -````json +```json { - "detail":[ + "detail": [ { - "type":"missing", - "loc":["body","input"], - "msg":"Field required", - "input":{"text":"hello"} - } - ] + "type": "missing", + "loc": ["body", "input"], + "msg": "Field required", + "input": { "text": "hello" } + } + ] } -```` +``` ### 4. Validate Web Retriever service @@ -417,9 +417,107 @@ curl http://${HOST_IP}:${SEARCHQNA_TEI_SERVICE_PORT}//embed \ Checking the response from the service. The response should be similar to Array of Data: -````json -[[0.00037115702,-0.06356819,0.0024758505,-0.012360337,0.050739925,0.023380278,0.022216318,0.0008076447,-0.0003412891,-0.033479452,-0.024872458,0.0064700204,-0.00731641,0.06648339,0.0013361155,0.047004532,0.062838696,-0.021005465,0.011151533,0.044124223,-0.050683793,-0.062093593,-0.03992629,0.017758112,-0.0013727234,0.0022603935,-0.04363493,0.012822347,-0.02408198,0.011079012,0.028809752,-0.008898206,0.037047423,-0.027456092,0.016162485,0.04173568,-0.039396558,-0.035203997,-0.022387454,-0.019808905,-0.01864915,-0.042313505,-0.0120891025,0.048949677,-0.08100209,0.017953783,-0.12084276,0.0024097406,-0.022705944,-0.012279724,-0.07547717,0.051262986,0.03203861,-0.019056482,0.04271625,0.015248945,0.004222296,-0.08073051,0.010240495,-0.05635268,0.052041706,0.03712775,-0.01854724,-0.02750096,-0.00096631586,-0.026202224,0.024124105,0.042904165,-0.023528703,-0.0034956702,-0.028778492,0.029217377,-0.020601744,-0.0049860086,-0.05246627,-0.011162583,0.012888553,0.014507065,0.08219481,-0.008273658,0.0036607939,0.062248874,0.042562004,0.03170365,0.0046070544,0.00065274147,-0.019365542,-0.004698561,-0.0449053,0.02275239,0.01039843,-0.053169794,0.060175993,0.051545423,0.014204941,0.0076600607,0.013906856,-0.035385784,-0.011683805,-0.014732695,-0.02331647,-0.059045117,-0.016870823,-0.014698294,-0.048483565,0.026726946,0.05227064,-0.013973138,0.014551645,-0.019573484,-0.0013427412,-0.008475066,-0.0025058866,-0.048502546,-0.043069497,-0.0077841803,-
0.016379999,0.0037450534,-0.025010578,-0.04592572,0.034388185,0.03836159,0.0019682923,0.021373231,-0.03391387,0.015393363,0.003937917,0.01832765,0.0045520393,-0.02696203,0.020696502,0.016930614,-0.007926859,0.021834886,-0.014779224,0.00073025556,-0.020250296,0.006635754,0.025785012,0.009847587,-0.002533611,-0.057919327,0.03010091,-0.03554674,0.054443054,-0.015446536,-0.0079982905,-0.0042982297,-0.018884834,0.0027541735,-0.044417977,0.05555447,-0.018901609,-0.049503766,0.008309782,0.039867956,-0.0004423662,0.0059798234,0.03447887,0.023205558,0.058959927,-0.019526886,-0.054637823,-0.009800092,-0.024515655,-0.05426387,0.05535355,0.024482403,-0.020081121,0.024965372,-0.002176406,-0.011429285,0.02036594,-0.011996402,0.011601014,0.04732072,0.028819714,0.03407571,0.0430521,0.05145868,-0.065615594,0.046596047,-0.008815781,-0.0063788523,-0.044762302,-0.03171996,0.04966251,-0.010887125,0.036779672,0.014379601,-0.06393863,-0.036413074,-0.033719108,-0.037734028,0.033251368,-0.01693572,-0.015116194,0.082118206,-0.011095621,0.046565905,0.054315507,-0.051471975,0.0153609,-0.016379755,-0.02725454,0.029903106,0.01588181,-0.043773234,-0.0034126595,0.0034703915,0.0074963053,-0.049301904,-0.005326988,0.0014556781,0.043266784,0.03043187,-0.008008064,-0.0047954894,0.0065719066,-0.018209687,0.00520577,-0.04222329,0.024618099,0.0030018033,0.008215917,0.088026844,0.041226704,-0.05174175,0.035067245,-0.037319127,0.0037409177,0.024523623,-0.0126059465,0.019197112,0.013823613,-0.02756309,0.014537172,0.010373209,0.045283005,-0.033583794,-0.07042238,0.0071703074,-0.047405772,0.052970607,0.01187145,0.009470498,0.033309255,-0.014022496,-0.01466476,-0.016799983,-0.004560339,-0.00007741032,0.016623817,0.02886948,-0.023846539,-0.05926324,0.0019861246,-0.0097210035,0.10283416,0.027582858,-0.050722197,0.051445477,-0.027595742,0.022260211,-0.025540655,-0.09528184,-0.028447622,-0.020006616,0.08766454,-0.014110661,0.04828308,0.0074301455,0.03928737,-0.0000046884684,-0.026885474,0.005424345,0.054999787,0.0
55203326,-0.012640017,-0.0435913,-0.024285164,0.06663095,0.005627971,-0.015168387,0.027197381,-0.026075814,-0.003045215,-0.008655605,-0.009072627,0.004339306,0.03589536,0.061759293,-0.04240408,0.04873947,0.021134883,0.053518154,0.045864865,-0.027563328,-0.01566489,0.00018125105,-0.007070503,0.039647527,-0.021650534,0.038786504,0.02006178,-0.013114097,0.07950984,-0.014730525,-0.19681875,-0.013000412,0.018087342,-0.0073786196,0.038186155,-0.059353005,-0.0058362517,-0.009970051,0.0016716863,-0.023077143,-0.02714242,-0.006529649,0.037998736,0.025349554,0.019855456,-0.016530242,0.00880591,-0.016678277,-0.03673031,0.045423195,-0.03146899,-0.029318942,-0.012635296,0.071473934,-0.02904274,0.027330637,-0.084734075,-0.05050938,-0.0030655882,-0.0022098075,-0.02383695,-0.028460467,-0.03240081,0.048773084,0.023262978,0.016216593,0.027833678,-0.039854486,-0.002443358,0.01758309,-0.033520985,-0.04862155,0.0030191801,-0.040858116,0.045017388,0.01576234,-0.09301789,-0.04828378,-0.014886363,0.0012595668,-0.010673225,-0.02463904,-0.06783802,-0.0012545382,0.015514673,-0.004911741,0.0025960177,-0.012014308,-0.024893451,0.036577918,-0.003223495,-0.020390507,-0.022805423,-0.059310623,-0.02081245,-0.023387661,-0.061122973,-0.06244,0.017364288,0.033477243,-0.010211365,0.04805492,-0.0644543,-0.048770227,0.0068986556,-0.025725175,-0.029574871,-0.00949049,0.05490974,0.027187059,0.00826158,-0.06282722,0.035274204,0.012130771,-0.009545266,-0.048487406,0.04640102,-0.037075754,-0.020248186,-0.02851919,0.064635284,-0.0064534973,-0.026640853,-0.026290758,0.035040796,0.020074066,0.0032996435,0.02883776,-0.012944289,0.019450067,-0.02121465,-0.024558635,-0.04377821,-0.016631315,-0.04083968,-0.021962307,-0.010120014,0.02998998,0.10129919,-0.0025703132,-0.03771752,0.01426784,0.025374308,0.00082124525,0.00029568642,-0.030749727,0.016260363,0.0014756168,0.018676473,-0.03861688,-0.032052398,0.056064054,0.005533946,0.04515451,0.015364342,-0.02965325,0.0009782034,0.01524649,0.019077078,-0.025799321,0.02086526
3,-0.00037949806,0.012502633,0.0090223905,-0.0015367466,-0.012833919,-0.011109666,-0.006981191,-0.009670439,0.009430074,-0.007729517,0.0016868497,0.016697595,-0.015892748,-0.020780738,0.049529854,-0.07344469,0.0607613,-0.0068755895,-0.014736902,0.014770749,-0.028858911,0.025249828,-0.058469485,0.030096894,-0.007117604,0.010155325,-0.0065526864,-0.028654601,-0.04420291,0.009965181,0.030222228,-0.010007972,0.0104629295,0.05589087,0.05443477,-0.02641796,-0.061689503,0.03118466,0.012150501,0.03404673,-0.029666431,-0.008654386,-0.031682808,-0.014843155,0.036703967,0.026411135,-0.005715008,0.024990784,0.058862202,0.017355891,0.039204415,-0.0034798204,0.033091135,0.050439566,0.032798093,-0.029705318,0.005968363,-0.055048566,0.028009748,-0.03823961,0.024362633,-0.017294712,-0.019563003,-0.019944556,-0.027790153,-0.01866823,0.047109686,-0.0033735516,-0.020653522,-0.039765686,-0.019055683,-0.0263571,-0.023188936,0.049641415,-0.077975206,0.030659853,0.048734687,0.044718176,0.036765084,-0.011803315,-0.027699227,-0.07258002,-0.08741319,-0.0392474,-0.042096145,-0.0040325304,0.01667375,0.026754893,-0.030304687,0.029919326,0.024295082,0.011638254,-0.012232291,-0.047564257,-0.036413006,0.026577674,0.036411874,0.00057670544,0.017877145,0.009268524,-0.006965588,0.011874776,-0.005112591,-0.034651127,0.03160231,-0.052825063,0.014719321,-0.0139615545,-0.016238235,0.002020219,0.02526055,-0.07056756,0.010022732,-0.014104433,-0.005984697,-0.00897443,0.021115793,-0.043804843,-0.027990978,0.060727082,0.0040618493,-0.038511537,-0.048857935,0.024104802,-0.059829835,-0.029107396,-0.05538522,-0.06930553,-0.0057559577,-0.022053827,-0.00876388,-0.0056931996,0.029746206,0.0224666,0.008767829,-0.03966822,-0.006478918,0.06567699,-0.01581077,-0.03742192,-0.06186453,-0.028619587,0.08638498,0.031267703,-0.0008673075,0.003113204,0.012213491,0.020067157,-0.02849485,0.0018909829,0.02714576,0.0026566028,-0.03609787,0.0060567204,-0.047545094,-0.0046444787,-0.021402694,-0.023118727,-0.015218381,-0.043136228,-0
.0438743,-0.005564044,-0.009355076,-0.028500054,0.009921202,0.027966693,0.06036647,0.06929019,0.007004997,-0.024255225,0.04914266,0.0032520234,0.0044063884,-0.029372599,0.038042217,-0.035385627,-0.04905816,0.047601648,0.0071805464,-0.008339494,-0.035425205,0.036915354,0.024695326,-0.038979523,0.01886513,0.013804558,-0.04848749,-0.04819779,0.022526458,-0.029244151,0.041152976,0.04666112,0.020387372,0.037857335,0.060002513,0.011064769,-0.032094717,0.070615225,0.04814509,0.017521046,0.074162334,-0.04956284,0.07335939,-0.009453019,-0.06289444,0.024246441,0.021851622,0.01857824,0.02037353,-0.017273203,0.021301785,0.05051385,0.053983003,-0.01588495,0.054096334,0.05107405,0.0720548,-0.029601721,0.04816011,0.006444874,-0.02505102,0.013238045,-0.021370836,0.025479412,-0.048463117,0.03514722,0.08079718,0.00369719,-0.015530819,0.0021374116,0.03247959,0.11611161,-0.021934662,-0.029833768,0.016046036,-0.00634777,-0.06037879,-0.005574648,0.028324481,-0.021840915,0.03284168,-0.022047363,-0.03463407,0.011823492,-0.03520137,-0.014746701,-0.03972389,-0.02124471,0.026924072,-0.0022506462,0.04452787,-0.015707701,-0.0065392647,0.0066317394,-0.005149294,-0.07763598,0.054278333,0.027830306,-0.03989325,-0.026995605,-0.024925973,-0.0024197767,0.07852477,-0.034251966,0.03694585,0.044244047,0.012739273,0.0037145729,0.008245091,0.013920077,-0.010570776,-0.021823786,0.057918977,-0.075884886,-0.054011993,0.0039594076,0.003970741,-0.038295034,-0.03029311,0.063210145,-0.08822839,-0.061069354,0.08516593,0.020341832,0.08075477,0.03257605,0.0039170105,0.029395742,0.012290831,-0.06368765,0.023519376,-0.0173505,-0.001395915,0.017215127,0.043243848,0.04967547,0.028518617,0.021273924,-0.0023932487,-0.030911915,-0.05524172,-0.045551147,0.042072143,-0.027773965,-0.03693362,0.028450156,0.06675585,-0.061626967,-0.08894698,0.045917906,-0.00475913,0.034920968,-0.0064531155,-0.00689886,-0.06119457,0.021173967,-0.027787622,-0.02472986,0.03998034,0.03737826,-0.0067949123,0.022558564,-0.04570635,-0.033072025,0.022
725677,0.016026087,-0.02125421,-0.02984927,-0.0049473033]] -```` +```json +[ + [ + 0.00037115702, -0.06356819, 0.0024758505, -0.012360337, 0.050739925, 0.023380278, 0.022216318, 0.0008076447, + -0.0003412891, -0.033479452, -0.024872458, 0.0064700204, -0.00731641, 0.06648339, 0.0013361155, 0.047004532, + 0.062838696, -0.021005465, 0.011151533, 0.044124223, -0.050683793, -0.062093593, -0.03992629, 0.017758112, + -0.0013727234, 0.0022603935, -0.04363493, 0.012822347, -0.02408198, 0.011079012, 0.028809752, -0.008898206, + 0.037047423, -0.027456092, 0.016162485, 0.04173568, -0.039396558, -0.035203997, -0.022387454, -0.019808905, + -0.01864915, -0.042313505, -0.0120891025, 0.048949677, -0.08100209, 0.017953783, -0.12084276, 0.0024097406, + -0.022705944, -0.012279724, -0.07547717, 0.051262986, 0.03203861, -0.019056482, 0.04271625, 0.015248945, + 0.004222296, -0.08073051, 0.010240495, -0.05635268, 0.052041706, 0.03712775, -0.01854724, -0.02750096, + -0.00096631586, -0.026202224, 0.024124105, 0.042904165, -0.023528703, -0.0034956702, -0.028778492, 0.029217377, + -0.020601744, -0.0049860086, -0.05246627, -0.011162583, 0.012888553, 0.014507065, 0.08219481, -0.008273658, + 0.0036607939, 0.062248874, 0.042562004, 0.03170365, 0.0046070544, 0.00065274147, -0.019365542, -0.004698561, + -0.0449053, 0.02275239, 0.01039843, -0.053169794, 0.060175993, 0.051545423, 0.014204941, 0.0076600607, 0.013906856, + -0.035385784, -0.011683805, -0.014732695, -0.02331647, -0.059045117, -0.016870823, -0.014698294, -0.048483565, + 0.026726946, 0.05227064, -0.013973138, 0.014551645, -0.019573484, -0.0013427412, -0.008475066, -0.0025058866, + -0.048502546, -0.043069497, -0.0077841803, -0.016379999, 0.0037450534, -0.025010578, -0.04592572, 0.034388185, + 0.03836159, 0.0019682923, 0.021373231, -0.03391387, 0.015393363, 0.003937917, 0.01832765, 0.0045520393, -0.02696203, + 0.020696502, 0.016930614, -0.007926859, 0.021834886, -0.014779224, 0.00073025556, -0.020250296, 0.006635754, + 0.025785012, 
0.009847587, -0.002533611, -0.057919327, 0.03010091, -0.03554674, 0.054443054, -0.015446536, + -0.0079982905, -0.0042982297, -0.018884834, 0.0027541735, -0.044417977, 0.05555447, -0.018901609, -0.049503766, + 0.008309782, 0.039867956, -0.0004423662, 0.0059798234, 0.03447887, 0.023205558, 0.058959927, -0.019526886, + -0.054637823, -0.009800092, -0.024515655, -0.05426387, 0.05535355, 0.024482403, -0.020081121, 0.024965372, + -0.002176406, -0.011429285, 0.02036594, -0.011996402, 0.011601014, 0.04732072, 0.028819714, 0.03407571, 0.0430521, + 0.05145868, -0.065615594, 0.046596047, -0.008815781, -0.0063788523, -0.044762302, -0.03171996, 0.04966251, + -0.010887125, 0.036779672, 0.014379601, -0.06393863, -0.036413074, -0.033719108, -0.037734028, 0.033251368, + -0.01693572, -0.015116194, 0.082118206, -0.011095621, 0.046565905, 0.054315507, -0.051471975, 0.0153609, + -0.016379755, -0.02725454, 0.029903106, 0.01588181, -0.043773234, -0.0034126595, 0.0034703915, 0.0074963053, + -0.049301904, -0.005326988, 0.0014556781, 0.043266784, 0.03043187, -0.008008064, -0.0047954894, 0.0065719066, + -0.018209687, 0.00520577, -0.04222329, 0.024618099, 0.0030018033, 0.008215917, 0.088026844, 0.041226704, + -0.05174175, 0.035067245, -0.037319127, 0.0037409177, 0.024523623, -0.0126059465, 0.019197112, 0.013823613, + -0.02756309, 0.014537172, 0.010373209, 0.045283005, -0.033583794, -0.07042238, 0.0071703074, -0.047405772, + 0.052970607, 0.01187145, 0.009470498, 0.033309255, -0.014022496, -0.01466476, -0.016799983, -0.004560339, + -0.00007741032, 0.016623817, 0.02886948, -0.023846539, -0.05926324, 0.0019861246, -0.0097210035, 0.10283416, + 0.027582858, -0.050722197, 0.051445477, -0.027595742, 0.022260211, -0.025540655, -0.09528184, -0.028447622, + -0.020006616, 0.08766454, -0.014110661, 0.04828308, 0.0074301455, 0.03928737, -0.0000046884684, -0.026885474, + 0.005424345, 0.054999787, 0.055203326, -0.012640017, -0.0435913, -0.024285164, 0.06663095, 0.005627971, + -0.015168387, 0.027197381, 
-0.026075814, -0.003045215, -0.008655605, -0.009072627, 0.004339306, 0.03589536, + 0.061759293, -0.04240408, 0.04873947, 0.021134883, 0.053518154, 0.045864865, -0.027563328, -0.01566489, + 0.00018125105, -0.007070503, 0.039647527, -0.021650534, 0.038786504, 0.02006178, -0.013114097, 0.07950984, + -0.014730525, -0.19681875, -0.013000412, 0.018087342, -0.0073786196, 0.038186155, -0.059353005, -0.0058362517, + -0.009970051, 0.0016716863, -0.023077143, -0.02714242, -0.006529649, 0.037998736, 0.025349554, 0.019855456, + -0.016530242, 0.00880591, -0.016678277, -0.03673031, 0.045423195, -0.03146899, -0.029318942, -0.012635296, + 0.071473934, -0.02904274, 0.027330637, -0.084734075, -0.05050938, -0.0030655882, -0.0022098075, -0.02383695, + -0.028460467, -0.03240081, 0.048773084, 0.023262978, 0.016216593, 0.027833678, -0.039854486, -0.002443358, + 0.01758309, -0.033520985, -0.04862155, 0.0030191801, -0.040858116, 0.045017388, 0.01576234, -0.09301789, + -0.04828378, -0.014886363, 0.0012595668, -0.010673225, -0.02463904, -0.06783802, -0.0012545382, 0.015514673, + -0.004911741, 0.0025960177, -0.012014308, -0.024893451, 0.036577918, -0.003223495, -0.020390507, -0.022805423, + -0.059310623, -0.02081245, -0.023387661, -0.061122973, -0.06244, 0.017364288, 0.033477243, -0.010211365, 0.04805492, + -0.0644543, -0.048770227, 0.0068986556, -0.025725175, -0.029574871, -0.00949049, 0.05490974, 0.027187059, + 0.00826158, -0.06282722, 0.035274204, 0.012130771, -0.009545266, -0.048487406, 0.04640102, -0.037075754, + -0.020248186, -0.02851919, 0.064635284, -0.0064534973, -0.026640853, -0.026290758, 0.035040796, 0.020074066, + 0.0032996435, 0.02883776, -0.012944289, 0.019450067, -0.02121465, -0.024558635, -0.04377821, -0.016631315, + -0.04083968, -0.021962307, -0.010120014, 0.02998998, 0.10129919, -0.0025703132, -0.03771752, 0.01426784, + 0.025374308, 0.00082124525, 0.00029568642, -0.030749727, 0.016260363, 0.0014756168, 0.018676473, -0.03861688, + -0.032052398, 0.056064054, 0.005533946, 
0.04515451, 0.015364342, -0.02965325, 0.0009782034, 0.01524649, 0.019077078, + -0.025799321, 0.020865263, -0.00037949806, 0.012502633, 0.0090223905, -0.0015367466, -0.012833919, -0.011109666, + -0.006981191, -0.009670439, 0.009430074, -0.007729517, 0.0016868497, 0.016697595, -0.015892748, -0.020780738, + 0.049529854, -0.07344469, 0.0607613, -0.0068755895, -0.014736902, 0.014770749, -0.028858911, 0.025249828, + -0.058469485, 0.030096894, -0.007117604, 0.010155325, -0.0065526864, -0.028654601, -0.04420291, 0.009965181, + 0.030222228, -0.010007972, 0.0104629295, 0.05589087, 0.05443477, -0.02641796, -0.061689503, 0.03118466, 0.012150501, + 0.03404673, -0.029666431, -0.008654386, -0.031682808, -0.014843155, 0.036703967, 0.026411135, -0.005715008, + 0.024990784, 0.058862202, 0.017355891, 0.039204415, -0.0034798204, 0.033091135, 0.050439566, 0.032798093, + -0.029705318, 0.005968363, -0.055048566, 0.028009748, -0.03823961, 0.024362633, -0.017294712, -0.019563003, + -0.019944556, -0.027790153, -0.01866823, 0.047109686, -0.0033735516, -0.020653522, -0.039765686, -0.019055683, + -0.0263571, -0.023188936, 0.049641415, -0.077975206, 0.030659853, 0.048734687, 0.044718176, 0.036765084, + -0.011803315, -0.027699227, -0.07258002, -0.08741319, -0.0392474, -0.042096145, -0.0040325304, 0.01667375, + 0.026754893, -0.030304687, 0.029919326, 0.024295082, 0.011638254, -0.012232291, -0.047564257, -0.036413006, + 0.026577674, 0.036411874, 0.00057670544, 0.017877145, 0.009268524, -0.006965588, 0.011874776, -0.005112591, + -0.034651127, 0.03160231, -0.052825063, 0.014719321, -0.0139615545, -0.016238235, 0.002020219, 0.02526055, + -0.07056756, 0.010022732, -0.014104433, -0.005984697, -0.00897443, 0.021115793, -0.043804843, -0.027990978, + 0.060727082, 0.0040618493, -0.038511537, -0.048857935, 0.024104802, -0.059829835, -0.029107396, -0.05538522, + -0.06930553, -0.0057559577, -0.022053827, -0.00876388, -0.0056931996, 0.029746206, 0.0224666, 0.008767829, + -0.03966822, -0.006478918, 0.06567699, 
-0.01581077, -0.03742192, -0.06186453, -0.028619587, 0.08638498, 0.031267703, + -0.0008673075, 0.003113204, 0.012213491, 0.020067157, -0.02849485, 0.0018909829, 0.02714576, 0.0026566028, + -0.03609787, 0.0060567204, -0.047545094, -0.0046444787, -0.021402694, -0.023118727, -0.015218381, -0.043136228, + -0.0438743, -0.005564044, -0.009355076, -0.028500054, 0.009921202, 0.027966693, 0.06036647, 0.06929019, 0.007004997, + -0.024255225, 0.04914266, 0.0032520234, 0.0044063884, -0.029372599, 0.038042217, -0.035385627, -0.04905816, + 0.047601648, 0.0071805464, -0.008339494, -0.035425205, 0.036915354, 0.024695326, -0.038979523, 0.01886513, + 0.013804558, -0.04848749, -0.04819779, 0.022526458, -0.029244151, 0.041152976, 0.04666112, 0.020387372, 0.037857335, + 0.060002513, 0.011064769, -0.032094717, 0.070615225, 0.04814509, 0.017521046, 0.074162334, -0.04956284, 0.07335939, + -0.009453019, -0.06289444, 0.024246441, 0.021851622, 0.01857824, 0.02037353, -0.017273203, 0.021301785, 0.05051385, + 0.053983003, -0.01588495, 0.054096334, 0.05107405, 0.0720548, -0.029601721, 0.04816011, 0.006444874, -0.02505102, + 0.013238045, -0.021370836, 0.025479412, -0.048463117, 0.03514722, 0.08079718, 0.00369719, -0.015530819, + 0.0021374116, 0.03247959, 0.11611161, -0.021934662, -0.029833768, 0.016046036, -0.00634777, -0.06037879, + -0.005574648, 0.028324481, -0.021840915, 0.03284168, -0.022047363, -0.03463407, 0.011823492, -0.03520137, + -0.014746701, -0.03972389, -0.02124471, 0.026924072, -0.0022506462, 0.04452787, -0.015707701, -0.0065392647, + 0.0066317394, -0.005149294, -0.07763598, 0.054278333, 0.027830306, -0.03989325, -0.026995605, -0.024925973, + -0.0024197767, 0.07852477, -0.034251966, 0.03694585, 0.044244047, 0.012739273, 0.0037145729, 0.008245091, + 0.013920077, -0.010570776, -0.021823786, 0.057918977, -0.075884886, -0.054011993, 0.0039594076, 0.003970741, + -0.038295034, -0.03029311, 0.063210145, -0.08822839, -0.061069354, 0.08516593, 0.020341832, 0.08075477, 0.03257605, + 
0.0039170105, 0.029395742, 0.012290831, -0.06368765, 0.023519376, -0.0173505, -0.001395915, 0.017215127, + 0.043243848, 0.04967547, 0.028518617, 0.021273924, -0.0023932487, -0.030911915, -0.05524172, -0.045551147, + 0.042072143, -0.027773965, -0.03693362, 0.028450156, 0.06675585, -0.061626967, -0.08894698, 0.045917906, + -0.00475913, 0.034920968, -0.0064531155, -0.00689886, -0.06119457, 0.021173967, -0.027787622, -0.02472986, + 0.03998034, 0.03737826, -0.0067949123, 0.022558564, -0.04570635, -0.033072025, 0.022725677, 0.016026087, + -0.02125421, -0.02984927, -0.0049473033 + ] +] +``` ### 6. Validate Reranking service From a32d8bdd0dd63d84dbe3aef9c91fec84adb6c2dc Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Tue, 25 Mar 2025 12:10:56 +0700 Subject: [PATCH 24/44] Add sleep before running validate Signed-off-by: Artem Astafev --- SearchQnA/tests/test_compose_on_rocm.sh | 2 ++ SearchQnA/tests/test_compose_vllm_on_rocm.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/SearchQnA/tests/test_compose_on_rocm.sh b/SearchQnA/tests/test_compose_on_rocm.sh index 27de2b9bb0..1dbb1ce895 100644 --- a/SearchQnA/tests/test_compose_on_rocm.sh +++ b/SearchQnA/tests/test_compose_on_rocm.sh @@ -127,6 +127,8 @@ function main() { if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi start_services + sleep 2m + validate_megaservice validate_frontend diff --git a/SearchQnA/tests/test_compose_vllm_on_rocm.sh b/SearchQnA/tests/test_compose_vllm_on_rocm.sh index 7d71a1d8bd..52b352c38c 100644 --- a/SearchQnA/tests/test_compose_vllm_on_rocm.sh +++ b/SearchQnA/tests/test_compose_vllm_on_rocm.sh @@ -130,6 +130,8 @@ function main() { if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi start_services + sleep 2m + validate_megaservice validate_frontend From 0740267722b7f7fbd48e2f6cabf125ce3bc3ce34 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Tue, 25 Mar 2025 14:57:03 +0700 Subject: [PATCH 25/44] Increase timeout before tests Signed-off-by: Artem Astafev --- 
SearchQnA/tests/test_compose_on_rocm.sh | 2 +- SearchQnA/tests/test_compose_vllm_on_rocm.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/SearchQnA/tests/test_compose_on_rocm.sh b/SearchQnA/tests/test_compose_on_rocm.sh index 1dbb1ce895..fb506344de 100644 --- a/SearchQnA/tests/test_compose_on_rocm.sh +++ b/SearchQnA/tests/test_compose_on_rocm.sh @@ -127,7 +127,7 @@ function main() { if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi start_services - sleep 2m + sleep 5m validate_megaservice validate_frontend diff --git a/SearchQnA/tests/test_compose_vllm_on_rocm.sh b/SearchQnA/tests/test_compose_vllm_on_rocm.sh index 52b352c38c..e06c6be0c2 100644 --- a/SearchQnA/tests/test_compose_vllm_on_rocm.sh +++ b/SearchQnA/tests/test_compose_vllm_on_rocm.sh @@ -130,7 +130,7 @@ function main() { if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi start_services - sleep 2m + sleep 5m validate_megaservice validate_frontend From f997214442aadb9e8150befbb37924fde8b28177 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Tue, 25 Mar 2025 16:09:36 +0700 Subject: [PATCH 26/44] SearchQnA - fix files for deploy on ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/tests/test_compose_on_rocm.sh | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/SearchQnA/tests/test_compose_on_rocm.sh b/SearchQnA/tests/test_compose_on_rocm.sh index fb506344de..48cacc4bff 100644 --- a/SearchQnA/tests/test_compose_on_rocm.sh +++ b/SearchQnA/tests/test_compose_on_rocm.sh @@ -68,6 +68,7 @@ function start_services() { sleep 5s n=$((n+1)) done + sleep 20 } @@ -123,17 +124,15 @@ function stop_docker() { function main() { - stop_docker - if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi +# stop_docker +# if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi start_services - sleep 5m - validate_megaservice validate_frontend - stop_docker - echo y | docker system prune +# stop_docker +# echo y | docker system 
prune } From 6bc9f86e96b92775674434eef34d465aebefd8c0 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Tue, 25 Mar 2025 16:14:08 +0700 Subject: [PATCH 27/44] SearchQnA - fix files for deploy on ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml b/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml index fef008250d..d9f96ec1c6 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml +++ b/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml @@ -117,13 +117,13 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${SEARCH_LLM_MODEL_ID} LLM_MODEL: ${SEARCH_LLM_MODEL_ID} HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} - OPENAI_API_KEY: ${SEARCH_OPENAI_API_KEY} + LLM_COMPONENT_NAME: "OpeaTextGenService" + restart: unless-stopped search-backend-server: image: ${REGISTRY:-opea}/searchqna:${TAG:-latest} From 78368445d383a6e827a37e79c65f23a8c1d9eee6 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 26 Mar 2025 09:27:09 +0700 Subject: [PATCH 28/44] SearchQnA - fix files for deploy on ROCm vLLM Signed-off-by: Chingis Yundunov --- .../docker_compose/amd/gpu/rocm/README.md | 26 ++++---- .../docker_compose/amd/gpu/rocm/compose.yaml | 60 ++++++++++--------- .../amd/gpu/rocm/compose_vllm.yaml | 14 ++++- .../docker_compose/amd/gpu/rocm/set_env.sh | 44 +++++++------- .../amd/gpu/rocm/set_env_vllm.sh | 48 +++++++-------- 5 files changed, 101 insertions(+), 91 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md index 3d407cb3b3..5c0e686729 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md +++ 
b/SearchQnA/docker_compose/amd/gpu/rocm/README.md @@ -91,20 +91,20 @@ ##### vLLM-based application: - opea/vllm-rocm:latest - - opea/llm-textgen:latest - - opea/reranking:latest - - opea/searchqna:latest - - opea/searchqna-ui:latest - - opea/web-retriever:latest + - opea/llm-textgen:latest + - opea/reranking:latest + - opea/searchqna:latest + - opea/searchqna-ui:latest + - opea/web-retriever:latest ##### TGI-based application: - ghcr.io/huggingface/text-generation-inference:2.3.1-rocm - - opea/llm-textgen:latest - - opea/reranking:latest - - opea/searchqna:latest - - opea/searchqna-ui:latest - - opea/web-retriever:latest + - opea/llm-textgen:latest + - opea/reranking:latest + - opea/searchqna:latest + - opea/searchqna-ui:latest + - opea/web-retriever:latest --- @@ -121,7 +121,7 @@ To enable GPU support for AMD GPUs, the following configuration is added to the shm_size: 1g devices: - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ + - /dev/dri:/dev/dri cap_add: - SYS_PTRACE group_add: @@ -158,6 +158,10 @@ Use AMD GPU driver utilities to determine the correct `cardN` and `renderN` IDs ```bash ### Replace the string 'your_huggingfacehub_token' with your HuggingFacehub repository access token. 
export HUGGINGFACEHUB_API_TOKEN='your_huggingfacehub_token' +### Replace the string 'your_google_api_token' with your Google API access token +export GOOGLE_API_KEY='your_google_api_token' +### Replace the string 'your_google_cse_id' with your Google CSE ID +export GOOGLE_CSE_ID='your_google_cse_id' ``` #### Set variables value in set_env\*\*\*\*.sh file: diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml b/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml index d9f96ec1c6..b104e20b49 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml +++ b/SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml @@ -8,9 +8,9 @@ services: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: search-tei-embedding-server ports: - - "3001:80" + - "${SEARCH_TEI_EMBEDDING_PORT:-3001}:80" volumes: - - "${MODEL_PATH:-./data}:/data" + - "${MODEL_CACHE:-./data}:/data" shm_size: 1g environment: no_proxy: ${no_proxy} @@ -20,13 +20,14 @@ services: HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} command: --model-id ${SEARCH_EMBEDDING_MODEL_ID} --auto-truncate + search-embedding: image: ${REGISTRY:-opea}/embedding:${TAG:-latest} container_name: search-embedding-server depends_on: - search-tei-embedding-service ports: - - "3002:6000" + - "${SEARCH_EMBEDDING_SERVICE_PORT:-3002}:6000" ipc: host environment: no_proxy: ${no_proxy} @@ -36,11 +37,12 @@ services: TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT} HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped + search-web-retriever: image: ${REGISTRY:-opea}/web-retriever:${TAG:-latest} container_name: search-web-retriever-server ports: - - "3003:7077" + - "${SEARCH_WEB_RETRIEVER_SERVICE_PORT:-3003}:7077" ipc: host environment: no_proxy: ${no_proxy} @@ -50,26 +52,28 @@ services: GOOGLE_API_KEY: ${SEARCH_GOOGLE_API_KEY} GOOGLE_CSE_ID: ${SEARCH_GOOGLE_CSE_ID} restart: unless-stopped + search-tei-reranking-service: 
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: search-tei-reranking-server ports: - - "3004:80" + - "${SEARCH_TEI_RERANKING_PORT:-3004}:80" volumes: - - "${MODEL_PATH:-./data}:/data" + - "${MODEL_CACHE:-./data}:/data" shm_size: 1g environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} command: --model-id ${SEARCH_RERANK_MODEL_ID} --auto-truncate + search-reranking: image: ${REGISTRY:-opea}/reranking:${TAG:-latest} container_name: search-reranking-server depends_on: - search-tei-reranking-service ports: - - "3005:8000" + - "${SEARCH_RERANK_SERVICE_PORT:-3005}:8000" ipc: host environment: no_proxy: ${no_proxy} @@ -80,13 +84,14 @@ services: HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped + search-tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm container_name: search-tgi-service ports: - - "3006:80" + - "${SEARCH_TGI_SERVICE_PORT:-3006}:80" volumes: - - "${MODEL_PATH:-./data}:/data" + - "${MODEL_CACHE:-./data}:/data" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} @@ -96,7 +101,7 @@ services: shm_size: 1g devices: - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ + - /dev/dri:/dev/dri cap_add: - SYS_PTRACE group_add: @@ -105,13 +110,14 @@ services: - seccomp:unconfined ipc: host command: --model-id ${SEARCH_LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048 + search-llm: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: search-llm-server depends_on: - search-tgi-service ports: - - "3007:9000" + - "${SEARCH_LLM_SERVICE_PORT:-3007}:9000" ipc: host environment: no_proxy: ${no_proxy} @@ -139,18 +145,18 @@ services: ports: - "${SEARCH_BACKEND_SERVICE_PORT:-3008}:8888" environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${SEARCH_MEGA_SERVICE_HOST_IP} - - 
EMBEDDING_SERVICE_HOST_IP=${SEARCH_EMBEDDING_SERVICE_HOST_IP} - - WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP} - - RERANK_SERVICE_HOST_IP=${SEARCH_RERANK_SERVICE_HOST_IP} - - LLM_SERVICE_HOST_IP=${SEARCH_LLM_SERVICE_HOST_IP} - - EMBEDDING_SERVICE_PORT=${SEARCH_EMBEDDING_SERVICE_PORT} - - WEB_RETRIEVER_SERVICE_PORT=${SEARCH_WEB_RETRIEVER_SERVICE_PORT} - - RERANK_SERVICE_PORT=${SEARCH_RERANK_SERVICE_PORT} - - LLM_SERVICE_PORT=${SEARCH_LLM_SERVICE_PORT} + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${SEARCH_MEGA_SERVICE_HOST_IP} + EMBEDDING_SERVICE_HOST_IP: ${SEARCH_EMBEDDING_SERVICE_HOST_IP} + WEB_RETRIEVER_SERVICE_HOST_IP: ${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP} + RERANK_SERVICE_HOST_IP: ${SEARCH_RERANK_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${SEARCH_LLM_SERVICE_HOST_IP} + EMBEDDING_SERVICE_PORT: ${SEARCH_EMBEDDING_SERVICE_PORT} + WEB_RETRIEVER_SERVICE_PORT: ${SEARCH_WEB_RETRIEVER_SERVICE_PORT} + RERANK_SERVICE_PORT: ${SEARCH_RERANK_SERVICE_PORT} + LLM_SERVICE_PORT: ${SEARCH_LLM_SERVICE_PORT} ipc: host restart: always search-ui-server: @@ -161,10 +167,10 @@ services: ports: - "${SEARCH_FRONTEND_SERVICE_PORT:-5173}:5173" environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - BACKEND_BASE_URL=${SEARCH_BACKEND_SERVICE_ENDPOINT} + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_BASE_URL: ${SEARCH_BACKEND_SERVICE_ENDPOINT} ipc: host restart: always diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml index eee4f25701..7f5428f332 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -10,7 +10,7 @@ services: ports: - "${SEARCH_TEI_EMBEDDING_PORT:-3001}:80" volumes: - - "${MODEL_PATH:-./data}:/data" + - "${MODEL_CACHE:-./data}:/data" shm_size: 1g 
environment: no_proxy: ${no_proxy} @@ -20,6 +20,7 @@ services: HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} command: --model-id ${SEARCH_EMBEDDING_MODEL_ID} --auto-truncate + search-embedding: image: ${REGISTRY:-opea}/embedding:${TAG:-latest} container_name: search-embedding-server @@ -37,6 +38,7 @@ services: TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT} HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped + search-web-retriever: image: ${REGISTRY:-opea}/web-retriever:${TAG:-latest} container_name: search-web-retriever-server @@ -51,19 +53,21 @@ services: GOOGLE_API_KEY: ${SEARCH_GOOGLE_API_KEY} GOOGLE_CSE_ID: ${SEARCH_GOOGLE_CSE_ID} restart: unless-stopped + search-tei-reranking-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: search-tei-reranking-server ports: - "${SEARCH_TEI_RERANKING_PORT:-3004}:80" volumes: - - "${MODEL_PATH:-./data}:/data" + - "${MODEL_CACHE:-./data}:/data" shm_size: 1g environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} command: --model-id ${SEARCH_RERANK_MODEL_ID} --auto-truncate + search-reranking: image: ${REGISTRY:-opea}/reranking:${TAG:-latest} container_name: search-reranking-server @@ -81,6 +85,7 @@ services: HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped + search-vllm-service: image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} container_name: search-vllm-service @@ -97,7 +102,7 @@ services: WILM_USE_TRITON_FLASH_ATTENTION: 0 PYTORCH_JIT: 0 volumes: - - "${MODEL_PATH:-./data}:/data" + - "${MODEL_CACHE:-./data}:/data" shm_size: 20G devices: - /dev/kfd:/dev/kfd @@ -111,6 +116,7 @@ services: - apparmor=unconfined command: "--model ${SEARCH_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 
--num-scheduler-steps 1 --distributed-executor-backend \"mp\"" ipc: host + search-llm: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: search-llm-server @@ -129,6 +135,7 @@ services: HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN} LLM_COMPONENT_NAME: "OpeaTextGenService" restart: unless-stopped + search-backend-server: image: ${REGISTRY:-opea}/searchqna:${TAG:-latest} container_name: search-backend-server @@ -157,6 +164,7 @@ services: LLM_SERVICE_PORT: ${SEARCH_LLM_SERVICE_PORT} ipc: host restart: always + search-ui-server: image: ${REGISTRY:-opea}/searchqna-ui:${TAG:-latest} container_name: search-ui-server diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh index 6c6f398a33..fb8be31659 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -1,36 +1,34 @@ #!/usr/bin/env bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 +# Copyright (C) 2025 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 +export HOST_IP='' -export SEARCH_HOST_IP=${host_ip} -export SEARCH_EXTERNAL_HOST_IP="" export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' -export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${SEARCH_HOST_IP}:3001 -export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' -export SEARCH_TEI_RERANKING_ENDPOINT=http://${SEARCH_HOST_IP}:3004 +export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} +export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY} - -export SEARCH_TGI_LLM_ENDPOINT=http://${SEARCH_HOST_IP}:3006 export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3' +export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' -export SEARCH_MEGA_SERVICE_HOST_IP=${SEARCH_EXTERNAL_HOST_IP} -export SEARCH_EMBEDDING_SERVICE_HOST_IP=${SEARCH_HOST_IP} -export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_HOST_IP} -export SEARCH_RERANK_SERVICE_HOST_IP=${SEARCH_HOST_IP} -export SEARCH_LLM_SERVICE_HOST_IP=${SEARCH_HOST_IP} - +export SEARCH_BACKEND_SERVICE_PORT=18142 export SEARCH_EMBEDDING_SERVICE_PORT=3002 -export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003 -export SEARCH_RERANK_SERVICE_PORT=3005 +export SEARCH_FRONTEND_SERVICE_PORT=18143 export SEARCH_LLM_SERVICE_PORT=3007 +export SEARCH_RERANK_SERVICE_PORT=3005 +export SEARCH_TEI_EMBEDDING_PORT=3001 +export SEARCH_TEI_RERANKING_PORT=3004 +export SEARCH_TGI_SERVICE_PORT=3006 +export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003 -export SEARCH_FRONTEND_SERVICE_PORT=18143 -export SEARCH_BACKEND_SERVICE_PORT=18142 export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna +export SEARCH_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} +export SEARCH_LLM_SERVICE_HOST_IP=${HOST_IP} +export SEARCH_MEGA_SERVICE_HOST_IP=${HOST_IP} +export SEARCH_RERANK_SERVICE_HOST_IP=${HOST_IP} +export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT} +export 
SEARCH_TEI_RERANKING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT} +export SEARCH_TGI_LLM_ENDPOINT=http://${HOST_IP}:3006 +export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} + -export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} -export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 4023683e8c..0c0db884be 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -1,40 +1,34 @@ #!/usr/bin/env bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 +# Copyright (C) 2025 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 +export HOST_IP='' -export HOST_IP=${host_ip} -export EXTERNAL_HOST_IP='' -export MODEL_PATH="./data" export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' -export SEARCH_TEI_EMBEDDING_PORT=3001 -export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT} -export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' -export SEARCH_TEI_RERANKING_PORT=3004 -export SEARCH_TEI_RERANKING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT} +export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} +export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY} - -export SEARCH_VLLM_SERVICE_PORT=3080 -export SEARCH_LLM_ENDPOINT=http://${HOST_IP}:${SEARCH_VLLM_SERVICE_PORT} export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3' +export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' -export SEARCH_MEGA_SERVICE_HOST_IP=${EXTERNAL_HOST_IP} -export SEARCH_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} -export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} -export SEARCH_RERANK_SERVICE_HOST_IP=${HOST_IP} -export SEARCH_LLM_SERVICE_HOST_IP=${HOST_IP} +export MODEL_PATH="./data" +export SEARCH_BACKEND_SERVICE_PORT=18142 
export SEARCH_EMBEDDING_SERVICE_PORT=3002 -export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003 -export SEARCH_RERANK_SERVICE_PORT=3005 +export SEARCH_FRONTEND_SERVICE_PORT=18143 export SEARCH_LLM_SERVICE_PORT=3007 +export SEARCH_RERANK_SERVICE_PORT=3005 +export SEARCH_TEI_EMBEDDING_PORT=3001 +export SEARCH_TEI_RERANKING_PORT=3004 +export SEARCH_VLLM_SERVICE_PORT=3080 +export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003 -export SEARCH_FRONTEND_SERVICE_PORT=18143 -export SEARCH_BACKEND_SERVICE_PORT=18142 export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna - -export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} -export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} +export SEARCH_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} +export SEARCH_LLM_ENDPOINT=http://${HOST_IP}:${SEARCH_VLLM_SERVICE_PORT} +export SEARCH_LLM_SERVICE_HOST_IP=${HOST_IP} +export SEARCH_MEGA_SERVICE_HOST_IP=${HOST_IP} +export SEARCH_RERANK_SERVICE_HOST_IP=${HOST_IP} +export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT} +export SEARCH_TEI_RERANKING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT} +export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} From 3803cbbbf8cf763b808744cff8b69539a76b2451 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 02:27:50 +0000 Subject: [PATCH 29/44] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh index fb8be31659..10d7c0dc19 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -30,5 +30,3 @@ export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PO export 
SEARCH_TEI_RERANKING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT} export SEARCH_TGI_LLM_ENDPOINT=http://${HOST_IP}:3006 export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} - - From 75b731ddd62c21aa8d2198d84455b05d69a669aa Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 26 Mar 2025 09:34:44 +0700 Subject: [PATCH 30/44] SearchQnA - fix files for deploy on ROCm vLLM Signed-off-by: Chingis Yundunov --- .../docker_compose/amd/gpu/rocm/set_env.sh | 3 +- .../amd/gpu/rocm/set_env_vllm.sh | 3 +- SearchQnA/tests/test_compose_on_rocm.sh | 44 +++++++++++-------- SearchQnA/tests/test_compose_vllm_on_rocm.sh | 44 ++++++++++--------- 4 files changed, 53 insertions(+), 41 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh index fb8be31659..e93b3717ee 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -3,6 +3,7 @@ # Copyright (C) 2025 Advanced Micro Devices, Inc. 
export HOST_IP='' +export EXTERNAL_HOST_IP='' export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} @@ -21,7 +22,7 @@ export SEARCH_TEI_RERANKING_PORT=3004 export SEARCH_TGI_SERVICE_PORT=3006 export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003 -export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna +export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${EXTERNAL_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna export SEARCH_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} export SEARCH_LLM_SERVICE_HOST_IP=${HOST_IP} export SEARCH_MEGA_SERVICE_HOST_IP=${HOST_IP} diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 0c0db884be..d59d242d38 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/SearchQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -3,6 +3,7 @@ # Copyright (C) 2025 Advanced Micro Devices, Inc. export HOST_IP='' +export EXTERNAL_HOST_IP='' export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} @@ -23,7 +24,7 @@ export SEARCH_TEI_RERANKING_PORT=3004 export SEARCH_VLLM_SERVICE_PORT=3080 export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003 -export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna +export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${EXTERNAL_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna export SEARCH_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} export SEARCH_LLM_ENDPOINT=http://${HOST_IP}:${SEARCH_VLLM_SERVICE_PORT} export SEARCH_LLM_SERVICE_HOST_IP=${HOST_IP} diff --git a/SearchQnA/tests/test_compose_on_rocm.sh b/SearchQnA/tests/test_compose_on_rocm.sh index 48cacc4bff..4fdda4884b 100644 --- a/SearchQnA/tests/test_compose_on_rocm.sh +++ b/SearchQnA/tests/test_compose_on_rocm.sh @@ -30,30 +30,36 @@ function build_docker_images() { function start_services() { cd 
$WORKPATH/docker_compose/amd/gpu/rocm/ - export SEARCH_HOST_IP=${ip_address} - export SEARCH_EXTERNAL_HOST_IP=${ip_address} + + export HOST_IP=${ip_address} + export EXTERNAL_HOST_IP=${ip_address} + export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' - export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${SEARCH_HOST_IP}:3001 - export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' - export SEARCH_TEI_RERANKING_ENDPOINT=http://${SEARCH_HOST_IP}:3004 + export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} + export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY} - export SEARCH_TGI_LLM_ENDPOINT=http://${SEARCH_HOST_IP}:3006 export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3' - export SEARCH_MEGA_SERVICE_HOST_IP=${SEARCH_EXTERNAL_HOST_IP} - export SEARCH_EMBEDDING_SERVICE_HOST_IP=${SEARCH_HOST_IP} - export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_HOST_IP} - export SEARCH_RERANK_SERVICE_HOST_IP=${SEARCH_HOST_IP} - export SEARCH_LLM_SERVICE_HOST_IP=${SEARCH_HOST_IP} + export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' + + export SEARCH_BACKEND_SERVICE_PORT=3008 export SEARCH_EMBEDDING_SERVICE_PORT=3002 - export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003 - export SEARCH_RERANK_SERVICE_PORT=3005 - export SEARCH_LLM_SERVICE_PORT=3007 export SEARCH_FRONTEND_SERVICE_PORT=5173 - export SEARCH_BACKEND_SERVICE_PORT=3008 - export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${SEARCH_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna - export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} - export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} + export SEARCH_LLM_SERVICE_PORT=3007 + export SEARCH_RERANK_SERVICE_PORT=3005 + export SEARCH_TEI_EMBEDDING_PORT=3001 + export SEARCH_TEI_RERANKING_PORT=3004 + export SEARCH_TGI_SERVICE_PORT=3006 + export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003 + + export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${EXTERNAL_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna + 
export SEARCH_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_LLM_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_MEGA_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_RERANK_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT} + export SEARCH_TEI_RERANKING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT} + export SEARCH_TGI_LLM_ENDPOINT=http://${HOST_IP}:3006 + export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env diff --git a/SearchQnA/tests/test_compose_vllm_on_rocm.sh b/SearchQnA/tests/test_compose_vllm_on_rocm.sh index e06c6be0c2..200f8388a9 100644 --- a/SearchQnA/tests/test_compose_vllm_on_rocm.sh +++ b/SearchQnA/tests/test_compose_vllm_on_rocm.sh @@ -29,34 +29,38 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/amd/gpu/rocm/ + export HOST_IP=${ip_address} export EXTERNAL_HOST_IP=${ip_address} - export MODEL_PATH="./data" + export SEARCH_EMBEDDING_MODEL_ID='BAAI/bge-base-en-v1.5' - export SEARCH_TEI_EMBEDDING_PORT=3001 - export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT} - export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' - export SEARCH_TEI_RERANKING_PORT=3004 - export SEARCH_TEI_RERANKING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT} + export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} + export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} export SEARCH_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export SEARCH_OPENAI_API_KEY=${OPENAI_API_KEY} - export SEARCH_VLLM_SERVICE_PORT=3080 - export SEARCH_LLM_ENDPOINT=http://${HOST_IP}:${SEARCH_VLLM_SERVICE_PORT} export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3' - export SEARCH_MEGA_SERVICE_HOST_IP=${EXTERNAL_HOST_IP} - export SEARCH_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} - export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} - export SEARCH_RERANK_SERVICE_HOST_IP=${HOST_IP} - export 
SEARCH_LLM_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' + + export MODEL_PATH="./data" + + export SEARCH_BACKEND_SERVICE_PORT=3008 export SEARCH_EMBEDDING_SERVICE_PORT=3002 - export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003 - export SEARCH_RERANK_SERVICE_PORT=3005 - export SEARCH_LLM_SERVICE_PORT=3007 export SEARCH_FRONTEND_SERVICE_PORT=5173 - export SEARCH_BACKEND_SERVICE_PORT=3008 + export SEARCH_LLM_SERVICE_PORT=3007 + export SEARCH_RERANK_SERVICE_PORT=3005 + export SEARCH_TEI_EMBEDDING_PORT=3001 + export SEARCH_TEI_RERANKING_PORT=3004 + export SEARCH_VLLM_SERVICE_PORT=3080 + export SEARCH_WEB_RETRIEVER_SERVICE_PORT=3003 + export SEARCH_BACKEND_SERVICE_ENDPOINT=http://${EXTERNAL_HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna - export SEARCH_GOOGLE_API_KEY=${GOOGLE_API_KEY} - export SEARCH_GOOGLE_CSE_ID=${GOOGLE_CSE_ID} + export SEARCH_EMBEDDING_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_LLM_ENDPOINT=http://${HOST_IP}:${SEARCH_VLLM_SERVICE_PORT} + export SEARCH_LLM_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_MEGA_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_RERANK_SERVICE_HOST_IP=${HOST_IP} + export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT} + export SEARCH_TEI_RERANKING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT} + export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env From a2cc5defdc79b03792f71c1ac705a7e6a1701a1c Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 26 Mar 2025 09:43:05 +0700 Subject: [PATCH 31/44] SearchQnA - fix files for deploy on ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh index 170a2fdced..faedeb3f54 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ 
b/SearchQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -29,5 +29,5 @@ export SEARCH_MEGA_SERVICE_HOST_IP=${HOST_IP} export SEARCH_RERANK_SERVICE_HOST_IP=${HOST_IP} export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT} export SEARCH_TEI_RERANKING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT} -export SEARCH_TGI_LLM_ENDPOINT=http://${HOST_IP}:3006 +export SEARCH_TGI_LLM_ENDPOINT=http://${HOST_IP}:${SEARCH_TGI_SERVICE_PORT} export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} From d7bf1dd4c9ba0d20078cdc555f0fef3301c0de91 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 26 Mar 2025 09:43:18 +0700 Subject: [PATCH 32/44] SearchQnA - fix files for deploy on ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/tests/test_compose_on_rocm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SearchQnA/tests/test_compose_on_rocm.sh b/SearchQnA/tests/test_compose_on_rocm.sh index 4fdda4884b..e32c9a0e5d 100644 --- a/SearchQnA/tests/test_compose_on_rocm.sh +++ b/SearchQnA/tests/test_compose_on_rocm.sh @@ -58,7 +58,7 @@ function start_services() { export SEARCH_RERANK_SERVICE_HOST_IP=${HOST_IP} export SEARCH_TEI_EMBEDDING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT} export SEARCH_TEI_RERANKING_ENDPOINT=http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT} - export SEARCH_TGI_LLM_ENDPOINT=http://${HOST_IP}:3006 + export SEARCH_TGI_LLM_ENDPOINT=http://${HOST_IP}:${SEARCH_TGI_SERVICE_PORT} export SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env From a812d75639fd893129a3a076c9b42e1c94345aaa Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 26 Mar 2025 09:46:15 +0700 Subject: [PATCH 33/44] SearchQnA - fix files for deploy on ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/docker_compose/amd/gpu/rocm/README.md | 2 +- SearchQnA/ui/svelte/tests/searchQnA.spec.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md index 5c0e686729..a2632351b1 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md @@ -253,7 +253,7 @@ All containers should be running and should not restart: ##### If you use TGI: - search-tgi-service -- search-llm-serverr +- search-llm-server - search-web-retriever-server - search-tei-embedding-server - search-tei-reranking-server diff --git a/SearchQnA/ui/svelte/tests/searchQnA.spec.ts b/SearchQnA/ui/svelte/tests/searchQnA.spec.ts index 29396cd61a..3b27b09233 100644 --- a/SearchQnA/ui/svelte/tests/searchQnA.spec.ts +++ b/SearchQnA/ui/svelte/tests/searchQnA.spec.ts @@ -16,7 +16,7 @@ async function enterMessageToChat(page: Page, message: string) { await page.getByTestId("chat-input").click(); await page.getByTestId("chat-input").fill(message); await page.getByTestId("chat-input").press("Enter"); - await page.waitForTimeout(10000); + await page.waitForTimeout(30000); await expect(page.getByTestId("display-answer")).toBeVisible(); } From 8d85d857ea6f80453f2d94def6a9f866d49527de Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 26 Mar 2025 09:46:44 +0700 Subject: [PATCH 34/44] SearchQnA - fix files for deploy on ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/ui/svelte/playwright.config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SearchQnA/ui/svelte/playwright.config.ts b/SearchQnA/ui/svelte/playwright.config.ts index 578a1c2872..78a80e0c39 100644 --- a/SearchQnA/ui/svelte/playwright.config.ts +++ b/SearchQnA/ui/svelte/playwright.config.ts @@ -21,7 +21,7 @@ export default defineConfig({ * Maximum time expect() should wait for the condition to be met. 
* For example in `await expect(locator).toHaveText();` */ - timeout: 5000, + timeout: 10000, }, /* Run tests in files in parallel */ fullyParallel: true, From f2d2310459c23d775ae3d1715200f276207398b8 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 26 Mar 2025 09:50:04 +0700 Subject: [PATCH 35/44] SearchQnA - fix files for deploy on ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/ui/svelte/playwright.config.ts | 2 +- SearchQnA/ui/svelte/tests/searchQnA.spec.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/SearchQnA/ui/svelte/playwright.config.ts b/SearchQnA/ui/svelte/playwright.config.ts index 78a80e0c39..66692b6ca9 100644 --- a/SearchQnA/ui/svelte/playwright.config.ts +++ b/SearchQnA/ui/svelte/playwright.config.ts @@ -21,7 +21,7 @@ export default defineConfig({ * Maximum time expect() should wait for the condition to be met. * For example in `await expect(locator).toHaveText();` */ - timeout: 10000, + timeout: 20000, }, /* Run tests in files in parallel */ fullyParallel: true, diff --git a/SearchQnA/ui/svelte/tests/searchQnA.spec.ts b/SearchQnA/ui/svelte/tests/searchQnA.spec.ts index 3b27b09233..29396cd61a 100644 --- a/SearchQnA/ui/svelte/tests/searchQnA.spec.ts +++ b/SearchQnA/ui/svelte/tests/searchQnA.spec.ts @@ -16,7 +16,7 @@ async function enterMessageToChat(page: Page, message: string) { await page.getByTestId("chat-input").click(); await page.getByTestId("chat-input").fill(message); await page.getByTestId("chat-input").press("Enter"); - await page.waitForTimeout(30000); + await page.waitForTimeout(10000); await expect(page.getByTestId("display-answer")).toBeVisible(); } From 89cb95b9bbaca0c99fe7816e7e0f2d9b4005a4c3 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 26 Mar 2025 09:52:34 +0700 Subject: [PATCH 36/44] SearchQnA - fix files for deploy on ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/tests/test_compose_on_rocm.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/SearchQnA/tests/test_compose_on_rocm.sh b/SearchQnA/tests/test_compose_on_rocm.sh index e32c9a0e5d..0d84036c7e 100644 --- a/SearchQnA/tests/test_compose_on_rocm.sh +++ b/SearchQnA/tests/test_compose_on_rocm.sh @@ -130,15 +130,15 @@ function stop_docker() { function main() { -# stop_docker -# if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + stop_docker + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi start_services validate_megaservice validate_frontend -# stop_docker -# echo y | docker system prune + stop_docker + echo y | docker system prune } From 0f6690e708bd2b2e9c7449aa1dc7b5de7baf51c4 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 26 Mar 2025 10:03:16 +0700 Subject: [PATCH 37/44] SearchQnA - fix files for deploy on ROCm vLLM Signed-off-by: Chingis Yundunov --- SearchQnA/tests/test_compose_vllm_on_rocm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SearchQnA/tests/test_compose_vllm_on_rocm.sh b/SearchQnA/tests/test_compose_vllm_on_rocm.sh index 200f8388a9..3a047a65a9 100644 --- a/SearchQnA/tests/test_compose_vllm_on_rocm.sh +++ b/SearchQnA/tests/test_compose_vllm_on_rocm.sh @@ -134,7 +134,7 @@ function main() { if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi start_services - sleep 5m + sleep 20 validate_megaservice validate_frontend From e55695bc61d500df6619449d3a9148a0e44031c7 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Wed, 26 Mar 2025 10:42:06 +0700 Subject: [PATCH 38/44] SearchQnA - fix files for deploy on ROCm vLLM Signed-off-by: Chingis Yundunov --- .../docker_compose/amd/gpu/rocm/README.md | 270 ++++++------------ 1 file changed, 92 insertions(+), 178 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md index a2632351b1..92aa9b022c 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md @@ -272,9 +272,9 @@ All containers should be 
running and should not restart: ```bash DATA='{"model": "Intel/neural-chat-7b-v3-3", '\ -'"messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}' +'"messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 32}' -curl http://${HOST_IP}:${SEARCHQNA_VLLM_SERVICE_PORT}/v1/chat/completions \ +curl http://${HOST_IP}:${SEARCH_VLLM_SERVICE_PORT}/v1/chat/completions \ -X POST \ -d "$DATA" \ -H 'Content-Type: application/json' @@ -283,27 +283,7 @@ curl http://${HOST_IP}:${SEARCHQNA_VLLM_SERVICE_PORT}/v1/chat/completions \ Checking the response from the service. The response should be similar to JSON: ```json -{ - "id": "chatcmpl-512d16e876774d13a323514e96122cbc", - "object": "chat.completion", - "created": 1742819098, - "model": "Intel/neural-chat-7b-v3-3", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": " Deep Learning is a subset of Machine Learning that relies on Artificial Neural Networks to perform tasks like image recognition, natural language processing, and predictive analytics. It aims to model the inner workings of the human brain through the intelligent analysis of big data. Essentially, Deep Learning algorithms try to process and learn from raw data in multiple layers to extract patterns, ultimately enabling systems to recognize complex patterns and make predictions more accurately. Although thoughest it involves much data and computation power, recently it is often referred through increasingly simple approaches thanks to advances in computation hardware. 
Deep Learning developed from previous techniques like neural networks, but has proven more powerful and effective for various tasks that involve massive data volumes and complex decisions.", - "tool_calls": [] - }, - "logprobs": null, - "finish_reason": "stop", - "stop_reason": null - } - ], - "usage": { "prompt_tokens": 15, "total_tokens": 161, "completion_tokens": 146, "prompt_tokens_details": null }, - "prompt_logprobs": null -} +{"id":"chatcmpl-a3761920c4034131b3cab073b8e8b841","object":"chat.completion","created":1742959065,"model":"Intel/neural-chat-7b-v3-3","choices":[{"index":0,"message":{"role":"assistant","content":" Deep Learning refers to a modern approach of Artificial Intelligence that aims to replicate the way human brains process information by teaching computers to learn from data without extensive programming","tool_calls":[]},"logprobs":null,"finish_reason":"length","stop_reason":null}],"usage":{"prompt_tokens":15,"total_tokens":47,"completion_tokens":32,"prompt_tokens_details":null},"prompt_logprobs":null} ``` If the service response has a meaningful response in the value of the "choices.message.content" key, @@ -312,10 +292,10 @@ then we consider the vLLM service to be successfully launched #### If you use TGI: ```bash -DATA='{"inputs":"IWhat is Deep Learning?",'\ +DATA='{"inputs":"What is Deep Learning?",'\ '"parameters":{"max_new_tokens":256,"do_sample": true}}' -curl http://${HOST_IP}:${SEARCHQNA_TGI_SERVICE_PORT}/generate \ +curl http://${HOST_IP}:${SEARCH_TGI_SERVICE_PORT}/generate \ -X POST \ -d "$DATA" \ -H 'Content-Type: application/json' @@ -336,10 +316,10 @@ then we consider the TGI service to be successfully launched ```bash DATA='{"query":"What is Deep Learning?",'\ -'"max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,'\ +'"max_tokens":32,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,'\ '"repetition_penalty":1.03,"stream":false}' -curl 
http://${HOST_IP}:${SEARCHQNA_TGI_SERVICE_PORT}/v1/chat/completions \ +curl http://${HOST_IP}:${SEARCH_LLM_SERVICE_PORT}/v1/chat/completions \ -X POST \ -d "$DATA" \ -H 'Content-Type: application/json' @@ -348,200 +328,134 @@ curl http://${HOST_IP}:${SEARCHQNA_TGI_SERVICE_PORT}/v1/chat/completions \ Checking the response from the service. The response should be similar to JSON: ```json -{ - "id": "chatcmpl-512d16e876774d13a323514e96122cbc", - "object": "chat.completion", - "created": 1742819098, - "model": "Intel/neural-chat-7b-v3-3", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": " Deep Learning is a subset of Machine Learning that relies on Artificial Neural Networks to perform tasks like image recognition, natural language processing, and predictive analytics. It aims to model the inner workings of the human brain through the intelligent analysis of big data. Essentially, Deep Learning algorithms try to process and learn from raw data in multiple layers to extract patterns, ultimately enabling systems to recognize complex patterns and make predictions more accurately. Although thoughest it involves much data and computation power, recently it is often referred through increasingly simple approaches thanks to advances in computation hardware. Deep Learning developed from previous techniques like neural networks, but has proven more powerful and effective for various tasks that involve massive data volumes and complex decisions.", - "tool_calls": [] - }, - "logprobs": null, - "finish_reason": "stop", - "stop_reason": null - } - ], - "usage": { "prompt_tokens": 15, "total_tokens": 161, "completion_tokens": 146, "prompt_tokens_details": null }, - "prompt_logprobs": null -} +{"id":"cmpl-0b974d00a7604c2ab8b721ebf6b88ae3","choices":[{"finish_reason":"length","index":0,"logprobs":null,"text":"\n\nDeep Learning is a subset of Machine Learning that is concerned with algorithms inspired by the structure and function of the brain. 
It is a part of Artificial","stop_reason":null,"prompt_logprobs":null}],"created":1742959134,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":{"completion_tokens":32,"prompt_tokens":6,"total_tokens":38,"completion_tokens_details":null,"prompt_tokens_details":null}} ``` -If the service response has a meaningful response in the value of the "choices.text" key, -then we consider the vLLM service to be successfully launched +### 3. Validate TEI Embedding service + +```bash +curl http://${HOST_IP}:${SEARCH_TEI_EMBEDDING_PORT}/embed \ + -X POST \ + -d '{"inputs":"What is Deep Learning?"}' \ + -H 'Content-Type: application/json' +``` + +Checking the response from the service. The response should be similar to text: + +```textmate +[[0.00037115702,-0.06356819,..................,-0.02125421,-0.02984927,-0.0049473033]] +``` -### 3. Validate Embedding service +If the response text is similar to the one above, then we consider the service verification successful. + +### 4. Validate Embedding service ```bash -curl http://${SEARCH_HOST_IP}:${SEARCHQNA_TGI_SERVICE_PORT}/v1/embeddings\ +curl http://${HOST_IP}:${SEARCH_EMBEDDING_SERVICE_PORT}/v1/embeddings \ -X POST \ - -d '{"text":"hello"}' \ + -d '{"input":"Hello!"}' \ -H 'Content-Type: application/json' ``` Checking the response from the service. The response should be similar to JSON: ```json -{ - "detail": [ - { - "type": "missing", - "loc": ["body", "input"], - "msg": "Field required", - "input": { "text": "hello" } - } - ] -} +{"object":"list","model":"BAAI/bge-base-en-v1.5","data":[{"index":0,"object":"embedding","embedding":[0.010614655,0.019818036,"******",0.06571652,-0.019738553]}],"usage":{"prompt_tokens":4,"total_tokens":4,"completion_tokens":0}} ``` -### 4. Validate Web Retriever service +If the response JSON is similar to the one above, then we consider the service verification successful. + +### 5. 
Validate Web Retriever service ```bash export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") -curl http://${SEARCH_HOST_IP}:3003/v1/web_retrieval \ +curl http://${HOST_IP}:${SEARCH_WEB_RETRIEVER_SERVICE_PORT}/v1/web_retrieval \ -X POST \ -d "{\"text\":\"What is the 2024 holiday schedule?\",\"embedding\":${your_embedding}}" \ -H 'Content-Type: application/json' ``` -### 5. Validate the TEI Service +Checking the response from the service. The response should be similar to JSON: + +```json +{"id":"ec32c767e0ae107c4943b634648c9752","retrieved_docs":[{"downstream_black_list":[],"id":"ab002cd89cd20d9229adae1e091c7e2d","text":"2025\n\n * ### New Year’s Day 2024/2025 \n\nWednesday, January 1, 2025 Early Close (2:00 p.m. Eastern Time): Tuesday,\nDecember 31, 2024\n\n * ### Martin Luther King Day \n\nMonday, January 20, 2025\n\n * ### Presidents Day \n\nMonday, February 17, 2025\n\n * ### Good Friday \n\nFriday, April 18, 2025 Early Close (2:00 p.m. Eastern Time): Thursday, April\n17, 2025\n\n * ### Memorial Day \n\nMonday, May 26, 2025 Early Close (2:00 p.m. Eastern Time): Friday, May 23,\n2025\n\n * ### Juneteenth \n\nThursday, June 19, 2025\n\n * ### U.S. Independence Day \n\nFriday, July 4, 2025 Early Close (2:00 p.m. Eastern Time): Thursday, July 3,\n2025\n\n * ### Labor Day \n\nMonday, September 1, 2025\n\n * ### Columbus Day \n\nMonday, October 13, 2025\n\n * ### Veterans Day \n\nTuesday, November 11, 2025\n\n * ### Thanksgiving Day \n\nThursday, November 27, 2025 Early Close (2:00 p.m. Eastern Time): Friday,\nNovember 28, 2025\n\n * ### Christmas Day \n\nThursday, December 25, 2025 Early Close (2:00 p.m. Eastern Time): Wednesday,\nDecember 24, 2025\n\n * ### New Year’s Day 2025/2026 \n\nThursday, January 1, 2026 Early Close (2:00 p.m. Eastern Time): Wednesday,\nDecember 31, 2025\n\n2026\n\n * ### New Year’s Day 2025/2026 \n\nThursday, January 1, 2026 Early Close (2:00 p.m. 
Eastern Time): Wednesday,\nDecember 31, 2025\n\n * ### Martin Luther King Day \n\nMonday, January 19, 2026\n\n * ### Presidents Day \n\nMonday, February 16, 2026\n\n * ### Good Friday \n description: \n \n title: \n Holiday Schedule - SIFMA - Holiday Schedule - SIFMA\n \n \n source: https://www.sifma.org/resources/general/holiday-schedule/ \n"},{"downstream_black_list":[],"id":"f498f4a1357bfbc631a5d67663c64680","text":"Monday, May 26, 2025\n\n * ### Juneteenth \n\nThursday, June 19, 2025\n\n * ### U.S. Independence Day \n\nFriday, July 4, 2025\n\n * ### Summer Bank Holiday \n\nMonday, August 25, 2025\n\n * ### Labor Day \n\nMonday, September 1, 2025\n\n * ### Columbus Day \n\nMonday, October 13, 2025\n\n * ### Veterans Day \n\nTuesday, November 11, 2025\n\n * ### Thanksgiving Day \n\nThursday, November 27, 2025\n\n * ### Christmas Day \n\nThursday, December 25, 2025\n\n * ### Boxing Day \n\nFriday, December 26, 2025\n\n * ### New Year’s Day 2025/2026 \n\nThursday, January 1, 2026\n\n2026\n\n * ### New Year’s Day 2025/2026 \n\nThursday, January 1, 2026\n\n * ### Martin Luther King Day \n\nMonday, January 19, 2026\n\n * ### Presidents Day \n\nMonday, February 16, 2026\n\n * ### Good Friday \n\nFriday, April 3, 2026\n\n * ### Easter Monday \n\nMonday, April 6, 2026\n\n * ### May Day \n\nMonday, May 4, 2026\n\n * ### Memorial Day \n\nMonday, May 25, 2026\n\n * ### Spring Bank Holiday \n\nMonday, May 25, 2026\n\n * ### Juneteenth \n\nFriday, June 19, 2026\n\n * ### U.S. 
Independence Day \n\nFriday, July 3, 2026\n\n * ### Summer Bank Holiday \n\nMonday, August 31, 2026\n\n * ### Labor Day \n\nMonday, September 7, 2026\n\n * ### Columbus Day \n\nMonday, October 12, 2026\n\n * ### Veterans Day \n\nWednesday, November 11, 2026\n\n * ### Thanksgiving Day \n\nThursday, November 26, 2026\n\n * ### Christmas Day \n\nFriday, December 25, 2026\n\n * ### Boxing Day (Substitute) \n description: \n \n title: \n Holiday Schedule - SIFMA - Holiday Schedule - SIFMA\n \n \n source: https://www.sifma.org/resources/general/holiday-schedule/ \n"},{"downstream_black_list":[],"id":"3a845fba37a225ee3a67601cfa51f6d6","text":"**Holiday** | **2024** | **Non-Management, Supervisory Units** | **Department of Corrections Employees** | **State Police Unit** | **Exempt, Managerial, and Confidential** \n---|---|---|---|---|--- \n**New Year’s Day** | **Monday, January 1, 2024** | Observed | Observed | Observed | Observed \n**Martin Luther King Jr. Day** | **Monday, January 15, 2024** | Observed | Observed | Observed | Observed \n**Presidents' Day** | **Monday, February 19, 2024** | Observed | Observed | Observed | Observed \n**Town Meeting Day** | **Tuesday, \nMarch 5, 2024** | Observed | Observed | Observed | Observed \n**Memorial Day** | **Monday, \nMay 27, 2024** | Observed | Observed | Observed | Observed \n**Independence Day** | **Thursday, \nJuly 4, 2024** | Observed | Observed | Observed | Observed \n**Bennington Battle Day** | **Friday, \nAugust 16, 2024** | Observed | **Not Observed** | **Not Observed** | Observed \n**Labor Day** | **Monday, September 2, 2024** | Observed | Observed | Observed | Observed \n**Indigenous Peoples' Day** | **Monday, October 14, 2024** | **Not Observed** | Observed | Observed | **Not Observed** \n**Veterans' Day** | **Monday, November 11, 2024** | Observed | Observed | Observed | Observed \n**Thanksgiving Day** | **Thursday, November 28, 2024** | Observed | Observed | Observed | Observed \n**Christmas Day** | **Wednesday, 
December 25, 2024** | Observed | Observed | Observed | Observed \n title: State Holiday Schedule | Department of Human Resources \n \n source: https://humanresources.vermont.gov/benefits-wellness/holiday-schedule \n"},{"downstream_black_list":[],"id":"34926c9655c38d2af761833d57c8ab8a","text":"* ### Good Friday \n\nNone Early Close (12:00 p.m. Eastern Time): Friday, April 3, 2026 - Tentative\n- pending confirmation of scheduled release of BLS employment report\n\n * ### Memorial Day \n\nMonday, May 25, 2026 Early Close (2:00 p.m. Eastern Time): Friday, May 22,\n2026\n\n * ### Juneteenth \n\nFriday, June 19, 2026\n\n * ### U.S. Independence Day (observed) \n\nFriday, July 3, 2026 Early Close (2:00 p.m. Eastern Time): Thursday, July 2,\n2026\n\n * ### Labor Day \n\nMonday, September 7, 2026\n\n * ### Columbus Day \n\nMonday, October 12, 2026\n\n * ### Veterans Day \n\nWednesday, November 11, 2026\n\n * ### Thanksgiving Day \n\nThursday, November 26, 2026 Early Close (2:00 p.m. Eastern Time): Friday,\nNovember 27, 2026\n\n * ### Christmas Day \n\nFriday, December 25, 2026 Early Close (2:00 p.m. Eastern Time): Thursday,\nDecember 24, 2026\n\n * ### New Year’s Day 2026/2027 \n\nFriday, January 1, 2027 Early Close (2:00 p.m. Eastern Time): Thursday,\nDecember 31, 2026\n\nArchive\n\n### U.K. 
Holiday Recommendations\n\n2025\n\n * ### New Year’s Day 2024/2025 \n\nWednesday, January 1, 2025\n\n * ### Martin Luther King Day \n\nMonday, January 20, 2025\n\n * ### Presidents Day \n\nMonday, February 17, 2025\n\n * ### Good Friday \n\nFriday, April 18, 2025\n\n * ### Easter Monday \n\nMonday, April 21, 2025\n\n * ### May Day \n\nMonday, May 5, 2025\n\n * ### Memorial Day \n\nMonday, May 26, 2025\n\n * ### Spring Bank Holiday \n\nMonday, May 26, 2025\n\n * ### Juneteenth \n description: \n \n title: \n Holiday Schedule - SIFMA - Holiday Schedule - SIFMA\n \n \n source: https://www.sifma.org/resources/general/holiday-schedule/ \n"}],"initial_query":"What is the 2024 holiday schedule?","top_n":1} +``` + +If the response JSON is similar to the one above, then we consider the service verification successful. -```bash -DATA='{"inputs":"What is Deep Learning?"}' +### 6. Validate the TEI Reranking Service -curl http://${HOST_IP}:${SEARCHQNA_TEI_SERVICE_PORT}//embed \ +```bash +DATA='{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' +curl http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT}/rerank \ -X POST \ -d "$DATA" \ -H 'Content-Type: application/json' ``` -Checking the response from the service. The response should be similar to Array of Data: +Checking the response from the service. 
The response should be similar to JSON: ```json -[ - [ - 0.00037115702, -0.06356819, 0.0024758505, -0.012360337, 0.050739925, 0.023380278, 0.022216318, 0.0008076447, - -0.0003412891, -0.033479452, -0.024872458, 0.0064700204, -0.00731641, 0.06648339, 0.0013361155, 0.047004532, - 0.062838696, -0.021005465, 0.011151533, 0.044124223, -0.050683793, -0.062093593, -0.03992629, 0.017758112, - -0.0013727234, 0.0022603935, -0.04363493, 0.012822347, -0.02408198, 0.011079012, 0.028809752, -0.008898206, - 0.037047423, -0.027456092, 0.016162485, 0.04173568, -0.039396558, -0.035203997, -0.022387454, -0.019808905, - -0.01864915, -0.042313505, -0.0120891025, 0.048949677, -0.08100209, 0.017953783, -0.12084276, 0.0024097406, - -0.022705944, -0.012279724, -0.07547717, 0.051262986, 0.03203861, -0.019056482, 0.04271625, 0.015248945, - 0.004222296, -0.08073051, 0.010240495, -0.05635268, 0.052041706, 0.03712775, -0.01854724, -0.02750096, - -0.00096631586, -0.026202224, 0.024124105, 0.042904165, -0.023528703, -0.0034956702, -0.028778492, 0.029217377, - -0.020601744, -0.0049860086, -0.05246627, -0.011162583, 0.012888553, 0.014507065, 0.08219481, -0.008273658, - 0.0036607939, 0.062248874, 0.042562004, 0.03170365, 0.0046070544, 0.00065274147, -0.019365542, -0.004698561, - -0.0449053, 0.02275239, 0.01039843, -0.053169794, 0.060175993, 0.051545423, 0.014204941, 0.0076600607, 0.013906856, - -0.035385784, -0.011683805, -0.014732695, -0.02331647, -0.059045117, -0.016870823, -0.014698294, -0.048483565, - 0.026726946, 0.05227064, -0.013973138, 0.014551645, -0.019573484, -0.0013427412, -0.008475066, -0.0025058866, - -0.048502546, -0.043069497, -0.0077841803, -0.016379999, 0.0037450534, -0.025010578, -0.04592572, 0.034388185, - 0.03836159, 0.0019682923, 0.021373231, -0.03391387, 0.015393363, 0.003937917, 0.01832765, 0.0045520393, -0.02696203, - 0.020696502, 0.016930614, -0.007926859, 0.021834886, -0.014779224, 0.00073025556, -0.020250296, 0.006635754, - 0.025785012, 0.009847587, -0.002533611, 
-0.057919327, 0.03010091, -0.03554674, 0.054443054, -0.015446536, - -0.0079982905, -0.0042982297, -0.018884834, 0.0027541735, -0.044417977, 0.05555447, -0.018901609, -0.049503766, - 0.008309782, 0.039867956, -0.0004423662, 0.0059798234, 0.03447887, 0.023205558, 0.058959927, -0.019526886, - -0.054637823, -0.009800092, -0.024515655, -0.05426387, 0.05535355, 0.024482403, -0.020081121, 0.024965372, - -0.002176406, -0.011429285, 0.02036594, -0.011996402, 0.011601014, 0.04732072, 0.028819714, 0.03407571, 0.0430521, - 0.05145868, -0.065615594, 0.046596047, -0.008815781, -0.0063788523, -0.044762302, -0.03171996, 0.04966251, - -0.010887125, 0.036779672, 0.014379601, -0.06393863, -0.036413074, -0.033719108, -0.037734028, 0.033251368, - -0.01693572, -0.015116194, 0.082118206, -0.011095621, 0.046565905, 0.054315507, -0.051471975, 0.0153609, - -0.016379755, -0.02725454, 0.029903106, 0.01588181, -0.043773234, -0.0034126595, 0.0034703915, 0.0074963053, - -0.049301904, -0.005326988, 0.0014556781, 0.043266784, 0.03043187, -0.008008064, -0.0047954894, 0.0065719066, - -0.018209687, 0.00520577, -0.04222329, 0.024618099, 0.0030018033, 0.008215917, 0.088026844, 0.041226704, - -0.05174175, 0.035067245, -0.037319127, 0.0037409177, 0.024523623, -0.0126059465, 0.019197112, 0.013823613, - -0.02756309, 0.014537172, 0.010373209, 0.045283005, -0.033583794, -0.07042238, 0.0071703074, -0.047405772, - 0.052970607, 0.01187145, 0.009470498, 0.033309255, -0.014022496, -0.01466476, -0.016799983, -0.004560339, - -0.00007741032, 0.016623817, 0.02886948, -0.023846539, -0.05926324, 0.0019861246, -0.0097210035, 0.10283416, - 0.027582858, -0.050722197, 0.051445477, -0.027595742, 0.022260211, -0.025540655, -0.09528184, -0.028447622, - -0.020006616, 0.08766454, -0.014110661, 0.04828308, 0.0074301455, 0.03928737, -0.0000046884684, -0.026885474, - 0.005424345, 0.054999787, 0.055203326, -0.012640017, -0.0435913, -0.024285164, 0.06663095, 0.005627971, - -0.015168387, 0.027197381, -0.026075814, -0.003045215, 
-0.008655605, -0.009072627, 0.004339306, 0.03589536, - 0.061759293, -0.04240408, 0.04873947, 0.021134883, 0.053518154, 0.045864865, -0.027563328, -0.01566489, - 0.00018125105, -0.007070503, 0.039647527, -0.021650534, 0.038786504, 0.02006178, -0.013114097, 0.07950984, - -0.014730525, -0.19681875, -0.013000412, 0.018087342, -0.0073786196, 0.038186155, -0.059353005, -0.0058362517, - -0.009970051, 0.0016716863, -0.023077143, -0.02714242, -0.006529649, 0.037998736, 0.025349554, 0.019855456, - -0.016530242, 0.00880591, -0.016678277, -0.03673031, 0.045423195, -0.03146899, -0.029318942, -0.012635296, - 0.071473934, -0.02904274, 0.027330637, -0.084734075, -0.05050938, -0.0030655882, -0.0022098075, -0.02383695, - -0.028460467, -0.03240081, 0.048773084, 0.023262978, 0.016216593, 0.027833678, -0.039854486, -0.002443358, - 0.01758309, -0.033520985, -0.04862155, 0.0030191801, -0.040858116, 0.045017388, 0.01576234, -0.09301789, - -0.04828378, -0.014886363, 0.0012595668, -0.010673225, -0.02463904, -0.06783802, -0.0012545382, 0.015514673, - -0.004911741, 0.0025960177, -0.012014308, -0.024893451, 0.036577918, -0.003223495, -0.020390507, -0.022805423, - -0.059310623, -0.02081245, -0.023387661, -0.061122973, -0.06244, 0.017364288, 0.033477243, -0.010211365, 0.04805492, - -0.0644543, -0.048770227, 0.0068986556, -0.025725175, -0.029574871, -0.00949049, 0.05490974, 0.027187059, - 0.00826158, -0.06282722, 0.035274204, 0.012130771, -0.009545266, -0.048487406, 0.04640102, -0.037075754, - -0.020248186, -0.02851919, 0.064635284, -0.0064534973, -0.026640853, -0.026290758, 0.035040796, 0.020074066, - 0.0032996435, 0.02883776, -0.012944289, 0.019450067, -0.02121465, -0.024558635, -0.04377821, -0.016631315, - -0.04083968, -0.021962307, -0.010120014, 0.02998998, 0.10129919, -0.0025703132, -0.03771752, 0.01426784, - 0.025374308, 0.00082124525, 0.00029568642, -0.030749727, 0.016260363, 0.0014756168, 0.018676473, -0.03861688, - -0.032052398, 0.056064054, 0.005533946, 0.04515451, 0.015364342, 
-0.02965325, 0.0009782034, 0.01524649, 0.019077078, - -0.025799321, 0.020865263, -0.00037949806, 0.012502633, 0.0090223905, -0.0015367466, -0.012833919, -0.011109666, - -0.006981191, -0.009670439, 0.009430074, -0.007729517, 0.0016868497, 0.016697595, -0.015892748, -0.020780738, - 0.049529854, -0.07344469, 0.0607613, -0.0068755895, -0.014736902, 0.014770749, -0.028858911, 0.025249828, - -0.058469485, 0.030096894, -0.007117604, 0.010155325, -0.0065526864, -0.028654601, -0.04420291, 0.009965181, - 0.030222228, -0.010007972, 0.0104629295, 0.05589087, 0.05443477, -0.02641796, -0.061689503, 0.03118466, 0.012150501, - 0.03404673, -0.029666431, -0.008654386, -0.031682808, -0.014843155, 0.036703967, 0.026411135, -0.005715008, - 0.024990784, 0.058862202, 0.017355891, 0.039204415, -0.0034798204, 0.033091135, 0.050439566, 0.032798093, - -0.029705318, 0.005968363, -0.055048566, 0.028009748, -0.03823961, 0.024362633, -0.017294712, -0.019563003, - -0.019944556, -0.027790153, -0.01866823, 0.047109686, -0.0033735516, -0.020653522, -0.039765686, -0.019055683, - -0.0263571, -0.023188936, 0.049641415, -0.077975206, 0.030659853, 0.048734687, 0.044718176, 0.036765084, - -0.011803315, -0.027699227, -0.07258002, -0.08741319, -0.0392474, -0.042096145, -0.0040325304, 0.01667375, - 0.026754893, -0.030304687, 0.029919326, 0.024295082, 0.011638254, -0.012232291, -0.047564257, -0.036413006, - 0.026577674, 0.036411874, 0.00057670544, 0.017877145, 0.009268524, -0.006965588, 0.011874776, -0.005112591, - -0.034651127, 0.03160231, -0.052825063, 0.014719321, -0.0139615545, -0.016238235, 0.002020219, 0.02526055, - -0.07056756, 0.010022732, -0.014104433, -0.005984697, -0.00897443, 0.021115793, -0.043804843, -0.027990978, - 0.060727082, 0.0040618493, -0.038511537, -0.048857935, 0.024104802, -0.059829835, -0.029107396, -0.05538522, - -0.06930553, -0.0057559577, -0.022053827, -0.00876388, -0.0056931996, 0.029746206, 0.0224666, 0.008767829, - -0.03966822, -0.006478918, 0.06567699, -0.01581077, -0.03742192, 
-0.06186453, -0.028619587, 0.08638498, 0.031267703, - -0.0008673075, 0.003113204, 0.012213491, 0.020067157, -0.02849485, 0.0018909829, 0.02714576, 0.0026566028, - -0.03609787, 0.0060567204, -0.047545094, -0.0046444787, -0.021402694, -0.023118727, -0.015218381, -0.043136228, - -0.0438743, -0.005564044, -0.009355076, -0.028500054, 0.009921202, 0.027966693, 0.06036647, 0.06929019, 0.007004997, - -0.024255225, 0.04914266, 0.0032520234, 0.0044063884, -0.029372599, 0.038042217, -0.035385627, -0.04905816, - 0.047601648, 0.0071805464, -0.008339494, -0.035425205, 0.036915354, 0.024695326, -0.038979523, 0.01886513, - 0.013804558, -0.04848749, -0.04819779, 0.022526458, -0.029244151, 0.041152976, 0.04666112, 0.020387372, 0.037857335, - 0.060002513, 0.011064769, -0.032094717, 0.070615225, 0.04814509, 0.017521046, 0.074162334, -0.04956284, 0.07335939, - -0.009453019, -0.06289444, 0.024246441, 0.021851622, 0.01857824, 0.02037353, -0.017273203, 0.021301785, 0.05051385, - 0.053983003, -0.01588495, 0.054096334, 0.05107405, 0.0720548, -0.029601721, 0.04816011, 0.006444874, -0.02505102, - 0.013238045, -0.021370836, 0.025479412, -0.048463117, 0.03514722, 0.08079718, 0.00369719, -0.015530819, - 0.0021374116, 0.03247959, 0.11611161, -0.021934662, -0.029833768, 0.016046036, -0.00634777, -0.06037879, - -0.005574648, 0.028324481, -0.021840915, 0.03284168, -0.022047363, -0.03463407, 0.011823492, -0.03520137, - -0.014746701, -0.03972389, -0.02124471, 0.026924072, -0.0022506462, 0.04452787, -0.015707701, -0.0065392647, - 0.0066317394, -0.005149294, -0.07763598, 0.054278333, 0.027830306, -0.03989325, -0.026995605, -0.024925973, - -0.0024197767, 0.07852477, -0.034251966, 0.03694585, 0.044244047, 0.012739273, 0.0037145729, 0.008245091, - 0.013920077, -0.010570776, -0.021823786, 0.057918977, -0.075884886, -0.054011993, 0.0039594076, 0.003970741, - -0.038295034, -0.03029311, 0.063210145, -0.08822839, -0.061069354, 0.08516593, 0.020341832, 0.08075477, 0.03257605, - 0.0039170105, 0.029395742, 
0.012290831, -0.06368765, 0.023519376, -0.0173505, -0.001395915, 0.017215127, - 0.043243848, 0.04967547, 0.028518617, 0.021273924, -0.0023932487, -0.030911915, -0.05524172, -0.045551147, - 0.042072143, -0.027773965, -0.03693362, 0.028450156, 0.06675585, -0.061626967, -0.08894698, 0.045917906, - -0.00475913, 0.034920968, -0.0064531155, -0.00689886, -0.06119457, 0.021173967, -0.027787622, -0.02472986, - 0.03998034, 0.03737826, -0.0067949123, 0.022558564, -0.04570635, -0.033072025, 0.022725677, 0.016026087, - -0.02125421, -0.02984927, -0.0049473033 - ] -] +[{"index":1,"score":0.94238955},{"index":0,"score":0.120219156}] ``` -### 6. Validate Reranking service +If the response JSON is similar to the one above, then we consider the service verification successful. + +### 7. Validate Reranking service ```bash -curl http://${SEARCH_HOST_IP}:3005/v1/reranking\ +DATA='{"initial_query":"What is Deep Learning?", "retrieved_docs": '\ +'[{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' + +curl http://${HOST_IP}:${SEARCH_RERANK_SERVICE_PORT}/v1/reranking \ -X POST \ - -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -d "$DATA" \ -H 'Content-Type: application/json' ``` -### 7 Validate MegaService +Checking the response from the service. The response should be similar to JSON: + +```json +{"id":"d44b5be4002e8e2cc3b6a4861e396093","model":null,"query":"What is Deep Learning?","max_tokens":1024,"max_new_tokens":1024,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"frequency_penalty":0.0,"presence_penalty":0.0,"repetition_penalty":1.03,"stream":true,"language":"auto","chat_template":null,"documents":["Deep learning is..."]} +``` + +If the response JSON is similar to the one above, then we consider the service verification successful. + +### 8. 
Validate MegaService ```bash -curl http://${SEARCH_HOST_IP}:3008/v1/searchqna -H "Content-Type: application/json" -d '{ - "messages": "What is the latest news? Give me also the source link.", - "stream": "True" - }' +DATA='{"messages": "What is the latest news from the AI world? '\ +'Give me a summary.","stream": "True"}' + +curl http://${HOST_IP}:${SEARCH_BACKEND_SERVICE_PORT}/v1/searchqna \ + -H "Content-Type: application/json" \ + -d "$DATA" +``` + +Checking the response from the service. The response should be similar to JSON: + +```textmate +data: {"id":"cmpl-f095893d094a4e9989423c2364f00bc1","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":",","stop_reason":null}],"created":1742960360,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-f095893d094a4e9989423c2364f00bc1","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" with","stop_reason":null}],"created":1742960360,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-f095893d094a4e9989423c2364f00bc1","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" calls","stop_reason":null}],"created":1742960360,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-f095893d094a4e9989423c2364f00bc1","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" for","stop_reason":null}],"created":1742960360,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-f095893d094a4e9989423c2364f00bc1","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" more","stop_reason":null}],"created":1742960360,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: 
{"id":"cmpl-f095893d094a4e9989423c2364f00bc1","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" regulation","stop_reason":null}],"created":1742960360,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-f095893d094a4e9989423c2364f00bc1","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" and","stop_reason":null}],"created":1742960360,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-f095893d094a4e9989423c2364f00bc1","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":" trans","stop_reason":null}],"created":1742960360,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-f095893d094a4e9989423c2364f00bc1","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":"parency","stop_reason":null}],"created":1742960360,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-f095893d094a4e9989423c2364f00bc1","choices":[{"finish_reason":null,"index":0,"logprobs":null,"text":".","stop_reason":null}],"created":1742960360,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: {"id":"cmpl-f095893d094a4e9989423c2364f00bc1","choices":[{"finish_reason":"stop","index":0,"logprobs":null,"text":"","stop_reason":null}],"created":1742960360,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":null} +data: [DONE] ``` -### 8. Stop application +If the response text is similar to the one above, then we consider the service verification successful. + +### 9. Validate Frontend + + + +### 10. 
Stop application #### If you use vLLM From 7a0bf040e2f9d5cec9ecddeaa1a69fd096181660 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 03:42:36 +0000 Subject: [PATCH 39/44] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../docker_compose/amd/gpu/rocm/README.md | 110 ++++++++++++++++-- 1 file changed, 102 insertions(+), 8 deletions(-) diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md index 92aa9b022c..d95f8a830b 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md @@ -283,7 +283,27 @@ curl http://${HOST_IP}:${SEARCH_VLLM_SERVICE_PORT}/v1/chat/completions \ Checking the response from the service. The response should be similar to JSON: ```json -{"id":"chatcmpl-a3761920c4034131b3cab073b8e8b841","object":"chat.completion","created":1742959065,"model":"Intel/neural-chat-7b-v3-3","choices":[{"index":0,"message":{"role":"assistant","content":" Deep Learning refers to a modern approach of Artificial Intelligence that aims to replicate the way human brains process information by teaching computers to learn from data without extensive programming","tool_calls":[]},"logprobs":null,"finish_reason":"length","stop_reason":null}],"usage":{"prompt_tokens":15,"total_tokens":47,"completion_tokens":32,"prompt_tokens_details":null},"prompt_logprobs":null} +{ + "id": "chatcmpl-a3761920c4034131b3cab073b8e8b841", + "object": "chat.completion", + "created": 1742959065, + "model": "Intel/neural-chat-7b-v3-3", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": " Deep Learning refers to a modern approach of Artificial Intelligence that aims to replicate the way human brains process information by teaching computers to learn from data without extensive programming", + "tool_calls": [] + }, + "logprobs": 
null, + "finish_reason": "length", + "stop_reason": null + } + ], + "usage": { "prompt_tokens": 15, "total_tokens": 47, "completion_tokens": 32, "prompt_tokens_details": null }, + "prompt_logprobs": null +} ``` If the service response has a meaningful response in the value of the "choices.message.content" key, @@ -328,7 +348,30 @@ curl http://${HOST_IP}:${SEARCH_LLM_SERVICE_PORT}/v1/chat/completions \ Checking the response from the service. The response should be similar to JSON: ```json -{"id":"cmpl-0b974d00a7604c2ab8b721ebf6b88ae3","choices":[{"finish_reason":"length","index":0,"logprobs":null,"text":"\n\nDeep Learning is a subset of Machine Learning that is concerned with algorithms inspired by the structure and function of the brain. It is a part of Artificial","stop_reason":null,"prompt_logprobs":null}],"created":1742959134,"model":"Intel/neural-chat-7b-v3-3","object":"text_completion","system_fingerprint":null,"usage":{"completion_tokens":32,"prompt_tokens":6,"total_tokens":38,"completion_tokens_details":null,"prompt_tokens_details":null}} +{ + "id": "cmpl-0b974d00a7604c2ab8b721ebf6b88ae3", + "choices": [ + { + "finish_reason": "length", + "index": 0, + "logprobs": null, + "text": "\n\nDeep Learning is a subset of Machine Learning that is concerned with algorithms inspired by the structure and function of the brain. It is a part of Artificial", + "stop_reason": null, + "prompt_logprobs": null + } + ], + "created": 1742959134, + "model": "Intel/neural-chat-7b-v3-3", + "object": "text_completion", + "system_fingerprint": null, + "usage": { + "completion_tokens": 32, + "prompt_tokens": 6, + "total_tokens": 38, + "completion_tokens_details": null, + "prompt_tokens_details": null + } +} ``` ### 3. Validate TEI Embedding service @@ -360,7 +403,14 @@ curl http://${HOST_IP}:${SEARCH_EMBEDDING_SERVICE_PORT}/v1/embeddings \ Checking the response from the service. 
The response should be similar to JSON: ```json -{"object":"list","model":"BAAI/bge-base-en-v1.5","data":[{"index":0,"object":"embedding","embedding":[0.010614655,0.019818036,"******",0.06571652,-0.019738553]}],"usage":{"prompt_tokens":4,"total_tokens":4,"completion_tokens":0}} +{ + "object": "list", + "model": "BAAI/bge-base-en-v1.5", + "data": [ + { "index": 0, "object": "embedding", "embedding": [0.010614655, 0.019818036, "******", 0.06571652, -0.019738553] } + ], + "usage": { "prompt_tokens": 4, "total_tokens": 4, "completion_tokens": 0 } +} ``` If the response JSON is similar to the one above, then we consider the service verification successful. @@ -378,7 +428,33 @@ curl http://${HOST_IP}:${SEARCH_WEB_RETRIEVER_SERVICE_PORT}/v1/web_retrieval \ Checking the response from the service. The response should be similar to JSON: ```json -{"id":"ec32c767e0ae107c4943b634648c9752","retrieved_docs":[{"downstream_black_list":[],"id":"ab002cd89cd20d9229adae1e091c7e2d","text":"2025\n\n * ### New Year’s Day 2024/2025 \n\nWednesday, January 1, 2025 Early Close (2:00 p.m. Eastern Time): Tuesday,\nDecember 31, 2024\n\n * ### Martin Luther King Day \n\nMonday, January 20, 2025\n\n * ### Presidents Day \n\nMonday, February 17, 2025\n\n * ### Good Friday \n\nFriday, April 18, 2025 Early Close (2:00 p.m. Eastern Time): Thursday, April\n17, 2025\n\n * ### Memorial Day \n\nMonday, May 26, 2025 Early Close (2:00 p.m. Eastern Time): Friday, May 23,\n2025\n\n * ### Juneteenth \n\nThursday, June 19, 2025\n\n * ### U.S. Independence Day \n\nFriday, July 4, 2025 Early Close (2:00 p.m. Eastern Time): Thursday, July 3,\n2025\n\n * ### Labor Day \n\nMonday, September 1, 2025\n\n * ### Columbus Day \n\nMonday, October 13, 2025\n\n * ### Veterans Day \n\nTuesday, November 11, 2025\n\n * ### Thanksgiving Day \n\nThursday, November 27, 2025 Early Close (2:00 p.m. Eastern Time): Friday,\nNovember 28, 2025\n\n * ### Christmas Day \n\nThursday, December 25, 2025 Early Close (2:00 p.m. 
Eastern Time): Wednesday,\nDecember 24, 2025\n\n * ### New Year’s Day 2025/2026 \n\nThursday, January 1, 2026 Early Close (2:00 p.m. Eastern Time): Wednesday,\nDecember 31, 2025\n\n2026\n\n * ### New Year’s Day 2025/2026 \n\nThursday, January 1, 2026 Early Close (2:00 p.m. Eastern Time): Wednesday,\nDecember 31, 2025\n\n * ### Martin Luther King Day \n\nMonday, January 19, 2026\n\n * ### Presidents Day \n\nMonday, February 16, 2026\n\n * ### Good Friday \n description: \n \n title: \n Holiday Schedule - SIFMA - Holiday Schedule - SIFMA\n \n \n source: https://www.sifma.org/resources/general/holiday-schedule/ \n"},{"downstream_black_list":[],"id":"f498f4a1357bfbc631a5d67663c64680","text":"Monday, May 26, 2025\n\n * ### Juneteenth \n\nThursday, June 19, 2025\n\n * ### U.S. Independence Day \n\nFriday, July 4, 2025\n\n * ### Summer Bank Holiday \n\nMonday, August 25, 2025\n\n * ### Labor Day \n\nMonday, September 1, 2025\n\n * ### Columbus Day \n\nMonday, October 13, 2025\n\n * ### Veterans Day \n\nTuesday, November 11, 2025\n\n * ### Thanksgiving Day \n\nThursday, November 27, 2025\n\n * ### Christmas Day \n\nThursday, December 25, 2025\n\n * ### Boxing Day \n\nFriday, December 26, 2025\n\n * ### New Year’s Day 2025/2026 \n\nThursday, January 1, 2026\n\n2026\n\n * ### New Year’s Day 2025/2026 \n\nThursday, January 1, 2026\n\n * ### Martin Luther King Day \n\nMonday, January 19, 2026\n\n * ### Presidents Day \n\nMonday, February 16, 2026\n\n * ### Good Friday \n\nFriday, April 3, 2026\n\n * ### Easter Monday \n\nMonday, April 6, 2026\n\n * ### May Day \n\nMonday, May 4, 2026\n\n * ### Memorial Day \n\nMonday, May 25, 2026\n\n * ### Spring Bank Holiday \n\nMonday, May 25, 2026\n\n * ### Juneteenth \n\nFriday, June 19, 2026\n\n * ### U.S. 
Independence Day \n\nFriday, July 3, 2026\n\n * ### Summer Bank Holiday \n\nMonday, August 31, 2026\n\n * ### Labor Day \n\nMonday, September 7, 2026\n\n * ### Columbus Day \n\nMonday, October 12, 2026\n\n * ### Veterans Day \n\nWednesday, November 11, 2026\n\n * ### Thanksgiving Day \n\nThursday, November 26, 2026\n\n * ### Christmas Day \n\nFriday, December 25, 2026\n\n * ### Boxing Day (Substitute) \n description: \n \n title: \n Holiday Schedule - SIFMA - Holiday Schedule - SIFMA\n \n \n source: https://www.sifma.org/resources/general/holiday-schedule/ \n"},{"downstream_black_list":[],"id":"3a845fba37a225ee3a67601cfa51f6d6","text":"**Holiday** | **2024** | **Non-Management, Supervisory Units** | **Department of Corrections Employees** | **State Police Unit** | **Exempt, Managerial, and Confidential** \n---|---|---|---|---|--- \n**New Year’s Day** | **Monday, January 1, 2024** | Observed | Observed | Observed | Observed \n**Martin Luther King Jr. Day** | **Monday, January 15, 2024** | Observed | Observed | Observed | Observed \n**Presidents' Day** | **Monday, February 19, 2024** | Observed | Observed | Observed | Observed \n**Town Meeting Day** | **Tuesday, \nMarch 5, 2024** | Observed | Observed | Observed | Observed \n**Memorial Day** | **Monday, \nMay 27, 2024** | Observed | Observed | Observed | Observed \n**Independence Day** | **Thursday, \nJuly 4, 2024** | Observed | Observed | Observed | Observed \n**Bennington Battle Day** | **Friday, \nAugust 16, 2024** | Observed | **Not Observed** | **Not Observed** | Observed \n**Labor Day** | **Monday, September 2, 2024** | Observed | Observed | Observed | Observed \n**Indigenous Peoples' Day** | **Monday, October 14, 2024** | **Not Observed** | Observed | Observed | **Not Observed** \n**Veterans' Day** | **Monday, November 11, 2024** | Observed | Observed | Observed | Observed \n**Thanksgiving Day** | **Thursday, November 28, 2024** | Observed | Observed | Observed | Observed \n**Christmas Day** | **Wednesday, 
December 25, 2024** | Observed | Observed | Observed | Observed \n title: State Holiday Schedule | Department of Human Resources \n \n source: https://humanresources.vermont.gov/benefits-wellness/holiday-schedule \n"},{"downstream_black_list":[],"id":"34926c9655c38d2af761833d57c8ab8a","text":"* ### Good Friday \n\nNone Early Close (12:00 p.m. Eastern Time): Friday, April 3, 2026 - Tentative\n- pending confirmation of scheduled release of BLS employment report\n\n * ### Memorial Day \n\nMonday, May 25, 2026 Early Close (2:00 p.m. Eastern Time): Friday, May 22,\n2026\n\n * ### Juneteenth \n\nFriday, June 19, 2026\n\n * ### U.S. Independence Day (observed) \n\nFriday, July 3, 2026 Early Close (2:00 p.m. Eastern Time): Thursday, July 2,\n2026\n\n * ### Labor Day \n\nMonday, September 7, 2026\n\n * ### Columbus Day \n\nMonday, October 12, 2026\n\n * ### Veterans Day \n\nWednesday, November 11, 2026\n\n * ### Thanksgiving Day \n\nThursday, November 26, 2026 Early Close (2:00 p.m. Eastern Time): Friday,\nNovember 27, 2026\n\n * ### Christmas Day \n\nFriday, December 25, 2026 Early Close (2:00 p.m. Eastern Time): Thursday,\nDecember 24, 2026\n\n * ### New Year’s Day 2026/2027 \n\nFriday, January 1, 2027 Early Close (2:00 p.m. Eastern Time): Thursday,\nDecember 31, 2026\n\nArchive\n\n### U.K. 
Holiday Recommendations\n\n2025\n\n * ### New Year’s Day 2024/2025 \n\nWednesday, January 1, 2025\n\n * ### Martin Luther King Day \n\nMonday, January 20, 2025\n\n * ### Presidents Day \n\nMonday, February 17, 2025\n\n * ### Good Friday \n\nFriday, April 18, 2025\n\n * ### Easter Monday \n\nMonday, April 21, 2025\n\n * ### May Day \n\nMonday, May 5, 2025\n\n * ### Memorial Day \n\nMonday, May 26, 2025\n\n * ### Spring Bank Holiday \n\nMonday, May 26, 2025\n\n * ### Juneteenth \n description: \n \n title: \n Holiday Schedule - SIFMA - Holiday Schedule - SIFMA\n \n \n source: https://www.sifma.org/resources/general/holiday-schedule/ \n"}],"initial_query":"What is the 2024 holiday schedule?","top_n":1} +{ + "id": "ec32c767e0ae107c4943b634648c9752", + "retrieved_docs": [ + { + "downstream_black_list": [], + "id": "ab002cd89cd20d9229adae1e091c7e2d", + "text": "2025\n\n * ### New Year’s Day 2024/2025 \n\nWednesday, January 1, 2025 Early Close (2:00 p.m. Eastern Time): Tuesday,\nDecember 31, 2024\n\n * ### Martin Luther King Day \n\nMonday, January 20, 2025\n\n * ### Presidents Day \n\nMonday, February 17, 2025\n\n * ### Good Friday \n\nFriday, April 18, 2025 Early Close (2:00 p.m. Eastern Time): Thursday, April\n17, 2025\n\n * ### Memorial Day \n\nMonday, May 26, 2025 Early Close (2:00 p.m. Eastern Time): Friday, May 23,\n2025\n\n * ### Juneteenth \n\nThursday, June 19, 2025\n\n * ### U.S. Independence Day \n\nFriday, July 4, 2025 Early Close (2:00 p.m. Eastern Time): Thursday, July 3,\n2025\n\n * ### Labor Day \n\nMonday, September 1, 2025\n\n * ### Columbus Day \n\nMonday, October 13, 2025\n\n * ### Veterans Day \n\nTuesday, November 11, 2025\n\n * ### Thanksgiving Day \n\nThursday, November 27, 2025 Early Close (2:00 p.m. Eastern Time): Friday,\nNovember 28, 2025\n\n * ### Christmas Day \n\nThursday, December 25, 2025 Early Close (2:00 p.m. 
Eastern Time): Wednesday,\nDecember 24, 2025\n\n * ### New Year’s Day 2025/2026 \n\nThursday, January 1, 2026 Early Close (2:00 p.m. Eastern Time): Wednesday,\nDecember 31, 2025\n\n2026\n\n * ### New Year’s Day 2025/2026 \n\nThursday, January 1, 2026 Early Close (2:00 p.m. Eastern Time): Wednesday,\nDecember 31, 2025\n\n * ### Martin Luther King Day \n\nMonday, January 19, 2026\n\n * ### Presidents Day \n\nMonday, February 16, 2026\n\n * ### Good Friday \n description: \n \n title: \n Holiday Schedule - SIFMA - Holiday Schedule - SIFMA\n \n \n source: https://www.sifma.org/resources/general/holiday-schedule/ \n" + }, + { + "downstream_black_list": [], + "id": "f498f4a1357bfbc631a5d67663c64680", + "text": "Monday, May 26, 2025\n\n * ### Juneteenth \n\nThursday, June 19, 2025\n\n * ### U.S. Independence Day \n\nFriday, July 4, 2025\n\n * ### Summer Bank Holiday \n\nMonday, August 25, 2025\n\n * ### Labor Day \n\nMonday, September 1, 2025\n\n * ### Columbus Day \n\nMonday, October 13, 2025\n\n * ### Veterans Day \n\nTuesday, November 11, 2025\n\n * ### Thanksgiving Day \n\nThursday, November 27, 2025\n\n * ### Christmas Day \n\nThursday, December 25, 2025\n\n * ### Boxing Day \n\nFriday, December 26, 2025\n\n * ### New Year’s Day 2025/2026 \n\nThursday, January 1, 2026\n\n2026\n\n * ### New Year’s Day 2025/2026 \n\nThursday, January 1, 2026\n\n * ### Martin Luther King Day \n\nMonday, January 19, 2026\n\n * ### Presidents Day \n\nMonday, February 16, 2026\n\n * ### Good Friday \n\nFriday, April 3, 2026\n\n * ### Easter Monday \n\nMonday, April 6, 2026\n\n * ### May Day \n\nMonday, May 4, 2026\n\n * ### Memorial Day \n\nMonday, May 25, 2026\n\n * ### Spring Bank Holiday \n\nMonday, May 25, 2026\n\n * ### Juneteenth \n\nFriday, June 19, 2026\n\n * ### U.S. 
Independence Day \n\nFriday, July 3, 2026\n\n * ### Summer Bank Holiday \n\nMonday, August 31, 2026\n\n * ### Labor Day \n\nMonday, September 7, 2026\n\n * ### Columbus Day \n\nMonday, October 12, 2026\n\n * ### Veterans Day \n\nWednesday, November 11, 2026\n\n * ### Thanksgiving Day \n\nThursday, November 26, 2026\n\n * ### Christmas Day \n\nFriday, December 25, 2026\n\n * ### Boxing Day (Substitute) \n description: \n \n title: \n Holiday Schedule - SIFMA - Holiday Schedule - SIFMA\n \n \n source: https://www.sifma.org/resources/general/holiday-schedule/ \n" + }, + { + "downstream_black_list": [], + "id": "3a845fba37a225ee3a67601cfa51f6d6", + "text": "**Holiday** | **2024** | **Non-Management, Supervisory Units** | **Department of Corrections Employees** | **State Police Unit** | **Exempt, Managerial, and Confidential** \n---|---|---|---|---|--- \n**New Year’s Day** | **Monday, January 1, 2024** | Observed | Observed | Observed | Observed \n**Martin Luther King Jr. Day** | **Monday, January 15, 2024** | Observed | Observed | Observed | Observed \n**Presidents' Day** | **Monday, February 19, 2024** | Observed | Observed | Observed | Observed \n**Town Meeting Day** | **Tuesday, \nMarch 5, 2024** | Observed | Observed | Observed | Observed \n**Memorial Day** | **Monday, \nMay 27, 2024** | Observed | Observed | Observed | Observed \n**Independence Day** | **Thursday, \nJuly 4, 2024** | Observed | Observed | Observed | Observed \n**Bennington Battle Day** | **Friday, \nAugust 16, 2024** | Observed | **Not Observed** | **Not Observed** | Observed \n**Labor Day** | **Monday, September 2, 2024** | Observed | Observed | Observed | Observed \n**Indigenous Peoples' Day** | **Monday, October 14, 2024** | **Not Observed** | Observed | Observed | **Not Observed** \n**Veterans' Day** | **Monday, November 11, 2024** | Observed | Observed | Observed | Observed \n**Thanksgiving Day** | **Thursday, November 28, 2024** | Observed | Observed | Observed | Observed \n**Christmas Day** 
| **Wednesday, December 25, 2024** | Observed | Observed | Observed | Observed \n title: State Holiday Schedule | Department of Human Resources \n \n source: https://humanresources.vermont.gov/benefits-wellness/holiday-schedule \n" + }, + { + "downstream_black_list": [], + "id": "34926c9655c38d2af761833d57c8ab8a", + "text": "* ### Good Friday \n\nNone Early Close (12:00 p.m. Eastern Time): Friday, April 3, 2026 - Tentative\n- pending confirmation of scheduled release of BLS employment report\n\n * ### Memorial Day \n\nMonday, May 25, 2026 Early Close (2:00 p.m. Eastern Time): Friday, May 22,\n2026\n\n * ### Juneteenth \n\nFriday, June 19, 2026\n\n * ### U.S. Independence Day (observed) \n\nFriday, July 3, 2026 Early Close (2:00 p.m. Eastern Time): Thursday, July 2,\n2026\n\n * ### Labor Day \n\nMonday, September 7, 2026\n\n * ### Columbus Day \n\nMonday, October 12, 2026\n\n * ### Veterans Day \n\nWednesday, November 11, 2026\n\n * ### Thanksgiving Day \n\nThursday, November 26, 2026 Early Close (2:00 p.m. Eastern Time): Friday,\nNovember 27, 2026\n\n * ### Christmas Day \n\nFriday, December 25, 2026 Early Close (2:00 p.m. Eastern Time): Thursday,\nDecember 24, 2026\n\n * ### New Year’s Day 2026/2027 \n\nFriday, January 1, 2027 Early Close (2:00 p.m. Eastern Time): Thursday,\nDecember 31, 2026\n\nArchive\n\n### U.K. 
Holiday Recommendations\n\n2025\n\n * ### New Year’s Day 2024/2025 \n\nWednesday, January 1, 2025\n\n * ### Martin Luther King Day \n\nMonday, January 20, 2025\n\n * ### Presidents Day \n\nMonday, February 17, 2025\n\n * ### Good Friday \n\nFriday, April 18, 2025\n\n * ### Easter Monday \n\nMonday, April 21, 2025\n\n * ### May Day \n\nMonday, May 5, 2025\n\n * ### Memorial Day \n\nMonday, May 26, 2025\n\n * ### Spring Bank Holiday \n\nMonday, May 26, 2025\n\n * ### Juneteenth \n description: \n \n title: \n Holiday Schedule - SIFMA - Holiday Schedule - SIFMA\n \n \n source: https://www.sifma.org/resources/general/holiday-schedule/ \n" + } + ], + "initial_query": "What is the 2024 holiday schedule?", + "top_n": 1 +} ``` If the response JSON is similar to the one above, then we consider the service verification successful. @@ -396,7 +472,10 @@ curl http://${HOST_IP}:${SEARCH_TEI_RERANKING_PORT}/rerank \ Checking the response from the service. The response should be similar to JSON: ```json -[{"index":1,"score":0.94238955},{"index":0,"score":0.120219156}] +[ + { "index": 1, "score": 0.94238955 }, + { "index": 0, "score": 0.120219156 } +] ``` If the response JSON is similar to the one above, then we consider the service verification successful. @@ -416,7 +495,24 @@ curl http://${HOST_IP}:${SEARCH_RERANK_SERVICE_PORT}/v1/reranking \ Checking the response from the service. 
The response should be similar to JSON: ```json -{"id":"d44b5be4002e8e2cc3b6a4861e396093","model":null,"query":"What is Deep Learning?","max_tokens":1024,"max_new_tokens":1024,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"frequency_penalty":0.0,"presence_penalty":0.0,"repetition_penalty":1.03,"stream":true,"language":"auto","chat_template":null,"documents":["Deep learning is..."]} +{ + "id": "d44b5be4002e8e2cc3b6a4861e396093", + "model": null, + "query": "What is Deep Learning?", + "max_tokens": 1024, + "max_new_tokens": 1024, + "top_k": 10, + "top_p": 0.95, + "typical_p": 0.95, + "temperature": 0.01, + "frequency_penalty": 0.0, + "presence_penalty": 0.0, + "repetition_penalty": 1.03, + "stream": true, + "language": "auto", + "chat_template": null, + "documents": ["Deep learning is..."] +} ``` If the response JSON is similar to the one above, then we consider the service verification successful. @@ -453,8 +549,6 @@ If the response text is similar to the one above, then we consider the service v ### 9. Validate Frontend - - ### 10. 
Stop application #### If you use vLLM From 50b8a9e66bbc3984af106c1da4fcd0f64fe13f1c Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Wed, 26 Mar 2025 11:47:20 +0700 Subject: [PATCH 40/44] Added Web UI validation example Signed-off-by: Artem Astafev --- .../img/searchqna-ui-response-example.png | Bin 0 -> 37058 bytes .../assets/img/searchqna-ui-starting-page.png | Bin 0 -> 13856 bytes SearchQnA/docker_compose/amd/gpu/rocm/README.md | 10 ++++++++++ 3 files changed, 10 insertions(+) create mode 100644 SearchQnA/assets/img/searchqna-ui-response-example.png create mode 100644 SearchQnA/assets/img/searchqna-ui-starting-page.png diff --git a/SearchQnA/assets/img/searchqna-ui-response-example.png b/SearchQnA/assets/img/searchqna-ui-response-example.png new file mode 100644 index 0000000000000000000000000000000000000000..76d615ef82bfb742ab44d9c2ca1a2afa42676a7d GIT binary patch literal 37058 zcmeFZcT|(<*EY(Gqt1we3<@eGqbP_7h|)`989_vdic+LRK|mmYp-3Rjkx@DsL5hT; zf}pfW?}y^1c)XAQbVXA^~7;-e($%|`QyBQoNt})yw8Hgx|65e&))mq``Xvu zH?bG(tafbMw@pe)YKQf?-!Dr^{UVf-`f=OOTO^;P+z#hR{`?Sf+3K`ZS&#CRWbl*E zDce(0Qsr1_!Hvz5@h`W}IfqC|{Tll9_lGfW09;DyFGuU&PhEk#&yN1mq!NZYyudiL z<>&pg{30Xh{y%?rxOD1l`~a>f|D2|}pFgA+Uo+rio5STD3wW=7C(IY+P;998Jo9vQ z?BEVLxm`cVpN{$Q7u;cl)6E_GgC?>})_S@gSB~ItxU2=?qRGucv7q+xiODQRHiqF} zAox@unbSWs{{2bWFYzHda?4x4pJVZQf$)9s zPVFb*4;!PC*q#4*-syLMjc=rufBr8|`JapamdyVmy|0p{J+j@jAvKG+y1La)D{?Gt z#DeOoxD~zJ15GLqb8=nm)gaRlH!EiXV+j#P-Y4hbnbr6+l@?!A?uj;L@nm)oLatu(?th62 zk3X^2Q=p|HS4h0=YM8xGk)q6b18wLKahRLPKK-!>D#bCdy8!WaZ;-LUpkP#s1wqfe zgOBc)_rU(bN=}|kP*Oz90`=Sl%uSvX`)S8(p8NtEq*>zxGsP^eAe^!fz(8*mXRH%! 
zQ97nBuR$uemyH8ut~@h7;c|ifAC_x7pV&DMKZ3KHJ*H_F;brJHo{FGw8gKSL7Rrn^ zuJr$~Hl~}z0O9OLLMTGc3bes7OK&oj0|_!Yob>ozy7rBkVU02IA16F$>&*=H=vBZj z@r?FLp%rkk-JTghTXa9B5TW?*LfK8Y-Ji!fXP6DFXTK1&R!n*0%>vhqf5WL(iE}t) zhcm;3V63q^E@5b=(MYI|;i%dw&{V{%Mvw!e-dpvyH5)PH*e(lmUl2>r&)~z+Mm@Y` zZP-*p;Q__d?JwvWK;EZ*rkciVGNOo40QVEN!ltN>Z7#sYDC4PYUmViRPW*O&lcqbO zY=d5O5-=az(rV~wComzAf^OV27&{U`8QXSUIlf7nw_%xgk8}cJQk5#ENIyig>CAQ% zf~=$g?kX@M+eh(df3+|0NjXFeC*=>B;F5M=No(mo{msl&ew(J5SAT4@Ls?P^2#ylW zRb#zuj>`g-8N9(nJ^!(AmY}oJE1|)tng1u8{ZMz$zH96CKFm0qvba37xKVEg5}0_1 z%21?QWR$^=mNkiToGPGUP2;VMq$u%1Ciir}*2vg@4g5u@k86nIy!9>qyn@esw(aB` z>ULJ>MZ@M#pX%o4p{C@vWzDNt9vx%{3O(!*$a0%O=q~uBD{kMctsdCO8m!a8c_*mo zS%{d+vGsR}8mvArL;=sOUD+#<(3DEHdV5pq~9$qZnuFIsyGSz};;U?6>vY~&BF?=(BAvpy-$nzg8)<&W|vN)s!4fI!h| z;#Sg!SThoNhb%d|mv}%FRAcRSr^%76SUA^cKU;1;7*l;G;y@WelvcOUA5OyhOBW}> zSTAbZaH7xEu&K`Q_6ftb2Db39I)<4}6D%~%3Gwm0M!Z0CWsGowP3EVdxqr~?S#BD3 zaZ!FNxSDQzSbx|Ib5nd!*$aBZ8NMtwTs(0@FJy5Z##Lo`dJH-nnm6yCujh-nwO&ST zP3+0s?as4}rTVjvG(HLYr}=z$lJfyxzVy2=45PYWZMH#GF>Rm_BsFbv7+OM6|A( zI5P>3Juz+57~aiuT9P={A%9*ur#f6^ALZtf8y!Kc`|^fL*?~dc%*yVCEVuQ@--&Cd zsHg%OKX!4)>iIW2my+UwM1T8GGjsQ~XR+lFwU4@~X+dkToT3w0EFQ)EFhA|vW5Y~d zk}cZ&b%Fpzc|ZHGXmziq*%Lm4*zmW1KD|r4QtGDOHe(0;EvZ}Ks>wu2dX;Z{I_Y^x zQ3tK(24|49?*~lm4%xP67$xV|kmpg5U6Az!8&<0>Lrq4dgi#fsPWnYRcVBv!MOg*A zXL41(?RaL;~|dbL1gx$ z)#vTy@D_ZVqI-7GK$ayW%Sd~S!?cEdx&9jB7oA}YJFU!TR)GlWgM#+>2Mns!!+)L0 zt#J(yR*T(TAiq_)$}ua;ma3xQqdfNWj>seEJ4?KnaamHw4%)LJ13S~wJvWzYJj3Hh zE2rSxhjp_Ww9tunj=B37Xv$Mq*PoFWhsZm?ZMH_BU7!o@p!)DL`pG%)k2g4L+V{$~ z*h(D4ls3Pu#VjA}L*FS2%4y0O*`riB;44K1QdCuP5o>?Ykm-SLdojX1jgYB?cvg?O zW+1n#m>EB*Jmhuq1CaB+UN=@ciSE1L>o>zj)j9M4#sp3Rk28jMr)12?dx;U3UHf*G zRC~@EI9c_QhUwE`kLD5Vi)4=x_pNOVG3Pz(mLbG9+JdF)-e`59%NwVRaCPO0@>o|J zCtFMV3W-+-e5K`A<1q;SAJ#7Ue2cyQY?P_0?ySC!9dfjadut%(UFmEyqUiu_$NAj+32~}v!lIwK1F=WHbksC{dg|H+g&z-mn^uUQL32WDo|Wh(VW*_3|)H1|xTyuvcNjy7+AlI+oeX7V->Q_4D2sYp6=$d0UvX?rNy1C>mL>b0yp@sVO8qYl;hy0+X zTF+|Xo;Gx=T$AHGrfqF&alX>a3LjG-3aud6mjHo#R$7xCd@bB#VlkS<;Z0`@Vc=c- 
z6==bmANp70J&nxwIyax8($Z-GhVRB ze^}x@4If7?JhbfHv!jumpt#zWO)ZIG`RK;CjtRSmFsnu^jH~_j)hyUZ%kCdizhWJH zA|rdPduSz_(KCE#iYQ*9b9kMqotxj;CO8rWe?h(71a>u}jIqpO3WZ8xcz5 z-Il(Ctt`M|8y(A%sG?7fGjqvJcTkLK4$Um&54hQ!Jkh9e&u3mic-j^ejZbI>m~&wBX+CWm+z>0n0p#sH>RRa=IFK;$ZO;_`7HbN zgGWUt1*m}Oc5dJB)0oOpS%%qInyr=ct`jmH>Ykn@t|9)0ZsChK8LT5avl|LG7ER?k zkkX~Wfr#UT_4Qn`<9x5TCk0Nko>Seib_KZ-;@HMUiOZqD)O^^KEK|V#d=F1%%i$xv z3`eWCGV=iS2fTIG&kB~?T|Zv)82Qth!!s%Xix6i8ZeBZGFkNN=mknK^Bn#@;F1&Dh z6U}vEp3WR)L>$&UHCorYni#$-F4C$=xQUVEY=hYg>Q39u3pU*yU>)Oq4v2sR;PEoi%0ljc0Z`)t32h%FEz80UjBwn20 zdW*8^`DNT@S;j8O^VS#CWaFq_{k{+ZK*LUXHeLz9JKHqbN;Rrwui>#&%zQcwY?$jV z?M-NatzxzR%}(Fh-KZPmzy#&dUj6dSk@m!&n{CT^gJzXeF#s_fRnl4Di5+EiDf;wp zLXh+lW3nezTc8;`Cjpi8>kIGgYXt|!APK*8367%$@lOk5=g_a`>!k{kbH&F+ zi)Q1miyO3b5Wk_*Bya$VL&90H{;dzU>=Yd?Xj&`cvN~1w?MpT~fnIB7TCgL|B3V25 zH*JpN$8_!duDF@2G6bs^N5@CQgn4IH_B4_nP0?<4dvf|OUw0V8veChZohtJv%uYqd zzHqOQ2zF+<<8~kp)_R8X7*pRI+?bRDKSo$szkJqD2Xp#0{758(~Wg-(7Ks<%J7HxQ84<&N36qssSHw7EtyK4I1<6I zhD91W&&K0C+ER^3KG(KCL{h`i3Xt};tMg$CIi zGpXYIR0nCRcAUI7gAT@&AdPWJu`t0huYdk;pn(WD!iituJU@N=Jpl%vf9hifKtQ3q zMOrGJhyw!YN6P!9J&4eX*&K-ZE>KSaZiIJxBr`rv$tdu|=~&e!U7!feh>sF~S>%r5 zN9>V{3>kT5Np03HNC(K5{p6Q=b8tef`$e}NTR_AmWn4h&7InLkL%1$8INKOA@qrs1 zp0kh9{AG7J&;Ep`aFVgJw+Tgj+A#n5sTB*6`a@cDn;zeA*w}+LC&9JXe)=;kl##~) z&~Cm@;e0%t*wNUVql?f~NawhS)%9BJm9^M|WPDaa9 zD1of#dxt4~Zf;?#J75fOMZ1FG!8zkw0_g_Xqbg_rgFlFRbGzB>LIMl&D@Ovll(uv0 zCzJTB8Cqy710QiPr=uVo{Rzm)F+OT%Z0xdAi8)B0?=C2|2&VV4JRvUh^_6S{vjCd= z>G7_uiO7Sr)vrba|5U`C{@5?0Mpmit$;!4>w2)(z9dT@%VW2bTewBp4iCks2iQnW< zy}}PGuI8e*EKUjz5Z8&Xx1g;~z!{hkrL}G(TIT1YUQe3D^Wu9{f`YpsM)djWBPF=WtDYdVMFNb;F~QM%8b*vz)0T`^;M^vTg~ zO0&Y>Zq^2JE>!Kk5gGp_CMq`|81@`$ZsUGYe*X;9>>5z-Hr9>)(_JAi5Oz~1@=Fe% z-CJPUnO(m66nZ=raCs%xXX~Z|{U?hx=1UGsL=jYRPc?G>A#7gcyf6@gr6d!D-m^Re zBS9X>ZH5n6VUnSZrlQqy!WZ?f;^xaZWkPt9Hg3$Y1lv2t{qjzJw`{W4zi}hrdrEkt~D($_5t|K#yDt&Pt)v2 z@_Huxb351BaIdU|{Fyvg_OF!ecRII%dyS{=y5b|haJZ-b>+-OYEDu@W!e7Q{F|^?L zU0v9WE^D|Ydfhf5#g1;U+Bso3Rt+TlWh+~6q|@kmc$bb09R#d<+~Q&eh`23@qWchJ 
zD@b+FY-;yea|KYtVmq`!niv**6oeoe>*GqR*5qKwyw@)$4co5oa3=sAWMym`?VAKu zYQu$BgC{=8dmj69rUyb66BSG$gQ7l}|y85x|C(0bGtxP^^t= z#}1RmnK6zJCNk1hALVb(31S;ZGN8wElC*TizOE0ucMS9I8kfQ5Lp_;EZmWs0ut8_t zlCjlPtF|Cj!{t8-R)Lt`fGM^VS&5h5YBo^^Z!gAT88*@EvfE#Zu}Ma%$=wG{W!{g5 zjYV7i>z_mSgIt4@oBWkiY#p{PJdZntv;tK58}8;*R)-&u=_gB~K4f$hml|jU3t)Zd zKBuv5%1y=@`Py0nImg1je5`+Lsz2qzK+OCh!MC}~t97#-u0ELCwt^*aaBud=%o8qO z+AgMc7caDrP?)@qm zF*ruk+Pi*{VPH&kI8b3NC}t>tG0OU4L20v>wbmQ;TeKaIL0uY8riss6))x{8)FuHG1d$>ak0}w!!B+%^GmBb zIR^zMk7Dg&qUev--sKa5Y^D*f4%dtLufc&aYT~EBtUG!DaGQlJTY2ljsX%z-8FLY| z0UDj*Gy^&_U%#pW>$pdcKUIq$9@AN0NH9j=MIvmVFs1x|liGTR?VW zH-BOw|2s$jg&Y-5sV2TH*lM9l71f=U@D%Ht%7Yg)VUME$v7JXYi7tc5KETKT+h+m{X~0OIDR!^al(ZqUu&+U6uw8X(N-CNIEX*} znnBiJcIGxm!hgWLOvF8#ddnhT-69Bp3qEl%;@e(&A9cg}6Je_wLGh{Ji5%y&{Bni} z@u6cqIZwUqY2w||$(;B?zf-PZ;i#zsE~&v)hMb6P*Da#|DtzTgB9>MsMh+F5oVXJ`0aGv@_c@CrI(6Dt!7TUx)nv2g%MM9 zgTX2nc=I_a({t<3^gps`-1im!EQV@_L+x!UH837O5q-30h2a+7m1YoeM2EnguREbB z;AU6c(fmme5N8!se+XTyypSHbu$mFp633+?N5_+#Z!tb+tq&_=uIyb%-{<^ay`L8f z={3iqg%$)zL8g1^Cd!y6v#=wa>+zrNDyha`(VvxI0!m=4O%U3A9ue%R6Ejl}7jOza z=K|KqKE#DqV^NfU#dxW?3Y++G5l*MFvDH|EKI>d%?#+?q6iKd(V#I_HPuzP~cPO0# zlXhsM;3IxR2ebP*w;Rm~!bVO{aWNJ~9nkV1Bk*w`!t7Yxlq9R-UOZ4P0STOiC~Vs} zb>vqf{|_yuK&x`Mfh>tX>9(8Qa1>Iu{7Vp+ydp_&AB!h5xfgUw-`{RaFH4Z5_5nwm zCZ0O+-&VedzlAh6`J%?;FoIYm)2g|&FlqSxSAas48ZkEF7o5j}juWIk$b3;+<5#yQDnWp7H;&qXbW1Pxqn zqO}+k`9kV%+BEXx6ts+1$*3<-;MN_)%q~woTm$a1Npsyzal7s5`cOMCS$QeIdbC*t znUBGX1?_O-@R4DAl@tPZrPHBJDJ8`FV{t&ou{TY!(VtM?>x@sV;C=s7TKpOuEdEo6DxlTQ+!!J+v#=w;M79l^hCBk~ z-p{?c^i6M2>5el32hL}Z6gln*=Ls z>h`QrRw^3>&I&n)ntyM=h|@)`G&ms`{Tyh%ukt=TytRazsF^aYl#gD@-s!4OweGdliXV`!9uuwJ! 
z>4-#g)|drVqVPR{Mu4GO4~9Pz?{z-myjeh#QJJ9DhBZ^n#;2m9Rl*aa3JB(7B6=bD zhck)aV&yA0lSQ$+YVDZ#cS9p2-rNt*jb|8}x}$$z1Mm3h?hzL7(c6L_hU^4_26kJ~ z)-?%@VrC)Joo0a059Eo!%vFL9D~at6SOWtM+(Q=M!$|YkwuanM1*+)td*j7a3Tdo8 z%?vzZvWofiC;V17s=e@WKhqYUwa>m#S5(4az z(4&7C$1Ex?i-)L&UdwugS-nncbR<(9Kypg+O?vaV#fiF)vS>9lD?!b3jW@i_Syf|J z0qhEk5Zrw8$arblSFah<3tudAVUcr3c81UDS=gD}EQInQBrGw)#=Pc}B5Y2W!omoq zTv$z1;=!a*ZMRyzKrtQ@w^4Q%$6U$e+XLx?&aqQK`aX&mEF;RJLfLs@R-I>VHm7CV2yNQYqmsD06kdl zndC6=!Ku+-!C+KCld!}yWj*pW#2glYoam>LqJ%_MMZ#o>G0>W|(8^B6ed+sZt{MxmOxXMKUP zv3XdKNsl=%wr+lU045Y7+Wg&{FsNDnQT&WXv%ZdiRmmK>Hk!4^3RHdoy--PKlKCm=`7Z=+CIR$>Gk@hbS$Nu9SLrlNl^2nm^GU`AvvyWwt5*(Qr+vB#m>90L;hRizXXAd#s}eIz!*U z%30l@ID=a8kvpdApN)@q_Z+V7&xQzHk(@%aw!N5T3MVPO>BXIW;w1ow>n#Idk1Ic5||M@K@eSz^2jkH>uMlN&$TpzKpFCkP)el|3s4 z-V;mhAqFb#DD6)N2mCo9kY0s(NAib3!Hf$ixY~`2{N3*=l}g??~tw*K}=vVvbbiwm&9E`kV~$HGC3Vh)*Z^OkRkWZMqXI7>wOYj*XsE zXk-iS9ligIEtqXF76z3i5e37gv|pvZB>iWg-G|~s14-*${osMgkX3W;E1vxF`PE?yH%Z~7zdn;*bcC%Vf9HZod~O60jzWhHPd$Wx z=6uej6Xio}e47yS80x((fF}HhJ^xr*ETF^Tw5iJ6L%%0T&ve0%9=>pO&OMS_av*?;;$q9Lzo$K9 z=T?0OWoU3b@j9_nOttx1amm)+eD{<5^48a{N-$6M7}T!)VnzCa^SdjChCQHi@J>AZ zO96L2)2HHbm!~6naV955MaH~g`OgMFN*40Utb#0Qce*?MQEy^6QjHMN8KU>6MytN!xG05w;vuCzhC1UG@!%lasLiT*?dV9S( z-9j-UX|I7KY&7;dNCHa#Hg!)}r!0Dp<0A-_v0IIA9DH(?=tv3y@zH`#FKbiC`gtU4 zvYm|((xUYT@X$%cwi4%cPt`qJ*|;VH(5nN4<WE|pF`u}13C&CG=^mF zYM*@vcztK)kGhHEAX0h<9*;jim_XVOTb=FC!!Yw;!o?s`9{{+M<=5Jh6t75_%GO(( z^<>>m45#dFZ&)@?NBV>bkQ6m0)o$;7PVgGa7x_`vE-^c({&-q}SKaLDP@T9OHkK>8 zkre{3y%FgP6d%)pcP1DgMelX=UKjLaPBQ;WZ`RYv;Pc?0(qgzav-;z|T*$5^`=(SR`jOjwi9)imFME<%qm?W`v3lShlh6TgrRR4tLoYWi zff>qp3y*Xs;`G)P;7V?K+gcF%xQsEYDkC%C8-ei3VsFno!abqXP#@3mP=&A55LsZJ z=Dtj;Lo)v0lGwSAv}X=|N7yQQV%zAOoqr2Y>WhJopnHHLbfIq*J#Rq+G9&U&f~0DU z!T)AvpJ^$I!j6JL^C{rn%76;-$0KyCi| z>3^gupNhg(o`71=Qc$gLQ~fp#so=3iT`eZ2NpSHG$#*CIEvibL#2)+qF%$BI{xw1P zvR~48>~HvWUxVa$6_Es5je%~v8<72PhoeMJ|G`^JeUZ!gx54?p$x{8dsIC9G=$n!( zo(0(-z-@UFov+Box+-$8MwTOMH&L{-iryYMc8&t9C6@Mmk4Mcm$(pj5W; 
zwiCUkDni{Fx+#Hg6*gx5s{7ZkCnBb4&;y*Q0Xa7Rlwr!b!hWwEvcOc^@3;GQ77lZs z1hDP&{uKb(&6%WHA{VSyrXGaiLR~gatO@u=U0B?tfW72nea0T!R{bTz!eNRaV{C9D zmo!DYAl+)>V56kwkPlXB(quMRspY_RTnJ}K?g%U7nvq&T{!%+n1s?+!?v)cqKm+2> zDz7BJlR2>v+40LgINL{T{{9U;kLtS0`z29D;o@`Y#I|*{bEM^D$I0f|S1F{`P;K z2|XmyNDd{3mSGFE##&|Z$7AAi6W~;5xwU2k588|x5RN28r>yHX?`N)txPZd&r5w4S z3wH#geKTDal`9e8vq)xH3{`zbk{B}~j)%s39j3^6lDJf6eud#O?1&ZM=Peauu1}+CM-jB1+pXm#z ziIG?nDxCC()$hm`ZGSa-j;Ht^xC7K@X2IlyR#wVuP?APqm%|+`r3Y|E?27Y-Cd$>n z$X+*FZ2_D`BCi^zaPxfS0H!7$BF(>zXC2hzZTX%@SL>^5_(QE`OkU(SpXjuFn>3^D zvp#gfV^CF&!FMLPRqtpB8NDi(en^}Xas{RO&&{uwNaE5^(p*k_6<_#L4zpANRexw# z*tt)=={MY-v*E%b5gW!I5UYe?&t@&Zj8R#_>spTdRT@~wD~O>dFA4`gLr*aSgzaM0 zIPM!B51Fz<4r2W)Fa7h1ECUN)NsdX!$HE6*$^k46(l?g!^@P74+KqydC}1o%>ZM%n zIgz$ay0AxYC7>^$_L0S%9(7BX=c8{NafSE>=xKJ+`F{PzFzmJ3aPi!gnQ;Q1+3setG> zSWEkr?880|TK5t%E*Sxti4-eApeQUfV0r9R?}#!7(rYOoKYV_@c~otT03qqJW4P`$ zFRn}04g10>*xe{suAG`8M_j6o38VRKD5~@;C+pJnNT{Z+E=Er#@dSel>jQFF4Y9jm zZe-_c+Kk28A+4M^F2;yz7%+98qVAO;SI+q(W~r?GyGMa;mMnsy;G`J%%%3qzL2*vW zmSc(4g$Hy9Mjni)^Jkcj?Hka@KA+ zJV=cR!gi-!t@{`+hshQo=3EXW;dT8g=A-2(B{AIQ{Ow}6Q_ZW-{pprXd{GRz$$+^Q zUZ(4xx_TnqQhB{PMnfjCF5-!C!m)qm^V&%gmhY+>@LxbZq#FeDF-GEW!xN_ws`8vD zd$m>gai&1mQh9Yeh9N|gh$A~;EkpMk-tQZwloiRB;}4|YHxHe>^URB0`FQC zTmhVRCAIm811`t5JmBXOANZ)oJ8^|Ex+4;9?F8)&NyqH0GWt5H0)jQxcl*m?3ZL4m zK>>l*YghFN2fNR2)u13?$@3L4KG?WAeit+%HPJdUH{Qwg#+P<V@RHk`(TUaJ^a%^SG(Qa^H7k@_OQHl`B=mww7aO=X7tV@RhA(GuTNR zX*+rD#;(U#+LDH=lP*7sH_oZ~x`*w{xsMxV$k#PrEznKw-z!#D{$~0&E_iL&mGXVp zo75Alf1#?>UDf^HYYkHWWslysNV#!~|GDTNN&N2=#1ku&fCa#g-l3PSo-f=mu_4hn zZ7=_&Kg2Gls21CTGxar9QOoa#zZX^Mx?fRN5gxCLD+)2$u&g1`-Z8}u4ga63d_y|( zG3vch2+Xk7Yaq!W!o8$xf_6`#H-SKC*8_X~v$62C;Tg#)3b@zkKyGRu_yMIgX=}Io zc(14Yw9$Eop7?f@bwsASOQmwWr+0F+Ufkmm%MYDR0UtK{WtJ5r68^mlsJq3(t;9XG z+o;6#R~zafu@13L^RWNG^v$-{5qf4g26A*WvD>{y)3DTSXMg4Trx|R$R(x8!NfvrkxRJWJMP zpjnU7IZu9^T)0;H(Zn#J@I9b2ug(wV+GL)%uGIV&vDBA$ue#j#{mBv+d=jEju4t$Y)l0Me}HY{H+u$>Evd@v@B?vBLZ&`&{lGBmT1tI#DN0r+v=N 
zuJM3<+wtAe=Ty7=OOXTg&^#x6cbL691nzO8bbGs=qESb3wT(8oJ@jS5{VFB;_8xWm zL8VZ?$>3(&oZ33W3!y71yA&y1Ce}fjL=$k(Ki>WBq^v{=A1;8zSzoXQdb{Z9z-~OK z#{_dH3uA{VacQ`DEF*Bd$UshL?|ID-Qi-(dinFQM@8U=r5sm9<08Z5eySti)wSD}n zY$|0Xt+)NhkAAIhb>fyIf;PoZ&!mT~>}+Y%3n9PBNS>2$$d8}{uWYft?Sz?qp2~%P z_@784n?$v~Q}@L`w*OgR6ZLU|^bCqxyrzFhq37`Cl|w6i;hcyldP#;-H@)iNmZ5H^ zNV}3-%4*ivyR(qD$C%uY)9YsH;5^Vgg-{8+*?zI~^k$x-VRmqAhPOFEK9g-%e_{ zMH)(c+i|sAo##1yefdW4h?g41!*$L#`f(e;b&uX!^2o8O=f_L46cu#%;7?^Dj`-hOTGlK1nLKm4W72se_fISE|+;zg|6@6jJZ>zc@P zhd*O5e=x1gCRfTS?nCDB(gQC_@>g%xl3(FhJ|T^4w50;w zj%Gc~cSKf=#^kr|rIscv-*_;i(6dMRSU|MlCZ$cvM&ssbh>n|C~-pvtO_IA{o8m_q&$|4A)LINqbb z{5vst!NKBU>1t^GwO90osJ6bRxB1&z#|NXAMqMk@y;{2p&aewk1*yr1{AHZ{w;Q-3 z#~)B^bD7jCw0()nfE9HI=~?W@&BUz@t@gORbaq=@@$dKQ}J63jL2`n``%zQCd)Ak@ZTUB~6FwQnU?o77~ zr6Z!!<3?9vyx-=|%_cR+1N;KtvOc|?eugXjG%%budb^Q!u`|SR*Mt7i;mjFrR1w6` z`ItpP(8KS@;qM^wxhqM(CaAmbd3hPeZ{6@dSrvo`% ziwlHqdT3R!dsmjx?2E?aRK1{JNSBlGA%#-6W5%HY7H?s8yHu4kK<>Zh95J z_I^*=-hbqh1D43;;$K@DOI>Ppc)cc=V^U>hs*loTjLc>;i28eGlk4TAeLh5gv~^d^ zbyOXzRXtXtLoc8^y)vlRpUE4Uc)%#F(rLG>LCqPcrR7u}*w(sQ;H_J1VRCytYoz9c z10^B~d*ni{PTheCiC56eaDccPuRQZG-4ribo=?qtoO5KT^eoC7SReIsSoldRQd}zDr-I>1Y zMds;C0lkvQrKh3LRuJGjJ6d``q7;j|;JLKDjPI{(^p)uts&LSWeLlluWi4$M{By6@ z6B3R>Dj}!sgy&*Y=ym3Uzg?M=(J{>Ja~}T)?E2@gY-66!@Q0ZDC;VE$R7i-&`rZ9W z8`G)!)-fVM7Eg}!jw^3m#h;Xr1RKZy@_!`$<9`|azs$Xz#AfRIUN*d%^*!O_ODZ`} ztlUE1pRK-5Tu!yy2zIZ8GkFnAw;^U-X&f8%jbqC{s;tPFk>U0(Oy_^<{d@gf_?|4xv(F^B(i@!-ZA{!ImL z4E`4Rlft7pG)2@2MV}BCM{V7d$+iwCy3={@l6sKY`20;5zq~T-oV_TNHG!{AY0_pq z8Fe<%RS)j~2jB=p%mKIJ+{fj%ResvKF6G$`zKRcCq7!i=PcL^;M{=45^`TpZFA8Uf zE*3O9Pk8UE#DU(WXJEsrd7jC%p?=8TaSLkF+vG6nQ;sz9)po4s62jQoXO zO6#Hrmo7C<O0`d~+j?NrEWRNtn z8|TqqP+fjKvPQDphwP3fJwBm8esp%qLR{cvXy($jG~5x9QhRg}| zLPfihw982z(7>mA#OdK`dA0N#$22LaN0JEsTOmjqxi7LbqfvcgIba6k+!)s7Zs-81 zyMW#7pW?axBkbJ^k#VJ8SV_F5Ua2C{1V{L7PED$Ns9gzYgQScT84+JZv8f>omHK^Lw*CQyhBL*=sVhMyhot8 z!KvXOV8u}^{krAGU9B9~O1O6*it*Xw-N%oRSjGOm?GWo43%h4^gk{J0MC1pN?V!*2 
zi@6l26XHnkM?xFXF9>RHwCtF3pLO+vjb8UbVtoJ8RNRt9{<`itdfLkp^Ux4V;r0Qn z(Id24(8~C@XEieTQl6c(?{;682f;ZuE9Rb`8e+nvJuMr`0wOxRG*t`JqD?weUXE95 zT&uZIJOU1;D-2iqiu)oiuiyN0(Qv%MFC^*1v}UdFBeHO*D7334Z0K~SVUgDAQn2bi zR3zs*|HGZ24Z2haw0KqiEef0ZB?)8u=JgbH0pMPRV6qaauWVJ0$-)8RJ z6k5Z;Nqj$2ml{&!2GMmx_)XIl(*_pPJHzSd{&SU~M5mW1(2D_j!u@ic^`K7)p{TQ? z105iZ`X!rr(Zm%1-X3tI1Ygp6WUhx3>K0-e72weC3Z5z^G-TYVH*XVN0j}Q4GD{Sm z_qvtU8AWZgL_FdVTNLJ8 z%p3-M^2(u|ay1wjN)6vVdG%7X!12qx@px$G46b+bn*3zvSb44j_7cinRvdY$m~A-Y z?1It`BW!&Uv(JsqbZy%W|HjyI;hfmX)$+CHHHNk8b;{MQ-=z!6pqiF^O}6CB&iAGkAm-;miXlgfV=4*R&^)wFcp8Ou`z?`>PPME zq_D3j1}wRN(84~m!Wx1b-KhF%jXB;6^kO`(E$sOS;p)oRDNfz1H+Ke;TnJ$|{g_LZ$Z8$l?m0}miYbfQu~Y9_SYbOl$*YkrI!tMeN~w=Ugz=%OTN^#PO43zIW@_x>$ZLU4F6G|b!a5} zmgya5wb7(WZ-Qzj)}CJYfr9Hlx7x0&6<{CK7`OPjuN2BSEo$g=X*}XP47`L-@qpaw z39`0t9`7BE1a9S9=b^V?ajAKJ&h^moH;=F`dW_Ya;;h;Wh3JtITT|=$2Whp39SJ)s zc$$g^k`1)ME7^09w>H}1di~0P2lOq8q1z~drtTqbp+&uKc-JE5rcLRDXFZuq10#cH z*58&Po+Ays>60&A8PCJULn{RaD^WLT@4Q!p+vWz(=c#N7?@4klKbq@KLA<`!kZ4-+ z7JLI$gSml=1E!=(`o|)O-iZN=z6Nf%%xQy%1Gb*E1xv-3!t1^62V@Rcx)xZ{$7^5q z*@DjLtKiRDlp-&mS--`I-X=7@s)fO^ydsWf+1T>)DZX}j@REthqFTs6#YlNymDV)$ z)}`=q9VI)=k>zyKLJ#h#=Z^8hk!Vx@>00N#W``~XvXde-$D0c(%+~`d^n=&6lxsaR zyS-k{fim19P{B=qc;I$jQ3*Oq5B`aNWtFc_+&y{Ky-+}%h(=_Ulx>>?mtMF!>86RX z4THP~H#QXL7eaaw0~Jq0Gka1Fj=m|Eo~9grbm9`Y(y1&`X`=J$V3tGue5coJ89TLa zPf0p5dvK=CHR)Mgg+oE^c;3aq*^F^?YVR4f(gLrxNa$s{dU#2GPy!e69yxk-rF^&o ziLMAv?dwoND;e|@PI#?1unVvQHJX#9t`BZx+1#%QDvCdd4VZb_81k$}qSu#!^dg~8 zb==H(k2;2Cy1_W{sdA?q+^IF`=`L=2_VX<6x2u^I|L7X#>c4#cj8{1uj+-ZDwg{pA2icYfZQAdS2 zio34VnTC zDBiiwPP4Gum$N=jlFb?uTD5f8a-3F8!7EPvJ}^}hH%@MQbu1!_Fd6T2?tgW zO-+Z9Co9NB{fO!tF6ANgG8>*S6?rC0nG%p6(&tL6E3EFL-E=v=jHvQ7%q)xOXbF|$ zN$YQ9V81WaZPU3fbva<(NW0)5F;bIerK>{xoJB9?Z9C)Bu2;acOe0M=zda|vgRytc zs0|nUbqQCzcE=YD1=7rN(=qf8mwtMgo4x=24pfmozjSny2vDnl5bd1ME7L<+0Xdsk`_{&`vE5O) zxR>7&;Gs1bDK1Y>sQx?;r^)_=6T&W!^P7@@&T28>1Tul+gsdq>l3PMtlq$Sat9^0U zh5~y97XC&*C_&;3-aMc=oZlVXRo5qqcrWeF!*_NIjtf~IEeJW4;wo-vYodkWZd^rQ3aoXmWln?;nwvv^)!zu`EfS?K0yJu 
z?v3N?=4IR?wiPeY(+LUA<7?q+BF7f-azJMPzI`fPhX=VUe!CMgs0F8%u$6|KODHL|fW#2)L##nC$$;ZbP&2(w*sFxF2R zGxj+-vU$H&WH*H?UxT~#`8JhwI>DA8+^B12s^KY~@ik+H@|wQmc3)#Js%4Qk^v{&r4WHGcR@gmpZvp^PV{?}_jUS)-cBxbD zr~$jwudlJvbJ~7)EMUs{aTfND-J!RLhKB7Q^N=Ic3xshr;#kJSdgm`=@e08K(HglH zPY*jfnxFjLEpI6#dMuwbTaW9nz37TNe&Saj>9vRNJQzjM(L>vIP-F29tc1bM9o0aj%W`~@^&AuSSI?J1HnZz)P}hV}L-?dqXjSGH{uuMQU}if|h49JgzY z(AvE>Sp%ywjEE7Rpra-;Hp>rDUXyYsXynB(iHW#w=hJK5z&pL8a7%FsLy$2_#C_gw zd`=USxEH__T4bs4bk_HsY<^A$AoV@bL-6o$(5E-Oy@-po=M6QN zKc$p#{4)mIm;o`|SzibSsJU3zUn+6UU!|(By&@dLaZnA=Nd;|(>~VlI8%2HJ!HK8I z8unx(9F%+8Q33$7G-U(T9!C;a6_ZyAhx4D2*j>kh@^hxbMy7Y#?}=k`V;m#BiR4+= z8N#7Jj9-svY)*U&6~5~n1DUS5{e9CN5Q*Wh$L(w#Lc8@Wt^@Lpk09Li)`PCKE_b=M zvNf02JsB;WYVK=mZ?13I#+3+^?D4GF!-AL*NAmt)-$`16b88G?WXUJa`o7j-wjTZ7 zd=r+`cSz9S>Gzg-Faj||0-QT&{bfc{i}I-qV6JuwukVr3iT|3kJ)YM-FJj}bV&p;5 zxRlq-!y1^I+j0?)lliBbh4+%Z89P zKS<)h6RS9Q!<`T`ss8a)e?4d7E@U9s`U^x|sZXc2a>RR^R)0^D7z=XU{Sn$d3X2ym zlt4=ba5Hr4dwj|g#Yuh6Kv{V&?*5vy)7#Fz0?Z%GDC3NoYPU~7SkAf728pj z>p}gaBrt6;f%fq#I{j*&H{a*={n@lD<0Qmt>Q+%u&Fb->ea~!RygKrM<|R4sm_KKK zt)pSIAOuv3HcB50W9VVH(=v^X4-5BYsa^GY-(XB-}f8Y%e6DZ0%)I0UZKNy`*WZ zEe`Wrcyh(}Z+L7`K|^(m6)r`@rW7u8ndR($YTW$SqttrR`75{evx+50|0=GKOZLK| zJP8>?4*pZYi)mT`_b)H@-mtWmrdA_sJDf_Vj%A0bYv^0_>^tZ4c&%#WC32@>A4_}x ziE+uVZEvJ=*x)l&F3f4lrx|%(uxNjwd=;2SCQsNoVEk@Uipm&QB-)KvwmQaVw)GQf z;4kUje`vzgxhih@H8!%2@KJUlygkiTQ2tM~4Y<&WeXC(o#O~*1kln9fvj!J&8|+ z8q}j|r=;f6Cmwkxy3ZK4QyrTwFWnW(%e=ZT4e4H_2yeLQ$=hFuJ>|4p*S@3rQHtqo zVZQ|bmH4z3|S=-R+)g7z^u@nOcx zG1xl)spJcw?W6(CC&*CM;(22X-1#cjY6U)ej zURzGz4N;xW6y?j)N2i&3utm7u@10T<%;%TI3cdC3eOW5G7W*LIRzO0!@{TQ`2B9;@ z&Ypv~No{?)1Z}{S8rp9n!)o+ezDfsH-q?)HEIHD-L4P{V+KGf~-V5vuiExwP`mVKam=t0Kj1(T5MOTTM5=Fioxf-qJ)X`&UhDJmOc6j|z5#^>2P~CmLQ& z!1p&@j?_9lz!fIMZP*yIUQs@_sZ`|kKReuSRI$~VN#|ElHct1o=^95RFr&yV>}unt zD-o-QcTYIxfYT1JedsKD8`pIcQw(ccLYhG_CflbJjrw)CS*8>I(V>_|TO60hRH11W zjFWeWxBIRwx_HR~3j4DJcX7fs9ihOLLJKNOig|S^pmy-yza-FZv3>PeTEAcwEZ}>} zY`)2D&=E-eQ!N-o!hcbXG~#|Ub8H6nK2~$N?>nw4K(QC;Y()AZj+O><0q$%&L7>C_ 
z6B8{4Z-DQ!gb1><0dYF8>H$h}_U>DOI*z*X00Wrj;TXf#pi0dI=u5%J&Ovle&+(a< zc+Dw?{KH&NS2)J-P0VclSA#9%W~Gd>)6*B{mD}H(pV?zLbq6{IR#9AJfA*6T#~u^Z zmPXpYBnM(cgqL}Sg%pLW7N8BHmY{N41Wjo~VbU;GnST40;{e3mnG4H42L&%&f%ZL>@V81H zxdSWefQUhNB{r+yU3633wv?4-ya=J4<+PhAR##@*BaQ#kMU+^D7mc+-B3Bx5{his) z1-5$G_A@KDi|9)aB$36Iii6&+H3AP^ivR+>8dv%&MFEooEdB+=?E8=-tT1N`9$3q#A`Q0`-x&cH7fYaB+-n~tXTv$S#8(*AlU+(ahy_xjphtjF~HwLC7gE5!ftQ%BLLv_&$vsjS| z_e!MZa`)BMQgWRYmu8BRuhCiI`Pm*1P+7Ri@s8MjpOFZ&j-`yM4Oa6=5g2m$VnGPi z-X#OZsn-!EVOIiW;E-}elMYj(MsAmVo^)u$7<4F2dE+Prw71UJR;b!=W z7t?SC?n=m9X`Vr8dpgoo$@?{?{(z3Ts9=@Q|H7oHJM(#~+}o`Kp>E`WuZy7GZkv2l zRP%JXiN)O;=ztAY7=xrY%^9?bN<-F*POP%af2?dyO`AX-mw7e+j~u`hrI!EBj5icq z;wA-awryGLq%7W@a85$ii15*|1r2f1oYo%^lS5MEf^USuDgN;7`zJBJ)>dp$;fBpU zqRR7j-EW8Y`|!m{T*Rbui^Umjj;sPk&_+p!&^{~`(|;8YfTsG+*!qA!1{`}O?*sJdM?5 z9^$_Cwn{GU(zCUUl%hs@^7y1j+nq`s-z<2B7s(}{rJ_cEjb!*9)+z6)4qP%qK`Twt zl5a7lEE5cy=E@vR9v5hLuy2bpfESvLlLb=P#b;<;lAEviYXQ3{;;&Ea@lMX@xn}up zCx_t-le#u^h_R;hsy2ULHNZ(#erO@7qhrOuA93iI-%q3(;S-~XS^nS?)}Hv;2S6M2 zi-qO<9}iY&TrAL&VVo0~%Iy8mph&K#1vq8*6j)>Iz4hd+2MoLnhW)7(U70LcJf6Q8 zbzRrUwMOoPbg;nxlehl-D{uWDIIVVO{pCN@D4G8QR61u5 z{V6!xyYb?@-D*;AoDv1dG=k}Q5+}4tpJ4$of}BtIY~#HidR+#KxRdr9DC#B_+cEhp z;TRmuYKfgSOUo2-?+*#QS7h7$VknXjyl#smGN3gWt9jpHgw(@)r$&eb zuQW1s7~SSlGE?yGR~Y`wPA2L$(BEzVTzA0`>Zy3J+`s=DL$}R1FY`*{-SvcTYB`>N zo6LozQPCfEb-|IOV=^z|y7c_tzPXSTDEJ|N`giH@`}{67fahJh4WXc2+ga~-s)waa zwpn{g$%AMOchh8TC|2?*X7l1@Ul7>J6z{3DeCOJA#o(OVB=|WOP?%0oZ7iBtqM67w6^l|lEXvyBOm?_s9 zFS(#+yvU`-V3eY6jK`?|gx1w)*5ndJ5Rx&oYT3?@*-<7ftJ+oc`v``4xkIJsV{j(*$gE?Hl4}zpO>!n zuP6F)?$>mGL2XC$SxVqT!Nn3R=F*MXv&?OvjxB|w}Ay6beDTA+$>>KDULA#FKGt$`n2?Uo^L`fw?!`T zSi<3PaQ{VlMqL#ykO5~-z;V2(dhJ7`AOQ9K>anWgSUDlAN=)V)Oo>!}rbMaL&;&_K z`b9xhy%JRPaHUek^f|`7uFH^rK)$EN1-3aRfiB_9HeBmN-nsv|Mx56-bDnymJqmE{ zK=qKo)mQm;%RWZ!(~v5N!KnB2d9;=TZr$m|60>(L5n17Ak+feF>f^HQrM;0}9xKIR zLLbMvjNh);px9+|->mid2CNuKTO(%NZFI_)rx~q-sz0aaAsq)e5Cz+uxu4a?qpb2X zm*KS+ZpN`R%AJ4~ts|SpPu7K-91wwlUqW7IR68@vA`;-KI1O-TznrU11o-$(x&`9! 
znUS*?Vw;PX-a0MoGAA|tM4uaOC56)`uFa0j=bNMef?v|k!x$1O6!_C)en5Ms(811x zBE=@M3C9omfY+ICj(gA1fGEA~+m)xYT=@ zjtRB7ywrbf(eMn&lNmegUF`cjLFic#blr$G*knZ|{_4`4eVR~CRv99;rVHi;%yRPT zWOpz2Mo~D(C9|x}t{#mA((xfY(O7wpP|w-E|1&KDNXw6ystD@Ac|sV3C6uYA6D)sj z`QmtP=u77Lm=+F0(4f_5M!9Lpb}ueaZC)Ll$WafiEo|8SoxKiUZLJCiuq+ke2+e?v zC*8zf++ciFhwc%1+b*l`oE;Fv2?(3O$R{r0vzns=!Di z$j!`Q?k2K&G50XhM`3s>n{gy?HO)7R9nNw00P?7a6adyvo*FXH#34NSB%C`2M#%@# zFboo`RHJP82R^GstcE`&BlD%L>HCL8`VvecVX)~Q_0l>?r}lilofOosvha}CeOfc9 zapLN{$Y>CpJ}8C4PBZ4BG@wo(j6*A_BhW*4#!n70kS&*rVYO1gcT&s_`MxN!@2dX| zU_;;2^+tgQ&JP`P<4eSk!|{{)O_a>tXL7LpdCRTnkb3{}k~T|`^O(c49$~O}l4VKA zWk9Y{s>FfMB6tVu&5nKzdJQzOlZ5kF%+2j3gqooLrpeERLcj4fMV1o4+g#R?$60Cw zg5QCv+iFS%ml$*F&!E)J9V|41>rfVX!9)M)up5jSBiTFuGVzLcF-Jkz&*qMBXq zyB!h8&x5!Q`;6EB#?&BQDP4^x1pr;!YXS_EQm}FCiiZmt8dacA*iO!-`STv@|1>ieW1viW4o z1YdqQD+5y z)`&jDiR(=rZSLLCVEwGRU$voGq)e-6~ueRgo$Xu88`)CHuFIud(iVr-p0fAE9-vT0Un zZO1gw1e)el2cd=~OilQ*Hmdtv`h2iHM{{UAmsP#TP{8=@%WGaWWqLX{>NIR|Ro+l4M9pZvYE3`H09osJx+rrGGmt{wq z;9M6KaugWQs~=B{*Q5HX*R!-?C9eK1{hs4NU`>om|2jgO)6|8B`6`iy1-v07aL@_n z`FOLky7PA|>ExL6=8>L&Q9DZi=sghk32xlADkOEr*CQz)YxBZjR4}GKywGL*a@7tl z)zQm)*sZ1TAmdPjH+H$UhK#b#OiMI8dCiFGj6Ne5;FeX|3K-nfG_<$c?t{OtDHys} z1{w&Bl2&e)h&63UXvcJU*N%RU$#`)xR21w|SeBQ@Ahi*FGW&~$-IAiAl^lpSvx-yA z^9~wbf3yyNPXc<5hlL@q9gx{3L&z+CY}Kv@yL6>+v-fCrTBZHZay{Hluh5&eSR^LI z)fw-kT&b4iXO42;TU7I$oNICn#S!=O<>5E`Y@wKS>&}5ttfBe@IEoK)1`n0j zyYQd5`kvPab-eOC748K0HfRL74S^JpWO5NeknaZ`o2fk6IwRBg|{FGgF-yl!)ou72nIq3_HXN_P&Ud4I2H;>xVP}$S2_uS9ix2a~41~nW5m^<>6IXXUG-j1hpf41IOyQ)=KpH+7^fE?U> zc;-}^rf7I&Zj!}A=to2i7PbdF9@4-#fq=F5Jf7v3tl}*+lQ#+$t*I>l^Kv_zzE<-4 zphVhJEygWlTFa85%Dl+ZZt=t{E@ZZ;pY#?Uk~<0rskEy=>M$LGI+OhZ5li#phL$50 zG*hc6fhAa_!bwDWzzuZc;(}k3mL7+mSW7ds+b)Yi~lKD6CcRFny*Wit3-gtg)-iv_hC7 zXBVoyO{cj{H-=(*1X57Qei<-?1}@I$8QeQf+aZzreP-TCQw81-Uu#tgjuUF7O*6zw zczcW{!#l^2hivm$UWboIwRXq`PDngQZ>%A$#!XDbJI3d>Yg}7r7H(Wo8Wu%jeoPR9 z-^e%H8%3BYklxlYVx=cSo+5mi&FQEYl#H}x+e36L_DyJ}tdt^ayz2EQ4iBX5-2Uw?4_B$kB-j_Ur&lWl 
z8BX}^{@i)`s%U=0z1C2ZT9W%r{L9q(LD9;=36r{ewaTs$w0k}^NkEGjkL+E8sk9nP z$mpzI-%8vOtmAPK8SuO@o!2DeSjqiPkgg5k}cDIh(H(bg15;;>*arcV!f z_Yk)z#?&ZL@Th&m!%u(pplNoLqOfgbyZujSS8a#evAv}z@k}=mlSe4N!%$>qIDNP8Xdctz{ZI2=z0j zAz7)!04K#q>Q+Qbh*B>VO}`fBGpd3Di0@yNf(_swVLrD9a?P}JnuI^!$%dFEYgfrb zPWDr^6=_;mH}RvuL05i;8_Ls#L+D~uqtkOyEoL=bjS2_bKtIHxiY?9YKb}c>@R4Zi zX;ddeL_8;llVI8Q$q`oPJrO?*oU95jfP8wD7zn{Hxar| zwQLy;b;0_?wtZs+oYQ(*==uG2QA<8q!ey5jgcyHB$e~Gwhy(Os~JpLLUax4(3)neOx9$T8qRnH$u3*@TL6b&n-HE5E! z-_Zl#(zsefb-#;+Gc#0Z3=iq|l3Q0giB?;^;3}_mL$#T$96v&!Y7$ySuk6{>>@5vy z`^VV^+2ht2&9$60$pv(E#S_iODhq28^#v7@ocA*-_1mIA~{P0*?rcQfX(+SJb< z_CX6p4XgMIGYjj;HjaPXY9FnRn>xQuHetHm6-^X9m^kGiU4551K-EaWcAa7;#XfYX z<%a&c;IEf?{Y>@b*ThyI^vU><-Oue>!~Ja}u5LbKsFWu@A}x0iq~(sM$Gh)8Mohg| zJ?PJhWBe*Vyz=Y)JCk`lca=V=O6A=yk==*T4f;qvmP^^T(2_GyL9Y$Cc7AyYxn{6m z>Xel|f#(jO3-aco{IHZtdx~+=&JSTyX4oPyZz_71O~(%+>w7=c zYLtnqFEZEjI0g?#K8%yzX0< zs$Z`}h>ttF%o`#f7<0SIg070`|Cz~1a7hs+Bd z7woLqS@(}6xKHrnmkM-*fYT@JJaDETV}G1EKF!C_*1 zI6O6LmQlYP)nrM6Zi=oe(2^ZvFjFgnNyjA%Vi0^$qNVOITgz=pxIMNMaR}ZfyRsfj z{|Vy@P@FQ4G2+ZDns@jhOi{DPR*|pX!DF=-?_10cvNO_|DiOs=qIpNrA(ygGTVIWD zKIU-TqnPdE*fauVpwx8ovyz)@BcHxxc*3syhO>@6LIn%31Gmjh~ zG-5+2)v0f2Nm<=@mmryC5Z7|cvk|*pHuVtIW}M+kYcc%e*x*h{)lC;=D&?B@(LGY( zj`FCR;oZ0Q#B!pib6YtBDeqD-9a=!b%+sR*GpCoSDhGu1+5yy$mVpi#C#ta>$FZN= zT+yol7GD4|B9s3j>($1+cWJm`ErIY(%4L*=cK=*fq+hD=t zyy*9#N56GH(GRfb!mpJEMc2HMB`CdgaIB0H7q?Bs@9k$3U7HruKq*kE*^Y}YKaQkL zZzTL7<)AP3pP53qQMz-`*OjOE{#Rko{ojsO;lBNYtl<{;H^4N;WNWA_R8yiD`&dC^ zHO;3@5*ql>2)?8?MR5_1_xQXM+V+fOaZc$E+?c%_e})!y`}*Ksod|*C`NpGdYyv*b zX4gKU?Bo#fr4&ylZYnu~{$s=Zo@pzm&u_Zdf*jwf$LrQeS*61f@WEs4g6D(CG^Ij` zQy4mDn(B2EpZ{?dCfmO-zL>lC30RBe1vK;Nob3slt}s((L)2=667#cZc2kMPS;7uG z<>So$;P@ZgbM&>0pHz#3;Mt}IhzNnc@CX^8t*+0dJxDu_ zFil&PR`z9TkxT#Sru1L<ym{1$jrm6ARzB-*kyu2nv6eTmp!iOcwv>IL)LlCiY)4<(4hR!}d*(xErU_F#oZ8Xah5M?ShNBYA7(jtku2swVDq@p)^>oAL$m6izs1*?dPgr7@s*&epWYF3` z@Ks5?RNvP92Cl1v){8guNqX-(d}WbeX}lqQef%3Q5yduNf?ggv2Z%0Ws^|#2SIDK@ z5QGiGAifw6`VR!dGe3^+X_h9>4`-!1XzACff5ROmQo(%8;9rlw0v<@z&U-y*;M+IH 
zQ|iwVO!|Fpjg;#Iek5H1nX5=;!G8lL`RAl%N7eZgAt4RY2U|d(zCKvP*9T*bE9$xR zd{x)oro^O+qyoDI&40iX?y$8PsTgP;5c@WcTj}t>RccZQMcVp^T`?9X9{r#Cvf=+v zH;4ZV`q6*cWwZ+JsrLh9KQNC28phrRoym?m{rJ+A>;F1??CqD|zGI0|cD^AQAxDr>#wYWq zQ1(j2{|3y?e_#AR4|@LD6TJ*R+;(>qW>t4`gK>Is1b;qT@9S^1X5am@-)oscVDwG2 zgQLcN)tJ*2!H}cHA!F7XJggkU78Eit8=BIc>aSh? zXFPA6ahLlI9cpm|VWsSUNaj`I#|MyqgMu%{^Vcix)aIEM!@}oCHiVQ6RZ`Ws^j?yr zIoIEx_toHyOY7s`6A9Q>1N!M4flY{FrdORU&U?lzd~J5B#7RImcyLFzRp^l}dcXB$ ze5%5KxGzX>jc_~4@3O9}jE2f@=zpK;64vFgun-vv^%ARXH3fR8Js>4x1zin^SvZDT zyKGvjJ=eFb!WU98SKilThU))w=e8t2%h9K({bTp^?8a}b_giCs7Ld6QBUWxAi;5PT zdwREDg9%5YEdKQG_DfqTS}_Eq9d)wR==1r!OeI{wG!pPrDB6Z;NN{VEk zS<<^{yr#JaJ?KjN!1Uo|fE8yEgFjr1h7c$__s@c(9XC~KK|)A<^q)Ui;8SBbg-SGR z_m4~Ui@=zQP-t`(ZR^Xz3h>LDo%J$J*xM?9zq#Z4eS3D>wV5ihvDUz8_Zs0Z^7G{f zdsrhSo^O}m9)E{$ccz?(C9}i3zCh-BiNg7M zkadOR-DQ(6?|$YrC@xW#Rf>9Z8}Pl3%e6~D{Tedb-;A(MUS>>X>DAav`sKc5!ZP0-XeaJ8^$LC_J zmp5^T7W2&X7AJg9?mr}B`}03t9X(Uh1=>nmj9|{48}~fiXf8Cu5kqh6dK&}}E*ral zjkGY<9?3BT2rP88t{j;(P|BN+F5^8ECZm@)&@FjZ-5P(9hI8YH$Qw3-PHQ&aEs?>k z*m}+^x;Z8{>;-?oA?|a~@KcjFPmfP>u%{>^M>`h95e_1JYZ)`lM-MR@9&&hVVPQiH z)h%CW-@ApN4Avs|zx2rooY7Mf9U6KUW%i+GIw9>7ltS?Q3+igU>N?F&1hU+;C^C|R}>Fa$e(S}|#=(F!{#>!RN1MKbkl*T7FRt&!C`rs@<5!IjeL7EXP_F26mq zhga&<1XdfPt&G#WrQ+77THNL-L(_UN`5yFVOv1Hvro3b83Gu0()+}lDjd(`*0F59;Z1E8EDTb$fd5{J3bkN z!hWnCJyZ)fm{37eN;S?czIviiD#VbEUA?{*M>f!v^Bq2=fV){PXfnt8TJhA zgcQW-cMR5yX($~J25zbpQeF=q!uR%udZ&pr|k{{Aqg)c-;Y4s9(Y54F)0Ag;! 
zi5fI`Cwgs9KyGbqK(p88?Y#)zLD5`yA8N2e49RX2^y!(4-t05+o1PnJ*-bpzUtYIH&OW?rWkytMO9{Jvi)EH`A0@FNyb>Y@O(Xeay7w#g%InnTNl66=|m=px4k$ z_OG`1!o!~{BTG*F;bpp9P>qIw)W`i=-*Lg6@qD12PL3>K8A_Zv7 zaJDRmLwRMT9fg*84o|=jK;3pbI#m?eYEXwkiN7J?2lgiB&oy9n&bj$5pKg}+ zZbT&BqTR) z$o55_@Q^^n^@y1h9)Ga5sJqOLJ2h@@3grB z`A4e2%k2p?@~1FwcTVp;br1AOnUVxz4udJ^t;2x?0<9sku6o;jOl?VtG!YV?$P>tcv4IMRuW= zXuukyZk;8VBu%ssdV>x+nT|(VD;Lb@SLR|cDpMuupPt%m2Zgb>O;qj-s~gT%o@kCy z<&F$!Lup`Z#aG=|F0Q{nxXx5meil|L^6$JO1dqe7BRjRjM_%>qzq_e<96l7KC#3PH zzFS!>cXRrP z^Aen-LH#su9sD?(-<<_#&UT!?XLW+tt=M(nD$vU-A|K>}dx>;kR2wFKYl!OPV;^<+ zuTT0Aqc1oBGuRD&q}4B*w|u+as$cXQ%J(qy_u3c2{Pp4F;+JFuK76+&Y^uRiw#Xd> zacM$qsvzu8M(y$|1xO~mB^Yyf{nduW8KsWQ`3Ljy;-TKLn9z4}mXZq`?=7g7aWp2l ziAG@;%Z(WS^zi17|16sqa#I&U#0809tHz_NvRXF{s7pA8zFufLhx0?h{O2WZNnsft zh?U+uxdsiLITZI~w{q!xsDpbVqL;6-`Qcy2HCulrHFs}lPsHjdMXx?Rd{J9ZIT-i@ z;z@vD=-nqnDM*~!^}8)tw(s4K{#u^_fOc#T4^M&F<0r1PIlS3ofx>wkbG9)n>B9<6 zb`x8_2@fBC4zVlD1X0?-OGV9RndFx&?oSWL)BlNRWpG!)9it6C^rPc?H1DTq(aalt zKgS}aUG3vQ7L=pH>;VKR_*=P|S%?MC8PmQvvG-IIX0^&r@n9DHdq&aSRp5j#;)ML(yu{+gyg zgFe$Vfi_nHz#T+I%mH6n%9W(H5J>;S;nM=Jr1W}F%L?oJmJJ%OVZu%JouuD*#wdR^ zbB{o=F&XOsK$63EsUUwr;LPWPM1KG1Rz&v^v0%E%l~NGy3^RIdg=iNdqWe!w&A9G(V8;y0DpD5lHQC1l!}dStM=4xmt6Fp?6rtaL%}b zKjs@%J-(AUukbc5aZJ5+ccC<5IzO{G<|O8Jj;r^sFy0FhhTPm@$A!P% z820;;)!DEN!elY@?y@CQSX3~T97Nc)idR7etPaI*3niusK{D>(ECD*~-}#EnEUjj5 z?^ug1KD(UKc(;R$AFo`_6)NtkDr_>$f)q|w8wSOmHB~WCb5kCjw&BE8F?=7K7~*Ce z8Ppyd-6!T{>?-FRl${1DO@Ke+@X_#~Cfl4xmM}5SVlbf4;`*mh6HuzC_F5Itty3Gu zf4XYRiCF>`9&TDHdR=j7V8ZU_f4>ZGw;L|QhaSa*OzDe4COGJ@vyiZ>kJGj1%KOy`=%FZ>WEhpxRhTw3p3z-~sv7Q_SZswc z%tZMZeH_+;+w;8kenP=E55syBg8ATo;GxSKrWjXp>^}vMMl}cR>wskS#KftD)A>eC zP*Cqb8M1TIY22Z6*KN9oG!g}02JL%&+moazvrG0r>+F5t_u?}~7KJ+>wXN2aBX@$= ziG%QcAEeIa7Gg{EN&P?5A}v8B z{x=7Z7KWbP{~sOMGyk;zPr51n_l=+aYx)20;Opr9zXAEbeGCrt?(WHaZ9J~{je_9o PjN4e*|BnCd)*t^13BPpo literal 0 HcmV?d00001 diff --git a/SearchQnA/assets/img/searchqna-ui-starting-page.png b/SearchQnA/assets/img/searchqna-ui-starting-page.png new file mode 100644 index 
0000000000000000000000000000000000000000..dde90a765da223391304cdb1b2bc53f42212af9b GIT binary patch literal 13856 zcmeHtXH=8h)-G<2dX$YIN*AJADN@DIL$LuO0s>M4K|y+np@o*jty`r8q9DCQL8XHb zAapE{NK2%5A_+};XbB{ci^t>M=iDFn_Z{E2-!U>W=E@jr%r)nlPoDYAH~N;D5${p4 zqg-5EyvEnBS#oh5+2!Io0Q~2n{gvOBKu7k!4g^{nUEwMvo%*(K{KMn2>18giatsgq z?!kTg$o=c~fm~byLEk?ICfos`TwH2Z#@8<24uvd@AE~r)Yt&x_oDx0uG)(dzFYGVf za=*?0^g`V2&c!F`>SOI0NlsQ}p(~0|w_rti+@y1|LyVK6u}RmF<3~=WT|49^nH2US zjhn;J^}q&B1Zti>EjPNkU0gNEScEr6=#wW{U9Aq3?7SO}JD;2Oz4`YOA7-7{eBh_` zz%}y=;HSa$`j7uL#@ehP@!*FcV+**@(j){h}WMr5*? zZAPx207_`xH?HlzsJaX4W4fwu<8`F;QU>U6EVF9v+hIbdBCfjjHwqItXL?sqKi#?IvIkZPv$ph5D-ie4xG{etIO?*@lvJ<{BxpOREc2@PbqpD^vmKW9eD-rk?9S#1)`iT6I|fVOU>a{^a8y`1{t?UjjOnB^}IHVHq4@dX4RtV-W23EmU}NJ)a*Sa z8zz~@D~&d>BY)lPY7l9>Z!~pHR%>32xP<3LuP_RefJKz1q4K+~XyjT|4V%S5>p7G3 z59}1>aqi-8C!GEby?jS$%vgILFHPARr!x75YaJApu2LPj>`xN9QfRIsq~sC-j5y{6(#5@R_AhPfGK<{YV~w7ts~ zQ40>fX`ixt#{`Y7yJ)zJuyI6=;=1SM6RAx(%Fhh{p#Q{SK_iE_YdFJ4GTFe$jMr(Su-!r})bH zPkHyLHig^gF~e7$biai_=oKZ8AD6>u$)QL#TO^EA5!yKS-Z3puKT`|dE&#)Ml0DbiI)z zd*Nb!#F+XbQ#0>1YWR&8ekDeb$A&=O)0RQ*&2Le;8CEh%rv_A-bZTH5{t%$p^sFG` zJg7+#@w6B#(pO#CAa+>W5}>d6g4@@c~XdK~d0Sx@Vq*6bg0$vy>o8VNl=k zI$(0AlSDJJp@KnQWg%5lzw!ON4055=Z_C-= z>w3x4PJ>jeIZSyF-u+6o1g9qLQEFk4@(STXGB=9_k^KdhOnyPxhlN=*TkMJ;EwHJP z9xKc%Za_Yp3(SwbuGG)|DmhL_f&7mAbm3s&tf&gLGBtd`SfwU(w>mJ|3*{YqBQ^n=iQ*E(|ce}Pmrd{!=usUo3ao#P(D z&MQoI#r|@4V8Mx5hM%NUqEphhi$U)%XSY5OLAbfmbW+vnY0FZ-H>1ZFXc#_8`>$n| zC1z+NC`i#Cl#8rXYHS|%NCY&J=7ca~LybM~S+;VPI1h42-hXjWvrb)(hwWxvWTEJQ z_Yu`VE)}On4$%;Y;~ZMcP?0ucDC9Q9v`TCkUVs?me8RLKW~cP}Pf4v%O=XC)<|QXe zOt?|@*10cF>(^zcBLL($EoXiDDrJ-L;4jK>Th%WN?VQvn+67ieHyJw>oBrw#7T_H@ z4(%N0r~eD%D&)P5u)j}VHM9*b5#LPER`-ul=}pk?-pSmfFV2FEQrOg6c9Y8zDiO;& zq33a?;407!Gp8>(rtA%_S4EcFB{bU9|s1GZhe%!sYg0gBL-hO#c&+` z%?psTT}Hq_Cb9#w8|IK9*i;z9p8o!g;xQHe{lAZ9*d_ELBsWOCinmBzC~o~tEtuLh z-uCg0brJEa`9`$S7LSiL=E)m|r>%nB?MO@o#mRc0{Ik!{MdVHusaxC2w$S?#lN7d@ zd)jL`&xav`xH_v@1o7{b1F^tvj18`}48kK@{AZomMMgF8p*6_Wr&f8|U}fDrepDWO 
zeCLu_f9wepcY)3!6{vM2iwn3S(L|$;Ettjgh})b`&w>sYmXwEknDqd2Upm#-(Ap0f z7v1cw>u-1e48}`^DWx&sUD*LEH&^a9Qk#&*!0J|?HW+YVb}V&#)YY&;zuq(~&|~eg zfprd1Ew3eIWJ-|SkgAgrz^E}?Vs?74T2RZvzmzbQ-0ykZDPG*R(N(I;>;`5xPX3Vz zpC8}a{ru0}pWN_0(xx?43Y4Io>-MNdLE}P@cjBDAK;1~S79p-n$i%g$;@mqS>zu2j zHpW@~rw>&tqzPYe=}icRIa>;Q^_^L6%5kf_(W3)SQHn7~ZMMyYbnvKLz-`uT)rxE} z2}2HS*vU#CaSep3*X^9`Hq+dN$@_2t&RUT={y@g}kMxvcrMIET5Ic9Mce@>R&3|65JU{~T z+d48l`$!YyoQNH}AkJi_NTUwJ|Mr(0NTy{l7`?ZY+R4~EE>Vj3%^T(BfL0Q9wW$h+ z**tzWcR9;ZK3&Fbpw3hZO6a}?GShl>Lm^95H{6lV@C;n)?Ou$@qdChY@kM0!G+Imj z-u|7RJYu2bsbh21bGU1?W_0&J+j#4v$@#bPYG+Y}D-bt>-fHhhq+V&RJ z7k|$@>94iv;z{-{r}o>!CTu0;kxFSLTbqeAxJlr{P&F~4aH%XHppiKOaa(aA47@R+ z#-LCN^V511P)bW@PbVbFvp>%&60iD>>CZym`(y7XTE7%vOe+n7t4@didVL<-SIu&D0ZX4EoJTmwXb_Z74B0~QJjt?JTj1RJtIRLC zwJQ#S6~lx468pAc_N;_ICYn;!P+pcs4QuUB>{9zInjx78Z%b4$GU&YV{NlJkVvPGC zh39~?b$yDwfTdK1``9hL6RDx{kT9IDdU<%)!t_5<K~u1!XpGw*LmEZXW$m>{wPj1Q8mDGy+p6^}F^-ru?8+M+y6Mr$+fL274S1xD!3)Ms7Tg znVFIkulBzGl`_G)H1`*KNvd~*URzJ*O>}b~z1BP+1$^Hqktj3s$HLo^Xj={G;FH%d z*{!(oER@UTKKa6n6Jz{*Nj$=8(bZas+5L+nYFoNG4LwUTxC$cn@)Sov z*||;R3>3YsRZ!E=06%`l0k4hRD-zD>wZXq|8VJ_@JkBAoTvt)EM2c&8<4pLB$(*`yLo!jn#SE{+|WWPvacC2N7J6L2B z-Q#?#77hbIuO51m+E5V`M~Qk;l>@g#^8W|GSWc%b#3l1mD-6?`^#?R>;NV5- zExFTITuofZ5-RT8PC0?YanBIHR3B^wT@+FZU3mX{1EY09RzY8dQjhm{9Sh{N{lok4kSzkchxP^Vrfw8^v9e#Pmj z3#~fz!g)UTt|wz|34Fa12qX2YUq7G`pycli@vWaYvDJ8Yc_~)cNaX&h@11kl%9m8O zb6yL_@r#~e=E#LZnRfi9JcS11-fm~w8NC%(!-wI-c`xHIu6iK3{B{-Xn6BN9B>xWg zzv1)&w=CWf+W&A){SN}nsWKW;k|0qIno)L0 z>Ps4}^b8x@Zg~iw$Oenz5?4-TYj>}GwoTBj2`-`~LmP!KNu&^w=kUl!BH4Y@ez6|% z;lV$ExxmRG91?Q#OR$|#affiy}v9ffvIL5tmq&bi^1 z67m!1Z%}DBn?!e57cs^oiT$#2ujOSZwLQ7JJ3)zJ#=#fDEb^{eN?xie)OywGS5G}Hf3tb^uPlGP^e-f;3tm3yfiFp`gcH%j zCW+d_B5?b|hgJK^Lx|s}hnVLC=kFZyBU`v3e7@Ng{U5 zTbF;i11J%f2(-K^KUi#BnCs@HV26ESUu#n6KWZfw)p@9-TCwDwW0J4F*`bl0ig=hk z?p$OQ+y`Ap`dlDRoS0CiuQ!w(+PT%-RwluHLfZ?yr<# zp;AcvV!|Te+Xt>8u~kuk%wEkKGdM@iPNrr=3niyZ0}}N+{$^AMj!Y+u=U!0O`kG%T 
zWNL2OzN;G53r=D~fa-o2jf9H>D=`(;r9K7>$YCB+^(eirz=^_5Fb!+GefdAHg(`J3OsG1@blp&!=hwY)D6?%RyNtE-Pt}Tvog-PEu!hJ&KQ_+(e2!s?#3y6pzP5?0JYwN z|LW81TifMVwOeIIUUYrId(lT~Kn1inn<}`C*3Q@ivl~&$_fl3$uPp>>6Ag$macH<> z4(r)}(T}gE1NPCurYhIO(nj)Yha7;qFjf&_Y0yx9$3Vek-jUJxPFpDXkI8+>NjWm` z`j}R}S;bk~teVxHJ(bz!I;kb@d>iGaoy}g_Ou&J6+OV!cR*TwF`&;m6`F%IGKe9*j zWWNvb;90ZSv}T>Nk~JGGnz=#4)MECW?n@J$wUSbmO5e{GHIY}*X7hIV(|bGN!|Wj5 zh>*Bh|GAS_<&S`?{0G&S*%S6E-q$N~b8Jkhl#YYq>-*|iFRirKTWSeDTZSo%I;HD*g2GlA&OXcnfYUWNYMa{@i`tA{_O?k=}U)kk)S9OA&r&Z zv)MIogTFz6MZq)U;gdkcoUk5J!;*@7B&nmx-;5pNKx#@!eBEs9#7`b5n`f~vqISf0*MHn_z+pwj$C0=*Ue>GS zy{@!=Q~6F!yxQLV(Oy`87WEmt7{uN>k_InI;fhv`bQi3>m2-E?;| zA6#;y9bc;afUEhf&jf&cLh5Z=$J^$$O3~C-l z;shl%tCFuZ$)mN2XF(?J+TFXpV^&i0gZcON%0J?147kn-EcQhuuFic&`(p3lm|7@s z)~+$cL}Bd3l2?^yf1!=4^5>4*AeRm0Y_thM2r#fy@j-==7}xI3k{^@cN%oIct3Nxh zULc$$KGhn(IqGWui9I!u!a-ik2%W+=x`wv#Z`EU_c}ZwSbZ4kZa*1Ms#7g`*FWV~b zzBY1fLmZUQ&)EqO_(iwR=M=>G*z1ljd-T3p2g8g{_S$$y)Y;OyNp=n1xy@-@M1N;$ z9d@;+J@_llp@qBo6#v;99;}Xrdhv<)Z^MVJN(_Vit@wEheKCR=>$&}x38UJ9&BAWW z@Nc&QCMZ?C?$?M|LFGKBCIHA~nvmYD6FBi*!5PpUM(p?TbGzqp0c(|Wj8Co8SMhOX zQAYkCsV5qp+fGxCxQ%Tg#Itg^tq$kQADS#>7H65SYQ^YL+MkU7>z#p;s5Yo{a6yU6 zb`tkL_9`Tu#mkaO0x3f^=LxsPm>naMaEhLme|BAo@AwF#n%hX zY@9B0sMIfK?gjZh6OI@xjarZ#c~+ZYM6A)*0|@IhNZU*w(*{jUWe<>f|3F-VpmwJB zxjxQiBc^lRY^yQ1k>z4*DJiiJe(dS4O#8#Y75_)tt>pC<->*>f1~#LO{&i=H#%;IN zk`t-`Z=+^1@FAP`D9Df&>|W(>$?k`oOAOZ>+Yjz-wa$dCQ>_H|Wt8t-!Pz__AYjFb zq}HKJ>4eTiABL2wuBHUvL+D@pOMAV4OPtz$5S*wD`@-csu&=`wvwz9VNG=Aw4|p&2 zz4JlIg^y_|8PAmiLlKeuxe8t5`o>ai)QjCZLZ^3**3(dG9btS>?Vq?ZobjsbNEWXiKjuCGI9{v1!<`mp>Fp)yB2K`X{KoMRt; zuVV10e(>^P?}(c0PQ>Ea=0_CQDXYJbMNam}ZW8|<_gA~`I~4%6kg(kndP%dA`N)sj zIpvpET_CQ88P&Hrv%l%#lJ+L_>qDofrT1nALchknwz!;o8H)ic>*;wt_aFX3p_PvS zzrU%##T9DKyGL&crAs_ST-8=XuS7NkW_G_~cRt`+g{brb-aj>?&B5EkVD^ufXpLgh zSU~AxItEJ-)DQhw9I%qLNngu%CMq&aO@4a9Bd0_Mivl?ZR*@%mxS3xHPDq#o_#esp zB1Zad=;=EyxPG7pmm_R@rgYyaCM!A!W`qby`~5gZ{6TKMGaI&7n0yZyv40l-B`MO} zeP)r@k^Ma(MHR|t3l3bbjc%@0lMJP`)Ub)z=-=C#b`>%Ds1>+MbsLfDfh`uyT|r%x 
zRzQG4mbzloO+m!GrcmPzzh|~V_vqU(?WZNuiGPex!b9nSR;36l+z}Q5Qlf<_e;3iF26u?!j?*) z#5a8#-raXv9I{d?9P|^=)w`%us#9V=<1HmkR4zs7*zoQ6I`#%dvUi|7E=#4_TDuBp zULdh$7h@i0-~+!$5VNQX@A3?|QdV=X#V7}CW^2OQi6aw@69F+6%FUt~AqKaheNuV| z)2#{LSbLP>aI;2yQD9^yy`Y2IGe%F|nq8+;xNTbi01*IiLavrLy+ZD3-c#LUw3-?O zmnJH)Y$D&FdWOu0x1rza6_y)0k5jW6Io3r6jp}(VQANtK`ft&>YnexFTbYNAh>5s? zlW~pSnA&a^zRgF^(r*&V zII2ywpWTd^kWq_6@VFEIIATRDN#@SA#ofCyELcW3fvav^egkyfG{&NzJMV->M&2jp z+p@i+67*&UM&SvGrYfSL>g*ctTp-gTFbWewN0 z^euWBg?*FVagaO5XMEA)aXAy(90S-3_V>+4MrI;IICJoTr=He+h)(Rf;my<JN&o?}LXfzj6Vp&VNdLAK+yWS9Q)9*6f5JsP$8gvEE>UCEk`}1vz+*LjM zF9br9iFJnKw2D`GLgO@l2@FR1u;X(rQD|J8%4FKMMIJ>|W$wi>7`qAYLSh@JxY1#- zaE`jg#n<2SokGBJ7z{e2)jFr^eJ^!PFDni~- zV{C1GjUAps>2*sma?3RRV?%KFE91+RUa8o~U$~46=lH%}eXY5HSAj(AJPyqnt5y|z zap*werSZb0?t9GU*5IokgHd&&5z9Z%FoW01r6(t;ylDF6T)#rjOFBJ`TO6WC3;iY6!Q(Ieyt<0-e^6C z7JK!6M@UtSIYm`LdlqtMTc0X}N(V%MPzp`EaXi$%6S!R+J#EIExCeYf$V!?NQ{W>; zsLfNChpg*;TX>?K*NSgaSFIBo&zR^A8{J^eipRRl%nQ-KiI<`SwePWdr4Ffw%M%*v zRIB^uk3Z6W!R9Wh(Kn%th&h$jDwT?ky_!q+1BJu_SrWp5kW`7@l#>)}_-49?UeWOm z>3f9HcTh)QgL8NJ;bUw!DcZQ5)Xwf`70eB*-s(EvuUkk)Xw4`fLEaln;u5%Vts7*o zZ?23M?y|wC56-&|k;wPa27?_FDSK7I#2Vv3m#b@}ANh+}=AS#)V32*8;JlKT#RT;jgt+F~Ll-3LACDEU zN5PQA%snmo$7a4Lic?s?SR{nSyuBjSU2i&`Z@XSI_3N}QgHR{3L5rr3%go3`aD83f zpT{@rC>kB4iZ7hQ!yM{rSj{Gs9iw?ryO9YmeS4Zt)2E4>c&} zoDR(1^}l2Tsq{`4rBB>+?%=J3RQg&?-mLOIsh9%Jb$`quT05rjqb%&W{xbzNm?st!|;_{ia5pj;iNij!! 
z-N#QqE*J9|bM_%GeeM~v8Kzx>P1NLh9WyQgM-yOmB%v2CFN2U}VU#A$KXVArnEdKdDm(xnWTJMjsf zt+7L7LXV*X>s8IRetAE+Pi?+Not*NRZ5);CPq)pqes>QfWfq$9s@e{xohGKj z5yg(jOB3Qovs(S=^nRen`C9IL3tS=bi@%kxL~r6~+gkMrhAth<(8%`c{E?i%$D;Fc zgKwaD`QkpVV{>h+P2Bx9EBZ!5<8#UzaAxKrD1D2RAxZX=1+e5ZS+{lYnsUuEbyd%c z+avl;Wz7azW{wm%v{2jV3@H{-^)oqJKb$qe}ke#WV(nxw0}Lj3ObE8%e1$ z)NCw86u+w&@uj}+UARIMBM-*-0=N6>@9oX{#`BcWj$L zo8H;RvrwXESHh(R)E32^YjaqalaK2KREhh?dLUQVnO%vkgOM|n6vAf;DXL4&9Brc1 z*61GV*?(>wJ5|+7du4=UtiBS_mD-*g*OnunR}Od5Ry8ZEJx>_lq%kLU$wJt(5dc7) zGIDjvLPbSO;74LUO<{QkWcpv7*y8@$D7|rb^%YKCT6gWX&TUx4T2-fv5tS)|Y<=lz z`%dev-rjBY_M58bww0%RA({8-DKx^QQ*PK~j7ZA1LPVBPc7|X_5j+aB0Xn)ct*)i$ zzh}wG*Mdf}wLni)Vsqcw4>D7rNGT=1hGLDfm;N6p7Y7G z(1z1yp~?a%Q8lwVO+!e?m15NPaE_0jc9>J&@_h%#b-+Gx4o-L!Gu8Uu-1#IzY$?uw zc5yet+sn5oEpnq&;?3;sJoCuy3@mzGyDx^fY!}DO+F3{qg)MJ+>V@73GCB5Rbj7@& zmH7toc$X`*`Sx(i^FFToXgf*N-o%QFZz87E$$2qx(Il~r9=ia*^gq%A;31Kb6+dRC z4=P33`*28QKRV2*xZ4cnTacYQ{GLU)`R6;Vf{VlT$`Jd8^Il8jP9#_4t^a6>QtN`O z7Kd;eUP;IIWBzDA=0Bi^>(+-q4o;_R3huA|t6}=FyubRm^_BizeXe-5|B@X<_hsC0 z_JU~1!CfxRnZG9FT$gUxO$h&cPcAOYD}Q!s|I>|}AMN8o_s(qW?SBVg#SBv_k#O`u z<%M3rpE3Erw6M-UQ^3V_{;#I@Z{vHw{tn3BG5Gr){D6+XYvJ!&`2W2Y3i#=q1E`~( VUyHllk>8y)HZZ$Zdgac;{{x962l4;_ literal 0 HcmV?d00001 diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/README.md b/SearchQnA/docker_compose/amd/gpu/rocm/README.md index d95f8a830b..72cff55a39 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/README.md +++ b/SearchQnA/docker_compose/amd/gpu/rocm/README.md @@ -549,6 +549,16 @@ If the response text is similar to the one above, then we consider the service v ### 9. Validate Frontend +To access the UI, use the URL - http://${EXTERNAL_HOST_IP}:${SEARCH_FRONTEND_SERVICE_PORT} A page should open when you click through to this address: +![UI start page](../../../../assets/img/searchqna-ui-starting-page.png) + +If a page of this type has opened, then we believe that the service is running and responding, and we can proceed to functional UI testing. 
+ +Let's enter the task for the service in the "Enter prompt here" field. For example, "What is DeepLearning?" and press Enter. After that, a page with the result of the task should open: + +![UI start page](../../../../assets/img/searchqna-ui-response-example.png) +If the result shown on the page is correct, then we consider the verification of the UI service to be successful. + ### 10. Stop application #### If you use vLLM From df9aa92620b81c4a7e3b14a9cc3df4bb321099a3 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Wed, 26 Mar 2025 13:58:15 +0700 Subject: [PATCH 41/44] Build AMD vLLM image from Comps repo Signed-off-by: Artem Astafev --- SearchQnA/Dockerfile-vllm-rocm | 18 ------------------ SearchQnA/docker_image_build/build.yaml | 10 +++------- 2 files changed, 3 insertions(+), 25 deletions(-) delete mode 100644 SearchQnA/Dockerfile-vllm-rocm diff --git a/SearchQnA/Dockerfile-vllm-rocm b/SearchQnA/Dockerfile-vllm-rocm deleted file mode 100644 index ca68154db7..0000000000 --- a/SearchQnA/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2024 Advanced Micro Devices, Inc. 
- -FROM rocm/vllm:rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6 - -# Set the working directory -WORKDIR /workspace - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV VLLM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -RUN cp /usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py /workspace/api_server.py -ENTRYPOINT ["python3", "/workspace/api_server.py"] \ No newline at end of file diff --git a/SearchQnA/docker_image_build/build.yaml b/SearchQnA/docker_image_build/build.yaml index 254a860e9c..bb622dd0c1 100644 --- a/SearchQnA/docker_image_build/build.yaml +++ b/SearchQnA/docker_image_build/build.yaml @@ -43,10 +43,6 @@ services: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} vllm-rocm: build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - context: ../ - dockerfile: ./Dockerfile-vllm-rocm - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + context: GenAIComps + dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu + image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest} From be8cc63461a100c6cc82029d38c1d67615850cea Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Wed, 26 Mar 2025 14:55:21 +0700 Subject: [PATCH 42/44] Update vllm-rocm config from comps Signed-off-by: Artem Astafev --- .../amd/gpu/rocm/compose_vllm.yaml | 2 +- SearchQnA/tests/test_compose_vllm_on_rocm.sh | 2 +- .../amd/gpu/rocm/.README.md.kate-swp | Bin 0 -> 25934 bytes 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 Translation/docker_compose/amd/gpu/rocm/.README.md.kate-swp diff --git a/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 7f5428f332..e05304c418 100644 --- a/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -87,7 
+87,7 @@ services: restart: unless-stopped search-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest} container_name: search-vllm-service ports: - "${SEARCH_VLLM_SERVICE_PORT:-8081}:8011" diff --git a/SearchQnA/tests/test_compose_vllm_on_rocm.sh b/SearchQnA/tests/test_compose_vllm_on_rocm.sh index 3a047a65a9..530245cdcb 100644 --- a/SearchQnA/tests/test_compose_vllm_on_rocm.sh +++ b/SearchQnA/tests/test_compose_vllm_on_rocm.sh @@ -40,7 +40,7 @@ function start_services() { export SEARCH_LLM_MODEL_ID='Intel/neural-chat-7b-v3-3' export SEARCH_RERANK_MODEL_ID='BAAI/bge-reranker-base' - export MODEL_PATH="./data" + export MODEL_CACHE="./data" export SEARCH_BACKEND_SERVICE_PORT=3008 export SEARCH_EMBEDDING_SERVICE_PORT=3002 diff --git a/Translation/docker_compose/amd/gpu/rocm/.README.md.kate-swp b/Translation/docker_compose/amd/gpu/rocm/.README.md.kate-swp new file mode 100644 index 0000000000000000000000000000000000000000..fe3049eedf2ff2dc5eb6181860678397e3d23f68 GIT binary patch literal 25934 zcmdsA2Yj2ybr)${cD#qY*9nqL4HCp62+DR{N}?pnl;n`K>_k%O55NZ!5r7ZIAjQf` z(>>goG;Py1T}k(D(>5LH-ZO2}bT4=Bv`yR0rvLxD_YHmoElQOvCHP79cX#i-yLNk);r1t;j%#L?oTk}dt+^(XOl{b(VbdGmT7B{TZ-1%&zKy9n-m|Ok+Uwr*w(QI# zNQ$^l1>22wVB_5&RVrALR#rcY?$gGg&dqUe|Qnt|9tvf)e6Re*7=~3eLy)f#0>y zJ@;JEX)myldDo&%eJtQ_c;EQ!_~Qd-5_PZa))Ko-VzSY3Yr_q<+j43{r3I%mG+G>5 z%noG}gJ$3ahzRa5iTG5W9Z1yOcH5bE+n{j#Orqu0T+%`dTR=;cJP>VknDq=OnrhqZ zbKRzya-CMA+L(VLarPv@>dxu8j(5s!kW4mnc7V09l4tqR&-Sv~tN4N6!BV$XGYg$g zbNBG@)-wl>&dknDK6Q5Y)-y8`RW4D7Z2ADUfmy1R|jY}TqJhm{Y%uI)9Xo5a{g)N2*>DhwOD&Gc3LxQ+#cc$GpN z-MiqHPJw+QgDtn+L~mSEX?bK zF%sHG`Ek8p+YMS9#fYtCA@f9xZq&lQGt6eHk1f1OvvbBIs$~v4nO*|Rhn(zaK0TI6 zm2#+eb1WI15lB=UW%o1)q@;-~y^Ua-6zp~dQ)L67kYn-fgu7GW z9x9Rp;GCiS`i+?^Nr8*>bJuTeG?3ESrUH-D#L&%c(*WK@A*in`)!t zwd$fFrnqc6F6yA1bI7|LZAFd7!%900v16919f)?vxTj%j+SNsur7v>g?k+`!gVvb1MgIaX8F(;*E%($9J4{3=9vFXy5Q@l(9U8y%ap;=Ckz)*L*j#CTs 
za-(A7eTy0vWHTsOL1C`!cATm(nawtqm#o`kBOca9prbxsjGgQap`@cm!TY3>Oo{o~ ztl7Q^Wm-&Vf(5K+i$av)O6kDWREAb^!D&;>U5DA#xhJ}>`$^SI z*e2`n%qe$S*vtycta+=}{5bF$X32$2Ls+3rmMT0}&=_bMt#r3Uoj@CE%ckaZ8>I!e zOfHbpZe2j5~eii_=dkKo6(SA~E#C3bTHv zPYiMTj63=BC6;NC*few5!VEv~yA)2Jw8iOXSp#d!@dLl8(`TND#fTIEO1uwKqIG$j zea*X|yFQ!Ca~+st+O-pn6O9!yAA>*#(p_x39qLhCY`km4tV8Jr&C)^@uBGCY5Jt9* z*Dz&w(XDw+dIE^jf>)-eXg8}Z7x4_-1T?V%{a|247*)=Qhj|o^s|A&6x+VH5So`Hc zL8B+Cc6|SX+MGt&IHl*ipxri$POIv5;lL2Sp&=D>s^Klw-159Da1n2XSj#hR!zp4m zQD%f2_J*x%xSb`hbqeIs3OX~dji$5n-CCy#$@g2p=GUBM@pSZ`bcf*9LKVW>DlMSZ z^mc}R(ZU54*&f>1=(O!dvE6Ao!WBS)gVF_YnQ+JGA0l&cwMmZAESlMDRU0K~4w|-V z&|sEawAY{efOxmm>7qjb=Cpw!wHnC?>lruD)Cn~D9|Q|_rV+BH(9+ zmITeZ03SG(mcdc%+5_@&^vha~Xvo%;v~i+i6<_ zEDp#bLsOMpw8cW$a9xYpHL#Vk6I4Ua2bEBeSc+kU2+7C>vefVzwQ2*cf;Dv-?I5IT zn?OBn_(VqP$WZXev?}};Vy6z%q#u!Sd_zLqMNtVWq)CK)#sgf-K)%ah4b%~1S44CJ z0>1tHVIph^C0SbpUt}r>?;(58O{z)JZFj&56>0%n zA)XZaLF(9IwX8j>whV$sx-|m=>}t&c5$TulS_T0cMqTuVBjJR~6sSv`0s|NB5TM&u z!Whs2nt*{&v;oGzpv4Lx-Ys_?YSc1LZQg4^oe?6#CQ3)p4hS#C*4GA54*JCkR>)+_ zg-k$H+6&dD5^zr=-WMZ*O4%#XcaE$Ln~{YE1|%&yMlC@{3rl{@yXXc+eM>L#6;opA z3!3eFE2hO;jj@1;yA|U2MT)Nifwlf52&-7naaQ=KR`?h?wmPih`idXof$=XFRgutR zWWr;lNYalh>4&-Wl8G(fFkYLn^K2p!_F;Z}l8^m_Vn+ik=7}f$WG&q32UyQ{-PZCs z<13==WYfkK|0otbGuNpnc2}HQ+qH2U@ph3Hdr5$l z_9>+U=Zf3TW8H={50LnT5|4*)%$E>uzXJ3K;aDUg+yRAIe+Wkm3E>VZs;?AGi&CI* zw)=+^;!=cgq%9%blYVVeT3a-PW1a}!VJ);F98;p$M>IQy5Kh-^xMq_oj*aHZsgyIC zPvuG(r&uVCX3}o4k}nntj*FE+4l|Kc9Eqb!rN^QOi-gfnDNML1!W@~RzFLdcHAN<{ zVWrcG)q5$0rLvSVqhK#LStpE4)@K#?9Xjn^cPx8YW7#QP-?T;@#vSIrgwO?EU*fl@ z<&Oojq2Hp>_QP5~7?AZ6cwJdabr2D?B!&*xVC;~v1MeVrp4Oh+x8?$Z3?aepBm6Oi ze^B9d4&P%gPRIv|0uhj3xahzf8PR98C|Go0hAcX~MoW4w)uppJX%&HF9p<$PeLhQd z#2H}wjw|xYl|&BdoQ;-g)-2lyDl3U66bEcbw3pi*w?2n>VBT#pLgut4Sl}U~u;HYl z+z6Cl8HMV>f*aY8Pf-Y2rNh#A&t(&05Kva><`hGO)?XJ17RvL&B*KW$KDPomwo;QV z2ryv;*iqm|IL@ov5?ieiT%tU}eiRjVv*5nmDi1qD=*>hcDcV)WNL0b}RV*lLfnCgD zN^Fwr!|QrD(_&XEEFj{%LTsah&PE*c?8Ky*s2AO`1bZ3)urb8g#*YP`MpbD<-8A#W 
zTE0#TA3c}fLWOe^hxbhE+c$aSfbKZBY=B*EAe~3q;!{fSCbDkDE(1$$B1la^k^)jT z*rGYt>dV;yTMbc@q)}HI(3f@ZLl8&wrJ=~aKVw?>?y-R6n+kCe{TbC!R;ox_{Mqw< zZ7r?s2FL-yusrN?2$?Vby{!Ppjvq%DlS*Yr)9I1XRAz7p$$WMsUr3J)CRhEWIFrtd zeYh$&;*$RUBj2J6e~|9HxaiT^0~xQ6c6v0>&K0ZqKmo zjPoPAFO1K<2IhoCZ_&<7SBJ2E__~N`DVHu})*G%3CfPjJInR$JzZIvo6`QFVxrl1? zR0~3HCjPRI|EY=}RSV{cTAb0sbB1%FO03$96YZdS>NXr{9w7f-P~!1$j`=d&XBFUn zh<2zRh@B6*wVJm%;cElo?kC09E5iC?J7P#|_i2ji4>!{?+;3n3iGMmj@Vkh*5bVj3 zw&=oVum;xlnf$;n8rv~XhUK%Q2vD*-$CTL7&(`caVCci0v>K*~*6FeQ4if(y1>6F_ zU_)%r`1s^-ao!!mPO>}XHij0{$q|c4_$@^ITt&o?B{_fUV!jO7=PAG`E)Knf=vh}#tP{|Dw_w~PWb_Yx;`?R&W~X0fr_6V8_gC* zMzSZZw_y_&+1St5wgfQ=GbASY0xg+BHv$hHY)1rcH-}nd-j!Ls{CUdOO~O;G`$et6 zdOD_LguYO-eJNvFr0k1WK*TRrh*d^~vT})x3hNZ3@+E#NzEoQw{&pl{RGx8*X4>sw z-3(UBpKwzGh#~&=%X}JdR2osk!aR}SH)-MP-P27Eku458wB`ayczo;j<}y})8(A|{ zf)HU96);%gDtq-uu{7!X`^U|3o0-JY85h5{52Xi9dIuL%%^X*;P(1H$AI%qboP1Ok zJv-Y2^c{&EY&o0qdb0k@wM|FP6$3mov1e|2Vs?6R;@HG=Z1(}D50=~_cJ72nSXYu= z2$DX+I^L{x#BCn)MS;FT0bVOMFLCBXg5KmrV)uz)M~1#6lgi}Gg2w|Y%cjl!qHfK7 z;zVNbM5s?d+InUsW6z$|V+^e3wQSH=YJKZBnZyv2`BjSQ3p3Lq%x_@4>PV=W@_cCm|JuLSM{0UdO=JXiKgvdK*OBE+2p{RRaMC&bK=>C-oAQG#ZZ zw$)qWYAx1V=883^bZSnFIP9a{(It3-B)>`Vh853t0jDienNqnhRw`#w<*`g=%qgQY zZfY!D8g(;e+Xdpv8K1XVA{*bUL@03F&w-4MaKC`rI}O7NbHrUTufdG>r8m3<=X)S} zl*3^H8bUtfmK$yxA~|RdIgPHYuFDl+cUdUDbq~WlgP%L3oHGz z)w;W5x0&r)eC{NbsHrWJwIp(}rX1!5L&HpUp$;wcBv(w?AVMAX8Y|k6N!dokC_IIL z4Qsks=U;`n-z~Om^Re&UgAWHhuWa_WMEgk9eh!*(%o=MZT{zE9FLYa$Yl45Gk?vjV zVHdj}Jbe-G=wXfoeB}9X+HbPpW(Q9+dUW$p7fbUlIJ6*qoekQ_0)tq)7jK?U&TzA! zjDOvCWNzbRb(Yk@XMuVpiU$=t#d^wnLo0!*AmQ)&Jg0R=Cb?Zg! 
z{`n2{5s#KSo}guW_VgGxyRq#O;Q7*Pjv&gn751)icEM?!YR^{}qhcF&F+j&0<-EA0 z96r$qDX1*QyS&gsSXjpq2*1x^?SbZH5QCl17=`K|QQwRXbgTirEXfEhNj@%WD-22A ztkaP6Y>S!_v1+X9N^4WZ(w>KTQ$39LW?kmZ8jU#WO7n(o?slnFZE`-k+hBP32YUoH z9CWna!8#8AzXvrenbEMs44Ik9{fEc#|Fw!%k+GGu5fzqD3lD&{aRPK{X(`#RR_adD zYt0YCd`o$I*zO(GhBY7>@&lq_v`@CX;NmvEn-25s>M%F5vG!yVvo^Bqo3$+T&&(13 z{4H9vPXA1B@z39?cq{#LB{x>6l?$}lgE;H*v;Jv&-&yz&J4FJ=GRs`u@SvdV>ZGUsAz~6uWg+nUT+qz z(?Y1Pj#qsy3trxvZ#lM`G#`FAPa zN>8433b~O=5jP&3OlHJ&vku&NDqX5LBk4-MoVT8wxZ=sb+Y;IMWXzNA+^K=l&Yk83 zvvVgTF{htLyvERBr-8%Tb^OQfUbE8K^mrJB%u^0PUDyeYOu9&o{5uM*qGkyH7YVqMh-wBMngkG zFZB4!qMbYAjz+2bW^d)XfbHc`91v|SVhimBbB<4`p@-8r)JoF8u2q-SlMAXWjG_m6 z)DW%{*tzfpRWXaj9&^~mu^YRFp}hzulMoiT86FBX|BsGDy-JKvS^?G&to6L0Uc_ho zelLPG1F0o4^}nE6_P7n6H|*fhxr>1TwbtFWL0GWncng(#KAt^rr{anr(6%GN!z{TEm)ASk%O(NM_+o^xC&5e z_I+Hh`hsPIv@XI)*Lzd%l`9p*9pNg;2x`gL#fl4J;0k}KRYbgX+-Y7w9sZox8qif! z*%xrCHEM$o%C)Cyf07DT@K1b7E9a+!nQRifDs`M6P0o9Z!$n+VsKCMxEn&M8>#FTl z^IH&<=`H-q#z{KG?@=!maf-|mr}({E7CJ@dh*SJNEn25jB)B-m?^nE-Q^ekSzBGm{ zyi&GM&NvlZ2P|Z><&n`+0SC(*>lBGAPVomUk&PdK-r^TbZ*|FeV%-{*m=oStNkcySyL zz$W~#Qsenn-Z9{%?Ie?%ed=@zDBGyR>K?H>qWS`GyKC<{pX#}wj~Jl(=N z<#fxB`>ps1ZN(1Sl6>TJ3t7YyrPJKjlV=wq7e9z42S(oI)A>oI6FuR=JUMXiQ(9PA z@5K)r`N*+>#LD|oI7-;?IylzI<5E)O-#NsAAZDMm*D+XSg>3$5t^3@!XK|W!dSZI~ z$dln|)|PbMo?9Uyxjp+cN-=(Jh52%B_4rlIeq_bTI2hkV zyM+6S&@$ZL$z`&Iv5^tonR#KpOW2yPl)FFlqX$=kDNL=oIySKK~1;L4OV z=QlLlXDQRd(%)nOEB!5ncqO%!b&9sW+i%5hYb)e{>u5wNG%I0Z z3Vv2KoHP&66u+0mSeFgg-8uh}lWL^5mxTX73CA^_`J(ZEr~o}Q zo<*YZf21($*LY%x#{aRR`r^s7i07ZMfGzw}g}4+NPuimKf9BWr=UQ7-L=W@l7qMO43Ew|uiN+p~vb8>}z+RcvTQ)7D1ig?I{|D_pUE zP&c@c!opk0h`&;(t$>UFQTv((Dw6FKDPTC81Q{U4kT# z%qEF9A&NMee^*=^5;7$k@E@A(YcbQJ#sA3y4#fKv;+1qVtW%uKfBCKWZ*9f>3<(Pn zCv(`HcOpR`;qK?4{g03N0mY2E7v_mw_@EY!`|c(3h+5!U7B(zD)d)tGZVvkHDHfI?Miz%A;E@7mx6y03P+C zb-CgM3B2jMg-p6unRL_TViGQ%T$wBaXZm>SwEw!xt!sS2=gQ;}SYvhVRm!7HmrIQ7 zyD?nOzA`xl2K6!I?!W$WD;!R!T$wBaXRN~Ape(vPZvb?7#uT~UYJc5XI2mc*A{~kC8ztxokdUA3UeZqqnJX>Q=lJY5BJn@Hj 
zWJ)ehd!qOLqer;Yh(2-&1i*agqMz-0kstT@Exlh``pzl0GtkBpoO~mZ1IC9T<;FJ? zx)s2?-%KVZ1k65NWB8b30`L8~wQ|zz$4x=^w7f=()%@tA^;7z$(-R5l23PxVjFyCw zX1rpnb#PKv`6cf>u`Lf`-|$nC~^%o&+OJ zvWF-}4=MfU!&ge!NHh$mPWh}R8ZN_*!NWEbj}3<-jU!Q!my4tCl8BH{aRQFVU)_^o zWbMzhzYnt+Y>(jwexkVPRMN~~J{f&5pU{Q?{MbxTq9yo&->Qd?`OhN|k^p@8xPJ@5 zh_aO*_(fLcB#$d|{o8EWhC`*Yd3{86@Y*aMzOD@)a2w;36PBs%;Tc)zdUa!b80(^^ z+?K3(;3o61Q!fwCH@m|S={hSXSz-GlN`JdW0BhwbSTj`dKj#)}wR&>dsn-mzdlQ>T z+u6DS#rCZ*(_)3Uvw#pg6ygrpzZL&n1Rg>pDVE>CkDWfoF2#tlgL%S^K`p$<>>ycT z$B@$BvL-u1PE5>9!(S1BK4&L-8zgR9a6D|ypY9HiPaPf~N@w?2VD^^4vViC(&IK$- z7=7Toq6Q%RS1T;}t3aBP{BW40emxW>sm>3ENvin+VUjlN{xC_~cHad5y%G6`E|~0_ z4bz0~n-0??);AueSy$hYFwLg)&4g*Tv~Ma*Q}v?whUrLs)Um$F5T2UUcQi~>p-|ud E0Z-VuQUCw| literal 0 HcmV?d00001 From 02b2167cf424d87b92142d448388a294883269ea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 07:58:31 +0000 Subject: [PATCH 43/44] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../amd/gpu/rocm/.README.md.kate-swp | Bin 25934 -> 25935 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/Translation/docker_compose/amd/gpu/rocm/.README.md.kate-swp b/Translation/docker_compose/amd/gpu/rocm/.README.md.kate-swp index fe3049eedf2ff2dc5eb6181860678397e3d23f68..8fec6153b7a08606c161a6147ed5108a162673b9 100644 GIT binary patch delta 109 zcmX?iit+p@#tkvdj9imrnH?CpHqT>zCMM3M00g=b3IYDXAqp|!3=E9f3=9lhn`cYK zTTY%BP%*hEWFAnhU}zK*nEN<<9vhb@0|OKH Date: Mon, 31 Mar 2025 15:40:12 +0700 Subject: [PATCH 44/44] Delete .README.md.kate-swp Signed-off-by: Artem Astafev --- .../amd/gpu/rocm/.README.md.kate-swp | Bin 25935 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 Translation/docker_compose/amd/gpu/rocm/.README.md.kate-swp diff --git a/Translation/docker_compose/amd/gpu/rocm/.README.md.kate-swp b/Translation/docker_compose/amd/gpu/rocm/.README.md.kate-swp deleted file mode 100644 
index 8fec6153b7a08606c161a6147ed5108a162673b9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 25935 zcmdsA2Y6f8brxw`c5Hv1@?KYvWNMHg7C}(9>rxUeQKlq^q-7_PNE1JK({wL)@3c+Z?Dqf9J#X+4G$~bzl;D%>ckeyt+;!GH zcWBe5O}9PibX+sD~&4I?HZ!*B`6{8;KxVtS8)E9ANXDS z+;h(ro%R9?nRhMP)W-t;h7XL-jz2bVHc|J=ZY{CbBqkdjw>I2xyDg_SR9bL4L!-r^ z#q3ZvF=z%(f{5S_lZa2{*?~meZMU6yw+#v>&L&!3%_S|gum!Y4$pg_whgr{nqN%pc z0oQGsDc5N=s*U-_6X#9=tnQqd>v*T#2FYYI=LT3CD|wb5{cJC*y^0_B9V&HOHM7v^ zH1`eMCw-y?M<6*FlOjk^Aw0$0OsF7fNK=sCIFO8Vmn7LVPRNw6Ja(h z%nraDG6VZ<|A%Imo9 ztT=ZAr_^#Gq-;WZG?y708B66yqy_@rP3T(`dH~QH5dt=kI0Hn!Rgu99=%Pm*fT~Uq zhDE{)w<*ju20s}m=3GPc+qLLc7J)>yQFhOOKuVg((pw3(MZs=UFcmfc3ON?vMz}i^ z?tTjgg1#U+?RK@@aUg<1nSl2b<4(nRIlzdB>Pi^}2UXlwo800U4pgf_PHuTlX~B3E zGv4Y{E7elfsTolrwd%avC{aH`;GJq+Fk7x=eXBN?oMp4ntvd}( znf<;?`wbSczeLCLYI7y0R+9-l#Tg`!u*lsCeVv6pCu7LW>qzS!Exk)A)x3FBTqK=F zw^kEi!rVpRdllHPwo+|W+Y58xQiv8X5;eCm-&wHDC9omZeTsFDR?+Tw%@BvRu)T!3 zhv4@sxNzJ#7BNRS?g1^j+L%+)GG<&&qzARc1KV_|%gJ6Qfv(h>olq^ON1&)XUdO40 zdAU*1@xDe43$hs$te`Mgb~{d02ARz^b}U)9$3{G)jX+0zyx4HEH-wUo8U^o@PBJCx zXRBuWDwJtap$QhSo^1+IhAE{1S5+Bm$pxoPHg_GGr%JaLmQ)*pvFOygF8QA5zU(Je zGNGHS!851bWf^8xSZ2-JwdTiw*Dy;icpA(KX|hydv4X}x(x|1oCCUWaP+K-Nr`sqk zxMhw4DLuxI9VAKG4=e30aW4V>g=lvIY$3{kkFrxyB2Gdd^F+Jv(!xEylTd9y{J_|2 zS@A*ZAP{d48?;*q6b2Ga2a@Pgb`yKyZNSnu+@*k1G9&rHMA@5bJ7gkoUZ>S{&shh6 zL<(&59#SLOK_&a>dJX{k9kc(?Np6d7n;3l(dXyB0lw#cIGhd8;QUQ7xeHMwLA6A%+ z8+~Gk(Wl?ZpU<&O3&*CJ%NAz%f#0Pt`lKyJKg$|eTaF+2MU6i5ge^v-2vA~um=dYW z+w5!J3EB0Bxjff_I;LJb**Mu);q%c6bim!kw%ef`)kVj-`s9<87;^V(=SJKwE!s^EOT1#EuJSr$u2`$=<%(ORg2d0V9g zw3^n=&@NiIpd#Bt>l>Z1-6*y@Ek{NLP#8h!f|yL0WAqQ6xtQ7{=V%tqY__V6k~9ZR zTQzA=OD@{$uYEwgTk3StApmpPK$lvTWMt^+H&E3HH2NPn3wEUu4o%5v%J2c+DM>$I zAZ4(`X~uXpoeuO!11h=X%{Qu?Zqsqv7*?5Gw^D(d2>TCxX{Qlt020S3s z4rh5)?NHosMaY?Mt7>Q4A9XT}owWvrtW5@W(d{mpAU%dZQo{uK6Nl!x^g`ffb7rmX zv@HS_CuEVXsd6sbVj--%uEp#chLyY%RKt-EDj_4W6w?S6l9>&7so^zh)dpGxZR#}I zflJjkfp}W?i36!KL&oEvRbjv2J9U^Q{Rofa8yw;;ibz-{O~U0f9^hIA{9VS-KpZi5 
zMR+$L;LA_5iXo9UM#+J_Lp~nF$gku$l^Iiln+RP(PSzH|7MTj%d+;7)lVVbI+Z_yr zGPQuMU{5mrAZ2W^TGpObTLw-e&6)uLcD3e!i1f>NEdvJ)vo3POnQ%g33dE&OhJgup z2+(aSVNB=%Rlq>V+5n?t&|(E(@0L3cF=`p7Ht)3{&Tx^T6Qv_)2bdRg>q`SD2mN9N zD|oWyf+xT#?S*Pn3AktA?~9T^rR4zA4$;84p^w(zWI-5v@eTW~Qke?_$2Y}&ZuA432p5<7w04HvyRcRH~*owA?J#NJeLYy^qrCiU1{nBmr&bcrBE zgh{29k_?4yCYELMbx}_DYL&!pLHMR)OEH%kB;PvPaaHoznQGRq9agQ2!;kF0lII zzeP2F%#jW47L~T|*80wX#7kgxC6wyGBWj5c9mZhnl&}MD=jc4GJ-K(y0D?>*!R{se zafN?C;dKq)V=Yd|2Z#a|kY5;dV2;e_vsx4c9hf0Ohu3IH&rn@Do0C=%Na8TBRp|2x z)e&ca?K`2!Di7zC6k-JD_w)B16dAW)u{K_ZMW?Q<)DW09I{ zL4XM(z>Wex%z0kjmRPh#aEbCT`%zTft%CbXQ66@N&|8UCQnag#5wC*jt5{Ih0vpU> zN_3Lz!)rX8Y0;|{77%e>A$HI}XCnrBc4E>@)QfIeoIP~_*cf8$;KzbbqpCEbW}11T zEnlaFkF006P~qIfk$n>f4on_Bs5=fW8(_l?r1J<{d|C-!&!Jnf%fOQB2~ty#q=1wS zwrJLizMLDd)etpF8g-=sdD-wj1aU-O8j9@OGp1$SJr8k~o;~l^ z*3#N;03Q$x;bE6k$b7NyZ3Q@f;so57R4O}~PLGVHGJ``%=CdRDLV9d4x#}mynRI3( zmr2|2u|jSvGb-Py+*ocjo62TSonl?A;yAnB(YpM>VOj>K%K}0yD#W~cz*wW$?HRV6 zetu;4h5otMz?!h=E!vgoY8TdbUl*|~W#~dyyy=nd44SUtvI8t*h&}HDv1&6;q=VwA+i;|LkmL7)5|6uc%$MmtrvUeX zwL|fM?|jIu)x50p)HxD9~8 zhS0EvUn-ENl71D)NHV?Sn@&g8CePlG9AHmiG1wTDDnk|fs zWKUUZ!zL`Uv7fJP349V}h)?nbS~7)h1QtBljtIU_18d1f^;x~b~)^pvYFCoe$-7lZaOtuf_soH=Ciqc zu232q9m!aYNW4Al;@2yIJ3v4u-7U|Ry^?G)lfMXY2SL9+ zi1V+)e8MfZZ1b`2-D4aMdS2N)(Guw+Rr@(;#<6OwnRMYiJH60t*-;bxlZ|xmT93Hc z{ov_~ct?+LCSW7aXQcfm3vPDsWTQtn4|frocQHZ>GOn{hJ6T}hYxf$Pr;{_>EGQ$8 zi2v|>h~Hel%EMi70xaqIG&fo|u-9nt6#VN}P@m5SGj0>>dh(KTV~i2@d$n%8h}}QG zp+4fVQpXdtY|ow^=VmvyT>?Cxd(BaJ`L@E|HO?+LjnnP<>SB~_!!8EsSfiZRDCr2F zY=jh)7vo)CXu&P4;|PS`=dkub@-pzj&S#E7@sEgaMkhMffL@wpgqkEDXSEfIByTpT zNP4zKRf%XdR&}MdDWYl5L%pdS#(T3N^=6eu9Cf97!!~!j)T%bQ9^Gp&J^X_`f*KAw zTI*m9hyUM0>Xyu?TVjUH%;XbC#_|8PidK=am9*g%mQV{1fVOc0bZKcR*{)XVPSR`5 z4?}%Rd3)II9o2@_AsX@>qG7a8w!2{BHouDo^KEJ{H?y(!WD>JBv+SF-EVR$e5&Qft zTC_p?OmMN!->P^k?QUrR-`e~hC)1`0e@<4m_+hoapF{T9qNLHx#!cjvp40oS*CTcW%n;I%@^#fvP7piYb<|3-fSgWoeEf;sg!*>Y7X;>6(-^PgmEAmRljAru+^(2kTkJ$f^hS;82%n*%E?&I04Z3)}hj4=$ 
zLTz=t>h&~)b(rE<>w9>JCG}wQRUTgGR;=>z;&pD=$Lq~At967wGoh&Wp3sPf*g1p4 zcQDmBQN4?{`s3e{!x(P!*1vP2mo>+#x0mH<0&xEu8qG!`8<3 zShMCWyRHv8gQI}1c?wP%mS!ZO6eiB#mRqg14RX4)#(Rds<+y-vVw+SXNB0XGWY z1*Ru9d|`OO8j)3%1!Q{jw-Cqh@iq<5AV1}LI8EB_#cx4C!g>z2rXGFiUE?Z1soA%2 zz3L0X2x$$%N!NQ*@0BYR#0}vp&IoGB*kHv44d4oUsZ~U*b=+uPKpp;?*DBCeOW7B2 zsx_*E56QKsX@8N5q2QnRl2*=72P@emc2(*)KboBP7Ke+t$WVcXA6mk8C*rE@RqI<2 zl;th#%H~NL#qUuo6)}p;5~KLNS{52b=7>@JJ}ugyQ6#t+#qU?Vm{G*udcHJ9EN?aByfPNS$QDA6D}zAkHN(li zO0aQR^+yqhn1!9h{`#T|V`dnqr0T`VtxDCulLjN|M-j?6M2jP}h+89b-_#5?plDIW z*5bG%wx;5%npcd|s?!$h!@6A%4?bh#v~gGSqPg%pJvGJj7Q+>hXl;M=R*wCe?GEFK zg1tt|1Dz`M`A$NpQ9pj&`SYCLXvlVrPOl0Y_}m@WZ5s}f26MDJUK?kzxLYTyCB|z< zS8~dVXzl1m2ccI(q6@%rI6oq-CmXU4^>p&dWDs7O#@z-4ov`O44lfxO?Yd}NS8?IK z^7ox$EnY8XfkROe=8vJR6|}JWD?YqY_i}|Si{9!TliEiIw@v%JJ!aC)2%a8l5 z_z7*rF6xqeywj)ilS(Ig!i9Nq;NYjUu(HvMAJ+4c zV*`nm_oHx>u;F!ZtdYm1q{zQ>hyy{)K4q_Cu*wRD`KPt+^>5GOH0$)l^!U*y!_%xS z>AXF+LPBzT_Ggr0{M-uj<=o28DnO5`7c7z+0Y9fOJ2re&l^8qNqn}q)pS744*7^k& z5aJgV;!>P7A#FKp@=JbgzpS-I&zdk#rv6v7uusm$F(u;htD61rij#3rzKM1T_Z1;! 
zxWALjWD8>>Bf2+5l!w{7U+amBa}5#PG^}2FEIpF9XVHivxWBHreR9B+DQV7cXtqD3 zOv{k|CJR{UZz;sbQd(K3Nb9@&R{XZMLJqi&Mx-^A(#W}QLdyZy-|_K(SMj4#$~>8> zcWdEvMxmNC(Mtb{6_z#qDT;iE8690z^&_m)`BohBe z3bS#ECx%G;A1kWQo=gjS{s{}%!ar4rOCj;3EfW7{er9TSzR~#w2IULY(3vQ-V!r3w>SIDQ`>{vcEruVFfC#TC^Rsy%c6#EBtg9|Av zyoCetR|>TqP^-*-oN>uLMA%NuzgA41UW;BYG-*Vj-2Y2`v6#*qAStQ)?cM8Ma(b2$#MXEsidJhYwcYm)?TL=|iZ44m^u!X((2Zg+o zkkLDjJTjb?x(RY8VgFHKake~BX}K7-Mrk>`@<Kw0~1jxqOA|)3{rU*ahPM80mnI2qc%U-mB0sP&k2D>N3I+@ke4hNvsJ` z#K`=+;#!xGDUpEx&}?6dnHDMjPZn?@-lq^BOC!TN#mM}Z--`d%R@_IIun;jaN8EWQ z;sg@zK2F;I_?Yik%&2)`p6G=SXyLeTULudE1*U~wC+7smA{y>eJAB=1e3EzVYvsrF zRW?!^=a_v^$qo!0>f(<5)e0+$=r1D{s}Md@F-@lGmweFZ?^B3NU>bxrO+&v_$J(ya+G6*XB%h|T|7r`+ zcRlY+;LRmDYMhYA(kiL3k-}KEoEpnHId^QVKo78#$>wvFRH|6QV`=`&Ly5icBVvz9 zi46OfEN~AzyuE2l)a@7)rieT!#ZtV-7rvb)Pp9Q^(E>!&(7e(!fx-0>acoxHuIzb> z(zZsgT1e#_KnzPCZ^HFoqvLbaWf~v3s(WRQ5r(FZ#^~Pid&wBx-tg|V`CNfjXFrTTqcRb#yuB+SLP^T zaQe6uzU&$C=+E*q6VO;uTA#$@FpiMX00XlKu z*wG2D@H!z@n|wdDpFn)5z5fm$?M_8|)2zHb$d`z@{C;|JP~KXX@98O=5DDHKpLlk5 zV*2R#)ZFUF$Df!!c64^t!q|-Cu^zk|sjqN*I9|eM1RL_8dkLp5Pov6~*PUPB3vrzV z?Cv<_G9pTCK8w-M7QTt&e3v%$;MNmQ<88AJ_ML_Bsh!E>aPSAuKD(XvaJ4=^9IVEN zy44|pM^tx+ZAms8^W3@;ng{uDw?82FXe}?{5evRDgofjki?oVwVH&=Y%PzQOc~4PZ ztth&91QBru{~bI39ylkz)s+K!a&i=X!h;w*TVqd>@+n+A@rQS0N-j=&qWAuzN4V37 zJ~9LXU_Nxw&vw1Uk9+-=-lr{n$CTR{XyXY^zLCfYWj( zL~zroq?y5bGWuXXp$!4}v6Y}iOYj4~RSzHYpGP1h0r>E7|2BdVWjjCci$vxmkCD0l z9ky)K;ZoVW7!e)3Hj9U^Yr_ZK#`xrf9n|*lj0Cz~-54K6T=cZtl86UxG7mfT^6-4K zI}DbtvvQJ^VSiZZ@3aUQT6qfA3|0Kkxy4$oo?Le7HN)%P#3s^Cwr)VNeJ#wiXyKhK zAjB?(xE=a$#XlE;hY(4M<+t-=w~w($F`~o4JQ=hAcb9SS*LE_d0$HUhAneOoT)RFO_batNwW^V~B0YpEs9; z0%>ycLt&Eg^=u`o@6LVef&9}Nh(J^%m!