Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
cf60682
DocSum - add files for deploy app with ROCm vLLM
Feb 13, 2025
1fd1de1
DocSum - fix main
Feb 13, 2025
bd2d47e
DocSum - add files for deploy app with ROCm vLLM
Feb 13, 2025
2459ecb
DocSum - fix main
Feb 13, 2025
4d35065
Merge remote-tracking branch 'origin/main'
Feb 19, 2025
6d5049d
DocSum - add files for deploy app with ROCm vLLM
Feb 13, 2025
9dfbdc5
DocSum - fix main
Feb 13, 2025
a8857ae
DocSum - add files for deploy app with ROCm vLLM
Feb 13, 2025
5a38b26
DocSum - fix main
Feb 13, 2025
0e2ef94
Merge remote-tracking branch 'origin/main'
Feb 25, 2025
30071db
Merge branch 'main' of https://github.com/opea-project/GenAIExamples
Mar 11, 2025
c37963b
SearchQnA - add files for deploy with ROCm vLLM
Mar 11, 2025
76115de
SearchQnA - add files for deploy with ROCm vLLM
Mar 11, 2025
cf9cb23
SearchQnA - add files for deploy with ROCm vLLM
Mar 11, 2025
f40cc25
SearchQnA - add files for deploy with ROCm vLLM
Mar 11, 2025
39dc7fd
SearchQnA - add files for deploy with ROCm vLLM
Mar 11, 2025
3b2e5f2
SearchQnA - add files for deploy with ROCm vLLM
Mar 11, 2025
78179d2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 11, 2025
a5fd773
SearchQnA - add files for deploy with ROCm vLLM
Mar 11, 2025
8fe6d4b
Merge remote-tracking branch 'origin/feature/SearchQnA_vLLM' into fea…
Mar 11, 2025
ea3eb0d
SearchQnA - add files for deploy with ROCm vLLM
Mar 11, 2025
6684f77
Update Readme.md and add MODEL_PATH var
artem-astafev Mar 24, 2025
0cb3cee
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 24, 2025
4e0982b
Merge branch 'main' into feature/SearchQnA_vLLM
artem-astafev Mar 24, 2025
c27fb91
Update README.md
artem-astafev Mar 24, 2025
351371b
Merge branch 'feature/SearchQnA_vLLM' of https://github.com/chyunduno…
artem-astafev Mar 24, 2025
73fc522
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 24, 2025
74621ab
Update Readme.md and set_env files
artem-astafev Mar 25, 2025
f884fea
Merge branch 'feature/SearchQnA_vLLM' of https://github.com/chyunduno…
artem-astafev Mar 25, 2025
ccefa76
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 25, 2025
a32d8bd
Add sleep before running validate
artem-astafev Mar 25, 2025
0740267
Increase timeout before tests
artem-astafev Mar 25, 2025
f997214
SearchQnA - fix files for deploy on ROCm vLLM
Mar 25, 2025
6bc9f86
SearchQnA - fix files for deploy on ROCm vLLM
Mar 25, 2025
e33bf5a
Merge branch 'main' into feature/SearchQnA_vLLM
artem-astafev Mar 25, 2025
b1cf27b
Merge branch 'main' into feature/SearchQnA_vLLM
artem-astafev Mar 26, 2025
7836844
SearchQnA - fix files for deploy on ROCm vLLM
Mar 26, 2025
4e75717
Merge remote-tracking branch 'origin/feature/SearchQnA_vLLM' into fea…
Mar 26, 2025
3803cbb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 26, 2025
75b731d
SearchQnA - fix files for deploy on ROCm vLLM
Mar 26, 2025
5946031
Merge remote-tracking branch 'origin/feature/SearchQnA_vLLM' into fea…
Mar 26, 2025
a2cc5de
SearchQnA - fix files for deploy on ROCm vLLM
Mar 26, 2025
d7bf1dd
SearchQnA - fix files for deploy on ROCm vLLM
Mar 26, 2025
a812d75
SearchQnA - fix files for deploy on ROCm vLLM
Mar 26, 2025
8d85d85
SearchQnA - fix files for deploy on ROCm vLLM
Mar 26, 2025
f2d2310
SearchQnA - fix files for deploy on ROCm vLLM
Mar 26, 2025
89cb95b
SearchQnA - fix files for deploy on ROCm vLLM
Mar 26, 2025
0f6690e
SearchQnA - fix files for deploy on ROCm vLLM
Mar 26, 2025
e55695b
SearchQnA - fix files for deploy on ROCm vLLM
Mar 26, 2025
7a0bf04
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 26, 2025
50b8a9e
Added Web UI validation example
artem-astafev Mar 26, 2025
df9aa92
Build AMD vLLM image from Comps repo
artem-astafev Mar 26, 2025
be8cc63
Update vllm-rocm config from comps
artem-astafev Mar 26, 2025
02b2167
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 26, 2025
e162ba9
Merge branch 'main' into feature/SearchQnA_vLLM
chyundunovDatamonsters Mar 26, 2025
96b6813
Merge branch 'main' into feature/SearchQnA_vLLM
artem-astafev Mar 28, 2025
8e988eb
Merge branch 'main' into feature/SearchQnA_vLLM
artem-astafev Mar 31, 2025
af506cb
Delete .README.md.kate-swp
artem-astafev Mar 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
549 changes: 473 additions & 76 deletions SearchQnA/docker_compose/amd/gpu/rocm/README.md

Large diffs are not rendered by default.

64 changes: 35 additions & 29 deletions SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ services:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: search-tei-embedding-server
ports:
- "3001:80"
- "${SEARCH_TEI_EMBEDDING_PORT:-3001}:80"
volumes:
- "${MODEL_PATH:-./data}:/data"
- "${MODEL_CACHE:-./data}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
Expand All @@ -20,13 +20,14 @@ services:
HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
command: --model-id ${SEARCH_EMBEDDING_MODEL_ID} --auto-truncate

search-embedding:
image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
container_name: search-embedding-server
depends_on:
- search-tei-embedding-service
ports:
- "3002:6000"
- "${SEARCH_EMBEDDING_SERVICE_PORT:-3002}:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
Expand All @@ -36,11 +37,12 @@ services:
TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT}
HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped

search-web-retriever:
image: ${REGISTRY:-opea}/web-retriever:${TAG:-latest}
container_name: search-web-retriever-server
ports:
- "3003:7077"
- "${SEARCH_WEB_RETRIEVER_SERVICE_PORT:-3003}:7077"
ipc: host
environment:
no_proxy: ${no_proxy}
Expand All @@ -50,26 +52,28 @@ services:
GOOGLE_API_KEY: ${SEARCH_GOOGLE_API_KEY}
GOOGLE_CSE_ID: ${SEARCH_GOOGLE_CSE_ID}
restart: unless-stopped

search-tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: search-tei-reranking-server
ports:
- "3004:80"
- "${SEARCH_TEI_RERANKING_PORT:-3004}:80"
volumes:
- "${MODEL_PATH:-./data}:/data"
- "${MODEL_CACHE:-./data}:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${SEARCH_RERANK_MODEL_ID} --auto-truncate

search-reranking:
image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
container_name: search-reranking-server
depends_on:
- search-tei-reranking-service
ports:
- "3005:8000"
- "${SEARCH_RERANK_SERVICE_PORT:-3005}:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
Expand All @@ -80,13 +84,14 @@ services:
HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped

search-tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
container_name: search-tgi-service
ports:
- "3006:80"
- "${SEARCH_TGI_SERVICE_PORT:-3006}:80"
volumes:
- "${MODEL_PATH:-./data}:/data"
- "${MODEL_CACHE:-./data}:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
Expand All @@ -96,7 +101,7 @@ services:
shm_size: 1g
devices:
- /dev/kfd:/dev/kfd
- /dev/dri/:/dev/dri/
- /dev/dri:/dev/dri
cap_add:
- SYS_PTRACE
group_add:
Expand All @@ -105,25 +110,26 @@ services:
- seccomp:unconfined
ipc: host
command: --model-id ${SEARCH_LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048

search-llm:
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
container_name: search-llm-server
depends_on:
- search-tgi-service
ports:
- "3007:9000"
- "${SEARCH_LLM_SERVICE_PORT:-3007}:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT}
LLM_MODEL_ID: ${SEARCH_LLM_MODEL_ID}
LLM_MODEL: ${SEARCH_LLM_MODEL_ID}
HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
OPENAI_API_KEY: ${SEARCH_OPENAI_API_KEY}
LLM_COMPONENT_NAME: "OpeaTextGenService"

restart: unless-stopped
search-backend-server:
image: ${REGISTRY:-opea}/searchqna:${TAG:-latest}
Expand All @@ -139,18 +145,18 @@ services:
ports:
- "${SEARCH_BACKEND_SERVICE_PORT:-3008}:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${SEARCH_MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${SEARCH_EMBEDDING_SERVICE_HOST_IP}
- WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${SEARCH_RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${SEARCH_LLM_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_PORT=${SEARCH_EMBEDDING_SERVICE_PORT}
- WEB_RETRIEVER_SERVICE_PORT=${SEARCH_WEB_RETRIEVER_SERVICE_PORT}
- RERANK_SERVICE_PORT=${SEARCH_RERANK_SERVICE_PORT}
- LLM_SERVICE_PORT=${SEARCH_LLM_SERVICE_PORT}
no_proxy: ${no_proxy}
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
MEGA_SERVICE_HOST_IP: ${SEARCH_MEGA_SERVICE_HOST_IP}
EMBEDDING_SERVICE_HOST_IP: ${SEARCH_EMBEDDING_SERVICE_HOST_IP}
WEB_RETRIEVER_SERVICE_HOST_IP: ${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP}
RERANK_SERVICE_HOST_IP: ${SEARCH_RERANK_SERVICE_HOST_IP}
LLM_SERVICE_HOST_IP: ${SEARCH_LLM_SERVICE_HOST_IP}
EMBEDDING_SERVICE_PORT: ${SEARCH_EMBEDDING_SERVICE_PORT}
WEB_RETRIEVER_SERVICE_PORT: ${SEARCH_WEB_RETRIEVER_SERVICE_PORT}
RERANK_SERVICE_PORT: ${SEARCH_RERANK_SERVICE_PORT}
LLM_SERVICE_PORT: ${SEARCH_LLM_SERVICE_PORT}
ipc: host
restart: always
search-ui-server:
Expand All @@ -161,10 +167,10 @@ services:
ports:
- "${SEARCH_FRONTEND_SERVICE_PORT:-5173}:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- BACKEND_BASE_URL=${SEARCH_BACKEND_SERVICE_ENDPOINT}
no_proxy: ${no_proxy}
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
BACKEND_BASE_URL: ${SEARCH_BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always

Expand Down
185 changes: 185 additions & 0 deletions SearchQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  # CPU TEI server hosting the embedding model; container port 80.
  search-tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: search-tei-embedding-server
    ports:
      - "${SEARCH_TEI_EMBEDDING_PORT:-3001}:80"
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
      HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
      HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
    command: --model-id ${SEARCH_EMBEDDING_MODEL_ID} --auto-truncate

  # OPEA embedding microservice wrapping the TEI embedding server.
  search-embedding:
    image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
    container_name: search-embedding-server
    depends_on:
      - search-tei-embedding-service
    ports:
      - "${SEARCH_EMBEDDING_SERVICE_PORT:-3002}:6000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_HOST_IP: ${SEARCH_HOST_IP}
      TEI_EMBEDDING_PORT: ${SEARCH_TEI_EMBEDDING_PORT}
      TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT}
      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped

  # Web retriever backed by Google Custom Search; container port 7077.
  search-web-retriever:
    image: ${REGISTRY:-opea}/web-retriever:${TAG:-latest}
    container_name: search-web-retriever-server
    ports:
      - "${SEARCH_WEB_RETRIEVER_SERVICE_PORT:-3003}:7077"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT}
      GOOGLE_API_KEY: ${SEARCH_GOOGLE_API_KEY}
      GOOGLE_CSE_ID: ${SEARCH_GOOGLE_CSE_ID}
    restart: unless-stopped

  # CPU TEI server hosting the reranking model; container port 80.
  search-tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: search-tei-reranking-server
    ports:
      - "${SEARCH_TEI_RERANKING_PORT:-3004}:80"
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    command: --model-id ${SEARCH_RERANK_MODEL_ID} --auto-truncate

  # OPEA reranking microservice wrapping the TEI reranking server.
  search-reranking:
    image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
    container_name: search-reranking-server
    depends_on:
      - search-tei-reranking-service
    ports:
      - "${SEARCH_RERANK_SERVICE_PORT:-3005}:8000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${SEARCH_TEI_RERANKING_ENDPOINT}
      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
      HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
      HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped

  # vLLM inference server on AMD ROCm GPUs; listens on container port 8011
  # (must match the --port flag in `command` below).
  search-vllm-service:
    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
    container_name: search-vllm-service
    ports:
      - "${SEARCH_VLLM_SERVICE_PORT:-8081}:8011"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: "1"
      HF_HUB_ENABLE_HF_TRANSFER: "0"
      # NOTE(review): was "WILM_USE_TRITON_FLASH_ATTENTION" — vLLM reads
      # VLLM_USE_TRITON_FLASH_ATTENTION, so the misspelled variable was a no-op.
      VLLM_USE_TRITON_FLASH_ATTENTION: "0"
      PYTORCH_JIT: "0"
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 20G
    devices:
      - /dev/kfd:/dev/kfd
      # No trailing slashes, consistent with the TGI compose file in this PR.
      - /dev/dri:/dev/dri
    cap_add:
      - SYS_PTRACE
    group_add:
      - video
    security_opt:
      - seccomp:unconfined
      - apparmor=unconfined
    command: "--model ${SEARCH_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
    ipc: host

  # OPEA text-generation microservice in front of the vLLM server.
  search-llm:
    image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
    container_name: search-llm-server
    depends_on:
      - search-vllm-service
    ports:
      - "${SEARCH_LLM_SERVICE_PORT:-3007}:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${SEARCH_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
      LLM_MODEL_ID: ${SEARCH_LLM_MODEL_ID}
      HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
      LLM_COMPONENT_NAME: "OpeaTextGenService"
    restart: unless-stopped

  # SearchQnA megaservice orchestrating embedding, retrieval, reranking and LLM.
  search-backend-server:
    image: ${REGISTRY:-opea}/searchqna:${TAG:-latest}
    container_name: search-backend-server
    depends_on:
      - search-tei-embedding-service
      - search-embedding
      - search-web-retriever
      - search-tei-reranking-service
      - search-reranking
      - search-vllm-service
      - search-llm
    ports:
      - "${SEARCH_BACKEND_SERVICE_PORT:-3008}:8888"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      MEGA_SERVICE_HOST_IP: ${SEARCH_MEGA_SERVICE_HOST_IP}
      EMBEDDING_SERVICE_HOST_IP: ${SEARCH_EMBEDDING_SERVICE_HOST_IP}
      EMBEDDING_SERVICE_PORT: ${SEARCH_EMBEDDING_SERVICE_PORT}
      WEB_RETRIEVER_SERVICE_HOST_IP: ${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP}
      WEB_RETRIEVER_SERVICE_PORT: ${SEARCH_WEB_RETRIEVER_SERVICE_PORT}
      RERANK_SERVICE_HOST_IP: ${SEARCH_RERANK_SERVICE_HOST_IP}
      RERANK_SERVICE_PORT: ${SEARCH_RERANK_SERVICE_PORT}
      LLM_SERVICE_HOST_IP: ${SEARCH_LLM_SERVICE_HOST_IP}
      LLM_SERVICE_PORT: ${SEARCH_LLM_SERVICE_PORT}
    ipc: host
    restart: always

  # Web UI; talks to the backend megaservice via BACKEND_BASE_URL.
  search-ui-server:
    image: ${REGISTRY:-opea}/searchqna-ui:${TAG:-latest}
    container_name: search-ui-server
    depends_on:
      - search-backend-server
    ports:
      - "${SEARCH_FRONTEND_SERVICE_PORT:-5173}:5173"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      BACKEND_BASE_URL: ${SEARCH_BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
Loading