Skip to content
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
149 commits
Select commit Hold shift + click to select a range
cf60682
DocSum - add files for deploy app with ROCm vLLM
Feb 13, 2025
1fd1de1
DocSum - fix main
Feb 13, 2025
bd2d47e
DocSum - add files for deploy app with ROCm vLLM
Feb 13, 2025
2459ecb
DocSum - fix main
Feb 13, 2025
4d35065
Merge remote-tracking branch 'origin/main'
Feb 19, 2025
5b441e8
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
52c15cf
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
e578d3d
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
32075f0
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
ab627e5
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
7a9c041
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
4fb10b7
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
4652d88
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
75e9f02
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 19, 2025
ba1f2b1
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
ba56d73
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
5415478
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
25aa0d4
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
c1958bd
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
208c9f9
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
4958c39
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
605b332
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
02aaca3
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
ac678a2
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
14402dc
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
03bf8cb
DocSum - fix files for deploy on ROCm
Mar 19, 2025
ae708d6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 19, 2025
ac60bd4
DocSum - fix files for deploy on ROCm
Mar 19, 2025
0787a6a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 19, 2025
65c76af
DocSum - fix files for deploy on ROCm
Mar 19, 2025
614c6ce
DocSum - fix files for deploy on ROCm
Mar 19, 2025
2e92248
DocSum - fix files for deploy on ROCm
Mar 19, 2025
f3faa9d
DocSum - fix files for deploy on ROCm
Mar 19, 2025
172dda6
DocSum - fix files for deploy on ROCm
Mar 19, 2025
9a82629
DocSum - fix files for deploy on ROCm
Mar 20, 2025
bb42e69
DocSum - fix files for deploy on ROCm
Mar 21, 2025
e702cf1
DocSum - fix files for deploy on ROCm
Mar 21, 2025
d2d4725
Fix minor typo in README (#1559)
jotpalch Feb 17, 2025
9c49538
Remove perf test code from test scripts. (#1510)
ZePan110 Feb 18, 2025
c6a0746
DocSum - add files for deploy app with ROCm vLLM
Feb 13, 2025
ef4182a
DocSum - fix main
Feb 13, 2025
135f912
DocSum - add files for deploy app with ROCm vLLM
Feb 13, 2025
b0eb7b8
DocSum - fix main
Feb 13, 2025
7750111
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
d8d3d2f
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
8cc16e3
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
6a97033
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
da022a8
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
795c0e9
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
c13c216
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
4c3d300
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
674ce6a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 19, 2025
f8b2887
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
ea5002d
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
190f8de
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
7eb1ae9
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
5e24e8f
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
57f8c0c
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
a4f04ce
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
d25f642
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
07849cd
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
3736848
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
aff88f2
DocSum - fix files for deploy with ROCm vLLM
Feb 19, 2025
968d98f
DocSum - fix files for deploy on ROCm
Mar 19, 2025
2a814ad
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 19, 2025
1bd405d
DocSum - fix files for deploy on ROCm
Mar 19, 2025
3c8a2aa
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 19, 2025
0ad9f15
Bump gradio from 5.5.0 to 5.11.0 in /MultimodalQnA/ui/gradio (#1391)
dependabot[bot] Feb 19, 2025
48f7d78
Simplify ChatQnA AIPC user setting (#1573)
xiguiw Feb 19, 2025
2e08a5d
Fix mismatched environment variable (#1575)
xiguiw Feb 19, 2025
f7b0d31
Fix trivy issue (#1569)
ZePan110 Feb 20, 2025
ebc997e
Update AgentQnA and DocIndexRetriever (#1564)
minmin-intel Feb 22, 2025
44e98a0
Update README.md of AIPC quick start (#1578)
yinghu5 Feb 23, 2025
bc84ddc
Fix "OpenAI" & "response" spelling (#1561)
eero-t Feb 25, 2025
af17b74
Bump gradio from 5.5.0 to 5.11.0 in /DocSum/ui/gradio (#1576)
dependabot[bot] Feb 25, 2025
6b9e472
Align mongo related image names with comps (#1543)
Spycsh Feb 27, 2025
e860515
Fix ChatQnA ROCm compose Readme file and absolute path for ROCM CI te…
artem-astafev Feb 27, 2025
6301d02
Fix async in chatqna bug (#1589)
XinyaoWa Feb 27, 2025
5328fc2
Fix benchmark scripts (#1517)
chensuyue Feb 28, 2025
4a528fd
Top level README: add link to github.io documentation (#1584)
alexsin368 Feb 28, 2025
1f42b35
fix click example button issue (#1586)
WenjiaoYue Feb 28, 2025
136903b
ChatQnA Docker compose file for Milvus as vdb (#1548)
ezelanza Feb 28, 2025
3f80f1b
Fix cd workflow condition (#1588)
chensuyue Mar 3, 2025
a7f269b
Update DBQnA tgi docker image to latest tgi 2.4.0 (#1593)
yinghu5 Mar 3, 2025
f190d02
Revert chatqna async and enhance tests (#1598)
Spycsh Mar 3, 2025
a62e9ec
Use model cache for docker compose test (#1582)
ZePan110 Mar 4, 2025
4d2a35c
open chatqna frontend test (#1594)
chensuyue Mar 4, 2025
6a20d83
Enable CodeGen,CodeTrans and DocSum model cache for docker compose te…
ZePan110 Mar 4, 2025
25fcc53
bugfix GraphRAG updated docker compose and env settings to fix issues…
rbrugaro Mar 4, 2025
d70b4d7
Enable ChatQnA model cache for docker compose test. (#1605)
ZePan110 Mar 5, 2025
07d4c89
Enable SearchQnA model cache for docker compose test. (#1606)
ZePan110 Mar 5, 2025
849df16
Fix docker image opea/edgecraftrag security issue #1577 (#1617)
Yongbozzz Mar 5, 2025
3673398
[AudioQnA] Fix the LLM model field for inputs alignment (#1611)
wangkl2 Mar 5, 2025
4d16ea3
Update compose.yaml for SearchQnA (#1622)
ZePan110 Mar 7, 2025
b374417
Update compose.yaml for ChatQnA (#1621)
ZePan110 Mar 7, 2025
0b1186b
Update compose.yaml (#1620)
ZePan110 Mar 7, 2025
cb86be2
Update compose.yaml (#1619)
ZePan110 Mar 7, 2025
c1a56f7
Enable vllm for CodeTrans (#1626)
letonghan Mar 7, 2025
f2d94ea
Update model cache for AgentQnA (#1627)
ZePan110 Mar 7, 2025
587e708
Use GenAIComp base image to simplify Dockerfiles (#1612)
eero-t Mar 7, 2025
3763c94
[Bug: 112] Fix introduction in GenAIExamples main README (#1631)
srajabos Mar 7, 2025
8311e9e
Fix corner CI issue when the example path deleted (#1634)
chensuyue Mar 7, 2025
43432ea
[ChatQnA] Show spinner after query to improve user experience (#1003)…
wangleflex Mar 7, 2025
8402fff
Use the latest HabanaAI/vllm-fork release tag to build vllm-gaudi ima…
chensuyue Mar 7, 2025
3698827
Set vLLM as default model for FaqGen (#1580)
XinyaoWa Mar 10, 2025
304d835
Fix vllm model cache directory (#1642)
wangkl2 Mar 10, 2025
f6c3f7b
Enhance ChatQnA test scripts (#1643)
chensuyue Mar 10, 2025
9407fe2
Add GitHub Action to check and close stale issues and PRs (#1646)
XuehaoSun Mar 12, 2025
6c3000e
Use GenAIComp base image to simplify Dockerfiles & reduce image sizes…
eero-t Mar 13, 2025
88b1364
Enable inject_commit to docker image feature. (#1653)
ZePan110 Mar 13, 2025
c7c85d9
Enable CodeGen vLLM (#1636)
xiguiw Mar 13, 2025
02fd196
[ChatQnA][docker]Check healthy of redis to avoid dataprep failure (#1…
gavinlichn Mar 13, 2025
383a67e
Enable GraphRAG and ProductivitySuite model cache for docker compose …
ZePan110 Mar 13, 2025
666e7af
Enable Gaudi3, Rocm and Arc on manually release test. (#1615)
ZePan110 Mar 13, 2025
2a9dfcd
Refine README with highlighted examples and updated support info (#1006)
CharleneHu-42 Mar 13, 2025
3c95214
[AudioQnA] Enable vLLM and set it as default LLM serving (#1657)
wangkl2 Mar 14, 2025
398005c
[ChatQnA] Enable Prometheus and Grafana with telemetry docker compos…
louie-tsai Mar 14, 2025
cb41f8a
Update stale issue and PR settings to 30 days for inactivity (#1661)
XuehaoSun Mar 14, 2025
8854d2c
Add final README.md and set_env.sh script for quickstart review. Prev…
jedwards-habana Mar 14, 2025
e2b4f20
Fix input issue for manual-image-build.yml (#1666)
chensuyue Mar 17, 2025
6c67245
Set vLLM as default model for VisualQnA (#1644)
Spycsh Mar 18, 2025
e01fade
Fix workflow issues. (#1691)
ZePan110 Mar 19, 2025
b9cb0c6
Enable base image build in CI/CD (#1669)
chensuyue Mar 19, 2025
a6e3b54
fix errors for running AgentQnA on xeon with openai and update readme…
minmin-intel Mar 20, 2025
0fec748
Add new UI/new features for EC-RAG (#1665)
Yongbozzz Mar 20, 2025
71a8791
Merge FaqGen into ChatQnA (#1654)
XinyaoWa Mar 20, 2025
d18f4c4
DocSum - fix files for deploy on ROCm vLLM
Mar 21, 2025
84932a7
DocSum - fix files for deploy on ROCm vLLM
Mar 21, 2025
d85d3a9
DocSum - fix files for deploy on ROCm vLLM
Mar 21, 2025
0cb164b
DocSum - fix files for deploy on ROCm vLLM
Mar 24, 2025
28e68ae
Merge branch 'main' of https://github.com/opea-project/GenAIExamples …
Mar 24, 2025
9d3595b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 24, 2025
2392346
DocSum - fix files for deploy on ROCm vLLM
Mar 24, 2025
b4527e4
Merge remote-tracking branch 'origin/feature/DocSum_vLLM' into featur…
Mar 24, 2025
42fdcbf
DocSum - fix files for deploy on ROCm vLLM
Mar 24, 2025
f5e0196
DocSum - fix files for deploy on ROCm vLLM
Mar 24, 2025
4aef21b
Merge branch 'main' into feature/DocSum_vLLM
artem-astafev Mar 28, 2025
3ad714a
Merge branch 'feature/Docsum_vLLM' of https://github.com/chyundunovDa…
Apr 1, 2025
aa15e3e
Merge branch 'main' of https://github.com/opea-project/GenAIExamples …
Apr 1, 2025
08a4540
DocSum - fix files for deploy on ROCm vLLM
Apr 1, 2025
183bb79
DocSum - fix files for deploy on ROCm vLLM
Apr 2, 2025
d4f8070
Merge branch 'main' of https://github.com/opea-project/GenAIExamples …
Apr 2, 2025
3d64cb4
Merge branch 'main' into feature/DocSum_vLLM
chyundunovDatamonsters Apr 2, 2025
5f65d79
DocSum - fix files for deploy on ROCm vLLM
Apr 3, 2025
2232e4f
Merge remote-tracking branch 'origin/feature/DocSum_vLLM' into featur…
Apr 3, 2025
f8f6baa
DocSum - fix files for deploy on ROCm vLLM
Apr 3, 2025
3a1bf7f
Merge branch 'main' of https://github.com/opea-project/GenAIExamples …
Apr 3, 2025
16320de
DocSum - fix files for deploy on ROCm vLLM
Apr 3, 2025
d056c51
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions DocSum/Dockerfile-vllm-rocm
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0

# vLLM serving image for AMD ROCm GPUs, based on the upstream ROCm vLLM build
# (ROCm 6.3.1, Ubuntu 22.04, Python 3.12, vLLM 0.6.6, MI300-targeted).
FROM rocm/vllm:rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6

# Set the working directory
WORKDIR /workspace

# Expose the port used by the API server
EXPOSE 8011

# Set environment variables
# Cache downloaded HuggingFace models under the workspace directory.
ENV HUGGINGFACE_HUB_CACHE=/workspace
# Disable vLLM's Triton flash-attention kernel.
ENV VLLM_USE_TRITON_FLASH_ATTENTION=0
# Disable PyTorch JIT compilation.
ENV PYTORCH_JIT=0

# Set the entrypoint to the api_server.py script
# Copy vLLM's OpenAI-compatible API server out of the installed package so it
# can be launched directly; all compose `command:` args are passed to it.
RUN cp /usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py /workspace/api_server.py
ENTRYPOINT ["python3", "/workspace/api_server.py"]
2 changes: 2 additions & 0 deletions DocSum/docker_compose/amd/gpu/rocm/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
Copyright (C) 2024 Advanced Micro Devices, Inc.

# Build and deploy DocSum Application on AMD GPU (ROCm)

## Build images
Expand Down
128 changes: 128 additions & 0 deletions DocSum/docker_compose/amd/gpu/rocm/README_vllm.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
Copyright (C) 2024 Advanced Micro Devices, Inc.

# Build and deploy DocSum Application on AMD GPU (ROCm)

## Build images

## 🚀 Build Docker Images

First of all, you need to build the required Docker images locally.

### 1. Build LLM Image

```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build -t opea/llm-docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile .
```

### 2. Build Whisper Image

```bash
cd GenAIComps
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
```

### 3. Build MegaService Docker Image

To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command:

```bash
git clone https://github.com/opea-project/GenAIExamples
cd GenAIExamples/DocSum/
docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```

### 4. Build Gradio UI Docker Image

Build the frontend Docker image via below command:

```bash
cd GenAIExamples/DocSum/ui
docker build -t opea/docsum-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile.gradio .
```

Then run the command `docker images`; you should see the following Docker images:

1. `opea/llm-docsum:latest`
2. `opea/docsum:latest`
3. `opea/docsum-gradio-ui:latest`
4. `opea/llm-vllm-rocm:latest`
5. `opea/whisper:latest`

## 🚀 Start Microservices and MegaService

### Required Models

Default model is "Intel/neural-chat-7b-v3-3". Change "DOCSUM_LLM_MODEL_ID" in environment variables below if you want to use another model.
For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable.

### Setup Environment Variables

```bash
export HUGGINGFACEHUB_API_TOKEN='your huggingfacehub token'
```

Edit the file `set_env_vllm.sh` and set the desired values for the variables in it.
Note: Please replace the value of HOST_IP with your server's IP address; do not use localhost.

Set values:

```bash
cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm
. set_env_vllm.sh
```

#### Set GPU settings in compose_vllm.yaml:

Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more `--device /dev/dri/renderD<node>`, where `<node>` is the render node index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus)
Example of setting isolation for 1 GPU:

```
- /dev/dri/card0:/dev/dri/card0
- /dev/dri/renderD128:/dev/dri/renderD128
```

Example of setting isolation for 2 GPUs:

```
- /dev/dri/card0:/dev/dri/card0
- /dev/dri/renderD128:/dev/dri/renderD128
- /dev/dri/card1:/dev/dri/card1
- /dev/dri/renderD129:/dev/dri/renderD129
```

Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus)

### Start Microservice Docker Containers

```bash
cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm
docker compose -f compose_vllm.yaml up -d
```

### Validate Microservices

1. vLLM Service

```bash
curl http://${host_ip}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions \
-X POST \
-d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' \
-H 'Content-Type: application/json'
```

2. LLM Microservice

```bash
curl http://${host_ip}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum \
-X POST \
-d '{"messages":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```

3. MegaService

```bash
curl http://${host_ip}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum -H "Content-Type: application/json" -d '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
```
65 changes: 26 additions & 39 deletions DocSum/docker_compose/amd/gpu/rocm/compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ services:
image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
container_name: docsum-tgi-service
ports:
- "${DOCSUM_TGI_SERVICE_PORT}:80"
- "${DOCSUM_TGI_SERVICE_PORT:-8008}:80"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
Expand All @@ -15,64 +15,52 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
host_ip: ${host_ip}
DOCSUM_TGI_SERVICE_PORT: ${DOCSUM_TGI_SERVICE_PORT}
healthcheck:
test: [ "CMD-SHELL", "curl -f http://localhost:80/health || exit 1" ]
interval: 10s
timeout: 10s
retries: 100
volumes:
- "/var/opea/docsum-service/data:/data"
shm_size: 1g
devices:
- /dev/kfd:/dev/kfd
- /dev/dri/${DOCSUM_CARD_ID}:/dev/dri/${DOCSUM_CARD_ID}
- /dev/dri/${DOCSUM_RENDER_ID}:/dev/dri/${DOCSUM_RENDER_ID}
- /dev/dri/:/dev/dri/
cap_add:
- SYS_PTRACE
group_add:
- video
security_opt:
- seccomp:unconfined
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/health || exit 1"]
interval: 10s
timeout: 10s
retries: 100
command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${DOCSUM_MAX_INPUT_TOKENS} --max-total-tokens ${DOCSUM_MAX_TOTAL_TOKENS}

docsum-llm-server:
image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
container_name: docsum-llm-server
depends_on:
docsum-tgi-service:
condition: service_healthy
- docsum-tgi-service
ports:
- "${DOCSUM_LLM_SERVER_PORT}:9000"
ipc: host
group_add:
- video
security_opt:
- seccomp:unconfined
cap_add:
- SYS_PTRACE
devices:
- /dev/kfd:/dev/kfd
- /dev/dri/${DOCSUM_CARD_ID}:/dev/dri/${DOCSUM_CARD_ID}
- /dev/dri/${DOCSUM_RENDER_ID}:/dev/dri/${DOCSUM_RENDER_ID}
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
LLM_ENDPOINT: ${DOCSUM_TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS}
LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
DocSum_COMPONENT_NAME: "OpeaDocSumTgi"
LOGFLAG: ${LOGFLAG:-False}
restart: unless-stopped

whisper:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
- "${DOCSUM_WHISPER_PORT:-7066}:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
Expand All @@ -89,13 +77,12 @@ services:
ports:
- "${DOCSUM_BACKEND_SERVER_PORT}:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${HOST_IP}
- LLM_SERVICE_HOST_IP=${HOST_IP}
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}

no_proxy: ${no_proxy}
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
MEGA_SERVICE_HOST_IP: ${HOST_IP}
LLM_SERVICE_HOST_IP: ${HOST_IP}
ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP}
ipc: host
restart: always

Expand All @@ -105,13 +92,13 @@ services:
depends_on:
- docsum-backend-server
ports:
- "5173:5173"
- "${DOCSUM_FRONTEND_PORT:-5173}:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}
- DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
no_proxy: ${no_proxy}
https_proxy: ${https_proxy}
http_proxy: ${http_proxy}
BACKEND_SERVICE_ENDPOINT: ${BACKEND_SERVICE_ENDPOINT}
DOC_BASE_URL: ${BACKEND_SERVICE_ENDPOINT}
ipc: host
restart: always

Expand Down
105 changes: 105 additions & 0 deletions DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0

services:
  # vLLM OpenAI-compatible inference service running on AMD ROCm GPUs.
  docsum-vllm-service:
    image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest}
    container_name: docsum-vllm-service
    ports:
      # Host port (default 8081) -> API server port 8011 inside the container.
      - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
      HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      VLLM_USE_TRITON_FLASH_ATTENTION: 0
      PYTORCH_JIT: 0
    volumes:
      # Host-side model/data cache mounted into the container.
      - "./data:/data"
    shm_size: 20G
    devices:
      # Expose AMD GPU device nodes; /dev/dri/ grants access to all GPUs.
      - /dev/kfd:/dev/kfd
      - /dev/dri/:/dev/dri/
    cap_add:
      - SYS_PTRACE
    group_add:
      - video
    security_opt:
      - seccomp:unconfined
      - apparmor=unconfined
    # NOTE(review): --tensor-parallel-size 4 assumes 4 GPUs are visible to the
    # container — confirm against the target host's GPU count.
    command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
    ipc: host

  # Document-summarization LLM microservice; talks to the vLLM service above
  # via DOCSUM_LLM_ENDPOINT.
  docsum-llm-server:
    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
    container_name: docsum-llm-server
    depends_on:
      - docsum-vllm-service
    ports:
      - "${DOCSUM_LLM_SERVER_PORT}:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${DOCSUM_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
      MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS}
      MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS}
      LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
      # Selects the vLLM-backed DocSum component implementation.
      DocSum_COMPONENT_NAME: "OpeaDocSumvLLM"
      LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped

  # Whisper ASR service used for audio-input summarization.
  whisper:
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    container_name: whisper-service
    ports:
      - "${DOCSUM_WHISPER_PORT:-7066}:7066"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped

  # DocSum MegaService gateway that orchestrates the LLM and ASR services.
  docsum-backend-server:
    image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
    container_name: docsum-backend-server
    depends_on:
      - docsum-vllm-service
      - docsum-llm-server
    ports:
      - "${DOCSUM_BACKEND_SERVER_PORT}:8888"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      MEGA_SERVICE_HOST_IP: ${HOST_IP}
      LLM_SERVICE_HOST_IP: ${HOST_IP}
      ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP}
    ipc: host
    restart: always

  # Gradio web UI; sends requests to the backend MegaService.
  docsum-gradio-ui:
    image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest}
    container_name: docsum-ui-server
    depends_on:
      - docsum-backend-server
    ports:
      - "${DOCSUM_FRONTEND_PORT:-5173}:5173"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      BACKEND_SERVICE_ENDPOINT: ${BACKEND_SERVICE_ENDPOINT}
      DOC_BASE_URL: ${BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
Loading