diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/README.md b/VideoQnA/docker_compose/intel/cpu/xeon/README.md
index 32fc0397f0..96b1d97ec0 100644
--- a/VideoQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/VideoQnA/docker_compose/intel/cpu/xeon/README.md
@@ -17,7 +17,7 @@ Port 8001 - Open to 0.0.0.0/0
 embedding
 =========
-Port 6000 - Open to 0.0.0.0/0
+Port 6990 - Open to 0.0.0.0/0
 
 retriever
 =========
@@ -33,13 +33,13 @@ Port 9009 - Open to 0.0.0.0/0
 lvm
 ===
-Port 9000 - Open to 0.0.0.0/0
+Port 9399 - Open to 0.0.0.0/0
 
-chaqna-xeon-backend-server
+videoqna-xeon-backend-server
 ==========================
 Port 8888 - Open to 0.0.0.0/0
 
-chaqna-xeon-ui-server
+videoqna-xeon-ui-server
 =====================
 Port 5173 - Open to 0.0.0.0/0
 ```
@@ -106,17 +106,14 @@ docker build -t opea/videoqna-ui:latest --build-arg https_proxy=$https_proxy --b
 
 Then run the command `docker images`; you will have the following 8 Docker images:
 
+1. `opea/embedding-multimodal-clip:latest`
+1. `opea/retriever:latest`
+1. `opea/reranking:latest`
+1. `opea/lvm-video-llama:latest`
+1. `opea/lvm:latest`
 1. `opea/dataprep:latest`
-2. `opea/embedding-multimodal-clip:latest`
-3. `opea/retriever:latest`
-4. `opea/reranking:latest`
-5. `opea/video-llama-lvm-server:latest`
-6. # `opea/lvm-video-llama:latest`
-7. `opea/reranking-tei:latest`
-8. `opea/lvm-video-llama:latest`
-9. `opea/lvm:latest`
-10. `opea/videoqna:latest`
-11. `opea/videoqna-ui:latest`
+1. `opea/videoqna:latest`
+1. `opea/videoqna-ui:latest`
 
 ## 🚀 Start Microservices
 
@@ -132,18 +129,18 @@ Since the `compose.yaml` will consume some environment variables, you need to se
 export host_ip="External_Public_IP"
 ```
 
-**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable**
+**Export the value of your Huggingface API token to the `HF_TOKEN` environment variable**
 
 > Replace `Your_Huggingface_API_Token` below with your actual Huggingface API Token value
 
 ```
-export your_hf_api_token="Your_Huggingface_API_Token"
+export HF_TOKEN="Your_Huggingface_API_Token"
 ```
 
 **Append the value of the public IP address to the no_proxy list**
 
 ```
-export your_no_proxy="${your_no_proxy},${host_ip}"
+export no_proxy="${your_no_proxy},${host_ip}"
 ```
 
 Then you can run the commands below or `source set_env.sh` to set all the variables
@@ -152,26 +149,52 @@ Then you can run the commands below or `source set_env.sh` to set all the variables
 export no_proxy=${your_no_proxy}
 export http_proxy=${your_http_proxy}
 export https_proxy=${your_http_proxy}
-export MEGA_SERVICE_HOST_IP=${host_ip}
-export EMBEDDING_SERVICE_HOST_IP=${host_ip}
-export RETRIEVER_SERVICE_HOST_IP=${host_ip}
-export RERANK_SERVICE_HOST_IP=${host_ip}
-export LVM_SERVICE_HOST_IP=${host_ip}
-export LVM_ENDPOINT="http://${host_ip}:9009"
-export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna"
-export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
-export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
-export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos"
+export HF_TOKEN=${HF_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 
-export VDMS_HOST=${host_ip}
-export VDMS_PORT=8001
 export INDEX_NAME="mega-videoqna"
-export LLM_DOWNLOAD="True"
+export LLM_DOWNLOAD="True" # Set to "False" before redeploying the LVM server to avoid downloading the model again
+export RERANK_COMPONENT_NAME="OPEA_VIDEO_RERANKING"
+export LVM_COMPONENT_NAME="OPEA_VIDEO_LLAMA_LVM"
+export EMBEDDING_COMPONENT_NAME="OPEA_CLIP_EMBEDDING"
 export USECLIP=1
+export LOGFLAG=True
 
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+export LVM_SERVICE_HOST_IP=${host_ip}
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export RERANK_SERVICE_HOST_IP=${host_ip}
+export RETRIEVER_SERVICE_HOST_IP=${host_ip}
+export VDMS_HOST=${host_ip}
+
+export BACKEND_PORT=8888
+export DATAPREP_PORT=6007
+export EMBEDDER_PORT=6990
+export MULTIMODAL_CLIP_EMBEDDER_PORT=6991
+export LVM_PORT=9399
+export RERANKING_PORT=8000
+export RETRIEVER_PORT=7000
+export UI_PORT=5173
+export VDMS_PORT=8001
+export VIDEO_LLAMA_PORT=9009
+
+export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/health_check"
+export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/videoqna"
+export CLIP_EMBEDDING_ENDPOINT="http://${host_ip}:${MULTIMODAL_CLIP_EMBEDDER_PORT}"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get"
+export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get_videos"
+export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest_videos"
+export EMBEDDING_ENDPOINT="http://${host_ip}:${EMBEDDER_PORT}/v1/embeddings"
+export FRONTEND_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
+export LVM_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}"
+export LVM_VIDEO_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}/generate"
+export RERANKING_ENDPOINT="http://${host_ip}:${RERANKING_PORT}/v1/reranking"
+export RETRIEVER_ENDPOINT="http://${host_ip}:${RETRIEVER_PORT}/v1/retrieval"
+export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}"
+export UI_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
+
+export no_proxy="${NO_PROXY},${host_ip},vdms-vector-db,dataprep-vdms-server,clip-embedding-server,reranking-tei-server,retriever-vdms-server,lvm-video-llama,lvm,videoqna-xeon-backend-server,videoqna-xeon-ui-server"
 ```
 
 Note: Replace `host_ip` with your external IP address; do not use localhost.
@@ -190,12 +213,13 @@ In the deploy steps, you need to start the VDMS DB and dataprep firstly, then in
 
 ```bash
 cd GenAIExamples/VideoQnA/docker_compose/intel/cpu/xeon/
 docker volume create video-llama-model
+docker volume create videoqna-cache
 docker compose up vdms-vector-db dataprep -d
-sleep 1m # wait for the services ready
+sleep 30s
 
 # Insert some sample data to the DB
-curl -X POST http://${host_ip}:6007/v1/dataprep/ingest \
+curl -X POST ${DATAPREP_INGEST_SERVICE_ENDPOINT} \
   -H "Content-Type: multipart/form-data" \
   -F "files=@./data/op_1_0320241830.mp4"
@@ -212,11 +236,12 @@ docker compose up -d
 
    ```bash
    # Single file upload
-   curl -X POST ${DATAPREP_SERVICE_ENDPOINT} \
+   curl -X POST ${DATAPREP_INGEST_SERVICE_ENDPOINT} \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./file1.mp4"
+
    # Multiple file upload
-   curl -X POST ${DATAPREP_SERVICE_ENDPOINT} \
+   curl -X POST ${DATAPREP_INGEST_SERVICE_ENDPOINT} \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./file1.mp4" \
     -F "files=@./file2.mp4" \
@@ -228,6 +253,7 @@ docker compose up -d
    ```bash
    # List available videos
    curl -X 'GET' ${DATAPREP_GET_VIDEO_LIST_ENDPOINT} -H 'accept: application/json'
+
    # Download available video
    curl -X 'GET' ${DATAPREP_GET_FILE_ENDPOINT}/video_name.mp4 -H 'accept: application/json'
    ```
@@ -235,9 +261,9 @@
 2. Embedding Microservice
 
    ```bash
-   curl http://${host_ip}:6000/v1/embeddings \
+   curl ${EMBEDDING_ENDPOINT} \
     -X POST \
-    -d '{"text":"Sample text"}' \
+    -d '{"input":"What is the man doing?"}' \
     -H 'Content-Type: application/json'
    ```
 
@@ -251,16 +277,16 @@
 
    ```bash
    export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
-   curl http://${host_ip}:7000/v1/retrieval \
+   curl ${RETRIEVER_ENDPOINT} \
     -X POST \
-    -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
+    -d "{\"text\":\"What is the man doing?\",\"embedding\":${your_embedding},\"search_type\":\"mmr\", \"k\":4}" \
     -H 'Content-Type: application/json'
    ```
 
 4. Reranking Microservice
 
    ```bash
-   curl http://${host_ip}:8000/v1/reranking \
+   curl ${RERANKING_ENDPOINT} \
     -X 'POST' \
     -H 'accept: application/json' \
     -H 'Content-Type: application/json' \
@@ -282,7 +308,7 @@ docker compose up -d
 
    ```bash
    curl -X POST \
-     "http://${host_ip}:9009/generate?video_url=silence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" \
+     "${LVM_VIDEO_ENDPOINT}?video_url=silence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" \
     -H "accept: */*" \
     -d ''
    ```
@@ -294,9 +320,9 @@ docker compose up -d
 
    This service depends on the LVM backend service above. On the first startup it can take a long time to become ready, so wait for it before sending requests.
 
    ```bash
-   curl http://${host_ip}:9000/v1/lvm\
+   curl http://${host_ip}:${LVM_PORT}/v1/lvm \
     -X POST \
-    -d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}' \
+    -d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the man doing?","max_new_tokens": 50}' \
     -H 'Content-Type: application/json'
    ```
 
@@ -305,7 +331,7 @@
 7. MegaService
 
    ```bash
-   curl http://${host_ip}:8888/v1/videoqna -H "Content-Type: application/json" -d '{
+   curl ${BACKEND_SERVICE_ENDPOINT} -H "Content-Type: application/json" -d '{
      "messages": "What is the man doing?",
      "stream": "True"
      }'
@@ -343,4 +369,5 @@ To clean the volume:
 
 ```bash
 docker volume rm video-llama-model
+docker volume rm videoqna-cache
 ```
diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml b/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 780ff3c704..827136122b 100644
--- a/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -5,69 +5,74 @@ services:
   vdms-vector-db:
-    image: intellabs/vdms:v2.8.0
+    image: intellabs/vdms:latest
     container_name: vdms-vector-db
     ports:
-      - "8001:55555"
+      - "${VDMS_PORT}:55555"
   dataprep:
     image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
     container_name: dataprep-vdms-server
     depends_on:
       - vdms-vector-db
     ports:
-      - "6007:5000"
+      - "${DATAPREP_PORT}:5000"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      MULTIMODAL_DATAPREP: true
+      DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALVDMS"
       VDMS_HOST: ${VDMS_HOST}
       VDMS_PORT: ${VDMS_PORT}
       INDEX_NAME: ${INDEX_NAME}
-      MULTIMODAL_DATAPREP: true
-    entrypoint: sh -c 'sleep 15 && python ingest_videos.py'
+      COLLECTION_NAME: ${INDEX_NAME}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     volumes:
-      - /home/$USER/.cache/clip:/home/user/.cache/clip
-      - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
+      - videoqna-cache:/home/user/.cache
   embedding:
     image: ${REGISTRY:-opea}/embedding-multimodal-clip:${TAG:-latest}
-    container_name: embedding-multimodal-server
+    container_name: clip-embedding-server
     ports:
-      - "6000:6000"
+      - "${EMBEDDER_PORT:-6990}:6990"
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-    volumes:
-      - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
+      CLIP_EMBEDDING_ENDPOINT: ${CLIP_EMBEDDING_ENDPOINT}
+      EMBEDDING_COMPONENT_NAME: "OPEA_CLIP_EMBEDDING"
+      LOGFLAG: ${LOGFLAG:-False}
     restart: unless-stopped
+    volumes:
+      - videoqna-cache:/home/user/.cache
   retriever:
     image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
     container_name: retriever-vdms-server
     depends_on:
       - vdms-vector-db
     ports:
-      - "7000:7000"
+      - "${RETRIEVER_PORT}:7000"
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      INDEX_NAME: ${INDEX_NAME}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_VDMS"
       VDMS_INDEX_NAME: ${INDEX_NAME}
       VDMS_HOST: ${VDMS_HOST}
       VDMS_PORT: ${VDMS_PORT}
       VDMS_USE_CLIP: ${USECLIP}
-      LOGFLAG: ${LOGFLAG}
-      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_VDMS"
-    entrypoint: sh -c 'sleep 30 && python retriever_vdms.py'
+      NUMBA_CACHE_DIR: "/tmp/numba_cache"
     restart: unless-stopped
     volumes:
-      - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
+      - videoqna-cache:/home/user/.cache
   reranking:
     image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
     container_name: reranking-tei-server
     ports:
-      - "8000:8000"
+      - "${RERANKING_PORT}:8000"
     ipc: host
     environment:
       no_proxy: ${no_proxy}
@@ -76,12 +81,13 @@ services:
       CHUNK_DURATION: ${CHUNK_DURATION}
       FILE_SERVER_ENDPOINT: ${DATAPREP_GET_FILE_ENDPOINT}
       DATAPREP_GET_VIDEO_LIST_ENDPOINT: ${DATAPREP_GET_VIDEO_LIST_ENDPOINT}
+      RERANK_COMPONENT_NAME: ${RERANK_COMPONENT_NAME:-OPEA_VIDEO_RERANKING}
     restart: unless-stopped
   lvm-video-llama:
     image: ${REGISTRY:-opea}/lvm-video-llama:${TAG:-latest}
     container_name: lvm-video-llama
     ports:
-      - "9009:9009"
+      - ${VIDEO_LLAMA_PORT:-9009}:9009
     ipc: host
     environment:
       http_proxy: ${http_proxy}
@@ -89,20 +95,20 @@ services:
       no_proxy: ${no_proxy}
       llm_download: ${LLM_DOWNLOAD}
     volumes:
-      - "/home/$USER/.cache:/home/user/.cache"
+      - videoqna-cache:/home/user/.cache
       - video-llama-model:/home/user/model
     restart: unless-stopped
   lvm:
     image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
     container_name: lvm
     ports:
-      - "9000:9000"
+      - "${LVM_PORT}:9399"
     ipc: host
     environment:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       no_proxy: ${no_proxy}
-      LVM_COMPONENT_NAME: "OPEA_VIDEO_LLAMA_LVM"
+      LVM_COMPONENT_NAME: ${LVM_COMPONENT_NAME:-OPEA_VIDEO_LLAMA_LVM}
       LVM_ENDPOINT: ${LVM_ENDPOINT}
     restart: unless-stopped
     depends_on:
@@ -119,13 +125,14 @@ services:
       - lvm-video-llama
       - lvm
     ports:
-      - "8888:8888"
-    entrypoint: sh -c 'sleep 45 && python videoqna.py'
+      - "${BACKEND_PORT}:8888"
     environment:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       no_proxy: ${no_proxy}
+      LOGFLAG: ${LOGFLAG:-False}
       MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      BACKEND_PORT: ${BACKEND_PORT}
       EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
       RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
       RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
@@ -138,18 +145,23 @@ services:
     depends_on:
       - videoqna-xeon-backend-server
     ports:
-      - "5173:5173"
+      - "${UI_PORT}:5173"
     environment:
       https_proxy: ${https_proxy}
       http_proxy: ${http_proxy}
       no_proxy: ${no_proxy}
       BACKEND_SERVICE_ENDPOINT: ${BACKEND_SERVICE_ENDPOINT}
       BACKEND_HEALTH_CHECK_ENDPOINT: ${BACKEND_HEALTH_CHECK_ENDPOINT}
+      DATAPREP_INGEST_SERVICE_ENDPOINT: ${DATAPREP_INGEST_SERVICE_ENDPOINT}
+      DATAPREP_PORT: ${DATAPREP_PORT}
+      BACKEND_PORT: ${BACKEND_PORT}
+      UI_PORT: ${UI_PORT}
     ipc: host
     restart: always
 
 volumes:
   video-llama-model:
     external: true
+  videoqna-cache:
 networks:
   default:
     driver: bridge
diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh b/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh
index dcf574774b..ada41f8ba9 100644
--- a/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -8,21 +8,48 @@ popd > /dev/null
 
 host_ip=$(hostname -I | awk '{print $1}')
 
-export MEGA_SERVICE_HOST_IP=${host_ip}
-export EMBEDDING_SERVICE_HOST_IP=${host_ip}
-export RETRIEVER_SERVICE_HOST_IP=${host_ip}
-export RERANK_SERVICE_HOST_IP=${host_ip}
-export LVM_SERVICE_HOST_IP=${host_ip}
+export HF_TOKEN=${HF_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 
-export LVM_ENDPOINT="http://${host_ip}:9009"
-export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna"
-export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
-export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
-export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos"
+export INDEX_NAME="mega-videoqna"
+export LLM_DOWNLOAD="True" # Set to "False" before redeploying the LVM server to avoid downloading the model again
+export RERANK_COMPONENT_NAME="OPEA_VIDEO_RERANKING"
+export LVM_COMPONENT_NAME="OPEA_VIDEO_LLAMA_LVM"
+export EMBEDDING_COMPONENT_NAME="OPEA_CLIP_EMBEDDING"
+export USECLIP=1
+export LOGFLAG=True
 
+export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+export LVM_SERVICE_HOST_IP=${host_ip}
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export RERANK_SERVICE_HOST_IP=${host_ip}
+export RETRIEVER_SERVICE_HOST_IP=${host_ip}
 export VDMS_HOST=${host_ip}
+
+export BACKEND_PORT=8888
+export DATAPREP_PORT=6007
+export EMBEDDER_PORT=6990
+export MULTIMODAL_CLIP_EMBEDDER_PORT=6991
+export LVM_PORT=9399
+export RERANKING_PORT=8000
+export RETRIEVER_PORT=7000
+export UI_PORT=5173
 export VDMS_PORT=8001
-export INDEX_NAME="mega-videoqna"
-export USECLIP=1
-export LLM_DOWNLOAD="True" # Set to "False" before redeploy LVM server to avoid model download
+export VIDEO_LLAMA_PORT=9009
+
+export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/health_check"
+export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/videoqna"
+export CLIP_EMBEDDING_ENDPOINT="http://${host_ip}:${MULTIMODAL_CLIP_EMBEDDER_PORT}"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get"
+export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get_videos"
+export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest_videos"
+export EMBEDDING_ENDPOINT="http://${host_ip}:${EMBEDDER_PORT}/v1/embeddings"
+export FRONTEND_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
+export LVM_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}"
+export LVM_VIDEO_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}/generate"
+export RERANKING_ENDPOINT="http://${host_ip}:${RERANKING_PORT}/v1/reranking"
+export RETRIEVER_ENDPOINT="http://${host_ip}:${RETRIEVER_PORT}/v1/retrieval"
+export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}"
+export UI_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
+
+export no_proxy="${NO_PROXY},${host_ip},vdms-vector-db,dataprep-vdms-server,clip-embedding-server,reranking-tei-server,retriever-vdms-server,lvm-video-llama,lvm,videoqna-xeon-backend-server,videoqna-xeon-ui-server"
diff --git a/VideoQnA/docker_image_build/build.yaml b/VideoQnA/docker_image_build/build.yaml
index 24fab7697b..aa3a2dc86d 100644
--- a/VideoQnA/docker_image_build/build.yaml
+++ b/VideoQnA/docker_image_build/build.yaml
@@ -23,7 +23,7 @@ services:
       dockerfile: comps/dataprep/src/Dockerfile
     extends: videoqna
     image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
-  embedding-multimodal-clip:
+  embedding:
     build:
       context: GenAIComps
       dockerfile: comps/third_parties/clip/src/Dockerfile
diff --git a/VideoQnA/tests/test_compose_on_xeon.sh b/VideoQnA/tests/test_compose_on_xeon.sh
index 614c2efc47..d4c1b5a3b5 100755
--- a/VideoQnA/tests/test_compose_on_xeon.sh
+++ b/VideoQnA/tests/test_compose_on_xeon.sh
@@ -13,6 +13,55 @@ export TAG=${IMAGE_TAG}
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')
+export host_ip=${ip_address}
+
+function setup_env() {
+    export HF_TOKEN=${HF_TOKEN}
+    export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+
+    export INDEX_NAME="mega-videoqna"
+    export LLM_DOWNLOAD="True" # Set to "False" before redeploying the LVM server to avoid downloading the model again
+    export RERANK_COMPONENT_NAME="OPEA_VIDEO_RERANKING"
+    export LVM_COMPONENT_NAME="OPEA_VIDEO_LLAMA_LVM"
+    export EMBEDDING_COMPONENT_NAME="OPEA_CLIP_EMBEDDING"
+    export USECLIP=1
+    export LOGFLAG=True
+
+    export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+    export LVM_SERVICE_HOST_IP=${host_ip}
+    export MEGA_SERVICE_HOST_IP=${host_ip}
+    export RERANK_SERVICE_HOST_IP=${host_ip}
+    export RETRIEVER_SERVICE_HOST_IP=${host_ip}
+    export VDMS_HOST=${host_ip}
+
+    export BACKEND_PORT=8888
+    export DATAPREP_PORT=6007
+    export EMBEDDER_PORT=6990
+    export MULTIMODAL_CLIP_EMBEDDER_PORT=6991
+    export LVM_PORT=9399
+    export RERANKING_PORT=8000
+    export RETRIEVER_PORT=7000
+    export UI_PORT=5173
+    export VDMS_PORT=8001
+    export VIDEO_LLAMA_PORT=9009
+
+    export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/health_check"
+    export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/videoqna"
+    export CLIP_EMBEDDING_ENDPOINT="http://${host_ip}:${MULTIMODAL_CLIP_EMBEDDER_PORT}"
+    export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get"
+    export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get_videos"
+    export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest_videos"
+    export EMBEDDING_ENDPOINT="http://${host_ip}:${EMBEDDER_PORT}/v1/embeddings"
+    export FRONTEND_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
+    export LVM_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}"
+    export LVM_VIDEO_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}/generate"
+    export RERANKING_ENDPOINT="http://${host_ip}:${RERANKING_PORT}/v1/reranking"
+    export RETRIEVER_ENDPOINT="http://${host_ip}:${RETRIEVER_PORT}/v1/retrieval"
+    export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}"
+    export UI_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
+
+    export no_proxy="${NO_PROXY},${host_ip},vdms-vector-db,dataprep-vdms-server,clip-embedding-server,reranking-tei-server,retriever-vdms-server,lvm-video-llama,lvm,videoqna-xeon-backend-server,videoqna-xeon-ui-server"
+}
 
 function build_docker_images() {
     opea_branch=${opea_branch:-"main"}
@@ -28,26 +77,33 @@ function build_docker_images() {
     fi
 
     cd $WORKPATH/docker_image_build
-    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git GenAIComps
+
+    # Create the .cache directory for the cache volume to mount (avoids a permission-denied error)
+    OLD_STRING="mkdir -p /home/user "
+    NEW_STRING="mkdir -p /home/user/.cache "
+    sed -i "s|$OLD_STRING|$NEW_STRING|g" "GenAIComps/comps/dataprep/src/Dockerfile"
+    sed -i "s|$OLD_STRING|$NEW_STRING|g" "GenAIComps/comps/retrievers/src/Dockerfile"
+    sed -i "s|$OLD_STRING|$NEW_STRING|g" "GenAIComps/comps/third_parties/clip/src/Dockerfile"
 
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
+    docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log 2>&1
 
-    docker pull intellabs/vdms:v2.8.0
+    docker pull intellabs/vdms:latest
 
     docker images && sleep 1s
 }
 
-
 function start_services() {
+    echo "Starting services..."
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
-    source set_env.sh
 
     docker volume create video-llama-model
+    docker volume create videoqna-cache
     docker compose up vdms-vector-db dataprep -d
     sleep 30s
 
     # Insert some sample data to the DB
-    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \
+    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST ${DATAPREP_INGEST_SERVICE_ENDPOINT} \
         -H "Content-Type: multipart/form-data" \
         -F "files=@./data/op_1_0320241830.mp4")
@@ -58,12 +114,13 @@ function start_services() {
         docker logs dataprep-vdms-server >> ${LOG_PATH}/dataprep.log
         exit 1
     fi
+
     # Bring all the others
     docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 1m
 
     # List of containers running uvicorn
-    list=("dataprep-vdms-server" "embedding-multimodal-server" "retriever-vdms-server" "reranking-tei-server" "lvm-video-llama" "lvm-video-llama" "videoqna-xeon-backend-server")
+    list=("dataprep-vdms-server" "clip-embedding-server" "retriever-vdms-server" "reranking-tei-server" "lvm-video-llama" "videoqna-xeon-backend-server")
 
     # Define the maximum time limit in seconds
     TIME_LIMIT=5400
@@ -95,10 +152,10 @@ function start_services() {
         for i in "${!list[@]}"; do
             item=${list[i]}
             if check_condition "$item"; then
-                echo "Condition met for $item, removing from list."
+                echo "Condition met for $item, removing from list." >> ${LOG_PATH}/list_check.log
                 unset list[i]
             else
-                echo "Condition not met for $item, keeping in list."
+                echo "Condition not met for $item, keeping in list." >> ${LOG_PATH}/list_check.log
             fi
         done
@@ -110,7 +167,7 @@ function start_services() {
             echo "List is empty. Exiting."
             break
         fi
-        sleep 5m
+        sleep 2m
     done
 
     if docker logs videoqna-xeon-ui-server 2>&1 | grep -q "Streamlit app"; then
@@ -128,33 +185,37 @@ function validate_services() {
     local DOCKER_NAME="$4"
     local INPUT_DATA="$5"
 
-    local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
-    if [ "$HTTP_STATUS" -eq 200 ]; then
-        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
+    HTTP_RESPONSE=$(curl -s -w "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
+    HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
+    RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
 
-        local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
+    docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
 
-        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
-            echo "[ $SERVICE_NAME ] Content is as expected."
-        else
-            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
-            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
-            exit 1
-        fi
-    else
+    # check response status
+    if [ "$HTTP_STATUS" -ne "200" ]; then
         echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
-        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
         exit 1
+    else
+        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
+    fi
+
+    # check response body
+    if [[ "${RESPONSE_BODY}" != *"${EXPECTED_RESULT}"* ]]; then
+        echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
+        exit 1
+    else
+        echo "[ $SERVICE_NAME ] Content is as expected."
     fi
 
     sleep 1s
 }
 
 function validate_microservices() {
     # Check if the microservices are running correctly.
-    cd $WORKPATH/docker_compose/intel/cpu/xeon//data
+    cd $WORKPATH/docker_compose/intel/cpu/xeon/data
 
     # dataprep microservice
+    echo "Validating Dataprep microservice ..."
-    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \
+    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST "${DATAPREP_INGEST_SERVICE_ENDPOINT}" \
         -H "Content-Type: multipart/form-data" \
         -F "files=@./op_1_0320241830.mp4")
@@ -168,24 +229,24 @@ function validate_microservices() {
 
     # Embedding Microservice
     validate_services \
-        "${ip_address}:6000/v1/embeddings" \
-        "Sample text" \
+        ${EMBEDDING_ENDPOINT} \
+        '"embedding":[' \
         "embedding" \
-        "embedding-multimodal-server" \
-        '{"text":"Sample text"}'
+        "clip-embedding-server" \
+        '{"input":"What is the man doing?"}'
 
     # Retriever Microservice
-    export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
+    export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
     validate_services \
-        "${ip_address}:7000/v1/retrieval" \
+        ${RETRIEVER_ENDPOINT} \
        "retrieved_docs" \
        "retriever" \
        "retriever-vdms-server" \
-        "{\"text\":\"test\",\"embedding\":${your_embedding}}"
+        "{\"text\":\"What is the man doing?\",\"embedding\":${your_embedding},\"search_type\":\"mmr\", \"k\":4}"
 
     # Reranking Microservice
     validate_services \
-        "${ip_address}:8000/v1/reranking" \
+        ${RERANKING_ENDPOINT} \
         "video_url" \
         "reranking" \
         "reranking-tei-server" \
@@ -198,32 +259,52 @@ function validate_microservices() {
         ]
     }'
 
+    # Video Llama LVM Backend Service
+    result=$(http_proxy="" curl -X POST \
+        "${LVM_VIDEO_ENDPOINT}?video_url=https%3A%2F%2Fgithub.com%2FDAMO-NLP-SG%2FVideo-LLaMA%2Fraw%2Fmain%2Fexamples%2Fsilence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" \
+        -H "accept: */*" -d '')
+
+    if [[ $result == *"silence"* ]]; then
+        echo "LVM microservice is running correctly."
+    else
+        echo "LVM microservice is not running correctly. Received result was $result"
+        docker logs lvm-video-llama >> ${LOG_PATH}/lvm-video-llama.log
+        exit 1
+    fi
+
     # LVM Microservice
     validate_services \
-        "${ip_address}:9000/v1/lvm" \
+        "http://${host_ip}:${LVM_PORT}/v1/lvm" \
         "silence" \
         "lvm" \
-        "lvm-video-llama" \
-        '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}'
+        "lvm" \
+        '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the man doing?","max_new_tokens": 50}'
 
+    echo "==== microservices validated ===="
     sleep 1s
 }
 
 function validate_megaservice() {
+    echo "Validating videoqna-xeon-backend-server ..."
+
     validate_services \
-        "${ip_address}:8888/v1/videoqna" \
+        ${BACKEND_SERVICE_ENDPOINT} \
         "man" \
         "videoqna-xeon-backend-server" \
         "videoqna-xeon-backend-server" \
         '{"messages":"What is the man doing?","stream":"True"}'
+
+    echo "==== megaservice validated ===="
 }
 
 function validate_frontend() {
-    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X GET http://${ip_address}:5173/_stcore/health)
+    echo "Validating frontend ..."
+
+    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X GET ${FRONTEND_ENDPOINT})
     if [ "$HTTP_STATUS" -eq 200 ]; then
         echo "Frontend is running correctly."
-        local CONTENT=$(curl -s -X GET http://${ip_address}:5173/_stcore/health)
+        local CONTENT=$(curl -s -X GET ${FRONTEND_ENDPOINT})
         if echo "$CONTENT" | grep -q "ok"; then
             echo "Frontend Content is as expected."
         else
@@ -236,20 +317,31 @@ function validate_frontend() {
         docker logs videoqna-xeon-ui-server >> ${LOG_PATH}/ui.log
         exit 1
     fi
+
+    echo "==== frontend validated ===="
 }
 
 function stop_docker() {
+    echo "Stopping docker..."
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
     docker compose stop && docker compose rm -f
     docker volume rm video-llama-model
+    docker volume rm videoqna-cache
+    echo "Docker stopped."
 }
 
 function main() {
+    setup_env
     stop_docker
     if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+
+    start_time=$(date +%s)
     start_services
+    end_time=$(date +%s)
+    duration=$((end_time-start_time))
+    echo "Mega service start duration is $duration s" && sleep 1s
 
     validate_microservices
     validate_megaservice
@@ -260,4 +352,4 @@ function main() {
 }
 
-# main
+main
diff --git a/VideoQnA/videoqna.py b/VideoQnA/videoqna.py
index c447dd2abf..311030e984 100644
--- a/VideoQnA/videoqna.py
+++ b/VideoQnA/videoqna.py
@@ -12,25 +12,43 @@
     ChatMessage,
     UsageInfo,
 )
-from comps.cores.proto.docarray import LLMParams
+from comps.cores.proto.docarray import LLMParams, TextDoc
 from fastapi import Request
 from fastapi.responses import StreamingResponse
 
-MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
+MEGA_SERVICE_PORT = int(os.getenv("BACKEND_PORT", 8888))
 EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
-EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
+EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDER_PORT", 6990))
 RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
-RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
+RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_PORT", 7000))
 RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
 RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
 LVM_SERVICE_HOST_IP = os.getenv("LVM_SERVICE_HOST_IP", "0.0.0.0")
-LVM_SERVICE_PORT = int(os.getenv("LVM_SERVICE_PORT", 9000))
+LVM_SERVICE_PORT = int(os.getenv("LVM_PORT", 9399))
+
+
+def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
+    service_type = self.services[cur_node].service_type
+    if service_type == ServiceType.EMBEDDING:
+        if "input" in inputs:
+            input_text = inputs["input"]["text"] if isinstance(inputs["input"], dict) else inputs["input"]
+            inputs = TextDoc(text=input_text).model_dump()
+    return inputs
+
+
+def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
+    if self.services[cur_node].service_type == ServiceType.EMBEDDING:
+        return {"text": inputs["text"], "embedding": data["embedding"]}
+    else:
+        return data
 
 
 class VideoQnAService:
     def __init__(self, host="0.0.0.0", port=8888):
         self.host = host
         self.port = port
+        ServiceOrchestrator.align_inputs = align_inputs
+        ServiceOrchestrator.align_outputs = align_outputs
         self.megaservice = ServiceOrchestrator()
         self.endpoint = str(MegaServiceEndpoint.VIDEO_RAG_QNA)
@@ -74,8 +92,8 @@ def add_remote_service(self):
     async def handle_request(self, request: Request):
         data = await request.json()
-        stream_opt = data.get("stream", False)
-        chat_request = ChatCompletionRequest.parse_obj(data)
+        stream_opt = bool(data.get("stream", False))
+        chat_request = ChatCompletionRequest.model_validate(data)
         prompt = handle_message(chat_request.messages)
         parameters = LLMParams(
             max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
@@ -86,9 +104,10 @@ async def handle_request(self, request: Request):
             presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
+            chat_template=chat_request.chat_template if chat_request.chat_template else None,
         )
         result_dict, runtime_graph = await self.megaservice.schedule(
-            initial_inputs={"text": prompt}, llm_parameters=parameters
+            initial_inputs={"input": prompt}, llm_parameters=parameters
         )
         for node, response in result_dict.items():
             # Here it is assumed that the last microservice in the megaservice is the LVM.
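
For quick verification of the refactored gateway, a minimal streaming client is sketched below. This is not part of the patch above; it assumes `BACKEND_SERVICE_ENDPOINT` is exported as in `set_env.sh` (e.g. `http://<host_ip>:8888/v1/videoqna`), that the stack is up via `docker compose up -d`, and that the `requests` package is installed. The payload mirrors the curl example in the README.

```python
# stream_smoke_test.py - hedged sketch, not part of the PR above.
import os

import requests

# Assumes BACKEND_SERVICE_ENDPOINT was exported by set_env.sh,
# e.g. http://<host_ip>:8888/v1/videoqna (hypothetical default below).
url = os.environ.get("BACKEND_SERVICE_ENDPOINT", "http://localhost:8888/v1/videoqna")

# Same shape as the README's curl example: a plain "messages" string
# plus the "stream" flag handled by handle_request() above.
payload = {"messages": "What is the man doing?", "stream": "True"}

with requests.post(url, json=payload, stream=True, timeout=600) as resp:
    resp.raise_for_status()
    # Print the streamed LVM answer chunk by chunk as it arrives.
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        if chunk:
            print(chunk, end="", flush=True)
print()
```

Run it only after the LVM backend has finished its first-startup model load; until then the gateway may block or return errors.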