diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/README.md b/VideoQnA/docker_compose/intel/cpu/xeon/README.md
index 32fc0397f0..96b1d97ec0 100644
--- a/VideoQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/VideoQnA/docker_compose/intel/cpu/xeon/README.md
@@ -17,7 +17,7 @@ Port 8001 - Open to 0.0.0.0/0
 embedding
 =========
-Port 6000 - Open to 0.0.0.0/0
+Port 6990 - Open to 0.0.0.0/0
 
 retriever
 =========
@@ -33,13 +33,13 @@ Port 9009 - Open to 0.0.0.0/0
 lvm
 ===
-Port 9000 - Open to 0.0.0.0/0
+Port 9399 - Open to 0.0.0.0/0
 
-chaqna-xeon-backend-server
+videoqna-xeon-backend-server
 ==========================
 Port 8888 - Open to 0.0.0.0/0
 
-chaqna-xeon-ui-server
+videoqna-xeon-ui-server
 =====================
 Port 5173 - Open to 0.0.0.0/0
 ```
@@ -106,17 +106,14 @@ docker build -t opea/videoqna-ui:latest --build-arg https_proxy=$https_proxy --b
 
 Then run the command `docker images`; you will have the following 8 Docker images:
 
+1. `opea/embedding-multimodal-clip:latest`
+1. `opea/retriever:latest`
+1. `opea/reranking:latest`
+1. `opea/lvm-video-llama:latest`
+1. `opea/lvm:latest`
 1. `opea/dataprep:latest`
-2. `opea/embedding-multimodal-clip:latest`
-3. `opea/retriever:latest`
-4. `opea/reranking:latest`
-5. `opea/video-llama-lvm-server:latest`
-6. # `opea/lvm-video-llama:latest`
-7. `opea/reranking-tei:latest`
-8. `opea/lvm-video-llama:latest`
-9. `opea/lvm:latest`
-10. `opea/videoqna:latest`
-11. `opea/videoqna-ui:latest`
+1. `opea/videoqna:latest`
+1. `opea/videoqna-ui:latest`
 
 ## 🚀 Start Microservices
 
@@ -132,18 +129,18 @@ Since the `compose.yaml` will consume some environment variables, you need to se
 export host_ip="External_Public_IP"
 ```
 
-**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable**
+**Export the value of your Huggingface API token to the `HF_TOKEN` environment variable**
 
 > Replace `Your_Huggingface_API_Token` below with your actual Huggingface API Token value
 
 ```
-export your_hf_api_token="Your_Huggingface_API_Token"
+export HF_TOKEN="Your_Huggingface_API_Token"
 ```
 
 **Append the value of the public IP address to the no_proxy list**
 
 ```
-export your_no_proxy="${your_no_proxy},${host_ip}"
+export no_proxy="${your_no_proxy},${host_ip}"
 ```
 
 Then you can run the commands below or `source set_env.sh` to set all the variables
@@ -152,26 +149,52 @@ Then you can run the commands below or `source set_env.sh` to set all the variables
 export no_proxy=${your_no_proxy}
 export http_proxy=${your_http_proxy}
 export https_proxy=${your_http_proxy}
-export MEGA_SERVICE_HOST_IP=${host_ip}
-export EMBEDDING_SERVICE_HOST_IP=${host_ip}
-export RETRIEVER_SERVICE_HOST_IP=${host_ip}
-export RERANK_SERVICE_HOST_IP=${host_ip}
-export LVM_SERVICE_HOST_IP=${host_ip}
-export LVM_ENDPOINT="http://${host_ip}:9009"
-export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna"
-export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
-export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
-export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos"
+export HF_TOKEN=${HF_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 
-export VDMS_HOST=${host_ip}
-export VDMS_PORT=8001
 export INDEX_NAME="mega-videoqna"
-export LLM_DOWNLOAD="True"
+export LLM_DOWNLOAD="True" # Set to "False" before redeploying the LVM server to avoid downloading the model again
+export RERANK_COMPONENT_NAME="OPEA_VIDEO_RERANKING"
+export LVM_COMPONENT_NAME="OPEA_VIDEO_LLAMA_LVM"
+export EMBEDDING_COMPONENT_NAME="OPEA_CLIP_EMBEDDING"
 export USECLIP=1
+export LOGFLAG=True
 
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+export LVM_SERVICE_HOST_IP=${host_ip}
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export RERANK_SERVICE_HOST_IP=${host_ip}
+export RETRIEVER_SERVICE_HOST_IP=${host_ip}
+export VDMS_HOST=${host_ip}
+
+export BACKEND_PORT=8888
+export DATAPREP_PORT=6007
+export EMBEDDER_PORT=6990
+export MULTIMODAL_CLIP_EMBEDDER_PORT=6991
+export LVM_PORT=9399
+export RERANKING_PORT=8000
+export RETRIEVER_PORT=7000
+export UI_PORT=5173
+export VDMS_PORT=8001
+export VIDEO_LLAMA_PORT=9009
+
+export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/health_check"
+export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/videoqna"
+export CLIP_EMBEDDING_ENDPOINT="http://${host_ip}:${MULTIMODAL_CLIP_EMBEDDER_PORT}"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get"
+export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get_videos"
+export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest_videos"
+export EMBEDDING_ENDPOINT="http://${host_ip}:${EMBEDDER_PORT}/v1/embeddings"
+export FRONTEND_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
+export LVM_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}"
+export LVM_VIDEO_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}/generate"
+export RERANKING_ENDPOINT="http://${host_ip}:${RERANKING_PORT}/v1/reranking"
+export RETRIEVER_ENDPOINT="http://${host_ip}:${RETRIEVER_PORT}/v1/retrieval"
+export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}"
+export UI_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
+
+export no_proxy="${NO_PROXY},${host_ip},vdms-vector-db,dataprep-vdms-server,clip-embedding-server,reranking-tei-server,retriever-vdms-server,lvm-video-llama,lvm,videoqna-xeon-backend-server,videoqna-xeon-ui-server"
 ```
 
 Note: Replace `host_ip` with your external IP address; do not use localhost.
@@ -190,12 +213,13 @@ In the deploy steps, you need to start the VDMS DB and dataprep firstly, then in
 
 ```bash
 cd GenAIExamples/VideoQnA/docker_compose/intel/cpu/xeon/
 docker volume create video-llama-model
+docker volume create videoqna-cache
 docker compose up vdms-vector-db dataprep -d
-sleep 1m # wait for the services ready
+sleep 30s
 
 # Insert some sample data to the DB
-curl -X POST http://${host_ip}:6007/v1/dataprep/ingest \
+curl -X POST ${DATAPREP_INGEST_SERVICE_ENDPOINT} \
   -H "Content-Type: multipart/form-data" \
   -F "files=@./data/op_1_0320241830.mp4"
@@ -212,11 +236,12 @@ docker compose up -d
 
    ```bash
    # Single file upload
-   curl -X POST ${DATAPREP_SERVICE_ENDPOINT} \
+   curl -X POST ${DATAPREP_INGEST_SERVICE_ENDPOINT} \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./file1.mp4"
+
    # Multiple file upload
-   curl -X POST ${DATAPREP_SERVICE_ENDPOINT} \
+   curl -X POST ${DATAPREP_INGEST_SERVICE_ENDPOINT} \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./file1.mp4" \
     -F "files=@./file2.mp4" \
@@ -228,6 +253,7 @@ docker compose up -d
    ```bash
    # List available videos
    curl -X 'GET' ${DATAPREP_GET_VIDEO_LIST_ENDPOINT} -H 'accept: application/json'
+
    # Download available video
    curl -X 'GET' ${DATAPREP_GET_FILE_ENDPOINT}/video_name.mp4 -H 'accept: application/json'
    ```
@@ -235,9 +261,9 @@
 2. Embedding Microservice
 
    ```bash
-   curl http://${host_ip}:6000/v1/embeddings \
+   curl ${EMBEDDING_ENDPOINT} \
     -X POST \
-    -d '{"text":"Sample text"}' \
+    -d '{"input":"What is the man doing?"}' \
     -H 'Content-Type: application/json'
    ```
 
@@ -251,16 +277,16 @@
 
    ```bash
    export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
-   curl http://${host_ip}:7000/v1/retrieval \
+   curl ${RETRIEVER_ENDPOINT} \
     -X POST \
-    -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
+    -d "{\"text\":\"What is the man doing?\",\"embedding\":${your_embedding},\"search_type\":\"mmr\", \"k\":4}" \
     -H 'Content-Type: application/json'
    ```
 
 4. Reranking Microservice
 
    ```bash
-   curl http://${host_ip}:8000/v1/reranking \
+   curl ${RERANKING_ENDPOINT} \
     -X 'POST' \
     -H 'accept: application/json' \
     -H 'Content-Type: application/json' \
@@ -282,7 +308,7 @@ docker compose up -d
 
    ```bash
    curl -X POST \
-     "http://${host_ip}:9009/generate?video_url=silence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" \
+     "${LVM_VIDEO_ENDPOINT}?video_url=silence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" \
     -H "accept: */*" \
     -d ''
    ```
@@ -294,9 +320,9 @@ docker compose up -d
 
    This service depends on the LVM backend service above. On the first startup it can take a long time to become ready, so wait for it before sending requests.
 
    ```bash
-   curl http://${host_ip}:9000/v1/lvm\
+   curl http://${host_ip}:${LVM_PORT}/v1/lvm \
     -X POST \
-    -d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}' \
+    -d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the man doing?","max_new_tokens": 50}' \
     -H 'Content-Type: application/json'
    ```
 
@@ -305,7 +331,7 @@
 7. MegaService
 
    ```bash
-   curl http://${host_ip}:8888/v1/videoqna -H "Content-Type: application/json" -d '{
+   curl ${BACKEND_SERVICE_ENDPOINT} -H "Content-Type: application/json" -d '{
      "messages": "What is the man doing?",
      "stream": "True"
      }'
@@ -343,4 +369,5 @@ To clean the volume:
 
 ```bash
 docker volume rm video-llama-model
+docker volume rm videoqna-cache
 ```
diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml b/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 780ff3c704..827136122b 100644
--- a/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -5,69 +5,74 @@ services:
   vdms-vector-db:
-    image: intellabs/vdms:v2.8.0
+    image: intellabs/vdms:latest
     container_name: vdms-vector-db
     ports:
-      - "8001:55555"
+      - "${VDMS_PORT}:55555"
   dataprep:
     image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
     container_name: dataprep-vdms-server
     depends_on:
       - vdms-vector-db
     ports:
-      - "6007:5000"
+      - "${DATAPREP_PORT}:5000"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      MULTIMODAL_DATAPREP: true
+      DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALVDMS"
       VDMS_HOST: ${VDMS_HOST}
       VDMS_PORT: ${VDMS_PORT}
       INDEX_NAME: ${INDEX_NAME}
-      MULTIMODAL_DATAPREP: true
-    entrypoint: sh -c 'sleep 15 && python ingest_videos.py'
+      COLLECTION_NAME: ${INDEX_NAME}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     volumes:
-      - /home/$USER/.cache/clip:/home/user/.cache/clip
-      - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
+      - videoqna-cache:/home/user/.cache
   embedding:
     image: ${REGISTRY:-opea}/embedding-multimodal-clip:${TAG:-latest}
-    container_name: embedding-multimodal-server
+    container_name: clip-embedding-server
     ports:
-      - "6000:6000"
+      - "${EMBEDDER_PORT:-6990}:6990"
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-    volumes:
-      - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
+      CLIP_EMBEDDING_ENDPOINT: ${CLIP_EMBEDDING_ENDPOINT}
+      EMBEDDING_COMPONENT_NAME: "OPEA_CLIP_EMBEDDING"
+      LOGFLAG: ${LOGFLAG:-False}
     restart: unless-stopped
+    volumes:
+      - videoqna-cache:/home/user/.cache
   retriever:
     image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
     container_name: retriever-vdms-server
     depends_on:
       - vdms-vector-db
     ports:
-      - "7000:7000"
+      - "${RETRIEVER_PORT}:7000"
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      INDEX_NAME: ${INDEX_NAME}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_VDMS"
       VDMS_INDEX_NAME: ${INDEX_NAME}
       VDMS_HOST: ${VDMS_HOST}
       VDMS_PORT: ${VDMS_PORT}
       VDMS_USE_CLIP: ${USECLIP}
-      LOGFLAG: ${LOGFLAG}
-      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_VDMS"
-    entrypoint: sh -c 'sleep 30 && python retriever_vdms.py'
+      NUMBA_CACHE_DIR: "/tmp/numba_cache"
     restart: unless-stopped
     volumes:
-      - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
+      - videoqna-cache:/home/user/.cache
   reranking:
     image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
     container_name: reranking-tei-server
     ports:
-      - "8000:8000"
+      - "${RERANKING_PORT}:8000"
     ipc: host
     environment:
       no_proxy: ${no_proxy}
@@ -76,12 +81,13 @@ services:
       CHUNK_DURATION: ${CHUNK_DURATION}
       FILE_SERVER_ENDPOINT: ${DATAPREP_GET_FILE_ENDPOINT}
       DATAPREP_GET_VIDEO_LIST_ENDPOINT: ${DATAPREP_GET_VIDEO_LIST_ENDPOINT}
+      RERANK_COMPONENT_NAME: ${RERANK_COMPONENT_NAME:-OPEA_VIDEO_RERANKING}
     restart: unless-stopped
   lvm-video-llama:
     image: ${REGISTRY:-opea}/lvm-video-llama:${TAG:-latest}
     container_name: lvm-video-llama
     ports:
-      - "9009:9009"
+      - ${VIDEO_LLAMA_PORT:-9009}:9009
     ipc: host
     environment:
       http_proxy: ${http_proxy}
@@ -89,20 +95,20 @@ services:
       no_proxy: ${no_proxy}
       llm_download: ${LLM_DOWNLOAD}
     volumes:
-      - "/home/$USER/.cache:/home/user/.cache"
+      - videoqna-cache:/home/user/.cache
       - video-llama-model:/home/user/model
     restart: unless-stopped
   lvm:
     image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
     container_name: lvm
     ports:
-      - "9000:9000"
+      - "${LVM_PORT}:9399"
     ipc: host
     environment:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       no_proxy: ${no_proxy}
-      LVM_COMPONENT_NAME: "OPEA_VIDEO_LLAMA_LVM"
+      LVM_COMPONENT_NAME: ${LVM_COMPONENT_NAME:-OPEA_VIDEO_LLAMA_LVM}
       LVM_ENDPOINT: ${LVM_ENDPOINT}
     restart: unless-stopped
     depends_on:
@@ -119,13 +125,14 @@ services:
       - lvm-video-llama
       - lvm
     ports:
-      - "8888:8888"
-    entrypoint: sh -c 'sleep 45 && python videoqna.py'
+      - "${BACKEND_PORT}:8888"
     environment:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       no_proxy: ${no_proxy}
+      LOGFLAG: ${LOGFLAG:-False}
       MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      BACKEND_PORT: ${BACKEND_PORT}
       EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
       RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
       RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
@@ -138,18 +145,23 @@ services:
     depends_on:
       - videoqna-xeon-backend-server
     ports:
-      - "5173:5173"
+      - "${UI_PORT}:5173"
     environment:
       https_proxy: ${https_proxy}
       http_proxy: ${http_proxy}
       no_proxy: ${no_proxy}
       BACKEND_SERVICE_ENDPOINT: ${BACKEND_SERVICE_ENDPOINT}
       BACKEND_HEALTH_CHECK_ENDPOINT: ${BACKEND_HEALTH_CHECK_ENDPOINT}
+      DATAPREP_INGEST_SERVICE_ENDPOINT: ${DATAPREP_INGEST_SERVICE_ENDPOINT}
+      DATAPREP_PORT: ${DATAPREP_PORT}
+      BACKEND_PORT: ${BACKEND_PORT}
+      UI_PORT: ${UI_PORT}
     ipc: host
     restart: always
 
 volumes:
   video-llama-model:
     external: true
+  videoqna-cache:
 networks:
   default:
     driver: bridge
diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh b/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh
index dcf574774b..ada41f8ba9 100644
--- a/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -8,21 +8,48 @@ popd > /dev/null
 
 host_ip=$(hostname -I | awk '{print $1}')
 
-export MEGA_SERVICE_HOST_IP=${host_ip}
-export EMBEDDING_SERVICE_HOST_IP=${host_ip}
-export RETRIEVER_SERVICE_HOST_IP=${host_ip}
-export RERANK_SERVICE_HOST_IP=${host_ip}
-export LVM_SERVICE_HOST_IP=${host_ip}
+export HF_TOKEN=${HF_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
 
-export LVM_ENDPOINT="http://${host_ip}:9009"
-export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna"
-export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
-export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
-export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos"
+export INDEX_NAME="mega-videoqna"
+export LLM_DOWNLOAD="True" # Set to "False" before redeploying the LVM server to avoid downloading the model again
+export RERANK_COMPONENT_NAME="OPEA_VIDEO_RERANKING"
+export LVM_COMPONENT_NAME="OPEA_VIDEO_LLAMA_LVM"
+export EMBEDDING_COMPONENT_NAME="OPEA_CLIP_EMBEDDING"
+export USECLIP=1
+export LOGFLAG=True
 
+export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+export LVM_SERVICE_HOST_IP=${host_ip}
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export RERANK_SERVICE_HOST_IP=${host_ip}
+export RETRIEVER_SERVICE_HOST_IP=${host_ip}
 export VDMS_HOST=${host_ip}
+
+export BACKEND_PORT=8888
+export DATAPREP_PORT=6007
+export EMBEDDER_PORT=6990
+export MULTIMODAL_CLIP_EMBEDDER_PORT=6991
+export LVM_PORT=9399
+export RERANKING_PORT=8000
+export RETRIEVER_PORT=7000
+export UI_PORT=5173
 export VDMS_PORT=8001
-export INDEX_NAME="mega-videoqna"
-export USECLIP=1
-export LLM_DOWNLOAD="True" # Set to "False" before redeploy LVM server to avoid model download
+export VIDEO_LLAMA_PORT=9009
+
+export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/health_check"
+export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/videoqna"
+export CLIP_EMBEDDING_ENDPOINT="http://${host_ip}:${MULTIMODAL_CLIP_EMBEDDER_PORT}"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get"
+export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get_videos"
+export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest_videos"
+export EMBEDDING_ENDPOINT="http://${host_ip}:${EMBEDDER_PORT}/v1/embeddings"
+export FRONTEND_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
+export LVM_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}"
+export LVM_VIDEO_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}/generate"
+export RERANKING_ENDPOINT="http://${host_ip}:${RERANKING_PORT}/v1/reranking"
+export RETRIEVER_ENDPOINT="http://${host_ip}:${RETRIEVER_PORT}/v1/retrieval"
+export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}"
+export UI_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
+
+export no_proxy="${NO_PROXY},${host_ip},vdms-vector-db,dataprep-vdms-server,clip-embedding-server,reranking-tei-server,retriever-vdms-server,lvm-video-llama,lvm,videoqna-xeon-backend-server,videoqna-xeon-ui-server"
diff --git a/VideoQnA/docker_image_build/build.yaml b/VideoQnA/docker_image_build/build.yaml
index 24fab7697b..aa3a2dc86d 100644
--- a/VideoQnA/docker_image_build/build.yaml
+++ b/VideoQnA/docker_image_build/build.yaml
@@ -23,7 +23,7 @@ services:
       dockerfile: comps/dataprep/src/Dockerfile
     extends: videoqna
     image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
-  embedding-multimodal-clip:
+  embedding:
     build:
       context: GenAIComps
       dockerfile: comps/third_parties/clip/src/Dockerfile
diff --git a/VideoQnA/tests/test_compose_on_xeon.sh b/VideoQnA/tests/test_compose_on_xeon.sh
index 614c2efc47..d4c1b5a3b5 100755
--- a/VideoQnA/tests/test_compose_on_xeon.sh
+++ b/VideoQnA/tests/test_compose_on_xeon.sh
@@ -13,6 +13,55 @@ export TAG=${IMAGE_TAG}
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')
+export host_ip=${ip_address}
+
+function setup_env() {
+    export HF_TOKEN=${HF_TOKEN}
+    export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+
+    export INDEX_NAME="mega-videoqna"
+    export LLM_DOWNLOAD="True" # Set to "False" before redeploying the LVM server to avoid downloading the model again
+    export RERANK_COMPONENT_NAME="OPEA_VIDEO_RERANKING"
+    export LVM_COMPONENT_NAME="OPEA_VIDEO_LLAMA_LVM"
+    export EMBEDDING_COMPONENT_NAME="OPEA_CLIP_EMBEDDING"
+    export USECLIP=1
+    export LOGFLAG=True
+
+    export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+    export LVM_SERVICE_HOST_IP=${host_ip}
+    export MEGA_SERVICE_HOST_IP=${host_ip}
+    export RERANK_SERVICE_HOST_IP=${host_ip}
+    export RETRIEVER_SERVICE_HOST_IP=${host_ip}
+    export VDMS_HOST=${host_ip}
+
+    export BACKEND_PORT=8888
+    export DATAPREP_PORT=6007
+    export EMBEDDER_PORT=6990
+    export MULTIMODAL_CLIP_EMBEDDER_PORT=6991
+    export LVM_PORT=9399
+    export RERANKING_PORT=8000
+    export RETRIEVER_PORT=7000
+    export UI_PORT=5173
+    export VDMS_PORT=8001
+    export VIDEO_LLAMA_PORT=9009
+
+    export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/health_check"
+    export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_PORT}/v1/videoqna"
+    export CLIP_EMBEDDING_ENDPOINT="http://${host_ip}:${MULTIMODAL_CLIP_EMBEDDER_PORT}"
+    export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get"
+    export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/get_videos"
+    export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest_videos"
+    export EMBEDDING_ENDPOINT="http://${host_ip}:${EMBEDDER_PORT}/v1/embeddings"
+    export FRONTEND_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
+    export LVM_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}"
+    export LVM_VIDEO_ENDPOINT="http://${host_ip}:${VIDEO_LLAMA_PORT}/generate"
+    export RERANKING_ENDPOINT="http://${host_ip}:${RERANKING_PORT}/v1/reranking"
+    export RETRIEVER_ENDPOINT="http://${host_ip}:${RETRIEVER_PORT}/v1/retrieval"
+    export TEI_RERANKING_ENDPOINT="http://${host_ip}:${TEI_RERANKING_PORT}"
+    export UI_ENDPOINT="http://${host_ip}:${UI_PORT}/_stcore/health"
+
+    export no_proxy="${NO_PROXY},${host_ip},vdms-vector-db,dataprep-vdms-server,clip-embedding-server,reranking-tei-server,retriever-vdms-server,lvm-video-llama,lvm,videoqna-xeon-backend-server,videoqna-xeon-ui-server"
+}
 
 function build_docker_images() {
     opea_branch=${opea_branch:-"main"}
@@ -28,26 +77,33 @@ function build_docker_images() {
     fi
 
     cd $WORKPATH/docker_image_build
-    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git GenAIComps
+
+    # Create the .cache directory for the cache volume to mount (avoids a permission-denied error)
+    OLD_STRING="mkdir -p /home/user "
+    NEW_STRING="mkdir -p /home/user/.cache "
+    sed -i "s|$OLD_STRING|$NEW_STRING|g" "GenAIComps/comps/dataprep/src/Dockerfile"
+    sed -i "s|$OLD_STRING|$NEW_STRING|g" "GenAIComps/comps/retrievers/src/Dockerfile"
+    sed -i "s|$OLD_STRING|$NEW_STRING|g" "GenAIComps/comps/third_parties/clip/src/Dockerfile"
 
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
+    docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log 2>&1
 
-    docker pull intellabs/vdms:v2.8.0
+    docker pull intellabs/vdms:latest
 
     docker images && sleep 1s
 }
 
-
 function start_services() {
+    echo "Starting services..."
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
-    source set_env.sh
 
     docker volume create video-llama-model
+    docker volume create videoqna-cache
     docker compose up vdms-vector-db dataprep -d
     sleep 30s
 
     # Insert some sample data to the DB
-    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \
+    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST ${DATAPREP_INGEST_SERVICE_ENDPOINT} \
         -H "Content-Type: multipart/form-data" \
         -F "files=@./data/op_1_0320241830.mp4")
@@ -58,12 +114,13 @@ function start_services() {
         docker logs dataprep-vdms-server >> ${LOG_PATH}/dataprep.log
         exit 1
     fi
+
     # Bring all the others
     docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 1m
 
     # List of containers running uvicorn
-    list=("dataprep-vdms-server" "embedding-multimodal-server" "retriever-vdms-server" "reranking-tei-server" "lvm-video-llama" "lvm-video-llama" "videoqna-xeon-backend-server")
+    list=("dataprep-vdms-server" "clip-embedding-server" "retriever-vdms-server" "reranking-tei-server" "lvm-video-llama" "videoqna-xeon-backend-server")
 
     # Define the maximum time limit in seconds
     TIME_LIMIT=5400
@@ -95,10 +152,10 @@ function start_services() {
         for i in "${!list[@]}"; do
             item=${list[i]}
             if check_condition "$item"; then
-                echo "Condition met for $item, removing from list."
+                echo "Condition met for $item, removing from list." >> ${LOG_PATH}/list_check.log
                 unset list[i]
             else
-                echo "Condition not met for $item, keeping in list."
+                echo "Condition not met for $item, keeping in list." >> ${LOG_PATH}/list_check.log
             fi
         done
@@ -110,7 +167,7 @@ function start_services() {
             echo "List is empty. Exiting."
             break
         fi
-        sleep 5m
+        sleep 2m
     done
 
     if docker logs videoqna-xeon-ui-server 2>&1 | grep -q "Streamlit app"; then
@@ -128,33 +185,37 @@ function validate_services() {
     local DOCKER_NAME="$4"
     local INPUT_DATA="$5"
 
-    local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
-    if [ "$HTTP_STATUS" -eq 200 ]; then
-        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
+    HTTP_RESPONSE=$(curl -s -w "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
+    HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
+    RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
 
-        local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
+    docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
 
-        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
-            echo "[ $SERVICE_NAME ] Content is as expected."
-        else
-            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
-            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
-            exit 1
-        fi
-    else
+    # check response status
+    if [ "$HTTP_STATUS" -ne "200" ]; then
         echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
-        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
         exit 1
+    else
+        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
+    fi
+
+    # check response body
+    if [[ "${RESPONSE_BODY}" != *"${EXPECTED_RESULT}"* ]]; then
+        echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
+        exit 1
+    else
+        echo "[ $SERVICE_NAME ] Content is as expected."
     fi
 
     sleep 1s
 }
 
 function validate_microservices() {
     # Check if the microservices are running correctly.
-    cd $WORKPATH/docker_compose/intel/cpu/xeon//data
+    cd $WORKPATH/docker_compose/intel/cpu/xeon/data
 
     # dataprep microservice
+    echo "Validating Dataprep microservice ..."
-    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \
+    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST "${DATAPREP_INGEST_SERVICE_ENDPOINT}" \
         -H "Content-Type: multipart/form-data" \
         -F "files=@./op_1_0320241830.mp4")
@@ -168,24 +229,24 @@ function validate_microservices() {
 
     # Embedding Microservice
     validate_services \
-        "${ip_address}:6000/v1/embeddings" \
-        "Sample text" \
+        ${EMBEDDING_ENDPOINT} \
+        '"embedding":[' \
         "embedding" \
-        "embedding-multimodal-server" \
-        '{"text":"Sample text"}'
+        "clip-embedding-server" \
+        '{"input":"What is the man doing?"}'
 
     # Retriever Microservice
-    export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
+    export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
     validate_services \
-        "${ip_address}:7000/v1/retrieval" \
+        ${RETRIEVER_ENDPOINT} \
        "retrieved_docs" \
        "retriever" \
        "retriever-vdms-server" \
-        "{\"text\":\"test\",\"embedding\":${your_embedding}}"
+        "{\"text\":\"What is the man doing?\",\"embedding\":${your_embedding},\"search_type\":\"mmr\", \"k\":4}"
 
     # Reranking Microservice
     validate_services \
-        "${ip_address}:8000/v1/reranking" \
+        ${RERANKING_ENDPOINT} \
         "video_url" \
         "reranking" \
         "reranking-tei-server" \
@@ -198,32 +259,52 @@ function validate_microservices() {
         ]
     }'
 
+    # Video Llama LVM Backend Service
+    result=$(http_proxy="" curl -X POST \
+        "${LVM_VIDEO_ENDPOINT}?video_url=https%3A%2F%2Fgithub.com%2FDAMO-NLP-SG%2FVideo-LLaMA%2Fraw%2Fmain%2Fexamples%2Fsilence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" \
+        -H "accept: */*" -d '')
+
+    if [[ $result == *"silence"* ]]; then
+        echo "LVM microservice is running correctly."
+    else
+        echo "LVM microservice is not running correctly. Received result was $result"
+        docker logs lvm-video-llama >> ${LOG_PATH}/lvm-video-llama.log
+        exit 1
+    fi
+
     # LVM Microservice
     validate_services \
-        "${ip_address}:9000/v1/lvm" \
+        "http://${host_ip}:${LVM_PORT}/v1/lvm" \
         "silence" \
         "lvm" \
-        "lvm-video-llama" \
-        '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}'
+        "lvm" \
+        '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the man doing?","max_new_tokens": 50}'
 
+    echo "==== microservices validated ===="
     sleep 1s
 }
 
 function validate_megaservice() {
+    echo "Validating videoqna-xeon-backend-server ..."
+
     validate_services \
-        "${ip_address}:8888/v1/videoqna" \
+        ${BACKEND_SERVICE_ENDPOINT} \
         "man" \
         "videoqna-xeon-backend-server" \
         "videoqna-xeon-backend-server" \
         '{"messages":"What is the man doing?","stream":"True"}'
+
+    echo "==== megaservice validated ===="
 }
 
 function validate_frontend() {
-    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X GET http://${ip_address}:5173/_stcore/health)
+    echo "Validating frontend ..."
+
+    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X GET ${FRONTEND_ENDPOINT})
     if [ "$HTTP_STATUS" -eq 200 ]; then
         echo "Frontend is running correctly."
-        local CONTENT=$(curl -s -X GET http://${ip_address}:5173/_stcore/health)
+        local CONTENT=$(curl -s -X GET ${FRONTEND_ENDPOINT})
         if echo "$CONTENT" | grep -q "ok"; then
             echo "Frontend Content is as expected."
         else
@@ -236,20 +317,31 @@ function validate_frontend() {
         docker logs videoqna-xeon-ui-server >> ${LOG_PATH}/ui.log
         exit 1
     fi
+
+    echo "==== frontend validated ===="
 }
 
 function stop_docker() {
+    echo "Stopping docker..."
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
     docker compose stop && docker compose rm -f
     docker volume rm video-llama-model
+    docker volume rm videoqna-cache
+    echo "Docker stopped."
 }
 
 function main() {
+    setup_env
     stop_docker
     if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+
+    start_time=$(date +%s)
     start_services
+    end_time=$(date +%s)
+    duration=$((end_time-start_time))
+    echo "Mega service start duration is $duration s" && sleep 1s
 
     validate_microservices
     validate_megaservice
@@ -260,4 +352,4 @@ function main() {
 }
 
-# main
+main
diff --git a/VideoQnA/videoqna.py b/VideoQnA/videoqna.py
index c447dd2abf..311030e984 100644
--- a/VideoQnA/videoqna.py
+++ b/VideoQnA/videoqna.py
@@ -12,25 +12,43 @@
     ChatMessage,
     UsageInfo,
 )
-from comps.cores.proto.docarray import LLMParams
+from comps.cores.proto.docarray import LLMParams, TextDoc
 from fastapi import Request
 from fastapi.responses import StreamingResponse
 
-MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
+MEGA_SERVICE_PORT = int(os.getenv("BACKEND_PORT", 8888))
 EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
-EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
+EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDER_PORT", 6990))
 RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
-RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
+RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_PORT", 7000))
 RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
 RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
 LVM_SERVICE_HOST_IP = os.getenv("LVM_SERVICE_HOST_IP", "0.0.0.0")
-LVM_SERVICE_PORT = int(os.getenv("LVM_SERVICE_PORT", 9000))
+LVM_SERVICE_PORT = int(os.getenv("LVM_PORT", 9399))
+
+
+def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
+    service_type = self.services[cur_node].service_type
+    if service_type == ServiceType.EMBEDDING:
+        if "input" in inputs:
+            input_text = inputs["input"]["text"] if isinstance(inputs["input"], dict) else inputs["input"]
+            inputs = TextDoc(text=input_text).model_dump()
+    return inputs
+
+
+def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
+    if self.services[cur_node].service_type == ServiceType.EMBEDDING:
+        return {"text": inputs["text"], "embedding": data["embedding"]}
+    else:
+        return data
 
 
 class VideoQnAService:
     def __init__(self, host="0.0.0.0", port=8888):
         self.host = host
         self.port = port
+        ServiceOrchestrator.align_inputs = align_inputs
+        ServiceOrchestrator.align_outputs = align_outputs
         self.megaservice = ServiceOrchestrator()
         self.endpoint = str(MegaServiceEndpoint.VIDEO_RAG_QNA)
@@ -74,8 +92,8 @@ def add_remote_service(self):
     async def handle_request(self, request: Request):
         data = await request.json()
-        stream_opt = data.get("stream", False)
-        chat_request = ChatCompletionRequest.parse_obj(data)
+        stream_opt = bool(data.get("stream", False))
+        chat_request = ChatCompletionRequest.model_validate(data)
         prompt = handle_message(chat_request.messages)
         parameters = LLMParams(
             max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
@@ -86,9 +104,10 @@ async def handle_request(self, request: Request):
             presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
+            chat_template=chat_request.chat_template if chat_request.chat_template else None,
         )
         result_dict, runtime_graph = await self.megaservice.schedule(
-            initial_inputs={"text": prompt}, llm_parameters=parameters
+            initial_inputs={"input": prompt}, llm_parameters=parameters
         )
         for node, response in result_dict.items():
             # Here it is assumed that the last microservice in the megaservice is the LVM.
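
For quick verification of the refactored gateway, a minimal streaming client is sketched below. This is not part of the patch above; it assumes `BACKEND_SERVICE_ENDPOINT` is exported as in `set_env.sh` (e.g. `http://<host_ip>:8888/v1/videoqna`), that the stack is up via `docker compose up -d`, and that the `requests` package is installed. The payload mirrors the curl example in the README.

```python
# stream_smoke_test.py - hedged sketch, not part of the PR above.
import os

import requests

# Assumes BACKEND_SERVICE_ENDPOINT was exported by set_env.sh,
# e.g. http://<host_ip>:8888/v1/videoqna (hypothetical default below).
url = os.environ.get("BACKEND_SERVICE_ENDPOINT", "http://localhost:8888/v1/videoqna")

# Same shape as the README's curl example: a plain "messages" string
# plus the "stream" flag handled by handle_request() above.
payload = {"messages": "What is the man doing?", "stream": "True"}

with requests.post(url, json=payload, stream=True, timeout=600) as resp:
    resp.raise_for_status()
    # Print the streamed LVM answer chunk by chunk as it arrives.
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        if chunk:
            print(chunk, end="", flush=True)
print()
```

Run it only after the LVM backend has finished its first-startup model load; until then the gateway may block or return errors.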