Merged
Changes from all commits
25 changes: 11 additions & 14 deletions AgentQnA/README.md
@@ -4,7 +4,7 @@

1. [Overview](#overview)
2. [Deploy with Docker](#deploy-with-docker)
3. [Launch the UI](#launch-the-ui)
3. [How to interact with the agent system with UI](#how-to-interact-with-the-agent-system-with-ui)
4. [Validate Services](#validate-services)
5. [Register Tools](#how-to-register-other-tools-with-the-ai-agent)

@@ -144,21 +144,19 @@ source $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh

### 2. Launch the multi-agent system. </br>

Two options are provided for the `llm_engine` of the agents: 1. open-source LLMs on Gaudi, 2. OpenAI models via API calls.
The whole system can be launched conveniently with docker compose and includes microservices for the LLM, agents, UI, retrieval tool, vector database, dataprep, and telemetry. Three docker compose files are provided so users can pick and choose: a retrieval tool other than the `DocIndexRetriever` example in our GenAIExamples repo can be substituted, and the telemetry containers can be left out.

#### Gaudi
#### Launch on Gaudi

On Gaudi, `meta-llama/Meta-Llama-3.1-70B-Instruct` will be served using vllm.
By default, both the RAG agent and SQL agent will be launched to support the React Agent.
The React Agent requires the DocIndexRetriever's [`compose.yaml`](../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml) file, so two `compose.yaml` files need to be run with docker compose to start the multi-agent system.

> **Note**: To enable the web search tool, skip this step and proceed to the "[Optional] Web Search Tool Support" section.
On Gaudi, `meta-llama/Meta-Llama-3.3-70B-Instruct` will be served using vllm. The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.

```bash
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml up -d
```

> **Note**: To enable the web search tool, skip this step and proceed to the "[Optional] Web Search Tool Support" section.

To enable OpenTelemetry tracing, the `compose.telemetry.yaml` file needs to be merged with the default `compose.yaml` file.
Gaudi example with the OpenTelemetry feature:
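As a hedged sketch (the command is echoed rather than executed so the flag stacking is visible; paths assume the repo layout used earlier in this README), the telemetry overlay is added by passing one extra `-f` flag on top of the default compose files:

```shell
# Sketch only: stack compose.telemetry.yaml on the default compose files.
# Paths are assumptions based on the repo layout shown in this README.
compose_files="-f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml -f compose.telemetry.yaml"
echo "docker compose $compose_files up -d"
```

Later `-f` files override matching keys in earlier ones, so the telemetry overlay only has to declare the additions.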

@@ -183,11 +181,9 @@ docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/

</details>

#### Xeon
#### Launch on Xeon

On Xeon, only OpenAI models are supported.
By default, both the RAG Agent and SQL Agent will be launched to support the React Agent.
The React Agent requires the DocIndexRetriever's [`compose.yaml`](../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml) file, so two `compose yaml` files need to be run with docker compose to start the multi-agent system.
On Xeon, only OpenAI models are supported. The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.

```bash
export OPENAI_API_KEY=<your-openai-key>
@@ -206,9 +202,10 @@ bash run_ingest_data.sh

> **Note**: This is a one-time operation.

## Launch the UI
## How to interact with the agent system with UI

Open a web browser to http://localhost:5173 to access the UI.
The UI microservice is launched in the previous step along with the other microservices.
To access the UI, open a web browser to `http://${ip_address}:5173`, where `ip_address` is the host IP of the machine running the UI microservice.

1. `create Admin Account` with a random value
2. Add the OPEA agent endpoint `http://$ip_address:9090/v1`, which is an OpenAI-compatible API
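Since the agent endpoint is OpenAI-compatible, a request body can be built in the OpenAI chat format. A minimal, hypothetical sketch of the payload construction (field names follow the OpenAI chat-completions convention; the question text is illustrative):

```python
import json

def build_agent_request(question: str, stream: bool = False) -> dict:
    # Minimal OpenAI-style chat payload; the agent endpoint is described
    # as OpenAI-compatible, so it is assumed to accept this shape.
    return {
        "messages": [{"role": "user", "content": question}],
        "stream": stream,
    }

payload = build_agent_request("Which artist has the most albums?")
print(json.dumps(payload))
```

The resulting JSON can be POSTed to the endpoint registered in the UI with any HTTP client.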
4 changes: 2 additions & 2 deletions AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -104,7 +104,7 @@ services:
- "8080:8000"
ipc: host
agent-ui:
image: opea/agent-ui
image: opea/agent-ui:latest
container_name: agent-ui
environment:
host_ip: ${host_ip}
@@ -138,4 +138,4 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: --model $LLM_MODEL_ID --tensor-parallel-size 4 --host 0.0.0.0 --port 8000 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 16384
command: --model $LLM_MODEL_ID --tensor-parallel-size 4 --host 0.0.0.0 --port 8000 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 16384
4 changes: 3 additions & 1 deletion AgentQnA/retrieval_tool/run_ingest_data.sh
@@ -1,7 +1,9 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

host_ip=$(hostname -I | awk '{print $1}')
port=6007
FILEDIR=${WORKDIR}/GenAIExamples/AgentQnA/example_data/
FILENAME=test_docs_music.jsonl

python3 index_data.py --filedir ${FILEDIR} --filename ${FILENAME} --host_ip $host_ip
python3 index_data.py --filedir ${FILEDIR} --filename ${FILENAME} --host_ip $host_ip --port $port
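The updated script now passes `--port` explicitly alongside `--host_ip`. A hypothetical mirror of the flag surface `index_data.py` is assumed to expose, inferred only from the invocation above (the real script lives in the repo):

```python
import argparse

# Hypothetical sketch of the CLI that run_ingest_data.sh targets; flag
# names are taken from the shell invocation, defaults are assumptions.
parser = argparse.ArgumentParser(description="Ingest example docs into the retrieval tool")
parser.add_argument("--filedir", required=True)
parser.add_argument("--filename", required=True)
parser.add_argument("--host_ip", required=True)
parser.add_argument("--port", type=int, default=6007)

args = parser.parse_args(
    ["--filedir", "/tmp/example_data", "--filename", "test_docs_music.jsonl",
     "--host_ip", "127.0.0.1"]
)
print(args.port)  # falls back to the default when --port is omitted
```

Passing the port from the shell keeps the script usable against a dataprep service exposed on a non-default port.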
29 changes: 6 additions & 23 deletions AgentQnA/tests/step4_launch_and_validate_agent_gaudi.sh
@@ -8,6 +8,8 @@ WORKPATH=$(dirname "$PWD")
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export ip_address=$(hostname -I | awk '{print $1}')
export host_ip=$ip_address
echo "ip_address=${ip_address}"
export TOOLSET_PATH=$WORKPATH/tools/
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
@@ -24,12 +26,12 @@ ls $HF_CACHE_DIR
vllm_port=8086
vllm_volume=${HF_CACHE_DIR}

function start_tgi(){
echo "Starting tgi-gaudi server"

function start_agent_service() {
echo "Starting agent service"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
source set_env.sh
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml tgi_gaudi.yaml -f compose.telemetry.yaml up -d

docker compose -f compose.yaml up -d
}

function start_all_services() {
@@ -69,7 +71,6 @@ function download_chinook_data(){
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite $WORKDIR/GenAIExamples/AgentQnA/tests/
}


function validate() {
local CONTENT="$1"
local EXPECTED_RESULT="$2"
@@ -138,24 +139,6 @@ function remove_chinook_data(){
echo "Chinook data removed!"
}

export host_ip=$ip_address
echo "ip_address=${ip_address}"


function validate() {
local CONTENT="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"

if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT"
echo 0
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
echo 1
fi
}

function ingest_data_and_validate() {
echo "Ingesting data"
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool/
39 changes: 35 additions & 4 deletions AgentQnA/tests/test_compose_on_gaudi.sh
@@ -26,15 +26,39 @@ function build_agent_docker_image() {
docker compose -f build.yaml build --no-cache
}

function build_retrieval_docker_image() {
cd $WORKDIR/GenAIExamples/DocIndexRetriever/docker_image_build/
get_genai_comps
echo "Build retrieval image with --no-cache..."
docker compose -f build.yaml build --no-cache
}

function stop_crag() {
cid=$(docker ps -aq --filter "name=kdd-cup-24-crag-service")
echo "Stopping container kdd-cup-24-crag-service with cid $cid"
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
}

function stop_agent_docker() {
function stop_agent_containers() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml down
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
echo "Stopping container $container_name"
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
done
}

function stop_telemetry_containers(){
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
container_list=$(cat compose.telemetry.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
echo "Stopping container $container_name"
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
done

}

function stop_llm(){
@@ -69,12 +93,16 @@ function stop_retrieval_tool() {
}
echo "workpath: $WORKPATH"
echo "=================== Stop containers ===================="
stop_llm
stop_crag
stop_agent_docker
stop_agent_containers
stop_retrieval_tool
stop_telemetry_containers

cd $WORKPATH/tests

echo "=================== #1 Building docker images===================="
build_retrieval_docker_image
build_agent_docker_image
echo "=================== #1 Building docker images completed===================="

@@ -83,8 +111,11 @@ bash $WORKPATH/tests/step4_launch_and_validate_agent_gaudi.sh
echo "=================== #4 Agent, retrieval test passed ===================="

echo "=================== #5 Stop agent and API server===================="
stop_llm
stop_crag
stop_agent_docker
stop_agent_containers
stop_retrieval_tool
stop_telemetry_containers
echo "=================== #5 Agent and API server stopped===================="

echo y | docker system prune
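The new cleanup functions extract container names from the compose files with a grep/cut pipeline. A self-contained demo of that pattern against a throwaway file (the file contents here are illustrative, not the real compose file):

```shell
# Demo of the name-extraction pattern used by stop_agent_containers:
# pull container_name values out of a compose file with grep and cut.
cat > /tmp/demo_compose.yaml <<'EOF'
services:
  agent-ui:
    container_name: agent-ui
  vllm-service:
    container_name: vllm-gaudi-server
EOF
container_list=$(grep container_name /tmp/demo_compose.yaml | cut -d':' -f2)
# Unquoted expansion collapses the whitespace around each extracted name.
echo $container_list
```

Driving the teardown from the compose file itself keeps the stop functions in sync when services are added or renamed.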
2 changes: 1 addition & 1 deletion AgentQnA/tools/worker_agent_tools.py
@@ -12,7 +12,7 @@ def search_knowledge_base(query: str) -> str:
print(url)
proxies = {"http": ""}
payload = {
"text": query,
"messages": query,
}
response = requests.post(url, json=payload, proxies=proxies)
print(response)
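The tool change above renames the request field from `text` to `messages`. A minimal sketch of the updated payload construction (the URL handling in the real tool comes from environment variables and is omitted here):

```python
def build_retrieval_payload(query: str) -> dict:
    # The retrieval service now expects the query under "messages"
    # (previously "text"), matching the change in worker_agent_tools.py.
    return {"messages": query}

print(build_retrieval_payload("Which artist has the most albums?"))
```

Any caller that still sends `{"text": ...}` will need the same rename to keep working against the updated service.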