4 changes: 4 additions & 0 deletions GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -60,6 +60,7 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+      TEXT_GENERATION_SERVER_IGNORE_EOS_TOKEN: false
     runtime: habana
     cap_add:
       - SYS_NICE
@@ -93,6 +94,7 @@ services:
       OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
       EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
+      MAX_OUTPUT_TOKENS: ${MAX_OUTPUT_TOKENS}
       LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   retriever-neo4j-llamaindex:
@@ -122,6 +124,7 @@ services:
       OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
       EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
+      MAX_OUTPUT_TOKENS: ${MAX_OUTPUT_TOKENS}
       LOGFLAG: ${LOGFLAG}
       RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_NEO4J"
     restart: unless-stopped
@@ -144,6 +147,7 @@ services:
       - RETRIEVER_SERVICE_PORT=7000
       - LLM_SERVER_HOST_IP=tgi-gaudi-service
       - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_MODEL_ID=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
     ipc: host
     restart: always
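Apart from the literal TEXT_GENERATION_SERVER_IGNORE_EOS_TOKEN flag, the new entries are plain pass-throughs from the host environment (their values come from set_env.sh below), so they can also be overridden per run without editing the file. A minimal sketch, with illustrative values:

    # Override the generation cap and model for a single run (illustrative values)
    MAX_OUTPUT_TOKENS=512 LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" \
        docker compose -f compose.yaml up -d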
4 changes: 3 additions & 1 deletion GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -12,7 +12,7 @@ popd > /dev/null
 
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export OPENAI_EMBEDDING_MODEL="text-embedding-3-small"
-export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
 export OPENAI_LLM_MODEL="gpt-4o"
 export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
 export TGI_LLM_ENDPOINT="http://${host_ip}:6005"
@@ -21,3 +21,5 @@ export NEO4J_USERNAME=neo4j
 export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
 export LOGFLAG=True
 export RETRIEVER_SERVICE_PORT=80
+export LLM_SERVER_PORT=80
+export MAX_OUTPUT_TOKENS=1024
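With the two new exports in place, the usual bring-up sequence picks them up automatically. A minimal sketch, assuming it is run from the gaudi compose directory:

    source ./set_env.sh
    docker compose -f compose.yaml up -d
    echo "${LLM_SERVER_PORT} ${MAX_OUTPUT_TOKENS}"   # expect: 80 1024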
3 changes: 2 additions & 1 deletion GraphRAG/graphrag.py
@@ -52,6 +52,7 @@ def generate_rag_prompt(question, documents):
 RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
 LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
 LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
+LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct")
 
 
 def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
@@ -60,7 +61,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
     elif self.services[cur_node].service_type == ServiceType.LLM:
         # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
         next_inputs = {}
-        next_inputs["model"] = "tgi"  # specifically clarify the fake model to make the format unified
+        next_inputs["model"] = LLM_MODEL_ID
         next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
         next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
         next_inputs["top_p"] = llm_parameters_dict["top_p"]
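With the "tgi" placeholder replaced by LLM_MODEL_ID, the request the megaservice forwards is an ordinary OpenAI-style chat completion. A sketch of an equivalent request against the TGI endpoint from set_env.sh (port 6005; the prompt and sampling values are illustrative):

    curl "http://${host_ip}:6005/v1/chat/completions" \
        -X POST \
        -H "Content-Type: application/json" \
        -d '{"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
             "messages": [{"role": "user", "content": "What is GraphRAG?"}],
             "max_tokens": 128,
             "top_p": 0.95}'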
7 changes: 5 additions & 2 deletions GraphRAG/tests/test_compose_on_gaudi.sh
@@ -2,7 +2,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-set -xe
+set -x
 IMAGE_REPO=${IMAGE_REPO:-"opea"}
 IMAGE_TAG=${IMAGE_TAG:-"latest"}
 echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -51,6 +51,8 @@ function start_services() {
     export TGI_LLM_ENDPOINT="http://${ip_address}:6005"
     export host_ip=${ip_address}
     export LOGFLAG=true
+    export MAX_OUTPUT_TOKENS="1024"
+    unset OPENAI_API_KEY
 
     # Start Docker Containers
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
@@ -76,6 +78,7 @@ function validate_service() {
     if [[ $SERVICE_NAME == *"extract_graph_neo4j"* ]]; then
         cd $LOG_PATH
         HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL")
+        echo $HTTP_RESPONSE
     elif [[ $SERVICE_NAME == *"neo4j-apoc"* ]]; then
        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" "$URL")
     else
@@ -211,7 +214,7 @@ function main() {
     echo "Mega service start duration is $duration s"
 
     if [ "${mode}" == "perf" ]; then
-        python3 $WORKPATH/tests/chatqna_benchmark.py
+        echo "not implemented"
     elif [ "${mode}" == "" ]; then
         validate_microservices
         validate_megaservice
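Note that dropping -e from set -xe means a failing command no longer aborts the whole run, so each validation step has to inspect its own result; the HTTPSTATUS suffix that curl appends above makes that check simple. A sketch of the pattern, reusing the script's variable names:

    # Split the combined curl output into status and body (sketch)
    HTTP_STATUS=$(echo "$HTTP_RESPONSE" | sed -e 's/.*HTTPSTATUS://')
    RESPONSE_BODY=$(echo "$HTTP_RESPONSE" | sed -e 's/HTTPSTATUS:.*//')
    if [ "$HTTP_STATUS" -ne "200" ]; then
        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
    fi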
2 changes: 1 addition & 1 deletion GraphRAG/ui/svelte/playwright.config.ts
@@ -21,7 +21,7 @@ export default defineConfig({
    * Maximum time expect() should wait for the condition to be met.
    * For example in `await expect(locator).toHaveText();`
    */
-  timeout: 30000,
+  timeout: 300000,
   },
   /* Run tests in files in parallel */
   fullyParallel: true,