4 changes: 4 additions & 0 deletions GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -60,6 +60,7 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+      TEXT_GENERATION_SERVER_IGNORE_EOS_TOKEN: false
     runtime: habana
     cap_add:
       - SYS_NICE
@@ -93,6 +94,7 @@ services:
       OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
       EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
+      MAX_OUTPUT_TOKENS: ${MAX_OUTPUT_TOKENS}
       LOGFLAG: ${LOGFLAG}
     restart: unless-stopped
   retriever-neo4j-llamaindex:
@@ -122,6 +124,7 @@ services:
       OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
       EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
+      MAX_OUTPUT_TOKENS: ${MAX_OUTPUT_TOKENS}
       LOGFLAG: ${LOGFLAG}
       RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_NEO4J"
     restart: unless-stopped
@@ -144,6 +147,7 @@ services:
       - RETRIEVER_SERVICE_PORT=7000
       - LLM_SERVER_HOST_IP=tgi-gaudi-service
       - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_MODEL_ID=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
     ipc: host
     restart: always
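Apart from the literal TEXT_GENERATION_SERVER_IGNORE_EOS_TOKEN flag, the new entries are plain pass-throughs from the host environment (their values come from set_env.sh below), so they can also be overridden per run without editing the file. A minimal sketch, with illustrative values:

    # Override the generation cap and model for a single run (illustrative values)
    MAX_OUTPUT_TOKENS=512 LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" \
        docker compose -f compose.yaml up -d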
4 changes: 3 additions & 1 deletion GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -12,7 +12,7 @@ popd > /dev/null
 
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export OPENAI_EMBEDDING_MODEL="text-embedding-3-small"
-export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
 export OPENAI_LLM_MODEL="gpt-4o"
 export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
 export TGI_LLM_ENDPOINT="http://${host_ip}:6005"
@@ -21,3 +21,5 @@ export NEO4J_USERNAME=neo4j
 export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
 export LOGFLAG=True
 export RETRIEVER_SERVICE_PORT=80
+export LLM_SERVER_PORT=80
+export MAX_OUTPUT_TOKENS=1024
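With the two new exports in place, the usual bring-up sequence picks them up automatically. A minimal sketch, assuming it is run from the gaudi compose directory:

    source ./set_env.sh
    docker compose -f compose.yaml up -d
    echo "${LLM_SERVER_PORT} ${MAX_OUTPUT_TOKENS}"   # expect: 80 1024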
3 changes: 2 additions & 1 deletion GraphRAG/graphrag.py
@@ -52,6 +52,7 @@ def generate_rag_prompt(question, documents):
 RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
 LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
 LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
+LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct")
 
 
 def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
@@ -60,7 +61,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
     elif self.services[cur_node].service_type == ServiceType.LLM:
         # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
         next_inputs = {}
-        next_inputs["model"] = "tgi"  # specifically clarify the fake model to make the format unified
+        next_inputs["model"] = LLM_MODEL_ID
         next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
         next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
         next_inputs["top_p"] = llm_parameters_dict["top_p"]
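With the "tgi" placeholder replaced by LLM_MODEL_ID, the request the megaservice forwards is an ordinary OpenAI-style chat completion. A sketch of an equivalent request against the TGI endpoint from set_env.sh (port 6005; the prompt and sampling values are illustrative):

    curl "http://${host_ip}:6005/v1/chat/completions" \
        -X POST \
        -H "Content-Type: application/json" \
        -d '{"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
             "messages": [{"role": "user", "content": "What is GraphRAG?"}],
             "max_tokens": 128,
             "top_p": 0.95}'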
7 changes: 5 additions & 2 deletions GraphRAG/tests/test_compose_on_gaudi.sh
@@ -2,7 +2,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-set -xe
+set -x
 IMAGE_REPO=${IMAGE_REPO:-"opea"}
 IMAGE_TAG=${IMAGE_TAG:-"latest"}
 echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -51,6 +51,8 @@ function start_services() {
     export TGI_LLM_ENDPOINT="http://${ip_address}:6005"
     export host_ip=${ip_address}
     export LOGFLAG=true
+    export MAX_OUTPUT_TOKENS="1024"
+    unset OPENAI_API_KEY
 
     # Start Docker Containers
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
@@ -76,6 +78,7 @@ function validate_service() {
     if [[ $SERVICE_NAME == *"extract_graph_neo4j"* ]]; then
         cd $LOG_PATH
         HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL")
+        echo $HTTP_RESPONSE
     elif [[ $SERVICE_NAME == *"neo4j-apoc"* ]]; then
        HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" "$URL")
     else
@@ -211,7 +214,7 @@ function main() {
     echo "Mega service start duration is $duration s"
 
     if [ "${mode}" == "perf" ]; then
-        python3 $WORKPATH/tests/chatqna_benchmark.py
+        echo "not implemented"
     elif [ "${mode}" == "" ]; then
         validate_microservices
         validate_megaservice
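Note that dropping -e from set -xe means a failing command no longer aborts the whole run, so each validation step has to inspect its own result; the HTTPSTATUS suffix that curl appends above makes that check simple. A sketch of the pattern, reusing the script's variable names:

    # Split the combined curl output into status and body (sketch)
    HTTP_STATUS=$(echo "$HTTP_RESPONSE" | sed -e 's/.*HTTPSTATUS://')
    RESPONSE_BODY=$(echo "$HTTP_RESPONSE" | sed -e 's/HTTPSTATUS:.*//')
    if [ "$HTTP_STATUS" -ne "200" ]; then
        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
    fi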
2 changes: 1 addition & 1 deletion GraphRAG/ui/svelte/playwright.config.ts
@@ -21,7 +21,7 @@ export default defineConfig({
    * Maximum time expect() should wait for the condition to be met.
    * For example in `await expect(locator).toHaveText();`
    */
-  timeout: 30000,
+  timeout: 300000,
   },
   /* Run tests in files in parallel */
   fullyParallel: true,