diff --git a/comps/dataprep/deployment/docker_compose/compose.yaml b/comps/dataprep/deployment/docker_compose/compose.yaml index 95fb462824..6a9638de36 100644 --- a/comps/dataprep/deployment/docker_compose/compose.yaml +++ b/comps/dataprep/deployment/docker_compose/compose.yaml @@ -20,7 +20,7 @@ services: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-elasticsearch ports: - - "${DATAPREP_PORT:-11100}:5000" + - "${DATAPREP_PORT:-5000}:5000" ipc: host environment: no_proxy: ${no_proxy} @@ -40,7 +40,7 @@ services: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-milvus-server ports: - - "${DATAPREP_PORT:-11101}:5000" + - "${DATAPREP_PORT:-5000}:5000" ipc: host environment: no_proxy: ${no_proxy} @@ -66,7 +66,7 @@ services: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-multimodal-milvus-server ports: - - "${DATAPREP_PORT:-11102}:5000" + - "${DATAPREP_PORT:-5000}:5000" depends_on: standalone: condition: service_healthy @@ -97,7 +97,7 @@ services: tei-embedding-serving: condition: service_healthy ports: - - "${DATAPREP_PORT:-11103}:5000" + - "${DATAPREP_PORT:-5000}:5000" ipc: host environment: no_proxy: ${no_proxy} @@ -125,7 +125,7 @@ services: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-opensearch-server ports: - - "${DATAPREP_PORT:-11104}:5000" + - "${DATAPREP_PORT:-5000}:5000" depends_on: opensearch-vector-db: condition: service_healthy @@ -147,7 +147,7 @@ services: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-pgvector-server ports: - - "${DATAPREP_PORT:-11105}:5000" + - "${DATAPREP_PORT:-5000}:5000" depends_on: pgvector-db: condition: service_healthy @@ -164,7 +164,7 @@ services: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-pinecone-server ports: - - "${DATAPREP_PORT:-11106}:5000" + - "${DATAPREP_PORT:-5000}:5000" ipc: host environment: no_proxy: ${no_proxy} @@ -185,7 +185,7 @@ services: 
tei-embedding-serving: condition: service_healthy ports: - - "${DATAPREP_PORT:-11107}:5000" + - "${DATAPREP_PORT:-5000}:5000" ipc: host environment: no_proxy: ${no_proxy} @@ -208,7 +208,7 @@ services: tei-embedding-serving: condition: service_healthy ports: - - "${DATAPREP_PORT:-11108}:5000" + - "${DATAPREP_PORT:-5000}:5000" ipc: host environment: no_proxy: ${no_proxy} @@ -227,7 +227,7 @@ services: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-multimodal-redis-server ports: - - "${DATAPREP_PORT:-11109}:5000" + - "${DATAPREP_PORT:-5000}:5000" depends_on: redis-vector-db: condition: service_healthy @@ -250,7 +250,7 @@ services: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-vdms-server ports: - - "${DATAPREP_PORT:-11110}:5000" + - "${DATAPREP_PORT:-5000}:5000" depends_on: vdms-vector-db: condition: service_healthy @@ -270,7 +270,7 @@ services: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-vdms-multimodal-server ports: - - "${DATAPREP_PORT:-11111}:5000" + - "${DATAPREP_PORT:-5000}:5000" depends_on: vdms-vector-db: condition: service_healthy @@ -297,7 +297,7 @@ services: tei-embedding-serving: condition: service_healthy ports: - - "${DATAPREP_PORT:-11108}:5000" + - "${DATAPREP_PORT:-5000}:5000" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git a/comps/dataprep/src/README_neo4j_llamaindex.md b/comps/dataprep/src/README_neo4j_llamaindex.md index a29bb24adb..f41048653e 100644 --- a/comps/dataprep/src/README_neo4j_llamaindex.md +++ b/comps/dataprep/src/README_neo4j_llamaindex.md @@ -11,7 +11,16 @@ This microservice follows the graphRAG approached defined by Microsoft paper ["F This dataprep microservice ingests the input files and uses LLM (TGI, VLLM or OpenAI model when OPENAI_API_KEY is set) to extract entities, relationships and descriptions of those to build a graph-based text index. Compose yaml file deploys TGI but works also with vLLM inference endpoint. 
-## Setup Environment Variables +## 🚀Start Microservice with Docker + +### 1. Build Docker Image + +```bash +cd ../../../../ +docker build -t opea/dataprep:latest --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . +``` + +### 2. Setup Environment Variables ```bash # Manually set private environment settings @@ -34,46 +43,20 @@ export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" export MAX_INPUT_TOKENS=4096 export MAX_TOTAL_TOKENS=8192 export OPENAI_LLM_MODEL="gpt-4o" -export TEI_EMBEDDER_PORT=11633 +export TEI_EMBEDDER_PORT=8090 export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" -export LLM_ENDPOINT_PORT=11634 +export LLM_ENDPOINT_PORT=8008 export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" export NEO4J_AUTH="${NEO4J_USERNAME}/${NEO4J_PASSWORD}" -export NEO4J_PORT1=7474 # 11631 -export NEO4J_PORT2=7687 # 11632 +export NEO4J_PORT1=7474 +export NEO4J_PORT2=7687 export NEO4J_URI="bolt://${host_ip}:${NEO4J_PORT2}" export NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004/v1/dataprep" +export DATAPREP_PORT=5000 +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep" export LOGFLAG=True ``` -## 🚀Start Microservice with Docker - -### 1. Build Docker Image - -```bash -cd ../../../../ -docker build -t opea/dataprep-neo4j-llamaindex:latest --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . -``` - -### 2. 
Setup Environment Variables - -```bash -# Set private environment settings -export host_ip=${your_hostname IP} # local IP -export no_proxy=$no_proxy,${host_ip} # important to add {host_ip} for containers communication -export http_proxy=${your_http_proxy} -export https_proxy=${your_http_proxy} -export NEO4J_URI=${your_neo4j_url} -export NEO4J_USERNAME=${your_neo4j_username} -export NEO4J_PASSWORD=${your_neo4j_password} -export PYTHONPATH=${path_to_comps} -export OPENAI_KEY=${your_openai_api_key} # optional, when not provided will use smaller models TGI/TEI -export HUGGINGFACEHUB_API_TOKEN=${your_hf_token} -# set additional environment settings -source ./set_env.sh -``` - ### 3. Run Docker with Docker Compose Docker compose will start 4 microservices: dataprep-neo4j-llamaindex, neo4j-apoc, tgi-gaudi-service and tei-embedding-service. The reason TGI and TEI are needed is because dataprep relies on LLM to extract entities and relationships from text to build the graph and Neo4j Property Graph Index. Neo4j database supports embeddings natively so we do not need a separate vector store. Checkout the blog [Introducing the Property Graph Index: A Powerful New Way to Build Knowledge Graphs with LLMs](https://www.llamaindex.ai/blog/introducing-the-property-graph-index-a-powerful-new-way-to-build-knowledge-graphs-with-llms) for a better understanding of Property Graph Store and Index. @@ -91,7 +74,7 @@ Once document preparation microservice for Neo4J is started, user can use below curl -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@./file1.txt" \ - http://${host_ip}:6004/v1/dataprep/ingest + http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest ``` You can specify chunk_size and chunk_size by the following commands. 
@@ -102,7 +85,7 @@ curl -X POST \ -F "files=@./file1.txt" \ -F "chunk_size=1500" \ -F "chunk_overlap=100" \ - http://${host_ip}:6004/v1/dataprep/ingest + http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest ``` Please note that clustering of extracted entities and summarization happens in this data preparation step. The result of this is: @@ -122,5 +105,5 @@ curl -X POST \ -F "files=@./your_file.pdf" \ -F "process_table=true" \ -F "table_strategy=hq" \ - http://localhost:6004/v1/dataprep/ingest + http://localhost:${DATAPREP_PORT}/v1/dataprep/ingest ``` diff --git a/comps/dataprep/src/integrations/neo4j_llamaindex.py b/comps/dataprep/src/integrations/neo4j_llamaindex.py index 85e3720598..68b7539038 100644 --- a/comps/dataprep/src/integrations/neo4j_llamaindex.py +++ b/comps/dataprep/src/integrations/neo4j_llamaindex.py @@ -60,7 +60,7 @@ from llama_index.core.schema import BaseNode, TransformComponent host_ip = os.getenv("host_ip") -NEO4J_PORT2 = os.getenv("NEO4J_PORT2") +NEO4J_PORT2 = os.getenv("NEO4J_PORT2", "7687") # Neo4J configuration NEO4J_URL = os.getenv("NEO4J_URL", f"bolt://{host_ip}:{NEO4J_PORT2}") NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") diff --git a/comps/retrievers/src/README_neo4j.md b/comps/retrievers/src/README_neo4j.md index e8e413fb43..e5563c2198 100644 --- a/comps/retrievers/src/README_neo4j.md +++ b/comps/retrievers/src/README_neo4j.md @@ -17,25 +17,6 @@ cd ../../../ docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . ``` -### 2. Install Requirements - -```bash -pip install -r requirements.txt -``` - -### 3. Start Neo4j VectorDB Service - -```bash -docker run \ - -p 7474:7474 -p 7687:7687 \ - -v $PWD/data:/data -v $PWD/plugins:/plugins \ - --name neo4j-apoc \ - -d \ - -e NEO4J_AUTH=neo4j/password \ - -e NEO4J_PLUGINS=\[\"apoc\"\] \ - neo4j:latest -``` - ### 2. 
Setup Environment Variables ```bash @@ -58,18 +39,17 @@ export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" export MAX_INPUT_TOKENS=4096 export MAX_TOTAL_TOKENS=8192 export OPENAI_LLM_MODEL="gpt-4o" -export TEI_EMBEDDER_PORT=11633 +export TEI_EMBEDDER_PORT=8090 export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" -export LLM_ENDPOINT_PORT=11634 +export LLM_ENDPOINT_PORT=8008 export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" -export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" -export TGI_LLM_ENDPOINT="http://${host_ip}:6005" -export NEO4J_PORT1=7474 # 11631 -export NEO4J_PORT2=7687 # 11632 +export NEO4J_PORT1=7474 +export NEO4J_PORT2=7687 export NEO4J_URI="bolt://${host_ip}:${NEO4J_PORT2}" export NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004/v1/dataprep" -export RETRIEVER_PORT=11635 +export DATAPREP_PORT=5000 +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}/v1/dataprep" +export RETRIEVER_PORT=7000 export LOGFLAG=True ``` @@ -88,7 +68,7 @@ docker compose -f compose.yaml up ${service_name} -d ### 3.1 Check Service Status ```bash -curl http://${host_ip}:7000/v1/health_check \ +curl http://${host_ip}:${RETRIEVER_PORT}/v1/health_check \ -X GET \ -H 'Content-Type: application/json' ``` @@ -98,7 +78,7 @@ curl http://${host_ip}:7000/v1/health_check \ If OPEN_AI_KEY is provided it will use OPENAI endpoints for LLM and Embeddings otherwise will use TGI and TEI endpoints. If a model name not provided in the request it will use the default specified by the set_env.sh script. 
```bash -curl -X POST http://${host_ip}:7000/v1/retrieval \ +curl -X POST http://${host_ip}:${RETRIEVER_PORT}/v1/retrieval \ -H "Content-Type: application/json" \ -d '{"model": "gpt-3.5-turbo","messages": [{"role": "user","content": "Who is John Brady and has he had any confrontations?"}]}' ``` diff --git a/comps/retrievers/src/integrations/config.py b/comps/retrievers/src/integrations/config.py index 81db3e20e4..6ea0570268 100644 --- a/comps/retrievers/src/integrations/config.py +++ b/comps/retrievers/src/integrations/config.py @@ -63,7 +63,7 @@ def get_boolean_env_var(var_name, default_value=False): ####################################################### # Neo4j # ####################################################### -NEO4J_PORT2 = os.getenv("NEO4J_PORT2", "11632") +NEO4J_PORT2 = os.getenv("NEO4J_PORT2", "7687") NEO4J_URL = os.getenv("NEO4J_URI", f"bolt://localhost:{NEO4J_PORT2}") NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j") NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test") diff --git a/tests/dataprep/test_dataprep_elasticsearch.sh b/tests/dataprep/test_dataprep_elasticsearch.sh index 5cc577ba80..e36096f964 100644 --- a/tests/dataprep/test_dataprep_elasticsearch.sh +++ b/tests/dataprep/test_dataprep_elasticsearch.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -DATAPREP_PORT=11100 +export DATAPREP_PORT=11100 export TAG="comps" function build_docker_images() { diff --git a/tests/dataprep/test_dataprep_milvus.sh b/tests/dataprep/test_dataprep_milvus.sh index 0d60980e3f..df2fabd048 100644 --- a/tests/dataprep/test_dataprep_milvus.sh +++ b/tests/dataprep/test_dataprep_milvus.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -DATAPREP_PORT=11101 +export DATAPREP_PORT=11101 service_name="dataprep-milvus tei-embedding-serving etcd minio standalone" export TAG="comps" export DATA_PATH=${model_cache} diff 
--git a/tests/dataprep/test_dataprep_milvus_multimodal.sh b/tests/dataprep/test_dataprep_milvus_multimodal.sh index ca6b7fc8f3..19930e18b1 100644 --- a/tests/dataprep/test_dataprep_milvus_multimodal.sh +++ b/tests/dataprep/test_dataprep_milvus_multimodal.sh @@ -22,7 +22,7 @@ image_fn="${tmp_dir}/${image_name}.png" caption_fn="${tmp_dir}/${image_name}.txt" pdf_name="nke-10k-2023" pdf_fn="${tmp_dir}/${pdf_name}.pdf" -DATAPREP_PORT="11102" +export DATAPREP_PORT="11102" function build_docker_images() { cd $WORKPATH diff --git a/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh index 4e95ea9b18..2f197a158e 100755 --- a/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh +++ b/tests/dataprep/test_dataprep_neo4j_on_intel_hpu.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -DATAPREP_PORT=11103 +export DATAPREP_PORT=11103 LLM_ENDPOINT_PORT=10510 export TAG="comps" export DATA_PATH=${model_cache} @@ -29,8 +29,8 @@ function build_docker_images() { function start_service() { service_name="neo4j-apoc tei-embedding-serving tgi-gaudi-server dataprep-neo4j-llamaindex" export host_ip=${ip_address} - export NEO4J_PORT1=7474 # 11631 - export NEO4J_PORT2=7687 # 11632 + export NEO4J_PORT1=11631 + export NEO4J_PORT2=11632 export NEO4J_AUTH="neo4j/neo4jtest" export NEO4J_URL="bolt://${ip_address}:${NEO4J_PORT2}" export NEO4J_USERNAME="neo4j" diff --git a/tests/dataprep/test_dataprep_opensearch.sh b/tests/dataprep/test_dataprep_opensearch.sh index 3c42ff27d6..e916b0c280 100644 --- a/tests/dataprep/test_dataprep_opensearch.sh +++ b/tests/dataprep/test_dataprep_opensearch.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -DATAPREP_PORT="11104" +export DATAPREP_PORT="11104" OPENSEARCH_INITIAL_ADMIN_PASSWORD="StRoNgOpEa0)" export TAG="comps" diff --git 
a/tests/dataprep/test_dataprep_pgvector.sh b/tests/dataprep/test_dataprep_pgvector.sh index fe70a22b4a..edb8403376 100644 --- a/tests/dataprep/test_dataprep_pgvector.sh +++ b/tests/dataprep/test_dataprep_pgvector.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -DATAPREP_PORT="11105" +export DATAPREP_PORT="11105" export TAG="comps" SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" diff --git a/tests/dataprep/test_dataprep_pinecone.sh b/tests/dataprep/test_dataprep_pinecone.sh index 7be604f780..aa1ebacc06 100644 --- a/tests/dataprep/test_dataprep_pinecone.sh +++ b/tests/dataprep/test_dataprep_pinecone.sh @@ -6,7 +6,7 @@ set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') -DATAPREP_PORT="11106" +export DATAPREP_PORT="11106" export TAG="comps" function build_docker_images() { diff --git a/tests/dataprep/test_dataprep_qdrant.sh b/tests/dataprep/test_dataprep_qdrant.sh index 54c898513a..ece11b55cd 100644 --- a/tests/dataprep/test_dataprep_qdrant.sh +++ b/tests/dataprep/test_dataprep_qdrant.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -DATAPREP_PORT="11107" +export DATAPREP_PORT="11107" TEI_EMBEDDER_PORT="10220" export TAG="comps" export DATA_PATH=${model_cache} diff --git a/tests/dataprep/test_dataprep_redis.sh b/tests/dataprep/test_dataprep_redis.sh index a17ee6e412..cfc053e1d5 100644 --- a/tests/dataprep/test_dataprep_redis.sh +++ b/tests/dataprep/test_dataprep_redis.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -DATAPREP_PORT="11108" +export DATAPREP_PORT="11108" TEI_EMBEDDER_PORT="10221" export TAG="comps" export DATA_PATH=${model_cache} diff --git a/tests/dataprep/test_dataprep_redis_finance_on_intel_hpu.sh b/tests/dataprep/test_dataprep_redis_finance_on_intel_hpu.sh index 
6ce1d24386..faed69ce13 100644 --- a/tests/dataprep/test_dataprep_redis_finance_on_intel_hpu.sh +++ b/tests/dataprep/test_dataprep_redis_finance_on_intel_hpu.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -DATAPREP_PORT="11108" +export DATAPREP_PORT="11108" TEI_EMBEDDER_PORT="10221" export TAG="comps" diff --git a/tests/dataprep/test_dataprep_redis_multimodal.sh b/tests/dataprep/test_dataprep_redis_multimodal.sh index 63961e887a..370146edc8 100644 --- a/tests/dataprep/test_dataprep_redis_multimodal.sh +++ b/tests/dataprep/test_dataprep_redis_multimodal.sh @@ -23,8 +23,9 @@ audio_name="apple" # Intentionally name the audio file the same as the image f audio_fn="${tmp_dir}/${audio_name}.wav" pdf_name="nke-10k-2023" pdf_fn="${tmp_dir}/${pdf_name}.pdf" +export DATAPREP_PORT="11109" text_ony_pdf_fn="${WORKPATH}/tests/dataprep/ingest_dataprep_text.pdf" -DATAPREP_PORT="11109" + export DATA_PATH=${model_cache} function build_docker_images() { diff --git a/tests/dataprep/test_dataprep_vdms.sh b/tests/dataprep/test_dataprep_vdms.sh index 31a47c90b8..04170cb146 100644 --- a/tests/dataprep/test_dataprep_vdms.sh +++ b/tests/dataprep/test_dataprep_vdms.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -DATAPREP_PORT="11110" +export DATAPREP_PORT="11110" export TAG="comps" function build_docker_images() { diff --git a/tests/dataprep/test_dataprep_vdms_multimodal.sh b/tests/dataprep/test_dataprep_vdms_multimodal.sh index 9bb7fa01f8..d71093f37e 100755 --- a/tests/dataprep/test_dataprep_vdms_multimodal.sh +++ b/tests/dataprep/test_dataprep_vdms_multimodal.sh @@ -7,7 +7,7 @@ set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') -DATAPREP_PORT="11111" +export DATAPREP_PORT="11111" function build_docker_images() { cd $WORKPATH diff --git 
a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh index 3ecb3b9035..26f805bc1f 100644 --- a/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh +++ b/tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh @@ -40,8 +40,8 @@ function build_docker_images() { } function start_service() { - export TEI_EMBEDDER_PORT=11633 - export LLM_ENDPOINT_PORT=11634 + export TEI_EMBEDDER_PORT=12007 + export LLM_ENDPOINT_PORT=10511 export RETRIEVER_PORT=11635 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export DATA_PATH="/data2/hf_model" @@ -50,9 +50,10 @@ function start_service() { export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" - export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004" - export NEO4J_PORT1=7474 # 11631 - export NEO4J_PORT2=7687 # 11632 + export DATAPREP_PORT=11103 + export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_PORT}" + export NEO4J_PORT1=11631 + export NEO4J_PORT2=11632 export NEO4J_URI="bolt://${host_ip}:${NEO4J_PORT2}" export NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" export NEO4J_USERNAME="neo4j" @@ -66,7 +67,7 @@ function start_service() { # dataprep neo4j # Not testing openai code path since not able to provide key for cicd - docker run -d --name="test-comps-retrievers-neo4j-llama-index-dataprep" -p 6004:5000 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ + docker run -d --name="test-comps-retrievers-neo4j-llama-index-dataprep" -p $DATAPREP_PORT:5000 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \ -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$EMBEDDING_MODEL_ID -e LLM_MODEL_ID=$LLM_MODEL_ID -e host_ip=$host_ip -e no_proxy=$no_proxy \ -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" -e NEO4J_USERNAME="neo4j" 
\ -e NEO4J_PASSWORD="neo4jtest" -e NEO4J_PORT1=$NEO4J_PORT1 -e NEO4J_PORT2=$NEO4J_PORT2 -e HF_TOKEN=$HF_TOKEN -e MAX_INPUT_TOKENS=$MAX_INPUT_TOKENS -e LOGFLAG=True \ @@ -139,7 +140,7 @@ function validate_microservice() { # test /v1/dataprep graph extraction echo "The stock of company Chevron has declined about 10% over the past 90-days despite the fact that Q2 consensus earnings estimates have risen sharply (~25%) during that same time frame. Over the years, Chevron has kept a very strong balance sheet. FirstEnergy company posted its earnings results on Tuesday. The utilities provider reported $0.53 earnings per share for the quarter, topping the consensus estimate of $0.52 by $0.01, RTT News reports. FirstEnergy had a net margin of 10.85% and a return on equity of 17.17%. The Dáil was almost suspended on Thursday afternoon after Sinn Féin TD John Brady walked across the chamber and placed an on-call pager in front of the Minister for Housing Darragh O’Brien during a debate on retained firefighters. Darragh O’Brien said John Brady had taken part in an act of theatre that was obviously choreographed. Around 2,000 retained firefighters around the country staged a second day of industrial action on Tuesday and are due to start all out-strike action from next Tuesday. The mostly part-time workers, who keep the services going outside of Ireland’s larger urban centres, are taking industrial action in a dispute over pay and working conditions. Speaking in the Dáil, Sinn Féin deputy leader Pearse Doherty said firefighters had marched on Leinster House today and were very angry at the fact the Government will not intervene. Reintroduction of tax relief on mortgages needs to be considered, Darragh O’Brien says. Martin withdraws comment after saying People Before Profit would ‘put the jackboot on people’ Taoiseach ‘propagated fears’ farmers forced to rewet land due to nature restoration law – Cairns An intervention is required now. 
I’m asking you to make an improved offer in relation to pay for retained firefighters, Mr Doherty told the housing minister. I’m also asking you, and challenging you, to go outside after this Order of Business and meet with the firefighters because they are just fed up to the hilt in relation to what you said. Some of them have handed in their pagers to members of the Opposition and have challenged you to wear the pager for the next number of weeks, put up with an €8,600 retainer and not leave your community for the two and a half kilometres and see how you can stand over those type of pay and conditions. At this point, John Brady got up from his seat, walked across the chamber and placed the pager on the desk in front of Darragh O’Brien. Ceann Comhairle Seán Ó Fearghaíl said the Sinn Féin TD was completely out of order and told him not to carry out a charade in this House, adding it was absolutely outrageous behaviour and not to be encouraged. Darragh O’Brien said John Brady had engaged in an act of theatre here today which was obviously choreographed and was then interrupted with shouts from the Opposition benches. Mr Ó Fearghaíl said he would suspend the House if this racket continues. Darragh O’Brien later said he was confident the dispute could be resolved and he had immense regard for firefighters. The minister said he would encourage the unions to re-engage with the State’s industrial relations process while also accusing Sinn Féin of using the issue for their own political gain." > $LOG_PATH/dataprep_file.txt validate_service \ - "http://${host_ip}:6004/v1/dataprep/ingest" \ + "http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest" \ "Data preparation succeeded" \ "extract_graph_neo4j" \ "test-comps-retrievers-neo4j-llama-index-dataprep"