2 changes: 1 addition & 1 deletion comps/retrievers/src/Dockerfile
@@ -26,7 +26,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \
fi && \
pip install --no-cache-dir torch torchvision ${PIP_EXTRA_INDEX_URL} && \
pip install --no-cache-dir ${PIP_EXTRA_INDEX_URL} -r /home/user/comps/retrievers/src/requirements.txt && \
-pip install opentelemetry-api==1.27.0 opentelemetry-exporter-otlp==1.27.0 opentelemetry-sdk==1.27.0
+pip install opentelemetry-api==1.29.0 opentelemetry-exporter-otlp==1.29.0 opentelemetry-sdk==1.29.0

ENV PYTHONPATH=$PYTHONPATH:/home/user

2 changes: 1 addition & 1 deletion comps/retrievers/src/integrations/vdms.py
@@ -48,7 +48,7 @@ def _initialize_embedder(self):
from comps.third_parties.clip.src.clip_embedding import vCLIP

embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 64})
-if TEI_EMBEDDING_ENDPOINT:
+elif TEI_EMBEDDING_ENDPOINT:
# create embeddings using TEI endpoint service
if logflag:
logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}")
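Review note on this hunk: with the original `if`, setting TEI_EMBEDDING_ENDPOINT re-assigned `embeddings` even after the multimodal vCLIP embedder had been constructed, so the endpoint silently won. A minimal sketch of the corrected selection order (the `use_clip` flag and string return values are illustrative stand-ins, not the module's real API):

```python
def pick_embedder(use_clip: bool, tei_endpoint: str | None) -> str:
    """Illustrative only: report which embedder backend wins."""
    if use_clip:
        backend = "vclip"      # multimodal path set up just above
    elif tei_endpoint:         # was `if`, which clobbered the vCLIP choice
        backend = "tei"
    else:
        backend = "local"
    return backend

# Before the fix this returned "tei" even when multimodal mode was on.
assert pick_embedder(True, "http://tei-embedding-serving:80") == "vclip"
```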
1 change: 1 addition & 0 deletions comps/retrievers/src/requirements.txt
@@ -3,6 +3,7 @@ cairosvg
docarray[full]
docx2txt
easyocr
+einops
fastapi
future
graspologic
@@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

include:
-- ../../../tei/deployment/docker_compose/compose.yaml
+- ../../../tei/deployment/docker_compose/compose.yaml

services:
pathway-db:
@@ -12,13 +12,15 @@ services:
- "${PATHWAY_PORT:-6379}:${PATHWAY_PORT:-6379}"
volumes:
- "${PATHWAY_VOLUME:-../../src/README.md}:/app/data/README.md"
-network_mode: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
PATHWAY_HOST: ${PATHWAY_HOST_DB}
PATHWAY_PORT: ${PATHWAY_PORT}
+TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+HF_TOKEN: ${HF_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
healthcheck:
test: ["CMD-SHELL", "sleep 30 && exit 0"]
interval: 1s
4 changes: 2 additions & 2 deletions comps/third_parties/pathway/src/requirements.txt
@@ -1,7 +1,7 @@
langchain
langchain-community
-langchain_huggingface
-langchain_openai
+openai
pathway[xpack-llm]
sentence-transformers
+tiktoken
unstructured[all-docs] >= 0.16
9 changes: 5 additions & 4 deletions comps/third_parties/pathway/src/vectorstore_pathway.py
@@ -7,8 +7,7 @@
import nltk
import pathway as pw
from langchain import text_splitter
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings
-from langchain_huggingface import HuggingFaceEndpointEmbeddings
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
from pathway.xpacks.llm.parsers import ParseUnstructured
from pathway.xpacks.llm.vector_store import VectorStoreServer

@@ -40,15 +39,17 @@
port = int(os.getenv("PATHWAY_PORT", 8666))

EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5")
-
+HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT")

if __name__ == "__main__":
# Create vectorstore
if tei_embedding_endpoint:
# create embeddings using TEI endpoint service
logging.info(f"Initializing the embedder from tei_embedding_endpoint: {tei_embedding_endpoint}")
-embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint)
+embeddings = HuggingFaceInferenceAPIEmbeddings(
+    api_key=HUGGINGFACEHUB_API_TOKEN, model_name=EMBED_MODEL, api_url=tei_embedding_endpoint
+)
else:
# create embeddings using local embedding model
embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
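For reference, a hedged usage sketch of `HuggingFaceInferenceAPIEmbeddings` as wired above, assuming a TEI container already serves `BAAI/bge-base-en-v1.5`; the endpoint URL and port are placeholders, not values from this PR:

```python
import os

from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

# Placeholder endpoint; point this at your running TEI service.
tei_url = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:8090")

embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=os.getenv("HUGGINGFACEHUB_API_TOKEN", ""),
    model_name="BAAI/bge-base-en-v1.5",
    api_url=tei_url,
)

query_vector = embeddings.embed_query("What is a vector store?")
print(len(query_vector))  # bge-base-en-v1.5 produces 768-dimensional vectors
```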
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_elasticsearch.sh
@@ -79,6 +79,8 @@ function validate_microservice() {
function stop_docker() {
cd $WORKPATH/comps/retrievers/deployment/docker_compose
docker compose -f compose.yaml down ${service_name} --remove-orphans
+cid=$(docker ps -aq --filter "name=elasticsearch-vector-db")
+if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_milvus.sh
@@ -83,6 +83,8 @@ function stop_docker() {

cd $WORKPATH/comps/retrievers/deployment/docker_compose
docker compose -f compose.yaml down ${service_name} --remove-orphans
+cid=$(docker ps -aq --filter "name=tei-embedding-serving")
+if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
8 changes: 4 additions & 4 deletions tests/retrievers/test_retrievers_neo4j_on_intel_hpu.sh
@@ -46,8 +46,8 @@ function start_service() {
export RETRIEVER_PORT=11635
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export DATA_PATH="/data2/cache"
-export MAX_INPUT_TOKENS=1024
-export MAX_TOTAL_TOKENS=3000
+export MAX_INPUT_TOKENS=4096
+export MAX_TOTAL_TOKENS=8192
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
@@ -67,7 +67,7 @@ function start_service() {
docker run -d --name="test-comps-retrievers-neo4j-llama-index-dataprep" -p 6004:5000 -v ./data:/data --ipc=host -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT \
-e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e EMBEDDING_MODEL_ID=$EMBEDDING_MODEL_ID -e LLM_MODEL_ID=$LLM_MODEL_ID -e host_ip=$host_ip -e no_proxy=$no_proxy \
-e http_proxy=$http_proxy -e https_proxy=$https_proxy -e NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}" -e NEO4J_USERNAME="neo4j" \
--e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps
+-e NEO4J_PASSWORD="neo4jtest" -e HF_TOKEN=$HF_TOKEN -e MAX_INPUT_LEN=$MAX_INPUT_TOKENS -e LOGFLAG=True -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_NEO4J_LLAMAINDEX" opea/dataprep-neo4j-llamaindex:comps

sleep 1m

@@ -152,7 +152,7 @@ function validate_microservice() {
}

function stop_docker() {
-cid=$(docker ps -aq --filter "name=test-comps-*")
+cid=$(docker ps -aq --filter "name=test-comps-*" --filter "name=neo4j-apoc" --filter "name=tgi-gaudi-server" --filter "name=tei-embedding-serving")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
cd $WORKPATH/comps/retrievers/deployment/docker_compose
docker compose -f compose.yaml down ${service_name} --remove-orphans
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_opensearch.sh
@@ -75,6 +75,8 @@ function validate_microservice() {
function stop_docker() {
cd $WORKPATH/comps/retrievers/deployment/docker_compose
docker compose -f compose.yaml down ${service_name} --remove-orphans
+cid=$(docker ps -aq --filter "name=opensearch-vector-db" --filter "name=tei-embedding-serving")
+if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_pathway.sh
@@ -69,6 +69,8 @@ function validate_microservice() {
function stop_docker() {
cd $WORKPATH/comps/retrievers/deployment/docker_compose
docker compose -f compose.yaml down ${service_name} --remove-orphans
+cid=$(docker ps -aq --filter "name=pathway-db")
+if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_pgvector.sh
@@ -64,6 +64,8 @@ function validate_microservice() {
function stop_docker() {
cd $WORKPATH/comps/retrievers/deployment/docker_compose
docker compose -f compose.yaml down ${service_name} --remove-orphans
+cid=$(docker ps -aq --filter "name=pgvector-db")
+if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_qdrant.sh
@@ -59,6 +59,8 @@ function validate_microservice() {
function stop_docker() {
cd $WORKPATH/comps/retrievers/deployment/docker_compose
docker compose -f compose.yaml down ${service_name} --remove-orphans
+cid=$(docker ps -aq --filter "name=qdrant-vector-db")
+if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_redis.sh
@@ -131,6 +131,8 @@ function validate_mm_microservice() {
function stop_docker() {
cd $WORKPATH/comps/retrievers/deployment/docker_compose
docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans
+cid=$(docker ps -aq --filter "name=redis-vector-db")
+if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
2 changes: 2 additions & 0 deletions tests/retrievers/test_retrievers_vdms.sh
@@ -78,6 +78,8 @@ function validate_microservice() {
function stop_docker() {
cd $WORKPATH/comps/retrievers/deployment/docker_compose
docker compose -f compose.yaml down ${service_name} ${service_name_mm} --remove-orphans
+cid=$(docker ps -aq --filter "name=retriever-vdms*" --filter "name=vdms-vector-db" --filter "name=tei-embedding-serving")
+if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {