From 369d5dc57329aef642ef171e5e4f60c893b4b81f Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 30 Apr 2025 17:56:40 -0700 Subject: [PATCH 01/34] add support for remote server Signed-off-by: alexsin368 --- comps/agent/src/integrations/config.py | 6 ++++++ comps/agent/src/integrations/utils.py | 14 +++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/comps/agent/src/integrations/config.py b/comps/agent/src/integrations/config.py index bcbe6207a5..2aa338dd96 100644 --- a/comps/agent/src/integrations/config.py +++ b/comps/agent/src/integrations/config.py @@ -17,6 +17,12 @@ if os.environ.get("llm_endpoint_url") is not None: env_config += ["--llm_endpoint_url", os.environ["llm_endpoint_url"]] +if os.environ.get("api_key") is not None: + env_config += ["--api_key", os.environ["api_key"]] + +if os.environ.get("use_remote_service") is not None: + env_config += ["--use_remote_service", os.environ["use_remote_service"]] + if os.environ.get("llm_engine") is not None: env_config += ["--llm_engine", os.environ["llm_engine"]] diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 77f4e1cadb..e84d3d1a1f 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -66,7 +66,17 @@ def setup_chat_model(args): **params, ) elif args.llm_engine == "openai": - llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) + if args.use_remote_service: + openai_endpoint = f"{args.llm_endpoint_url}/v1" + llm = ChatOpenAI( + openai_api_key=args.api_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) + else: + llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) else: raise ValueError("llm_engine must be vllm, tgi or openai") return llm @@ -162,6 +172,8 @@ def get_args(): parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct") 
parser.add_argument("--llm_engine", type=str, default="tgi") parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") + parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") + parser.add_argument("--use_remote_service", action="store_true", default=False, help="If using a remote server for LLM") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) parser.add_argument("--top_p", type=float, default=0.95) From 0f6191df895bd96c6351d6d600c6c04fa25b86cd Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Thu, 1 May 2025 17:20:22 -0700 Subject: [PATCH 02/34] add steps to enable remote server Signed-off-by: alexsin368 --- comps/agent/src/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index fcdb332abb..0bd2d528fb 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -110,6 +110,16 @@ Examples of python code for multi-turn conversations using agent memory: To run the two examples above, first launch the agent microservice using [this docker compose yaml](../../../tests/agent/reactllama.yaml). +### 1.6 Run LLMs with Remote Servers + +To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: +- `api_key`= +- `use_remote_service`=True +- `model`= +- `LLM_ENDPOINT_URL`= + +For `LLM_ENDPOINT_URL`, there is no need to include `v1`. + ## 🚀2. 
Start Agent Microservice ### 2.1 Build docker image for agent microservice From 71f1608c44ec974c32382efc8b8675c4a347532c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 May 2025 00:43:07 +0000 Subject: [PATCH 03/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/agent/src/README.md | 1 + comps/agent/src/integrations/utils.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 0bd2d528fb..2f107696ac 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -113,6 +113,7 @@ To run the two examples above, first launch the agent microservice using [this d ### 1.6 Run LLMs with Remote Servers To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: + - `api_key`= - `use_remote_service`=True - `model`= diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index e84d3d1a1f..14c24b0b6d 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -173,7 +173,9 @@ def get_args(): parser.add_argument("--llm_engine", type=str, default="tgi") parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") - parser.add_argument("--use_remote_service", action="store_true", default=False, help="If using a remote server for LLM") + parser.add_argument( + "--use_remote_service", action="store_true", default=False, help="If using a remote server for LLM" + ) parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) parser.add_argument("--top_p", type=float, default=0.95) From bbcda06cce7a46f167968dd8536b56ac9bd96568 Mon Sep 17 00:00:00 2001 From: 
alexsin368 Date: Fri, 2 May 2025 16:58:58 -0700 Subject: [PATCH 04/34] remove use_remote_service Signed-off-by: alexsin368 --- comps/agent/src/README.md | 4 ++-- comps/agent/src/integrations/config.py | 3 --- comps/agent/src/integrations/utils.py | 3 +-- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 0bd2d528fb..7a9289dba3 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -114,11 +114,11 @@ To run the two examples above, first launch the agent microservice using [this d To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: - `api_key`= -- `use_remote_service`=True - `model`= - `LLM_ENDPOINT_URL`= -For `LLM_ENDPOINT_URL`, there is no need to include `v1`. +#### Notes +- For `LLM_ENDPOINT_URL`, there is no need to include `v1`. ## 🚀2. Start Agent Microservice diff --git a/comps/agent/src/integrations/config.py b/comps/agent/src/integrations/config.py index 2aa338dd96..f965286c95 100644 --- a/comps/agent/src/integrations/config.py +++ b/comps/agent/src/integrations/config.py @@ -20,9 +20,6 @@ if os.environ.get("api_key") is not None: env_config += ["--api_key", os.environ["api_key"]] -if os.environ.get("use_remote_service") is not None: - env_config += ["--use_remote_service", os.environ["use_remote_service"]] - if os.environ.get("llm_engine") is not None: env_config += ["--llm_engine", os.environ["llm_engine"]] diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index e84d3d1a1f..ff7f0415a0 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -66,7 +66,7 @@ def setup_chat_model(args): **params, ) elif args.llm_engine == "openai": - if args.use_remote_service: + if args.api_key: openai_endpoint = f"{args.llm_endpoint_url}/v1" llm = ChatOpenAI( openai_api_key=args.api_key, @@ -173,7 +173,6 @@ def get_args(): 
parser.add_argument("--llm_engine", type=str, default="tgi") parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") - parser.add_argument("--use_remote_service", action="store_true", default=False, help="If using a remote server for LLM") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) parser.add_argument("--top_p", type=float, default=0.95) From 45cf9315397a9cdb8460b8f81e77faf7caef802c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 3 May 2025 00:01:52 +0000 Subject: [PATCH 05/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/agent/src/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 4dbe55632a..6e1fcc431b 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -119,6 +119,7 @@ To run the text generation portion using LLMs deployed on a remote server, speci - `LLM_ENDPOINT_URL`= #### Notes + - For `LLM_ENDPOINT_URL`, there is no need to include `v1`. ## 🚀2. Start Agent Microservice From 4899f7937a15d590cf44b5fbe9b63125d629073a Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Tue, 13 May 2025 17:44:24 -0700 Subject: [PATCH 06/34] add OpenAI models instructions, fix format of commands Signed-off-by: alexsin368 --- comps/agent/src/README.md | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 6e1fcc431b..13aee5eaa5 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -82,7 +82,7 @@ for line in resp.iter_lines(decode_unicode=True): **Note**: -1. Currently only `reract_llama` agent is enabled for assistants APIs. +1. 
Currently only `react_llama` agent is enabled for assistants APIs. 2. Not all keywords of OpenAI APIs are supported yet. ### 1.5 Agent memory @@ -110,13 +110,27 @@ Examples of python code for multi-turn conversations using agent memory: To run the two examples above, first launch the agent microservice using [this docker compose yaml](../../../tests/agent/reactllama.yaml). -### 1.6 Run LLMs with Remote Servers +### 1.6 Run LLMs from OpenAI + +To run any model from OpenAI, just specify the environment variable `OPENAI_API_KEY`: + +```bash +export OPENAI_API_KEY= +``` + +These also need to be passed in to the `docker run` command, or included in a YAML file when running `docker compose`. + +### 1.7 Run LLMs with OpenAI-compatible APIs on Remote Servers To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: -- `api_key`= -- `model`= -- `LLM_ENDPOINT_URL`= +```bash +export api_key= +export model= +export LLM_ENDPOINT_URL= +``` + +These also need to be passed in to the `docker run` command, or included in a YAML file when running `docker compose`. 
#### Notes From b7c4acf7d397a284f1499254fa8832533c0c98e3 Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 14 May 2025 17:35:08 -0700 Subject: [PATCH 07/34] simplify ChatOpenAI instantiation Signed-off-by: alexsin368 --- comps/agent/src/integrations/utils.py | 34 ++++++++------------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index ff7f0415a0..238f44d3e3 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,6 +7,7 @@ from .config import env_config +LLM_ENDPOINT_URL_DEFAULT="http://localhost:8080" def format_date(date): # input m/dd/yyyy hr:min @@ -56,29 +57,14 @@ def setup_chat_model(args): "top_p": args.top_p, "streaming": args.stream, } - if args.llm_engine == "vllm" or args.llm_engine == "tgi": - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key="EMPTY", - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) - elif args.llm_engine == "openai": - if args.api_key: - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key=args.api_key, - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) - else: - llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) - else: - raise ValueError("llm_engine must be vllm, tgi or openai") + openai_endpoint=None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + '/v1' + llm = ChatOpenAI( + openai_api_key=args.api_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params + ) return llm @@ -171,7 +157,7 @@ def get_args(): parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct") parser.add_argument("--llm_engine", type=str, default="tgi") - parser.add_argument("--llm_endpoint_url", 
type=str, default="http://localhost:8080") + parser.add_argument("--llm_endpoint_url", type=str, default=LLM_ENDPOINT_URL_DEFAULT) parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) From 658665729de80c4436819d2f72c9512931549759 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 00:35:40 +0000 Subject: [PATCH 08/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/agent/src/integrations/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 238f44d3e3..c45cdced6f 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,7 +7,8 @@ from .config import env_config -LLM_ENDPOINT_URL_DEFAULT="http://localhost:8080" +LLM_ENDPOINT_URL_DEFAULT = "http://localhost:8080" + def format_date(date): # input m/dd/yyyy hr:min @@ -57,13 +58,13 @@ def setup_chat_model(args): "top_p": args.top_p, "streaming": args.stream, } - openai_endpoint=None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + '/v1' + openai_endpoint = None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + "/v1" llm = ChatOpenAI( openai_api_key=args.api_key, openai_api_base=openai_endpoint, model_name=args.model, request_timeout=args.timeout, - **params + **params, ) return llm From d2887344dc17ceb86b24ffe304cd4aea81b0bacb Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 14 May 2025 18:10:19 -0700 Subject: [PATCH 09/34] Revert "simplify ChatOpenAI instantiation" This reverts commit b7c4acf7d397a284f1499254fa8832533c0c98e3. 
--- comps/agent/src/integrations/utils.py | 34 +++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 238f44d3e3..ff7f0415a0 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,7 +7,6 @@ from .config import env_config -LLM_ENDPOINT_URL_DEFAULT="http://localhost:8080" def format_date(date): # input m/dd/yyyy hr:min @@ -57,14 +56,29 @@ def setup_chat_model(args): "top_p": args.top_p, "streaming": args.stream, } - openai_endpoint=None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + '/v1' - llm = ChatOpenAI( - openai_api_key=args.api_key, - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params - ) + if args.llm_engine == "vllm" or args.llm_engine == "tgi": + openai_endpoint = f"{args.llm_endpoint_url}/v1" + llm = ChatOpenAI( + openai_api_key="EMPTY", + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) + elif args.llm_engine == "openai": + if args.api_key: + openai_endpoint = f"{args.llm_endpoint_url}/v1" + llm = ChatOpenAI( + openai_api_key=args.api_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) + else: + llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) + else: + raise ValueError("llm_engine must be vllm, tgi or openai") return llm @@ -157,7 +171,7 @@ def get_args(): parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct") parser.add_argument("--llm_engine", type=str, default="tgi") - parser.add_argument("--llm_endpoint_url", type=str, default=LLM_ENDPOINT_URL_DEFAULT) + parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") parser.add_argument("--api_key", type=str, default=None, help="API key to access remote 
server") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) From 848368f23fafa674b22777c169a6a885455510df Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 14 May 2025 18:20:35 -0700 Subject: [PATCH 10/34] add back check and logic for llm_engine, set openai_key argument Signed-off-by: alexsin368 --- comps/agent/src/integrations/utils.py | 33 +++++++++++---------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index ff7f0415a0..6d73e2fff5 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,6 +7,7 @@ from .config import env_config +LLM_ENDPOINT_URL_DEFAULT = "http://localhost:8080" def format_date(date): # input m/dd/yyyy hr:min @@ -57,28 +58,20 @@ def setup_chat_model(args): "streaming": args.stream, } if args.llm_engine == "vllm" or args.llm_engine == "tgi": - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key="EMPTY", - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) + openai_key = "EMPTY" elif args.llm_engine == "openai": - if args.api_key: - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key=args.api_key, - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) - else: - llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) + openai_key = args.api_key else: - raise ValueError("llm_engine must be vllm, tgi or openai") + raise ValueError("llm_engine must be vllm, tgi, or openai") + + openai_endpoint = None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + "/v1" + llm = ChatOpenAI( + openai_api_key=openai_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) 
return llm From 53aaaa529e64fed7b8f79107de1809c5541a6534 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 01:22:16 +0000 Subject: [PATCH 11/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/agent/src/integrations/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 6d73e2fff5..3c940a53b9 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -9,6 +9,7 @@ LLM_ENDPOINT_URL_DEFAULT = "http://localhost:8080" + def format_date(date): # input m/dd/yyyy hr:min # output yyyy-mm-dd From a70201fd987dbf1de8524a09b388fc5e50f5758b Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Tue, 29 Apr 2025 11:54:57 +0800 Subject: [PATCH 12/34] Provide ARCH option for lvm-video-llama image build (#1630) Signed-off-by: ZePan110 Signed-off-by: alexsin368 --- comps/third_parties/video-llama/src/Dockerfile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/comps/third_parties/video-llama/src/Dockerfile b/comps/third_parties/video-llama/src/Dockerfile index b2b36a21e4..944a52a08f 100644 --- a/comps/third_parties/video-llama/src/Dockerfile +++ b/comps/third_parties/video-llama/src/Dockerfile @@ -3,6 +3,9 @@ FROM python:3.11-slim +# Set this to "cpu" or "gpu" or etc +ARG ARCH="cpu" + ENV LANG=C.UTF-8 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ @@ -20,7 +23,11 @@ COPY --chown=user:user comps /home/user/comps WORKDIR /home/user/comps/third_parties/video-llama/src RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/user/comps/third_parties/video-llama/src/requirements.txt + if [ ${ARCH} = "cpu" ]; then \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r 
/home/user/comps/third_parties/video-llama/src/requirements.txt; \ + else \ + pip install --no-cache-dir -r /home/user/comps/third_parties/video-llama/src/requirements.txt; \ + fi ARG VIDEO_LLAMA_REPO=https://github.com/DAMO-NLP-SG/Video-LLaMA.git ARG VIDEO_LLAMA_COMMIT=0adb19e From 212e6128b1cf192e77518d5bc30d3e2d559eb321 Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Wed, 30 Apr 2025 11:10:30 +0800 Subject: [PATCH 13/34] Add sglang microservice for supporting llama4 model (#1640) Signed-off-by: Ye, Xinyu Co-authored-by: Lv,Liang1 Signed-off-by: alexsin368 --- .../docker/compose/third_parties-compose.yaml | 4 + comps/llms/src/text-generation/README.md | 32 +++---- .../deployment/docker_compose/compose.yaml | 26 ++++++ comps/third_parties/sglang/src/Dockerfile | 47 +++++++++++ comps/third_parties/sglang/src/README.md | 84 +++++++++++++++++++ comps/third_parties/sglang/src/__init__.py | 2 + comps/third_parties/sglang/src/entrypoint.sh | 8 ++ .../_test_third_parties_sglang.sh | 84 +++++++++++++++++++ 8 files changed, 272 insertions(+), 15 deletions(-) create mode 100644 comps/third_parties/sglang/deployment/docker_compose/compose.yaml create mode 100644 comps/third_parties/sglang/src/Dockerfile create mode 100644 comps/third_parties/sglang/src/README.md create mode 100644 comps/third_parties/sglang/src/__init__.py create mode 100644 comps/third_parties/sglang/src/entrypoint.sh create mode 100644 tests/third_parties/_test_third_parties_sglang.sh diff --git a/.github/workflows/docker/compose/third_parties-compose.yaml b/.github/workflows/docker/compose/third_parties-compose.yaml index 22d8b33587..5e8a7eccd3 100644 --- a/.github/workflows/docker/compose/third_parties-compose.yaml +++ b/.github/workflows/docker/compose/third_parties-compose.yaml @@ -105,3 +105,7 @@ services: PORT_SSH: 2345 dockerfile: comps/third_parties/ipex/src/Dockerfile image: ${REGISTRY:-opea}/ipex-llm:${TAG:-latest} + sglang: + build: + dockerfile: 
comps/third_parties/sglang/src/Dockerfile + image: ${REGISTRY:-opea}/sglang:${TAG:-latest} diff --git a/comps/llms/src/text-generation/README.md b/comps/llms/src/text-generation/README.md index e069b4f867..3e002e22b0 100644 --- a/comps/llms/src/text-generation/README.md +++ b/comps/llms/src/text-generation/README.md @@ -8,21 +8,23 @@ Overall, this microservice offers a streamlined way to integrate large language ## Validated LLM Models -| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | OVMS | Optimum-Habana | -| ------------------------------------------- | --------- | -------- | ---------- | -------- | -------------- | -| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | ✓ | ✓ | -| [meta-llama/Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | ✓ | ✓ | -| [meta-llama/Llama-2-70b-chat-hf] | ✓ | - | ✓ | - | ✓ | -| [meta-llama/Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | ✓ | ✓ | -| [meta-llama/Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | - | ✓ | -| [Phi-3] | x | Limit 4K | Limit 4K | Limit 4K | ✓ | -| [Phi-4] | x | x | x | x | ✓ | -| [deepseek-ai/DeepSeek-R1-Distill-Llama-8B] | ✓ | - | ✓ | - | ✓ | -| [deepseek-ai/DeepSeek-R1-Distill-Llama-70B] | ✓ | - | ✓ | - | ✓ | -| [deepseek-ai/DeepSeek-R1-Distill-Qwen-14B] | ✓ | - | ✓ | - | ✓ | -| [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B] | ✓ | - | ✓ | - | ✓ | -| [mistralai/Mistral-Small-24B-Instruct-2501] | ✓ | - | ✓ | - | ✓ | -| [mistralai/Mistral-Large-Instruct-2411] | x | - | ✓ | - | ✓ | +| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | OVMS | Optimum-Habana | SGLANG-CPU | +| --------------------------------------------------------------------------------------------------------------------- | --------- | -------- | ---------- | -------- | -------------- | ---------- | +| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | ✓ | ✓ | - | +| [meta-llama/Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| [meta-llama/Llama-2-70b-chat-hf] | ✓ | - | ✓ | - | ✓ | ✓ | +| [meta-llama/Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| [meta-llama/Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | 
- | ✓ | ✓ | +| [Phi-3] | x | Limit 4K | Limit 4K | Limit 4K | ✓ | - | +| [Phi-4] | x | x | x | x | ✓ | - | +| [deepseek-ai/DeepSeek-R1-Distill-Llama-8B] | ✓ | - | ✓ | - | ✓ | - | +| [deepseek-ai/DeepSeek-R1-Distill-Llama-70B] | ✓ | - | ✓ | - | ✓ | - | +| [deepseek-ai/DeepSeek-R1-Distill-Qwen-14B] | ✓ | - | ✓ | - | ✓ | - | +| [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B] | ✓ | - | ✓ | - | ✓ | - | +| [mistralai/Mistral-Small-24B-Instruct-2501] | ✓ | - | ✓ | - | ✓ | - | +| [mistralai/Mistral-Large-Instruct-2411] | x | - | ✓ | - | ✓ | - | +| [meta-llama/Llama-4-Scout-17B-16E-Instruct](https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct) | - | - | - | - | - | ✓ | +| [meta-llama/Llama-4-Maverick-17B-128E-Instruct](https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct) | - | - | - | - | - | ✓ | ### System Requirements for LLM Models diff --git a/comps/third_parties/sglang/deployment/docker_compose/compose.yaml b/comps/third_parties/sglang/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..a2f59cc914 --- /dev/null +++ b/comps/third_parties/sglang/deployment/docker_compose/compose.yaml @@ -0,0 +1,26 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + sglang: + image: ${REGISTRY:-opea}/sglang:${TAG:-latest} + privileged: true + shm_size: 10g + container_name: sglang-server + ports: + - ${SGLANG_LLM_PORT:-8699}:8699 + ipc: host + volumes: + - /dev/shm:/dev/shm + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MODEL_ID: ${MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + SGLANG_LLM_PORT: ${SGLANG_LLM_PORT:-8699} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/third_parties/sglang/src/Dockerfile b/comps/third_parties/sglang/src/Dockerfile new file mode 100644 index 0000000000..eba839f761 --- /dev/null +++ b/comps/third_parties/sglang/src/Dockerfile @@ -0,0 +1,47 @@ +# Copyright (C) 2025 Intel Corporation +# 
SPDX-License-Identifier: Apache-2.0 + +ARG BASE_IMAGE=ubuntu:22.04 +FROM ${BASE_IMAGE} AS base + +RUN apt-get update && \ + apt-get upgrade -y && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends --fix-missing \ + ca-certificates \ + curl \ + g++-11 \ + gcc-11 \ + git \ + make \ + numactl \ + wget + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 && \ + update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \ + update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 + +WORKDIR /root + +RUN curl -fsSL -v -o miniforge.sh -O https://github.com/conda-forge/miniforge/releases/download/24.7.1-2/Miniforge3-24.7.1-2-Linux-x86_64.sh && \ + bash miniforge.sh -b -p ./miniforge3 && \ + rm miniforge.sh + +RUN git clone https://github.com/jianan-gu/sglang -b llama4_optimzed_cpu_r1 +RUN . ~/miniforge3/bin/activate && conda create -n sglang python=3.10 && conda activate sglang && \ + cd sglang && pip install -e "python[all_cpu]" && cd .. && conda install -y libsqlite=3.48.0 && \ + pip uninstall -y triton && pip uninstall -y transformers && pip install transformers==4.51.1 && \ + pip install triton==3.1 && pip install intel-openmp==2024.2.0 && pip install transformers +RUN git clone https://github.com/vllm-project/vllm.git -b v0.6.4.post1 && cd vllm && apt-get install -y libnuma-dev && \ + . ~/miniforge3/bin/activate && conda activate sglang && \ + pip install cmake==3.31.2 wheel packaging ninja "setuptools-scm>=8" numpy nvidia-ml-py && \ + pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu && \ + VLLM_TARGET_DEVICE=cpu python setup.py develop && cd .. + +RUN cd sglang && . 
~/miniforge3/bin/activate && conda activate sglang && pip uninstall -y torch torchvision && \ + pip install torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/cpu && \ + cd sgl-kernel/ && python setup.py install && cd .. && conda install -y gperftools gcc=11 gxx=11 cxx-compiler -c conda-forge + +COPY ./comps/third_parties/sglang/src/entrypoint.sh /usr/local/bin/entrypoint.sh +RUN chmod +x /usr/local/bin/entrypoint.sh +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] diff --git a/comps/third_parties/sglang/src/README.md b/comps/third_parties/sglang/src/README.md new file mode 100644 index 0000000000..b26ef65c2f --- /dev/null +++ b/comps/third_parties/sglang/src/README.md @@ -0,0 +1,84 @@ +# SGLang Serving microservice + +SGLang is an efficient and scalable serving framework for hosting Large Language Models (LLMs) with minimal latency and high throughput. +Intel has further enhanced SGLang with CPU-specific optimizations, enabling high-performance LLM serving on CPU-only platforms without requiring GPUs. +We have verified the Llama series models with SGLang, with a particular focus on the latest Llama 4 models, including [meta-llama/Llama-4-Scout-17B-16E-Instruct](https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct), [meta-llama/Llama-4-Maverick-17B-128E-Instruct](https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct), as well as the Llama 4 guard model Llama-Guard-4-12B. + +## 🚀1. Build the Docker Image + +This Docker image (opea/sglang:latest) includes Intel's CPU-specific optimizations, providing enhanced performance for CPU deployments. +Ensure you are at the project root directory, then build the SGLang microservice Docker image: + +```bash +cd ../../../../ +docker build -f comps/third_parties/sglang/src/Dockerfile --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/sglang:latest . +``` + +✅ Tip: Setting https_proxy and http_proxy build arguments helps if you are behind a proxy. 
+ +## 🚀2. Start the microservice + +Before starting, set the model you want to serve: + +```bash +export MODEL_ID="meta-llama/Llama-4-Maverick-17B-128E-Instruct" +``` + +Then navigate to the deployment folder and start the service using Docker Compose: + +```bash +cd comps/third_parties/sglang/deployment/docker_compose +docker compose -f compose.yaml up -d +``` + +## 🚀3. Access the service + +Then you need to test your service using the following commands for guard model: + +```python +import openai + +client = openai.Client(base_url="http://127.0.0.1:8699/v1", api_key="EMPTY") +RESPONSE_CLASSIFICATION_INSTRUCTION = """ +Task: Check if there is unsafe content in 'Agent' messages in conversations according our safety policy with the below categories.\n\n + +\nS1: Violent Crimes. \nS2: Non-Violent Crimes. \nS3: Sex Crimes. \nS4: Child Exploitation. \nS5: Defamation. +\nS6: Specialized Advice. \nS7: Privacy. \nS8: Intellectual Property. \nS9: Indiscriminate Weapons. +\nS10: Hate. \nS11: Self-Harm. \nS12: Sexual Content. \nS13: Elections. \n""" +RESPONSE_CLASSIFICATION_TEMPLATE_PREFIX = "{classification_instruction}\n\n\n\nUser: " +RESPONSE_CLASSIFICATION_TEMPLATE_POSTFIX = """{user_message}\n\nAgent: {model_message}\n\n\n\n +Provide your safety assessment for ONLY THE LAST Agent message in the above conversation: +\n - First line must read 'safe' or 'unsafe'. +\n - If unsafe, a second line must include a comma-separated list of violated categories.""" + +response = client.chat.completions.create( + model="meta-llama/Llama-4-guard", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": RESPONSE_CLASSIFICATION_TEMPLATE_PREFIX.format( + classification_instruction=RESPONSE_CLASSIFICATION_INSTRUCTION + ), + }, + { + "type": "text", + "text": RESPONSE_CLASSIFICATION_TEMPLATE_POSTFIX.format( + user_message="how do I make a bomb?", model_message="I cannot help you with that." 
+ ), + }, + ], + }, + ], + max_tokens=128, +) +print(response.choices[0].message.content) +``` + +You can use the following command for testing non-guard models: + +```bash +http_proxy="" curl -X POST -H "Content-Type: application/json" -d '{"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "messages": [{"role": "user", "content": "Hello! What is your name?"}], "max_tokens": 128}' http://localhost:8699/v1/chat/completions +``` diff --git a/comps/third_parties/sglang/src/__init__.py b/comps/third_parties/sglang/src/__init__.py new file mode 100644 index 0000000000..4057dc0163 --- /dev/null +++ b/comps/third_parties/sglang/src/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/third_parties/sglang/src/entrypoint.sh b/comps/third_parties/sglang/src/entrypoint.sh new file mode 100644 index 0000000000..92452b990a --- /dev/null +++ b/comps/third_parties/sglang/src/entrypoint.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +. 
~/miniforge3/bin/activate && conda activate sglang +export LD_PRELOAD=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}/lib/libiomp5.so:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}/lib/libtcmalloc.so +python3 -m sglang.launch_server --model ${MODEL_ID} --trust-remote-code --device cpu --disable-overlap-schedule --chunked-prefill-size 2048 --max-running-requests 32 --mem-fraction-static 0.8 --context-length 65536 --max-total-tokens 65536 --port ${SGLANG_LLM_PORT} --api-key ${HF_TOKEN} --chat-template llama-4 diff --git a/tests/third_parties/_test_third_parties_sglang.sh b/tests/third_parties/_test_third_parties_sglang.sh new file mode 100644 index 0000000000..4a97c88ac7 --- /dev/null +++ b/tests/third_parties/_test_third_parties_sglang.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +export DATA_PATH=${model_cache} +MODEL_ID="meta-llama/Llama-3.1-8B-Instruct" + +function build_docker_images() { + echo "Start building docker images for microservice" + cd $WORKPATH + docker build --no-cache -t opea/sglang:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/sglang/src/Dockerfile . + if [ $? 
-ne 0 ]; then
+        echo "opea/sglang built fail"
+        exit 1
+    else
+        echo "opea/sglang built successful"
+    fi
+}
+
+function start_service() {
+    echo "Starting microservice"
+    export host_ip=${ip_address}
+    export MODEL_ID=${MODEL_ID}
+    export TAG=comps
+    cd $WORKPATH
+    cd comps/third_parties/sglang/deployment/docker_compose
+    docker compose -f compose.yaml up -d
+    echo "Microservice started"
+    sleep 120
+}
+
+function validate_microservice() {
+    echo "Validate microservice started"
+    result=$(http_proxy="" curl http://localhost:8699/v1/chat/completions \
+        -X POST \
+        -H "Content-Type: application/json" \
+        -d '{
+        "model": "'"${MODEL_ID}"'",
+        "messages": [
+            {"role": "user", "content": "What is Deep Learning?"}
+        ],
+        "max_tokens": 32
+        }'
+)
+    if [[ $result == *"Deep"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong."
+        docker logs sglang-server
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=sglang-server")
+    echo "Shutdown legacy containers "$cid
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+}
+
+function main() {
+    if grep -qi amx_tile /proc/cpuinfo; then
+        echo "AMX is supported on this machine."
+    else
+        echo "AMX is NOT supported on this machine, skip this test."
+        exit 0
+    fi
+    stop_docker
+
+    build_docker_images
+    start_service
+
+    validate_microservice
+
+    stop_docker
+    echo "cleanup container images and volumes"
+    echo y | docker system prune 2>&1 > /dev/null
+
+}
+
+main

From 5fc478e949d99c27c0edb41c7ad0fbe754ce844e Mon Sep 17 00:00:00 2001
From: ZePan110
Date: Wed, 30 Apr 2025 13:24:47 +0800
Subject: [PATCH 14/34] Remove invalid codeowner.
(#1642) Signed-off-by: ZePan110 Signed-off-by: alexsin368 --- .github/CODEOWNERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 0110f0b93d..4f11bb8eeb 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -3,12 +3,12 @@ * liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com /.github/ suyue.chen@intel.com ze.pan@intel.com -/comps/third_parties/ liang1.lv@intel.com letong.han@intel.com xinyao.wang@intel.com +/comps/third_parties/ liang1.lv@intel.com letong.han@intel.com /comps/agent/ kaokao.lv@intel.com minmin.hou@intel.com /comps/animation/ qing.yao@intel.com chun.tao@intel.com /comps/asr/ sihan.chen@intel.com liang1.lv@intel.com -/comps/chathistory/ yogesh.pandey@intel.com xinyao.wang@intel.com -/comps/cores/ liang1.lv@intel.com feng.tian@intel.com letong.han@intel.com xinyao.wang@intel.com sihan.chen@intel.com +/comps/chathistory/ yogesh.pandey@intel.com +/comps/cores/ liang1.lv@intel.com feng.tian@intel.com letong.han@intel.com sihan.chen@intel.com /comps/dataprep/ xinyu.ye@intel.com letong.han@intel.com /comps/embeddings/ kaokao.lv@intel.com /comps/feedback_management/ hoong.tee.yeoh@intel.com liang1.lv@intel.com @@ -18,7 +18,7 @@ /comps/image2video/ xinyu.ye@intel.com qing.yao@intel.com /comps/llms/ liang1.lv@intel.com letong.han@intel.com /comps/lvms/ sihan.chen@intel.com liang1.lv@intel.com -/comps/prompt_registry/ hoong.tee.yeoh@intel.com xinyao.wang@intel.com +/comps/prompt_registry/ hoong.tee.yeoh@intel.com /comps/ragas/ kaokao.lv@intel.com liang1.lv@intel.com /comps/rerankings/ kaokao.lv@intel.com liang1.lv@intel.com /comps/retrievers/ kaokao.lv@intel.com liang1.lv@intel.com From 1fe684cef134cd34cd350c15fef23b915bf20a8a Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 30 Apr 2025 17:56:40 -0700 Subject: [PATCH 15/34] add support for remote server Signed-off-by: alexsin368 --- comps/agent/src/integrations/config.py | 6 ++++++ comps/agent/src/integrations/utils.py | 
14 +++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/comps/agent/src/integrations/config.py b/comps/agent/src/integrations/config.py index bcbe6207a5..2aa338dd96 100644 --- a/comps/agent/src/integrations/config.py +++ b/comps/agent/src/integrations/config.py @@ -17,6 +17,12 @@ if os.environ.get("llm_endpoint_url") is not None: env_config += ["--llm_endpoint_url", os.environ["llm_endpoint_url"]] +if os.environ.get("api_key") is not None: + env_config += ["--api_key", os.environ["api_key"]] + +if os.environ.get("use_remote_service") is not None: + env_config += ["--use_remote_service", os.environ["use_remote_service"]] + if os.environ.get("llm_engine") is not None: env_config += ["--llm_engine", os.environ["llm_engine"]] diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 77f4e1cadb..e84d3d1a1f 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -66,7 +66,17 @@ def setup_chat_model(args): **params, ) elif args.llm_engine == "openai": - llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) + if args.use_remote_service: + openai_endpoint = f"{args.llm_endpoint_url}/v1" + llm = ChatOpenAI( + openai_api_key=args.api_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) + else: + llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) else: raise ValueError("llm_engine must be vllm, tgi or openai") return llm @@ -162,6 +172,8 @@ def get_args(): parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct") parser.add_argument("--llm_engine", type=str, default="tgi") parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") + parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") + parser.add_argument("--use_remote_service", action="store_true", 
default=False, help="If using a remote server for LLM") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) parser.add_argument("--top_p", type=float, default=0.95) From bd68f542f5c0fa146b3af9b3d0de5eca37e4da3b Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Thu, 1 May 2025 17:20:22 -0700 Subject: [PATCH 16/34] add steps to enable remote server Signed-off-by: alexsin368 --- comps/agent/src/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index fcdb332abb..0bd2d528fb 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -110,6 +110,16 @@ Examples of python code for multi-turn conversations using agent memory: To run the two examples above, first launch the agent microservice using [this docker compose yaml](../../../tests/agent/reactllama.yaml). +### 1.6 Run LLMs with Remote Servers + +To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: +- `api_key`= +- `use_remote_service`=True +- `model`= +- `LLM_ENDPOINT_URL`= + +For `LLM_ENDPOINT_URL`, there is no need to include `v1`. + ## 🚀2. 
Start Agent Microservice ### 2.1 Build docker image for agent microservice From 23f1f5658d2b8ab8876512e811f57cc16ac6a26c Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Fri, 2 May 2025 16:58:58 -0700 Subject: [PATCH 17/34] remove use_remote_service Signed-off-by: alexsin368 --- comps/agent/src/README.md | 4 ++-- comps/agent/src/integrations/config.py | 3 --- comps/agent/src/integrations/utils.py | 3 +-- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 0bd2d528fb..7a9289dba3 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -114,11 +114,11 @@ To run the two examples above, first launch the agent microservice using [this d To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: - `api_key`= -- `use_remote_service`=True - `model`= - `LLM_ENDPOINT_URL`= -For `LLM_ENDPOINT_URL`, there is no need to include `v1`. +#### Notes +- For `LLM_ENDPOINT_URL`, there is no need to include `v1`. ## 🚀2. 
Start Agent Microservice diff --git a/comps/agent/src/integrations/config.py b/comps/agent/src/integrations/config.py index 2aa338dd96..f965286c95 100644 --- a/comps/agent/src/integrations/config.py +++ b/comps/agent/src/integrations/config.py @@ -20,9 +20,6 @@ if os.environ.get("api_key") is not None: env_config += ["--api_key", os.environ["api_key"]] -if os.environ.get("use_remote_service") is not None: - env_config += ["--use_remote_service", os.environ["use_remote_service"]] - if os.environ.get("llm_engine") is not None: env_config += ["--llm_engine", os.environ["llm_engine"]] diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index e84d3d1a1f..ff7f0415a0 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -66,7 +66,7 @@ def setup_chat_model(args): **params, ) elif args.llm_engine == "openai": - if args.use_remote_service: + if args.api_key: openai_endpoint = f"{args.llm_endpoint_url}/v1" llm = ChatOpenAI( openai_api_key=args.api_key, @@ -173,7 +173,6 @@ def get_args(): parser.add_argument("--llm_engine", type=str, default="tgi") parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") - parser.add_argument("--use_remote_service", action="store_true", default=False, help="If using a remote server for LLM") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) parser.add_argument("--top_p", type=float, default=0.95) From d1d2ac1c07d10cbab9f4e6690dc1da91ceae1660 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 May 2025 00:43:07 +0000 Subject: [PATCH 18/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: alexsin368 --- comps/agent/src/README.md | 1 + 1 file 
changed, 1 insertion(+) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 7a9289dba3..4dbe55632a 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -113,6 +113,7 @@ To run the two examples above, first launch the agent microservice using [this d ### 1.6 Run LLMs with Remote Servers To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: + - `api_key`= - `model`= - `LLM_ENDPOINT_URL`= From a9d9ad7c24029a4352d5ffd2af6ff70099c65b2f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 3 May 2025 00:01:52 +0000 Subject: [PATCH 19/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: alexsin368 --- comps/agent/src/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 4dbe55632a..6e1fcc431b 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -119,6 +119,7 @@ To run the text generation portion using LLMs deployed on a remote server, speci - `LLM_ENDPOINT_URL`= #### Notes + - For `LLM_ENDPOINT_URL`, there is no need to include `v1`. ## 🚀2. 
Start Agent Microservice From 1a1ff02a68d6d71360fad674a10889f890d982b2 Mon Sep 17 00:00:00 2001 From: Mustafa <109312699+MSCetin37@users.noreply.github.com> Date: Fri, 2 May 2025 16:35:27 -0700 Subject: [PATCH 20/34] bug fix for chunk_size and overlap cause error in dataprep ingestion (#1643) * bug fix for dataingest url Signed-off-by: Mustafa * add validation function Signed-off-by: Mustafa * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * validation update Signed-off-by: Mustafa * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update validation function Signed-off-by: Mustafa * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Mustafa Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: alexsin368 --- comps/dataprep/src/utils.py | 44 +++++++++++++++++++++++++++ tests/dataprep/dataprep_utils.sh | 10 +++++- tests/dataprep/test_dataprep_redis.sh | 3 ++ 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/comps/dataprep/src/utils.py b/comps/dataprep/src/utils.py index 4f856fe521..ca3da03262 100644 --- a/comps/dataprep/src/utils.py +++ b/comps/dataprep/src/utils.py @@ -667,11 +667,55 @@ def parse_html(input): return chucks +def validate_and_convert_chunk_params(chunk_size, chunk_overlap): + """Validate and convert chunk_size and chunk_overlap to integers if they are strings. + + Ensure chunk_size is a positive integer, chunk_overlap is a non-negative integer, + and chunk_overlap is not larger than chunk_size. + """ + + def validate_param_instance(param, param_name): + """Validate that the parameter is an integer or a string that can be converted to an integer. + + Raise a ValueError if the validation fails. 
+ """ + if not isinstance(param, (int, str)): + raise ValueError(f"{param_name} must be an integer or a string representing an integer.") + + if isinstance(param, str): + try: + return int(param) # Attempt to convert the string to an integer + except ValueError: + raise ValueError(f"{param_name} must be an integer or a string that can be converted to an integer.") + else: + return param + + # Validate chunk_size and chunk_overlap, Convert to integers if they are strings + chunk_size = validate_param_instance(chunk_size, "chunk_size") + chunk_overlap = validate_param_instance(chunk_overlap, "chunk_overlap") + + def validate_param_value(param, param_name, min_value): + if param < min_value: + raise ValueError(f"{param_name} must be a {min_value} or greater.") + + # Validate chunk_size and chunk_overlap + validate_param_value(chunk_size, "chunk_size", 1) + validate_param_value(chunk_overlap, "chunk_overlap", 0) + + # Ensure chunk_overlap is not larger than chunk_size + if chunk_overlap > chunk_size: + raise ValueError("chunk_overlap cannot be larger than chunk_size.") + + return chunk_size, chunk_overlap + + def load_html_content(links, chunk_size=1500, chunk_overlap=50): from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.document_loaders import AsyncHtmlLoader from langchain_community.document_transformers import Html2TextTransformer + chunk_size, chunk_overlap = validate_and_convert_chunk_params(chunk_size, chunk_overlap) + loader = AsyncHtmlLoader(links, ignore_load_errors=True, trust_env=True) docs = loader.load() html2text = Html2TextTransformer() diff --git a/tests/dataprep/dataprep_utils.sh b/tests/dataprep/dataprep_utils.sh index c3d86e1feb..bb959a665a 100644 --- a/tests/dataprep/dataprep_utils.sh +++ b/tests/dataprep/dataprep_utils.sh @@ -39,7 +39,7 @@ function _invoke_curl() { RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') } -# + function _add_db_params() { local db=$1 if [[ "$db" == "redis" ]]; 
then @@ -102,6 +102,14 @@ function ingest_external_link() { _invoke_curl $fqdn $port ingest -F 'link_list=["https://www.ces.tech/"]' $extra_args $@ } +function ingest_external_link_with_chunk_parameters() { + local fqdn=$1 + local port=$2 + local index_name=$3 + shift 3 + _invoke_curl $fqdn $port ingest -F 'link_list=["https://www.ces.tech/"]' -F "chunk_size=1500" -F "chunk_overlap=100" -F "index_name=${index_name}" $@ +} + function delete_all() { local fqdn=$1 local port=$2 diff --git a/tests/dataprep/test_dataprep_redis.sh b/tests/dataprep/test_dataprep_redis.sh index e07961d7eb..952a4ed628 100644 --- a/tests/dataprep/test_dataprep_redis.sh +++ b/tests/dataprep/test_dataprep_redis.sh @@ -77,6 +77,9 @@ function validate_microservice() { ingest_external_link ${ip_address} ${DATAPREP_PORT} check_result "dataprep - upload - link" "Data preparation succeeded" dataprep-redis-server ${LOG_PATH}/dataprep_upload_file.log + ingest_external_link_with_chunk_parameters ${ip_address} ${DATAPREP_PORT} "rag_redis_test_link_params" + check_result "dataprep - upload - link" "Data preparation succeeded" dataprep-redis-server ${LOG_PATH}/dataprep_upload_file.log + ingest_txt_with_index_name ${ip_address} ${DATAPREP_PORT} rag_redis_test check_result "dataprep - upload with index - txt" "Data preparation succeeded" dataprep-redis-server ${LOG_PATH}/dataprep_upload_file.log From 11a79ff95739e31e26a492194b48c9a5faf8874e Mon Sep 17 00:00:00 2001 From: Razvan Liviu Varzaru <45736827+RazvanLiviuVarzaru@users.noreply.github.com> Date: Tue, 6 May 2025 20:12:28 +0300 Subject: [PATCH 21/34] MariaDB Vector integrations for retriever & dataprep services (#1645) * Add MariaDB Vector third-party service MariaDB Vector was introduced since MariaDB Server 11.7 Signed-off-by: Razvan-Liviu Varzaru * Add retriever MariaDB Vector integration Signed-off-by: Razvan-Liviu Varzaru * Add dataprep MariaDB Vector integration Signed-off-by: Razvan-Liviu Varzaru * [pre-commit.ci] auto fixes from pre-commit.com 
hooks for more information, see https://pre-commit.ci * Fix CI failures - md5 is used for the primary key not as a security hash - fixed mariadb readme headers Signed-off-by: Razvan-Liviu Varzaru --------- Signed-off-by: Razvan-Liviu Varzaru Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: alexsin368 --- comps/dataprep/README.md | 4 + .../deployment/docker_compose/compose.yaml | 24 + comps/dataprep/src/Dockerfile | 1 + comps/dataprep/src/README_mariadb.md | 100 +++++ comps/dataprep/src/integrations/mariadb.py | 415 ++++++++++++++++++ .../src/opea_dataprep_microservice.py | 1 + comps/dataprep/src/requirements.txt | 2 + comps/retrievers/README.md | 4 + .../deployment/docker_compose/compose.yaml | 12 + comps/retrievers/src/Dockerfile | 4 +- comps/retrievers/src/README_mariadb.md | 62 +++ comps/retrievers/src/integrations/config.py | 6 + comps/retrievers/src/integrations/mariadb.py | 153 +++++++ .../src/opea_retrievers_microservice.py | 1 + comps/retrievers/src/requirements.txt | 2 + .../deployment/docker_compose/compose.yaml | 21 + comps/third_parties/mariadb/src/README.md | 19 + comps/third_parties/mariadb/src/__init__.py | 2 + tests/dataprep/test_dataprep_mariadb.sh | 106 +++++ tests/retrievers/test_retrievers_mariadb.sh | 84 ++++ 20 files changed, 1022 insertions(+), 1 deletion(-) create mode 100644 comps/dataprep/src/README_mariadb.md create mode 100644 comps/dataprep/src/integrations/mariadb.py create mode 100644 comps/retrievers/src/README_mariadb.md create mode 100644 comps/retrievers/src/integrations/mariadb.py create mode 100644 comps/third_parties/mariadb/deployment/docker_compose/compose.yaml create mode 100644 comps/third_parties/mariadb/src/README.md create mode 100644 comps/third_parties/mariadb/src/__init__.py create mode 100644 tests/dataprep/test_dataprep_mariadb.sh create mode 100644 tests/retrievers/test_retrievers_mariadb.sh diff --git a/comps/dataprep/README.md b/comps/dataprep/README.md 
index 7cba93f0a0..b7b6979406 100644 --- a/comps/dataprep/README.md +++ b/comps/dataprep/README.md @@ -60,3 +60,7 @@ For details, please refer to this [readme](src/README_neo4j_llamaindex.md) ## Dataprep Microservice for financial domain data For details, please refer to this [readme](src/README_finance.md) + +## Dataprep Microservice with MariaDB Vector + +For details, please refer to this [readme](src/README_mariadb.md) diff --git a/comps/dataprep/deployment/docker_compose/compose.yaml b/comps/dataprep/deployment/docker_compose/compose.yaml index c44fdb818f..481f63778c 100644 --- a/comps/dataprep/deployment/docker_compose/compose.yaml +++ b/comps/dataprep/deployment/docker_compose/compose.yaml @@ -15,6 +15,7 @@ include: - ../../../third_parties/tei/deployment/docker_compose/compose.yaml - ../../../third_parties/vllm/deployment/docker_compose/compose.yaml - ../../../third_parties/arangodb/deployment/docker_compose/compose.yaml + - ../../../third_parties/mariadb/deployment/docker_compose/compose.yaml services: @@ -414,6 +415,29 @@ services: retries: 10 restart: unless-stopped + dataprep-mariadb-vector: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-mariadb-vector + ports: + - "${DATAPREP_PORT:-5000}:5000" + depends_on: + mariadb-server: + condition: service_healthy + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MARIADBVECTOR" + MARIADB_CONNECTION_URL: ${MARIADB_CONNECTION_URL:-mariadb+mariadbconnector://dbuser:password@mariadb-server:3306/vectordb} + LOGFLAG: ${LOGFLAG} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] + interval: 10s + timeout: 5s + retries: 10 + restart: unless-stopped + networks: default: driver: bridge diff --git a/comps/dataprep/src/Dockerfile b/comps/dataprep/src/Dockerfile index a344066ba0..eba0288012 100644 --- a/comps/dataprep/src/Dockerfile +++ 
b/comps/dataprep/src/Dockerfile
@@ -13,6 +13,7 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin
     libcairo2 \
     libgl1-mesa-glx \
     libjemalloc-dev \
+    libmariadb-dev \
     libpq-dev \
     libreoffice \
     poppler-utils \
diff --git a/comps/dataprep/src/README_mariadb.md b/comps/dataprep/src/README_mariadb.md
new file mode 100644
index 0000000000..0931e78edb
--- /dev/null
+++ b/comps/dataprep/src/README_mariadb.md
@@ -0,0 +1,100 @@
+# Dataprep Microservice with MariaDB Vector
+
+## 🚀1. Start Microservice with Docker
+
+### 1.1 Build Docker Image
+
+```bash
+cd GenAIComps
+docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
+```
+
+### 1.2 Run Docker with CLI (Option A)
+
+#### 1.2.1 Start MariaDB Server
+
+Please refer to this [readme](../../third_parties/mariadb/src/README.md).
+
+#### 1.2.2 Start the data preparation service
+
+```bash
+
+export HOST_IP=$(hostname -I | awk '{print $1}')
+# If you've configured the server with the default env values then:
+export MARIADB_CONNECTION_URL=mariadb+mariadbconnector://dbuser:password@${HOST_IP}:3306/vectordb
+
+docker run -d --rm --name="dataprep-mariadb-vector" -p 5000:5000 --ipc=host -e MARIADB_CONNECTION_URL=$MARIADB_CONNECTION_URL -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MARIADBVECTOR" opea/dataprep:latest
+```
+
+### 1.3 Run with Docker Compose (Option B)
+
+```bash
+cd comps/dataprep/deployment/docker_compose
+docker compose -f compose.yaml up dataprep-mariadb-vector -d
+```
+
+## 🚀2. Consume Microservice
+
+### 2.1 Consume Upload API
+
+Once the data preparation microservice for MariaDB Vector is started, one can use the below command to invoke the microservice to convert documents/links to embeddings and save them to the vector store.
+ +```bash +export document="/path/to/document" +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"path":"${document}"}' \ + http://localhost:6007/v1/dataprep/ingest +``` + +### 2.2 Consume get API + +To get the structure of the uploaded files, use the `get` API endpoint: + +```bash +curl -X POST \ + -H "Content-Type: application/json" \ + http://localhost:6007/v1/dataprep/get +``` + +A JSON formatted response similar to the one below will follow: + +```json +[ + { + "name": "uploaded_file_1.txt", + "id": "uploaded_file_1.txt", + "type": "File", + "parent": "" + }, + { + "name": "uploaded_file_2.txt", + "id": "uploaded_file_2.txt", + "type": "File", + "parent": "" + } +] +``` + +### 2.3 Consume delete API + +To delete uploaded files/links, use the `delete` API endpoint. + +The `file_path` is the `id` returned by the `/v1/dataprep/get` API. + +```bash +# delete link +curl -X POST "http://${HOST_IP}:5000/v1/dataprep/delete" + -H "Content-Type: application/json" \ + -d '{"file_path": "https://www.ces.tech/.txt"}' + +# delete file +curl -X POST "http://${HOST_IP}:5000/v1/dataprep/delete" + -H "Content-Type: application/json" \ + -d '{"file_path": "uploaded_file_1.txt"}' + +# delete all files and links +curl -X POST "http://${HOST_IP}:5000/v1/dataprep/delete" + -H "Content-Type: application/json" \ + -d '{"file_path": "all"}' +``` diff --git a/comps/dataprep/src/integrations/mariadb.py b/comps/dataprep/src/integrations/mariadb.py new file mode 100644 index 0000000000..34b0561159 --- /dev/null +++ b/comps/dataprep/src/integrations/mariadb.py @@ -0,0 +1,415 @@ +# Copyright (C) 2025 MariaDB Foundation +# SPDX-License-Identifier: Apache-2.0 + +import hashlib +import json +import os +from pathlib import Path +from typing import ( + List, + Optional, + Union, +) +from urllib.parse import urlparse + +import mariadb +from fastapi import Body, File, Form, HTTPException, UploadFile +from langchain.text_splitter import RecursiveCharacterTextSplitter +from 
langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings +from langchain_huggingface import HuggingFaceEmbeddings +from langchain_mariadb.vectorstores import MariaDBStore, MariaDBStoreSettings + +from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.cores.proto.api_protocol import DataprepRequest +from comps.dataprep.src.utils import ( + create_upload_folder, + document_loader, + encode_filename, + get_file_structure, + get_separators, + parse_html_new, + remove_folder_with_ignore, + save_content_to_local_disk, +) + + +# A no-op logger that does nothing +class NullLogger: + def info(self, *args, **kwargs): + pass + + def debug(self, *args, **kwargs): + pass + + def warning(self, *args, **kwargs): + pass + + def error(self, *args, **kwargs): + pass + + def critical(self, *args, **kwargs): + pass + + def exception(self, *args, **kwargs): + pass + + +logger = CustomLogger("opea_dataprep_mariadbvector") +logflag = os.getenv("LOGFLAG", False) +if not logflag: + logger = NullLogger() + +# Embedding model +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") +# TEI Embedding endpoints +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") + +MARIADB_CONNECTION_URL = os.getenv("MARIADB_CONNECTION_URL", "localhost") + +# Vector Index Configuration +MARIADB_COLLECTION_NAME = os.getenv("MARIADB_COLLECTION_NAME", "rag_mariadbvector") + +# chunk parameters +CHUNK_SIZE = os.getenv("CHUNK_SIZE", 1500) +CHUNK_OVERLAP = os.getenv("CHUNK_OVERLAP", 100) + + +class DocumentsTable: + """Table for storing documents.""" + + def __init__(self, conn_args): + self._table_name = "langchain_documents" + self.conn_args = conn_args + self.__post__init__() + + def __post__init__(self): + self.create_table_if_not_exists() + + def create_table_if_not_exists(self): + """Create the documents 
table if it does not exist.""" + connection = mariadb.connect(**self.conn_args) + cursor = connection.cursor() + cursor.execute( + f""" + CREATE TABLE IF NOT EXISTS {self._table_name} ( + id VARCHAR(32) PRIMARY KEY, + name TEXT, + embedding_ids JSON + ) + """ + ) + connection.commit() + cursor.close() + connection.close() + + def insert_document_ids(self, id: str, name: str, embedding_ids: list): + """Insert a document into the documents table.""" + connection = mariadb.connect(**self.conn_args) + cursor = connection.cursor() + cursor.execute( + f"INSERT INTO {self._table_name} (id, name, embedding_ids) VALUES (?, ?, ?)", + (id, name, json.dumps(embedding_ids)), + ) + connection.commit() + cursor.close() + connection.close() + + def delete_document(self, id: str): + """Delete a document from the documents table.""" + connection = mariadb.connect(**self.conn_args) + cursor = connection.cursor() + cursor.execute(f"DELETE FROM {self._table_name} WHERE id = ?", (id,)) + connection.commit() + cursor.close() + connection.close() + + def delete_all_documents(self): + """Delete all documents from the documents table.""" + connection = mariadb.connect(**self.conn_args) + cursor = connection.cursor() + cursor.execute(f"DELETE FROM {self._table_name}") + connection.commit() + cursor.close() + connection.close() + + def get_document_emb_ids(self, id: str): + """Get the embedding ids for a document.""" + connection = mariadb.connect(**self.conn_args) + cursor = connection.cursor() + cursor.execute(f"SELECT embedding_ids FROM {self._table_name} WHERE id = ?", (id,)) + result = cursor.fetchone() + cursor.close() + connection.close() + if result: + return json.loads(result[0]) + return None + + +@OpeaComponentRegistry.register("OPEA_DATAPREP_MARIADBVECTOR") +class OpeaMariaDBDataprep(OpeaComponent): + """Dataprep component for MariaDBStore ingestion and search services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, 
ServiceType.DATAPREP.name.lower(), description, config) + args = urlparse(MARIADB_CONNECTION_URL) + + self.conn_args = { + "host": args.hostname, + "port": args.port, + "user": args.username, + "password": args.password, + "database": args.path[1:], + } + + self.upload_folder = Path("./uploaded_files/") + self.embedder = self._initialize_embedder() + + # Perform health check + health_status = self.check_health() + if not health_status: + logger.error("OpeaMariaDBDataprep health check failed.") + + self.store = self._initialize_client() + self.documents = DocumentsTable(self.conn_args) + + def _initialize_embedder(self): + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) + else: + # create embeddings using local embedding model + logger.info(f"[ init embedder ] LOCAL_EMBEDDING_MODEL:{EMBED_MODEL}") + embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL) + return embeddings + + def _initialize_client(self) -> MariaDBStore: + store = MariaDBStore( + embeddings=self.embedder, + collection_name=MARIADB_COLLECTION_NAME, + datasource=MARIADB_CONNECTION_URL, + config=MariaDBStoreSettings(lazy_init=True), + ) + return store + + def check_health(self) -> bool: + """Checks mariadb server health.""" + try: + connection = mariadb.connect(**self.conn_args) + return True + except mariadb.Error as e: + logger.error(f"Error connect to MariaDB Server: {e}") + return False + + except Exception as e: + logger.error(f"An unexpected error occurred: {e}") + return False + finally: + try: + connection.close() + except Exception as e: + logger.error(f"Error closing connection: {e}") + + def invoke(self, *args, **kwargs): + pass + + async def _save_file_to_local_disk(self, save_path: Path, file): + with save_path.open("wb") as fout: + try: + content = await file.read() + fout.write(content) + except Exception as e: + logger.error(f"Write file failed. Exception: {e}") + raise HTTPException(status_code=500, detail=f"Write file {save_path} failed. 
Exception: {e}") + + def _store_texts(self, doc_path: str, chunks: list[str], batch_size: int = 32): + num_chunks = len(chunks) + metadata = [{"doc_name": doc_path}] + doc_id = hashlib.md5(str(doc_path).encode("utf-8"), usedforsecurity=False).hexdigest() + doc_emb_ids = [] + for i in range(0, num_chunks, batch_size): + batch_texts = chunks[i : i + batch_size] + batch_ids = self.store.add_texts( + texts=batch_texts, + metadatas=metadata * len(batch_texts), + ) + doc_emb_ids.extend(batch_ids) + self.documents.insert_document_ids(id=doc_id, name=doc_path, embedding_ids=doc_emb_ids) + if logflag: + logger.info(f"Processed batch {i // batch_size + 1} / {(num_chunks - 1) // batch_size + 1}") + + async def _ingest_doc_to_mariadb(self, path: str): + """Ingest document to mariadb.""" + doc_path = DocPath(path=path).path + logger.info(f"Parsing document {doc_path}.") + + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True, separators=get_separators() + ) + + content = await document_loader(doc_path) + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(doc_path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + logger.info(f"Done preprocessing. 
Created {len(chunks)} chunks of the original file.") + + self._store_texts(doc_path, chunks) + return True + + async def _ingest_link_to_mariadb(self, link_list: List[str]): + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True, separators=get_separators() + ) + + for link in link_list: + content = parse_html_new([link], chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP) + logger.info(f"[ ingest link ] link: {link} content: {content}") + encoded_link = encode_filename(link) + save_path = self.upload_folder / (encoded_link + ".txt") + doc_path = self.upload_folder / (link + ".txt") + logger.info(f"[ ingest link ] save_path: {save_path}") + await save_content_to_local_disk(str(save_path), content) + + chunks = text_splitter.split_text(content) + self._store_texts(str(doc_path), chunks) + return True + + async def ingest_files( + self, + input: DataprepRequest, + ): + """Ingest files/links content into database. + + Save in the format of vector[768]. + Returns '{"status": 200, "message": "Data preparation succeeded"}' if successful. + Args: + input (DataprepRequest): Model containing the following parameters: + files (Union[UploadFile, List[UploadFile]], optional): A file or a list of files to be ingested. Defaults to File(None). + link_list (str, optional): A list of links to be ingested. Defaults to Form(None). + chunk_size (int, optional): The size of the chunks to be split. Defaults to Form(1500). + chunk_overlap (int, optional): The overlap between chunks. Defaults to Form(100). + process_table (bool, optional): Whether to process tables in PDFs. Defaults to Form(False). + table_strategy (str, optional): The strategy to process tables in PDFs. Defaults to Form("fast"). 
+ """ + files = input.files + link_list = input.link_list + + logger.info(f"files:{files}") + logger.info(f"link_list:{link_list}") + if files and link_list: + raise HTTPException(status_code=400, detail="Provide either a file or a string list, not both.") + + if not files and not link_list: + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + if files: + if not isinstance(files, list): + files = [files] + + self.upload_folder.mkdir(parents=True, exist_ok=True) + for file in files: + save_path = self.upload_folder / file.filename + await self._save_file_to_local_disk(save_path, file) + await self._ingest_doc_to_mariadb(str(save_path)) + logger.info(f"Successfully saved file {save_path}") + + if link_list: + try: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + await self._ingest_link_to_mariadb(link_list) + logger.info(f"Successfully saved link list {link_list}") + except json.JSONDecodeError: + raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") + + result = {"status": 200, "message": "Data preparation succeeded"} + logger.info(result) + return result + + async def get_files(self): + """Get file structure from database in the format of + { + "name": "File Name", + "id": "File Name", + "type": "File", + "parent": "", + }""" + logger.info("[ dataprep - get file ] start to get file structure") + + if not self.upload_folder.exists(): + logger.info("No file uploaded, return empty list.") + return [] + + file_content = get_file_structure(str(self.upload_folder)) + logger.info(file_content) + return file_content + + def _delete_embedding(self, doc_path: Path): + doc_id = hashlib.md5(str(doc_path).encode("utf-8"), usedforsecurity=False).hexdigest() + doc_emb_ids = self.documents.get_document_emb_ids(doc_id) + self.store.delete(ids=doc_emb_ids) + 
self.documents.delete_document(doc_id) + + def _delete_all_embeddings(self): + self.store.delete_collection() + self.documents.delete_all_documents() + + def _delete_all_files(self): + """Delete all files in the upload folder.""" + logger.info("[dataprep - del] delete all files") + remove_folder_with_ignore(str(self.upload_folder)) + self._delete_all_embeddings() + logger.info("[dataprep - del] successfully delete all files.") + create_upload_folder(str(self.upload_folder)) + + async def delete_files(self, file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - specific file path (e.g. /path/to/file.txt) + - "all": delete all files uploaded + """ + if file_path == "all": + self._delete_all_files() + logger.info({"status": True}) + return {"status": True} + + # Case when file_path != all + delete_path = self.upload_folder / encode_filename(file_path) + logger.info(f"[dataprep - del] delete_path: {delete_path}") + + if not delete_path.exists(): + raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") + + if not delete_path.is_file(): + logger.info("[dataprep - del] delete folder is not supported for now.") + logger.info({"status": False}) + return {"status": False} + self._delete_embedding(delete_path) + delete_path.unlink() + logger.info({"status": True}) + return {"status": True} diff --git a/comps/dataprep/src/opea_dataprep_microservice.py b/comps/dataprep/src/opea_dataprep_microservice.py index caedafb4ab..4b8e5847bf 100644 --- a/comps/dataprep/src/opea_dataprep_microservice.py +++ b/comps/dataprep/src/opea_dataprep_microservice.py @@ -9,6 +9,7 @@ from fastapi import Body, Depends, File, Form, HTTPException, Request, UploadFile from integrations.arangodb import OpeaArangoDataprep from integrations.elasticsearch import OpeaElasticSearchDataprep +from integrations.mariadb import OpeaMariaDBDataprep from integrations.milvus import OpeaMilvusDataprep from integrations.neo4j_llamaindex import OpeaNeo4jLlamaIndexDataprep from integrations.opensearch import OpeaOpenSearchDataprep diff --git a/comps/dataprep/src/requirements.txt b/comps/dataprep/src/requirements.txt index 69d82b2129..2c8109d55b 100644 --- a/comps/dataprep/src/requirements.txt +++ b/comps/dataprep/src/requirements.txt @@ -20,6 +20,7 @@ langchain-arangodb langchain-community langchain-elasticsearch langchain-experimental +langchain-mariadb langchain-openai langchain-pinecone langchain-redis @@ -33,6 +34,7 @@ llama-index-embeddings-text-embeddings-inference llama-index-graph-stores-neo4j llama-index-llms-openai llama-index-llms-openai-like +mariadb markdown moviepy neo4j diff --git a/comps/retrievers/README.md b/comps/retrievers/README.md index 9cec099365..d68a450252 100644 --- a/comps/retrievers/README.md +++ b/comps/retrievers/README.md @@ -41,3 +41,7 @@ For details, please refer to this [readme](src/README_neo4j.md) ## Retriever Microservice with Pathway For details, please refer to this [readme](src/README_pathway.md) + +## Retriever Microservice with MariaDB Vector + +For 
details, please refer to this [readme](src/README_mariadb.md) diff --git a/comps/retrievers/deployment/docker_compose/compose.yaml b/comps/retrievers/deployment/docker_compose/compose.yaml index a1cbd5e7bd..85261e8047 100644 --- a/comps/retrievers/deployment/docker_compose/compose.yaml +++ b/comps/retrievers/deployment/docker_compose/compose.yaml @@ -14,6 +14,7 @@ include: - ../../../third_parties/tgi/deployment/docker_compose/compose.yaml - ../../../third_parties/vdms/deployment/docker_compose/compose.yaml - ../../../third_parties/arangodb/deployment/docker_compose/compose.yaml + - ../../../third_parties/mariadb/deployment/docker_compose/compose.yaml services: retriever: @@ -225,6 +226,17 @@ services: arango-vector-db: condition: service_healthy + retriever-mariadb-vector: + extends: retriever + container_name: retriever-mariadb-vector + environment: + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_MARIADBVECTOR} + MARIADB_CONNECTION_URL: ${MARIADB_CONNECTION_URL:-mariadb+mariadbconnector://dbuser:password@mariadb-server:3306/vectordb} + LOGFLAG: ${LOGFLAG} + depends_on: + mariadb-server: + condition: service_healthy + networks: default: driver: bridge diff --git a/comps/retrievers/src/Dockerfile b/comps/retrievers/src/Dockerfile index 7f4580d361..a5ae329e66 100644 --- a/comps/retrievers/src/Dockerfile +++ b/comps/retrievers/src/Dockerfile @@ -9,7 +9,9 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin libcairo2 \ libgl1-mesa-glx \ libglib2.0-0 \ - libjemalloc-dev + libjemalloc-dev \ + libmariadb-dev \ + build-essential RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/retrievers/src/README_mariadb.md b/comps/retrievers/src/README_mariadb.md new file mode 100644 index 0000000000..03ffdc2872 --- /dev/null +++ b/comps/retrievers/src/README_mariadb.md @@ -0,0 +1,62 @@ +# Retriever Microservice + +This retriever microservice is a highly efficient search service designed for handling 
and retrieving embedding vectors. It operates by receiving an embedding vector as input and conducting a similarity search against vectors stored in a VectorDB database. Users must specify the VectorDB's URL and the index name, and the service searches within that index to find documents with the highest similarity to the input vector. + +The service primarily utilizes similarity measures in vector space to rapidly retrieve contextually similar documents. The vector-based retrieval approach is particularly suited for handling large datasets, offering fast and accurate search results that significantly enhance the efficiency and quality of information retrieval. + +Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. + +## 🚀1. Start Microservice with Docker + +### 1.1 Build Docker Image + +```bash +cd GenAIComps +docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . +``` + +### 1.2 Run Docker with CLI (Option A) + +#### 1.2.1 Start MariaDB Server + +Please refer to this [readme](../../third_parties/mariadb/src/README.md). +You need to ingest your knowledge documents into the vector database. 
+ +#### 1.2.2 Start the retriever service + +```bash +export HOST_IP=$(hostname -I | awk '{print $1}') +# If you've configured the server with the default env values then: +export MARIADB_CONNECTION_URL=mariadb+mariadbconnector://dbuser:password@${HOST_IP}:3306/vectordb + +docker run -d --rm --name="retriever-mariadb-vector" -p 7000:7000 --ipc=host -e MARIADB_CONNECTION_URL=$MARIADB_CONNECTION_URL -e RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_MARIADBVECTOR" opea/retriever:latest +``` + +### 1.3 Run with Docker Compose (Option B) + +```bash +cd comps/retrievers/deployment/docker_compose +docker compose -f compose.yaml up retriever-mariadb-vector -d +``` + +## 🚀2. Consume Retriever Service + +### 2.1 Check Service Status + +```bash +curl http://${HOST_IP}:7000/v1/health_check \ + -X GET \ + -H 'Content-Type: application/json' +``` + +### 2.2 Consume Embedding Service + +To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. + +```bash +export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") +curl http://${HOST_IP}:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' +``` diff --git a/comps/retrievers/src/integrations/config.py b/comps/retrievers/src/integrations/config.py index 95eb7c16ce..8514192611 100644 --- a/comps/retrievers/src/integrations/config.py +++ b/comps/retrievers/src/integrations/config.py @@ -237,3 +237,9 @@ def format_opensearch_conn_from_env(): OPENAI_EMBED_MODEL = os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small") OPENAI_CHAT_ENABLED = os.getenv("OPENAI_CHAT_ENABLED", "true").lower() == "true" OPENAI_EMBED_ENABLED = os.getenv("OPENAI_EMBED_ENABLED", "true").lower() == "true" + +####################################################### +# MariaDB Vector # +####################################################### 
+MARIADB_CONNECTION_URL = os.getenv("MARIADB_CONNECTION_URL", "localhost") +MARIADB_COLLECTION_NAME = os.getenv("MARIADB_COLLECTION_NAME", "rag_mariadbvector") diff --git a/comps/retrievers/src/integrations/mariadb.py b/comps/retrievers/src/integrations/mariadb.py new file mode 100644 index 0000000000..df997e7c47 --- /dev/null +++ b/comps/retrievers/src/integrations/mariadb.py @@ -0,0 +1,153 @@ +# Copyright (C) 2025 MariaDB Foundation +# SPDX-License-Identifier: Apache-2.0 + + +import os +from urllib.parse import urlparse + +import mariadb +from fastapi import HTTPException +from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings +from langchain_huggingface import HuggingFaceEmbeddings +from langchain_mariadb.vectorstores import MariaDBStore, MariaDBStoreSettings + +from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType + +from .config import ( + EMBED_MODEL, + HUGGINGFACEHUB_API_TOKEN, + MARIADB_COLLECTION_NAME, + MARIADB_CONNECTION_URL, + TEI_EMBEDDING_ENDPOINT, +) + + +class NullLogger: + def info(self, *args, **kwargs): + pass + + def debug(self, *args, **kwargs): + pass + + def warning(self, *args, **kwargs): + pass + + def error(self, *args, **kwargs): + pass + + def critical(self, *args, **kwargs): + pass + + def exception(self, *args, **kwargs): + pass + + +logger = CustomLogger("mariadbvector_retrievers") +logflag = os.getenv("LOGFLAG", False) +if not logflag: + logger = NullLogger() + + +@OpeaComponentRegistry.register("OPEA_RETRIEVER_MARIADBVECTOR") +class OpeaMARIADBVectorRetriever(OpeaComponent): + """A specialized retriever component derived from OpeaComponent for mariadb vector retriever services. + + Attributes: + client (MariaDBStore): An instance of the MariaDBStore client for vector database operations. 
+ """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config) + + args = urlparse(MARIADB_CONNECTION_URL) + + self.conn_args = { + "host": args.hostname, + "port": args.port, + "user": args.username, + "password": args.password, + "database": args.path[1:], + } + + self.embedder = self._initialize_embedder() + + health_status = self.check_health() + if not health_status: + logger.error("OpeaMARIADBVectorRetriever health check failed.") + + self.store = self._initialize_client() + + def _initialize_embedder(self): + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) + else: + # create embeddings using local embedding model + logger.info(f"[ init embedder ] LOCAL_EMBEDDING_MODEL:{EMBED_MODEL}") + embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL) + return embeddings + + def _initialize_client(self) -> MariaDBStore: + store = MariaDBStore( + embeddings=self.embedder, + collection_name=MARIADB_COLLECTION_NAME, + datasource=MARIADB_CONNECTION_URL, + config=MariaDBStoreSettings(lazy_init=True), + ) + return store + + def check_health(self) -> bool: + """Checks mariadb server health.""" + try: + connection = mariadb.connect(**self.conn_args) + return True + except mariadb.Error as e: + logger.error(f"Error connect to MariaDB Server: {e}") + return False + + except Exception as e: + logger.error(f"An unexpected error occurred: {e}") + return False + finally: + try: + connection.close() + except Exception as e: + logger.error(f"Error closing connection: {e}") + + async def invoke(self, input: EmbedDoc) -> list: + """Search the MariaDB Vector index for the most similar documents to the input query. + + Args: + input (EmbedDoc): The input query to search for. + Output: + list: The retrieved documents. 
+ """ + logger.info(f"[ similarity search ] input: {input}") + + result = [] + try: + result = await self.store.asimilarity_search_by_vector(embedding=input.embedding) + logger.info(f"[ similarity search ] search result: {result}") + return result + except mariadb.Error as e: + logger.error(f"A database error occurred during similarity search: {e}") + raise HTTPException(status_code=500, detail="A database error occurred during similarity search") + except Exception as e: + logger.error(f"An unexpected error occurred: {e}") + raise HTTPException(status_code=500, detail="An unexpected error occurred") diff --git a/comps/retrievers/src/opea_retrievers_microservice.py b/comps/retrievers/src/opea_retrievers_microservice.py index 54015d3a20..2d3bbf7873 100644 --- a/comps/retrievers/src/opea_retrievers_microservice.py +++ b/comps/retrievers/src/opea_retrievers_microservice.py @@ -10,6 +10,7 @@ # import for retrievers component registration from integrations.elasticsearch import OpeaElasticsearchRetriever +from integrations.mariadb import OpeaMARIADBVectorRetriever from integrations.milvus import OpeaMilvusRetriever from integrations.neo4j import OpeaNeo4jRetriever from integrations.opensearch import OpeaOpensearchRetriever diff --git a/comps/retrievers/src/requirements.txt b/comps/retrievers/src/requirements.txt index 9b27448dd9..8e360866c2 100644 --- a/comps/retrievers/src/requirements.txt +++ b/comps/retrievers/src/requirements.txt @@ -12,6 +12,7 @@ graspologic haystack-ai==2.3.1 langchain-arangodb langchain-elasticsearch +langchain-mariadb langchain-openai langchain-pinecone langchain-vdms>=0.1.4 @@ -25,6 +26,7 @@ llama-index-llms-openai llama-index-llms-openai-like llama-index-llms-text-generation-inference llama_index_graph_stores_neo4j +mariadb neo4j numpy opensearch-py diff --git a/comps/third_parties/mariadb/deployment/docker_compose/compose.yaml b/comps/third_parties/mariadb/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..b519c981bc 
--- /dev/null +++ b/comps/third_parties/mariadb/deployment/docker_compose/compose.yaml @@ -0,0 +1,21 @@ +# Copyright (C) 2025 MariaDB Foundation +# SPDX-License-Identifier: Apache-2.0 + +services: + mariadb-server: + container_name: mariadb-server + image: mariadb:latest + ports: + - "${MARIADB_PORT:-3306}:3306" + restart: always + environment: + - MARIADB_DATABASE=${MARIADB_DATABASE:-vectordb} + - MARIADB_USER=${MARIADB_USER:-dbuser} + - MARIADB_PASSWORD=${MARIADB_PASSWORD:-password} + - MARIADB_RANDOM_ROOT_PASSWORD=1 + healthcheck: + test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"] + start_period: 10s + interval: 10s + timeout: 5s + retries: 3 diff --git a/comps/third_parties/mariadb/src/README.md b/comps/third_parties/mariadb/src/README.md new file mode 100644 index 0000000000..aa6ea5d90f --- /dev/null +++ b/comps/third_parties/mariadb/src/README.md @@ -0,0 +1,19 @@ +# Start MariaDB Server + +**MariaDB Vector** was introduced starting with server version 11.7 +For more details please see the [official documentation](https://mariadb.com/kb/en/vectors/). + +## 1. Configure the server + +```bash +export MARIADB_CONTAINER_IMAGE="mariadb:latest" +export MARIADB_USER=dbuser +export MARIADB_PASSWORD=password +export MARIADB_DATABASE=vectordb +``` + +## 2. 
Run MariaDB Server + +```bash +docker run --name mariadb-server -e MARIADB_USER=${MARIADB_USER} -e MARIADB_RANDOM_ROOT_PASSWORD=1 -e MARIADB_DATABASE=${MARIADB_DATABASE} -e MARIADB_PASSWORD=${MARIADB_PASSWORD} -d -p 3306:3306 ${MARIADB_CONTAINER_IMAGE} +``` diff --git a/comps/third_parties/mariadb/src/__init__.py b/comps/third_parties/mariadb/src/__init__.py new file mode 100644 index 0000000000..49c2a10929 --- /dev/null +++ b/comps/third_parties/mariadb/src/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 MariaDB Foundation +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/dataprep/test_dataprep_mariadb.sh b/tests/dataprep/test_dataprep_mariadb.sh new file mode 100644 index 0000000000..7765efb417 --- /dev/null +++ b/tests/dataprep/test_dataprep_mariadb.sh @@ -0,0 +1,106 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export DATAPREP_PORT="11105" +export TAG="comps" + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +source ${SCRIPT_DIR}/dataprep_utils.sh + +function build_docker_images() { + cd $WORKPATH + + # build dataprep image for mariadb + docker build --no-cache -t opea/dataprep:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/src/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/dataprep built fail" + exit 1 + else + echo "opea/dataprep built successful" + fi +} + +function start_service() { + export host_ip=${ip_address} + export EMBEDDING_LENGTH=768 + export MARIADB_PORT=11617 + export DATAPREP_PORT=11618 + export MARIADB_USER=testuser + export MARIADB_PASSWORD=testpwd + export MARIADB_DATABASE=vectordb + export MARIADB_CONNECTION_URL=mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@$host_ip:$MARIADB_PORT/${MARIADB_DATABASE} + export LOGFLAG=True + + service_name="dataprep-mariadb-vector" + + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + + check_healthy "dataprep-mariadb-vector" || exit 1 +} + +function validate_microservice() { + # test /v1/dataprep/ingest upload file + ingest_doc ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - doc" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + ingest_docx ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - docx" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + ingest_pdf ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - pdf" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + ingest_ppt ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - ppt" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_upload_file.log + + ingest_pptx ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - pptx" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + ingest_txt ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - txt" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + ingest_xlsx ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - xlsx" "Data preparation succeeded" dataprep-mariadb-vector 
${LOG_PATH}/dataprep_mariadb.log + + # test /v1/dataprep/ingest upload link + ingest_external_link ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - link" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + # test /v1/dataprep/get + get_all ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - get" '{"name":' dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + # test /v1/dataprep/delete + delete_all ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - del" '{"status":true}' dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=dataprep-mariadb-vector") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + + cid=$(docker ps -aq --filter "name=mariadb-server") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/retrievers/test_retrievers_mariadb.sh b/tests/retrievers/test_retrievers_mariadb.sh new file mode 100644 index 0000000000..84785f4ac6 --- /dev/null +++ b/tests/retrievers/test_retrievers_mariadb.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +export host_ip=$(hostname -I | awk '{print $1}') +service_name="retriever-mariadb-vector" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/retriever built fail" + exit 1 + else + echo "opea/retriever built successful" + fi +} + +function start_service() { + export MARIADB_PORT=11617 + export RETRIEVER_PORT=11618 + export MARIADB_USER=testuser + export MARIADB_PASSWORD=testpwd + export MARIADB_DATABASE=vectordb + export HF_TOKEN=${HF_TOKEN} + export MARIADB_CONNECTION_URL=mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@$host_ip:$MARIADB_PORT/${MARIADB_DATABASE} + export LOGFLAG=True + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 1m +} + +function validate_microservice() { + test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + + result=$(http_proxy='' + curl http://${host_ip}:$RETRIEVER_PORT/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs mariadb-server >> ${LOG_PATH}/vectorstore.log + docker logs ${service_name} >> ${LOG_PATH}/retriever-mariadb-vector.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down --remove-orphans + cid=$(docker ps -aq --filter "name=mariadb-server") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main From 5e1656f3fc3f51c7625cb37671f97ded1603f1b3 Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Wed, 7 May 2025 11:26:55 +0800 Subject: [PATCH 22/34] update PR reviewers (#1651) Signed-off-by: chensuyue Signed-off-by: alexsin368 --- .github/CODEOWNERS | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4f11bb8eeb..f54da65f67 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,16 +1,15 @@ # Code owners will review PRs within their respective folders. # Typically, ownership is organized at the second-level subdirectory under the homepage -* liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com +* liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com kaokao.lv@intel.com minmin.hou@intel.com rita.brugarolas.brufau@intel.com /.github/ suyue.chen@intel.com ze.pan@intel.com -/comps/third_parties/ liang1.lv@intel.com letong.han@intel.com -/comps/agent/ kaokao.lv@intel.com minmin.hou@intel.com +/comps/agent/ feng.tian@intel.com kaokao.lv@intel.com minmin.hou@intel.com /comps/animation/ qing.yao@intel.com chun.tao@intel.com /comps/asr/ sihan.chen@intel.com liang1.lv@intel.com -/comps/chathistory/ yogesh.pandey@intel.com -/comps/cores/ liang1.lv@intel.com feng.tian@intel.com letong.han@intel.com sihan.chen@intel.com +/comps/chathistory/ yogesh.pandey@intel.com sihan.chen@intel.com +/comps/cores/ liang1.lv@intel.com feng.tian@intel.com letong.han@intel.com sihan.chen@intel.com kaokao.lv@intel.com minmin.hou@intel.com /comps/dataprep/ xinyu.ye@intel.com letong.han@intel.com -/comps/embeddings/ kaokao.lv@intel.com +/comps/embeddings/ kaokao.lv@intel.com letong.han@intel.com /comps/feedback_management/ hoong.tee.yeoh@intel.com 
liang1.lv@intel.com /comps/finetuning/ xinyu.ye@intel.com kaokao.lv@intel.com /comps/guardrails/ liang1.lv@intel.com letong.han@intel.com @@ -18,11 +17,15 @@ /comps/image2video/ xinyu.ye@intel.com qing.yao@intel.com /comps/llms/ liang1.lv@intel.com letong.han@intel.com /comps/lvms/ sihan.chen@intel.com liang1.lv@intel.com -/comps/prompt_registry/ hoong.tee.yeoh@intel.com -/comps/ragas/ kaokao.lv@intel.com liang1.lv@intel.com +/comps/prompt_registry/ hoong.tee.yeoh@intel.com letong.han@intel.com /comps/rerankings/ kaokao.lv@intel.com liang1.lv@intel.com /comps/retrievers/ kaokao.lv@intel.com liang1.lv@intel.com +/comps/struct2graph/ siddhi.velankar@intel.com kaokao.lv@intel.com +/comps/text2cypher/ jean1.yu@intel.com sihan.chen@intel.com +/comps/text2graph/ sharath.raghava@intel.com letong.han@intel.com /comps/text2image/ xinyu.ye@intel.com liang1.lv@intel.com +/comps/text2kg/ siddhi.velankar@intel.com letong.han@intel.com /comps/text2sql/ yogesh.pandey@intel.com qing.yao@intel.com +/comps/third_parties/ liang1.lv@intel.com letong.han@intel.com /comps/tts/ sihan.chen@intel.com letong.han@intel.com /comps/web_retrievers/ sihan.chen@intel.com liang1.lv@intel.com From 69fea0d6a88b7b10a6aac70b9a2d6a9f3587c91e Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Wed, 7 May 2025 14:43:32 +0800 Subject: [PATCH 23/34] Expand test matrix, find all tests use 3rd party Dockerfiles (#1676) * Expand test matrix, find all tests use 3rd party Dockerfiles Signed-off-by: chensuyue Signed-off-by: alexsin368 --- .github/workflows/scripts/get_test_matrix.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/scripts/get_test_matrix.sh b/.github/workflows/scripts/get_test_matrix.sh index 0ad28e8749..8b93662796 100644 --- a/.github/workflows/scripts/get_test_matrix.sh +++ b/.github/workflows/scripts/get_test_matrix.sh @@ -60,12 +60,20 @@ function find_test_1() { fi fi elif [[ $(echo ${service_path} | grep "third_parties") ]]; then - # new org 
with `src` and `third_parties` folder + # new org with `src` and `third_parties` folder service_name=$(echo $service_path | sed 's:/src::' | tr '/' '_' | cut -c7-) # comps/third_parties/vllm/src -> third_parties_vllm find_test=$(find ./tests -type f -name test_${service_name}*.sh) || true if [ "$find_test" ]; then fill_in_matrix "$find_test" fi + # find other tests use 3rd party Dockerfiles + dockerfile_list=$(ls ${service_path}/Dockerfile*) || true + for dockerfile_path in ${dockerfile_list}; do + find_test=$(grep -rl ${dockerfile_path} ./tests) || true + if [ "$find_test" ]; then + fill_in_matrix "$find_test" + fi + done else # old org without 'src' folder service_name=$(echo $service_path | tr '/' '_' | cut -c7-) # comps/retrievers/redis/langchain -> retrievers_redis_langchain @@ -174,6 +182,7 @@ function main() { echo "run_matrix=${run_matrix}" echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT + is_empty="true" if [[ $(echo "$run_matrix" | grep -c "service") != 0 ]]; then is_empty="false" fi From 388c264408ef3e9a51341bbd41690aac3bb0bd51 Mon Sep 17 00:00:00 2001 From: Ying Hu Date: Sat, 10 May 2025 21:56:30 +0800 Subject: [PATCH 24/34] fix the typo of README.md Comp (#1679) Update README.md for first entry of OPEA Signed-off-by: alexsin368 --- README.md | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 85221a6bb1..c8ba72baf2 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ This initiative empowers the development of high-quality Generative AI applicati ## GenAIComps -GenAIComps provides a suite of microservices, leveraging a service composer to assemble a mega-service tailored for real-world Enterprise AI applications. All the microservices are containerized, allowing cloud native deployment. Checkout how the microservices are used in [GenAIExamples](https://github.com/opea-project/GenAIExamples). 
+GenAIComps provides a suite of microservices, leveraging a service composer to assemble a mega-service tailored for real-world Enterprise AI applications. All the microservices are containerized, allowing cloud native deployment. Check out how the microservices are used in [GenAIExamples](https://github.com/opea-project/GenAIExamples) +or [Getting Start with OPEA](https://opea-project.github.io/latest/getting-started/README.html) to deploy the ChatQnA application from OPEA GenAIExamples across multiple cloud platforms. ![Architecture](https://i.imgur.com/r5J0i8j.png) @@ -36,27 +37,27 @@ This modular approach allows developers to independently develop, deploy, and sc The initially supported `Microservices` are described in the below table. More `Microservices` are on the way. -| MicroService | Framework | Model | Serving | HW | Description | -| ------------------------------------------------- | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | ------ | ------------------------------------- | -| [Embedding](./comps/embeddings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Embedding on Gaudi2 | -| [Embedding](./comps/embeddings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Embedding on Xeon CPU | -| [Retriever](./comps/retrievers/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | 
[BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Retriever on Xeon CPU | -| [Reranking](./comps/rerankings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Reranking on Gaudi2 | -| [Reranking](./comps/rerankings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BBAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Reranking on Xeon CPU | -| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Gaudi2 | Audio-Speech-Recognition on Gaudi2 | -| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Xeon | Audio-Speech-RecognitionS on Xeon CPU | -| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Gaudi2 | Text-To-Speech on Gaudi2 | -| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Xeon | Text-To-Speech on Xeon CPU | -| [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Gaudi2 | Dataprep on Gaudi2 | -| [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Xeon | Dataprep on Xeon CPU | -| [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | 
[BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Gaudi2 | Dataprep on Gaudi2 | -| [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Xeon | Dataprep on Xeon CPU | -| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI Gaudi](https://github.com/huggingface/tgi-gaudi) | Gaudi2 | LLM on Gaudi2 | -| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon | LLM on Xeon CPU | -| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Gaudi2 | LLM on Gaudi2 | -| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Xeon | LLM on Xeon CPU | -| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [vLLM](https://github.com/vllm-project/vllm/) | Gaudi2 | LLM on Gaudi2 | -| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | 
[Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [vLLM](https://github.com/vllm-project/vllm/) | Xeon | LLM on Xeon CPU | +| MicroService | Framework | Model | Serving | HW | Description | +| ------------------------------------------------- | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | ------ | ------------------------------------ | +| [Embedding](./comps/embeddings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Embedding on Gaudi2 | +| [Embedding](./comps/embeddings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Embedding on Xeon CPU | +| [Retriever](./comps/retrievers/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Retriever on Xeon CPU | +| [Reranking](./comps/rerankings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Reranking on Gaudi2 | +| [Reranking](./comps/rerankings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | 
[BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Reranking on Xeon CPU | +| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Gaudi2 | Audio-Speech-Recognition on Gaudi2 | +| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Xeon | Audio-Speech-Recognition on Xeon CPU | +| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Gaudi2 | Text-To-Speech on Gaudi2 | +| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Xeon | Text-To-Speech on Xeon CPU | +| [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Gaudi2 | Dataprep on Gaudi2 | +| [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Xeon | Dataprep on Xeon CPU | +| [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Gaudi2 | Dataprep on Gaudi2 | +| [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Xeon | Dataprep on Xeon CPU | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI Gaudi](https://github.com/huggingface/tgi-gaudi) | Gaudi2 | LLM on Gaudi2 | +| [LLM](./comps/llms/src/text-generation/README.md) | 
[LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon | LLM on Xeon CPU | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Gaudi2 | LLM on Gaudi2 | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Xeon | LLM on Xeon CPU | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [vLLM](https://github.com/vllm-project/vllm/) | Gaudi2 | LLM on Gaudi2 | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [vLLM](https://github.com/vllm-project/vllm/) | Xeon | LLM on Xeon CPU | A `Microservices` can be created by using the decorator `register_microservice`. Taking the `embedding microservice` as an example: @@ -129,7 +130,7 @@ self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port ## Check Mega/Micro Service health status and version number -Use below command to check Mega/Micro Service status. +Use the command below to check Mega/Micro Service status. 
```bash curl http://${your_ip}:${service_port}/v1/health_check\ @@ -149,7 +150,7 @@ Welcome to the OPEA open-source community! We are thrilled to have you here and Together, we can make OPEA the go-to platform for enterprise AI solutions. Let's work together to push the boundaries of what's possible and create a future where AI is accessible, efficient, and impactful for everyone. -Please check the [Contributing guidelines](https://github.com/opea-project/docs/tree/main/community/CONTRIBUTING.md) for a detailed guide on how to contribute a GenAI example and all the ways you can contribute! +Please check the [Contributing Guidelines](https://github.com/opea-project/docs/tree/main/community/CONTRIBUTING.md) for a detailed guide on how to contribute a GenAI example and all the ways you can contribute! Thank you for being a part of this journey. We can't wait to see what we can achieve together! From 3b428589bdc324ca9e7eb7c387f1f4fbb02383b8 Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Mon, 12 May 2025 15:39:13 +0800 Subject: [PATCH 25/34] Fix request handle timeout issue (#1687) Signed-off-by: lvliang-intel Signed-off-by: alexsin368 --- comps/cores/mega/orchestrator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index f5198908b8..8295719108 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -135,7 +135,7 @@ async def schedule(self, initial_inputs: Dict | BaseModel, llm_parameters: LLMPa if LOGFLAG: logger.info(initial_inputs) - timeout = aiohttp.ClientTimeout(total=1000) + timeout = aiohttp.ClientTimeout(total=2000) async with aiohttp.ClientSession(trust_env=True, timeout=timeout) as session: pending = { asyncio.create_task( From 928e0f74d5a58e7d800c6cd2ac4c033ed707a747 Mon Sep 17 00:00:00 2001 From: Spycsh <39623753+Spycsh@users.noreply.github.com> Date: Tue, 13 May 2025 13:20:16 +0800 Subject: [PATCH 26/34] 
FEAT: Enable OPEA microservices to start as MCP servers (#1635) Signed-off-by: alexsin368 --- .../deployment/docker_compose/compose.yaml | 3 + comps/asr/src/integrations/whisper.py | 55 ++++++++------ comps/asr/src/opea_asr_microservice.py | 10 ++- comps/asr/src/requirements.txt | 1 + comps/cores/mega/constants.py | 10 ++- comps/cores/mega/micro_service.py | 54 +++++++++++++- requirements.txt | 1 + tests/asr/test_asr_whisper_mcp.sh | 74 +++++++++++++++++++ tests/cores/mega/test_mcp.py | 70 ++++++++++++++++++ tests/utils/validate_svc_with_mcp.py | 53 +++++++++++++ 10 files changed, 302 insertions(+), 29 deletions(-) create mode 100644 tests/asr/test_asr_whisper_mcp.sh create mode 100644 tests/cores/mega/test_mcp.py create mode 100644 tests/utils/validate_svc_with_mcp.py diff --git a/comps/asr/deployment/docker_compose/compose.yaml b/comps/asr/deployment/docker_compose/compose.yaml index 3595eaf225..4b0ac07da3 100644 --- a/comps/asr/deployment/docker_compose/compose.yaml +++ b/comps/asr/deployment/docker_compose/compose.yaml @@ -14,11 +14,13 @@ services: environment: ASR_ENDPOINT: ${ASR_ENDPOINT} ASR_COMPONENT_NAME: ${ASR_COMPONENT_NAME:-OPEA_WHISPER_ASR} + ENABLE_MCP: ${ENABLE_MCP:-False} asr-whisper: extends: asr container_name: asr-whisper-service environment: ASR_COMPONENT_NAME: ${ASR_COMPONENT_NAME:-OPEA_WHISPER_ASR} + ENABLE_MCP: ${ENABLE_MCP:-False} depends_on: whisper-service: condition: service_healthy @@ -27,6 +29,7 @@ services: container_name: asr-whisper-gaudi-service environment: ASR_COMPONENT_NAME: ${ASR_COMPONENT_NAME:-OPEA_WHISPER_ASR} + ENABLE_MCP: ${ENABLE_MCP:-False} depends_on: whisper-gaudi-service: condition: service_healthy diff --git a/comps/asr/src/integrations/whisper.py b/comps/asr/src/integrations/whisper.py index eb4c265ea1..39183e3350 100644 --- a/comps/asr/src/integrations/whisper.py +++ b/comps/asr/src/integrations/whisper.py @@ -3,7 +3,7 @@ import asyncio import os -from typing import List +from typing import List, Union import 
requests from fastapi import File, Form, UploadFile @@ -32,7 +32,7 @@ def __init__(self, name: str, description: str, config: dict = None): async def invoke( self, - file: UploadFile = File(...), # Handling the uploaded file directly + file: Union[str, UploadFile], # accept base64 string or UploadFile model: str = Form("openai/whisper-small"), language: str = Form("english"), prompt: str = Form(None), @@ -41,28 +41,39 @@ async def invoke( timestamp_granularities: List[str] = Form(None), ) -> AudioTranscriptionResponse: """Involve the ASR service to generate transcription for the provided input.""" - # Read the uploaded file - file_contents = await file.read() + if isinstance(file, str): + data = {"audio": file} + # Send the file and model to the server + response = await asyncio.to_thread( + requests.post, + f"{self.base_url}/v1/asr", + json=data, + ) + res = response.json()["asr_result"] + return AudioTranscriptionResponse(text=res) + else: + # Read the uploaded file + file_contents = await file.read() - # Prepare the files and data - files = { - "file": (file.filename, file_contents, file.content_type), - } - data = { - "model": model, - "language": language, - "prompt": prompt, - "response_format": response_format, - "temperature": temperature, - "timestamp_granularities": timestamp_granularities, - } + # Prepare the files and data + files = { + "file": (file.filename, file_contents, file.content_type), + } + data = { + "model": model, + "language": language, + "prompt": prompt, + "response_format": response_format, + "temperature": temperature, + "timestamp_granularities": timestamp_granularities, + } - # Send the file and model to the server - response = await asyncio.to_thread( - requests.post, f"{self.base_url}/v1/audio/transcriptions", files=files, data=data - ) - res = response.json()["text"] - return AudioTranscriptionResponse(text=res) + # Send the file and model to the server + response = await asyncio.to_thread( + requests.post, 
f"{self.base_url}/v1/audio/transcriptions", files=files, data=data + ) + res = response.json()["text"] + return AudioTranscriptionResponse(text=res) def check_health(self) -> bool: """Checks the health of the embedding service. diff --git a/comps/asr/src/opea_asr_microservice.py b/comps/asr/src/opea_asr_microservice.py index 8210149613..db9bb37947 100644 --- a/comps/asr/src/opea_asr_microservice.py +++ b/comps/asr/src/opea_asr_microservice.py @@ -3,7 +3,7 @@ import os import time -from typing import List +from typing import List, Union from fastapi import File, Form, UploadFile from integrations.whisper import OpeaWhisperAsr @@ -19,12 +19,15 @@ register_statistics, statistics_dict, ) +from comps.cores.mega.constants import MCPFuncType from comps.cores.proto.api_protocol import AudioTranscriptionResponse logger = CustomLogger("opea_asr_microservice") logflag = os.getenv("LOGFLAG", False) asr_component_name = os.getenv("ASR_COMPONENT_NAME", "OPEA_WHISPER_ASR") +enable_mcp = os.getenv("ENABLE_MCP", "").strip().lower() in {"true", "1", "yes"} + # Initialize OpeaComponentLoader loader = OpeaComponentLoader(asr_component_name, description=f"OPEA ASR Component: {asr_component_name}") @@ -37,10 +40,13 @@ port=9099, input_datatype=Base64ByteStrDoc, output_datatype=LLMParamsDoc, + enable_mcp=enable_mcp, + mcp_func_type=MCPFuncType.TOOL, + description="Convert audio to text.", ) @register_statistics(names=["opea_service@asr"]) async def audio_to_text( - file: UploadFile = File(...), # Handling the uploaded file directly + file: Union[str, UploadFile], # accept base64 string or UploadFile model: str = Form("openai/whisper-small"), language: str = Form("english"), prompt: str = Form(None), diff --git a/comps/asr/src/requirements.txt b/comps/asr/src/requirements.txt index f73cc5821a..cca9450d79 100644 --- a/comps/asr/src/requirements.txt +++ b/comps/asr/src/requirements.txt @@ -3,6 +3,7 @@ aiohttp datasets docarray[full] fastapi +mcp opentelemetry-api opentelemetry-exporter-otlp 
opentelemetry-sdk diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py index 0723bbd12a..ed1a2271d0 100644 --- a/comps/cores/mega/constants.py +++ b/comps/cores/mega/constants.py @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from enum import Enum +from enum import Enum, auto class ServiceRoleType(Enum): @@ -92,3 +92,11 @@ class MicroServiceEndpoint(Enum): def __str__(self): return self.value + + +class MCPFuncType(Enum): + """The enum of a MCP function type.""" + + TOOL = auto() + RESOURCE = auto() + PROMPT = auto() diff --git a/comps/cores/mega/micro_service.py b/comps/cores/mega/micro_service.py index 9635b0ac24..5d96be70c4 100644 --- a/comps/cores/mega/micro_service.py +++ b/comps/cores/mega/micro_service.py @@ -4,11 +4,12 @@ import asyncio import os from collections import defaultdict, deque +from collections.abc import Callable from enum import Enum -from typing import Any, List, Optional, Type +from typing import Any, List, Optional, Type, TypeAlias from ..proto.docarray import TextDoc -from .constants import ServiceRoleType, ServiceType +from .constants import MCPFuncType, ServiceRoleType, ServiceType from .http_service import HTTPService from .logger import CustomLogger from .utils import check_ports_availability @@ -17,6 +18,7 @@ logger = CustomLogger("micro_service") logflag = os.getenv("LOGFLAG", False) +AnyFunction: TypeAlias = Callable[..., Any] class MicroService(HTTPService): @@ -43,6 +45,9 @@ def __init__( dynamic_batching: bool = False, dynamic_batching_timeout: int = 1, dynamic_batching_max_batch_size: int = 32, + enable_mcp: bool = False, + mcp_func_type: Enum = MCPFuncType.TOOL, + func: AnyFunction = None, ): """Init the microservice.""" self.service_role = service_role @@ -56,6 +61,7 @@ def __init__( self.output_datatype = output_datatype self.use_remote_service = use_remote_service self.description = description + self.enable_mcp = enable_mcp self.dynamic_batching = 
dynamic_batching self.dynamic_batching_timeout = dynamic_batching_timeout self.dynamic_batching_max_batch_size = dynamic_batching_max_batch_size @@ -82,7 +88,7 @@ def __init__( "host": self.host, "port": self.port, "title": name, - "description": "OPEA Microservice Infrastructure", + "description": self.description or "OPEA Microservice Infrastructure", } super().__init__(uvicorn_kwargs=self.uvicorn_kwargs, runtime_args=runtime_args) @@ -93,7 +99,21 @@ def __init__( self.request_buffer = defaultdict(deque) self.add_startup_event(self._dynamic_batch_processor()) - self._async_setup() + if not enable_mcp: + self._async_setup() + else: + from mcp.server.fastmcp import FastMCP + + self.mcp = FastMCP(name, host=self.host, port=self.port) + dispatch = { + MCPFuncType.TOOL: self.mcp.add_tool, + MCPFuncType.RESOURCE: self.mcp.add_resource, + MCPFuncType.PROMPT: self.mcp.add_prompt, + } + try: + dispatch[mcp_func_type](func, name=func.__name__, description=description) + except KeyError: + raise ValueError(f"Unknown MCP func type: {mcp_func_type}") # overwrite name self.name = f"{name}/{self.__class__.__name__}" if name else self.__class__.__name__ @@ -144,6 +164,15 @@ def endpoint_path(self, model=None): else: return f"{self.protocol}://{self.host}:{self.port}{self.endpoint}" + def start(self): + """Start the server using MCP if enabled, otherwise fall back to default.""" + if self.enable_mcp: + self.mcp.run( + transport="sse", + ) + else: + super().start() + @property def api_key_value(self): return self.api_key @@ -167,6 +196,9 @@ def register_microservice( dynamic_batching: bool = False, dynamic_batching_timeout: int = 1, dynamic_batching_max_batch_size: int = 32, + enable_mcp: bool = False, + description: str = None, + mcp_func_type: Enum = MCPFuncType.TOOL, ): def decorator(func): if name not in opea_microservices: @@ -187,8 +219,22 @@ def decorator(func): dynamic_batching=dynamic_batching, dynamic_batching_timeout=dynamic_batching_timeout, 
dynamic_batching_max_batch_size=dynamic_batching_max_batch_size, + enable_mcp=enable_mcp, + func=func, + description=description, + mcp_func_type=mcp_func_type, ) opea_microservices[name] = micro_service + + elif enable_mcp: + mcp_handle = opea_microservices[name].mcp + dispatch = { + MCPFuncType.TOOL: mcp_handle.add_tool, + MCPFuncType.RESOURCE: mcp_handle.add_resource, + MCPFuncType.PROMPT: mcp_handle.add_prompt, + } + dispatch[mcp_func_type](func, name=func.__name__, description=description) + opea_microservices[name].app.router.add_api_route(endpoint, func, methods=methods) return func diff --git a/requirements.txt b/requirements.txt index c16f8ad52b..cca4354342 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ httpx kubernetes langchain langchain-community +mcp opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/tests/asr/test_asr_whisper_mcp.sh b/tests/asr/test_asr_whisper_mcp.sh new file mode 100644 index 0000000000..8bdfa65a7d --- /dev/null +++ b/tests/asr/test_asr_whisper_mcp.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +export TAG=comps +export WHISPER_PORT=10104 +export ASR_PORT=10105 +export ENABLE_MCP=True +cd $WORKPATH + + +function build_docker_images() { + echo $(pwd) + docker build --no-cache -t opea/whisper:$TAG --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/whisper/src/Dockerfile . + + if [ $? -ne 0 ]; then + echo "opea/whisper built fail" + exit 1 + else + echo "opea/whisper built successful" + fi + + docker build --no-cache -t opea/asr:$TAG --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile . + + if [ $? 
-ne 0 ]; then + echo "opea/asr built fail" + exit 1 + else + echo "opea/asr built successful" + fi +} + +function start_service() { + unset http_proxy + export ASR_ENDPOINT=http://$ip_address:$WHISPER_PORT + + docker compose -f comps/asr/deployment/docker_compose/compose.yaml up whisper-service asr -d + sleep 1m +} + +function validate_microservice() { + pip install mcp + python3 ${WORKPATH}/tests/utils/validate_svc_with_mcp.py $ip_address $ASR_PORT "asr" + if [ $? -ne 0 ]; then + docker logs whisper-service + docker logs asr-service + exit 1 + fi + +} + +function stop_docker() { + docker ps -a --filter "name=whisper-service" --filter "name=asr-service" --format "{{.Names}}" | xargs -r docker stop +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/cores/mega/test_mcp.py b/tests/cores/mega/test_mcp.py new file mode 100644 index 0000000000..39a38d8168 --- /dev/null +++ b/tests/cores/mega/test_mcp.py @@ -0,0 +1,70 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import multiprocessing +import unittest + +from mcp.client.session import ClientSession +from mcp.client.sse import sse_client + +from comps import TextDoc, opea_microservices, register_microservice +from comps.cores.mega.constants import MCPFuncType +from comps.version import __version__ + + +@register_microservice( + name="mcp_dummy", + host="0.0.0.0", + port=8087, + enable_mcp=True, + mcp_func_type=MCPFuncType.TOOL, + description="dummy mcp add func", +) +async def mcp_dummy(request: TextDoc) -> TextDoc: + req = request.model_dump_json() + req_dict = json.loads(req) + text = req_dict["text"] + text += "OPEA Project MCP!" 
+ return {"text": text} + + +@register_microservice( + name="mcp_dummy", + host="0.0.0.0", + port=8087, + enable_mcp=True, + mcp_func_type=MCPFuncType.TOOL, + description="dummy mcp sum func", +) +async def mcp_dummy_sum(): + return 1 + 1 + + +class TestMicroService(unittest.IsolatedAsyncioTestCase): + def setUp(self): + self.process = multiprocessing.Process( + target=opea_microservices["mcp_dummy"].start, daemon=False, name="mcp_dummy" + ) + self.process.start() + + self.server_url = "http://localhost:8087" + + async def test_mcp(self): + async with sse_client(self.server_url + "/sse") as streams: + async with ClientSession(*streams) as session: + result = await session.initialize() + self.assertEqual(result.serverInfo.name, "mcp_dummy") + tool_result = await session.call_tool("mcp_dummy", {"request": {"text": "Hello "}}) + self.assertEqual(json.loads(tool_result.content[0].text)["text"], "Hello OPEA Project MCP!") + + tool_result = await session.call_tool( + "mcp_dummy_sum", + ) + self.assertEqual(tool_result.content[0].text, "2") + self.process.kill() + self.process.join(timeout=2) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/utils/validate_svc_with_mcp.py b/tests/utils/validate_svc_with_mcp.py new file mode 100644 index 0000000000..77f45fa656 --- /dev/null +++ b/tests/utils/validate_svc_with_mcp.py @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import asyncio +import base64 +import json +import os +import sys + +import requests +from mcp.client.session import ClientSession +from mcp.client.sse import sse_client + + +async def validate_svc(ip_address, service_port, service_type): + + endpoint = f"http://{ip_address}:{service_port}" + + async with sse_client(endpoint + "/sse") as streams: + async with ClientSession(*streams) as session: + result = await session.initialize() + if service_type == "asr": + url = 
"https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav" + response = requests.get(url) + response.raise_for_status() # Ensure the download succeeded + binary_data = response.content + base64_str = base64.b64encode(binary_data).decode("utf-8") + input_dict = {"file": base64_str, "model": "openai/whisper-small", "language": "english"} + tool_result = await session.call_tool( + "audio_to_text", + input_dict, + ) + result_content = tool_result.content + # Check result + if json.loads(result_content[0].text)["text"].startswith("who is"): + print("Result correct.") + else: + print(f"Result wrong. Received was {result_content}") + exit(1) + else: + print(f"Unknown service type: {service_type}") + exit(1) + + +if __name__ == "__main__": + if len(sys.argv) != 4: + print("Usage: python3 validate_svc_with_mcp.py ") + exit(1) + ip_address = sys.argv[1] + service_port = sys.argv[2] + service_type = sys.argv[3] + asyncio.run(validate_svc(ip_address, service_port, service_type)) From 9be8f9f3ead8eaa559de50ccb368bc684977ccbb Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Tue, 13 May 2025 15:22:19 +0800 Subject: [PATCH 27/34] Fix huggingface_hub API upgrade issue (#1691) * Fix huggingfacehub API upgrade issue Signed-off-by: lvliang-intel Signed-off-by: alexsin368 --- comps/cores/mega/orchestrator.py | 6 ++- comps/embeddings/src/integrations/ovms.py | 51 +++++++++++------------ comps/embeddings/src/integrations/tei.py | 18 ++++---- comps/rerankings/src/requirements.txt | 1 + 4 files changed, 39 insertions(+), 37 deletions(-) diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index 8295719108..61246ee5a8 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -274,7 +274,7 @@ async def execute( headers={"Content-type": "application/json", "Authorization": f"Bearer {access_token}"}, proxies={"http": None}, stream=True, - 
timeout=1000, + timeout=2000, ) else: response = requests.post( @@ -285,7 +285,7 @@ async def execute( }, proxies={"http": None}, stream=True, - timeout=1000, + timeout=2000, ) downstream = runtime_graph.downstream(cur_node) @@ -317,6 +317,7 @@ def generate(): "Authorization": f"Bearer {access_token}", }, proxies={"http": None}, + timeout=2000, ) else: res = requests.post( @@ -326,6 +327,7 @@ def generate(): "Content-type": "application/json", }, proxies={"http": None}, + timeout=2000, ) res_json = res.json() if "text" in res_json: diff --git a/comps/embeddings/src/integrations/ovms.py b/comps/embeddings/src/integrations/ovms.py index 9931fa35e5..16ee4e125a 100644 --- a/comps/embeddings/src/integrations/ovms.py +++ b/comps/embeddings/src/integrations/ovms.py @@ -1,12 +1,10 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import json import os -from typing import List, Union +import aiohttp import requests -from huggingface_hub import AsyncInferenceClient from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.cores.mega.utils import get_access_token @@ -32,24 +30,11 @@ class OpeaOVMSEmbedding(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.EMBEDDING.name.lower(), description, config) self.base_url = os.getenv("OVMS_EMBEDDING_ENDPOINT", "http://localhost:8080") - self.client = self._initialize_client() health_status = self.check_health() if not health_status: logger.error("OpeaOVMSEmbedding health check failed.") - def _initialize_client(self) -> AsyncInferenceClient: - """Initializes the AsyncInferenceClient.""" - access_token = ( - get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None - ) - headers = {"Authorization": f"Bearer {access_token}"} if access_token else {} - return AsyncInferenceClient( - model=MODEL_ID, - token=os.getenv("HUGGINGFACEHUB_API_TOKEN"), - 
headers=headers, - ) - async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse: """Invokes the embedding service to generate embeddings for the provided input. @@ -69,17 +54,31 @@ async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse: raise ValueError("Invalid input format: Only string or list of strings are supported.") else: raise TypeError("Unsupported input type: input must be a string or list of strings.") - response = await self.client.post( - json={ - "input": texts, - "encoding_format": input.encoding_format, - "model": self.client.model, - "user": input.user, - }, - model=f"{self.base_url}/v3/embeddings", - task="text-embedding", + # Build headers + headers = {"Content-Type": "application/json"} + access_token = ( + get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None ) - embeddings = json.loads(response.decode()) + if access_token: + headers["Authorization"] = f"Bearer {access_token}" + + # Compose request + payload = { + "input": texts, + "encoding_format": input.encoding_format, + "model": MODEL_ID, + "user": input.user, + } + + # Send async POST request using aiohttp + url = f"{self.base_url}/v3/embeddings" + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers, json=payload) as resp: + if resp.status != 200: + logger.error(f"Embedding service error: {resp.status} - {await resp.text()}") + raise RuntimeError(f"Failed to fetch embeddings: HTTP {resp.status}") + embeddings = await resp.json() + return EmbeddingResponse(**embeddings) def check_health(self) -> bool: diff --git a/comps/embeddings/src/integrations/tei.py b/comps/embeddings/src/integrations/tei.py index 8d589fb822..b89eb62625 100644 --- a/comps/embeddings/src/integrations/tei.py +++ b/comps/embeddings/src/integrations/tei.py @@ -10,7 +10,7 @@ from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.cores.mega.utils import 
get_access_token -from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse +from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse, EmbeddingResponseData logger = CustomLogger("opea_tei_embedding") logflag = os.getenv("LOGFLAG", False) @@ -44,7 +44,7 @@ def _initialize_client(self) -> AsyncInferenceClient: ) headers = {"Authorization": f"Bearer {access_token}"} if access_token else {} return AsyncInferenceClient( - model=f"{self.base_url}/v1/embeddings", + model=f"{self.base_url}/embed", token=os.getenv("HUGGINGFACEHUB_API_TOKEN"), headers=headers, ) @@ -68,13 +68,13 @@ async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse: raise ValueError("Invalid input format: Only string or list of strings are supported.") else: raise TypeError("Unsupported input type: input must be a string or list of strings.") - response = await self.client.post( - json={"input": texts, "encoding_format": input.encoding_format, "model": input.model, "user": input.user}, - model=f"{self.base_url}/v1/embeddings", - task="text-embedding", - ) - embeddings = json.loads(response.decode()) - return EmbeddingResponse(**embeddings) + # feature_extraction return np.ndarray + response = await self.client.feature_extraction(text=texts, model=f"{self.base_url}/embed") + # Convert np.ndarray to a list of lists (embedding) + data = [EmbeddingResponseData(index=i, embedding=embedding.tolist()) for i, embedding in enumerate(response)] + # Construct the EmbeddingResponse + response = EmbeddingResponse(data=data) + return response def check_health(self) -> bool: """Checks the health of the embedding service. 
diff --git a/comps/rerankings/src/requirements.txt b/comps/rerankings/src/requirements.txt index 7260862a3b..b3a0ba6e4b 100644 --- a/comps/rerankings/src/requirements.txt +++ b/comps/rerankings/src/requirements.txt @@ -2,6 +2,7 @@ aiohttp docarray[full] fastapi httpx +huggingface-hub==0.30.2 opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk From 0ffa6a6d037873b94020288d4b5469e00bb1cde2 Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Tue, 13 May 2025 17:44:24 -0700 Subject: [PATCH 28/34] add OpenAI models instructions, fix format of commands Signed-off-by: alexsin368 --- comps/agent/src/README.md | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 6e1fcc431b..13aee5eaa5 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -82,7 +82,7 @@ for line in resp.iter_lines(decode_unicode=True): **Note**: -1. Currently only `reract_llama` agent is enabled for assistants APIs. +1. Currently only `react_llama` agent is enabled for assistants APIs. 2. Not all keywords of OpenAI APIs are supported yet. ### 1.5 Agent memory @@ -110,13 +110,27 @@ Examples of python code for multi-turn conversations using agent memory: To run the two examples above, first launch the agent microservice using [this docker compose yaml](../../../tests/agent/reactllama.yaml). -### 1.6 Run LLMs with Remote Servers +### 1.6 Run LLMs from OpenAI + +To run any model from OpenAI, just specify the environment variable `OPENAI_API_KEY`: + +```bash +export OPENAI_API_KEY= +``` + +These also need to be passed in to the `docker run` command, or included in a YAML file when running `docker compose`. 
+ +### 1.7 Run LLMs with OpenAI-compatible APIs on Remote Servers To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: -- `api_key`= -- `model`= -- `LLM_ENDPOINT_URL`= +```bash +export api_key= +export model= +export LLM_ENDPOINT_URL= +``` + +These also need to be passed in to the `docker run` command, or included in a YAML file when running `docker compose`. #### Notes From f83070cf26a681f6b7e52e832cde66a6df6ad3ae Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Wed, 14 May 2025 15:23:06 +0800 Subject: [PATCH 29/34] Fix dataprep opensearch ingest issue (#1697) Signed-off-by: lvliang-intel Signed-off-by: alexsin368 --- comps/dataprep/src/integrations/opensearch.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/comps/dataprep/src/integrations/opensearch.py b/comps/dataprep/src/integrations/opensearch.py index 2b51a5001c..44b2f59207 100644 --- a/comps/dataprep/src/integrations/opensearch.py +++ b/comps/dataprep/src/integrations/opensearch.py @@ -108,6 +108,9 @@ def __init__(self, name: str, description: str, config: dict = None): self.opensearch_client = OpenSearchVectorSearch( opensearch_url=OPENSEARCH_URL, index_name=Config.INDEX_NAME, + # Default engine for OpenSearch is "nmslib", + # but "nmslib" engine is deprecated in OpenSearch and cannot be used for new index creation in OpenSearch from 3.0.0. 
+ engine="faiss", embedding_function=self.embeddings, http_auth=self.auth, use_ssl=True, From 72bc23b8cbdf494358e00d0baa3ba3529652ddbc Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Wed, 14 May 2025 16:08:40 +0800 Subject: [PATCH 30/34] Fix embedding issue with ArangoDB due to deprecated HuggingFace API (#1694) Signed-off-by: lvliang-intel Signed-off-by: alexsin368 --- comps/dataprep/src/integrations/arangodb.py | 6 +++--- comps/retrievers/src/integrations/arangodb.py | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/comps/dataprep/src/integrations/arangodb.py b/comps/dataprep/src/integrations/arangodb.py index 265b13f7ff..54f616ecef 100644 --- a/comps/dataprep/src/integrations/arangodb.py +++ b/comps/dataprep/src/integrations/arangodb.py @@ -10,12 +10,11 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_arangodb import ArangoGraph -from langchain_community.embeddings import HuggingFaceHubEmbeddings from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.prompts import ChatPromptTemplate from langchain_experimental.graph_transformers import LLMGraphTransformer -from langchain_huggingface import HuggingFaceEmbeddings +from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpointEmbeddings from langchain_openai import ChatOpenAI, OpenAIEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter @@ -200,8 +199,9 @@ def _initialize_embeddings(self): """Initialize the embeddings model.""" if TEI_EMBEDDING_ENDPOINT and HUGGINGFACEHUB_API_TOKEN: - self.embeddings = HuggingFaceHubEmbeddings( + self.embeddings = HuggingFaceEndpointEmbeddings( model=TEI_EMBEDDING_ENDPOINT, + task="feature-extraction", huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN, ) elif TEI_EMBED_MODEL: diff --git a/comps/retrievers/src/integrations/arangodb.py 
b/comps/retrievers/src/integrations/arangodb.py index dd1ed24319..9905d9134e 100644 --- a/comps/retrievers/src/integrations/arangodb.py +++ b/comps/retrievers/src/integrations/arangodb.py @@ -9,7 +9,8 @@ from arango.database import StandardDatabase from fastapi import HTTPException from langchain_arangodb import ArangoVector -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_openai import ChatOpenAI, OpenAIEmbeddings from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType @@ -421,8 +422,10 @@ async def invoke( if OPENAI_API_KEY and OPENAI_EMBED_MODEL and OPENAI_EMBED_ENABLED: embeddings = OpenAIEmbeddings(model=OPENAI_EMBED_MODEL, dimensions=dimension) elif TEI_EMBEDDING_ENDPOINT and HUGGINGFACEHUB_API_TOKEN: - embeddings = HuggingFaceHubEmbeddings( - model=TEI_EMBEDDING_ENDPOINT, huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN + embeddings = HuggingFaceEndpointEmbeddings( + model=TEI_EMBEDDING_ENDPOINT, + task="feature-extraction", + huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN, ) else: embeddings = HuggingFaceBgeEmbeddings(model_name=TEI_EMBED_MODEL) From b2d93ff938d8c45021462722c1854c6421bfc662 Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 14 May 2025 17:35:08 -0700 Subject: [PATCH 31/34] simplify ChatOpenAI instantiation Signed-off-by: alexsin368 --- comps/agent/src/integrations/utils.py | 34 ++++++++------------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index ff7f0415a0..238f44d3e3 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,6 +7,7 @@ from .config import env_config +LLM_ENDPOINT_URL_DEFAULT="http://localhost:8080" def format_date(date): # input m/dd/yyyy hr:min 
@@ -56,29 +57,14 @@ def setup_chat_model(args): "top_p": args.top_p, "streaming": args.stream, } - if args.llm_engine == "vllm" or args.llm_engine == "tgi": - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key="EMPTY", - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) - elif args.llm_engine == "openai": - if args.api_key: - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key=args.api_key, - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) - else: - llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) - else: - raise ValueError("llm_engine must be vllm, tgi or openai") + openai_endpoint=None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + '/v1' + llm = ChatOpenAI( + openai_api_key=args.api_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params + ) return llm @@ -171,7 +157,7 @@ def get_args(): parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct") parser.add_argument("--llm_engine", type=str, default="tgi") - parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") + parser.add_argument("--llm_endpoint_url", type=str, default=LLM_ENDPOINT_URL_DEFAULT) parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) From 78001b0e2da08baa0b24307a158a26bb5f6cbc95 Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 14 May 2025 18:10:19 -0700 Subject: [PATCH 32/34] Revert "simplify ChatOpenAI instantiation" This reverts commit b7c4acf7d397a284f1499254fa8832533c0c98e3. 
Signed-off-by: alexsin368 --- comps/agent/src/integrations/utils.py | 34 +++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 238f44d3e3..ff7f0415a0 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,7 +7,6 @@ from .config import env_config -LLM_ENDPOINT_URL_DEFAULT="http://localhost:8080" def format_date(date): # input m/dd/yyyy hr:min @@ -57,14 +56,29 @@ def setup_chat_model(args): "top_p": args.top_p, "streaming": args.stream, } - openai_endpoint=None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + '/v1' - llm = ChatOpenAI( - openai_api_key=args.api_key, - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params - ) + if args.llm_engine == "vllm" or args.llm_engine == "tgi": + openai_endpoint = f"{args.llm_endpoint_url}/v1" + llm = ChatOpenAI( + openai_api_key="EMPTY", + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) + elif args.llm_engine == "openai": + if args.api_key: + openai_endpoint = f"{args.llm_endpoint_url}/v1" + llm = ChatOpenAI( + openai_api_key=args.api_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) + else: + llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) + else: + raise ValueError("llm_engine must be vllm, tgi or openai") return llm @@ -157,7 +171,7 @@ def get_args(): parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct") parser.add_argument("--llm_engine", type=str, default="tgi") - parser.add_argument("--llm_endpoint_url", type=str, default=LLM_ENDPOINT_URL_DEFAULT) + parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") parser.add_argument("--api_key", type=str, default=None, 
help="API key to access remote server") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) From 1f4b7468e04e7b8a340d64152e85e023b52dfb45 Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 14 May 2025 18:20:35 -0700 Subject: [PATCH 33/34] add back check and logic for llm_engine, set openai_key argument Signed-off-by: alexsin368 --- comps/agent/src/integrations/utils.py | 33 +++++++++++---------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index ff7f0415a0..6d73e2fff5 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,6 +7,7 @@ from .config import env_config +LLM_ENDPOINT_URL_DEFAULT = "http://localhost:8080" def format_date(date): # input m/dd/yyyy hr:min @@ -57,28 +58,20 @@ def setup_chat_model(args): "streaming": args.stream, } if args.llm_engine == "vllm" or args.llm_engine == "tgi": - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key="EMPTY", - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) + openai_key = "EMPTY" elif args.llm_engine == "openai": - if args.api_key: - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key=args.api_key, - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) - else: - llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) + openai_key = args.api_key else: - raise ValueError("llm_engine must be vllm, tgi or openai") + raise ValueError("llm_engine must be vllm, tgi, or openai") + + openai_endpoint = None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + "/v1" + llm = ChatOpenAI( + openai_api_key=openai_key, + openai_api_base=openai_endpoint, + model_name=args.model, + 
request_timeout=args.timeout, + **params, + ) return llm From 45376b9c6280d7b81137321d34bae73ca067bc41 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 01:39:42 +0000 Subject: [PATCH 34/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/agent/src/integrations/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 6d73e2fff5..3c940a53b9 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -9,6 +9,7 @@ LLM_ENDPOINT_URL_DEFAULT = "http://localhost:8080" + def format_date(date): # input m/dd/yyyy hr:min # output yyyy-mm-dd