From 369d5dc57329aef642ef171e5e4f60c893b4b81f Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 30 Apr 2025 17:56:40 -0700 Subject: [PATCH 01/34] add support for remote server Signed-off-by: alexsin368 --- comps/agent/src/integrations/config.py | 6 ++++++ comps/agent/src/integrations/utils.py | 14 +++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/comps/agent/src/integrations/config.py b/comps/agent/src/integrations/config.py index bcbe6207a5..2aa338dd96 100644 --- a/comps/agent/src/integrations/config.py +++ b/comps/agent/src/integrations/config.py @@ -17,6 +17,12 @@ if os.environ.get("llm_endpoint_url") is not None: env_config += ["--llm_endpoint_url", os.environ["llm_endpoint_url"]] +if os.environ.get("api_key") is not None: + env_config += ["--api_key", os.environ["api_key"]] + +if os.environ.get("use_remote_service") is not None: + env_config += ["--use_remote_service", os.environ["use_remote_service"]] + if os.environ.get("llm_engine") is not None: env_config += ["--llm_engine", os.environ["llm_engine"]] diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 77f4e1cadb..e84d3d1a1f 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -66,7 +66,17 @@ def setup_chat_model(args): **params, ) elif args.llm_engine == "openai": - llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) + if args.use_remote_service: + openai_endpoint = f"{args.llm_endpoint_url}/v1" + llm = ChatOpenAI( + openai_api_key=args.api_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) + else: + llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) else: raise ValueError("llm_engine must be vllm, tgi or openai") return llm @@ -162,6 +172,8 @@ def get_args(): parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct") 
parser.add_argument("--llm_engine", type=str, default="tgi") parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") + parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") + parser.add_argument("--use_remote_service", action="store_true", default=False, help="If using a remote server for LLM") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) parser.add_argument("--top_p", type=float, default=0.95) From 0f6191df895bd96c6351d6d600c6c04fa25b86cd Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Thu, 1 May 2025 17:20:22 -0700 Subject: [PATCH 02/34] add steps to enable remote server Signed-off-by: alexsin368 --- comps/agent/src/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index fcdb332abb..0bd2d528fb 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -110,6 +110,16 @@ Examples of python code for multi-turn conversations using agent memory: To run the two examples above, first launch the agent microservice using [this docker compose yaml](../../../tests/agent/reactllama.yaml). +### 1.6 Run LLMs with Remote Servers + +To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: +- `api_key`= +- `use_remote_service`=True +- `model`= +- `LLM_ENDPOINT_URL`= + +For `LLM_ENDPOINT_URL`, there is no need to include `v1`. + ## 🚀2. 
Start Agent Microservice ### 2.1 Build docker image for agent microservice From 71f1608c44ec974c32382efc8b8675c4a347532c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 May 2025 00:43:07 +0000 Subject: [PATCH 03/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/agent/src/README.md | 1 + comps/agent/src/integrations/utils.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 0bd2d528fb..2f107696ac 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -113,6 +113,7 @@ To run the two examples above, first launch the agent microservice using [this d ### 1.6 Run LLMs with Remote Servers To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: + - `api_key`= - `use_remote_service`=True - `model`= diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index e84d3d1a1f..14c24b0b6d 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -173,7 +173,9 @@ def get_args(): parser.add_argument("--llm_engine", type=str, default="tgi") parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") - parser.add_argument("--use_remote_service", action="store_true", default=False, help="If using a remote server for LLM") + parser.add_argument( + "--use_remote_service", action="store_true", default=False, help="If using a remote server for LLM" + ) parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) parser.add_argument("--top_p", type=float, default=0.95) From bbcda06cce7a46f167968dd8536b56ac9bd96568 Mon Sep 17 00:00:00 2001 From: 
alexsin368 Date: Fri, 2 May 2025 16:58:58 -0700 Subject: [PATCH 04/34] remove use_remote_service Signed-off-by: alexsin368 --- comps/agent/src/README.md | 4 ++-- comps/agent/src/integrations/config.py | 3 --- comps/agent/src/integrations/utils.py | 3 +-- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 0bd2d528fb..7a9289dba3 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -114,11 +114,11 @@ To run the two examples above, first launch the agent microservice using [this d To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: - `api_key`= -- `use_remote_service`=True - `model`= - `LLM_ENDPOINT_URL`= -For `LLM_ENDPOINT_URL`, there is no need to include `v1`. +#### Notes +- For `LLM_ENDPOINT_URL`, there is no need to include `v1`. ## 🚀2. Start Agent Microservice diff --git a/comps/agent/src/integrations/config.py b/comps/agent/src/integrations/config.py index 2aa338dd96..f965286c95 100644 --- a/comps/agent/src/integrations/config.py +++ b/comps/agent/src/integrations/config.py @@ -20,9 +20,6 @@ if os.environ.get("api_key") is not None: env_config += ["--api_key", os.environ["api_key"]] -if os.environ.get("use_remote_service") is not None: - env_config += ["--use_remote_service", os.environ["use_remote_service"]] - if os.environ.get("llm_engine") is not None: env_config += ["--llm_engine", os.environ["llm_engine"]] diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index e84d3d1a1f..ff7f0415a0 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -66,7 +66,7 @@ def setup_chat_model(args): **params, ) elif args.llm_engine == "openai": - if args.use_remote_service: + if args.api_key: openai_endpoint = f"{args.llm_endpoint_url}/v1" llm = ChatOpenAI( openai_api_key=args.api_key, @@ -173,7 +173,6 @@ def get_args(): 
parser.add_argument("--llm_engine", type=str, default="tgi") parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") - parser.add_argument("--use_remote_service", action="store_true", default=False, help="If using a remote server for LLM") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) parser.add_argument("--top_p", type=float, default=0.95) From 45cf9315397a9cdb8460b8f81e77faf7caef802c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 3 May 2025 00:01:52 +0000 Subject: [PATCH 05/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/agent/src/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 4dbe55632a..6e1fcc431b 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -119,6 +119,7 @@ To run the text generation portion using LLMs deployed on a remote server, speci - `LLM_ENDPOINT_URL`= #### Notes + - For `LLM_ENDPOINT_URL`, there is no need to include `v1`. ## 🚀2. Start Agent Microservice From 4899f7937a15d590cf44b5fbe9b63125d629073a Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Tue, 13 May 2025 17:44:24 -0700 Subject: [PATCH 06/34] add OpenAI models instructions, fix format of commands Signed-off-by: alexsin368 --- comps/agent/src/README.md | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 6e1fcc431b..13aee5eaa5 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -82,7 +82,7 @@ for line in resp.iter_lines(decode_unicode=True): **Note**: -1. Currently only `reract_llama` agent is enabled for assistants APIs. +1. 
Currently only `react_llama` agent is enabled for assistants APIs. 2. Not all keywords of OpenAI APIs are supported yet. ### 1.5 Agent memory @@ -110,13 +110,27 @@ Examples of python code for multi-turn conversations using agent memory: To run the two examples above, first launch the agent microservice using [this docker compose yaml](../../../tests/agent/reactllama.yaml). -### 1.6 Run LLMs with Remote Servers +### 1.6 Run LLMs from OpenAI + +To run any model from OpenAI, just specify the environment variable `OPENAI_API_KEY`: + +```bash +export OPENAI_API_KEY= +``` + +These also need to be passed in to the `docker run` command, or included in a YAML file when running `docker compose`. + +### 1.7 Run LLMs with OpenAI-compatible APIs on Remote Servers To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: -- `api_key`= -- `model`= -- `LLM_ENDPOINT_URL`= +```bash +export api_key= +export model= +export LLM_ENDPOINT_URL= +``` + +These also need to be passed in to the `docker run` command, or included in a YAML file when running `docker compose`. 
#### Notes From b7c4acf7d397a284f1499254fa8832533c0c98e3 Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 14 May 2025 17:35:08 -0700 Subject: [PATCH 07/34] simplify ChatOpenAI instantiation Signed-off-by: alexsin368 --- comps/agent/src/integrations/utils.py | 34 ++++++++------------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index ff7f0415a0..238f44d3e3 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,6 +7,7 @@ from .config import env_config +LLM_ENDPOINT_URL_DEFAULT="http://localhost:8080" def format_date(date): # input m/dd/yyyy hr:min @@ -56,29 +57,14 @@ def setup_chat_model(args): "top_p": args.top_p, "streaming": args.stream, } - if args.llm_engine == "vllm" or args.llm_engine == "tgi": - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key="EMPTY", - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) - elif args.llm_engine == "openai": - if args.api_key: - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key=args.api_key, - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) - else: - llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) - else: - raise ValueError("llm_engine must be vllm, tgi or openai") + openai_endpoint=None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + '/v1' + llm = ChatOpenAI( + openai_api_key=args.api_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params + ) return llm @@ -171,7 +157,7 @@ def get_args(): parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct") parser.add_argument("--llm_engine", type=str, default="tgi") - parser.add_argument("--llm_endpoint_url", 
type=str, default="http://localhost:8080") + parser.add_argument("--llm_endpoint_url", type=str, default=LLM_ENDPOINT_URL_DEFAULT) parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) From 658665729de80c4436819d2f72c9512931549759 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 00:35:40 +0000 Subject: [PATCH 08/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/agent/src/integrations/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 238f44d3e3..c45cdced6f 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,7 +7,8 @@ from .config import env_config -LLM_ENDPOINT_URL_DEFAULT="http://localhost:8080" +LLM_ENDPOINT_URL_DEFAULT = "http://localhost:8080" + def format_date(date): # input m/dd/yyyy hr:min @@ -57,13 +58,13 @@ def setup_chat_model(args): "top_p": args.top_p, "streaming": args.stream, } - openai_endpoint=None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + '/v1' + openai_endpoint = None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + "/v1" llm = ChatOpenAI( openai_api_key=args.api_key, openai_api_base=openai_endpoint, model_name=args.model, request_timeout=args.timeout, - **params + **params, ) return llm From d2887344dc17ceb86b24ffe304cd4aea81b0bacb Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 14 May 2025 18:10:19 -0700 Subject: [PATCH 09/34] Revert "simplify ChatOpenAI instantiation" This reverts commit b7c4acf7d397a284f1499254fa8832533c0c98e3. 
--- comps/agent/src/integrations/utils.py | 34 +++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 238f44d3e3..ff7f0415a0 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,7 +7,6 @@ from .config import env_config -LLM_ENDPOINT_URL_DEFAULT="http://localhost:8080" def format_date(date): # input m/dd/yyyy hr:min @@ -57,14 +56,29 @@ def setup_chat_model(args): "top_p": args.top_p, "streaming": args.stream, } - openai_endpoint=None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + '/v1' - llm = ChatOpenAI( - openai_api_key=args.api_key, - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params - ) + if args.llm_engine == "vllm" or args.llm_engine == "tgi": + openai_endpoint = f"{args.llm_endpoint_url}/v1" + llm = ChatOpenAI( + openai_api_key="EMPTY", + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) + elif args.llm_engine == "openai": + if args.api_key: + openai_endpoint = f"{args.llm_endpoint_url}/v1" + llm = ChatOpenAI( + openai_api_key=args.api_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) + else: + llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) + else: + raise ValueError("llm_engine must be vllm, tgi or openai") return llm @@ -157,7 +171,7 @@ def get_args(): parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct") parser.add_argument("--llm_engine", type=str, default="tgi") - parser.add_argument("--llm_endpoint_url", type=str, default=LLM_ENDPOINT_URL_DEFAULT) + parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") parser.add_argument("--api_key", type=str, default=None, help="API key to access remote 
server") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) From 848368f23fafa674b22777c169a6a885455510df Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 14 May 2025 18:20:35 -0700 Subject: [PATCH 10/34] add back check and logic for llm_engine, set openai_key argument Signed-off-by: alexsin368 --- comps/agent/src/integrations/utils.py | 33 +++++++++++---------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index ff7f0415a0..6d73e2fff5 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,6 +7,7 @@ from .config import env_config +LLM_ENDPOINT_URL_DEFAULT = "http://localhost:8080" def format_date(date): # input m/dd/yyyy hr:min @@ -57,28 +58,20 @@ def setup_chat_model(args): "streaming": args.stream, } if args.llm_engine == "vllm" or args.llm_engine == "tgi": - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key="EMPTY", - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) + openai_key = "EMPTY" elif args.llm_engine == "openai": - if args.api_key: - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key=args.api_key, - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) - else: - llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) + openai_key = args.api_key else: - raise ValueError("llm_engine must be vllm, tgi or openai") + raise ValueError("llm_engine must be vllm, tgi, or openai") + + openai_endpoint = None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + "/v1" + llm = ChatOpenAI( + openai_api_key=openai_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) 
return llm From 53aaaa529e64fed7b8f79107de1809c5541a6534 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 01:22:16 +0000 Subject: [PATCH 11/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/agent/src/integrations/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 6d73e2fff5..3c940a53b9 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -9,6 +9,7 @@ LLM_ENDPOINT_URL_DEFAULT = "http://localhost:8080" + def format_date(date): # input m/dd/yyyy hr:min # output yyyy-mm-dd From a70201fd987dbf1de8524a09b388fc5e50f5758b Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Tue, 29 Apr 2025 11:54:57 +0800 Subject: [PATCH 12/34] Provide ARCH option for lvm-video-llama image build (#1630) Signed-off-by: ZePan110 Signed-off-by: alexsin368 --- comps/third_parties/video-llama/src/Dockerfile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/comps/third_parties/video-llama/src/Dockerfile b/comps/third_parties/video-llama/src/Dockerfile index b2b36a21e4..944a52a08f 100644 --- a/comps/third_parties/video-llama/src/Dockerfile +++ b/comps/third_parties/video-llama/src/Dockerfile @@ -3,6 +3,9 @@ FROM python:3.11-slim +# Set this to "cpu" or "gpu" or etc +ARG ARCH="cpu" + ENV LANG=C.UTF-8 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ @@ -20,7 +23,11 @@ COPY --chown=user:user comps /home/user/comps WORKDIR /home/user/comps/third_parties/video-llama/src RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/user/comps/third_parties/video-llama/src/requirements.txt + if [ ${ARCH} = "cpu" ]; then \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r 
/home/user/comps/third_parties/video-llama/src/requirements.txt; \ + else \ + pip install --no-cache-dir -r /home/user/comps/third_parties/video-llama/src/requirements.txt; \ + fi ARG VIDEO_LLAMA_REPO=https://github.com/DAMO-NLP-SG/Video-LLaMA.git ARG VIDEO_LLAMA_COMMIT=0adb19e From 212e6128b1cf192e77518d5bc30d3e2d559eb321 Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Wed, 30 Apr 2025 11:10:30 +0800 Subject: [PATCH 13/34] Add sglang microservice for supporting llama4 model (#1640) Signed-off-by: Ye, Xinyu Co-authored-by: Lv,Liang1 Signed-off-by: alexsin368 --- .../docker/compose/third_parties-compose.yaml | 4 + comps/llms/src/text-generation/README.md | 32 +++---- .../deployment/docker_compose/compose.yaml | 26 ++++++ comps/third_parties/sglang/src/Dockerfile | 47 +++++++++++ comps/third_parties/sglang/src/README.md | 84 +++++++++++++++++++ comps/third_parties/sglang/src/__init__.py | 2 + comps/third_parties/sglang/src/entrypoint.sh | 8 ++ .../_test_third_parties_sglang.sh | 84 +++++++++++++++++++ 8 files changed, 272 insertions(+), 15 deletions(-) create mode 100644 comps/third_parties/sglang/deployment/docker_compose/compose.yaml create mode 100644 comps/third_parties/sglang/src/Dockerfile create mode 100644 comps/third_parties/sglang/src/README.md create mode 100644 comps/third_parties/sglang/src/__init__.py create mode 100644 comps/third_parties/sglang/src/entrypoint.sh create mode 100644 tests/third_parties/_test_third_parties_sglang.sh diff --git a/.github/workflows/docker/compose/third_parties-compose.yaml b/.github/workflows/docker/compose/third_parties-compose.yaml index 22d8b33587..5e8a7eccd3 100644 --- a/.github/workflows/docker/compose/third_parties-compose.yaml +++ b/.github/workflows/docker/compose/third_parties-compose.yaml @@ -105,3 +105,7 @@ services: PORT_SSH: 2345 dockerfile: comps/third_parties/ipex/src/Dockerfile image: ${REGISTRY:-opea}/ipex-llm:${TAG:-latest} + sglang: + build: + dockerfile: 
comps/third_parties/sglang/src/Dockerfile + image: ${REGISTRY:-opea}/sglang:${TAG:-latest} diff --git a/comps/llms/src/text-generation/README.md b/comps/llms/src/text-generation/README.md index e069b4f867..3e002e22b0 100644 --- a/comps/llms/src/text-generation/README.md +++ b/comps/llms/src/text-generation/README.md @@ -8,21 +8,23 @@ Overall, this microservice offers a streamlined way to integrate large language ## Validated LLM Models -| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | OVMS | Optimum-Habana | -| ------------------------------------------- | --------- | -------- | ---------- | -------- | -------------- | -| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | ✓ | ✓ | -| [meta-llama/Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | ✓ | ✓ | -| [meta-llama/Llama-2-70b-chat-hf] | ✓ | - | ✓ | - | ✓ | -| [meta-llama/Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | ✓ | ✓ | -| [meta-llama/Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | - | ✓ | -| [Phi-3] | x | Limit 4K | Limit 4K | Limit 4K | ✓ | -| [Phi-4] | x | x | x | x | ✓ | -| [deepseek-ai/DeepSeek-R1-Distill-Llama-8B] | ✓ | - | ✓ | - | ✓ | -| [deepseek-ai/DeepSeek-R1-Distill-Llama-70B] | ✓ | - | ✓ | - | ✓ | -| [deepseek-ai/DeepSeek-R1-Distill-Qwen-14B] | ✓ | - | ✓ | - | ✓ | -| [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B] | ✓ | - | ✓ | - | ✓ | -| [mistralai/Mistral-Small-24B-Instruct-2501] | ✓ | - | ✓ | - | ✓ | -| [mistralai/Mistral-Large-Instruct-2411] | x | - | ✓ | - | ✓ | +| Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | OVMS | Optimum-Habana | SGLANG-CPU | +| --------------------------------------------------------------------------------------------------------------------- | --------- | -------- | ---------- | -------- | -------------- | ---------- | +| [Intel/neural-chat-7b-v3-3] | ✓ | ✓ | ✓ | ✓ | ✓ | - | +| [meta-llama/Llama-2-7b-chat-hf] | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| [meta-llama/Llama-2-70b-chat-hf] | ✓ | - | ✓ | - | ✓ | ✓ | +| [meta-llama/Meta-Llama-3-8B-Instruct] | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| [meta-llama/Meta-Llama-3-70B-Instruct] | ✓ | - | ✓ | 
- | ✓ | ✓ | +| [Phi-3] | x | Limit 4K | Limit 4K | Limit 4K | ✓ | - | +| [Phi-4] | x | x | x | x | ✓ | - | +| [deepseek-ai/DeepSeek-R1-Distill-Llama-8B] | ✓ | - | ✓ | - | ✓ | - | +| [deepseek-ai/DeepSeek-R1-Distill-Llama-70B] | ✓ | - | ✓ | - | ✓ | - | +| [deepseek-ai/DeepSeek-R1-Distill-Qwen-14B] | ✓ | - | ✓ | - | ✓ | - | +| [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B] | ✓ | - | ✓ | - | ✓ | - | +| [mistralai/Mistral-Small-24B-Instruct-2501] | ✓ | - | ✓ | - | ✓ | - | +| [mistralai/Mistral-Large-Instruct-2411] | x | - | ✓ | - | ✓ | - | +| [meta-llama/Llama-4-Scout-17B-16E-Instruct](https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct) | - | - | - | - | - | ✓ | +| [meta-llama/Llama-4-Maverick-17B-128E-Instruct](https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct) | - | - | - | - | - | ✓ | ### System Requirements for LLM Models diff --git a/comps/third_parties/sglang/deployment/docker_compose/compose.yaml b/comps/third_parties/sglang/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..a2f59cc914 --- /dev/null +++ b/comps/third_parties/sglang/deployment/docker_compose/compose.yaml @@ -0,0 +1,26 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + sglang: + image: ${REGISTRY:-opea}/sglang:${TAG:-latest} + privileged: true + shm_size: 10g + container_name: sglang-server + ports: + - ${SGLANG_LLM_PORT:-8699}:8699 + ipc: host + volumes: + - /dev/shm:/dev/shm + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MODEL_ID: ${MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + SGLANG_LLM_PORT: ${SGLANG_LLM_PORT:-8699} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/third_parties/sglang/src/Dockerfile b/comps/third_parties/sglang/src/Dockerfile new file mode 100644 index 0000000000..eba839f761 --- /dev/null +++ b/comps/third_parties/sglang/src/Dockerfile @@ -0,0 +1,47 @@ +# Copyright (C) 2025 Intel Corporation +# 
SPDX-License-Identifier: Apache-2.0 + +ARG BASE_IMAGE=ubuntu:22.04 +FROM ${BASE_IMAGE} AS base + +RUN apt-get update && \ + apt-get upgrade -y && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends --fix-missing \ + ca-certificates \ + curl \ + g++-11 \ + gcc-11 \ + git \ + make \ + numactl \ + wget + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 && \ + update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \ + update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 + +WORKDIR /root + +RUN curl -fsSL -v -o miniforge.sh -O https://github.com/conda-forge/miniforge/releases/download/24.7.1-2/Miniforge3-24.7.1-2-Linux-x86_64.sh && \ + bash miniforge.sh -b -p ./miniforge3 && \ + rm miniforge.sh + +RUN git clone https://github.com/jianan-gu/sglang -b llama4_optimzed_cpu_r1 +RUN . ~/miniforge3/bin/activate && conda create -n sglang python=3.10 && conda activate sglang && \ + cd sglang && pip install -e "python[all_cpu]" && cd .. && conda install -y libsqlite=3.48.0 && \ + pip uninstall -y triton && pip uninstall -y transformers && pip install transformers==4.51.1 && \ + pip install triton==3.1 && pip install intel-openmp==2024.2.0 && pip install transformers +RUN git clone https://github.com/vllm-project/vllm.git -b v0.6.4.post1 && cd vllm && apt-get install -y libnuma-dev && \ + . ~/miniforge3/bin/activate && conda activate sglang && \ + pip install cmake==3.31.2 wheel packaging ninja "setuptools-scm>=8" numpy nvidia-ml-py && \ + pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu && \ + VLLM_TARGET_DEVICE=cpu python setup.py develop && cd .. + +RUN cd sglang && . 
~/miniforge3/bin/activate && conda activate sglang && pip uninstall -y torch torchvision && \ + pip install torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/cpu && \ + cd sgl-kernel/ && python setup.py install && cd .. && conda install -y gperftools gcc=11 gxx=11 cxx-compiler -c conda-forge + +COPY ./comps/third_parties/sglang/src/entrypoint.sh /usr/local/bin/entrypoint.sh +RUN chmod +x /usr/local/bin/entrypoint.sh +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] diff --git a/comps/third_parties/sglang/src/README.md b/comps/third_parties/sglang/src/README.md new file mode 100644 index 0000000000..b26ef65c2f --- /dev/null +++ b/comps/third_parties/sglang/src/README.md @@ -0,0 +1,84 @@ +# SGLang Serving microservice + +SGLang is an efficient and scalable serving framework for hosting Large Language Models (LLMs) with minimal latency and high throughput. +Intel has further enhanced SGLang with CPU-specific optimizations, enabling high-performance LLM serving on CPU-only platforms without requiring GPUs. +We have verified the Llama series models with SGLang, with a particular focus on the latest Llama 4 models, including [meta-llama/Llama-4-Scout-17B-16E-Instruct](https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct), [meta-llama/Llama-4-Maverick-17B-128E-Instruct](https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct), as well as the Llama 4 guard model Llama-Guard-4-12B. + +## 🚀1. Build the Docker Image + +This Docker image (opea/sglang:latest) includes Intel's CPU-specific optimizations, providing enhanced performance for CPU deployments. +Ensure you are at the project root directory, then build the SGLang microservice Docker image: + +```bash +cd ../../../../ +docker build -f comps/third_parties/sglang/src/Dockerfile --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/sglang:latest . +``` + +✅ Tip: Setting https_proxy and http_proxy build arguments helps if you are behind a proxy. 
+ +## 🚀2. Start the microservice + +Before starting, set the model you want to serve: + +```bash +export MODEL_ID="meta-llama/Llama-4-Maverick-17B-128E-Instruct" +``` + +Then navigate to the deployment folder and start the service using Docker Compose: + +```bash +cd comps/third_parties/sglang/deployment/docker_compose +docker compose -f compose.yaml up -d +``` + +## 🚀3. Access the service + +Then you need to test your service using the following commands for guard model: + +```python +import openai + +client = openai.Client(base_url="http://127.0.0.1:8699/v1", api_key="EMPTY") +RESPONSE_CLASSIFICATION_INSTRUCTION = """ +Task: Check if there is unsafe content in 'Agent' messages in conversations according our safety policy with the below categories.\n\n + +\nS1: Violent Crimes. \nS2: Non-Violent Crimes. \nS3: Sex Crimes. \nS4: Child Exploitation. \nS5: Defamation. +\nS6: Specialized Advice. \nS7: Privacy. \nS8: Intellectual Property. \nS9: Indiscriminate Weapons. +\nS10: Hate. \nS11: Self-Harm. \nS12: Sexual Content. \nS13: Elections. \n""" +RESPONSE_CLASSIFICATION_TEMPLATE_PREFIX = "{classification_instruction}\n\n\n\nUser: " +RESPONSE_CLASSIFICATION_TEMPLATE_POSTFIX = """{user_message}\n\nAgent: {model_message}\n\n\n\n +Provide your safety assessment for ONLY THE LAST Agent message in the above conversation: +\n - First line must read 'safe' or 'unsafe'. +\n - If unsafe, a second line must include a comma-separated list of violated categories.""" + +response = client.chat.completions.create( + model="meta-llama/Llama-4-guard", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": RESPONSE_CLASSIFICATION_TEMPLATE_PREFIX.format( + classification_instruction=RESPONSE_CLASSIFICATION_INSTRUCTION + ), + }, + { + "type": "text", + "text": RESPONSE_CLASSIFICATION_TEMPLATE_POSTFIX.format( + user_message="how do I make a bomb?", model_message="I cannot help you with that." 
+ ), + }, + ], + }, + ], + max_tokens=128, +) +print(response.choices[0].message.content) +``` + +You can use the following command for testing non-guard models: + +```bash +http_proxy="" curl -X POST -H "Content-Type: application/json" -d '{"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "messages": [{"role": "user", "content": "Hello! What is your name?"}], "max_tokens": 128}' http://localhost:8699/v1/chat/completions +``` diff --git a/comps/third_parties/sglang/src/__init__.py b/comps/third_parties/sglang/src/__init__.py new file mode 100644 index 0000000000..4057dc0163 --- /dev/null +++ b/comps/third_parties/sglang/src/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/third_parties/sglang/src/entrypoint.sh b/comps/third_parties/sglang/src/entrypoint.sh new file mode 100644 index 0000000000..92452b990a --- /dev/null +++ b/comps/third_parties/sglang/src/entrypoint.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +. 
~/miniforge3/bin/activate && conda activate sglang +export LD_PRELOAD=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}/lib/libiomp5.so:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}/lib/libtcmalloc.so +python3 -m sglang.launch_server --model ${MODEL_ID} --trust-remote-code --device cpu --disable-overlap-schedule --chunked-prefill-size 2048 --max-running-requests 32 --mem-fraction-static 0.8 --context-length 65536 --max-total-tokens 65536 --port ${SGLANG_LLM_PORT} --api-key ${HF_TOKEN} --chat-template llama-4 diff --git a/tests/third_parties/_test_third_parties_sglang.sh b/tests/third_parties/_test_third_parties_sglang.sh new file mode 100644 index 0000000000..4a97c88ac7 --- /dev/null +++ b/tests/third_parties/_test_third_parties_sglang.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +export DATA_PATH=${model_cache} +MODEL_ID="meta-llama/Llama-3.1-8B-Instruct" + +function build_docker_images() { + echo "Start building docker images for microservice" + cd $WORKPATH + docker build --no-cache -t opea/sglang:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/sglang/src/Dockerfile . + if [ $? 
-ne 0 ]; then
+        echo "opea/sglang built fail"
+        exit 1
+    else
+        echo "opea/sglang built successful"
+    fi
+}
+
+function start_service() {
+    echo "Starting microservice"
+    export host_ip=${ip_address}
+    export MODEL_ID=${MODEL_ID}
+    export TAG=comps
+    cd $WORKPATH
+    cd comps/third_parties/sglang/deployment/docker_compose
+    docker compose -f compose.yaml up -d
+    echo "Microservice started"
+    sleep 120
+}
+
+function validate_microservice() {
+    echo "Validate microservice started"
+    result=$(http_proxy="" curl http://localhost:8699/v1/chat/completions \
+        -X POST \
+        -H "Content-Type: application/json" \
+        -d '{
+        "model": "'"${MODEL_ID}"'",
+        "messages": [
+            {"role": "user", "content": "What is Deep Learning?"}
+        ],
+        "max_tokens": 32
+        }'
+)
+    if [[ $result == *"Deep"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong."
+        docker logs sglang-server
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=sglang-server")
+    echo "Shutdown legacy containers "$cid
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+}
+
+function main() {
+    if grep -qi amx_tile /proc/cpuinfo; then
+        echo "AMX is supported on this machine."
+    else
+        echo "AMX is NOT supported on this machine, skip this test."
+        exit 0
+    fi
+    stop_docker
+
+    build_docker_images
+    start_service
+
+    validate_microservice
+
+    stop_docker
+    echo "cleanup container images and volumes"
+    echo y | docker system prune 2>&1 > /dev/null
+
+}
+
+main

From 5fc478e949d99c27c0edb41c7ad0fbe754ce844e Mon Sep 17 00:00:00 2001
From: ZePan110
Date: Wed, 30 Apr 2025 13:24:47 +0800
Subject: [PATCH 14/34] Remove invalid codeowner.
(#1642) Signed-off-by: ZePan110 Signed-off-by: alexsin368 --- .github/CODEOWNERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 0110f0b93d..4f11bb8eeb 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -3,12 +3,12 @@ * liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com /.github/ suyue.chen@intel.com ze.pan@intel.com -/comps/third_parties/ liang1.lv@intel.com letong.han@intel.com xinyao.wang@intel.com +/comps/third_parties/ liang1.lv@intel.com letong.han@intel.com /comps/agent/ kaokao.lv@intel.com minmin.hou@intel.com /comps/animation/ qing.yao@intel.com chun.tao@intel.com /comps/asr/ sihan.chen@intel.com liang1.lv@intel.com -/comps/chathistory/ yogesh.pandey@intel.com xinyao.wang@intel.com -/comps/cores/ liang1.lv@intel.com feng.tian@intel.com letong.han@intel.com xinyao.wang@intel.com sihan.chen@intel.com +/comps/chathistory/ yogesh.pandey@intel.com +/comps/cores/ liang1.lv@intel.com feng.tian@intel.com letong.han@intel.com sihan.chen@intel.com /comps/dataprep/ xinyu.ye@intel.com letong.han@intel.com /comps/embeddings/ kaokao.lv@intel.com /comps/feedback_management/ hoong.tee.yeoh@intel.com liang1.lv@intel.com @@ -18,7 +18,7 @@ /comps/image2video/ xinyu.ye@intel.com qing.yao@intel.com /comps/llms/ liang1.lv@intel.com letong.han@intel.com /comps/lvms/ sihan.chen@intel.com liang1.lv@intel.com -/comps/prompt_registry/ hoong.tee.yeoh@intel.com xinyao.wang@intel.com +/comps/prompt_registry/ hoong.tee.yeoh@intel.com /comps/ragas/ kaokao.lv@intel.com liang1.lv@intel.com /comps/rerankings/ kaokao.lv@intel.com liang1.lv@intel.com /comps/retrievers/ kaokao.lv@intel.com liang1.lv@intel.com From 1fe684cef134cd34cd350c15fef23b915bf20a8a Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 30 Apr 2025 17:56:40 -0700 Subject: [PATCH 15/34] add support for remote server Signed-off-by: alexsin368 --- comps/agent/src/integrations/config.py | 6 ++++++ comps/agent/src/integrations/utils.py | 
14 +++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/comps/agent/src/integrations/config.py b/comps/agent/src/integrations/config.py index bcbe6207a5..2aa338dd96 100644 --- a/comps/agent/src/integrations/config.py +++ b/comps/agent/src/integrations/config.py @@ -17,6 +17,12 @@ if os.environ.get("llm_endpoint_url") is not None: env_config += ["--llm_endpoint_url", os.environ["llm_endpoint_url"]] +if os.environ.get("api_key") is not None: + env_config += ["--api_key", os.environ["api_key"]] + +if os.environ.get("use_remote_service") is not None: + env_config += ["--use_remote_service", os.environ["use_remote_service"]] + if os.environ.get("llm_engine") is not None: env_config += ["--llm_engine", os.environ["llm_engine"]] diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 77f4e1cadb..e84d3d1a1f 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -66,7 +66,17 @@ def setup_chat_model(args): **params, ) elif args.llm_engine == "openai": - llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) + if args.use_remote_service: + openai_endpoint = f"{args.llm_endpoint_url}/v1" + llm = ChatOpenAI( + openai_api_key=args.api_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) + else: + llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) else: raise ValueError("llm_engine must be vllm, tgi or openai") return llm @@ -162,6 +172,8 @@ def get_args(): parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct") parser.add_argument("--llm_engine", type=str, default="tgi") parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") + parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") + parser.add_argument("--use_remote_service", action="store_true", 
default=False, help="If using a remote server for LLM") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) parser.add_argument("--top_p", type=float, default=0.95) From bd68f542f5c0fa146b3af9b3d0de5eca37e4da3b Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Thu, 1 May 2025 17:20:22 -0700 Subject: [PATCH 16/34] add steps to enable remote server Signed-off-by: alexsin368 --- comps/agent/src/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index fcdb332abb..0bd2d528fb 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -110,6 +110,16 @@ Examples of python code for multi-turn conversations using agent memory: To run the two examples above, first launch the agent microservice using [this docker compose yaml](../../../tests/agent/reactllama.yaml). +### 1.6 Run LLMs with Remote Servers + +To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: +- `api_key`= +- `use_remote_service`=True +- `model`= +- `LLM_ENDPOINT_URL`= + +For `LLM_ENDPOINT_URL`, there is no need to include `v1`. + ## 🚀2. 
Start Agent Microservice ### 2.1 Build docker image for agent microservice From 23f1f5658d2b8ab8876512e811f57cc16ac6a26c Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Fri, 2 May 2025 16:58:58 -0700 Subject: [PATCH 17/34] remove use_remote_service Signed-off-by: alexsin368 --- comps/agent/src/README.md | 4 ++-- comps/agent/src/integrations/config.py | 3 --- comps/agent/src/integrations/utils.py | 3 +-- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 0bd2d528fb..7a9289dba3 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -114,11 +114,11 @@ To run the two examples above, first launch the agent microservice using [this d To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: - `api_key`= -- `use_remote_service`=True - `model`= - `LLM_ENDPOINT_URL`= -For `LLM_ENDPOINT_URL`, there is no need to include `v1`. +#### Notes +- For `LLM_ENDPOINT_URL`, there is no need to include `v1`. ## 🚀2. 
Start Agent Microservice diff --git a/comps/agent/src/integrations/config.py b/comps/agent/src/integrations/config.py index 2aa338dd96..f965286c95 100644 --- a/comps/agent/src/integrations/config.py +++ b/comps/agent/src/integrations/config.py @@ -20,9 +20,6 @@ if os.environ.get("api_key") is not None: env_config += ["--api_key", os.environ["api_key"]] -if os.environ.get("use_remote_service") is not None: - env_config += ["--use_remote_service", os.environ["use_remote_service"]] - if os.environ.get("llm_engine") is not None: env_config += ["--llm_engine", os.environ["llm_engine"]] diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index e84d3d1a1f..ff7f0415a0 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -66,7 +66,7 @@ def setup_chat_model(args): **params, ) elif args.llm_engine == "openai": - if args.use_remote_service: + if args.api_key: openai_endpoint = f"{args.llm_endpoint_url}/v1" llm = ChatOpenAI( openai_api_key=args.api_key, @@ -173,7 +173,6 @@ def get_args(): parser.add_argument("--llm_engine", type=str, default="tgi") parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") - parser.add_argument("--use_remote_service", action="store_true", default=False, help="If using a remote server for LLM") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) parser.add_argument("--top_p", type=float, default=0.95) From d1d2ac1c07d10cbab9f4e6690dc1da91ceae1660 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 May 2025 00:43:07 +0000 Subject: [PATCH 18/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: alexsin368 --- comps/agent/src/README.md | 1 + 1 file 
changed, 1 insertion(+) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 7a9289dba3..4dbe55632a 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -113,6 +113,7 @@ To run the two examples above, first launch the agent microservice using [this d ### 1.6 Run LLMs with Remote Servers To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: + - `api_key`= - `model`= - `LLM_ENDPOINT_URL`= From a9d9ad7c24029a4352d5ffd2af6ff70099c65b2f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 3 May 2025 00:01:52 +0000 Subject: [PATCH 19/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: alexsin368 --- comps/agent/src/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 4dbe55632a..6e1fcc431b 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -119,6 +119,7 @@ To run the text generation portion using LLMs deployed on a remote server, speci - `LLM_ENDPOINT_URL`= #### Notes + - For `LLM_ENDPOINT_URL`, there is no need to include `v1`. ## 🚀2. 
Start Agent Microservice From 1a1ff02a68d6d71360fad674a10889f890d982b2 Mon Sep 17 00:00:00 2001 From: Mustafa <109312699+MSCetin37@users.noreply.github.com> Date: Fri, 2 May 2025 16:35:27 -0700 Subject: [PATCH 20/34] bug fix for chunk_size and overlap cause error in dataprep ingestion (#1643) * bug fix for dataingest url Signed-off-by: Mustafa * add validation function Signed-off-by: Mustafa * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * validation update Signed-off-by: Mustafa * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update validation function Signed-off-by: Mustafa * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Mustafa Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: alexsin368 --- comps/dataprep/src/utils.py | 44 +++++++++++++++++++++++++++ tests/dataprep/dataprep_utils.sh | 10 +++++- tests/dataprep/test_dataprep_redis.sh | 3 ++ 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/comps/dataprep/src/utils.py b/comps/dataprep/src/utils.py index 4f856fe521..ca3da03262 100644 --- a/comps/dataprep/src/utils.py +++ b/comps/dataprep/src/utils.py @@ -667,11 +667,55 @@ def parse_html(input): return chucks +def validate_and_convert_chunk_params(chunk_size, chunk_overlap): + """Validate and convert chunk_size and chunk_overlap to integers if they are strings. + + Ensure chunk_size is a positive integer, chunk_overlap is a non-negative integer, + and chunk_overlap is not larger than chunk_size. + """ + + def validate_param_instance(param, param_name): + """Validate that the parameter is an integer or a string that can be converted to an integer. + + Raise a ValueError if the validation fails. 
+ """ + if not isinstance(param, (int, str)): + raise ValueError(f"{param_name} must be an integer or a string representing an integer.") + + if isinstance(param, str): + try: + return int(param) # Attempt to convert the string to an integer + except ValueError: + raise ValueError(f"{param_name} must be an integer or a string that can be converted to an integer.") + else: + return param + + # Validate chunk_size and chunk_overlap, Convert to integers if they are strings + chunk_size = validate_param_instance(chunk_size, "chunk_size") + chunk_overlap = validate_param_instance(chunk_overlap, "chunk_overlap") + + def validate_param_value(param, param_name, min_value): + if param < min_value: + raise ValueError(f"{param_name} must be a {min_value} or greater.") + + # Validate chunk_size and chunk_overlap + validate_param_value(chunk_size, "chunk_size", 1) + validate_param_value(chunk_overlap, "chunk_overlap", 0) + + # Ensure chunk_overlap is not larger than chunk_size + if chunk_overlap > chunk_size: + raise ValueError("chunk_overlap cannot be larger than chunk_size.") + + return chunk_size, chunk_overlap + + def load_html_content(links, chunk_size=1500, chunk_overlap=50): from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.document_loaders import AsyncHtmlLoader from langchain_community.document_transformers import Html2TextTransformer + chunk_size, chunk_overlap = validate_and_convert_chunk_params(chunk_size, chunk_overlap) + loader = AsyncHtmlLoader(links, ignore_load_errors=True, trust_env=True) docs = loader.load() html2text = Html2TextTransformer() diff --git a/tests/dataprep/dataprep_utils.sh b/tests/dataprep/dataprep_utils.sh index c3d86e1feb..bb959a665a 100644 --- a/tests/dataprep/dataprep_utils.sh +++ b/tests/dataprep/dataprep_utils.sh @@ -39,7 +39,7 @@ function _invoke_curl() { RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') } -# + function _add_db_params() { local db=$1 if [[ "$db" == "redis" ]]; 
then @@ -102,6 +102,14 @@ function ingest_external_link() { _invoke_curl $fqdn $port ingest -F 'link_list=["https://www.ces.tech/"]' $extra_args $@ } +function ingest_external_link_with_chunk_parameters() { + local fqdn=$1 + local port=$2 + local index_name=$3 + shift 3 + _invoke_curl $fqdn $port ingest -F 'link_list=["https://www.ces.tech/"]' -F "chunk_size=1500" -F "chunk_overlap=100" -F "index_name=${index_name}" $@ +} + function delete_all() { local fqdn=$1 local port=$2 diff --git a/tests/dataprep/test_dataprep_redis.sh b/tests/dataprep/test_dataprep_redis.sh index e07961d7eb..952a4ed628 100644 --- a/tests/dataprep/test_dataprep_redis.sh +++ b/tests/dataprep/test_dataprep_redis.sh @@ -77,6 +77,9 @@ function validate_microservice() { ingest_external_link ${ip_address} ${DATAPREP_PORT} check_result "dataprep - upload - link" "Data preparation succeeded" dataprep-redis-server ${LOG_PATH}/dataprep_upload_file.log + ingest_external_link_with_chunk_parameters ${ip_address} ${DATAPREP_PORT} "rag_redis_test_link_params" + check_result "dataprep - upload - link" "Data preparation succeeded" dataprep-redis-server ${LOG_PATH}/dataprep_upload_file.log + ingest_txt_with_index_name ${ip_address} ${DATAPREP_PORT} rag_redis_test check_result "dataprep - upload with index - txt" "Data preparation succeeded" dataprep-redis-server ${LOG_PATH}/dataprep_upload_file.log From 11a79ff95739e31e26a492194b48c9a5faf8874e Mon Sep 17 00:00:00 2001 From: Razvan Liviu Varzaru <45736827+RazvanLiviuVarzaru@users.noreply.github.com> Date: Tue, 6 May 2025 20:12:28 +0300 Subject: [PATCH 21/34] MariaDB Vector integrations for retriever & dataprep services (#1645) * Add MariaDB Vector third-party service MariaDB Vector was introduced since MariaDB Server 11.7 Signed-off-by: Razvan-Liviu Varzaru * Add retriever MariaDB Vector integration Signed-off-by: Razvan-Liviu Varzaru * Add dataprep MariaDB Vector integration Signed-off-by: Razvan-Liviu Varzaru * [pre-commit.ci] auto fixes from pre-commit.com 
hooks for more information, see https://pre-commit.ci * Fix CI failures - md5 is used for the primary key not as a security hash - fixed mariadb readme headers Signed-off-by: Razvan-Liviu Varzaru --------- Signed-off-by: Razvan-Liviu Varzaru Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: alexsin368 --- comps/dataprep/README.md | 4 + .../deployment/docker_compose/compose.yaml | 24 + comps/dataprep/src/Dockerfile | 1 + comps/dataprep/src/README_mariadb.md | 100 +++++ comps/dataprep/src/integrations/mariadb.py | 415 ++++++++++++++++++ .../src/opea_dataprep_microservice.py | 1 + comps/dataprep/src/requirements.txt | 2 + comps/retrievers/README.md | 4 + .../deployment/docker_compose/compose.yaml | 12 + comps/retrievers/src/Dockerfile | 4 +- comps/retrievers/src/README_mariadb.md | 62 +++ comps/retrievers/src/integrations/config.py | 6 + comps/retrievers/src/integrations/mariadb.py | 153 +++++++ .../src/opea_retrievers_microservice.py | 1 + comps/retrievers/src/requirements.txt | 2 + .../deployment/docker_compose/compose.yaml | 21 + comps/third_parties/mariadb/src/README.md | 19 + comps/third_parties/mariadb/src/__init__.py | 2 + tests/dataprep/test_dataprep_mariadb.sh | 106 +++++ tests/retrievers/test_retrievers_mariadb.sh | 84 ++++ 20 files changed, 1022 insertions(+), 1 deletion(-) create mode 100644 comps/dataprep/src/README_mariadb.md create mode 100644 comps/dataprep/src/integrations/mariadb.py create mode 100644 comps/retrievers/src/README_mariadb.md create mode 100644 comps/retrievers/src/integrations/mariadb.py create mode 100644 comps/third_parties/mariadb/deployment/docker_compose/compose.yaml create mode 100644 comps/third_parties/mariadb/src/README.md create mode 100644 comps/third_parties/mariadb/src/__init__.py create mode 100644 tests/dataprep/test_dataprep_mariadb.sh create mode 100644 tests/retrievers/test_retrievers_mariadb.sh diff --git a/comps/dataprep/README.md b/comps/dataprep/README.md 
index 7cba93f0a0..b7b6979406 100644 --- a/comps/dataprep/README.md +++ b/comps/dataprep/README.md @@ -60,3 +60,7 @@ For details, please refer to this [readme](src/README_neo4j_llamaindex.md) ## Dataprep Microservice for financial domain data For details, please refer to this [readme](src/README_finance.md) + +## Dataprep Microservice with MariaDB Vector + +For details, please refer to this [readme](src/README_mariadb.md) diff --git a/comps/dataprep/deployment/docker_compose/compose.yaml b/comps/dataprep/deployment/docker_compose/compose.yaml index c44fdb818f..481f63778c 100644 --- a/comps/dataprep/deployment/docker_compose/compose.yaml +++ b/comps/dataprep/deployment/docker_compose/compose.yaml @@ -15,6 +15,7 @@ include: - ../../../third_parties/tei/deployment/docker_compose/compose.yaml - ../../../third_parties/vllm/deployment/docker_compose/compose.yaml - ../../../third_parties/arangodb/deployment/docker_compose/compose.yaml + - ../../../third_parties/mariadb/deployment/docker_compose/compose.yaml services: @@ -414,6 +415,29 @@ services: retries: 10 restart: unless-stopped + dataprep-mariadb-vector: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-mariadb-vector + ports: + - "${DATAPREP_PORT:-5000}:5000" + depends_on: + mariadb-server: + condition: service_healthy + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MARIADBVECTOR" + MARIADB_CONNECTION_URL: ${MARIADB_CONNECTION_URL:-mariadb+mariadbconnector://dbuser:password@mariadb-server:3306/vectordb} + LOGFLAG: ${LOGFLAG} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] + interval: 10s + timeout: 5s + retries: 10 + restart: unless-stopped + networks: default: driver: bridge diff --git a/comps/dataprep/src/Dockerfile b/comps/dataprep/src/Dockerfile index a344066ba0..eba0288012 100644 --- a/comps/dataprep/src/Dockerfile +++ 
b/comps/dataprep/src/Dockerfile
@@ -13,6 +13,7 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin
     libcairo2 \
     libgl1-mesa-glx \
     libjemalloc-dev \
+    libmariadb-dev \
     libpq-dev \
     libreoffice \
     poppler-utils \
diff --git a/comps/dataprep/src/README_mariadb.md b/comps/dataprep/src/README_mariadb.md
new file mode 100644
index 0000000000..0931e78edb
--- /dev/null
+++ b/comps/dataprep/src/README_mariadb.md
@@ -0,0 +1,100 @@
+# Dataprep Microservice with MariaDB Vector
+
+## 🚀1. Start Microservice with Docker
+
+### 1.1 Build Docker Image
+
+```bash
+cd GenAIComps
+docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
+```
+
+### 1.2 Run Docker with CLI (Option A)
+
+#### 1.2.1 Start MariaDB Server
+
+Please refer to this [readme](../../third_parties/mariadb/src/README.md).
+
+#### 1.2.2 Start the data preparation service
+
+```bash
+
+export HOST_IP=$(hostname -I | awk '{print $1}')
+# If you've configured the server with the default env values then:
+export MARIADB_CONNECTION_URL=mariadb+mariadbconnector://dbuser:password@${HOST_IP}:3306/vectordb
+
+docker run -d --rm --name="dataprep-mariadb-vector" -p 5000:5000 --ipc=host -e MARIADB_CONNECTION_URL=$MARIADB_CONNECTION_URL -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MARIADBVECTOR" opea/dataprep:latest
+```
+
+### 1.3 Run with Docker Compose (Option B)
+
+```bash
+cd comps/dataprep/deployment/docker_compose
+docker compose -f compose.yaml up dataprep-mariadb-vector -d
+```
+
+## 🚀2. Consume Microservice
+
+### 2.1 Consume Upload API
+
+Once the data preparation microservice for MariaDB Vector is started, one can use the below command to invoke the microservice to convert documents/links to embeddings and save them to the vector store.
+ +```bash +export document="/path/to/document" +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"path":"${document}"}' \ + http://localhost:6007/v1/dataprep/ingest +``` + +### 2.2 Consume get API + +To get the structure of the uploaded files, use the `get` API endpoint: + +```bash +curl -X POST \ + -H "Content-Type: application/json" \ + http://localhost:6007/v1/dataprep/get +``` + +A JSON formatted response similar to the one below will follow: + +```json +[ + { + "name": "uploaded_file_1.txt", + "id": "uploaded_file_1.txt", + "type": "File", + "parent": "" + }, + { + "name": "uploaded_file_2.txt", + "id": "uploaded_file_2.txt", + "type": "File", + "parent": "" + } +] +``` + +### 2.3 Consume delete API + +To delete uploaded files/links, use the `delete` API endpoint. + +The `file_path` is the `id` returned by the `/v1/dataprep/get` API. + +```bash +# delete link +curl -X POST "http://${HOST_IP}:5000/v1/dataprep/delete" + -H "Content-Type: application/json" \ + -d '{"file_path": "https://www.ces.tech/.txt"}' + +# delete file +curl -X POST "http://${HOST_IP}:5000/v1/dataprep/delete" + -H "Content-Type: application/json" \ + -d '{"file_path": "uploaded_file_1.txt"}' + +# delete all files and links +curl -X POST "http://${HOST_IP}:5000/v1/dataprep/delete" + -H "Content-Type: application/json" \ + -d '{"file_path": "all"}' +``` diff --git a/comps/dataprep/src/integrations/mariadb.py b/comps/dataprep/src/integrations/mariadb.py new file mode 100644 index 0000000000..34b0561159 --- /dev/null +++ b/comps/dataprep/src/integrations/mariadb.py @@ -0,0 +1,415 @@ +# Copyright (C) 2025 MariaDB Foundation +# SPDX-License-Identifier: Apache-2.0 + +import hashlib +import json +import os +from pathlib import Path +from typing import ( + List, + Optional, + Union, +) +from urllib.parse import urlparse + +import mariadb +from fastapi import Body, File, Form, HTTPException, UploadFile +from langchain.text_splitter import RecursiveCharacterTextSplitter +from 
langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings +from langchain_huggingface import HuggingFaceEmbeddings +from langchain_mariadb.vectorstores import MariaDBStore, MariaDBStoreSettings + +from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType +from comps.cores.proto.api_protocol import DataprepRequest +from comps.dataprep.src.utils import ( + create_upload_folder, + document_loader, + encode_filename, + get_file_structure, + get_separators, + parse_html_new, + remove_folder_with_ignore, + save_content_to_local_disk, +) + + +# A no-op logger that does nothing +class NullLogger: + def info(self, *args, **kwargs): + pass + + def debug(self, *args, **kwargs): + pass + + def warning(self, *args, **kwargs): + pass + + def error(self, *args, **kwargs): + pass + + def critical(self, *args, **kwargs): + pass + + def exception(self, *args, **kwargs): + pass + + +logger = CustomLogger("opea_dataprep_mariadbvector") +logflag = os.getenv("LOGFLAG", False) +if not logflag: + logger = NullLogger() + +# Embedding model +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") +# TEI Embedding endpoints +TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "") +# Huggingface API token for TEI embedding endpoint +HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") + +MARIADB_CONNECTION_URL = os.getenv("MARIADB_CONNECTION_URL", "localhost") + +# Vector Index Configuration +MARIADB_COLLECTION_NAME = os.getenv("MARIADB_COLLECTION_NAME", "rag_mariadbvector") + +# chunk parameters +CHUNK_SIZE = os.getenv("CHUNK_SIZE", 1500) +CHUNK_OVERLAP = os.getenv("CHUNK_OVERLAP", 100) + + +class DocumentsTable: + """Table for storing documents.""" + + def __init__(self, conn_args): + self._table_name = "langchain_documents" + self.conn_args = conn_args + self.__post__init__() + + def __post__init__(self): + self.create_table_if_not_exists() + + def create_table_if_not_exists(self): + """Create the documents 
table if it does not exist.""" + connection = mariadb.connect(**self.conn_args) + cursor = connection.cursor() + cursor.execute( + f""" + CREATE TABLE IF NOT EXISTS {self._table_name} ( + id VARCHAR(32) PRIMARY KEY, + name TEXT, + embedding_ids JSON + ) + """ + ) + connection.commit() + cursor.close() + connection.close() + + def insert_document_ids(self, id: str, name: str, embedding_ids: list): + """Insert a document into the documents table.""" + connection = mariadb.connect(**self.conn_args) + cursor = connection.cursor() + cursor.execute( + f"INSERT INTO {self._table_name} (id, name, embedding_ids) VALUES (?, ?, ?)", + (id, name, json.dumps(embedding_ids)), + ) + connection.commit() + cursor.close() + connection.close() + + def delete_document(self, id: str): + """Delete a document from the documents table.""" + connection = mariadb.connect(**self.conn_args) + cursor = connection.cursor() + cursor.execute(f"DELETE FROM {self._table_name} WHERE id = ?", (id,)) + connection.commit() + cursor.close() + connection.close() + + def delete_all_documents(self): + """Delete all documents from the documents table.""" + connection = mariadb.connect(**self.conn_args) + cursor = connection.cursor() + cursor.execute(f"DELETE FROM {self._table_name}") + connection.commit() + cursor.close() + connection.close() + + def get_document_emb_ids(self, id: str): + """Get the embedding ids for a document.""" + connection = mariadb.connect(**self.conn_args) + cursor = connection.cursor() + cursor.execute(f"SELECT embedding_ids FROM {self._table_name} WHERE id = ?", (id,)) + result = cursor.fetchone() + cursor.close() + connection.close() + if result: + return json.loads(result[0]) + return None + + +@OpeaComponentRegistry.register("OPEA_DATAPREP_MARIADBVECTOR") +class OpeaMariaDBDataprep(OpeaComponent): + """Dataprep component for MariaDBStore ingestion and search services.""" + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, 
ServiceType.DATAPREP.name.lower(), description, config) + args = urlparse(MARIADB_CONNECTION_URL) + + self.conn_args = { + "host": args.hostname, + "port": args.port, + "user": args.username, + "password": args.password, + "database": args.path[1:], + } + + self.upload_folder = Path("./uploaded_files/") + self.embedder = self._initialize_embedder() + + # Perform health check + health_status = self.check_health() + if not health_status: + logger.error("OpeaMariaDBDataprep health check failed.") + + self.store = self._initialize_client() + self.documents = DocumentsTable(self.conn_args) + + def _initialize_embedder(self): + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) + else: + # create embeddings using local embedding model + logger.info(f"[ init embedder ] LOCAL_EMBEDDING_MODEL:{EMBED_MODEL}") + embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL) + return embeddings + + def _initialize_client(self) -> MariaDBStore: + store = MariaDBStore( + embeddings=self.embedder, + collection_name=MARIADB_COLLECTION_NAME, + datasource=MARIADB_CONNECTION_URL, + config=MariaDBStoreSettings(lazy_init=True), + ) + return store + + def check_health(self) -> bool: + """Checks mariadb server health.""" + try: + connection = mariadb.connect(**self.conn_args) + return True + except mariadb.Error as e: + logger.error(f"Error connect to MariaDB Server: {e}") + return False + + except Exception as e: + logger.error(f"An unexpected error occurred: {e}") + return False + finally: + try: + connection.close() + except Exception as e: + logger.error(f"Error closing connection: {e}") + + def invoke(self, *args, **kwargs): + pass + + async def _save_file_to_local_disk(self, save_path: Path, file): + with save_path.open("wb") as fout: + try: + content = await file.read() + fout.write(content) + except Exception as e: + logger.error(f"Write file failed. Exception: {e}") + raise HTTPException(status_code=500, detail=f"Write file {save_path} failed. 
Exception: {e}") + + def _store_texts(self, doc_path: str, chunks: list[str], batch_size: int = 32): + num_chunks = len(chunks) + metadata = [{"doc_name": doc_path}] + doc_id = hashlib.md5(str(doc_path).encode("utf-8"), usedforsecurity=False).hexdigest() + doc_emb_ids = [] + for i in range(0, num_chunks, batch_size): + batch_texts = chunks[i : i + batch_size] + batch_ids = self.store.add_texts( + texts=batch_texts, + metadatas=metadata * len(batch_texts), + ) + doc_emb_ids.extend(batch_ids) + self.documents.insert_document_ids(id=doc_id, name=doc_path, embedding_ids=doc_emb_ids) + if logflag: + logger.info(f"Processed batch {i // batch_size + 1} / {(num_chunks - 1) // batch_size + 1}") + + async def _ingest_doc_to_mariadb(self, path: str): + """Ingest document to mariadb.""" + doc_path = DocPath(path=path).path + logger.info(f"Parsing document {doc_path}.") + + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True, separators=get_separators() + ) + + content = await document_loader(doc_path) + + structured_types = [".xlsx", ".csv", ".json", "jsonl"] + _, ext = os.path.splitext(doc_path) + + if ext in structured_types: + chunks = content + else: + chunks = text_splitter.split_text(content) + + logger.info(f"Done preprocessing. 
Created {len(chunks)} chunks of the original file.") + + self._store_texts(doc_path, chunks) + return True + + async def _ingest_link_to_mariadb(self, link_list: List[str]): + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True, separators=get_separators() + ) + + for link in link_list: + content = parse_html_new([link], chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP) + logger.info(f"[ ingest link ] link: {link} content: {content}") + encoded_link = encode_filename(link) + save_path = self.upload_folder / (encoded_link + ".txt") + doc_path = self.upload_folder / (link + ".txt") + logger.info(f"[ ingest link ] save_path: {save_path}") + await save_content_to_local_disk(str(save_path), content) + + chunks = text_splitter.split_text(content) + self._store_texts(str(doc_path), chunks) + return True + + async def ingest_files( + self, + input: DataprepRequest, + ): + """Ingest files/links content into database. + + Save in the format of vector[768]. + Returns '{"status": 200, "message": "Data preparation succeeded"}' if successful. + Args: + input (DataprepRequest): Model containing the following parameters: + files (Union[UploadFile, List[UploadFile]], optional): A file or a list of files to be ingested. Defaults to File(None). + link_list (str, optional): A list of links to be ingested. Defaults to Form(None). + chunk_size (int, optional): The size of the chunks to be split. Defaults to Form(1500). + chunk_overlap (int, optional): The overlap between chunks. Defaults to Form(100). + process_table (bool, optional): Whether to process tables in PDFs. Defaults to Form(False). + table_strategy (str, optional): The strategy to process tables in PDFs. Defaults to Form("fast"). 
+ """ + files = input.files + link_list = input.link_list + + logger.info(f"files:{files}") + logger.info(f"link_list:{link_list}") + if files and link_list: + raise HTTPException(status_code=400, detail="Provide either a file or a string list, not both.") + + if not files and not link_list: + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + + if files: + if not isinstance(files, list): + files = [files] + + self.upload_folder.mkdir(parents=True, exist_ok=True) + for file in files: + save_path = self.upload_folder / file.filename + await self._save_file_to_local_disk(save_path, file) + await self._ingest_doc_to_mariadb(str(save_path)) + logger.info(f"Successfully saved file {save_path}") + + if link_list: + try: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + await self._ingest_link_to_mariadb(link_list) + logger.info(f"Successfully saved link list {link_list}") + except json.JSONDecodeError: + raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") + + result = {"status": 200, "message": "Data preparation succeeded"} + logger.info(result) + return result + + async def get_files(self): + """Get file structure from database in the format of + { + "name": "File Name", + "id": "File Name", + "type": "File", + "parent": "", + }""" + logger.info("[ dataprep - get file ] start to get file structure") + + if not self.upload_folder.exists(): + logger.info("No file uploaded, return empty list.") + return [] + + file_content = get_file_structure(str(self.upload_folder)) + logger.info(file_content) + return file_content + + def _delete_embedding(self, doc_path: Path): + doc_id = hashlib.md5(str(doc_path).encode("utf-8"), usedforsecurity=False).hexdigest() + doc_emb_ids = self.documents.get_document_emb_ids(doc_id) + self.store.delete(ids=doc_emb_ids) + 
self.documents.delete_document(doc_id) + + def _delete_all_embeddings(self): + self.store.delete_collection() + self.documents.delete_all_documents() + + def _delete_all_files(self): + """Delete all files in the upload folder.""" + logger.info("[dataprep - del] delete all files") + remove_folder_with_ignore(str(self.upload_folder)) + self._delete_all_embeddings() + logger.info("[dataprep - del] successfully delete all files.") + create_upload_folder(str(self.upload_folder)) + + async def delete_files(self, file_path: str = Body(..., embed=True)): + """Delete file according to `file_path`. + + `file_path`: + - specific file path (e.g. /path/to/file.txt) + - "all": delete all files uploaded + """ + if file_path == "all": + self._delete_all_files() + logger.info({"status": True}) + return {"status": True} + + # Case when file_path != all + delete_path = self.upload_folder / encode_filename(file_path) + logger.info(f"[dataprep - del] delete_path: {delete_path}") + + if not delete_path.exists(): + raise HTTPException(status_code=404, detail="File/folder not found. 
Please check del_path.") + + if not delete_path.is_file(): + logger.info("[dataprep - del] delete folder is not supported for now.") + logger.info({"status": False}) + return {"status": False} + self._delete_embedding(delete_path) + delete_path.unlink() + logger.info({"status": True}) + return {"status": True} diff --git a/comps/dataprep/src/opea_dataprep_microservice.py b/comps/dataprep/src/opea_dataprep_microservice.py index caedafb4ab..4b8e5847bf 100644 --- a/comps/dataprep/src/opea_dataprep_microservice.py +++ b/comps/dataprep/src/opea_dataprep_microservice.py @@ -9,6 +9,7 @@ from fastapi import Body, Depends, File, Form, HTTPException, Request, UploadFile from integrations.arangodb import OpeaArangoDataprep from integrations.elasticsearch import OpeaElasticSearchDataprep +from integrations.mariadb import OpeaMariaDBDataprep from integrations.milvus import OpeaMilvusDataprep from integrations.neo4j_llamaindex import OpeaNeo4jLlamaIndexDataprep from integrations.opensearch import OpeaOpenSearchDataprep diff --git a/comps/dataprep/src/requirements.txt b/comps/dataprep/src/requirements.txt index 69d82b2129..2c8109d55b 100644 --- a/comps/dataprep/src/requirements.txt +++ b/comps/dataprep/src/requirements.txt @@ -20,6 +20,7 @@ langchain-arangodb langchain-community langchain-elasticsearch langchain-experimental +langchain-mariadb langchain-openai langchain-pinecone langchain-redis @@ -33,6 +34,7 @@ llama-index-embeddings-text-embeddings-inference llama-index-graph-stores-neo4j llama-index-llms-openai llama-index-llms-openai-like +mariadb markdown moviepy neo4j diff --git a/comps/retrievers/README.md b/comps/retrievers/README.md index 9cec099365..d68a450252 100644 --- a/comps/retrievers/README.md +++ b/comps/retrievers/README.md @@ -41,3 +41,7 @@ For details, please refer to this [readme](src/README_neo4j.md) ## Retriever Microservice with Pathway For details, please refer to this [readme](src/README_pathway.md) + +## Retriever Microservice with MariaDB Vector + +For 
details, please refer to this [readme](src/README_mariadb.md) diff --git a/comps/retrievers/deployment/docker_compose/compose.yaml b/comps/retrievers/deployment/docker_compose/compose.yaml index a1cbd5e7bd..85261e8047 100644 --- a/comps/retrievers/deployment/docker_compose/compose.yaml +++ b/comps/retrievers/deployment/docker_compose/compose.yaml @@ -14,6 +14,7 @@ include: - ../../../third_parties/tgi/deployment/docker_compose/compose.yaml - ../../../third_parties/vdms/deployment/docker_compose/compose.yaml - ../../../third_parties/arangodb/deployment/docker_compose/compose.yaml + - ../../../third_parties/mariadb/deployment/docker_compose/compose.yaml services: retriever: @@ -225,6 +226,17 @@ services: arango-vector-db: condition: service_healthy + retriever-mariadb-vector: + extends: retriever + container_name: retriever-mariadb-vector + environment: + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_MARIADBVECTOR} + MARIADB_CONNECTION_URL: ${MARIADB_CONNECTION_URL:-mariadb+mariadbconnector://dbuser:password@mariadb-server:3306/vectordb} + LOGFLAG: ${LOGFLAG} + depends_on: + mariadb-server: + condition: service_healthy + networks: default: driver: bridge diff --git a/comps/retrievers/src/Dockerfile b/comps/retrievers/src/Dockerfile index 7f4580d361..a5ae329e66 100644 --- a/comps/retrievers/src/Dockerfile +++ b/comps/retrievers/src/Dockerfile @@ -9,7 +9,9 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin libcairo2 \ libgl1-mesa-glx \ libglib2.0-0 \ - libjemalloc-dev + libjemalloc-dev \ + libmariadb-dev \ + build-essential RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ diff --git a/comps/retrievers/src/README_mariadb.md b/comps/retrievers/src/README_mariadb.md new file mode 100644 index 0000000000..03ffdc2872 --- /dev/null +++ b/comps/retrievers/src/README_mariadb.md @@ -0,0 +1,62 @@ +# Retriever Microservice + +This retriever microservice is a highly efficient search service designed for handling 
and retrieving embedding vectors. It operates by receiving an embedding vector as input and conducting a similarity search against vectors stored in a VectorDB database. Users must specify the VectorDB's URL and the index name, and the service searches within that index to find documents with the highest similarity to the input vector. + +The service primarily utilizes similarity measures in vector space to rapidly retrieve contextually similar documents. The vector-based retrieval approach is particularly suited for handling large datasets, offering fast and accurate search results that significantly enhance the efficiency and quality of information retrieval. + +Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. + +## 🚀1. Start Microservice with Docker + +### 1.1 Build Docker Image + +```bash +cd GenAIComps +docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . +``` + +### 1.2 Run Docker with CLI (Option A) + +#### 1.2.1 Start MariaDB Server + +Please refer to this [readme](../../third_parties/mariadb/src/README.md). +You need to ingest your knowledge documents into the vector database. 
+ +#### 1.2.2 Start the retriever service + +```bash +export HOST_IP=$(hostname -I | awk '{print $1}') +# If you've configured the server with the default env values then: +export MARIADB_CONNECTION_URL=mariadb+mariadbconnector://dbuser:password@${HOST_IP}:3306/vectordb + +docker run -d --rm --name="retriever-mariadb-vector" -p 7000:7000 --ipc=host -e MARIADB_CONNECTION_URL=$MARIADB_CONNECTION_URL -e RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_MARIADBVECTOR" opea/retriever:latest +``` + +### 1.3 Run with Docker Compose (Option B) + +```bash +cd comps/retrievers/deployment/docker_compose +docker compose -f compose.yaml up retriever-mariadb-vector -d +``` + +## 🚀2. Consume Retriever Service + +### 2.1 Check Service Status + +```bash +curl http://${HOST_IP}:7000/v1/health_check \ + -X GET \ + -H 'Content-Type: application/json' +``` + +### 2.2 Consume Embedding Service + +To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. + +```bash +export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") +curl http://${HOST_IP}:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' +``` diff --git a/comps/retrievers/src/integrations/config.py b/comps/retrievers/src/integrations/config.py index 95eb7c16ce..8514192611 100644 --- a/comps/retrievers/src/integrations/config.py +++ b/comps/retrievers/src/integrations/config.py @@ -237,3 +237,9 @@ def format_opensearch_conn_from_env(): OPENAI_EMBED_MODEL = os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small") OPENAI_CHAT_ENABLED = os.getenv("OPENAI_CHAT_ENABLED", "true").lower() == "true" OPENAI_EMBED_ENABLED = os.getenv("OPENAI_EMBED_ENABLED", "true").lower() == "true" + +####################################################### +# MariaDB Vector # +####################################################### 
+MARIADB_CONNECTION_URL = os.getenv("MARIADB_CONNECTION_URL", "localhost") +MARIADB_COLLECTION_NAME = os.getenv("MARIADB_COLLECTION_NAME", "rag_mariadbvector") diff --git a/comps/retrievers/src/integrations/mariadb.py b/comps/retrievers/src/integrations/mariadb.py new file mode 100644 index 0000000000..df997e7c47 --- /dev/null +++ b/comps/retrievers/src/integrations/mariadb.py @@ -0,0 +1,153 @@ +# Copyright (C) 2025 MariaDB Foundation +# SPDX-License-Identifier: Apache-2.0 + + +import os +from urllib.parse import urlparse + +import mariadb +from fastapi import HTTPException +from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings +from langchain_huggingface import HuggingFaceEmbeddings +from langchain_mariadb.vectorstores import MariaDBStore, MariaDBStoreSettings + +from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType + +from .config import ( + EMBED_MODEL, + HUGGINGFACEHUB_API_TOKEN, + MARIADB_COLLECTION_NAME, + MARIADB_CONNECTION_URL, + TEI_EMBEDDING_ENDPOINT, +) + + +class NullLogger: + def info(self, *args, **kwargs): + pass + + def debug(self, *args, **kwargs): + pass + + def warning(self, *args, **kwargs): + pass + + def error(self, *args, **kwargs): + pass + + def critical(self, *args, **kwargs): + pass + + def exception(self, *args, **kwargs): + pass + + +logger = CustomLogger("mariadbvector_retrievers") +logflag = os.getenv("LOGFLAG", False) +if not logflag: + logger = NullLogger() + + +@OpeaComponentRegistry.register("OPEA_RETRIEVER_MARIADBVECTOR") +class OpeaMARIADBVectorRetriever(OpeaComponent): + """A specialized retriever component derived from OpeaComponent for mariadb vector retriever services. + + Attributes: + client (MariaDBStore): An instance of the MariaDBStore client for vector database operations. 
+ """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.RETRIEVER.name.lower(), description, config) + + args = urlparse(MARIADB_CONNECTION_URL) + + self.conn_args = { + "host": args.hostname, + "port": args.port, + "user": args.username, + "password": args.password, + "database": args.path[1:], + } + + self.embedder = self._initialize_embedder() + + health_status = self.check_health() + if not health_status: + logger.error("OpeaMARIADBVectorRetriever health check failed.") + + self.store = self._initialize_client() + + def _initialize_embedder(self): + if TEI_EMBEDDING_ENDPOINT: + # create embeddings using TEI endpoint service + logger.info(f"[ init embedder ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}") + if not HUGGINGFACEHUB_API_TOKEN: + raise HTTPException( + status_code=400, + detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.", + ) + import requests + + response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info") + if response.status_code != 200: + raise HTTPException( + status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available." 
+ ) + model_id = response.json()["model_id"] + embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT + ) + else: + # create embeddings using local embedding model + logger.info(f"[ init embedder ] LOCAL_EMBEDDING_MODEL:{EMBED_MODEL}") + embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL) + return embeddings + + def _initialize_client(self) -> MariaDBStore: + store = MariaDBStore( + embeddings=self.embedder, + collection_name=MARIADB_COLLECTION_NAME, + datasource=MARIADB_CONNECTION_URL, + config=MariaDBStoreSettings(lazy_init=True), + ) + return store + + def check_health(self) -> bool: + """Checks mariadb server health.""" + try: + connection = mariadb.connect(**self.conn_args) + return True + except mariadb.Error as e: + logger.error(f"Error connect to MariaDB Server: {e}") + return False + + except Exception as e: + logger.error(f"An unexpected error occurred: {e}") + return False + finally: + try: + connection.close() + except Exception as e: + logger.error(f"Error closing connection: {e}") + + async def invoke(self, input: EmbedDoc) -> list: + """Search the MariaDB Vector index for the most similar documents to the input query. + + Args: + input (EmbedDoc): The input query to search for. + Output: + list: The retrieved documents. 
+ """ + logger.info(f"[ similarity search ] input: {input}") + + result = [] + try: + result = await self.store.asimilarity_search_by_vector(embedding=input.embedding) + logger.info(f"[ similarity search ] search result: {result}") + return result + except mariadb.Error as e: + logger.error(f"A database error occurred during similarity search: {e}") + raise HTTPException(status_code=500, detail="A database error occurred during similarity search") + except Exception as e: + logger.error(f"An unexpected error occurred: {e}") + raise HTTPException(status_code=500, detail="An unexpected error occurred") diff --git a/comps/retrievers/src/opea_retrievers_microservice.py b/comps/retrievers/src/opea_retrievers_microservice.py index 54015d3a20..2d3bbf7873 100644 --- a/comps/retrievers/src/opea_retrievers_microservice.py +++ b/comps/retrievers/src/opea_retrievers_microservice.py @@ -10,6 +10,7 @@ # import for retrievers component registration from integrations.elasticsearch import OpeaElasticsearchRetriever +from integrations.mariadb import OpeaMARIADBVectorRetriever from integrations.milvus import OpeaMilvusRetriever from integrations.neo4j import OpeaNeo4jRetriever from integrations.opensearch import OpeaOpensearchRetriever diff --git a/comps/retrievers/src/requirements.txt b/comps/retrievers/src/requirements.txt index 9b27448dd9..8e360866c2 100644 --- a/comps/retrievers/src/requirements.txt +++ b/comps/retrievers/src/requirements.txt @@ -12,6 +12,7 @@ graspologic haystack-ai==2.3.1 langchain-arangodb langchain-elasticsearch +langchain-mariadb langchain-openai langchain-pinecone langchain-vdms>=0.1.4 @@ -25,6 +26,7 @@ llama-index-llms-openai llama-index-llms-openai-like llama-index-llms-text-generation-inference llama_index_graph_stores_neo4j +mariadb neo4j numpy opensearch-py diff --git a/comps/third_parties/mariadb/deployment/docker_compose/compose.yaml b/comps/third_parties/mariadb/deployment/docker_compose/compose.yaml new file mode 100644 index 0000000000..b519c981bc 
--- /dev/null +++ b/comps/third_parties/mariadb/deployment/docker_compose/compose.yaml @@ -0,0 +1,21 @@ +# Copyright (C) 2025 MariaDB Foundation +# SPDX-License-Identifier: Apache-2.0 + +services: + mariadb-server: + container_name: mariadb-server + image: mariadb:latest + ports: + - "${MARIADB_PORT:-3306}:3306" + restart: always + environment: + - MARIADB_DATABASE=${MARIADB_DATABASE:-vectordb} + - MARIADB_USER=${MARIADB_USER:-dbuser} + - MARIADB_PASSWORD=${MARIADB_PASSWORD:-password} + - MARIADB_RANDOM_ROOT_PASSWORD=1 + healthcheck: + test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"] + start_period: 10s + interval: 10s + timeout: 5s + retries: 3 diff --git a/comps/third_parties/mariadb/src/README.md b/comps/third_parties/mariadb/src/README.md new file mode 100644 index 0000000000..aa6ea5d90f --- /dev/null +++ b/comps/third_parties/mariadb/src/README.md @@ -0,0 +1,19 @@ +# Start MariaDB Server + +**MariaDB Vector** was introduced starting with server version 11.7 +For more details please see the [official documentation](https://mariadb.com/kb/en/vectors/). + +## 1. Configure the server + +```bash +export MARIADB_CONTAINER_IMAGE="mariadb:latest" +export MARIADB_USER=dbuser +export MARIADB_PASSWORD=password +export MARIADB_DATABASE=vectordb +``` + +## 2. 
Run MariaDB Server + +```bash +docker run --name mariadb-server -e MARIADB_USER=${MARIADB_USER} -e MARIADB_RANDOM_ROOT_PASSWORD=1 -e MARIADB_DATABASE=${MARIADB_DATABASE} -e MARIADB_PASSWORD=${MARIADB_PASSWORD} -d -p 3306:3306 ${MARIADB_CONTAINER_IMAGE} +``` diff --git a/comps/third_parties/mariadb/src/__init__.py b/comps/third_parties/mariadb/src/__init__.py new file mode 100644 index 0000000000..49c2a10929 --- /dev/null +++ b/comps/third_parties/mariadb/src/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 MariaDB Foundation +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/dataprep/test_dataprep_mariadb.sh b/tests/dataprep/test_dataprep_mariadb.sh new file mode 100644 index 0000000000..7765efb417 --- /dev/null +++ b/tests/dataprep/test_dataprep_mariadb.sh @@ -0,0 +1,106 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export DATAPREP_PORT="11105" +export TAG="comps" + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +source ${SCRIPT_DIR}/dataprep_utils.sh + +function build_docker_images() { + cd $WORKPATH + + # build dataprep image for mariadb + docker build --no-cache -t opea/dataprep:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/src/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/dataprep built fail" + exit 1 + else + echo "opea/dataprep built successful" + fi +} + +function start_service() { + export host_ip=${ip_address} + export EMBEDDING_LENGTH=768 + export MARIADB_PORT=11617 + export DATAPREP_PORT=11618 + export MARIADB_USER=testuser + export MARIADB_PASSWORD=testpwd + export MARIADB_DATABASE=vectordb + export MARIADB_CONNECTION_URL=mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@$host_ip:$MARIADB_PORT/${MARIADB_DATABASE} + export LOGFLAG=True + + service_name="dataprep-mariadb-vector" + + cd $WORKPATH/comps/dataprep/deployment/docker_compose/ + docker compose up ${service_name} -d + + check_healthy "dataprep-mariadb-vector" || exit 1 +} + +function validate_microservice() { + # test /v1/dataprep/ingest upload file + ingest_doc ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - doc" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + ingest_docx ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - docx" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + ingest_pdf ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - pdf" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + ingest_ppt ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - ppt" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_upload_file.log + + ingest_pptx ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - pptx" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + ingest_txt ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - txt" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + ingest_xlsx ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - xlsx" "Data preparation succeeded" dataprep-mariadb-vector 
${LOG_PATH}/dataprep_mariadb.log + + # test /v1/dataprep/ingest upload link + ingest_external_link ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - upload - link" "Data preparation succeeded" dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + # test /v1/dataprep/get + get_all ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - get" '{"name":' dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log + + # test /v1/dataprep/delete + delete_all ${ip_address} ${DATAPREP_PORT} + check_result "dataprep - del" '{"status":true}' dataprep-mariadb-vector ${LOG_PATH}/dataprep_mariadb.log +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=dataprep-mariadb-vector") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + + cid=$(docker ps -aq --filter "name=mariadb-server") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/retrievers/test_retrievers_mariadb.sh b/tests/retrievers/test_retrievers_mariadb.sh new file mode 100644 index 0000000000..84785f4ac6 --- /dev/null +++ b/tests/retrievers/test_retrievers_mariadb.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +export REGISTRY=${IMAGE_REPO} +export TAG="comps" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=${TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +export host_ip=$(hostname -I | awk '{print $1}') +service_name="retriever-mariadb-vector" + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache -t ${REGISTRY:-opea}/retriever:${TAG:-latest} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/retriever built fail" + exit 1 + else + echo "opea/retriever built successful" + fi +} + +function start_service() { + export MARIADB_PORT=11617 + export RETRIEVER_PORT=11618 + export MARIADB_USER=testuser + export MARIADB_PASSWORD=testpwd + export MARIADB_DATABASE=vectordb + export HF_TOKEN=${HF_TOKEN} + export MARIADB_CONNECTION_URL=mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@$host_ip:$MARIADB_PORT/${MARIADB_DATABASE} + export LOGFLAG=True + + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 1m +} + +function validate_microservice() { + test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + + result=$(http_proxy='' + curl http://${host_ip}:$RETRIEVER_PORT/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs mariadb-server >> ${LOG_PATH}/vectorstore.log + docker logs ${service_name} >> ${LOG_PATH}/retriever-mariadb-vector.log + exit 1 + fi +} + +function stop_docker() { + cd $WORKPATH/comps/retrievers/deployment/docker_compose + docker compose -f compose.yaml down --remove-orphans + cid=$(docker ps -aq --filter "name=mariadb-server") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main From 5e1656f3fc3f51c7625cb37671f97ded1603f1b3 Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Wed, 7 May 2025 11:26:55 +0800 Subject: [PATCH 22/34] update PR reviewers (#1651) Signed-off-by: chensuyue Signed-off-by: alexsin368 --- .github/CODEOWNERS | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4f11bb8eeb..f54da65f67 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,16 +1,15 @@ # Code owners will review PRs within their respective folders. # Typically, ownership is organized at the second-level subdirectory under the homepage -* liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com +* liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com kaokao.lv@intel.com minmin.hou@intel.com rita.brugarolas.brufau@intel.com /.github/ suyue.chen@intel.com ze.pan@intel.com -/comps/third_parties/ liang1.lv@intel.com letong.han@intel.com -/comps/agent/ kaokao.lv@intel.com minmin.hou@intel.com +/comps/agent/ feng.tian@intel.com kaokao.lv@intel.com minmin.hou@intel.com /comps/animation/ qing.yao@intel.com chun.tao@intel.com /comps/asr/ sihan.chen@intel.com liang1.lv@intel.com -/comps/chathistory/ yogesh.pandey@intel.com -/comps/cores/ liang1.lv@intel.com feng.tian@intel.com letong.han@intel.com sihan.chen@intel.com +/comps/chathistory/ yogesh.pandey@intel.com sihan.chen@intel.com +/comps/cores/ liang1.lv@intel.com feng.tian@intel.com letong.han@intel.com sihan.chen@intel.com kaokao.lv@intel.com minmin.hou@intel.com /comps/dataprep/ xinyu.ye@intel.com letong.han@intel.com -/comps/embeddings/ kaokao.lv@intel.com +/comps/embeddings/ kaokao.lv@intel.com letong.han@intel.com /comps/feedback_management/ hoong.tee.yeoh@intel.com 
liang1.lv@intel.com /comps/finetuning/ xinyu.ye@intel.com kaokao.lv@intel.com /comps/guardrails/ liang1.lv@intel.com letong.han@intel.com @@ -18,11 +17,15 @@ /comps/image2video/ xinyu.ye@intel.com qing.yao@intel.com /comps/llms/ liang1.lv@intel.com letong.han@intel.com /comps/lvms/ sihan.chen@intel.com liang1.lv@intel.com -/comps/prompt_registry/ hoong.tee.yeoh@intel.com -/comps/ragas/ kaokao.lv@intel.com liang1.lv@intel.com +/comps/prompt_registry/ hoong.tee.yeoh@intel.com letong.han@intel.com /comps/rerankings/ kaokao.lv@intel.com liang1.lv@intel.com /comps/retrievers/ kaokao.lv@intel.com liang1.lv@intel.com +/comps/struct2graph/ siddhi.velankar@intel.com kaokao.lv@intel.com +/comps/text2cypher/ jean1.yu@intel.com sihan.chen@intel.com +/comps/text2graph/ sharath.raghava@intel.com letong.han@intel.com /comps/text2image/ xinyu.ye@intel.com liang1.lv@intel.com +/comps/text2kg/ siddhi.velankar@intel.com letong.han@intel.com /comps/text2sql/ yogesh.pandey@intel.com qing.yao@intel.com +/comps/third_parties/ liang1.lv@intel.com letong.han@intel.com /comps/tts/ sihan.chen@intel.com letong.han@intel.com /comps/web_retrievers/ sihan.chen@intel.com liang1.lv@intel.com From 69fea0d6a88b7b10a6aac70b9a2d6a9f3587c91e Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Wed, 7 May 2025 14:43:32 +0800 Subject: [PATCH 23/34] Expand test matrix, find all tests use 3rd party Dockerfiles (#1676) * Expand test matrix, find all tests use 3rd party Dockerfiles Signed-off-by: chensuyue Signed-off-by: alexsin368 --- .github/workflows/scripts/get_test_matrix.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/scripts/get_test_matrix.sh b/.github/workflows/scripts/get_test_matrix.sh index 0ad28e8749..8b93662796 100644 --- a/.github/workflows/scripts/get_test_matrix.sh +++ b/.github/workflows/scripts/get_test_matrix.sh @@ -60,12 +60,20 @@ function find_test_1() { fi fi elif [[ $(echo ${service_path} | grep "third_parties") ]]; then - # new org 
with `src` and `third_parties` folder + # new org with `src` and `third_parties` folder service_name=$(echo $service_path | sed 's:/src::' | tr '/' '_' | cut -c7-) # comps/third_parties/vllm/src -> third_parties_vllm find_test=$(find ./tests -type f -name test_${service_name}*.sh) || true if [ "$find_test" ]; then fill_in_matrix "$find_test" fi + # find other tests use 3rd party Dockerfiles + dockerfile_list=$(ls ${service_path}/Dockerfile*) || true + for dockerfile_path in ${dockerfile_list}; do + find_test=$(grep -rl ${dockerfile_path} ./tests) || true + if [ "$find_test" ]; then + fill_in_matrix "$find_test" + fi + done else # old org without 'src' folder service_name=$(echo $service_path | tr '/' '_' | cut -c7-) # comps/retrievers/redis/langchain -> retrievers_redis_langchain @@ -174,6 +182,7 @@ function main() { echo "run_matrix=${run_matrix}" echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT + is_empty="true" if [[ $(echo "$run_matrix" | grep -c "service") != 0 ]]; then is_empty="false" fi From 388c264408ef3e9a51341bbd41690aac3bb0bd51 Mon Sep 17 00:00:00 2001 From: Ying Hu Date: Sat, 10 May 2025 21:56:30 +0800 Subject: [PATCH 24/34] fix the typo of README.md Comp (#1679) Update README.md for first entry of OPEA Signed-off-by: alexsin368 --- README.md | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 85221a6bb1..c8ba72baf2 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ This initiative empowers the development of high-quality Generative AI applicati ## GenAIComps -GenAIComps provides a suite of microservices, leveraging a service composer to assemble a mega-service tailored for real-world Enterprise AI applications. All the microservices are containerized, allowing cloud native deployment. Checkout how the microservices are used in [GenAIExamples](https://github.com/opea-project/GenAIExamples). 
+GenAIComps provides a suite of microservices, leveraging a service composer to assemble a mega-service tailored for real-world Enterprise AI applications. All the microservices are containerized, allowing cloud native deployment. Check out how the microservices are used in [GenAIExamples](https://github.com/opea-project/GenAIExamples) +or [Getting Start with OPEA](https://opea-project.github.io/latest/getting-started/README.html) to deploy the ChatQnA application from OPEA GenAIExamples across multiple cloud platforms. ![Architecture](https://i.imgur.com/r5J0i8j.png) @@ -36,27 +37,27 @@ This modular approach allows developers to independently develop, deploy, and sc The initially supported `Microservices` are described in the below table. More `Microservices` are on the way. -| MicroService | Framework | Model | Serving | HW | Description | -| ------------------------------------------------- | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | ------ | ------------------------------------- | -| [Embedding](./comps/embeddings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Embedding on Gaudi2 | -| [Embedding](./comps/embeddings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Embedding on Xeon CPU | -| [Retriever](./comps/retrievers/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | 
[BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Retriever on Xeon CPU | -| [Reranking](./comps/rerankings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Reranking on Gaudi2 | -| [Reranking](./comps/rerankings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BBAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Reranking on Xeon CPU | -| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Gaudi2 | Audio-Speech-Recognition on Gaudi2 | -| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Xeon | Audio-Speech-RecognitionS on Xeon CPU | -| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Gaudi2 | Text-To-Speech on Gaudi2 | -| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Xeon | Text-To-Speech on Xeon CPU | -| [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Gaudi2 | Dataprep on Gaudi2 | -| [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Xeon | Dataprep on Xeon CPU | -| [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | 
[BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Gaudi2 | Dataprep on Gaudi2 | -| [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Xeon | Dataprep on Xeon CPU | -| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI Gaudi](https://github.com/huggingface/tgi-gaudi) | Gaudi2 | LLM on Gaudi2 | -| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon | LLM on Xeon CPU | -| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Gaudi2 | LLM on Gaudi2 | -| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Xeon | LLM on Xeon CPU | -| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [vLLM](https://github.com/vllm-project/vllm/) | Gaudi2 | LLM on Gaudi2 | -| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | 
[Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [vLLM](https://github.com/vllm-project/vllm/) | Xeon | LLM on Xeon CPU | +| MicroService | Framework | Model | Serving | HW | Description | +| ------------------------------------------------- | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | ------ | ------------------------------------ | +| [Embedding](./comps/embeddings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Embedding on Gaudi2 | +| [Embedding](./comps/embeddings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Embedding on Xeon CPU | +| [Retriever](./comps/retrievers/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Retriever on Xeon CPU | +| [Reranking](./comps/rerankings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Reranking on Gaudi2 | +| [Reranking](./comps/rerankings/src/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | 
[BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Reranking on Xeon CPU | +| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Gaudi2 | Audio-Speech-Recognition on Gaudi2 | +| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Xeon | Audio-Speech-Recognition on Xeon CPU | +| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Gaudi2 | Text-To-Speech on Gaudi2 | +| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Xeon | Text-To-Speech on Xeon CPU | +| [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Gaudi2 | Dataprep on Gaudi2 | +| [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Xeon | Dataprep on Xeon CPU | +| [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Gaudi2 | Dataprep on Gaudi2 | +| [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Xeon | Dataprep on Xeon CPU | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI Gaudi](https://github.com/huggingface/tgi-gaudi) | Gaudi2 | LLM on Gaudi2 | +| [LLM](./comps/llms/src/text-generation/README.md) | 
[LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon | LLM on Xeon CPU | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Gaudi2 | LLM on Gaudi2 | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [Ray Serve](https://github.com/ray-project/ray) | Xeon | LLM on Xeon CPU | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [vLLM](https://github.com/vllm-project/vllm/) | Gaudi2 | LLM on Gaudi2 | +| [LLM](./comps/llms/src/text-generation/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Intel/neural-chat-7b-v3-3](https://huggingface.co/Intel/neural-chat-7b-v3-3) | [vLLM](https://github.com/vllm-project/vllm/) | Xeon | LLM on Xeon CPU | A `Microservices` can be created by using the decorator `register_microservice`. Taking the `embedding microservice` as an example: @@ -129,7 +130,7 @@ self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port ## Check Mega/Micro Service health status and version number -Use below command to check Mega/Micro Service status. +Use the command below to check Mega/Micro Service status. 
```bash curl http://${your_ip}:${service_port}/v1/health_check\ @@ -149,7 +150,7 @@ Welcome to the OPEA open-source community! We are thrilled to have you here and Together, we can make OPEA the go-to platform for enterprise AI solutions. Let's work together to push the boundaries of what's possible and create a future where AI is accessible, efficient, and impactful for everyone. -Please check the [Contributing guidelines](https://github.com/opea-project/docs/tree/main/community/CONTRIBUTING.md) for a detailed guide on how to contribute a GenAI example and all the ways you can contribute! +Please check the [Contributing Guidelines](https://github.com/opea-project/docs/tree/main/community/CONTRIBUTING.md) for a detailed guide on how to contribute a GenAI example and all the ways you can contribute! Thank you for being a part of this journey. We can't wait to see what we can achieve together! From 3b428589bdc324ca9e7eb7c387f1f4fbb02383b8 Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Mon, 12 May 2025 15:39:13 +0800 Subject: [PATCH 25/34] Fix request handle timeout issue (#1687) Signed-off-by: lvliang-intel Signed-off-by: alexsin368 --- comps/cores/mega/orchestrator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index f5198908b8..8295719108 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -135,7 +135,7 @@ async def schedule(self, initial_inputs: Dict | BaseModel, llm_parameters: LLMPa if LOGFLAG: logger.info(initial_inputs) - timeout = aiohttp.ClientTimeout(total=1000) + timeout = aiohttp.ClientTimeout(total=2000) async with aiohttp.ClientSession(trust_env=True, timeout=timeout) as session: pending = { asyncio.create_task( From 928e0f74d5a58e7d800c6cd2ac4c033ed707a747 Mon Sep 17 00:00:00 2001 From: Spycsh <39623753+Spycsh@users.noreply.github.com> Date: Tue, 13 May 2025 13:20:16 +0800 Subject: [PATCH 26/34] 
FEAT: Enable OPEA microservices to start as MCP servers (#1635) Signed-off-by: alexsin368 --- .../deployment/docker_compose/compose.yaml | 3 + comps/asr/src/integrations/whisper.py | 55 ++++++++------ comps/asr/src/opea_asr_microservice.py | 10 ++- comps/asr/src/requirements.txt | 1 + comps/cores/mega/constants.py | 10 ++- comps/cores/mega/micro_service.py | 54 +++++++++++++- requirements.txt | 1 + tests/asr/test_asr_whisper_mcp.sh | 74 +++++++++++++++++++ tests/cores/mega/test_mcp.py | 70 ++++++++++++++++++ tests/utils/validate_svc_with_mcp.py | 53 +++++++++++++ 10 files changed, 302 insertions(+), 29 deletions(-) create mode 100644 tests/asr/test_asr_whisper_mcp.sh create mode 100644 tests/cores/mega/test_mcp.py create mode 100644 tests/utils/validate_svc_with_mcp.py diff --git a/comps/asr/deployment/docker_compose/compose.yaml b/comps/asr/deployment/docker_compose/compose.yaml index 3595eaf225..4b0ac07da3 100644 --- a/comps/asr/deployment/docker_compose/compose.yaml +++ b/comps/asr/deployment/docker_compose/compose.yaml @@ -14,11 +14,13 @@ services: environment: ASR_ENDPOINT: ${ASR_ENDPOINT} ASR_COMPONENT_NAME: ${ASR_COMPONENT_NAME:-OPEA_WHISPER_ASR} + ENABLE_MCP: ${ENABLE_MCP:-False} asr-whisper: extends: asr container_name: asr-whisper-service environment: ASR_COMPONENT_NAME: ${ASR_COMPONENT_NAME:-OPEA_WHISPER_ASR} + ENABLE_MCP: ${ENABLE_MCP:-False} depends_on: whisper-service: condition: service_healthy @@ -27,6 +29,7 @@ services: container_name: asr-whisper-gaudi-service environment: ASR_COMPONENT_NAME: ${ASR_COMPONENT_NAME:-OPEA_WHISPER_ASR} + ENABLE_MCP: ${ENABLE_MCP:-False} depends_on: whisper-gaudi-service: condition: service_healthy diff --git a/comps/asr/src/integrations/whisper.py b/comps/asr/src/integrations/whisper.py index eb4c265ea1..39183e3350 100644 --- a/comps/asr/src/integrations/whisper.py +++ b/comps/asr/src/integrations/whisper.py @@ -3,7 +3,7 @@ import asyncio import os -from typing import List +from typing import List, Union import 
requests from fastapi import File, Form, UploadFile @@ -32,7 +32,7 @@ def __init__(self, name: str, description: str, config: dict = None): async def invoke( self, - file: UploadFile = File(...), # Handling the uploaded file directly + file: Union[str, UploadFile], # accept base64 string or UploadFile model: str = Form("openai/whisper-small"), language: str = Form("english"), prompt: str = Form(None), @@ -41,28 +41,39 @@ async def invoke( timestamp_granularities: List[str] = Form(None), ) -> AudioTranscriptionResponse: """Involve the ASR service to generate transcription for the provided input.""" - # Read the uploaded file - file_contents = await file.read() + if isinstance(file, str): + data = {"audio": file} + # Send the file and model to the server + response = await asyncio.to_thread( + requests.post, + f"{self.base_url}/v1/asr", + json=data, + ) + res = response.json()["asr_result"] + return AudioTranscriptionResponse(text=res) + else: + # Read the uploaded file + file_contents = await file.read() - # Prepare the files and data - files = { - "file": (file.filename, file_contents, file.content_type), - } - data = { - "model": model, - "language": language, - "prompt": prompt, - "response_format": response_format, - "temperature": temperature, - "timestamp_granularities": timestamp_granularities, - } + # Prepare the files and data + files = { + "file": (file.filename, file_contents, file.content_type), + } + data = { + "model": model, + "language": language, + "prompt": prompt, + "response_format": response_format, + "temperature": temperature, + "timestamp_granularities": timestamp_granularities, + } - # Send the file and model to the server - response = await asyncio.to_thread( - requests.post, f"{self.base_url}/v1/audio/transcriptions", files=files, data=data - ) - res = response.json()["text"] - return AudioTranscriptionResponse(text=res) + # Send the file and model to the server + response = await asyncio.to_thread( + requests.post, 
f"{self.base_url}/v1/audio/transcriptions", files=files, data=data + ) + res = response.json()["text"] + return AudioTranscriptionResponse(text=res) def check_health(self) -> bool: """Checks the health of the embedding service. diff --git a/comps/asr/src/opea_asr_microservice.py b/comps/asr/src/opea_asr_microservice.py index 8210149613..db9bb37947 100644 --- a/comps/asr/src/opea_asr_microservice.py +++ b/comps/asr/src/opea_asr_microservice.py @@ -3,7 +3,7 @@ import os import time -from typing import List +from typing import List, Union from fastapi import File, Form, UploadFile from integrations.whisper import OpeaWhisperAsr @@ -19,12 +19,15 @@ register_statistics, statistics_dict, ) +from comps.cores.mega.constants import MCPFuncType from comps.cores.proto.api_protocol import AudioTranscriptionResponse logger = CustomLogger("opea_asr_microservice") logflag = os.getenv("LOGFLAG", False) asr_component_name = os.getenv("ASR_COMPONENT_NAME", "OPEA_WHISPER_ASR") +enable_mcp = os.getenv("ENABLE_MCP", "").strip().lower() in {"true", "1", "yes"} + # Initialize OpeaComponentLoader loader = OpeaComponentLoader(asr_component_name, description=f"OPEA ASR Component: {asr_component_name}") @@ -37,10 +40,13 @@ port=9099, input_datatype=Base64ByteStrDoc, output_datatype=LLMParamsDoc, + enable_mcp=enable_mcp, + mcp_func_type=MCPFuncType.TOOL, + description="Convert audio to text.", ) @register_statistics(names=["opea_service@asr"]) async def audio_to_text( - file: UploadFile = File(...), # Handling the uploaded file directly + file: Union[str, UploadFile], # accept base64 string or UploadFile model: str = Form("openai/whisper-small"), language: str = Form("english"), prompt: str = Form(None), diff --git a/comps/asr/src/requirements.txt b/comps/asr/src/requirements.txt index f73cc5821a..cca9450d79 100644 --- a/comps/asr/src/requirements.txt +++ b/comps/asr/src/requirements.txt @@ -3,6 +3,7 @@ aiohttp datasets docarray[full] fastapi +mcp opentelemetry-api opentelemetry-exporter-otlp 
opentelemetry-sdk diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py index 0723bbd12a..ed1a2271d0 100644 --- a/comps/cores/mega/constants.py +++ b/comps/cores/mega/constants.py @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from enum import Enum +from enum import Enum, auto class ServiceRoleType(Enum): @@ -92,3 +92,11 @@ class MicroServiceEndpoint(Enum): def __str__(self): return self.value + + +class MCPFuncType(Enum): + """The enum of a MCP function type.""" + + TOOL = auto() + RESOURCE = auto() + PROMPT = auto() diff --git a/comps/cores/mega/micro_service.py b/comps/cores/mega/micro_service.py index 9635b0ac24..5d96be70c4 100644 --- a/comps/cores/mega/micro_service.py +++ b/comps/cores/mega/micro_service.py @@ -4,11 +4,12 @@ import asyncio import os from collections import defaultdict, deque +from collections.abc import Callable from enum import Enum -from typing import Any, List, Optional, Type +from typing import Any, List, Optional, Type, TypeAlias from ..proto.docarray import TextDoc -from .constants import ServiceRoleType, ServiceType +from .constants import MCPFuncType, ServiceRoleType, ServiceType from .http_service import HTTPService from .logger import CustomLogger from .utils import check_ports_availability @@ -17,6 +18,7 @@ logger = CustomLogger("micro_service") logflag = os.getenv("LOGFLAG", False) +AnyFunction: TypeAlias = Callable[..., Any] class MicroService(HTTPService): @@ -43,6 +45,9 @@ def __init__( dynamic_batching: bool = False, dynamic_batching_timeout: int = 1, dynamic_batching_max_batch_size: int = 32, + enable_mcp: bool = False, + mcp_func_type: Enum = MCPFuncType.TOOL, + func: AnyFunction = None, ): """Init the microservice.""" self.service_role = service_role @@ -56,6 +61,7 @@ def __init__( self.output_datatype = output_datatype self.use_remote_service = use_remote_service self.description = description + self.enable_mcp = enable_mcp self.dynamic_batching = 
dynamic_batching self.dynamic_batching_timeout = dynamic_batching_timeout self.dynamic_batching_max_batch_size = dynamic_batching_max_batch_size @@ -82,7 +88,7 @@ def __init__( "host": self.host, "port": self.port, "title": name, - "description": "OPEA Microservice Infrastructure", + "description": self.description or "OPEA Microservice Infrastructure", } super().__init__(uvicorn_kwargs=self.uvicorn_kwargs, runtime_args=runtime_args) @@ -93,7 +99,21 @@ def __init__( self.request_buffer = defaultdict(deque) self.add_startup_event(self._dynamic_batch_processor()) - self._async_setup() + if not enable_mcp: + self._async_setup() + else: + from mcp.server.fastmcp import FastMCP + + self.mcp = FastMCP(name, host=self.host, port=self.port) + dispatch = { + MCPFuncType.TOOL: self.mcp.add_tool, + MCPFuncType.RESOURCE: self.mcp.add_resource, + MCPFuncType.PROMPT: self.mcp.add_prompt, + } + try: + dispatch[mcp_func_type](func, name=func.__name__, description=description) + except KeyError: + raise ValueError(f"Unknown MCP func type: {mcp_func_type}") # overwrite name self.name = f"{name}/{self.__class__.__name__}" if name else self.__class__.__name__ @@ -144,6 +164,15 @@ def endpoint_path(self, model=None): else: return f"{self.protocol}://{self.host}:{self.port}{self.endpoint}" + def start(self): + """Start the server using MCP if enabled, otherwise fall back to default.""" + if self.enable_mcp: + self.mcp.run( + transport="sse", + ) + else: + super().start() + @property def api_key_value(self): return self.api_key @@ -167,6 +196,9 @@ def register_microservice( dynamic_batching: bool = False, dynamic_batching_timeout: int = 1, dynamic_batching_max_batch_size: int = 32, + enable_mcp: bool = False, + description: str = None, + mcp_func_type: Enum = MCPFuncType.TOOL, ): def decorator(func): if name not in opea_microservices: @@ -187,8 +219,22 @@ def decorator(func): dynamic_batching=dynamic_batching, dynamic_batching_timeout=dynamic_batching_timeout, 
dynamic_batching_max_batch_size=dynamic_batching_max_batch_size, + enable_mcp=enable_mcp, + func=func, + description=description, + mcp_func_type=mcp_func_type, ) opea_microservices[name] = micro_service + + elif enable_mcp: + mcp_handle = opea_microservices[name].mcp + dispatch = { + MCPFuncType.TOOL: mcp_handle.add_tool, + MCPFuncType.RESOURCE: mcp_handle.add_resource, + MCPFuncType.PROMPT: mcp_handle.add_prompt, + } + dispatch[mcp_func_type](func, name=func.__name__, description=description) + opea_microservices[name].app.router.add_api_route(endpoint, func, methods=methods) return func diff --git a/requirements.txt b/requirements.txt index c16f8ad52b..cca4354342 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ httpx kubernetes langchain langchain-community +mcp opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk diff --git a/tests/asr/test_asr_whisper_mcp.sh b/tests/asr/test_asr_whisper_mcp.sh new file mode 100644 index 0000000000..8bdfa65a7d --- /dev/null +++ b/tests/asr/test_asr_whisper_mcp.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +export TAG=comps +export WHISPER_PORT=10104 +export ASR_PORT=10105 +export ENABLE_MCP=True +cd $WORKPATH + + +function build_docker_images() { + echo $(pwd) + docker build --no-cache -t opea/whisper:$TAG --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/whisper/src/Dockerfile . + + if [ $? -ne 0 ]; then + echo "opea/whisper built fail" + exit 1 + else + echo "opea/whisper built successful" + fi + + docker build --no-cache -t opea/asr:$TAG --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile . + + if [ $? 
-ne 0 ]; then + echo "opea/asr built fail" + exit 1 + else + echo "opea/asr built successful" + fi +} + +function start_service() { + unset http_proxy + export ASR_ENDPOINT=http://$ip_address:$WHISPER_PORT + + docker compose -f comps/asr/deployment/docker_compose/compose.yaml up whisper-service asr -d + sleep 1m +} + +function validate_microservice() { + pip install mcp + python3 ${WORKPATH}/tests/utils/validate_svc_with_mcp.py $ip_address $ASR_PORT "asr" + if [ $? -ne 0 ]; then + docker logs whisper-service + docker logs asr-service + exit 1 + fi + +} + +function stop_docker() { + docker ps -a --filter "name=whisper-service" --filter "name=asr-service" --format "{{.Names}}" | xargs -r docker stop +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/cores/mega/test_mcp.py b/tests/cores/mega/test_mcp.py new file mode 100644 index 0000000000..39a38d8168 --- /dev/null +++ b/tests/cores/mega/test_mcp.py @@ -0,0 +1,70 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import multiprocessing +import unittest + +from mcp.client.session import ClientSession +from mcp.client.sse import sse_client + +from comps import TextDoc, opea_microservices, register_microservice +from comps.cores.mega.constants import MCPFuncType +from comps.version import __version__ + + +@register_microservice( + name="mcp_dummy", + host="0.0.0.0", + port=8087, + enable_mcp=True, + mcp_func_type=MCPFuncType.TOOL, + description="dummy mcp add func", +) +async def mcp_dummy(request: TextDoc) -> TextDoc: + req = request.model_dump_json() + req_dict = json.loads(req) + text = req_dict["text"] + text += "OPEA Project MCP!" 
+ return {"text": text} + + +@register_microservice( + name="mcp_dummy", + host="0.0.0.0", + port=8087, + enable_mcp=True, + mcp_func_type=MCPFuncType.TOOL, + description="dummy mcp sum func", +) +async def mcp_dummy_sum(): + return 1 + 1 + + +class TestMicroService(unittest.IsolatedAsyncioTestCase): + def setUp(self): + self.process = multiprocessing.Process( + target=opea_microservices["mcp_dummy"].start, daemon=False, name="mcp_dummy" + ) + self.process.start() + + self.server_url = "http://localhost:8087" + + async def test_mcp(self): + async with sse_client(self.server_url + "/sse") as streams: + async with ClientSession(*streams) as session: + result = await session.initialize() + self.assertEqual(result.serverInfo.name, "mcp_dummy") + tool_result = await session.call_tool("mcp_dummy", {"request": {"text": "Hello "}}) + self.assertEqual(json.loads(tool_result.content[0].text)["text"], "Hello OPEA Project MCP!") + + tool_result = await session.call_tool( + "mcp_dummy_sum", + ) + self.assertEqual(tool_result.content[0].text, "2") + self.process.kill() + self.process.join(timeout=2) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/utils/validate_svc_with_mcp.py b/tests/utils/validate_svc_with_mcp.py new file mode 100644 index 0000000000..77f45fa656 --- /dev/null +++ b/tests/utils/validate_svc_with_mcp.py @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import asyncio +import base64 +import json +import os +import sys + +import requests +from mcp.client.session import ClientSession +from mcp.client.sse import sse_client + + +async def validate_svc(ip_address, service_port, service_type): + + endpoint = f"http://{ip_address}:{service_port}" + + async with sse_client(endpoint + "/sse") as streams: + async with ClientSession(*streams) as session: + result = await session.initialize() + if service_type == "asr": + url = 
"https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav" + response = requests.get(url) + response.raise_for_status() # Ensure the download succeeded + binary_data = response.content + base64_str = base64.b64encode(binary_data).decode("utf-8") + input_dict = {"file": base64_str, "model": "openai/whisper-small", "language": "english"} + tool_result = await session.call_tool( + "audio_to_text", + input_dict, + ) + result_content = tool_result.content + # Check result + if json.loads(result_content[0].text)["text"].startswith("who is"): + print("Result correct.") + else: + print(f"Result wrong. Received was {result_content}") + exit(1) + else: + print(f"Unknown service type: {service_type}") + exit(1) + + +if __name__ == "__main__": + if len(sys.argv) != 4: + print("Usage: python3 validate_svc_with_mcp.py ") + exit(1) + ip_address = sys.argv[1] + service_port = sys.argv[2] + service_type = sys.argv[3] + asyncio.run(validate_svc(ip_address, service_port, service_type)) From 9be8f9f3ead8eaa559de50ccb368bc684977ccbb Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Tue, 13 May 2025 15:22:19 +0800 Subject: [PATCH 27/34] Fix huggingface_hub API upgrade issue (#1691) * Fix huggingfacehub API upgrade issue Signed-off-by: lvliang-intel Signed-off-by: alexsin368 --- comps/cores/mega/orchestrator.py | 6 ++- comps/embeddings/src/integrations/ovms.py | 51 +++++++++++------------ comps/embeddings/src/integrations/tei.py | 18 ++++---- comps/rerankings/src/requirements.txt | 1 + 4 files changed, 39 insertions(+), 37 deletions(-) diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py index 8295719108..61246ee5a8 100644 --- a/comps/cores/mega/orchestrator.py +++ b/comps/cores/mega/orchestrator.py @@ -274,7 +274,7 @@ async def execute( headers={"Content-type": "application/json", "Authorization": f"Bearer {access_token}"}, proxies={"http": None}, stream=True, - 
timeout=1000, + timeout=2000, ) else: response = requests.post( @@ -285,7 +285,7 @@ async def execute( }, proxies={"http": None}, stream=True, - timeout=1000, + timeout=2000, ) downstream = runtime_graph.downstream(cur_node) @@ -317,6 +317,7 @@ def generate(): "Authorization": f"Bearer {access_token}", }, proxies={"http": None}, + timeout=2000, ) else: res = requests.post( @@ -326,6 +327,7 @@ def generate(): "Content-type": "application/json", }, proxies={"http": None}, + timeout=2000, ) res_json = res.json() if "text" in res_json: diff --git a/comps/embeddings/src/integrations/ovms.py b/comps/embeddings/src/integrations/ovms.py index 9931fa35e5..16ee4e125a 100644 --- a/comps/embeddings/src/integrations/ovms.py +++ b/comps/embeddings/src/integrations/ovms.py @@ -1,12 +1,10 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import json import os -from typing import List, Union +import aiohttp import requests -from huggingface_hub import AsyncInferenceClient from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.cores.mega.utils import get_access_token @@ -32,24 +30,11 @@ class OpeaOVMSEmbedding(OpeaComponent): def __init__(self, name: str, description: str, config: dict = None): super().__init__(name, ServiceType.EMBEDDING.name.lower(), description, config) self.base_url = os.getenv("OVMS_EMBEDDING_ENDPOINT", "http://localhost:8080") - self.client = self._initialize_client() health_status = self.check_health() if not health_status: logger.error("OpeaOVMSEmbedding health check failed.") - def _initialize_client(self) -> AsyncInferenceClient: - """Initializes the AsyncInferenceClient.""" - access_token = ( - get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None - ) - headers = {"Authorization": f"Bearer {access_token}"} if access_token else {} - return AsyncInferenceClient( - model=MODEL_ID, - token=os.getenv("HUGGINGFACEHUB_API_TOKEN"), - 
headers=headers, - ) - async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse: """Invokes the embedding service to generate embeddings for the provided input. @@ -69,17 +54,31 @@ async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse: raise ValueError("Invalid input format: Only string or list of strings are supported.") else: raise TypeError("Unsupported input type: input must be a string or list of strings.") - response = await self.client.post( - json={ - "input": texts, - "encoding_format": input.encoding_format, - "model": self.client.model, - "user": input.user, - }, - model=f"{self.base_url}/v3/embeddings", - task="text-embedding", + # Build headers + headers = {"Content-Type": "application/json"} + access_token = ( + get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None ) - embeddings = json.loads(response.decode()) + if access_token: + headers["Authorization"] = f"Bearer {access_token}" + + # Compose request + payload = { + "input": texts, + "encoding_format": input.encoding_format, + "model": MODEL_ID, + "user": input.user, + } + + # Send async POST request using aiohttp + url = f"{self.base_url}/v3/embeddings" + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers, json=payload) as resp: + if resp.status != 200: + logger.error(f"Embedding service error: {resp.status} - {await resp.text()}") + raise RuntimeError(f"Failed to fetch embeddings: HTTP {resp.status}") + embeddings = await resp.json() + return EmbeddingResponse(**embeddings) def check_health(self) -> bool: diff --git a/comps/embeddings/src/integrations/tei.py b/comps/embeddings/src/integrations/tei.py index 8d589fb822..b89eb62625 100644 --- a/comps/embeddings/src/integrations/tei.py +++ b/comps/embeddings/src/integrations/tei.py @@ -10,7 +10,7 @@ from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType from comps.cores.mega.utils import 
get_access_token -from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse +from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse, EmbeddingResponseData logger = CustomLogger("opea_tei_embedding") logflag = os.getenv("LOGFLAG", False) @@ -44,7 +44,7 @@ def _initialize_client(self) -> AsyncInferenceClient: ) headers = {"Authorization": f"Bearer {access_token}"} if access_token else {} return AsyncInferenceClient( - model=f"{self.base_url}/v1/embeddings", + model=f"{self.base_url}/embed", token=os.getenv("HUGGINGFACEHUB_API_TOKEN"), headers=headers, ) @@ -68,13 +68,13 @@ async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse: raise ValueError("Invalid input format: Only string or list of strings are supported.") else: raise TypeError("Unsupported input type: input must be a string or list of strings.") - response = await self.client.post( - json={"input": texts, "encoding_format": input.encoding_format, "model": input.model, "user": input.user}, - model=f"{self.base_url}/v1/embeddings", - task="text-embedding", - ) - embeddings = json.loads(response.decode()) - return EmbeddingResponse(**embeddings) + # feature_extraction return np.ndarray + response = await self.client.feature_extraction(text=texts, model=f"{self.base_url}/embed") + # Convert np.ndarray to a list of lists (embedding) + data = [EmbeddingResponseData(index=i, embedding=embedding.tolist()) for i, embedding in enumerate(response)] + # Construct the EmbeddingResponse + response = EmbeddingResponse(data=data) + return response def check_health(self) -> bool: """Checks the health of the embedding service. 
diff --git a/comps/rerankings/src/requirements.txt b/comps/rerankings/src/requirements.txt index 7260862a3b..b3a0ba6e4b 100644 --- a/comps/rerankings/src/requirements.txt +++ b/comps/rerankings/src/requirements.txt @@ -2,6 +2,7 @@ aiohttp docarray[full] fastapi httpx +huggingface-hub==0.30.2 opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk From 0ffa6a6d037873b94020288d4b5469e00bb1cde2 Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Tue, 13 May 2025 17:44:24 -0700 Subject: [PATCH 28/34] add OpenAI models instructions, fix format of commands Signed-off-by: alexsin368 --- comps/agent/src/README.md | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/comps/agent/src/README.md b/comps/agent/src/README.md index 6e1fcc431b..13aee5eaa5 100644 --- a/comps/agent/src/README.md +++ b/comps/agent/src/README.md @@ -82,7 +82,7 @@ for line in resp.iter_lines(decode_unicode=True): **Note**: -1. Currently only `reract_llama` agent is enabled for assistants APIs. +1. Currently only `react_llama` agent is enabled for assistants APIs. 2. Not all keywords of OpenAI APIs are supported yet. ### 1.5 Agent memory @@ -110,13 +110,27 @@ Examples of python code for multi-turn conversations using agent memory: To run the two examples above, first launch the agent microservice using [this docker compose yaml](../../../tests/agent/reactllama.yaml). -### 1.6 Run LLMs with Remote Servers +### 1.6 Run LLMs from OpenAI + +To run any model from OpenAI, just specify the environment variable `OPENAI_API_KEY`: + +```bash +export OPENAI_API_KEY= +``` + +These also need to be passed in to the `docker run` command, or included in a YAML file when running `docker compose`. 
+ +### 1.7 Run LLMs with OpenAI-compatible APIs on Remote Servers To run the text generation portion using LLMs deployed on a remote server, specify the following environment variables: -- `api_key`= -- `model`= -- `LLM_ENDPOINT_URL`= +```bash +export api_key= +export model= +export LLM_ENDPOINT_URL= +``` + +These also need to be passed in to the `docker run` command, or included in a YAML file when running `docker compose`. #### Notes From f83070cf26a681f6b7e52e832cde66a6df6ad3ae Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Wed, 14 May 2025 15:23:06 +0800 Subject: [PATCH 29/34] Fix dataprep opensearch ingest issue (#1697) Signed-off-by: lvliang-intel Signed-off-by: alexsin368 --- comps/dataprep/src/integrations/opensearch.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/comps/dataprep/src/integrations/opensearch.py b/comps/dataprep/src/integrations/opensearch.py index 2b51a5001c..44b2f59207 100644 --- a/comps/dataprep/src/integrations/opensearch.py +++ b/comps/dataprep/src/integrations/opensearch.py @@ -108,6 +108,9 @@ def __init__(self, name: str, description: str, config: dict = None): self.opensearch_client = OpenSearchVectorSearch( opensearch_url=OPENSEARCH_URL, index_name=Config.INDEX_NAME, + # Default engine for OpenSearch is "nmslib", + # but "nmslib" engine is deprecated in OpenSearch and cannot be used for new index creation in OpenSearch from 3.0.0. 
+ engine="faiss", embedding_function=self.embeddings, http_auth=self.auth, use_ssl=True, From 72bc23b8cbdf494358e00d0baa3ba3529652ddbc Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Wed, 14 May 2025 16:08:40 +0800 Subject: [PATCH 30/34] Fix embedding issue with ArangoDB due to deprecated HuggingFace API (#1694) Signed-off-by: lvliang-intel Signed-off-by: alexsin368 --- comps/dataprep/src/integrations/arangodb.py | 6 +++--- comps/retrievers/src/integrations/arangodb.py | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/comps/dataprep/src/integrations/arangodb.py b/comps/dataprep/src/integrations/arangodb.py index 265b13f7ff..54f616ecef 100644 --- a/comps/dataprep/src/integrations/arangodb.py +++ b/comps/dataprep/src/integrations/arangodb.py @@ -10,12 +10,11 @@ from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_arangodb import ArangoGraph -from langchain_community.embeddings import HuggingFaceHubEmbeddings from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.prompts import ChatPromptTemplate from langchain_experimental.graph_transformers import LLMGraphTransformer -from langchain_huggingface import HuggingFaceEmbeddings +from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpointEmbeddings from langchain_openai import ChatOpenAI, OpenAIEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter @@ -200,8 +199,9 @@ def _initialize_embeddings(self): """Initialize the embeddings model.""" if TEI_EMBEDDING_ENDPOINT and HUGGINGFACEHUB_API_TOKEN: - self.embeddings = HuggingFaceHubEmbeddings( + self.embeddings = HuggingFaceEndpointEmbeddings( model=TEI_EMBEDDING_ENDPOINT, + task="feature-extraction", huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN, ) elif TEI_EMBED_MODEL: diff --git a/comps/retrievers/src/integrations/arangodb.py 
b/comps/retrievers/src/integrations/arangodb.py index dd1ed24319..9905d9134e 100644 --- a/comps/retrievers/src/integrations/arangodb.py +++ b/comps/retrievers/src/integrations/arangodb.py @@ -9,7 +9,8 @@ from arango.database import StandardDatabase from fastapi import HTTPException from langchain_arangodb import ArangoVector -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_openai import ChatOpenAI, OpenAIEmbeddings from comps import CustomLogger, EmbedDoc, OpeaComponent, OpeaComponentRegistry, ServiceType @@ -421,8 +422,10 @@ async def invoke( if OPENAI_API_KEY and OPENAI_EMBED_MODEL and OPENAI_EMBED_ENABLED: embeddings = OpenAIEmbeddings(model=OPENAI_EMBED_MODEL, dimensions=dimension) elif TEI_EMBEDDING_ENDPOINT and HUGGINGFACEHUB_API_TOKEN: - embeddings = HuggingFaceHubEmbeddings( - model=TEI_EMBEDDING_ENDPOINT, huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN + embeddings = HuggingFaceEndpointEmbeddings( + model=TEI_EMBEDDING_ENDPOINT, + task="feature-extraction", + huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN, ) else: embeddings = HuggingFaceBgeEmbeddings(model_name=TEI_EMBED_MODEL) From b2d93ff938d8c45021462722c1854c6421bfc662 Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 14 May 2025 17:35:08 -0700 Subject: [PATCH 31/34] simplify ChatOpenAI instantiation Signed-off-by: alexsin368 --- comps/agent/src/integrations/utils.py | 34 ++++++++------------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index ff7f0415a0..238f44d3e3 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,6 +7,7 @@ from .config import env_config +LLM_ENDPOINT_URL_DEFAULT="http://localhost:8080" def format_date(date): # input m/dd/yyyy hr:min 
@@ -56,29 +57,14 @@ def setup_chat_model(args): "top_p": args.top_p, "streaming": args.stream, } - if args.llm_engine == "vllm" or args.llm_engine == "tgi": - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key="EMPTY", - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) - elif args.llm_engine == "openai": - if args.api_key: - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key=args.api_key, - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) - else: - llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) - else: - raise ValueError("llm_engine must be vllm, tgi or openai") + openai_endpoint=None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + '/v1' + llm = ChatOpenAI( + openai_api_key=args.api_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params + ) return llm @@ -171,7 +157,7 @@ def get_args(): parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct") parser.add_argument("--llm_engine", type=str, default="tgi") - parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") + parser.add_argument("--llm_endpoint_url", type=str, default=LLM_ENDPOINT_URL_DEFAULT) parser.add_argument("--api_key", type=str, default=None, help="API key to access remote server") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) From 78001b0e2da08baa0b24307a158a26bb5f6cbc95 Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 14 May 2025 18:10:19 -0700 Subject: [PATCH 32/34] Revert "simplify ChatOpenAI instantiation" This reverts commit b7c4acf7d397a284f1499254fa8832533c0c98e3. 
Signed-off-by: alexsin368 --- comps/agent/src/integrations/utils.py | 34 +++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 238f44d3e3..ff7f0415a0 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,7 +7,6 @@ from .config import env_config -LLM_ENDPOINT_URL_DEFAULT="http://localhost:8080" def format_date(date): # input m/dd/yyyy hr:min @@ -57,14 +56,29 @@ def setup_chat_model(args): "top_p": args.top_p, "streaming": args.stream, } - openai_endpoint=None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + '/v1' - llm = ChatOpenAI( - openai_api_key=args.api_key, - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params - ) + if args.llm_engine == "vllm" or args.llm_engine == "tgi": + openai_endpoint = f"{args.llm_endpoint_url}/v1" + llm = ChatOpenAI( + openai_api_key="EMPTY", + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) + elif args.llm_engine == "openai": + if args.api_key: + openai_endpoint = f"{args.llm_endpoint_url}/v1" + llm = ChatOpenAI( + openai_api_key=args.api_key, + openai_api_base=openai_endpoint, + model_name=args.model, + request_timeout=args.timeout, + **params, + ) + else: + llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) + else: + raise ValueError("llm_engine must be vllm, tgi or openai") return llm @@ -157,7 +171,7 @@ def get_args(): parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct") parser.add_argument("--llm_engine", type=str, default="tgi") - parser.add_argument("--llm_endpoint_url", type=str, default=LLM_ENDPOINT_URL_DEFAULT) + parser.add_argument("--llm_endpoint_url", type=str, default="http://localhost:8080") parser.add_argument("--api_key", type=str, default=None, 
help="API key to access remote server") parser.add_argument("--max_new_tokens", type=int, default=1024) parser.add_argument("--top_k", type=int, default=10) From 1f4b7468e04e7b8a340d64152e85e023b52dfb45 Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Wed, 14 May 2025 18:20:35 -0700 Subject: [PATCH 33/34] add back check and logic for llm_engine, set openai_key argument Signed-off-by: alexsin368 --- comps/agent/src/integrations/utils.py | 33 +++++++++++---------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index ff7f0415a0..6d73e2fff5 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -7,6 +7,7 @@ from .config import env_config +LLM_ENDPOINT_URL_DEFAULT = "http://localhost:8080" def format_date(date): # input m/dd/yyyy hr:min @@ -57,28 +58,20 @@ def setup_chat_model(args): "streaming": args.stream, } if args.llm_engine == "vllm" or args.llm_engine == "tgi": - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key="EMPTY", - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) + openai_key = "EMPTY" elif args.llm_engine == "openai": - if args.api_key: - openai_endpoint = f"{args.llm_endpoint_url}/v1" - llm = ChatOpenAI( - openai_api_key=args.api_key, - openai_api_base=openai_endpoint, - model_name=args.model, - request_timeout=args.timeout, - **params, - ) - else: - llm = ChatOpenAI(model_name=args.model, request_timeout=args.timeout, **params) + openai_key = args.api_key else: - raise ValueError("llm_engine must be vllm, tgi or openai") + raise ValueError("llm_engine must be vllm, tgi, or openai") + + openai_endpoint = None if args.llm_endpoint_url is LLM_ENDPOINT_URL_DEFAULT else args.llm_endpoint_url + "/v1" + llm = ChatOpenAI( + openai_api_key=openai_key, + openai_api_base=openai_endpoint, + model_name=args.model, + 
request_timeout=args.timeout, + **params, + ) return llm From 45376b9c6280d7b81137321d34bae73ca067bc41 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 01:39:42 +0000 Subject: [PATCH 34/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/agent/src/integrations/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/comps/agent/src/integrations/utils.py b/comps/agent/src/integrations/utils.py index 6d73e2fff5..3c940a53b9 100644 --- a/comps/agent/src/integrations/utils.py +++ b/comps/agent/src/integrations/utils.py @@ -9,6 +9,7 @@ LLM_ENDPOINT_URL_DEFAULT = "http://localhost:8080" + def format_date(date): # input m/dd/yyyy hr:min # output yyyy-mm-dd