diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py
index 8c71086f58..56de4a8c60 100644
--- a/comps/cores/proto/docarray.py
+++ b/comps/cores/proto/docarray.py
@@ -278,7 +278,7 @@ class GraphDoc(BaseDoc):
 
 
 class LVMDoc(BaseDoc):
-    image: str
+    image: Union[str, List[str]]
     prompt: str
     max_new_tokens: conint(ge=0, le=1024) = 512
     top_k: int = 10
diff --git a/comps/embeddings/multimodal/README.md b/comps/embeddings/multimodal/README.md
index c75a60f12a..c839365bcd 100644
--- a/comps/embeddings/multimodal/README.md
+++ b/comps/embeddings/multimodal/README.md
@@ -170,11 +170,18 @@ docker compose -f docker_compose_multimodal_embedding.yaml up -d
 
 **Compute a joint embedding of an image-text pair**
 
+The image can be passed as a URL:
 ```bash
 curl -X POST http://0.0.0.0:6600/v1/embeddings \
      -H "Content-Type: application/json" \
      -d '{"text": {"text" : "This is some sample text."}, "image" : {"url": "https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true"}}'
 ```
+Or as a base64 encoded string:
+```bash
+curl -X POST http://0.0.0.0:6600/v1/embeddings \
+     -H "Content-Type: application/json" \
+     -d '{"text": {"text" : "This is some sample text."}, "image" : {"base64_image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC"}}'
+```
 
 **Compute an embedding of a text**
 
diff --git a/comps/embeddings/multimodal/multimodal_langchain/mm_embedding_mmei.py b/comps/embeddings/multimodal/multimodal_langchain/mm_embedding_mmei.py
index fbd972a202..cd052fc288 100644
--- a/comps/embeddings/multimodal/multimodal_langchain/mm_embedding_mmei.py
+++ b/comps/embeddings/multimodal/multimodal_langchain/mm_embedding_mmei.py
@@ -7,6 +7,7 @@
 
 import requests
 from fastapi.responses import JSONResponse
+from typing import Union
 
 from comps import (
     CustomLogger,
@@ -38,7 +39,7 @@
     output_datatype=EmbedMultimodalDoc,
 )
 @register_statistics(names=["opea_service@multimodal_embedding_mmei_langchain"])
-def embedding(input: MultimodalDoc) -> EmbedDoc:
+def embedding(input: MultimodalDoc) -> Union[EmbedDoc, EmbedMultimodalDoc]:
     start = time.time()
     if logflag:
         logger.info(input)
@@ -48,9 +49,15 @@ def embedding(input: MultimodalDoc) -> EmbedDoc:
         json["text"] = input.text
     elif isinstance(input, TextImageDoc):
         json["text"] = input.text.text
-        img_bytes = input.image.url.load_bytes()
-        base64_img = base64.b64encode(img_bytes).decode("utf-8")
-        json["img_b64_str"] = base64_img
+        base64_img = ""
+        if input.image.url:
+            img_bytes = input.image.url.load_bytes()
+            base64_img = base64.b64encode(img_bytes).decode("utf-8")
+        elif input.image.base64_image:
+            base64_img = input.image.base64_image
+
+        if base64_img:
+            json["img_b64_str"] = base64_img
     else:
         return JSONResponse(status_code=400, content={"message": "Bad request!"})
 
@@ -66,6 +73,9 @@ def embedding(input: MultimodalDoc) -> EmbedDoc:
             res = EmbedDoc(text=input.text, embedding=embed_vector)
         elif isinstance(input, TextImageDoc):
             res = EmbedMultimodalDoc(text=input.text.text, url=input.image.url, embedding=embed_vector)
+
+            if base64_img:
+                res.base64_image = base64_img
     except requests.exceptions.ConnectionError:
         res = JSONResponse(status_code=503, content={"message": "Multimodal embedding endpoint not started!"})
     statistics_dict["opea_service@multimodal_embedding_mmei_langchain"].append_latency(time.time() - start, None)
diff --git a/comps/lvms/llava/README.md b/comps/lvms/llava/README.md
index 998eb4b664..74e1de706f 100644
--- a/comps/lvms/llava/README.md
+++ b/comps/lvms/llava/README.md
@@ -1,6 +1,6 @@
 # LVM Microservice
 
-Visual Question and Answering is one of the multimodal tasks empowered by LVMs (Large Visual Models). This microservice supports visual Q&A by using LLaVA as the base large visual model. It accepts two inputs: a prompt and an image. It outputs the answer to the prompt about the image.
+Visual Question and Answering is one of the multimodal tasks empowered by LVMs (Large Visual Models). This microservice supports visual Q&A by using LLaVA as the base large visual model. It accepts two inputs: a prompt and images. It outputs the answer to the prompt about the images.
 
 ## 🚀1. Start Microservice with Python (Option 1)
 
@@ -92,10 +92,15 @@ docker run -p 8399:8399 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_M
 
 #### 2.2.2 Start LVM service
 
+> Note: The `MAX_IMAGES` environment variable specifies the maximum number of images that the LVM service will send to the LLaVA server.
+> If the LVM service receives an image list longer than `MAX_IMAGES`, it sends a shortened list to the LLaVA server, keeping the most
+> recent images (the ones at the end of the list). Some LLaVA models have not been trained with multiple images and may give inaccurate
+> results when more than one image is sent. If `MAX_IMAGES` is not set, it defaults to `1`.
+
 ```bash
 ip_address=$(hostname -I | awk '{print $1}')
 
-docker run -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LVM_ENDPOINT=http://$ip_address:8399 opea/lvm-llava-svc:latest
+docker run -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LVM_ENDPOINT=http://$ip_address:8399 -e MAX_IMAGES=1 opea/lvm-llava-svc:latest
 ```
 
 #### 2.2.3 Test
@@ -106,6 +111,9 @@ docker run -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$htt
 # curl with an image and a prompt
 http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json'
 
+# curl with multiple images and a prompt (Note that depending on your MAX_IMAGES value, only some of the images may be sent to the LLaVA model)
+http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC"], "prompt":"What is in these images?"}' -H 'Content-Type: application/json'
+
 # curl with a prompt only (no image)
 http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "", "prompt":"What is deep learning?"}' -H 'Content-Type: application/json'
 
diff --git a/comps/lvms/llava/dependency/llava_server.py b/comps/lvms/llava/dependency/llava_server.py
index 644e15a82e..4fc0043805 100644
--- a/comps/lvms/llava/dependency/llava_server.py
+++ b/comps/lvms/llava/dependency/llava_server.py
@@ -13,6 +13,7 @@
 import uvicorn
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse, Response
+from transformers import AutoProcessor
 from transformers import pipeline
 from transformers.image_utils import load_image
 
@@ -33,9 +34,16 @@ def pipeline_preprocess(self, image, prompt=None, timeout=None):
     The original transformers image-to-text pipeline preprocess function requires that an image is passed in, and will
     fail if the image parameter is null/empty. In order to support multimodal use cases with the same pipeline, this
     preprocess function handles the case where there is no image with the prompt.
+    Also, the image-to-text pipeline typically treats multiple images passed in as a list as a batch (where it iterates
+    over the image inputs for generation). For that reason, the original pipeline_preprocess code would only get a
+    single image at a time. To support multiple images, the pipeline call is updated to send a list of lists for the
+    images (so that when iterated, we still get multiple images) and this pipeline_preprocess function has been updated
+    to handle a list of images in addition to single images.
     """
 
-    if image:
+    if isinstance(image, list):
+        image = [load_image(i, timeout=timeout) for i in image]
+    elif image:
         image = load_image(image, timeout=timeout)
 
     if prompt is not None:
@@ -114,23 +122,52 @@ async def health() -> Response:
 
 
 @app.post("/generate")
-async def generate(request: Request) -> Response:  # FIXME batch_size=1 for now, only accept single image
+async def generate(request: Request) -> Response:  # FIXME batch_size=1 for now
     print("LLaVA generation begin.")
     request_dict = await request.json()
     prompt = request_dict.pop("prompt")
-    img_b64_str = request_dict.pop("img_b64_str")
+    img_b64_str = request_dict.pop("img_b64_str")  # String or list of strings
     max_new_tokens = request_dict.pop("max_new_tokens", 100)
 
+    # Determine the format of the role labels based on the model name
+    model_name = generator.model.name_or_path
+    user_label = "USER:"
+    assistant_label = "ASSISTANT:"
+    image_tag = "<image>\n"
+
+    # This is the role label that we see in the results from the pipeline. This is used to split the output.
+    output_assistant_label = "ASSISTANT: "
+
+    if "llava-interleave" in model_name:
+        user_label = "<|im_start|>user"
+        assistant_label = "<|im_end|><|im_start|>assistant"
+        output_assistant_label = "assistant "
+    elif "llava-v1.6-mistral" in model_name:
+        user_label = "[INST]"
+        assistant_label = " [/INST]"
+        output_assistant_label = "[/INST] "
+
     if img_b64_str:
-        # Decode and Resize the image
-        image = PIL.Image.open(BytesIO(base64.b64decode(img_b64_str)))
-        image = process_image(image)
-        # format the prompt with an image
-        prompt = f"<image>\nUSER: {prompt}\nASSISTANT:"
+        if isinstance(img_b64_str, str):
+            img_b64_str = [img_b64_str]
+
+        # Decode and Resize the images
+        images = []
+        for img_b64 in img_b64_str:
+            if img_b64:
+                image = PIL.Image.open(BytesIO(base64.b64decode(img_b64)))
+                image = process_image(image)
+                images.append(image)
+
+        # If the prompt has fewer image tags than there are images, add the missing image tags in front of the prompt
+        num_images = len(images)
+        num_image_tags = prompt.count(image_tag)
+        image_tags = image_tag * (num_images - num_image_tags) if num_images > num_image_tags else ""
+        prompt = f"{user_label}{image_tags} {prompt}{assistant_label}"
     else:
-        image = None
+        images = None
         # format the prompt with text only
-        prompt = f"USER: {prompt}\nASSISTANT:"
+        prompt = f"{user_label} {prompt}\n{assistant_label}"
 
     if args.device == "hpu":
         generate_kwargs = {
@@ -149,12 +186,13 @@ async def generate(request: Request) -> Response:  # FIXME batch_size=1 for now,
     # Override the pipeline preprocessing
     generator.preprocess = pipeline_preprocess.__get__(generator, type(generator))
 
-    result = generator(image, prompt=prompt, batch_size=1, generate_kwargs=generate_kwargs)
+    result = generator([images], prompt=prompt, batch_size=1, generate_kwargs=generate_kwargs)
     end = time.time()
-    result = result[0]["generated_text"].split("ASSISTANT: ")[-1]
+    result = result[0][0]["generated_text"].split(output_assistant_label.strip())[-1].strip()
     print(f"LLaVA result = {result}, time = {(end-start) * 1000 }ms")
-    if image:
-        image.close()
+    if images:
+        for i in images:
+            i.close()
 
     ret = {"text": result}
     return JSONResponse(ret)
@@ -191,6 +229,8 @@ async def generate(request: Request) -> Response:  # FIXME batch_size=1 for now,
         device=args.device,
     )
 
+    processor = AutoProcessor.from_pretrained(model_name_or_path)
+
     # warmup
     print("LLaVA warmup...")
     if args.device == "hpu":
@@ -214,10 +254,23 @@ async def generate(request: Request) -> Response:  # FIXME batch_size=1 for now,
     images = []
     for image_path in image_paths:
         images.append(PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw))
+
+    # Generate a text prompt to use for warm up
+    conversation = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image"},
+                {"type": "text", "text": "What's the content of the image?"},
+                ],
+        },
+    ]
+    text_prompt = processor.apply_chat_template(conversation)
+
     for i in range(args.warmup):
         generator(
             images,
-            prompt="<image>\nUSER: What's the content of the image?\nASSISTANT:",
+            prompt=text_prompt,
             batch_size=1,
             generate_kwargs=generate_kwargs,
         )
diff --git a/comps/lvms/llava/lvm.py b/comps/lvms/llava/lvm.py
index 897f7cbbe4..9d7bde0f90 100644
--- a/comps/lvms/llava/lvm.py
+++ b/comps/lvms/llava/lvm.py
@@ -28,6 +28,9 @@
 logger = CustomLogger("lvm")
 logflag = os.getenv("LOGFLAG", False)
 
+# The maximum number of images that should be sent to the LVM
+max_images = int(os.getenv("MAX_IMAGES", 1))
+
 
 @register_microservice(
     name="opea_service@lvm",
@@ -76,6 +79,17 @@ async def lvm(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> Union[TextDoc
         prompt = request.prompt
         max_new_tokens = request.max_new_tokens
 
+    # Limit the number of images being sent to the LVM
+    if isinstance(img_b64_str, list) and len(img_b64_str) > max_images:
+        img_b64_str=img_b64_str[-max_images:]
+
+        # Adjust the number of image tags in the prompt
+        image_tag = "<image>\n"
+        num_tags_in_prompt = prompt.count(image_tag)
+
+        if len(img_b64_str) < num_tags_in_prompt:
+            prompt = prompt.replace(image_tag, "", num_tags_in_prompt - len(img_b64_str))
+
     inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens}
     # forward to the LLaVA server
     response = requests.post(url=f"{lvm_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None})
@@ -99,5 +113,8 @@ async def lvm(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> Union[TextDoc
 if __name__ == "__main__":
     lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399")
 
+    if logflag:
+        logger.info(f"MAX_IMAGES: {max_images}")
+
     logger.info("[LVM] LVM initialized.")
     opea_microservices["opea_service@lvm"].start()
diff --git a/comps/lvms/tgi-llava/lvm_tgi.py b/comps/lvms/tgi-llava/lvm_tgi.py
index 38b492c395..04ceee400c 100644
--- a/comps/lvms/tgi-llava/lvm_tgi.py
+++ b/comps/lvms/tgi-llava/lvm_tgi.py
@@ -27,6 +27,9 @@
 logger = CustomLogger("lvm_tgi")
 logflag = os.getenv("LOGFLAG", False)
 
+# The maximum number of images that should be sent to the LVM
+max_images = int(os.getenv("MAX_IMAGES", 1))
+
 
 @register_microservice(
     name="opea_service@lvm_tgi",
@@ -88,15 +91,41 @@ async def lvm(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> Union[TextDoc
         top_k = request.top_k
         top_p = request.top_p
 
-    if not img_b64_str:
-        # Work around an issue where LLaVA-NeXT is not providing good responses when prompted without an image.
-        # Provide an image and then instruct the model to ignore the image. The base64 string below is the encoded png:
-        # https://raw.githubusercontent.com/opea-project/GenAIExamples/refs/tags/v1.0/AudioQnA/ui/svelte/src/lib/assets/icons/png/audio1.png
-        img_b64_str = "iVBORw0KGgoAAAANSUhEUgAAADUAAAAlCAYAAADiMKHrAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAKPSURBVHgB7Zl/btowFMefnUTqf+MAHYMTjN4gvcGOABpM+8E0doLSE4xpsE3rKuAG3KC5Ad0J6MYOkP07YnvvhR9y0lVzupTIVT5SwDjB9fd97WfsMkCef1rUXM8dY9HHK4hWUevzi/oVWAqnF8fzLmAtiPA3Aq0lFsVA1fRKxlgNLIbDPaQUZQuu6YO98aIipHOiFGtIqaYfn1UnUCDds6WPyeANlTFbv9WztbFTK+HNUVAPiz7nbPzq7HsPCoKWIBREGfsJXZit5xT07X0jp6iRdIbEHOnjyyD97OvzH00lVS2K5OS2ax11cBXxJgYxlEIE6XZclzdTX6n8XjkkcEIfbj2nMO0/SNd1vy4vsCNjYPyEovfyy88GZIQCSKOCMf6ORgStoboLJuSWKDYCfK2q4jjrMZ+GOh7Pib/gek5DHxVUJtcgA7mJ4kwZRbN7viQXFzQn0Nl52gXG4Fo7DKAYp0yI3VHQ16oaWV0wYa+iGE8nG+wAdx5DzpS/KGyhFGULpShbKEXZQinqLlBK/IKc2asoh4sZvoXJWhlAzuxV1KBVD3HrfYTFAK8ZHgu0hu36DHLG+Izinw250WUkXHJht02QUnxLP7fZxR7f1I6S7Ir2GgmYvIQM5OYUuYBdainATq2ZjTqPBlnbGXYeBrg9Od18DKmc1U0jpw4OIIwEJFxQSl2b4MN2lf74fw8nFNbHt/5N9xWKTZvJ2S6YZk6RC3j2cKpVhSIShZ0mea6caCOCAjyNHd5gPPxGncMBTvI6hunYdaJ6kf8VoSCP2odxX6RkR6NOtanfj13EswKVqEQrPzzFL1lK+YvCFraiEqs8TrwQLGYraqpX4kr/Hixml+63Z+CoM9DTo438AUmP+KyMWT+tAAAAAElFTkSuQmCC"
-        prompt = f"Please disregard the image and answer the question. {prompt}"
+    # Make img_b64_str into a list of strings (if it's not already a list)
+    if not isinstance(img_b64_str, list):
+        if img_b64_str:
+            img_b64_str = [img_b64_str]
+        else:
+            # img_b64_str was an empty string, which means we have just a text prompt.
+            # Work around an issue where LLaVA-NeXT is not providing good responses when prompted without an image.
+            # Provide an image and then instruct the model to ignore the image. The base64 string below is the encoded png:
+            # https://raw.githubusercontent.com/opea-project/GenAIExamples/refs/tags/v1.0/AudioQnA/ui/svelte/src/lib/assets/icons/png/audio1.png
+            img_b64_str = ["iVBORw0KGgoAAAANSUhEUgAAADUAAAAlCAYAAADiMKHrAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAKPSURBVHgB7Zl/btowFMefnUTqf+MAHYMTjN4gvcGOABpM+8E0doLSE4xpsE3rKuAG3KC5Ad0J6MYOkP07YnvvhR9y0lVzupTIVT5SwDjB9fd97WfsMkCef1rUXM8dY9HHK4hWUevzi/oVWAqnF8fzLmAtiPA3Aq0lFsVA1fRKxlgNLIbDPaQUZQuu6YO98aIipHOiFGtIqaYfn1UnUCDds6WPyeANlTFbv9WztbFTK+HNUVAPiz7nbPzq7HsPCoKWIBREGfsJXZit5xT07X0jp6iRdIbEHOnjyyD97OvzH00lVS2K5OS2ax11cBXxJgYxlEIE6XZclzdTX6n8XjkkcEIfbj2nMO0/SNd1vy4vsCNjYPyEovfyy88GZIQCSKOCMf6ORgStoboLJuSWKDYCfK2q4jjrMZ+GOh7Pib/gek5DHxVUJtcgA7mJ4kwZRbN7viQXFzQn0Nl52gXG4Fo7DKAYp0yI3VHQ16oaWV0wYa+iGE8nG+wAdx5DzpS/KGyhFGULpShbKEXZQinqLlBK/IKc2asoh4sZvoXJWhlAzuxV1KBVD3HrfYTFAK8ZHgu0hu36DHLG+Izinw250WUkXHJht02QUnxLP7fZxR7f1I6S7Ir2GgmYvIQM5OYUuYBdainATq2ZjTqPBlnbGXYeBrg9Od18DKmc1U0jpw4OIIwEJFxQSl2b4MN2lf74fw8nFNbHt/5N9xWKTZvJ2S6YZk6RC3j2cKpVhSIShZ0mea6caCOCAjyNHd5gPPxGncMBTvI6hunYdaJ6kf8VoSCP2odxX6RkR6NOtanfj13EswKVqEQrPzzFL1lK+YvCFraiEqs8TrwQLGYraqpX4kr/Hixml+63Z+CoM9DTo438AUmP+KyMWT+tAAAAAElFTkSuQmCC"]
+            prompt = f"Please disregard the image and answer the question. {prompt}"
+
+    # Truncate the list of images if we have too many, only sending the most recent ones at the end of the list
+    if len(img_b64_str) > max_images:
+        img_b64_str=img_b64_str[-max_images:]
 
-    image = f"data:image/png;base64,{img_b64_str}"
-    image_prompt = f"![]({image})\n{prompt}\nASSISTANT:"
+    # Check the number of image tags in the prompt and adjust them to match the number of images that we have
+    image_tag = "<image>\n"
+    num_tags_in_prompt = prompt.count(image_tag)
+
+    # If there are more image tags in the prompt than images, remove the extra tags, starting from the first ones
+    if  len(img_b64_str) < num_tags_in_prompt:
+        prompt = prompt.replace(image_tag, "", num_tags_in_prompt - len(img_b64_str))
+    
+    # We don't have enough image tags in the prompt, add them
+    if len(img_b64_str) > num_tags_in_prompt:
+        num_tags_to_add = len(img_b64_str) - num_tags_in_prompt
+        tags_to_add = image_tag * num_tags_to_add
+        prompt = f"{tags_to_add}{prompt}"
+
+    # Replace image tags with the data
+    for i in img_b64_str:
+        formatted_image_str = f"![](data:image/png;base64,{i})\n"
+        prompt = prompt.replace(image_tag, formatted_image_str, 1)
+    image_prompt = f"{prompt}\nASSISTANT:"
 
     if streaming:
 
@@ -152,4 +181,6 @@ async def stream_generator():
     lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399")
     lvm_client = AsyncInferenceClient(lvm_endpoint)
     logger.info("[LVM] LVM initialized.")
+    if logflag:
+        logger.info(f"MAX_IMAGES: {max_images}")
     opea_microservices["opea_service@lvm_tgi"].start()
diff --git a/comps/retrievers/multimodal/redis/langchain/retriever_redis.py b/comps/retrievers/multimodal/redis/langchain/retriever_redis.py
index a01b3e20c4..a92d59aba2 100644
--- a/comps/retrievers/multimodal/redis/langchain/retriever_redis.py
+++ b/comps/retrievers/multimodal/redis/langchain/retriever_redis.py
@@ -69,6 +69,12 @@ async def retrieve(
     if isinstance(input, EmbedMultimodalDoc):
         metadata_list = []
         for r in search_res:
+            # If the input had an image, pass that through in the metadata along with the search result image
+            if input.base64_image:
+                if r.metadata["b64_img_str"]:
+                    r.metadata["b64_img_str"] = [input.base64_image, r.metadata["b64_img_str"]]
+                else:
+                    r.metadata["b64_img_str"] = input.base64_image
             metadata_list.append(r.metadata)
             retrieved_docs.append(TextDoc(text=r.page_content))
         result = SearchedMultimodalDoc(retrieved_docs=retrieved_docs, initial_query=input.text, metadata=metadata_list)
diff --git a/tests/embeddings/test_embeddings_multimodal.sh b/tests/embeddings/test_embeddings_multimodal.sh
index bd2ca93b70..5bb2fd9f93 100644
--- a/tests/embeddings/test_embeddings_multimodal.sh
+++ b/tests/embeddings/test_embeddings_multimodal.sh
@@ -85,6 +85,22 @@ function validate_microservice_image_text_pair_embedding() {
     fi
 }
 
+function validate_microservice_b64_image_text_pair_embedding() {
+    result=$(http_proxy="" curl http://${ip_address}:$MM_EMBEDDING_PORT_MICROSERVICE/v1/embeddings \
+        -X POST \
+        -H "Content-Type: application/json" \
+        -d '{"text": {"text" : "This is some sample text."}, "image" : {"base64_image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC"}}')
+
+    if [[ $result == *"embedding"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong. Received was $result"
+        docker logs embedding-multimodal-bridgetower
+        docker logs embedding-multimodal
+        exit 1
+    fi
+}
+
 function validate_microservice() {
     validate_microservice_text_embedding
     validate_microservice_image_text_pair_embedding
diff --git a/tests/lvms/test_lvms_llava.sh b/tests/lvms/test_lvms_llava.sh
index 4627ec6ee7..8558fa5e3d 100644
--- a/tests/lvms/test_lvms_llava.sh
+++ b/tests/lvms/test_lvms_llava.sh
@@ -48,6 +48,42 @@ function validate_microservice() {
         exit 1
     fi
 
+    # Test sending two images with a text prompt with one image tag in the prompt.
+    # The first image is green and the second image is blue. Since the default MAX_IMAGES is 1, only the blue image should be sent to the LVM.
+    result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC"], "prompt":"<image>\nWhat are in these images?"}' -H 'Content-Type: application/json')
+    if [[ $result == *"blue"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong."
+        docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log
+        docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log
+        exit 1
+    fi
+
+    # Test sending two images with a text prompt without any image tags.
+    # The first image is blue and the second image is green. Since the default MAX_IMAGES is 1, only the green image should be sent to the LVM.
+    result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC"], "prompt":"What are in these images?"}' -H 'Content-Type: application/json')
+    if [[ $result == *"green"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong."
+        docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log
+        docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log
+        exit 1
+    fi
+
+    # Same test as above, except including two image tags with the prompt to ensure the number of image tags is reconciled.
+    # The first image is blue and the second image is green. Since the default MAX_IMAGES is 1, only the green image should be sent to the LVM.
+    result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC"], "prompt":"<image>\n<image>\nWhat are in these images?"}' -H 'Content-Type: application/json')
+    if [[ $result == *"green"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong."
+        docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log
+        docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log
+        exit 1
+    fi
+
     result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm -XPOST -d '{"retrieved_docs": [], "initial_query": "What is this?", "top_n": 1, "metadata": [{"b64_img_str": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "transcript_for_inference": "yellow image", "video_id": "8c7461df-b373-4a00-8696-9a2234359fe0", "time_of_frame_ms":"37000000", "source_video":"WeAreGoingOnBullrun_8c7461df-b373-4a00-8696-9a2234359fe0.mp4"}]}' -H 'Content-Type: application/json')
     if [[ $result == *"yellow"* ]]; then
         echo "Result correct."
diff --git a/tests/lvms/test_lvms_tgi-llava_on_intel_hpu.sh b/tests/lvms/test_lvms_tgi-llava_on_intel_hpu.sh
index 1fa0155266..9d1a69a7ae 100644
--- a/tests/lvms/test_lvms_tgi-llava_on_intel_hpu.sh
+++ b/tests/lvms/test_lvms_tgi-llava_on_intel_hpu.sh
@@ -54,6 +54,41 @@ function validate_microservice() {
         echo "LVM prompt without image - HTTP status (successful)"
     fi
 
+    # Test sending two images with a text prompt with one image tag in the prompt.
+    # The first image is green and the second image is blue. Since the default MAX_IMAGES is 1, only the blue image should be sent to the LVM.
+    result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC"], "prompt":"<image>\nWhat are in these images?"}' -H 'Content-Type: application/json')
+    if [[ $result == *"blue"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong."
+        docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log
+        docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log
+        exit 1
+    fi
+
+    # Test sending two images with a text prompt without any image tags.
+    # The first image is blue and the second image is green. Since the default MAX_IMAGES is 1, only the green image should be sent to the LVM.
+    result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC"], "prompt":"What are in these images?"}' -H 'Content-Type: application/json')
+    if [[ $result == *"green"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong."
+        docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log
+        docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log
+        exit 1
+    fi
+
+    # Same test as above, except including two image tags with the prompt to ensure the number of image tags is reconciled.
+    # The first image is blue and the second image is green. Since the default MAX_IMAGES is 1, only the green image should be sent to the LVM.
+    result=$(http_proxy="" curl http://localhost:$lvm_port/v1/lvm -XPOST -d '{"image": ["iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNkYPhfz0AEYBxVSF+FAP5FDvcfRYWgAAAAAElFTkSuQmCC", "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mNk+M9Qz0AEYBxVSF+FAAhKDveksOjmAAAAAElFTkSuQmCC"], "prompt":"<image>\n<image>\nWhat are in these images?"}' -H 'Content-Type: application/json')
+    if [[ $result == *"green"* ]]; then
+        echo "Result correct."
+    else
+        echo "Result wrong."
+        docker logs test-comps-lvm-llava >> ${LOG_PATH}/llava-dependency.log
+        docker logs test-comps-lvm-llava-svc >> ${LOG_PATH}/llava-server.log
+        exit 1
+    fi
 }
 
 function stop_docker() {
diff --git a/tests/retrievers/test_retrievers_multimodal_redis_langchain.sh b/tests/retrievers/test_retrievers_multimodal_redis_langchain.sh
index 873516ddc5..06fecec69d 100644
--- a/tests/retrievers/test_retrievers_multimodal_redis_langchain.sh
+++ b/tests/retrievers/test_retrievers_multimodal_redis_langchain.sh
@@ -58,6 +58,32 @@ function validate_microservice() {
         docker logs test-comps-retriever-multimodal-redis >> ${LOG_PATH}/retriever.log
         exit 1
     fi
+
+    # Test the retriever with a b64 image that should be passed through
+    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding},\"img_b64_str\":\"iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC\"}" -H 'Content-Type: application/json' "$URL")
+    if [ "$HTTP_STATUS" -eq 200 ]; then
+        echo "[ retriever ] HTTP status is 200. Checking content..."
+        local CONTENT=$(curl -s -X POST -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/retriever.log)
+
+        if echo "$CONTENT" | grep -q "retrieved_docs"; then
+            echo "[ retriever ] Content has retrieved_docs as expected."
+            if echo "$CONTENT" | grep -q "b64_img_str"; then
+                echo "[ retriever ] Content has b64_img_str as expected."
+            else
+                echo "[ retriever ] Content does not include the b64_img_str: $CONTENT"
+                docker logs test-comps-retriever-multimodal-redis >> ${LOG_PATH}/retriever.log
+                exit 1
+            fi
+        else
+            echo "[ retriever ] Content does not match the expected result: $CONTENT"
+            docker logs test-comps-retriever-multimodal-redis >> ${LOG_PATH}/retriever.log
+            exit 1
+        fi
+    else
+        echo "[ retriever ] HTTP status is not 200. Received status was $HTTP_STATUS"
+        docker logs test-comps-retriever-multimodal-redis >> ${LOG_PATH}/retriever.log
+        exit 1
+    fi
 }
 
 function stop_docker() {