From 0727384606ba212a838e479895506d5a2f15c704 Mon Sep 17 00:00:00 2001
From: Spycsh <sihan.chen@intel.com>
Date: Tue, 25 Mar 2025 21:13:56 -0700
Subject: [PATCH 01/14] integrate UI-TARS vLLM in lvm component

---
 .../deployment/docker_compose/compose.yaml    |  7 +-
 comps/lvms/src/README.md                      | 12 +++-
 comps/lvms/src/integrations/vllm.py           | 64 +++++++++++++++++++
 3 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/comps/lvms/deployment/docker_compose/compose.yaml b/comps/lvms/deployment/docker_compose/compose.yaml
index 01a2b90ce6..808f159ab9 100644
--- a/comps/lvms/deployment/docker_compose/compose.yaml
+++ b/comps/lvms/deployment/docker_compose/compose.yaml
@@ -24,7 +24,7 @@ services:
     ports:
       - ${VLLM_PORT:-9699}:80
     volumes:
-      - "./data:/data"
+      - "./data:/root/.cache/huggingface/hub/"
     shm_size: 128g
     environment:
       no_proxy: ${no_proxy}
@@ -45,7 +45,7 @@ services:
     ports:
       - ${VLLM_PORT:-9699}:80
     volumes:
-      - "./data:/data"
+      - "./data:/root/.cache/huggingface/hub/"
     shm_size: 128g
     environment:
       no_proxy: ${no_proxy}
@@ -60,6 +60,7 @@ services:
       MAX_MODEL_LEN: ${MAX_TOTAL_TOKENS:-4096}
       MAX_SEQ_LEN_TO_CAPTURE: ${MAX_TOTAL_TOKENS:-4096}
       PT_HPUGRAPH_DISABLE_TENSOR_CACHE: false # https://github.com/HabanaAI/vllm-fork/issues/841#issuecomment-2700421704
+      PT_HPU_ENABLE_LAZY_COLLECTIVES: true # for tensor parallel inference with hpu graphs
     runtime: habana
     cap_add:
       - SYS_NICE
@@ -69,7 +70,7 @@ services:
       interval: 10s
       timeout: 10s
       retries: 150
-    command: --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --chat-template examples/template_llava.jinja  # https://docs.vllm.ai/en/v0.5.0/models/vlm.html
+    command: --model $LLM_MODEL_ID --tensor-parallel-size ${TP_SIZE:-1} --host 0.0.0.0 --port 80 --enable-auto-tool-choice --tool-call-parser hermes  --chat-template ${CHAT_TEMPLATE:-examples/template_llava.jinja} # https://docs.vllm.ai/en/v0.5.0/models/vlm.html
   llava-tgi-service:
     image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: llava-tgi-service
diff --git a/comps/lvms/src/README.md b/comps/lvms/src/README.md
index 4c8281065b..49d71d1a5b 100644
--- a/comps/lvms/src/README.md
+++ b/comps/lvms/src/README.md
@@ -83,8 +83,18 @@ export ip_address=$(hostname -I | awk '{print $1}')
 export LVM_PORT=9399
 export VLLM_PORT=11507
 export LVM_ENDPOINT=http://$ip_address:$VLLM_PORT
+
+# llava (option 1)
 export LLM_MODEL_ID=llava-hf/llava-1.5-7b-hf
-docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up vllm-service lvm-vllm -d
+export CHAT_TEMPLATE=examples/template_llava.jinja
+# UI-TARS (option 2)
+export LLM_MODEL_ID=bytedance-research/UI-TARS-7B-DPO
+export TP_SIZE=1    # change to 4 or 8 if using UI-TARS-72B-DPO
+export CHAT_TEMPLATE=None
+
+export VLLM_SKIP_WARMUP=true # skip the warmup-phase will start the vLLM server quickly on Gaudi, but increase runtime inference time when meeting unseen HPU shape
+
+docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up vllm-gaudi-service lvm-vllm -d
 ```
 
 ## Test
diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index 5764299e76..7e35d23311 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -45,6 +45,65 @@ def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_im
 
         return template.format(context=context, question=question)
 
+    @staticmethod
+    def generate_ui_tars_prompt_for_computer(prompt: str):
+        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
+
+        ## Output Format
+        ```\nThought: ...
+        Action: ...\n```
+
+        ## Action Space
+
+        click(start_box='<|box_start|>(x1,y1)<|box_end|>')
+        left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
+        right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
+        drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
+        hotkey(key='')
+        type(content='') #If you want to submit your input, use \"\
+        \" at the end of `content`.
+        scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
+        wait() #Sleep for 5s and take a screenshot to check for any changes.
+        finished()
+        call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
+
+
+        ## Note
+        - Use Chinese in `Thought` part.
+        - Summarize your next action (with its target element) in one sentence in `Thought` part.
+
+        ## User Instruction
+        """
+
+        return template + prompt
+
+    @staticmethod
+    def generate_ui_tars_prompt_for_mobile(prompt: str):
+        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
+
+        ## Output Format
+        ```\nThought: ...
+        Action: ...\n```
+
+        ## Action Space
+        click(start_box='<|box_start|>(x1,y1)<|box_end|>')
+        long_press(start_box='<|box_start|>(x1,y1)<|box_end|>', time='')
+        type(content='')
+        scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
+        press_home()
+        press_back()
+        finished(content='') # Submit the task regardless of whether it succeeds or fails.
+
+        ## Note
+        - Use English in `Thought` part.
+
+        - Write a small plan and finally summarize your next action (with its target element) in one sentence in `Thought` part.
+
+        ## User Instruction
+        """
+
+        return template + prompt
+
 
 @OpeaComponentRegistry.register("OPEA_VLLM_LVM")
 class OpeaVllmLvm(OpeaComponent):
@@ -116,6 +175,11 @@ async def invoke(
             # top_k = request.top_k
             top_p = request.top_p
 
+        # update ui_tars prompt
+        if "UI-TARS" in LLM_MODEL_ID:
+            # TODO validate mobile flow https://github.com/bytedance/UI-TARS
+            prompt = ChatTemplate.generate_ui_tars_prompt_for_computer(prompt=prompt)
+
         if not img_b64_str:
             # If img_b64_str was an empty string, which means we have just have a text prompt.
             # Work around an issue where LLaVA-NeXT is not providing good responses when prompted without an image.

From dbc1ee8e8faa325eaf5fec9ba5309a16712ffb6d Mon Sep 17 00:00:00 2001
From: Spycsh <sihan.chen@intel.com>
Date: Tue, 25 Mar 2025 21:14:59 -0700
Subject: [PATCH 02/14] fix compose service name in lvms README

---
 comps/lvms/src/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comps/lvms/src/README.md b/comps/lvms/src/README.md
index 49d71d1a5b..9ee0756aab 100644
--- a/comps/lvms/src/README.md
+++ b/comps/lvms/src/README.md
@@ -94,7 +94,7 @@ export CHAT_TEMPLATE=None
 
 export VLLM_SKIP_WARMUP=true # skip the warmup-phase will start the vLLM server quickly on Gaudi, but increase runtime inference time when meeting unseen HPU shape
 
-docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up vllm-gaudi-service lvm-vllm -d
+docker compose -f comps/lvms/deployment/docker_compose/compose.yaml up vllm-gaudi-service lvm-vllm-gaudi -d
 ```
 
 ## Test

From 9c5d816acaea3ea083f7adfdf7bd8df6cdf3dc26 Mon Sep 17 00:00:00 2001
From: Spycsh <sihan.chen@intel.com>
Date: Tue, 25 Mar 2025 22:16:41 -0700
Subject: [PATCH 03/14] fix accuracy issue caused by template indentation

---
 comps/lvms/src/integrations/vllm.py | 82 ++++++++++++++---------------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index 7e35d23311..c053ecc5dc 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -49,31 +49,31 @@ def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_im
     def generate_ui_tars_prompt_for_computer(prompt: str):
         template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
 
-        ## Output Format
-        ```\nThought: ...
-        Action: ...\n```
-
-        ## Action Space
-
-        click(start_box='<|box_start|>(x1,y1)<|box_end|>')
-        left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
-        right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
-        drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
-        hotkey(key='')
-        type(content='') #If you want to submit your input, use \"\
-        \" at the end of `content`.
-        scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
-        wait() #Sleep for 5s and take a screenshot to check for any changes.
-        finished()
-        call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
-
-
-        ## Note
-        - Use Chinese in `Thought` part.
-        - Summarize your next action (with its target element) in one sentence in `Thought` part.
-
-        ## User Instruction
-        """
+## Output Format
+```\nThought: ...
+Action: ...\n```
+
+## Action Space
+
+click(start_box='<|box_start|>(x1,y1)<|box_end|>')
+left_double(start_box='<|box_start|>(x1,y1)<|box_end|>')
+right_single(start_box='<|box_start|>(x1,y1)<|box_end|>')
+drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
+hotkey(key='')
+type(content='') #If you want to submit your input, use \"\
+\" at the end of `content`.
+scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
+wait() #Sleep for 5s and take a screenshot to check for any changes.
+finished()
+call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
+
+
+## Note
+- Use Chinese in `Thought` part.
+- Summarize your next action (with its target element) in one sentence in `Thought` part.
+
+## User Instruction
+"""
 
         return template + prompt
 
@@ -81,26 +81,26 @@ def generate_ui_tars_prompt_for_computer(prompt: str):
     def generate_ui_tars_prompt_for_mobile(prompt: str):
         template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
 
-        ## Output Format
-        ```\nThought: ...
-        Action: ...\n```
+## Output Format
+```\nThought: ...
+Action: ...\n```
 
-        ## Action Space
-        click(start_box='<|box_start|>(x1,y1)<|box_end|>')
-        long_press(start_box='<|box_start|>(x1,y1)<|box_end|>', time='')
-        type(content='')
-        scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
-        press_home()
-        press_back()
-        finished(content='') # Submit the task regardless of whether it succeeds or fails.
+## Action Space
+click(start_box='<|box_start|>(x1,y1)<|box_end|>')
+long_press(start_box='<|box_start|>(x1,y1)<|box_end|>', time='')
+type(content='')
+scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
+press_home()
+press_back()
+finished(content='') # Submit the task regardless of whether it succeeds or fails.
 
-        ## Note
-        - Use English in `Thought` part.
+## Note
+- Use English in `Thought` part.
 
-        - Write a small plan and finally summarize your next action (with its target element) in one sentence in `Thought` part.
+- Write a small plan and finally summarize your next action (with its target element) in one sentence in `Thought` part.
 
-        ## User Instruction
-        """
+## User Instruction
+"""
 
         return template + prompt
 

From 904e4d7afbe0aa61beb80ef964332943a5f69a41 Mon Sep 17 00:00:00 2001
From: Spycsh <sihan.chen@intel.com>
Date: Tue, 25 Mar 2025 22:29:04 -0700
Subject: [PATCH 04/14] add trailing space to UI-TARS prompt for accuracy issue

---
 comps/lvms/src/integrations/vllm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index c053ecc5dc..1393b289ca 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -47,7 +47,7 @@ def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_im
 
     @staticmethod
     def generate_ui_tars_prompt_for_computer(prompt: str):
-        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
+        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
 
 ## Output Format
 ```\nThought: ...
@@ -79,7 +79,7 @@ def generate_ui_tars_prompt_for_computer(prompt: str):
 
     @staticmethod
     def generate_ui_tars_prompt_for_mobile(prompt: str):
-        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
+        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
 
 ## Output Format
 ```\nThought: ...

From c18191bdfb4a1e71df3b3dcbac05bb1147003392 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 26 Mar 2025 05:29:57 +0000
Subject: [PATCH 05/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 comps/lvms/src/integrations/vllm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index 1393b289ca..c053ecc5dc 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -47,7 +47,7 @@ def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_im
 
     @staticmethod
     def generate_ui_tars_prompt_for_computer(prompt: str):
-        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
+        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
 
 ## Output Format
 ```\nThought: ...
@@ -79,7 +79,7 @@ def generate_ui_tars_prompt_for_computer(prompt: str):
 
     @staticmethod
     def generate_ui_tars_prompt_for_mobile(prompt: str):
-        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
+        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
 
 ## Output Format
 ```\nThought: ...

From 24b16cbd922eda5c9d49917c055f368123ef94f2 Mon Sep 17 00:00:00 2001
From: Spycsh <sihan.chen@intel.com>
Date: Tue, 25 Mar 2025 23:02:59 -0700
Subject: [PATCH 06/14] force to use frequency_penalty 1.0

---
 comps/lvms/src/integrations/vllm.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index 1393b289ca..2cd0466fb7 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -179,6 +179,9 @@ async def invoke(
         if "UI-TARS" in LLM_MODEL_ID:
             # TODO validate mobile flow https://github.com/bytedance/UI-TARS
             prompt = ChatTemplate.generate_ui_tars_prompt_for_computer(prompt=prompt)
+            frequency_penalty = 1.0 # force to use frequency_penalty 1.0
+        else:
+            frequency_penalty = 0.0 # default
 
         if not img_b64_str:
             # If img_b64_str was an empty string, which means we have just have a text prompt.
@@ -211,6 +214,7 @@ def stream_generator(time_start):
                         }
                     ],
                     max_tokens=max_new_tokens,
+                    frequency_penalty=frequency_penalty,
                     temperature=temperature,
                     top_p=top_p,
                     stream=True,

From 44957c9fbe103f3e5ec40015bfd90fed1b70f8bf Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 26 Mar 2025 06:04:10 +0000
Subject: [PATCH 07/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 comps/lvms/src/integrations/vllm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index bfc16ab842..85e31210e9 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -179,9 +179,9 @@ async def invoke(
         if "UI-TARS" in LLM_MODEL_ID:
             # TODO validate mobile flow https://github.com/bytedance/UI-TARS
             prompt = ChatTemplate.generate_ui_tars_prompt_for_computer(prompt=prompt)
-            frequency_penalty = 1.0 # force to use frequency_penalty 1.0
+            frequency_penalty = 1.0  # force to use frequency_penalty 1.0
         else:
-            frequency_penalty = 0.0 # default
+            frequency_penalty = 0.0  # default
 
         if not img_b64_str:
             # If img_b64_str was an empty string, which means we have just have a text prompt.

From ff829b382db5f0577b5e40328d80e6970aa06904 Mon Sep 17 00:00:00 2001
From: Spycsh <sihan.chen@intel.com>
Date: Tue, 25 Mar 2025 23:13:02 -0700
Subject: [PATCH 08/14] bypass autofix whitespace

---
 comps/lvms/src/integrations/vllm.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index bfc16ab842..2d0cb7a05e 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -47,7 +47,8 @@ def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_im
 
     @staticmethod
     def generate_ui_tars_prompt_for_computer(prompt: str):
-        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
+        # fmt: off
+        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
 
 ## Output Format
 ```\nThought: ...
@@ -79,7 +80,8 @@ def generate_ui_tars_prompt_for_computer(prompt: str):
 
     @staticmethod
     def generate_ui_tars_prompt_for_mobile(prompt: str):
-        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
+        # fmt: off
+        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
 
 ## Output Format
 ```\nThought: ...

From 3c5a1071be59cc7b3dc9f0bcd932aecc412b22c8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 26 Mar 2025 06:13:38 +0000
Subject: [PATCH 09/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 comps/lvms/src/integrations/vllm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index bf016c6d87..4ceaa9ee5c 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -48,7 +48,7 @@ def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_im
     @staticmethod
     def generate_ui_tars_prompt_for_computer(prompt: str):
         # fmt: off
-        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
+        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
 
 ## Output Format
 ```\nThought: ...
@@ -81,7 +81,7 @@ def generate_ui_tars_prompt_for_computer(prompt: str):
     @staticmethod
     def generate_ui_tars_prompt_for_mobile(prompt: str):
         # fmt: off
-        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. 
+        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
 
 ## Output Format
 ```\nThought: ...

From 39833d82e88717a43ac8230ade39421537a86176 Mon Sep 17 00:00:00 2001
From: Spycsh <sihan.chen@intel.com>
Date: Tue, 25 Mar 2025 23:38:35 -0700
Subject: [PATCH 10/14] concatenate two strings to keep the prompt's trailing space

---
 comps/lvms/src/integrations/vllm.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index 4ceaa9ee5c..6c2aecac4d 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -47,8 +47,7 @@ def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_im
 
     @staticmethod
     def generate_ui_tars_prompt_for_computer(prompt: str):
-        # fmt: off
-        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
+        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. """ + r"""
 
 ## Output Format
 ```\nThought: ...
@@ -80,8 +79,7 @@ def generate_ui_tars_prompt_for_computer(prompt: str):
 
     @staticmethod
     def generate_ui_tars_prompt_for_mobile(prompt: str):
-        # fmt: off
-        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
+        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. """ + """
 
 ## Output Format
 ```\nThought: ...

From 0778038c420ac66bc535b494cff697dccb51be6d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 26 Mar 2025 06:39:03 +0000
Subject: [PATCH 11/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 comps/lvms/src/integrations/vllm.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index 6c2aecac4d..d1a21d5210 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -47,7 +47,9 @@ def generate_multimodal_rag_on_videos_prompt(question: str, context: str, has_im
 
     @staticmethod
     def generate_ui_tars_prompt_for_computer(prompt: str):
-        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. """ + r"""
+        template = (
+            r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. """
+            + r"""
 
 ## Output Format
 ```\nThought: ...
@@ -74,12 +76,15 @@ def generate_ui_tars_prompt_for_computer(prompt: str):
 
 ## User Instruction
 """
+        )
 
         return template + prompt
 
     @staticmethod
     def generate_ui_tars_prompt_for_mobile(prompt: str):
-        template = r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. """ + """
+        template = (
+            r"""You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. """
+            + """
 
 ## Output Format
 ```\nThought: ...
@@ -101,6 +106,7 @@ def generate_ui_tars_prompt_for_mobile(prompt: str):
 
 ## User Instruction
 """
+        )
 
         return template + prompt
 

From 3828119bc17541f28122419cf754d512b2493211 Mon Sep 17 00:00:00 2001
From: Spycsh <sihan.chen@intel.com>
Date: Wed, 26 Mar 2025 00:02:59 -0700
Subject: [PATCH 12/14] also pass frequency_penalty to the second completion call

---
 comps/lvms/src/integrations/vllm.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index d1a21d5210..78178b2325 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -259,6 +259,7 @@ def stream_generator(time_start):
                     }
                 ],
                 max_tokens=max_new_tokens,
+                frequency_penalty=frequency_penalty,
                 temperature=temperature,
                 top_p=top_p,
             )

From 09c834b53d13fc8e54ef37165b110d0e7f15ad15 Mon Sep 17 00:00:00 2001
From: Spycsh <sihan.chen@intel.com>
Date: Wed, 26 Mar 2025 00:19:32 -0700
Subject: [PATCH 13/14] force temperature and top_p to 1.0 for UI-TARS

---
 comps/lvms/src/integrations/vllm.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index 78178b2325..2c7a0f6509 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -186,6 +186,8 @@ async def invoke(
             # TODO validate mobile flow https://github.com/bytedance/UI-TARS
             prompt = ChatTemplate.generate_ui_tars_prompt_for_computer(prompt=prompt)
             frequency_penalty = 1.0  # force to use frequency_penalty 1.0
+            temperature=1.0
+            top_p=1.0
         else:
             frequency_penalty = 0.0  # default
 

From 505b9844710d13cadff7ef8e4dd8ea8f9fd3e42a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 26 Mar 2025 07:19:59 +0000
Subject: [PATCH 14/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 comps/lvms/src/integrations/vllm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comps/lvms/src/integrations/vllm.py b/comps/lvms/src/integrations/vllm.py
index 2c7a0f6509..1042734760 100644
--- a/comps/lvms/src/integrations/vllm.py
+++ b/comps/lvms/src/integrations/vllm.py
@@ -186,8 +186,8 @@ async def invoke(
             # TODO validate mobile flow https://github.com/bytedance/UI-TARS
             prompt = ChatTemplate.generate_ui_tars_prompt_for_computer(prompt=prompt)
             frequency_penalty = 1.0  # force to use frequency_penalty 1.0
-            temperature=1.0
-            top_p=1.0
+            temperature = 1.0
+            top_p = 1.0
         else:
             frequency_penalty = 0.0  # default