From 717db9c088b02ffa950229db288fabbdb5840fcd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E0=AE=AE=E0=AE=A9=E0=AF=8B=E0=AE=9C=E0=AF=8D=E0=AE=95?=
 =?UTF-8?q?=E0=AF=81=E0=AE=AE=E0=AE=BE=E0=AE=B0=E0=AF=8D=20=E0=AE=AA?=
 =?UTF-8?q?=E0=AE=B4=E0=AE=A9=E0=AE=BF=E0=AE=9A=E0=AF=8D=E0=AE=9A=E0=AE=BE?=
 =?UTF-8?q?=E0=AE=AE=E0=AE=BF?= <smartmanoj42857@gmail.com>
Date: Wed, 5 Mar 2025 15:29:24 +0530
Subject: [PATCH 1/2] Set default value of seed to None
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: மனோஜ்குமார் பழனிச்சாமி <smartmanoj42857@gmail.com>
---
 vllm/engine/arg_utils.py | 2 +-
 vllm/entrypoints/llm.py  | 2 +-
 vllm/utils.py            | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 989eb4dbfd14..e4e718bd4d01 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -104,7 +104,7 @@ class EngineArgs:
     config_format: ConfigFormat = ConfigFormat.AUTO
     dtype: str = 'auto'
     kv_cache_dtype: str = 'auto'
-    seed: int = 0
+    seed: Optional[int] = None
     max_model_len: Optional[int] = None
     # Note: Specifying a custom executor backend by passing a class
     # is intended for expert use only. The API may change without
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index fc585ee9e54b..a25a778b1fd3 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -168,7 +168,7 @@ def __init__(
         quantization: Optional[str] = None,
         revision: Optional[str] = None,
         tokenizer_revision: Optional[str] = None,
-        seed: int = 0,
+        seed: Optional[int] = None,
         gpu_memory_utilization: float = 0.9,
         swap_space: float = 4,
         cpu_offload_gb: float = 0,
diff --git a/vllm/utils.py b/vllm/utils.py
index 1de2180deb50..4c4c25098501 100644
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -644,7 +644,7 @@ def create_kv_caches_with_random_flash(
     head_size: int,
     cache_dtype: Optional[Union[str, torch.dtype]],
     model_dtype: Optional[Union[str, torch.dtype]] = None,
-    seed: int = 0,
+    seed: Optional[int] = None,
     device: Optional[str] = "cuda",
 ) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
     from vllm.platforms import current_platform
@@ -681,7 +681,7 @@ def create_kv_caches_with_random(
     head_size: int,
     cache_dtype: Optional[Union[str, torch.dtype]],
     model_dtype: Optional[Union[str, torch.dtype]] = None,
-    seed: int = 0,
+    seed: Optional[int] = None,
     device: Optional[str] = "cuda",
 ) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
 

From f2872e8808c8974132b0eeadd3882829efa7b11f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E0=AE=AE=E0=AE=A9=E0=AF=8B=E0=AE=9C=E0=AF=8D=E0=AE=95?=
 =?UTF-8?q?=E0=AF=81=E0=AE=AE=E0=AE=BE=E0=AE=B0=E0=AF=8D=20=E0=AE=AA?=
 =?UTF-8?q?=E0=AE=B4=E0=AE=A9=E0=AE=BF=E0=AE=9A=E0=AF=8D=E0=AE=9A=E0=AE=BE?=
 =?UTF-8?q?=E0=AE=AE=E0=AE=BF?= <smartmanoj42857@gmail.com>
Date: Fri, 7 Mar 2025 12:03:23 +0530
Subject: [PATCH 2/2] Add seed for consistent outputs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: மனோஜ்குமார் பழனிச்சாமி <smartmanoj42857@gmail.com>
---
 tests/distributed/test_torchrun_example.py    | 3 ++-
 tests/entrypoints/llm/test_encode.py          | 3 ++-
 tests/entrypoints/llm/test_guided_generate.py | 2 +-
 tests/entrypoints/openai/test_chat_echo.py    | 2 ++
 tests/entrypoints/openai/test_metrics.py      | 2 ++
 tests/entrypoints/openai/test_root_path.py    | 2 ++
 6 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/tests/distributed/test_torchrun_example.py b/tests/distributed/test_torchrun_example.py
index 1c6c28b4ed35..4ef33932538e 100644
--- a/tests/distributed/test_torchrun_example.py
+++ b/tests/distributed/test_torchrun_example.py
@@ -25,7 +25,8 @@
           tensor_parallel_size=2,
           distributed_executor_backend="external_launcher",
           gpu_memory_utilization=random.uniform(0.7, 0.9),
-          swap_space=random.randint(1, 4))
+          swap_space=random.randint(1, 4),
+          seed=0)
 
 outputs = llm.generate(prompts, sampling_params)
 
diff --git a/tests/entrypoints/llm/test_encode.py b/tests/entrypoints/llm/test_encode.py
index 6438743b6494..d10257761c86 100644
--- a/tests/entrypoints/llm/test_encode.py
+++ b/tests/entrypoints/llm/test_encode.py
@@ -34,7 +34,8 @@ def llm():
               max_num_batched_tokens=32768,
               tensor_parallel_size=1,
               gpu_memory_utilization=0.75,
-              enforce_eager=True)
+              enforce_eager=True,
+              seed=0)
 
     with llm.deprecate_legacy_api():
         yield weakref.proxy(llm)
diff --git a/tests/entrypoints/llm/test_guided_generate.py b/tests/entrypoints/llm/test_guided_generate.py
index fce581c78288..97ee027bde3b 100644
--- a/tests/entrypoints/llm/test_guided_generate.py
+++ b/tests/entrypoints/llm/test_guided_generate.py
@@ -21,7 +21,7 @@
 def llm():
     # pytest caches the fixture so we use weakref.proxy to
     # enable garbage collection
-    llm = LLM(model=MODEL_NAME, max_model_len=1024)
+    llm = LLM(model=MODEL_NAME, max_model_len=1024, seed=0)
 
     with llm.deprecate_legacy_api():
         yield weakref.proxy(llm)
diff --git a/tests/entrypoints/openai/test_chat_echo.py b/tests/entrypoints/openai/test_chat_echo.py
index 3e76158a8c14..15da0f2fb5fe 100644
--- a/tests/entrypoints/openai/test_chat_echo.py
+++ b/tests/entrypoints/openai/test_chat_echo.py
@@ -24,6 +24,8 @@ def server():
         "4080",
         "--chat-template",
         DUMMY_CHAT_TEMPLATE,
+        "--seed",
+        "0",
     ]
 
     with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
diff --git a/tests/entrypoints/openai/test_metrics.py b/tests/entrypoints/openai/test_metrics.py
index 2bffd0ce138e..aa290fc25d7f 100644
--- a/tests/entrypoints/openai/test_metrics.py
+++ b/tests/entrypoints/openai/test_metrics.py
@@ -47,6 +47,8 @@ def default_server_args():
         "--enforce-eager",
         "--max-num-seqs",
         "128",
+        "--seed",
+        "0",
     ]
 
 
diff --git a/tests/entrypoints/openai/test_root_path.py b/tests/entrypoints/openai/test_root_path.py
index c9fa192fb6ae..71fe8cbdba38 100644
--- a/tests/entrypoints/openai/test_root_path.py
+++ b/tests/entrypoints/openai/test_root_path.py
@@ -30,6 +30,8 @@ def server():
         "/" + ROOT_PATH,
         "--chat-template",
         DUMMY_CHAT_TEMPLATE,
+        "--seed",
+        "0",
     ]
     envs = os.environ.copy()