From 717db9c088b02ffa950229db288fabbdb5840fcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E0=AE=AE=E0=AE=A9=E0=AF=8B=E0=AE=9C=E0=AF=8D=E0=AE=95?= =?UTF-8?q?=E0=AF=81=E0=AE=AE=E0=AE=BE=E0=AE=B0=E0=AF=8D=20=E0=AE=AA?= =?UTF-8?q?=E0=AE=B4=E0=AE=A9=E0=AE=BF=E0=AE=9A=E0=AF=8D=E0=AE=9A=E0=AE=BE?= =?UTF-8?q?=E0=AE=AE=E0=AE=BF?= Date: Wed, 5 Mar 2025 15:29:24 +0530 Subject: [PATCH 1/2] Set default value of seed to None MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: மனோஜ்குமார் பழனிச்சாமி --- vllm/engine/arg_utils.py | 2 +- vllm/entrypoints/llm.py | 2 +- vllm/utils.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 989eb4dbfd14..e4e718bd4d01 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -104,7 +104,7 @@ class EngineArgs: config_format: ConfigFormat = ConfigFormat.AUTO dtype: str = 'auto' kv_cache_dtype: str = 'auto' - seed: int = 0 + seed: Optional[int] = None max_model_len: Optional[int] = None # Note: Specifying a custom executor backend by passing a class # is intended for expert use only. The API may change without diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index fc585ee9e54b..a25a778b1fd3 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -168,7 +168,7 @@ def __init__( quantization: Optional[str] = None, revision: Optional[str] = None, tokenizer_revision: Optional[str] = None, - seed: int = 0, + seed: Optional[int] = None, gpu_memory_utilization: float = 0.9, swap_space: float = 4, cpu_offload_gb: float = 0, diff --git a/vllm/utils.py b/vllm/utils.py index 1de2180deb50..4c4c25098501 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -644,7 +644,7 @@ def create_kv_caches_with_random_flash( head_size: int, cache_dtype: Optional[Union[str, torch.dtype]], model_dtype: Optional[Union[str, torch.dtype]] = None, - seed: int = 0, + seed: Optional[int] = None, device: Optional[str] = "cuda", ) -> tuple[list[torch.Tensor], list[torch.Tensor]]: from vllm.platforms import current_platform @@ -681,7 +681,7 @@ def create_kv_caches_with_random( head_size: int, cache_dtype: Optional[Union[str, torch.dtype]], model_dtype: Optional[Union[str, torch.dtype]] = None, - seed: int = 0, + seed: Optional[int] = None, device: Optional[str] = "cuda", ) -> tuple[list[torch.Tensor], list[torch.Tensor]]: From f2872e8808c8974132b0eeadd3882829efa7b11f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E0=AE=AE=E0=AE=A9=E0=AF=8B=E0=AE=9C=E0=AF=8D=E0=AE=95?= =?UTF-8?q?=E0=AF=81=E0=AE=AE=E0=AE=BE=E0=AE=B0=E0=AF=8D=20=E0=AE=AA?= =?UTF-8?q?=E0=AE=B4=E0=AE=A9=E0=AE=BF=E0=AE=9A=E0=AF=8D=E0=AE=9A=E0=AE=BE?= =?UTF-8?q?=E0=AE=AE=E0=AE=BF?= Date: Fri, 7 Mar 2025 12:03:23 +0530 Subject: [PATCH 2/2] Add seed for consistent outputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: மனோஜ்குமார் பழனிச்சாமி --- tests/distributed/test_torchrun_example.py | 3 ++- tests/entrypoints/llm/test_encode.py | 3 ++- tests/entrypoints/llm/test_guided_generate.py | 2 +- tests/entrypoints/openai/test_chat_echo.py | 2 ++ tests/entrypoints/openai/test_metrics.py | 2 ++ tests/entrypoints/openai/test_root_path.py | 2 ++ 6 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/distributed/test_torchrun_example.py b/tests/distributed/test_torchrun_example.py index 1c6c28b4ed35..4ef33932538e 100644 --- a/tests/distributed/test_torchrun_example.py +++ b/tests/distributed/test_torchrun_example.py @@ -25,7 +25,8 @@ tensor_parallel_size=2, distributed_executor_backend="external_launcher", gpu_memory_utilization=random.uniform(0.7, 0.9), - swap_space=random.randint(1, 4)) + swap_space=random.randint(1, 4), + seed=0) outputs = llm.generate(prompts, sampling_params) diff --git a/tests/entrypoints/llm/test_encode.py b/tests/entrypoints/llm/test_encode.py index 6438743b6494..d10257761c86 100644 --- a/tests/entrypoints/llm/test_encode.py +++ b/tests/entrypoints/llm/test_encode.py @@ -34,7 +34,8 @@ def llm(): max_num_batched_tokens=32768, tensor_parallel_size=1, gpu_memory_utilization=0.75, - enforce_eager=True) + enforce_eager=True, + seed=0) with llm.deprecate_legacy_api(): yield weakref.proxy(llm) diff --git a/tests/entrypoints/llm/test_guided_generate.py b/tests/entrypoints/llm/test_guided_generate.py index fce581c78288..97ee027bde3b 100644 --- a/tests/entrypoints/llm/test_guided_generate.py +++ b/tests/entrypoints/llm/test_guided_generate.py @@ -21,7 +21,7 @@ def llm(): # pytest caches the fixture so we use weakref.proxy to # enable garbage collection - llm = LLM(model=MODEL_NAME, max_model_len=1024) + llm = LLM(model=MODEL_NAME, max_model_len=1024, seed=0) with llm.deprecate_legacy_api(): yield weakref.proxy(llm) diff --git a/tests/entrypoints/openai/test_chat_echo.py b/tests/entrypoints/openai/test_chat_echo.py index 3e76158a8c14..15da0f2fb5fe 100644 --- a/tests/entrypoints/openai/test_chat_echo.py +++ b/tests/entrypoints/openai/test_chat_echo.py @@ -24,6 +24,8 @@ def server(): "4080", "--chat-template", DUMMY_CHAT_TEMPLATE, + "--seed", + "0", ] with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: diff --git a/tests/entrypoints/openai/test_metrics.py b/tests/entrypoints/openai/test_metrics.py index 2bffd0ce138e..aa290fc25d7f 100644 --- a/tests/entrypoints/openai/test_metrics.py +++ b/tests/entrypoints/openai/test_metrics.py @@ -47,6 +47,8 @@ def default_server_args(): "--enforce-eager", "--max-num-seqs", "128", + "--seed", + "0", ] diff --git a/tests/entrypoints/openai/test_root_path.py b/tests/entrypoints/openai/test_root_path.py index c9fa192fb6ae..71fe8cbdba38 100644 --- a/tests/entrypoints/openai/test_root_path.py +++ b/tests/entrypoints/openai/test_root_path.py @@ -30,6 +30,8 @@ def server(): "/" + ROOT_PATH, "--chat-template", DUMMY_CHAT_TEMPLATE, + "--seed", + "0", ] envs = os.environ.copy()