diff --git a/tests/v1/tpu/test_basic.py b/tests/v1/tpu/test_basic.py index dd89059ded52..865b58bc7f4b 100644 --- a/tests/v1/tpu/test_basic.py +++ b/tests/v1/tpu/test_basic.py @@ -59,7 +59,7 @@ def test_basic( # actually test chunked prompt max_num_batched_tokens=1024, max_model_len=8192, - gpu_memory_utilization=0.95, + gpu_memory_utilization=0.7, max_num_seqs=max_num_seqs, tensor_parallel_size=tensor_parallel_size) as vllm_model: vllm_outputs = vllm_model.generate_greedy(example_prompts,