Skip to content

Commit 39c502c

Browse files
committed
Fix Ray error: local_world_size cannot be less than visible device count
Signed-off-by: leo-pony <[email protected]>
1 parent 84d7f5a commit 39c502c

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

vllm_ascend/worker/worker_v1.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -208,12 +208,18 @@ def _init_device(self):
208208
NPUPlatform.set_device(device)
209209
NPUPlatform.empty_cache()
210210

211-
visible_device_count = (torch.npu.device_count()
212-
if torch.npu.is_available() else 0)
213-
assert self.parallel_config.local_world_size <= visible_device_count, (
214-
f"local_world_size ({self.parallel_config.local_world_size}) must be "
215-
f"less than or equal to the number of visible devices "
216-
f"({visible_device_count}).")
211+
if (self.parallel_config.data_parallel_size > 1
212+
and self.parallel_config.data_parallel_size_local > 0
213+
and self.parallel_config.distributed_executor_backend
214+
not in ["ray", "external_launcher"] and
215+
self.vllm_config.parallel_config.data_parallel_backend != "ray"
216+
and self.vllm_config.parallel_config.nnodes_within_dp == 1):
217+
visible_device_count = (torch.npu.device_count()
218+
if torch.npu.is_available() else 0)
219+
assert self.parallel_config.local_world_size <= visible_device_count, (
220+
f"local_world_size ({self.parallel_config.local_world_size}) must "
221+
f"be less than or equal to the number of visible devices "
222+
f"({visible_device_count}).")
217223

218224
self.init_npu_memory = NPUPlatform.mem_get_info()[0]
219225
# Initialize the distributed environment.

0 commit comments

Comments
 (0)