Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion areal/launcher/vllm_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,11 @@ def run(self):
visible = os.getenv(device_control_env_var).split(",")
n_visible_devices = len(visible)
n_servers_per_proc = max(1, n_visible_devices // gpus_per_server)
server_idx_offset = min(list(map(int, visible))) // gpus_per_server
# Use modulo to ensure server_idx_offset is node-local (0 to n_servers_per_node-1)
# This prevents port overflow when running multiple nodes
server_idx_offset = (
min(list(map(int, visible))) // gpus_per_server
) % n_servers_per_node
else:
visible = [str(i) for i in range(self.n_gpus_per_node)]
n_servers_per_proc = n_servers_per_node
Expand Down
Loading