Skip to content

Commit 721fa52

Browse files
committed
[Bugfix] fix server startup for embedding models/in-process frontend
vllm-project#8491 (comment)
1 parent 6ffa3f3 commit 721fa52

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

vllm/entrypoints/openai/api_server.py

Lines changed: 6 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -532,8 +532,11 @@ async def run_server(args, **uvicorn_kwargs) -> None:
532532
logger.info("vLLM API server version %s", VLLM_VERSION)
533533
logger.info("args: %s", args)
534534

535-
temp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
536-
temp_socket.bind(("", args.port))
535+
# workaround to make sure that we bind the port before the engine is set up.
536+
# This avoids race conditions with ray.
537+
# see https://github.com/vllm-project/vllm/issues/8204
538+
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
539+
sock.bind(("", args.port))
537540

538541
def signal_handler(*_) -> None:
539542
# Interrupt server on sigterm while initializing
@@ -551,8 +554,6 @@ def signal_handler(*_) -> None:
551554
model_config = await async_engine_client.get_model_config()
552555
init_app_state(async_engine_client, model_config, app.state, args)
553556

554-
temp_socket.close()
555-
556557
shutdown_task = await serve_http(
557558
app,
558559
limit_concurrency=async_engine_client.limit_concurrency,
@@ -564,6 +565,7 @@ def signal_handler(*_) -> None:
564565
ssl_certfile=args.ssl_certfile,
565566
ssl_ca_certs=args.ssl_ca_certs,
566567
ssl_cert_reqs=args.ssl_cert_reqs,
568+
fd=sock.fileno(),
567569
**uvicorn_kwargs,
568570
)
569571

0 commit comments

Comments (0)