Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions tests/entrypoints/openai/rpc/test_zmq_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,12 @@ async def test_client_aborts_use_timeouts(monkeypatch, dummy_server,
m.setattr(dummy_server, "abort", lambda x: None)
m.setattr(client, "_data_timeout", 10)

# Ensure the client doesn't hang
# The client should suppress timeouts on `abort`s
# and return normally, assuming the server will eventually
# abort the request.
client_task = asyncio.get_running_loop().create_task(
client.abort("test request id"))
with pytest.raises(TimeoutError, match="Server didn't reply within"):
await asyncio.wait_for(client_task, timeout=0.05)
await asyncio.wait_for(client_task, timeout=0.05)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I right that test request id will trigger an Exception?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The m.setattr(dummy_server, "abort", lambda x: None) patches the server to just do nothing when it gets an abort request, so no it shouldn't cause an exception

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Thanks



@pytest.mark.asyncio
Expand Down
5 changes: 2 additions & 3 deletions vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import re
import tempfile
from argparse import Namespace
from contextlib import asynccontextmanager, suppress
from contextlib import asynccontextmanager
from http import HTTPStatus
from typing import AsyncIterator, Optional, Set

Expand Down Expand Up @@ -83,8 +83,7 @@ async def lifespan(app: FastAPI):
async def _force_log():
while True:
await asyncio.sleep(10)
with suppress(Exception):
await async_engine_client.do_log_stats()
await async_engine_client.do_log_stats()

if not engine_args.disable_log_stats:
task = asyncio.create_task(_force_log())
Expand Down
5 changes: 4 additions & 1 deletion vllm/entrypoints/openai/rpc/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,10 @@ async def _is_tracing_enabled_rpc(self) -> bool:

async def abort(self, request_id: str):
"""Send an ABORT_REQUEST signal to the RPC Server"""
with suppress(RPCClientClosedError):

# Suppress timeouts as well- if the server is busy and does not ack in
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should have a very detailed comment here about why this is okay, including the notes from slack

# time we assume it got the message.
with suppress(RPCClientClosedError, TimeoutError):
await self._send_one_way_rpc_request(
request=RPCAbortRequest(request_id),
error_message=f"RPCAbortRequest {request_id} failed")
Expand Down