File tree Expand file tree Collapse file tree 2 files changed +7
-6
lines changed
Expand file tree Collapse file tree 2 files changed +7
-6
lines changed Original file line number Diff line number Diff line change @@ -201,6 +201,7 @@ async def runtime_error_handler(_, __):
201201 # In this case we cannot await the server shutdown here because
202202 # this handler must first return to close the connection for
203203 # this request.
204+ global server
204205 server .should_exit = True
205206
206207 return Response (status_code = HTTPStatus .INTERNAL_SERVER_ERROR )
@@ -212,6 +213,7 @@ async def engine_dead_handler(_, __):
212213 if not args .keep_alive_on_engine_death :
213214 logger .fatal ("AsyncLLMEngine is already dead, terminating server "
214215 "process" )
216+ global server
215217 server .should_exit = True
216218
217219 return Response (status_code = HTTPStatus .INTERNAL_SERVER_ERROR )
Original file line number Diff line number Diff line change @@ -134,12 +134,11 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
134134 help = "When --max-logprobs is specified, represents single tokens as"
135135 "strings of the form 'token_id:{token_id}' so that tokens that"
136136 "are not JSON-encodable can be identified." )
137- parser .add_argument (
138- "--keep-alive-on-engine-death" ,
139- action = "store_true" ,
140- help = "The default behavior is to stop the server "
141- "process when the LLM engine dies. Set this flag to "
142- "keep the server up instead." )
137+ parser .add_argument ("--keep-alive-on-engine-death" ,
138+ action = "store_true" ,
139+ help = "The default behavior is to stop the server "
140+ "process when the LLM engine dies. Set this flag to "
141+ "keep the server up instead." )
143142
144143 parser = AsyncEngineArgs .add_cli_args (parser )
145144
You can’t perform that action at this time.
0 commit comments