
Commit 035e54b

full propagation
Signed-off-by: Will Eaton <[email protected]>
1 parent d3d247d commit 035e54b

File tree

5 files changed: +48 -24 lines changed


tests/v1/kv_connector/nixl_integration/toy_proxy_server.py

Lines changed: 28 additions & 15 deletions
@@ -12,8 +12,7 @@
 from fastapi import FastAPI, Request
 from fastapi.responses import StreamingResponse
 
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.DEBUG)
+logger = logging.getLogger("uvicorn.error")
 
 
 @asynccontextmanager
@@ -180,7 +179,8 @@ async def send_request_to_service(client_info: dict, endpoint: str,
 
 
 async def stream_service_response(client_info: dict, endpoint: str,
-                                  req_data: dict, request_id: str):
+                                  req_data: dict, request_id: str,
+                                  request: Request):
     """
     Asynchronously stream response from a service using a client from the pool.
     """
@@ -189,29 +189,41 @@ async def stream_service_response(client_info: dict, endpoint: str,
         "X-Request-Id": request_id
     }
 
+    # get logger from request state for ASGI integration
+    req_logger = getattr(request.app.state, 'logger', logger)
+
     async with client_info['client'].stream("POST",
                                             endpoint,
                                             json=req_data,
                                             headers=headers) as response:
-        logger.info("Decode server response status: %s for request %s",
-                    response.status_code, request_id)
+        req_logger.info("Decode server response status: %s for request %s",
+                        response.status_code, request_id)
 
         # handle error responses with context
         if response.status_code >= 400:
            error_body = await response.aread()
            try:
                import json
                error_data = json.loads(error_body)
-                logger.error(
-                    "Decode server error %d for request %s: %s. " \
-                    "Error context: %s",
-                    response.status_code, request_id,
-                    error_data.get('message', 'no message'),
-                    error_data.get('error_context', 'no context'))
+                error_ctx = error_data.get('error', {}).get('error_context')
+                if error_ctx:
+                    req_logger.error(
+                        "Decode server error %d for request %s: %s. "
+                        "Error type: %s, Metadata: %s", response.status_code,
+                        request_id,
+                        error_data.get('error',
+                                       {}).get('message', 'no message'),
+                        error_ctx.get('error_type'), error_ctx.get('metadata'))
+                else:
+                    req_logger.error(
+                        "Decode server error %d for request %s: %s",
+                        response.status_code, request_id,
+                        error_data.get('error',
+                                       {}).get('message', 'no message'))
            except json.JSONDecodeError:
-                logger.error("Decode server error %d for request %s: %s",
-                             response.status_code, request_id,
-                             error_body.decode('utf-8'))
+                req_logger.error("Decode server error %d for request %s: %s",
+                                 response.status_code, request_id,
+                                 error_body.decode('utf-8'))
            response.raise_for_status()
 
         async for chunk in response.aiter_bytes():
@@ -247,7 +259,8 @@ async def generate_stream():
             async for chunk in stream_service_response(decode_client_info,
                                                        api,
                                                        req_data,
-                                                       request_id=request_id):
+                                                       request_id=request_id,
+                                                       request=request):
                 chunk_count += 1
                 # parse SSE data to log key fields
                 chunk_str = chunk.decode('utf-8')
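Note: the nested error_data.get('error', {}).get('error_context') lookup above assumes the decode server replies with the OpenAI-style error envelope built in protocol.py. A minimal, self-contained sketch of that parsing path, using a hypothetical error body (all field values below are made up for illustration):

import json

# Hypothetical decode-server error body; the ErrorInfo fields sit under the
# top-level "error" key, with the new optional "error_context" dict inside.
sample_body = json.dumps({
    "error": {
        "message": "KV transfer failed",
        "type": "InternalServerError",
        "code": 500,
        "error_context": {
            "error_type": "kv_connector_error",
            "metadata": {"request_id": "req-123"},
        },
    }
})

error_data = json.loads(sample_body)
error_ctx = error_data.get('error', {}).get('error_context')
if error_ctx:
    # Same fields the proxy now logs: error_type and metadata.
    print(error_ctx.get('error_type'), error_ctx.get('metadata'))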

vllm/entrypoints/openai/protocol.py

Lines changed: 1 addition & 0 deletions
@@ -115,6 +115,7 @@ class ErrorInfo(OpenAIBaseModel):
     type: str
     param: Optional[str] = None
     code: int
+    error_context: Optional[dict[str, Any]] = None
 
 
 class ErrorResponse(OpenAIBaseModel):
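The new field is optional, so existing error responses without context still validate. A standalone sketch of how it serializes (this mirrors ErrorInfo rather than importing the vLLM class, and the values are illustrative):

from typing import Any, Optional

from pydantic import BaseModel


# Stand-in for ErrorInfo; the real class derives from OpenAIBaseModel.
class ErrorInfoSketch(BaseModel):
    message: str
    type: str
    param: Optional[str] = None
    code: int
    error_context: Optional[dict[str, Any]] = None


info = ErrorInfoSketch(message="engine dead",
                       type="InternalServerError",
                       code=500,
                       error_context={"error_type": "engine_dead"})
print(info.model_dump())
# error_context stays None (serialized as null) when no context is attached.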

vllm/entrypoints/openai/serving_chat.py

Lines changed: 4 additions & 2 deletions
@@ -585,7 +585,8 @@ async def chat_completion_stream_generator(
                         error_ctx.message,
                         err_type=error_ctx.error_type,
                         status_code=error_ctx.http_status
-                        or HTTPStatus.INTERNAL_SERVER_ERROR)
+                        or HTTPStatus.INTERNAL_SERVER_ERROR,
+                        error_context=res.error_context)
                 else:
                     yield self.create_streaming_error_response(
                         "Request aborted due to an internal error.",
@@ -1197,7 +1198,8 @@ async def chat_completion_full_generator(
                     error_ctx.message,
                     err_type=error_ctx.error_type,
                     status_code=error_ctx.http_status
-                    or HTTPStatus.INTERNAL_SERVER_ERROR)
+                    or HTTPStatus.INTERNAL_SERVER_ERROR,
+                    error_context=res.error_context)
             else:
                 return self.create_error_response(
                     "Request aborted due to an internal error.",
vllm/entrypoints/openai/serving_completion.py

Lines changed: 4 additions & 2 deletions
@@ -282,7 +282,8 @@ async def create_completion(
                     error_ctx.message,
                     err_type=error_ctx.error_type,
                     status_code=error_ctx.http_status
-                    or HTTPStatus.INTERNAL_SERVER_ERROR)
+                    or HTTPStatus.INTERNAL_SERVER_ERROR,
+                    error_context=final_res.error_context)
             else:
                 return self.create_error_response(
                     "Request aborted due to an internal error.",
@@ -372,7 +373,8 @@ async def completion_stream_generator(
                         error_ctx.message,
                         err_type=error_ctx.error_type,
                         status_code=error_ctx.http_status
-                        or HTTPStatus.INTERNAL_SERVER_ERROR)
+                        or HTTPStatus.INTERNAL_SERVER_ERROR,
+                        error_context=res.error_context)
                 else:
                     yield self.create_streaming_error_response(
                         "Request aborted due to an internal error.",

vllm/entrypoints/openai/serving_engine.py

Lines changed: 11 additions & 5 deletions
@@ -441,26 +441,32 @@ def create_error_response(
         message: str,
         err_type: str = "BadRequestError",
         status_code: HTTPStatus = HTTPStatus.BAD_REQUEST,
+        error_context: Optional[dict[str, Any]] = None,
     ) -> ErrorResponse:
         if self.log_error_stack:
             exc_type, _, _ = sys.exc_info()
             if exc_type is not None:
                 traceback.print_exc()
             else:
                 traceback.print_stack()
-        return ErrorResponse(error=ErrorInfo(
-            message=message, type=err_type, code=status_code.value))
+        return ErrorResponse(error=ErrorInfo(message=message,
+                                             type=err_type,
+                                             code=status_code.value,
+                                             error_context=error_context))
 
     def create_streaming_error_response(
         self,
         message: str,
         err_type: str = "BadRequestError",
         status_code: HTTPStatus = HTTPStatus.BAD_REQUEST,
+        error_context: Optional[dict[str, Any]] = None,
     ) -> str:
         json_str = json.dumps(
-            self.create_error_response(message=message,
-                                       err_type=err_type,
-                                       status_code=status_code).model_dump())
+            self.create_error_response(
+                message=message,
+                err_type=err_type,
+                status_code=status_code,
+                error_context=error_context).model_dump())
         return json_str
 
     async def _check_model(
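Taken together, create_error_response and create_streaming_error_response now carry the context end to end, and the toy proxy reads it back out of the JSON body. A rough round-trip sketch (a standalone approximation with hypothetical values, not a call into the vLLM classes themselves):

import json
from http import HTTPStatus
from typing import Any, Optional


# Approximation of what create_streaming_error_response serializes: the
# ErrorResponse envelope with the new error_context field attached.
def streaming_error_sketch(message: str,
                           err_type: str = "BadRequestError",
                           status_code: HTTPStatus = HTTPStatus.BAD_REQUEST,
                           error_context: Optional[dict[str, Any]] = None) -> str:
    return json.dumps({
        "error": {
            "message": message,
            "type": err_type,
            "param": None,
            "code": status_code.value,
            "error_context": error_context,
        }
    })


# Producer side: an internal error with attached context.
json_str = streaming_error_sketch(
    "Request aborted due to an internal error.",
    err_type="InternalServerError",
    status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
    error_context={"error_type": "engine_dead", "metadata": {}})

# Consumer side: the same nested lookup the toy proxy performs.
ctx = json.loads(json_str).get('error', {}).get('error_context')
assert ctx and ctx['error_type'] == 'engine_dead'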
