Skip to content

Commit c78b81e

Browse files
merrymercy authored and MahmoudAshraf97 committed
Split the scheduler into multiple mixin classes to reduce the file size (sgl-project#8483)
1 parent e104779 commit c78b81e

12 files changed

Lines changed: 868 additions & 784 deletions

python/sglang/srt/disaggregation/decode.py

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -694,10 +694,7 @@ def event_loop_normal_disagg_decode(self: Scheduler):
694694
+ len(self.disagg_decode_prealloc_queue.queue)
695695
== 0
696696
):
697-
# When the server is idle, do self-check and re-init some states
698-
self.check_memory()
699-
self.new_token_ratio = self.init_new_token_ratio
700-
self.maybe_sleep_on_idle()
697+
self.self_check_during_idle()
701698

702699
self.last_batch = batch
703700

@@ -771,10 +768,7 @@ def event_loop_overlap_disagg_decode(self: Scheduler):
771768
+ len(self.disagg_decode_prealloc_queue.queue)
772769
== 0
773770
):
774-
# When the server is idle, do self-check and re-init some states
775-
self.check_memory()
776-
self.new_token_ratio = self.init_new_token_ratio
777-
self.maybe_sleep_on_idle()
771+
self.self_check_during_idle()
778772

779773
self.last_batch = batch
780774
self.last_batch_in_queue = last_batch_in_queue

python/sglang/srt/disaggregation/prefill.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -287,9 +287,7 @@ def event_loop_normal_disagg_prefill(self: Scheduler) -> None:
287287
self.process_disagg_prefill_inflight_queue()
288288

289289
if batch is None and len(self.disagg_prefill_inflight_queue) == 0:
290-
self.check_memory()
291-
self.new_token_ratio = self.init_new_token_ratio
292-
self.maybe_sleep_on_idle()
290+
self.self_check_during_idle()
293291

294292
self.last_batch = batch
295293
# HACK (byronhsu): reset the batch_is_full flag because we never enter update_running_batch which resets it
@@ -337,9 +335,7 @@ def event_loop_overlap_disagg_prefill(self: Scheduler) -> None:
337335
self.process_disagg_prefill_inflight_queue()
338336

339337
if batch is None and len(self.disagg_prefill_inflight_queue) == 0:
340-
self.check_memory()
341-
self.new_token_ratio = self.init_new_token_ratio
342-
self.maybe_sleep_on_idle()
338+
self.self_check_during_idle()
343339

344340
self.last_batch = batch
345341
# HACK (byronhsu): reset the batch_is_full flag because we never enter update_running_batch which resets it

python/sglang/srt/entrypoints/engine.py

Lines changed: 11 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -652,25 +652,19 @@ def _set_envs_and_config(server_args: ServerArgs):
652652
"Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`",
653653
)
654654

655-
def sigchld_handler(signum, frame):
656-
pid, exitcode = os.waitpid(0, os.WNOHANG)
657-
if exitcode != 0:
658-
logger.warning(
659-
f"Child process unexpectedly failed with {exitcode=}. {pid=}"
655+
if True: # Keep this check for internal code compatibility
656+
# Register the signal handler.
657+
# The child processes will send SIGQUIT to this process when any error happens
658+
# This process then clean up the whole process tree
659+
# Note: This sigquit handler is used in the launch phase, and may be replaced by
660+
# the running_phase_sigquit_handler in the tokenizer manager after the grpc server is launched.
661+
def launch_phase_sigquit_handler(signum, frame):
662+
logger.error(
663+
"Received sigquit from a child process. It usually means the child failed."
660664
)
665+
kill_process_tree(os.getpid())
661666

662-
signal.signal(signal.SIGCHLD, sigchld_handler)
663-
664-
# Register the signal handler.
665-
# The child processes will send SIGQUIT to this process when any error happens
666-
# This process then clean up the whole process tree
667-
def sigquit_handler(signum, frame):
668-
logger.error(
669-
"Received sigquit from a child process. It usually means the child failed."
670-
)
671-
kill_process_tree(os.getpid())
672-
673-
signal.signal(signal.SIGQUIT, sigquit_handler)
667+
signal.signal(signal.SIGQUIT, launch_phase_sigquit_handler)
674668

675669
# Set mp start method
676670
mp.set_start_method("spawn", force=True)

python/sglang/srt/entrypoints/http_server.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -238,6 +238,9 @@ async def health() -> Response:
238238
@app.get("/health_generate")
239239
async def health_generate(request: Request) -> Response:
240240
"""Check the health of the inference server by generating one token."""
241+
if _global_state.tokenizer_manager.gracefully_exit:
242+
logger.info("Health check request received during shutdown. Returning 503.")
243+
return Response(status_code=503)
241244

242245
sampling_params = {"max_new_tokens": 1, "temperature": 0.0}
243246
rid = f"HEALTH_CHECK_{time.time()}"
@@ -260,9 +263,14 @@ async def gen():
260263
async for _ in _global_state.tokenizer_manager.generate_request(gri, request):
261264
break
262265

263-
tic = time.perf_counter()
266+
# This request is a special request.
267+
# If the server already has something running, this request will be ignored, so it creates zero overhead.
268+
# If the server is not running, this request will be run, so we know whether the server is healthy.
264269
task = asyncio.create_task(gen())
265-
while time.perf_counter() < tic + HEALTH_CHECK_TIMEOUT:
270+
271+
# As long as we receive any response from the detokenizer/scheduler, we consider the server is healthy.
272+
tic = time.time()
273+
while time.time() < tic + HEALTH_CHECK_TIMEOUT:
266274
await asyncio.sleep(1)
267275
if _global_state.tokenizer_manager.last_receive_tstamp > tic:
268276
task.cancel()

python/sglang/srt/managers/io_struct.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -152,8 +152,6 @@ def normalize_batch_and_arguments(self):
152152
else:
153153
self._normalize_batch_inputs()
154154

155-
self._validate_session_params()
156-
157155
def _validate_inputs(self):
158156
"""Validate that the input configuration is valid."""
159157
if (

0 commit comments

Comments
 (0)