Skip to content
This repository was archived by the owner on Nov 25, 2025. It is now read-only.

Commit d1e9dc7

Browse files
oldsharpcctry
authored and committed
[feature] [sgl-router] Add a dp-aware routing strategy (sgl-project#6869)
1 parent dbf44c7 commit d1e9dc7

File tree

19 files changed

+726
-16
lines changed

19 files changed

+726
-16
lines changed

docs/router/router.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,14 @@ Process:
141141

142142
For unbalanced systems, this strategy tracks pending request counts per worker and routes new requests to the least busy worker. This helps maintain optimal load distribution across workers.
143143

144+
***Data-Parallelism Aware Routing***
145+
146+
An additional DP-aware routing strategy can be enabled on top of the sgl-router’s hybrid cache-aware load-balancing strategy by setting the `--dp-aware` flag when starting the router.
147+
148+
When this flag is enabled, the router attempts to contact the workers to retrieve the `dp_size` of each one and registers the new workers at the DP-rank level. In this mode, the router applies the cache-aware routing strategy in a more fine-grained manner, with assistance from the DP controller on the SRT side.
149+
150+
By default (when the flag is not set), the SRT’s DP controller distributes incoming requests across DP ranks in a round-robin fashion.
151+
144152
## Configuration Parameters
145153

146154
1. `cache_threshold`: (float, 0.0 to 1.0, default: 0.5)

sgl-router/py_src/sglang_router/launch_router.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ class RouterArgs:
5050
eviction_interval: int = 60
5151
max_tree_size: int = 2**24
5252
max_payload_size: int = 256 * 1024 * 1024 # 256MB default for large batches
53+
dp_aware: bool = False
54+
api_key: Optional[str] = None
5355
log_dir: Optional[str] = None
5456
log_level: Optional[str] = None
5557
# Service discovery configuration
@@ -197,6 +199,17 @@ def add_cli_args(
197199
default=RouterArgs.max_payload_size,
198200
help="Maximum payload size in bytes",
199201
)
202+
parser.add_argument(
203+
f"--{prefix}dp-aware",
204+
action="store_true",
205+
help="Enable data parallelism aware schedule",
206+
)
207+
parser.add_argument(
208+
f"--{prefix}api-key",
209+
type=str,
210+
default=None,
211+
help="The api key used for the authorization with the worker. Useful when the dp aware scheduling strategy is enabled.",
212+
)
200213
parser.add_argument(
201214
f"--{prefix}log-dir",
202215
type=str,
@@ -304,6 +317,8 @@ def from_cli_args(
304317
eviction_interval=getattr(args, f"{prefix}eviction_interval"),
305318
max_tree_size=getattr(args, f"{prefix}max_tree_size"),
306319
max_payload_size=getattr(args, f"{prefix}max_payload_size"),
320+
dp_aware=getattr(args, f"{prefix}dp_aware", False),
321+
api_key=getattr(args, f"{prefix}api_key", None),
307322
log_dir=getattr(args, f"{prefix}log_dir", None),
308323
log_level=getattr(args, f"{prefix}log_level", None),
309324
service_discovery=getattr(args, f"{prefix}service_discovery", False),
@@ -463,6 +478,8 @@ def launch_router(args: argparse.Namespace) -> Optional[Router]:
463478
eviction_interval_secs=router_args.eviction_interval,
464479
max_tree_size=router_args.max_tree_size,
465480
max_payload_size=router_args.max_payload_size,
481+
dp_aware=router_args.dp_aware,
482+
api_key=router_args.api_key,
466483
log_dir=router_args.log_dir,
467484
log_level=router_args.log_level,
468485
service_discovery=router_args.service_discovery,

sgl-router/py_src/sglang_router/router.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ class Router:
3131
routing. Default: 60
3232
max_payload_size: Maximum payload size in bytes. Default: 256MB
3333
max_tree_size: Maximum size of the approximation tree for cache-aware routing. Default: 2^24
34+
dp_aware: Enable data parallelism aware schedule. Default: False
35+
api_key: The api key used for the authorization with the worker.
36+
Useful when the dp aware scheduling strategy is enabled.
37+
Default: None
3438
log_dir: Directory to store log files. If None, logs are only output to console. Default: None
3539
log_level: Logging level. Options: 'debug', 'info', 'warning', 'error', 'critical'.
3640
service_discovery: Enable Kubernetes service discovery. When enabled, the router will
@@ -73,6 +77,8 @@ def __init__(
7377
eviction_interval_secs: int = 60,
7478
max_tree_size: int = 2**24,
7579
max_payload_size: int = 256 * 1024 * 1024, # 256MB
80+
dp_aware: bool = False,
81+
api_key: Optional[str] = None,
7682
log_dir: Optional[str] = None,
7783
log_level: Optional[str] = None,
7884
service_discovery: bool = False,
@@ -110,6 +116,8 @@ def __init__(
110116
eviction_interval_secs=eviction_interval_secs,
111117
max_tree_size=max_tree_size,
112118
max_payload_size=max_payload_size,
119+
dp_aware=dp_aware,
120+
api_key=api_key,
113121
log_dir=log_dir,
114122
log_level=log_level,
115123
service_discovery=service_discovery,

sgl-router/py_test/run_suite.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
arg_parser.add_argument(
99
"--timeout-per-file",
1010
type=int,
11-
default=1000,
11+
default=2000,
1212
help="The time limit for running one file in seconds.",
1313
)
1414
args = arg_parser.parse_args()

sgl-router/py_test/test_launch_router.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def setUp(self):
4343
selector=None,
4444
service_discovery_port=80,
4545
service_discovery_namespace=None,
46+
dp_aware=False,
4647
prometheus_port=None,
4748
prometheus_host=None,
4849
# PD-specific attributes
@@ -111,6 +112,52 @@ def test_launch_router_with_service_discovery_namespace(self):
111112
)
112113
self.run_router_process(args)
113114

115+
def test_launch_router_common_with_dp_aware(self):
116+
args = self.create_router_args(
117+
worker_urls=["http://localhost:8000"],
118+
dp_aware=True,
119+
)
120+
self.run_router_process(args)
121+
122+
def test_launch_router_with_empty_worker_urls_with_dp_aware(self):
123+
args = self.create_router_args(
124+
worker_urls=[],
125+
dp_aware=True,
126+
)
127+
self.run_router_process(args)
128+
129+
def test_launch_router_common_with_dp_aware_service_discovery(self):
130+
# Test launching the router with both service_discovery and dp_aware enabled
131+
# Should fail since service_discovery and dp_aware conflict
132+
args = self.create_router_args(
133+
worker_urls=["http://localhost:8000"],
134+
dp_aware=True,
135+
service_discovery=True,
136+
selector=["app=test-worker"],
137+
)
138+
139+
def run_router():
140+
try:
141+
from sglang_router.launch_router import launch_router
142+
143+
router = launch_router(args)
144+
if router is None:
145+
return 1
146+
return 0
147+
except Exception as e:
148+
print(e)
149+
return 1
150+
151+
process = multiprocessing.Process(target=run_router)
152+
try:
153+
process.start()
154+
# Wait 3 seconds
155+
time.sleep(3)
156+
# Should fail since service_discovery and dp_aware conflict
157+
self.assertFalse(process.is_alive())
158+
finally:
159+
terminate_process(process)
160+
114161
def test_launch_router_pd_mode_basic(self):
115162
"""Test basic PD router functionality without actually starting servers."""
116163
# This test just verifies the PD router can be created and configured

0 commit comments

Comments
 (0)