1 parent 7c01f70 commit f7dac83
vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -423,6 +423,11 @@ def fused_experts(hidden_states: torch.Tensor,
     M, _ = hidden_states.shape
     E, N, _ = w1.shape
 
+    if M > 65536:
+        # https://github.com/vllm-project/vllm/issues/5938
+        raise ValueError("MoE kernel does not support more than 65536 tokens, "
+                         f"but got {M}")
+
     if override_config:
         config = override_config
     else:
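This commit adds an explicit guard in `fused_experts`: batches with more than 65536 tokens now fail fast with a clear `ValueError` (see the linked issue #5938) rather than reaching the MoE kernel. For callers that can hit this limit, a minimal workaround sketch, not part of this commit, is to split the batch along the token dimension and re-concatenate the results. The full `fused_experts` signature is not shown in this diff, so the remaining arguments are passed through via `**kwargs`, and the function is assumed here to return its output tensor.

```python
# Hedged workaround sketch (not part of this commit): chunk the batch along
# the token dimension so each fused_experts call stays within the limit.
import torch

from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts

MAX_MOE_TOKENS = 65536  # limit enforced by the new check


def fused_experts_chunked(hidden_states: torch.Tensor,
                          **kwargs) -> torch.Tensor:
    """Run fused_experts on <= 65536-token slices and concatenate them.

    Assumes fused_experts returns its output tensor and that all other
    arguments (weights, routing tensors, config, ...) can be forwarded
    unchanged to every chunk.
    """
    outputs = [
        fused_experts(chunk, **kwargs)
        for chunk in torch.split(hidden_states, MAX_MOE_TOKENS, dim=0)
    ]
    return torch.cat(outputs, dim=0)
```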