Skip to content

Commit 145c2ff

Browse files
[Bugfix] Revert MoE Triton Config Default (#12629)
SUMMARY:
* A previous PR that pulled in block-wise quantization configs (https://github.com/vllm-project/vllm/pull/11589/files) also changed the default Triton configs for FP8.
* This broke MoE on L4, since there was not enough shared memory (SHM) for the new default configuration.
* This change reverts the non-block (block_shape is None) case to the original default.
Signed-off-by: [email protected] <[email protected]>
1 parent 415f194 commit 145c2ff

File tree

1 file changed

+11
-30
lines changed

1 file changed

+11
-30
lines changed

vllm/model_executor/layers/fused_moe/fused_moe.py

Lines changed: 11 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -660,36 +660,17 @@ def get_default_config(
660660
is_marlin: bool,
661661
block_shape: Optional[List[int]] = None,
662662
) -> Dict[str, int]:
663-
if dtype == "fp8_w8a8":
664-
if block_shape is None:
665-
config = {
666-
"BLOCK_SIZE_M": 128,
667-
"BLOCK_SIZE_N": 256,
668-
"BLOCK_SIZE_K": 128,
669-
"GROUP_SIZE_M": 32,
670-
"num_warps": 8,
671-
"num_stages": 4,
672-
}
673-
if M <= E:
674-
config = {
675-
"BLOCK_SIZE_M": 64,
676-
"BLOCK_SIZE_N": 128,
677-
"BLOCK_SIZE_K": 128,
678-
"GROUP_SIZE_M": 1,
679-
"num_warps": 4,
680-
"num_stages": 4,
681-
}
682-
else:
683-
# Block-wise quant: BLOCK_SIZE_N must be divisible by block_shape[0]
684-
# BLOCK_SIZE_K must be divisible by block_shape[1]
685-
config = {
686-
"BLOCK_SIZE_M": 64,
687-
"BLOCK_SIZE_N": block_shape[0],
688-
"BLOCK_SIZE_K": block_shape[1],
689-
"GROUP_SIZE_M": 32,
690-
"num_warps": 4,
691-
"num_stages": 3,
692-
}
663+
if dtype == "fp8_w8a8" and block_shape is not None:
664+
# Block-wise quant: BLOCK_SIZE_N must be divisible by block_shape[0]
665+
# BLOCK_SIZE_K must be divisible by block_shape[1]
666+
config = {
667+
"BLOCK_SIZE_M": 64,
668+
"BLOCK_SIZE_N": block_shape[0],
669+
"BLOCK_SIZE_K": block_shape[1],
670+
"GROUP_SIZE_M": 32,
671+
"num_warps": 4,
672+
"num_stages": 3,
673+
}
693674
else:
694675
config = {
695676
"BLOCK_SIZE_M": 64,

0 commit comments

Comments
 (0)