
Commit c4eaac0

Bug fix for the fused MoE LoRA Triton kernel

Signed-off-by: chaojun-zhang <[email protected]>
1 parent a7adbc6 commit c4eaac0

File tree

1 file changed: +1 -3 lines changed


vllm/lora/ops/triton_ops/fused_moe_lora_op.py

Lines changed: 1 addition & 3 deletions
@@ -26,7 +26,7 @@ def _get_ptr(lora_weights: list[torch.Tensor], device: torch.device):
     tensor_ptrs = []
     for lora_weight in lora_weights:
         tensor_ptrs.append(lora_weight.data_ptr())
-    ptr_tensor = torch.tensor(tensor_ptrs, device=device)
+    ptr_tensor = torch.tensor(tensor_ptrs, device=device, dtype=torch.uint64)

     _LORA_PTR_DICT[key] = ptr_tensor
     return _LORA_PTR_DICT.get(key)
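The dtype change guards the pointer table that `_get_ptr` builds for the Triton kernels: `torch.tensor` infers `int64` for a list of Python ints, but `data_ptr()` returns a raw, unsigned address, so an address at or above 2**63 would not fit the inferred dtype. A minimal sketch of the pattern (plain CPU tensors, assuming a PyTorch build recent enough to have `torch.uint64`):

```python
import torch

# Build the same kind of pointer table _get_ptr builds, on CPU for
# self-containedness. data_ptr() yields a raw address as a Python int.
weights = [torch.randn(4, 4) for _ in range(3)]
tensor_ptrs = [w.data_ptr() for w in weights]

# Without an explicit dtype, torch.tensor infers int64 here; an address
# with the high bit set (>= 2**63) would not fit and the conversion
# fails. dtype=torch.uint64 matches the full unsigned pointer width.
ptr_tensor = torch.tensor(tensor_ptrs, dtype=torch.uint64)
print(ptr_tensor.dtype)  # torch.uint64
```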
@@ -229,7 +229,6 @@ def _fused_moe_lora_shrink(
         "num_stages": num_stages,
         "SPLIT_K": split_k,
         "USE_GDC": use_gdc,
-        "launch_pdl": use_gdc,  # triton kernel metadata
     }

     b_ptr = _get_ptr(lora_a_stacked, device)
@@ -336,7 +335,6 @@ def _fused_moe_lora_expand(
         "num_stages": num_stages,
         "SPLIT_K": split_k,  # Set split_k = 1 for expand calls
         "USE_GDC": use_gdc,
-        "launch_pdl": use_gdc,  # triton kernel metadata
     }

     grid = lambda META: (
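The other two hunks drop `launch_pdl` from the keyword dictionaries that the shrink and expand paths splat into the kernel launch. Every key in such a dict must bind to a kernel parameter, or be a launch option the installed Triton version actually accepts, so a stray key can fail the launch outright. A hedged sketch with a hypothetical kernel (not the vLLM one) illustrating that contract:

```python
import torch
import triton
import triton.language as tl

@triton.jit
def scale_kernel(x_ptr, out_ptr, n, SCALE: tl.constexpr, BLOCK: tl.constexpr):
    # Each program scales one BLOCK-sized tile of x by SCALE.
    pid = tl.program_id(0)
    offs = pid * BLOCK + tl.arange(0, BLOCK)
    mask = offs < n
    x = tl.load(x_ptr + offs, mask=mask)
    tl.store(out_ptr + offs, x * SCALE, mask=mask)

x = torch.randn(1024, device="cuda")
out = torch.empty_like(x)

# The launch config is splatted as keyword arguments, mirroring the
# shape of the vLLM shrink/expand code. Each key must name a kernel
# parameter above.
config = {"SCALE": 2.0, "BLOCK": 256}
grid = (triton.cdiv(x.numel(), config["BLOCK"]),)
scale_kernel[grid](x, out, x.numel(), **config)

# config["launch_pdl"] = True  # an extra key like this errors at
#                              # launch on Triton builds that do not
#                              # accept it, the failure mode this
#                              # commit removes
torch.testing.assert_close(out, x * 2.0)
```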
