File tree Expand file tree Collapse file tree 1 file changed +12
-1
lines changed
Expand file tree Collapse file tree 1 file changed +12
-1
lines changed Original file line number Diff line number Diff line change @@ -3602,7 +3602,18 @@ def _dummy_run(
36023602 cudagraph_runtime_mode == CUDAGraphMode .PIECEWISE
36033603 and not self .speculative_config .enforce_eager
36043604 )
3605- self .drafter .dummy_run (num_tokens , use_cudagraphs = use_cudagraphs )
3605+
3606+ # Note(gnovack) - We need to disable cudagraphs for one of the two
3607+ # LoRA cases (activate_lora=True) when cudagraph_specialize_lora is
3608+ # enabled. This is a short-term mitigation for the issue described in
3609+ # https://github.com/vllm-project/vllm/issues/28334
3610+ if self .compilation_config .cudagraph_specialize_lora and activate_lora :
3611+ use_cudagraphs = False
3612+
3613+ self .drafter .dummy_run (
3614+ num_tokens ,
3615+ use_cudagraphs = use_cudagraphs ,
3616+ )
36063617
36073618 # This is necessary to avoid blocking DP.
36083619 # For dummy runs, we typically skip EPLB since we don't have any real
You can’t perform that action at this time.
0 commit comments