Skip to content

Commit a6ac802

Browse files
committed
[bugfix] support eagle with lora cudagraph specialization
Signed-off-by: gnovack <[email protected]>
1 parent d1dd5f5 commit a6ac802

File tree

2 files changed

+8
-2
lines changed

2 files changed

+8
-2
lines changed

vllm/v1/spec_decode/eagle.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
     get_layers_from_vllm_config,
 )
 from vllm.distributed.parallel_state import get_pp_group
-from vllm.forward_context import set_forward_context
+from vllm.forward_context import BatchDescriptor, set_forward_context
 from vllm.logger import init_logger
 from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
 from vllm.model_executor.model_loader import get_model
@@ -1055,6 +1055,7 @@ def dummy_run(
         self,
         num_tokens: int,
         use_cudagraphs=True,
+        batch_descriptor: BatchDescriptor | None = None,
     ) -> None:
         # Determine if CUDA graphs should be used for this run.
         cudagraphs_enabled = use_cudagraphs and self.use_cuda_graph
@@ -1065,6 +1066,7 @@ def dummy_run(
             None,
             self.vllm_config,
             num_tokens=num_tokens,
+            batch_descriptor=batch_descriptor,
             cudagraph_runtime_mode=(
                 CUDAGraphMode.PIECEWISE if cudagraphs_enabled else CUDAGraphMode.NONE
             ),

vllm/v1/worker/gpu_model_runner.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3585,7 +3585,11 @@ def _dummy_run(
                 cudagraph_runtime_mode == CUDAGraphMode.PIECEWISE
                 and not self.speculative_config.enforce_eager
             )
-            self.drafter.dummy_run(num_tokens, use_cudagraphs=use_cudagraphs)
+            self.drafter.dummy_run(
+                num_tokens,
+                use_cudagraphs=use_cudagraphs,
+                batch_descriptor=batch_descriptor,
+            )

         # This is necessary to avoid blocking DP.
         # For dummy runs, we typically skip EPLB since we don't have any real

0 commit comments

Comments
 (0)