Skip to content

Commit 4902926

Browse files
Isotr0py authored and weilong.yu committed
[Bugfix][Hardware][CPU] Fix broken encoder-decoder CPU runner (vllm-project#10218)
Signed-off-by: Isotr0py <[email protected]>
1 parent e317b12 commit 4902926

File tree

4 files changed

+16
-0
lines changed

4 files changed

+16
-0
lines changed

.buildkite/run-cpu-test-ppc64le.sh

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,8 @@ source /etc/environment
1818
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN="$HF_TOKEN" --name cpu-test cpu-test
1919

2020
function cpu_tests() {
21+
set -e
22+
2123
# Run basic model test
2224
docker exec cpu-test bash -c "
2325
set -e

.buildkite/run-cpu-test.sh

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,8 @@ docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/hugg
2020
--cpuset-mems=1 --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-avx2 cpu-test-avx2
2121

2222
function cpu_tests() {
23+
set -e
24+
2325
# offline inference
2426
docker exec cpu-test-avx2 bash -c "
2527
set -e

vllm/worker/cpu_embedding_model_runner.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,7 @@ def prepare_model_input(
9595
model_input.seq_lens)
9696

9797
return dataclasses.replace(model_input,
98+
virtual_engine=virtual_engine,
9899
pooling_metadata=pooling_metadata)
99100

100101
def _prepare_pooling(

vllm/worker/cpu_enc_dec_model_runner.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import torch
55

66
from vllm.attention import AttentionMetadata
7+
from vllm.model_executor import SamplingMetadata
78
from vllm.model_executor.layers.sampler import SamplerOutput
89
from vllm.multimodal import MultiModalKwargs
910
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
@@ -96,11 +97,21 @@ def prepare_model_input(
9697
encoder_input_positions_tensor,
9798
) = self._prepare_encoder_model_input_tensors(seq_group_metadata_list,
9899
model_input)
100+
# Sampling metadata is only required for the final pp group
101+
generators = self.get_generators(finished_requests_ids)
102+
sampling_metadata = SamplingMetadata.prepare(seq_group_metadata_list,
103+
model_input.seq_lens,
104+
model_input.query_lens,
105+
self.device,
106+
pin_memory=False,
107+
generators=generators)
99108
return dataclasses.replace(
100109
model_input,
110+
sampling_metadata=sampling_metadata,
101111
attn_metadata=attn_metadata,
102112
encoder_input_tokens=encoder_input_tokens_tensor,
103113
encoder_input_positions=encoder_input_positions_tensor,
114+
virtual_engine=virtual_engine,
104115
)
105116

106117
def _prepare_encoder_model_input_tensors(

0 commit comments

Comments
 (0)