File tree: 4 files changed, +16 -0 lines changed
Expand file tree Collapse file tree 4 files changed +16
-0
lines changed Original file line number Diff line number Diff line change @@ -18,6 +18,8 @@ source /etc/environment
1818docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN="$HF_TOKEN" --name cpu-test cpu-test
1919
2020function cpu_tests() {
21+ set -e
22+
2123 # Run basic model test
2224 docker exec cpu-test bash -c "
2325 set -e
Original file line number Diff line number Diff line change @@ -20,6 +20,8 @@ docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/hugg
2020 --cpuset-mems=1 --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-avx2 cpu-test-avx2
2121
2222function cpu_tests() {
23+ set -e
24+
2325 # offline inference
2426 docker exec cpu-test-avx2 bash -c "
2527 set -e
Original file line number Diff line number Diff line change @@ -95,6 +95,7 @@ def prepare_model_input(
9595 model_input .seq_lens )
9696
9797 return dataclasses .replace (model_input ,
98+ virtual_engine = virtual_engine ,
9899 pooling_metadata = pooling_metadata )
99100
100101 def _prepare_pooling (
Original file line number Diff line number Diff line change 44import torch
55
66from vllm .attention import AttentionMetadata
7+ from vllm .model_executor import SamplingMetadata
78from vllm .model_executor .layers .sampler import SamplerOutput
89from vllm .multimodal import MultiModalKwargs
910from vllm .sequence import IntermediateTensors , SequenceGroupMetadata
@@ -96,11 +97,21 @@ def prepare_model_input(
9697 encoder_input_positions_tensor ,
9798 ) = self ._prepare_encoder_model_input_tensors (seq_group_metadata_list ,
9899 model_input )
100+ # Sampling metadata is only required for the final pp group
101+ generators = self .get_generators (finished_requests_ids )
102+ sampling_metadata = SamplingMetadata .prepare (seq_group_metadata_list ,
103+ model_input .seq_lens ,
104+ model_input .query_lens ,
105+ self .device ,
106+ pin_memory = False ,
107+ generators = generators )
99108 return dataclasses .replace (
100109 model_input ,
110+ sampling_metadata = sampling_metadata ,
101111 attn_metadata = attn_metadata ,
102112 encoder_input_tokens = encoder_input_tokens_tensor ,
103113 encoder_input_positions = encoder_input_positions_tensor ,
114+ virtual_engine = virtual_engine ,
104115 )
105116
106117 def _prepare_encoder_model_input_tensors (
You can’t perform that action at this time.
0 commit comments