File tree Expand file tree Collapse file tree 1 file changed +9
-0
lines changed
vllm/model_executor/models Expand file tree Collapse file tree 1 file changed +9
-0
lines changed Original file line number Diff line number Diff line change @@ -123,6 +123,13 @@ def input_processor_for_mllama(
123123
124124 assert is_list_of (image_data , Image .Image )
125125
126+ num_image_tokens = dec_inputs ['prompt_token_ids' ].count (
127+ MLLAMA_IMAGE_TOKEN_ID )
128+ if num_image_tokens != len (image_data ):
129+ raise ValueError (
130+ f"The number of image tokens ({ num_image_tokens } ) must be"
131+ f" the same as the number of images ({ len (image_data )} )" )
132+
126133 # Since only the last group of consecutive images
127134 # is attended to by the decoded tokens, we only need to
128135 # get the number of tiles for those images.
@@ -1493,6 +1500,8 @@ def convert_sparse_cross_attention_mask_to_dense(
14931500 dense_mask [seq_start + start :seq_start + end ,
14941501 tile_start :tile_start + tile ] = 1
14951502 tile_start += tile
1503+ assert ts != - 1
1504+ assert td != 0
14961505 tile_range_for_decode .append ((ts , ts + td ))
14971506 seq_start += length
14981507
You can’t perform that action at this time.
0 commit comments