@@ -923,14 +923,19 @@ def forward(
923923 def get_input_positions (
924924 input_tokens : List [int ],
925925 hf_config : PretrainedConfig ,
926- image_grid_thw : Union [List [List [int ]], torch .Tensor ],
927- video_grid_thw : Union [List [List [int ]], torch .Tensor ],
928- second_per_grid_ts : Optional [List [float ]] = None ,
926+ image_grid_thw : Optional [ Union [List [List [int ]], torch .Tensor ] ],
927+ video_grid_thw : Optional [ Union [List [List [int ]], torch .Tensor ] ],
928+ second_per_grid_ts : Optional [List [float ]],
929929 context_len : int = 0 ,
930930 seq_len : Optional [int ] = None ,
931931 ) -> Tuple [List [List [int ]], int ]:
932932 """Get mrope input positions and delta value."""
933933
934+ image_grid_thw = [] if image_grid_thw is None else image_grid_thw
935+ video_grid_thw = [] if video_grid_thw is None else video_grid_thw
936+ second_per_grid_ts = [] if second_per_grid_ts is None else \
937+ second_per_grid_ts
938+
934939 llm_positions , mrope_position_delta = \
935940 MRotaryEmbedding .get_input_positions_tensor (
936941 input_tokens = input_tokens ,
@@ -950,7 +955,7 @@ def get_input_positions_tensor(
950955 hf_config : PretrainedConfig ,
951956 image_grid_thw : Union [List [List [int ]], torch .Tensor ],
952957 video_grid_thw : Union [List [List [int ]], torch .Tensor ],
953- second_per_grid_ts : Optional [ List [float ]] = None ,
958+ second_per_grid_ts : List [float ],
954959 context_len : int = 0 ,
955960 seq_len : Optional [int ] = None ,
956961 ) -> Tuple [torch .Tensor , int ]:
@@ -1006,7 +1011,7 @@ def get_input_positions_tensor(
10061011 video_grid_thw [video_index ][2 ],
10071012 )
10081013 video_second_per_grid_t = 1.0
1009- if second_per_grid_ts is not None :
1014+ if second_per_grid_ts :
10101015 video_second_per_grid_t = second_per_grid_ts [video_index ]
10111016 video_index += 1
10121017 remain_videos -= 1
0 commit comments