4 files changed in vllm/model_executor/models: +8 -4 lines.

@@ -273,7 +273,8 @@ def __init__(self,
         config.projector_hidden_act = "gelu"

         # TODO: Optionally initializes this for supporting embeddings.
-        self.vision_tower = init_vision_tower_for_llava(config, quant_config)
+        self.vision_tower = init_vision_tower_for_llava(
+            config, quant_config, require_post_norm=False)
         self.multi_modal_projector = LlavaMultiModalProjector(
             vision_hidden_size=config.vision_config.hidden_size,
             text_hidden_size=config.text_config.hidden_size,
@@ -277,7 +277,8 @@ def __init__(self,
         self.multimodal_config = multimodal_config

         # TODO: Optionally initializes this for supporting embeddings.
-        self.vision_tower = init_vision_tower_for_llava(config, quant_config)
+        self.vision_tower = init_vision_tower_for_llava(
+            config, quant_config, require_post_norm=False)
         self.image_newline = nn.Parameter(
             torch.empty(config.text_config.hidden_size))
         self.multi_modal_projector = LlavaMultiModalProjector(
@@ -256,7 +256,8 @@ def __init__(self,
         self.multimodal_config = multimodal_config

         # Initialize the vision tower only up to the required feature layer
-        self.vision_tower = init_vision_tower_for_llava(config, quant_config)
+        self.vision_tower = init_vision_tower_for_llava(
+            config, quant_config, require_post_norm=False)
         self.vision_resampler = LlavaNextVideoPooler(config)
         self.multi_modal_projector = LlavaNextMultiModalProjector(
             vision_hidden_size=config.vision_config.hidden_size,
@@ -400,7 +400,8 @@ def __init__(self,
         self.multimodal_config = multimodal_config

         # Initialize the vision tower only up to the required feature layer
-        self.vision_tower = init_vision_tower_for_llava(config, quant_config)
+        self.vision_tower = init_vision_tower_for_llava(
+            config, quant_config, require_post_norm=False)
         self.multi_modal_projector = LlavaOnevisionMultiModalProjector(config)
         self.language_model = init_vllm_registered_model(
             config.text_config, cache_config, quant_config)
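
All four call sites now pass require_post_norm=False to init_vision_tower_for_llava. A plausible reading (not confirmed by this diff) is that LLaVA-style models consume hidden states from an intermediate vision-encoder layer, so the encoder's final post-layernorm is never used and need not be allocated or loaded. The sketch below illustrates that idea only; ToyVisionTower and init_vision_tower_sketch are hypothetical names, not the vLLM implementation.

# Illustrative sketch only, under the assumption stated above: require_post_norm
# controls whether the vision encoder keeps its final post-layernorm.
from typing import Optional

import torch
import torch.nn as nn


class ToyVisionTower(nn.Module):
    """Minimal stand-in for a CLIP/SigLIP-style vision encoder."""

    def __init__(self, hidden_size: int, num_layers: int,
                 require_post_norm: bool = True) -> None:
        super().__init__()
        self.layers = nn.ModuleList(
            nn.Linear(hidden_size, hidden_size) for _ in range(num_layers))
        # Only allocate the post-norm when its output is actually consumed.
        self.post_layernorm: Optional[nn.LayerNorm] = (
            nn.LayerNorm(hidden_size) if require_post_norm else None)

    def forward(self, x: torch.Tensor,
                feature_layer: int = -2) -> torch.Tensor:
        hidden_states = [x]
        for layer in self.layers:
            x = layer(x)
            hidden_states.append(x)
        feats = hidden_states[feature_layer]
        # The post-norm only applies when features come from the last layer.
        if self.post_layernorm is not None and feature_layer in (
                -1, len(self.layers)):
            feats = self.post_layernorm(feats)
        return feats


def init_vision_tower_sketch(hidden_size: int, num_layers: int,
                             require_post_norm: bool = False) -> ToyVisionTower:
    # Mirrors the call sites above: passing require_post_norm=False avoids
    # creating (and later failing to load) an unused post-norm parameter.
    return ToyVisionTower(hidden_size, num_layers, require_post_norm)


if __name__ == "__main__":
    tower = init_vision_tower_sketch(hidden_size=32, num_layers=4)
    out = tower(torch.randn(1, 8, 32))
    print(out.shape)  # torch.Size([1, 8, 32])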