Skip to content

Commit 4e9faaf

Browse files
authored
[model] fix: stuck issue with mixed text-image data (#3670)
1 parent f50e5c2 commit 4e9faaf

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

verl/models/transformers/glm4v.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -378,7 +378,7 @@ def _get_input_embeds(
378 | 378 |   video_embeds = video_embeds.to(inputs_embeds.device, inputs_embeds.dtype)
379 | 379 |   inputs_embeds = inputs_embeds.masked_scatter(video_mask, video_embeds)
380 | 380 |
381 |     | - if model.training and pixel_values is None and pixel_values_videos is None: # handle mixed text-image data
    | 381 | + if pixel_values is None and pixel_values_videos is None: # handle mixed text-image data
382 | 382 |   pixel_values = torch.zeros((16, 1176), dtype=inputs_embeds.dtype, device=inputs_embeds.device)
383 | 383 |   image_grid_thw = torch.tensor([[1, 4, 4]], dtype=torch.long, device=inputs_embeds.device)
384 | 384 |   image_embeds = model.visual(pixel_values, grid_thw=image_grid_thw)

verl/models/transformers/qwen2_vl.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -378,7 +378,7 @@ def _get_input_embeds(
378 | 378 |   video_embeds = video_embeds.to(inputs_embeds.device, inputs_embeds.dtype)
379 | 379 |   inputs_embeds = inputs_embeds.masked_scatter(video_mask, video_embeds)
380 | 380 |
381 |     | - if model.training and pixel_values is None and pixel_values_videos is None: # handle mixed text-image data
    | 381 | + if pixel_values is None and pixel_values_videos is None: # handle mixed text-image data
382 | 382 |   pixel_values = torch.zeros((16, 1176), dtype=inputs_embeds.dtype, device=inputs_embeds.device)
383 | 383 |   image_grid_thw = torch.tensor([[1, 4, 4]], dtype=torch.long, device=inputs_embeds.device)
384 | 384 |   image_embeds = model.visual(pixel_values, grid_thw=image_grid_thw)

0 commit comments

Comments
 (0)