Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 3 additions & 14 deletions unsloth_zoo/temporary_patches/gemma.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,6 @@ def forward(
logits_to_keep=logits_to_keep,
**lm_kwargs,
)
labels = None
# We NEVER ENTER if labels is not None: since we already accounted for it


logits = outputs.logits
loss = None
if labels is not None:
Expand All @@ -288,7 +284,7 @@ def forward(
flat_logits = shift_logits.view(-1, self.config.text_config.vocab_size)
flat_labels = shift_labels.view(-1).to(shift_logits.device)
loss = loss_fct(flat_logits, flat_labels)
loss = outputs.loss
loss = getattr(outputs, "loss", loss)

return Gemma3CausalLMOutputWithPast(
loss=loss,
Expand Down Expand Up @@ -352,14 +348,7 @@ def forward(
cache_position=cache_position,
**lm_kwargs,
)
labels = None
# We NEVER ENTER if labels is not None: since we already accounted for it

hidden_states = outputs[0]
# Only compute necessary logits, and do not upcast them to float if we are not computing the loss
slice_indices = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
logits = self.lm_head(hidden_states[:, slice_indices, :])

logits = outputs.logits
loss = None
if labels is not None:
# Upcast to float if we need to compute the loss to avoid potential precision issues
Expand All @@ -381,7 +370,7 @@ def forward(
flat_logits = shift_logits.view(-1, self.config.text_config.vocab_size)
flat_labels = shift_labels.view(-1).to(shift_logits.device)
loss = loss_fct(flat_logits, flat_labels)
loss = outputs.loss
loss = getattr(outputs, "loss", loss)

if not return_dict:
output = (logits,) + outputs[1:]
Expand Down