18 changes: 13 additions & 5 deletions vllm/model_executor/models/nemotron_h.py
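In short: the diff threads vLLM's pipeline-parallel (PP) plumbing through NemotronH. Each rank now runs only its own slice of layers, weight loading skips parameters hosted on other ranks, non-first ranks pick up hidden_states/residual from IntermediateTensors, and the abbreviated make_empty_intmd_tensors attribute is renamed to make_empty_intermediate_tensors, the name the engine looks for.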
@@ -20,6 +20,7 @@
 
 import typing
 from collections.abc import Callable, Iterable
+from itertools import islice
 
 import torch
 from torch import nn
@@ -549,7 +550,7 @@ def get_layer(prefix: str):
         self.start_layer, self.end_layer, self.layers = make_layers(
             len(config.hybrid_override_pattern), get_layer, prefix=f"{prefix}.layers"
         )
-        self.make_empty_intmd_tensors = make_empty_intermediate_tensors_factory(
+        self.make_empty_intermediate_tensors = make_empty_intermediate_tensors_factory(
             ["hidden_states", "residual"], config.hidden_size
         )
@@ -564,7 +565,7 @@ def forward(
         positions: torch.Tensor,
         intermediate_tensors: IntermediateTensors | None = None,
         inputs_embeds: torch.Tensor | None = None,
-    ) -> torch.Tensor:
+    ) -> torch.Tensor | IntermediateTensors:
         if get_pp_group().is_first_rank:
             if inputs_embeds is not None:
                 hidden_states = inputs_embeds
@@ -576,8 +577,7 @@
             hidden_states = intermediate_tensors["hidden_states"]
             residual = intermediate_tensors["residual"]
 
-        residual = None
-        for i, layer in enumerate(self.layers):
+        for layer in islice(self.layers, self.start_layer, self.end_layer):
             hidden_states, residual = layer(
                 positions=positions,
                 hidden_states=hidden_states,
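With PP, residual is either None (set on the first rank alongside hidden_states) or comes from intermediate_tensors, so the unconditional residual = None reset had to go; and islice(self.layers, self.start_layer, self.end_layer) walks only the layers this rank owns, which is also why forward now returns torch.Tensor | IntermediateTensors. A self-contained sketch of the slicing pattern, with hypothetical layer counts and rank bounds:

from itertools import islice

import torch
from torch import nn

layers = nn.ModuleList([nn.Linear(16, 16) for _ in range(8)])

# Hypothetical split: this rank owns layers [2, 5) of the 8-layer stack.
start_layer, end_layer = 2, 5

hidden_states = torch.randn(4, 16)
# islice iterates the owned slice directly, so layers belonging to other
# ranks (which vLLM replaces with placeholder modules) are never executed.
for layer in islice(layers, start_layer, end_layer):
    hidden_states = layer(hidden_states)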
@@ -633,6 +633,9 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                 if name.endswith(".bias") and name not in params_dict:
                     continue
 
+                if is_pp_missing_parameter(name, self):
+                    continue
+
                 param = params_dict[name]
                 weight_loader = param.weight_loader
                 weight_loader(param, loaded_weight, shard_id)
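is_pp_missing_parameter(name, self) (from vllm/model_executor/models/utils.py) reports whether a checkpoint tensor belongs to a layer hosted on another pipeline rank; skipping such names keeps the params_dict[name] lookup below from raising KeyError. A rough approximation of the check, assuming layer indices appear in parameter names as layers.<i>. (the real helper inspects the model's own layer range):

import re


def is_missing_on_this_rank(name: str, start_layer: int, end_layer: int) -> bool:
    """Approximate check: True if the weight's layer lives on another PP rank."""
    match = re.search(r"layers\.(\d+)\.", name)
    if match is None:
        return False  # non-layer weights (embeddings, norms) are handled elsewhere
    layer_idx = int(match.group(1))
    return not (start_layer <= layer_idx < end_layer)


assert is_missing_on_this_rank("backbone.layers.7.mixer.weight", 2, 5)
assert not is_missing_on_this_rank("backbone.layers.3.mixer.weight", 2, 5)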
@@ -678,6 +681,9 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                 if is_expert_weight:
                     continue
 
+                if is_pp_missing_parameter(name, self):
+                    continue
+
                 param = params_dict[name]
                 weight_loader = getattr(
                     param, "weight_loader", default_weight_loader
@@ -792,7 +798,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.unpadded_vocab_size, config.vocab_size
         )
 
-        self.make_empty_intmd_tensors = self.model.make_empty_intmd_tensors
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
 
         # Set MoE hyperparameters
         if self.model.has_moe:
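Taken together, the hunks give NemotronH what vLLM's PP runner expects: a per-rank layer slice, PP-aware weight loading, and the IntermediateTensors handoff. Assuming a NemotronH checkpoint and enough GPUs, the path can be exercised with the standard pipeline_parallel_size knob (model name below is just an example):

from vllm import LLM

# Example only: any NemotronH-architecture checkpoint applies.
llm = LLM(
    model="nvidia/Nemotron-H-8B-Base-8K",
    pipeline_parallel_size=2,  # shard the layer stack across 2 pipeline stages
)
print(llm.generate("Hello")[0].outputs[0].text)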