From 609ecd1b2f07efadf2c937f5f0e91a584313ceb7 Mon Sep 17 00:00:00 2001
From: Tomer Asida <57313761+tomeras91@users.noreply.github.com>
Date: Sun, 2 Nov 2025 11:39:45 +0200
Subject: [PATCH 1/2] first working version of nemotronH with pipeline
 parallelism (including commented code and debug prints)

Signed-off-by: Tomer Asida <57313761+tomeras91@users.noreply.github.com>
---
 vllm/model_executor/models/nemotron_h.py | 28 +++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/vllm/model_executor/models/nemotron_h.py b/vllm/model_executor/models/nemotron_h.py
index 457d3910d0e5..e75e4c855a15 100644
--- a/vllm/model_executor/models/nemotron_h.py
+++ b/vllm/model_executor/models/nemotron_h.py
@@ -20,6 +20,7 @@
 import typing
 from collections.abc import Callable, Iterable
+from itertools import islice
 
 import torch
 from torch import nn
 
@@ -549,7 +550,7 @@ def get_layer(prefix: str):
         self.start_layer, self.end_layer, self.layers = make_layers(
             len(config.hybrid_override_pattern), get_layer, prefix=f"{prefix}.layers"
         )
-        self.make_empty_intmd_tensors = make_empty_intermediate_tensors_factory(
+        self.make_empty_intermediate_tensors = make_empty_intermediate_tensors_factory(
             ["hidden_states", "residual"], config.hidden_size
         )
 
@@ -564,7 +565,7 @@ def forward(
         positions: torch.Tensor,
         intermediate_tensors: IntermediateTensors | None = None,
         inputs_embeds: torch.Tensor | None = None,
-    ) -> torch.Tensor:
+    ) -> torch.Tensor | IntermediateTensors:
         if get_pp_group().is_first_rank:
             if inputs_embeds is not None:
                 hidden_states = inputs_embeds
@@ -576,13 +577,22 @@ def forward(
             hidden_states = intermediate_tensors["hidden_states"]
             residual = intermediate_tensors["residual"]
 
-        residual = None
-        for i, layer in enumerate(self.layers):
+        # residual = None
+        # for layer in islice(self.layers, self.start_layer, self.end_layer):
+        for i, layer in enumerate(
+            islice(self.layers, self.start_layer, self.end_layer)
+        ):
+            # for i, layer in enumerate(self.layers):
+            # print(f"##################### {get_pp_group().rank=}, layer_idx={i} #####################", flush=True)  # noqa: E501
+            # print(f"hidden_states before: {hidden_states[-1,...]}", flush=True)
+            # print(f"layer state dict: {layer.state_dict()}", flush=True)
             hidden_states, residual = layer(
                 positions=positions,
                 hidden_states=hidden_states,
                 residual=residual,
             )
+            # print(f"hidden_states after: {hidden_states[-1,...]}", flush=True)
+            # print("--------------------------------\n\n", flush=True)
 
         if not get_pp_group().is_last_rank:
             return IntermediateTensors(
@@ -633,6 +643,9 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                 if name.endswith(".bias") and name not in params_dict:
                     continue
 
+                if is_pp_missing_parameter(name, self):
+                    continue
+
                 param = params_dict[name]
                 weight_loader = param.weight_loader
                 weight_loader(param, loaded_weight, shard_id)
@@ -678,6 +691,9 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                 if is_expert_weight:
                     continue
 
+                if is_pp_missing_parameter(name, self):
+                    continue
+
                 param = params_dict[name]
                 weight_loader = getattr(
                     param, "weight_loader", default_weight_loader
@@ -792,7 +808,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.unpadded_vocab_size, config.vocab_size
         )
 
-        self.make_empty_intmd_tensors = self.model.make_empty_intmd_tensors
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
 
         # Set MoE hyperparameters
         if self.model.has_moe:
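Note on the mechanics of patch 1: vLLM's make_layers() gives each pipeline rank a contiguous [start_layer, end_layer) slice of the layer stack and fills the remaining slots with placeholder modules; the forward loop then walks only the local slice via islice(), and is_pp_missing_parameter() lets load_weights() skip checkpoint tensors owned by other ranks. Below is a minimal, self-contained sketch of that slicing pattern, not the vLLM utilities themselves; partition_layers, ToyStage, pp_rank, and pp_size are illustrative names:

    from itertools import islice

    import torch
    from torch import nn


    def partition_layers(num_layers: int, pp_rank: int, pp_size: int) -> tuple[int, int]:
        # Contiguous split; the last stage absorbs any remainder.
        per_stage = num_layers // pp_size
        start = pp_rank * per_stage
        end = num_layers if pp_rank == pp_size - 1 else start + per_stage
        return start, end


    class ToyStage(nn.Module):
        def __init__(self, num_layers: int, hidden: int, pp_rank: int, pp_size: int):
            super().__init__()
            self.start_layer, self.end_layer = partition_layers(
                num_layers, pp_rank, pp_size
            )
            # Identity placeholders keep global layer indices (and hence weight
            # names) stable, mirroring the placeholders make_layers() installs
            # for layers that live on other ranks.
            self.layers = nn.ModuleList(
                [
                    nn.Linear(hidden, hidden)
                    if self.start_layer <= i < self.end_layer
                    else nn.Identity()
                    for i in range(num_layers)
                ]
            )

        def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
            # Run only the locally owned slice, as in the rewritten loop above.
            for layer in islice(self.layers, self.start_layer, self.end_layer):
                hidden_states = layer(hidden_states)
            return hidden_states


    # Chaining two toy stages sequentially stands in for two pipeline ranks
    # exchanging IntermediateTensors over the wire.
    stage0 = ToyStage(num_layers=8, hidden=16, pp_rank=0, pp_size=2)
    stage1 = ToyStage(num_layers=8, hidden=16, pp_rank=1, pp_size=2)
    x = torch.randn(4, 16)
    out = stage1(stage0(x))  # stage0 runs layers 0-3, stage1 runs layers 4-7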
From 0f4a28e96a031a2591bae20597f50c0f9c5ee00a Mon Sep 17 00:00:00 2001
From: Tomer Asida <57313761+tomeras91@users.noreply.github.com>
Date: Mon, 3 Nov 2025 09:52:19 +0200
Subject: [PATCH 2/2] remove debug prints

Signed-off-by: Tomer Asida <57313761+tomeras91@users.noreply.github.com>
---
 vllm/model_executor/models/nemotron_h.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/vllm/model_executor/models/nemotron_h.py b/vllm/model_executor/models/nemotron_h.py
index e75e4c855a15..324b63c1732f 100644
--- a/vllm/model_executor/models/nemotron_h.py
+++ b/vllm/model_executor/models/nemotron_h.py
@@ -577,22 +577,12 @@ def forward(
             hidden_states = intermediate_tensors["hidden_states"]
             residual = intermediate_tensors["residual"]
 
-        # residual = None
-        # for layer in islice(self.layers, self.start_layer, self.end_layer):
-        for i, layer in enumerate(
-            islice(self.layers, self.start_layer, self.end_layer)
-        ):
-            # for i, layer in enumerate(self.layers):
-            # print(f"##################### {get_pp_group().rank=}, layer_idx={i} #####################", flush=True)  # noqa: E501
-            # print(f"hidden_states before: {hidden_states[-1,...]}", flush=True)
-            # print(f"layer state dict: {layer.state_dict()}", flush=True)
+        for layer in islice(self.layers, self.start_layer, self.end_layer):
             hidden_states, residual = layer(
                 positions=positions,
                 hidden_states=hidden_states,
                 residual=residual,
             )
-            # print(f"hidden_states after: {hidden_states[-1,...]}", flush=True)
-            # print("--------------------------------\n\n", flush=True)
 
         if not get_pp_group().is_last_rank:
             return IntermediateTensors(
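With the debug prints gone, a quick end-to-end check of the pipeline-parallel path might look like the snippet below. This is a hedged sketch: it assumes these two patches are applied, a multi-GPU host, and a NemotronH checkpoint; the model path is a placeholder, while pipeline_parallel_size is the standard vLLM engine argument:

    from vllm import LLM, SamplingParams

    llm = LLM(
        model="<nemotron-h-checkpoint>",  # placeholder; any NemotronH model vLLM can load
        pipeline_parallel_size=2,  # split the layer stack across two pipeline stages
    )
    outputs = llm.generate(
        ["The capital of France is"], SamplingParams(max_tokens=16)
    )
    print(outputs[0].outputs[0].text)

Each rank then executes only its islice() of self.layers, and non-last ranks return IntermediateTensors that are shipped to the next stage instead of logits.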