NVIDIA-NeMo · andrusenkoau · Jul 12, 2025 · Jun 17, 2025 · Jun 17, 2025 · Jun 18, 2025
diff --git a/nemo/collections/asr/modules/transformer/__init__.py b/nemo/collections/asr/modules/transformer/__init__.py
@@ -24,8 +24,8 @@
 from nemo.collections.asr.modules.transformer.transformer_encoders import TransformerEncoder
 from nemo.collections.asr.modules.transformer.transformer_generators import (
     BeamSearchSequenceGenerator,
+    BeamSearchSequenceGeneratorWithFusionModels,
     BeamSearchSequenceGeneratorWithLanguageModel,
-    BeamSearchSequenceGeneratorWithNGramLM,
     EnsembleBeamSearchSequenceGenerator,
     GreedySequenceGenerator,
     TopKSequenceGenerator,
@@ -44,7 +44,7 @@
     "TransformerEncoder",
     "BeamSearchSequenceGenerator",
     "BeamSearchSequenceGeneratorWithLanguageModel",
-    "BeamSearchSequenceGeneratorWithNGramLM",
+    "BeamSearchSequenceGeneratorWithFusionModels",
     "EnsembleBeamSearchSequenceGenerator",
     "GreedySequenceGenerator",
     "TopKSequenceGenerator",

diff --git a/nemo/collections/asr/modules/transformer/transformer_generators.py b/nemo/collections/asr/modules/transformer/transformer_generators.py
@@ -19,7 +19,6 @@
 from omegaconf import DictConfig
 from torch.distributions import Categorical
 
-from nemo.collections.asr.parts.submodules.ngram_lm import NGramGPULanguageModel
 from nemo.collections.asr.parts.submodules.token_classifier import TokenClassifier
 from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMethodMixin
 from nemo.collections.common.parts import NEG_INF, mask_padded_tokens
@@ -507,9 +506,9 @@ def _forward(
             return tgt
 
 
-class BeamSearchSequenceGeneratorWithNGramLM(BeamSearchSequenceGenerator):
+class BeamSearchSequenceGeneratorWithFusionModels(BeamSearchSequenceGenerator):
     def __init__(
-        self, embedding, decoder, log_softmax, ngram_lm_model, ngram_lm_alpha=0.0, beam_size=1, len_pen=0, **kwargs
+        self, embedding, decoder, log_softmax, fusion_models, fusion_models_alpha, beam_size=1, len_pen=0, **kwargs
     ):
         """
         Beam Search sequence generator based on the decoder followed by
@@ -524,30 +523,43 @@ def __init__(
         """
 
         super().__init__(embedding, decoder, log_softmax, beam_size=beam_size, len_pen=len_pen, **kwargs)
-        # ngram lm
-        self.ngram_lm_batch = NGramGPULanguageModel.from_file(lm_path=ngram_lm_model, vocab_size=self.num_tokens)
-        self.ngram_lm_alpha = ngram_lm_alpha
+
+        self.fusion_models = fusion_models
+        self.fusion_models_alpha = fusion_models_alpha
 
     def _forward(
         self, decoder_input_ids=None, encoder_hidden_states=None, encoder_input_mask=None, return_beam_scores=False
     ):
         device = encoder_hidden_states.device
-        # force ngram lm to use the same device as encoder_hidden_states, since current class is not nn.Module instance
-        self.ngram_lm_batch.to(device)
+        # force fusion models onto the same device as encoder_hidden_states, since this class is not an nn.Module instance
+        for fusion_model in self.fusion_models:
+            fusion_model.to(device)
 
         tgt, batch_size, max_generation_length = self._prepare_for_search(decoder_input_ids, encoder_hidden_states)
-        batch_lm_states = self.ngram_lm_batch.get_init_states(batch_size=batch_size, bos=True)
+
+        batch_fusion_states_list = [
+            fusion_model.get_init_states(batch_size=batch_size, bos=True) for fusion_model in self.fusion_models
+        ]
+        batch_fusion_states_candidates_list = []
 
         # generate initial buffer of beam_size prefixes-hypotheses
         log_probs, decoder_mems_list = self._one_step_forward(tgt, encoder_hidden_states, encoder_input_mask, None, 0)
-        # get ngram lm scores
-        lm_scores, batch_lm_states_candidates = self.ngram_lm_batch.advance(states=batch_lm_states, eos_id=self.eos)
-        log_probs += self.ngram_lm_alpha * lm_scores[:, None, :]
+        # get scores from each fusion model and add them to log_probs, weighted by that model's alpha
+        for fusion_model_idx, fusion_model in enumerate(self.fusion_models):
+            fusion_scores, batch_fusion_states_candidates = fusion_model.advance(
+                states=batch_fusion_states_list[fusion_model_idx], eos_id=self.eos
+            )
+            batch_fusion_states_candidates_list.append(batch_fusion_states_candidates)
+            log_probs += self.fusion_models_alpha[fusion_model_idx] * fusion_scores[:, None, :]
 
         scores, prefixes = torch.topk(log_probs.permute(0, 2, 1), self.beam_size, dim=1)  # [Batch, Beam, 1]
-        batch_lm_states = batch_lm_states_candidates.gather(dim=1, index=prefixes.squeeze(-1)).view(
-            -1
-        )  # [Batch, Beam] -> [Batch*Beam]
+        for fusion_model_idx, batch_fusion_states_candidates in enumerate(batch_fusion_states_candidates_list):
+            batch_fusion_states_list[fusion_model_idx] = batch_fusion_states_candidates.gather(
+                dim=1, index=prefixes.squeeze(-1)
+            ).view(
+                -1
+            )  # [Batch, Beam] -> [Batch*Beam]
+
         scores, prefixes = scores.view(-1, 1), prefixes.view(-1, 1)  # [Batch*Beam, 1]
 
         # repeat init target prefixes and cached memory states beam_size times
@@ -583,13 +595,19 @@ def _forward(
             log_probs, decoder_mems_list = self._one_step_forward(
                 prefixes[:, -1:], encoder_hidden_states, encoder_input_mask, decoder_mems_list, i
             )
-            lm_scores, batch_lm_states_candidates = self.ngram_lm_batch.advance(
-                states=batch_lm_states, eos_id=self.eos
-            )
-            log_probs += self.ngram_lm_alpha * lm_scores[:, None, :]
+            for fusion_model_idx, fusion_model in enumerate(self.fusion_models):
+                fusion_scores, batch_fusion_states_candidates = fusion_model.advance(
+                    states=batch_fusion_states_list[fusion_model_idx], eos_id=self.eos
+                )
+                log_probs += self.fusion_models_alpha[fusion_model_idx] * fusion_scores[:, None, :]
+                batch_fusion_states_candidates_list[fusion_model_idx] = batch_fusion_states_candidates
 
             scores_i, prefixes_i = torch.topk(log_probs[:, -1, :], self.beam_size, dim=-1)  # [Batch*Beam, Beam]
-            batch_lm_states = batch_lm_states_candidates.gather(dim=1, index=prefixes_i)  # [Batch*Beam, Beam]
+
+            for fusion_model_idx, batch_fusion_states_candidates in enumerate(batch_fusion_states_candidates_list):
+                batch_fusion_states_list[fusion_model_idx] = batch_fusion_states_candidates.gather(
+                    dim=1, index=prefixes_i
+                )
 
             # for all prefixes ending with <eos> or <pad> replace generated
             # continuations with <pad>
@@ -605,9 +623,12 @@ def _forward(
             len_penalties = self.compute_len_penalty(prefixes_len, self.len_pen)
             scores = scores / len_penalties
             scores, indices_i = torch.topk(scores.view(-1, self.beam_size**2), self.beam_size, dim=1)  # [Batch, Beam]
-            batch_lm_states = (
-                batch_lm_states.view(-1, self.beam_size**2).gather(dim=1, index=indices_i).view(-1)
-            )  # [Batch, Beam] -> [Batch*Beam]
+
+            for fusion_model_idx, batch_fusion_states in enumerate(batch_fusion_states_list):
+                batch_fusion_states_list[fusion_model_idx] = (
+                    batch_fusion_states.view(-1, self.beam_size**2).gather(dim=1, index=indices_i).view(-1)
+                )
+
             scores = scores.view(-1, 1) * len_penalties  # [Batch*Beam, 1]
 
             # select prefixes which correspond to the chosen hypotheses

diff --git a/nemo/collections/asr/parts/context_biasing/__init__.py b/nemo/collections/asr/parts/context_biasing/__init__.py
@@ -12,9 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from nemo.collections.asr.parts.context_biasing.boosting_graph_batched import GPUBoostingTreeModel
 from nemo.collections.asr.parts.context_biasing.context_biasing_utils import (
     compute_fscore,
     merge_alignment_with_ws_hyps,
 )
 from nemo.collections.asr.parts.context_biasing.context_graph_ctc import ContextGraphCTC
 from nemo.collections.asr.parts.context_biasing.ctc_based_word_spotter import run_word_spotter
+
+__all__ = [
+    "GPUBoostingTreeModel",
+    "compute_fscore",
+    "merge_alignment_with_ws_hyps",
+    "ContextGraphCTC",
+    "run_word_spotter",
+]