ModelTC · e1ijah1 · Dec 4, 2025
diff --git a/lightx2v/models/input_encoders/hf/hunyuan15/byt5/model.py b/lightx2v/models/input_encoders/hf/hunyuan15/byt5/model.py
@@ -286,6 +286,32 @@ def _process_single_byt5_prompt(self, prompt_text, device):
             byt5_mask = text_mask
 
         return byt5_embeddings, byt5_mask
+
+    @staticmethod
+    def get_byt5_text_tokens(byt5_tokenizer, byt5_max_length, text_prompt):
+        """
+        Tokenize text prompt for byT5 model.
+
+        Args:
+            byt5_tokenizer: The byT5 tokenizer.
+            byt5_max_length: Maximum sequence length for tokenization.
+            text_prompt: Text prompt string to tokenize.
+
+        Returns:
+            tuple[torch.Tensor, torch.Tensor]:
+                - input_ids: Tokenized input IDs.
+                - attention_mask: Attention mask tensor.
+        """
+        byt5_text_inputs = byt5_tokenizer(
+            text_prompt,
+            padding="max_length",
+            max_length=byt5_max_length,
+            truncation=True,
+            add_special_tokens=True,
+            return_tensors="pt",
+        )
+
+        return byt5_text_inputs.input_ids, byt5_text_inputs.attention_mask
 
     def _prepare_byt5_embeddings(self, prompts):
         if isinstance(prompts, str):