1 change: 1 addition & 0 deletions docs/models/supported_models.md
@@ -401,6 +401,7 @@ th {
| `OLMoEForCausalLM` | OLMoE | `allenai/OLMoE-1B-7B-0924`, `allenai/OLMoE-1B-7B-0924-Instruct`, etc. | | ✅︎ |
| `OPTForCausalLM` | OPT, OPT-IML | `facebook/opt-66b`, `facebook/opt-iml-max-30b`, etc. | ✅︎ | ✅︎ |
| `OrionForCausalLM` | Orion | `OrionStarAI/Orion-14B-Base`, `OrionStarAI/Orion-14B-Chat`, etc. | | ✅︎ |
| `PanguUltraMoEForCausalLM` | openPangu-Ultra-MoE-718B | | ✅︎ | ✅︎ |
**Collaborator:**
Does this model have a publicly accessible link?

**Contributor Author:**
There is a publicly accessible version at https://ai.gitcode.com/ascend-tribe/openPangu-Ultra-MoE-718B-V1.1, but it has not been uploaded to Hugging Face yet. The config file in that repo needs to be modified to align with common vLLM practice, so I have mainly tested the model in my local environment, where it works well.


**Contributor Author:**
The model will be uploaded to Hugging Face soon :)

| `PhiForCausalLM` | Phi | `microsoft/phi-1_5`, `microsoft/phi-2`, etc. | ✅︎ | ✅︎ |
| `Phi3ForCausalLM` | Phi-4, Phi-3 | `microsoft/Phi-4-mini-instruct`, `microsoft/Phi-4`, `microsoft/Phi-3-mini-4k-instruct`, `microsoft/Phi-3-mini-128k-instruct`, `microsoft/Phi-3-medium-128k-instruct`, etc. | ✅︎ | ✅︎ |
| `PhiMoEForCausalLM` | Phi-3.5-MoE | `microsoft/Phi-3.5-MoE-instruct`, etc. | ✅︎ | ✅︎ |
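Once the checkpoint is published, loading the newly registered architecture should follow the usual vLLM flow. A minimal usage sketch; the model path below is a placeholder, since per the review thread above no confirmed Hugging Face repo id exists yet:

```python
# Minimal usage sketch. The model path is a placeholder: the checkpoint is
# not on Hugging Face yet, so point this at a local copy of the weights.
from vllm import LLM, SamplingParams

llm = LLM(model="/path/to/openPangu-Ultra-MoE-718B", trust_remote_code=True)
params = SamplingParams(temperature=0.7, max_tokens=64)
outputs = llm.generate(["Hello, my name is"], params)
print(outputs[0].outputs[0].text)
```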
3 changes: 3 additions & 0 deletions vllm/config/model.py
@@ -1257,6 +1257,8 @@ def is_deepseek_mla(self) -> bool:
"deepseek_mtp",
"kimi_k2",
"longcat_flash",
"pangu_ultra_moe",
"pangu_ultra_moe_mtp",
):
return self.hf_text_config.kv_lora_rank is not None
elif self.hf_text_config.model_type == "eagle":
@@ -1405,6 +1407,7 @@ def get_layers_start_end_indices(
or self.hf_config.model_type == "glm4_moe_mtp"
or self.hf_config.model_type == "ernie_mtp"
or self.hf_config.model_type == "qwen3_next_mtp"
or self.hf_config.model_type == "pangu_ultra_moe_mtp"
):
total_num_hidden_layers = getattr(
self.hf_text_config, "num_nextn_predict_layers", 0
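For context, a condensed sketch of the detection pattern the first hunk extends (not vLLM's exact code; the tuple members before `deepseek_mtp` are assumed, since the diff shows only the tail of the tuple). Models whose text config declares a `kv_lora_rank` use DeepSeek-style Multi-head Latent Attention, and the two pangu_ultra_moe types now join that allow-list:

```python
# Condensed sketch of the MLA check; members before "deepseek_mtp" are
# assumed, as the diff only shows the tail of the tuple.
MLA_MODEL_TYPES = (
    "deepseek_v2",
    "deepseek_mtp",
    "kimi_k2",
    "longcat_flash",
    "pangu_ultra_moe",      # added by this PR
    "pangu_ultra_moe_mtp",  # added by this PR
)

def is_deepseek_mla(hf_text_config) -> bool:
    # MLA applies only when the config actually carries the latent-KV rank.
    if getattr(hf_text_config, "model_type", None) in MLA_MODEL_TYPES:
        return getattr(hf_text_config, "kv_lora_rank", None) is not None
    return False
```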
9 changes: 9 additions & 0 deletions vllm/config/speculative.py
@@ -42,6 +42,7 @@
"qwen3_next_mtp",
"mimo_mtp",
"longcat_flash_mtp",
"pangu_ultra_moe_mtp",
"mtp",
]
MTP_MODEL_TYPES = (
@@ -51,6 +52,7 @@
"ernie_mtp",
"qwen3_next_mtp",
"longcat_flash_mtp",
"pangu_ultra_moe_mtp",
)


@@ -168,6 +170,13 @@ def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
hf_config.update(
{"n_predict": n_predict, "architectures": ["DeepSeekMTPModel"]}
)
if hf_config.model_type == "pangu_ultra_moe":
hf_config.model_type = "pangu_ultra_moe_mtp"
if hf_config.model_type == "pangu_ultra_moe_mtp":
n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
hf_config.update(
{"n_predict": n_predict, "architectures": ["OpenPanguMTPModel"]}
)
Comment on lines +184 to +190
**Member (@hmellor), Oct 29, 2025:**

Could this override be done in vllm/model_executor/models/openpangu.py and vllm/model_executor/models/openpangu_mtp.py instead of putting model-specific config overrides in the global configs?

**Member:**

Hmm, I see this is currently done for quite a few models... We should do this in a follow-up.

**Contributor Author:**

I followed the common practice (like qwen3_next_mtp and longcat_flash_mtp) and placed the MTP override in vllm/config/speculative.py. I also considered moving the override into the model definition file, but the initialization workflow for MTP seems to make that infeasible. Could you please suggest an alternative implementation?

**Member:**

For this PR, please follow the existing pattern as you have already done. Refactoring MTP config is a separate task.

**Contributor Author:**

Cool! Looking forward to it.


if hf_config.architectures[0] == "MiMoForCausalLM":
hf_config.model_type = "mimo_mtp"
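To make the override's effect concrete, here is a standalone sketch of what the new branch does to a base openPangu config during speculative-decoding setup. The values are hypothetical; `PretrainedConfig.update` is the real transformers API, and the two `if` statements mirror the ones added in this hunk:

```python
# Standalone demo of the new override branch. Values are hypothetical.
from transformers import PretrainedConfig

cfg = PretrainedConfig()
cfg.model_type = "pangu_ultra_moe"   # as loaded from the checkpoint
cfg.num_nextn_predict_layers = 1     # hypothetical MTP depth

if cfg.model_type == "pangu_ultra_moe":
    cfg.model_type = "pangu_ultra_moe_mtp"
if cfg.model_type == "pangu_ultra_moe_mtp":
    n_predict = getattr(cfg, "num_nextn_predict_layers", None)
    cfg.update({"n_predict": n_predict, "architectures": ["OpenPanguMTPModel"]})

print(cfg.model_type)     # pangu_ultra_moe_mtp
print(cfg.n_predict)      # 1
print(cfg.architectures)  # ['OpenPanguMTPModel']
```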