ROCm · ppalaniappan-amd · Dec 22, 2025 · Dec 22, 2025 · Dec 22, 2025 · Dec 22, 2025
@@ -101,6 +101,7 @@ users can also directly run the vLLM benchmark scripts and change the benchmarki
 | MAD model name                         | Model repo                             |
 | -------------------------------------- | -------------------------------------- |
 | pyt_vllm_deepseek-r1                   | [deepseek-ai/DeepSeek-R1-0528](https://huggingface.co/deepseek-ai/DeepSeek-R1-0528) |
+| pyt_vllm_deepseek-moe-16b-chat         | [deepseek-ai/deepseek-moe-16b-chat](https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat) |
 | pyt_vllm_gpt-oss-20b                   | [openai/gpt-oss-20b](https://huggingface.co/openai/gpt-oss-20b) |
 | pyt_vllm_gpt-oss-120b                  | [openai/gpt-oss-120b](https://huggingface.co/openai/gpt-oss-120b) |
 | pyt_vllm_llama-2-70b                   | [meta-llama/Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) |

@@ -48,6 +48,26 @@
     "args":
      "--model_repo deepseek-ai/DeepSeek-R1-0528 --config configs/default.csv"
   },
+  {
+    "name": "pyt_vllm_deepseek-moe-16b-chat",
+    "url": "",
+    "dockerfile": "docker/pyt_vllm",
+    "scripts": "scripts/vllm/run.sh",
+    "data": "huggingface",
+    "n_gpus": "-1",
+    "owner": "mad.support@amd.com",
+    "training_precision": "",
+    "multiple_results": "perf_deepseek-moe-16b-chat.csv",
+    "tags": [
+      "pyt",
+      "vllm",
+      "vllm_default",
+      "inference"
+    ],
+    "timeout": -1,
+    "args":
+     "--model_repo deepseek-ai/deepseek-moe-16b-chat --config configs/default.csv"
+  },
   {
     "name": "pyt_vllm_gpt-oss-20b",
     "url": "",

@@ -31,3 +31,6 @@ mistralai/Mixtral-8x22B-Instruct-v0.1,serving,8,128 2048,128 2048,,,1 8 32 128,f
 
 amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV,throughput,8,128 2048,128 2048,,1024,,float8,1024,65536,8192,0.9
 amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV,serving,8,128 2048,128 2048,,,1 8 32 128,float8,1024,65536,8192,0.9
+
+deepseek-ai/deepseek-moe-16b-chat,throughput,8,128 2048,128 2048,,1024,,bfloat16,1024,65536,4096,0.9
+deepseek-ai/deepseek-moe-16b-chat,serving,8,128 2048,128 2048,,,1 8 32 128,bfloat16,1024,65536,4096,0.9
@@ -85,6 +85,12 @@ if [[ $MODEL == *"Llama-4"* ]]; then
     VLLM_ARGS='--compilation-config {"cudagraph_mode":"PIECEWISE","pass_config":{"enable_attn_fusion":false}}'
 fi
 
+# DeepSeek MoE models hit a weight shuffling issue with the current AITER implementation, so force VLLM_ROCM_USE_AITER=0 for them (this overrides any value already set in the environment)
+if [[ $MODEL == *"deepseek-moe"* ]]; then
+    echo "Disabling AITER for DeepSeek MoE model (not compatible with current AITER implementation)"
+    export VLLM_ROCM_USE_AITER=0
+fi
+
 # MXFP4 models are only supported on MI35x i.e. gfx950
 if [[ $MODEL == *"MXFP4"* ]]; then
     if [[ $MAD_SYSTEM_GPU_ARCHITECTURE != *"gfx950"* ]]; then