From d30776aee18d0940513afaee2daed877d540b812 Mon Sep 17 00:00:00 2001 From: crzbulabula <532741407@qq.com> Date: Thu, 24 Oct 2024 11:20:10 +0800 Subject: [PATCH 1/2] pass some models --- vllm/model_executor/models/jais.py | 2 ++ vllm/model_executor/models/minicpm.py | 2 ++ vllm/model_executor/models/mpt.py | 2 ++ vllm/model_executor/models/nemotron.py | 2 ++ vllm/model_executor/models/olmo.py | 2 ++ 5 files changed, 10 insertions(+) diff --git a/vllm/model_executor/models/jais.py b/vllm/model_executor/models/jais.py index c5e5393442e3..261066544316 100644 --- a/vllm/model_executor/models/jais.py +++ b/vllm/model_executor/models/jais.py @@ -26,6 +26,7 @@ from torch import nn from vllm.attention import Attention, AttentionMetadata +from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) @@ -212,6 +213,7 @@ def forward( return hidden_states +@support_torch_compile class JAISModel(nn.Module): def __init__( diff --git a/vllm/model_executor/models/minicpm.py b/vllm/model_executor/models/minicpm.py index decd90b682a1..03fb036020f2 100644 --- a/vllm/model_executor/models/minicpm.py +++ b/vllm/model_executor/models/minicpm.py @@ -29,6 +29,7 @@ from transformers import PretrainedConfig from vllm.attention import Attention, AttentionMetadata +from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, LoRAConfig from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, @@ -348,6 +349,7 @@ def forward( return hidden_states, None +@support_torch_compile class MiniCPMModel(nn.Module): def __init__( diff --git a/vllm/model_executor/models/mpt.py b/vllm/model_executor/models/mpt.py index e3d3937b13fa..ee802030a5ef 100644 --- a/vllm/model_executor/models/mpt.py +++ b/vllm/model_executor/models/mpt.py @@ -7,6 +7,7 @@ import torch.nn as nn from vllm.attention import Attention, AttentionMetadata +from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) @@ -204,6 +205,7 @@ def forward( return hidden_states +@support_torch_compile class MPTModel(nn.Module): def __init__( diff --git a/vllm/model_executor/models/nemotron.py b/vllm/model_executor/models/nemotron.py index 14515e16e34a..72a09129fed6 100644 --- a/vllm/model_executor/models/nemotron.py +++ b/vllm/model_executor/models/nemotron.py @@ -27,6 +27,7 @@ from torch import nn from vllm.attention import Attention, AttentionMetadata +from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, LoRAConfig from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size from vllm.model_executor.layers.activation import get_act_fn @@ -290,6 +291,7 @@ def forward( return hidden_states, residual +@support_torch_compile class NemotronModel(nn.Module): def __init__( diff --git a/vllm/model_executor/models/olmo.py b/vllm/model_executor/models/olmo.py index 5ca7c66f5407..90ab8abcb84b 100644 --- a/vllm/model_executor/models/olmo.py +++ b/vllm/model_executor/models/olmo.py @@ -28,6 +28,7 @@ from transformers import OlmoConfig from vllm.attention import Attention, AttentionMetadata +from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size from vllm.model_executor.layers.activation import SiluAndMul @@ -221,6 +222,7 @@ def forward( return hidden_states +@support_torch_compile class OlmoModel(nn.Module): def __init__(self, From f515929c16ae9e975b9d28a58b691d5ebbb3cc03 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Wed, 23 Oct 2024 23:49:21 -0700 Subject: [PATCH 2/2] rename core42 to inceptionai Signed-off-by: youkaichao --- docs/source/models/supported_models.rst | 2 +- tests/distributed/test_pipeline_parallel.py | 2 +- vllm/model_executor/models/jais.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst index 456269261300..9b27f4733f97 100644 --- a/docs/source/models/supported_models.rst +++ b/docs/source/models/supported_models.rst @@ -144,7 +144,7 @@ Text Generation - ✅︎ * - :code:`JAISLMHeadModel` - Jais - - :code:`core42/jais-13b`, :code:`core42/jais-13b-chat`, :code:`core42/jais-30b-v3`, :code:`core42/jais-30b-chat-v3`, etc. + - :code:`inceptionai/jais-13b`, :code:`inceptionai/jais-13b-chat`, :code:`inceptionai/jais-30b-v3`, :code:`inceptionai/jais-30b-chat-v3`, etc. - - ✅︎ * - :code:`JambaForCausalLM` diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py index a93cdbe1cf2a..88542a9c06d8 100644 --- a/tests/distributed/test_pipeline_parallel.py +++ b/tests/distributed/test_pipeline_parallel.py @@ -148,7 +148,7 @@ def iter_params(self, model_name: str): # Uses Llama # "internlm/internlm-chat-7b": PPTestSettings.fast(), "internlm/internlm2-chat-7b": PPTestSettings.fast(trust_remote_code=True), - "core42/jais-13b-chat": PPTestSettings.fast(), + "inceptionai/jais-13b-chat": PPTestSettings.fast(), # TODO: Implement PP # "ai21labs/AI21-Jamba-1.5-Mini": PPTestSettings.fast(), "openbmb/MiniCPM-2B-sft-bf16": PPTestSettings.fast(trust_remote_code=True), diff --git a/vllm/model_executor/models/jais.py b/vllm/model_executor/models/jais.py index 261066544316..b947f24a693b 100644 --- a/vllm/model_executor/models/jais.py +++ b/vllm/model_executor/models/jais.py @@ -1,6 +1,6 @@ # coding=utf-8 # Adapted from -# https://huggingface.co/core42/jais-30b-chat-v3/blob/main/modeling_jais.py +# https://huggingface.co/inceptionai/jais-30b-chat-v3/blob/main/modeling_jais.py # Copyright 2023 The vLLM team. # Copyright 2023 the Jais authors and HuggingFace Inc. team. All rights # reserved.