
Commit dd90bd1

esmeetu authored and jimpang committed
Refactor llama family models (vllm-project#2637)
1 parent 008c0ae · commit dd90bd1

17 files changed (+236 −2720 lines)

vllm/model_executor/layers/layernorm.py

Lines changed: 25 additions & 0 deletions
@@ -7,6 +7,31 @@
 from vllm._C import ops
 
 
+class LayerNorm(nn.LayerNorm):
+
+    def __init__(
+        self,
+        hidden_size: int,
+        eps: float = 1e-6,
+    ) -> None:
+        super().__init__(hidden_size, eps=eps)
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        residual: Optional[torch.Tensor] = None,
+    ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+        """normalization."""
+        if residual is not None:
+            x = x + residual
+            residual = x
+        x = super().forward(x)
+        if residual is None:
+            return x
+        else:
+            return x, residual
+
+
 class RMSNorm(nn.Module):
     """Root mean square normalization.
 
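For illustration, a minimal usage sketch of the LayerNorm class added above, assuming only that torch is installed and that the class is importable from the file path shown in this diff; the tensor shapes are arbitrary. Called without a residual it behaves like nn.LayerNorm; called with one, it adds the residual to the input before normalizing and also returns the pre-norm sum so the caller can reuse it as the next layer's residual.

import torch

from vllm.model_executor.layers.layernorm import LayerNorm

norm = LayerNorm(hidden_size=4096, eps=1e-6)
x = torch.randn(2, 4096)

# Without a residual, the call matches nn.LayerNorm and returns a single tensor.
out = norm(x)

# With a residual, x + residual is normalized, and the pre-norm sum is returned
# alongside the output for reuse as the next layer's residual input.
residual = torch.randn(2, 4096)
out, next_residual = norm(x, residual)
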
vllm/model_executor/models/__init__.py

Lines changed: 4 additions & 5 deletions
@@ -10,8 +10,8 @@
 
 # Architecture -> (module, class).
 _MODELS = {
-    "AquilaModel": ("aquila", "AquilaForCausalLM"),
-    "AquilaForCausalLM": ("aquila", "AquilaForCausalLM"),  # AquilaChat2
+    "AquilaModel": ("llama", "LlamaForCausalLM"),
+    "AquilaForCausalLM": ("llama", "LlamaForCausalLM"),  # AquilaChat2
     "BaiChuanForCausalLM": ("baichuan", "BaiChuanForCausalLM"),  # baichuan-7b
     "BaichuanForCausalLM": ("baichuan", "BaichuanForCausalLM"),  # baichuan-13b
     "BloomForCausalLM": ("bloom", "BloomForCausalLM"),
@@ -24,12 +24,12 @@
     "GPTBigCodeForCausalLM": ("gpt_bigcode", "GPTBigCodeForCausalLM"),
     "GPTJForCausalLM": ("gpt_j", "GPTJForCausalLM"),
     "GPTNeoXForCausalLM": ("gpt_neox", "GPTNeoXForCausalLM"),
-    "InternLMForCausalLM": ("internlm", "InternLMForCausalLM"),
+    "InternLMForCausalLM": ("llama", "LlamaForCausalLM"),
     "InternLM2ForCausalLM": ("internlm2", "InternLM2ForCausalLM"),
     "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
     # For decapoda-research/llama-*
     "LLaMAForCausalLM": ("llama", "LlamaForCausalLM"),
-    "MistralForCausalLM": ("mistral", "MistralForCausalLM"),
+    "MistralForCausalLM": ("llama", "LlamaForCausalLM"),
     "MixtralForCausalLM": ("mixtral", "MixtralForCausalLM"),
     "QuantMixtralForCausalLM": ("mixtral_quant", "MixtralForCausalLM"),
     # transformers's mpt class has lower case
@@ -41,7 +41,6 @@
     "Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"),
     "RWForCausalLM": ("falcon", "FalconForCausalLM"),
     "StableLMEpochForCausalLM": ("stablelm", "StablelmForCausalLM"),
-    "YiForCausalLM": ("yi", "YiForCausalLM")
 }
 
 # Models not supported by ROCm.
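
For context, the _MODELS table maps a Hugging Face architecture string to a (module, class) pair under vllm.model_executor.models; after this commit the Aquila, InternLM, and Mistral entries all resolve to the shared Llama implementation. Below is a minimal sketch of how such a registry lookup can be resolved with importlib; the helper name load_model_cls and the trimmed table are illustrative, not necessarily vLLM's exact code.

import importlib
from typing import Optional, Tuple, Type

# Trimmed copy of the registry for illustration; the full table lives in
# vllm/model_executor/models/__init__.py.
_MODELS = {
    "AquilaForCausalLM": ("llama", "LlamaForCausalLM"),
    "InternLMForCausalLM": ("llama", "LlamaForCausalLM"),
    "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
    "MistralForCausalLM": ("llama", "LlamaForCausalLM"),
}


def load_model_cls(architecture: str) -> Optional[Type]:
    """Resolve an architecture string to its model class, or None if unknown."""
    entry: Optional[Tuple[str, str]] = _MODELS.get(architecture)
    if entry is None:
        return None
    module_name, class_name = entry
    module = importlib.import_module(
        f"vllm.model_executor.models.{module_name}")
    return getattr(module, class_name, None)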
