11# Adapted from
2- # https://github.com/THUDM/GLM-4
3- """Inference-only ChatGLM model compatible with THUDM weights."""
2+ # https://github.com/THUDM/CogAgent
3+ """Inference-only CogAgent model compatible with THUDM weights."""
44from argparse import Namespace
55from array import array
66from typing import (Dict, Iterable, List, Mapping, Optional, Set, Tuple,
@@ -201,7 +201,6 @@ def input_processor_for_glmv(ctx: InputContext, inputs: DecoderOnlyInputs):
201201
202202 new_input_ids = []
203203 final_processed_position = 0
204- final_processed_position = 0
205204
206205 for boi_position, eoi_position in zip(boi_positions, eoi_positions):
207206 assert boi_position < eoi_position
@@ -275,12 +274,15 @@ def __init__(
275274 # https://huggingface.co/THUDM/chatglm3-6b-32k/blob/e210410255278dd9d74463cf396ba559c0ef801c/modeling_chatglm.py#L141
276275 rope_ratio = getattr(config, "rope_ratio", 1.0)
277276 max_positions = getattr(config, "seq_length", 8192)
277+ # NOTE: THUDM/cogagent-9b-20241220 uses original_rope=False,
278+ # which is equivalent to is_neox_style=True
279+ is_neox_style = not config.original_rope
278280 self.rotary_emb = get_rope(
279281 self.head_dim,
280282 rotary_dim=self.head_dim // 2,
281283 max_position=max_positions,
282284 base=10000 * rope_ratio,
283- is_neox_style=False ,
285+ is_neox_style=is_neox_style ,
284286 )
285287 self.attn = Attention(self.num_heads,
286288 self.head_dim,
@@ -779,4 +781,4 @@ def __new__(
779781 return ChatGLMV(vllm_config=vllm_config, prefix=prefix)
780782 # Initialize LLM
781783 else:
782- return ChatGLM(vllm_config=vllm_config, prefix=prefix)
784+ return ChatGLM(vllm_config=vllm_config, prefix=prefix)
0 commit comments