Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions xinference/model/llm/llm_family.json
Original file line number Diff line number Diff line change
Expand Up @@ -10946,5 +10946,48 @@
"<|im_start|>",
"<|im_end|>"
]
},
{
"version": 1,
"context_length": 32768,
"model_name": "glm4-0414",
"model_lang": [
"en",
"zh"
],
"model_ability": [
"chat",
"tools"
],
"model_description": "The GLM family welcomes new members, the GLM-4-32B-0414 series models, featuring 32 billion parameters. Its performance is comparable to OpenAI’s GPT series and DeepSeek’s V3/R1 series",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": 9,
"quantizations": [
"none"
],
"model_id": "THUDM/GLM-4-9B-0414"
},
{
"model_format": "pytorch",
"model_size_in_billions": 32,
"quantizations": [
"none"
],
"model_id": "THUDM/GLM-4-32B-0414"
}
],
"chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
"stop_token_ids": [
151329,
151336,
151338
],
"stop": [
"<|endoftext|>",
"<|user|>",
"<|observation|>"
]
}
]
45 changes: 45 additions & 0 deletions xinference/model/llm/llm_family_modelscope.json
Original file line number Diff line number Diff line change
Expand Up @@ -8719,5 +8719,50 @@
"<|im_start|>",
"<|im_end|>"
]
},
{
"version": 1,
"context_length": 32768,
"model_name": "glm4-0414",
"model_lang": [
"en",
"zh"
],
"model_ability": [
"chat",
"tools"
],
"model_description": "The GLM family welcomes new members, the GLM-4-32B-0414 series models, featuring 32 billion parameters. Its performance is comparable to OpenAI’s GPT series and DeepSeek’s V3/R1 series",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": 9,
"quantizations": [
"none"
],
"model_id": "ZhipuAI/GLM-4-9B-0414",
"model_hub": "modelscope"
},
{
"model_format": "pytorch",
"model_size_in_billions": 32,
"quantizations": [
"none"
],
"model_id": "ZhipuAI/GLM-4-32B-0414",
"model_hub": "modelscope"
}
],
"chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
"stop_token_ids": [
151329,
151336,
151338
],
"stop": [
"<|endoftext|>",
"<|user|>",
"<|observation|>"
]
}
]
3 changes: 3 additions & 0 deletions xinference/model/llm/vllm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,9 @@ class VLLMGenerateConfig(TypedDict, total=False):
VLLM_SUPPORTED_CHAT_MODELS.append("gemma-3-1b-it")
VLLM_SUPPORTED_VISION_MODEL_LIST.append("gemma-3-it")

def _vllm_version_tuple() -> tuple:
    """Return the leading (major, minor, patch) components of the installed
    vllm version as an integer tuple, for correct ordering comparisons.

    Non-numeric suffixes within a component (e.g. "4rc1") are truncated at
    the first non-digit character; missing components default to 0.
    Must only be called when vllm is importable (i.e. VLLM_INSTALLED).
    """
    components = []
    for piece in vllm.__version__.split(".")[:3]:
        digits = ""
        for ch in piece:
            if not ch.isdigit():
                break
            digits += ch
        components.append(int(digits or "0"))
    # Pad so short versions like "0.8" still compare against 3-tuples.
    while len(components) < 3:
        components.append(0)
    return tuple(components)


# BUG FIX: the original gate compared version strings lexicographically
# (vllm.__version__ >= "0.8.4"), which misorders releases — e.g.
# "0.10.0" < "0.8.4" as strings — so glm4-0414 support would silently
# disappear on vLLM 0.10+. Compare numeric tuples instead; the helper is
# only evaluated when VLLM_INSTALLED is truthy (short-circuit).
if VLLM_INSTALLED and _vllm_version_tuple() >= (0, 8, 4):
    VLLM_SUPPORTED_CHAT_MODELS.append("glm4-0414")


class VLLMModel(LLM):
def __init__(
Expand Down
Loading