|
9214 | 9214 | "<|im_start|>", |
9215 | 9215 | "<|im_end|>" |
9216 | 9216 | ] |
| 9217 | + }, |
| 9218 | + { |
| 9219 | + "version": 1, |
| 9220 | + "context_length": 32768, |
| 9221 | + "model_name": "qwen3", |
| 9222 | + "model_lang": [ |
| 9223 | + "en", |
| 9224 | + "zh" |
| 9225 | + ], |
| 9226 | + "model_ability": [ |
| 9227 | + "generate", |
| 9228 | + "chat", |
| 9229 | + "reasoning", |
| 9230 | + "tools" |
| 9231 | + ], |
| 9232 | + "model_description": "Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support.", |
| 9233 | + "model_specs": [ |
| 9234 | + { |
| 9235 | + "model_format": "pytorch", |
| 9236 | + "model_size_in_billions": "0_6", |
| 9237 | + "quantizations": [ |
| 9238 | + "4-bit", |
| 9239 | + "8-bit", |
| 9240 | + "none" |
| 9241 | + ], |
| 9242 | + "model_id": "Qwen/Qwen3-0.6B", |
| 9243 | + "model_hub": "modelscope" |
| 9244 | + }, |
| 9245 | + { |
| 9246 | + "model_format": "fp8", |
| 9247 | + "model_size_in_billions": "0_6", |
| 9248 | + "quantizations": [ |
| 9249 | + "fp8" |
| 9250 | + ], |
| 9251 | + "model_id": "Qwen/Qwen3-0.6B-FP8", |
| 9252 | + "model_hub": "modelscope" |
| 9253 | + }, |
| 9254 | + { |
| 9255 | + "model_format": "pytorch", |
| 9256 | + "model_size_in_billions": "1_7", |
| 9257 | + "quantizations": [ |
| 9258 | + "4-bit", |
| 9259 | + "8-bit", |
| 9260 | + "none" |
| 9261 | + ], |
| 9262 | + "model_id": "Qwen/Qwen3-1.7B", |
| 9263 | + "model_hub": "modelscope" |
| 9264 | + }, |
| 9265 | + { |
| 9266 | + "model_format": "fp8", |
| 9267 | + "model_size_in_billions": "1_7", |
| 9268 | + "quantizations": [ |
| 9269 | + "fp8" |
| 9270 | + ], |
| 9271 | + "model_id": "Qwen/Qwen3-1.7B-FP8", |
| 9272 | + "model_hub": "modelscope" |
| 9273 | + }, |
| 9274 | + { |
| 9275 | + "model_format": "pytorch", |
| 9276 | + "model_size_in_billions": 4, |
| 9277 | + "quantizations": [ |
| 9278 | + "4-bit", |
| 9279 | + "8-bit", |
| 9280 | + "none" |
| 9281 | + ], |
| 9282 | + "model_id": "Qwen/Qwen3-4B", |
| 9283 | + "model_hub": "modelscope" |
| 9284 | + }, |
| 9285 | + { |
| 9286 | + "model_format": "fp8", |
| 9287 | + "model_size_in_billions": 4, |
| 9288 | + "quantizations": [ |
| 9289 | + "fp8" |
| 9290 | + ], |
| 9291 | + "model_id": "Qwen/Qwen3-4B-FP8", |
| 9292 | + "model_hub": "modelscope" |
| 9293 | + }, |
| 9294 | + { |
| 9295 | + "model_format": "pytorch", |
| 9296 | + "model_size_in_billions": 8, |
| 9297 | + "quantizations": [ |
| 9298 | + "4-bit", |
| 9299 | + "8-bit", |
| 9300 | + "none" |
| 9301 | + ], |
| 9302 | + "model_id": "Qwen/Qwen3-8B", |
| 9303 | + "model_hub": "modelscope" |
| 9304 | + }, |
| 9305 | + { |
| 9306 | + "model_format": "fp8", |
| 9307 | + "model_size_in_billions": 8, |
| 9308 | + "quantizations": [ |
| 9309 | + "fp8" |
| 9310 | + ], |
| 9311 | + "model_id": "Qwen/Qwen3-8B-FP8", |
| 9312 | + "model_hub": "modelscope" |
| 9313 | + }, |
| 9314 | + { |
| 9315 | + "model_format": "pytorch", |
| 9316 | + "model_size_in_billions": 14, |
| 9317 | + "quantizations": [ |
| 9318 | + "4-bit", |
| 9319 | + "8-bit", |
| 9320 | + "none" |
| 9321 | + ], |
| 9322 | + "model_id": "Qwen/Qwen3-14B", |
| 9323 | + "model_hub": "modelscope" |
| 9324 | + }, |
| 9325 | + { |
| 9326 | + "model_format": "fp8", |
| 9327 | + "model_size_in_billions": 14, |
| 9328 | + "quantizations": [ |
| 9329 | + "fp8" |
| 9330 | + ], |
| 9331 | + "model_id": "Qwen/Qwen3-14B-FP8", |
| 9332 | + "model_hub": "modelscope" |
| 9333 | + }, |
| 9334 | + { |
| 9335 | + "model_format": "pytorch", |
| 9336 | + "model_size_in_billions": 30, |
| 9337 | + "quantizations": [ |
| 9338 | + "4-bit", |
| 9339 | + "8-bit", |
| 9340 | + "none" |
| 9341 | + ], |
| 9342 | + "model_id": "Qwen/Qwen3-30B-A3B", |
| 9343 | + "model_hub": "modelscope" |
| 9344 | + }, |
| 9345 | + { |
| 9346 | + "model_format": "fp8", |
| 9347 | + "model_size_in_billions": 30, |
| 9348 | + "quantizations": [ |
| 9349 | + "fp8" |
| 9350 | + ], |
| 9351 | + "model_id": "Qwen/Qwen3-30B-A3B-FP8", |
| 9352 | + "model_hub": "modelscope" |
| 9353 | + }, |
| 9354 | + { |
| 9355 | + "model_format": "pytorch", |
| 9356 | + "model_size_in_billions": 32, |
| 9357 | + "quantizations": [ |
| 9358 | + "4-bit", |
| 9359 | + "8-bit", |
| 9360 | + "none" |
| 9361 | + ], |
| 9362 | + "model_id": "Qwen/Qwen3-32B", |
| 9363 | + "model_hub": "modelscope" |
| 9364 | + }, |
| 9365 | + { |
| 9366 | + "model_format": "fp8", |
| 9367 | + "model_size_in_billions": 32, |
| 9368 | + "quantizations": [ |
| 9369 | + "fp8" |
| 9370 | + ], |
| 9371 | + "model_id": "Qwen/Qwen3-32B-FP8", |
| 9372 | + "model_hub": "modelscope" |
| 9373 | + }, |
| 9374 | + { |
| 9375 | + "model_format": "pytorch", |
| 9376 | + "model_size_in_billions": 235, |
| 9377 | + "quantizations": [ |
| 9378 | + "4-bit", |
| 9379 | + "8-bit", |
| 9380 | + "none" |
| 9381 | + ], |
| 9382 | + "model_id": "Qwen/Qwen3-235B-A22B", |
| 9383 | + "model_hub": "modelscope" |
| 9384 | + }, |
| 9385 | + { |
| 9386 | + "model_format": "fp8", |
| 9387 | + "model_size_in_billions": 235, |
| 9388 | + "quantizations": [ |
| 9389 | + "fp8" |
| 9390 | + ], |
| 9391 | + "model_id": "Qwen/Qwen3-235B-A22B-FP8", |
| 9392 | + "model_hub": "modelscope" |
| 9393 | + } |
| 9394 | + ], |
| 9395 | + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", |
| 9396 | + "stop_token_ids": [ |
| 9397 | + 151643, |
| 9398 | + 151644, |
| 9399 | + 151645 |
| 9400 | + ], |
| 9401 | + "stop": [ |
| 9402 | + "<|endoftext|>", |
| 9403 | + "<|im_start|>", |
| 9404 | + "<|im_end|>" |
| 9405 | + ] |
9217 | 9406 | } |
9218 | 9407 | ] |
0 commit comments