Description
Your current environment
MODEL_PATH="/data2/jcxy/llm_model/DeepSeek-R1-0528-GGUF-UD-IQ2_XXS/DeepSeek-R1-0528-UD-IQ2_XXS-00001-of-00005.gguf"
LOG_FILE="vllm.log"
export VLLM_USE_V1=0
SERVED_MODEL_NAME="DeepSeek-R1-0528"
export CUDA_VISIBLE_DEVICES=2,3,4,5
# Run command
nohup vllm serve \
  "$MODEL_PATH" \
  --hf-config-path /data2/jcxy/llm_model/DeepSeek-R1-0528-GGUF-UD-IQ2_XXS \
  --tokenizer /data2/jcxy/llm_model/DeepSeek-R1-0528-GGUF-UD-IQ2_XXS \
  --served-model-name "$SERVED_MODEL_NAME" \
  --trust-remote-code \
  --port 6011 \
  --host 0.0.0.0 \
  --dtype auto \
  --max-model-len 8192 \
  --gpu_memory_utilization 0.98 \
  --tensor_parallel_size 4 \
  --enable-prefix-caching \
  > "$LOG_FILE" 2>&1 &
🐛 Describe the bug
INFO 06-03 11:53:19 [__init__.py:243] Automatically detected platform cuda.
INFO 06-03 11:53:22 [__init__.py:31] Available plugins for group vllm.general_plugins:
INFO 06-03 11:53:22 [__init__.py:33] - lora_filesystem_resolver -> vllm.plugins.lora_resolvers.filesystem_resolver:register_filesystem_resolver
INFO 06-03 11:53:22 [__init__.py:36] All plugins in this group will be loaded. Set VLLM_PLUGINS to control which plugins to load.
INFO 06-03 11:53:24 [api_server.py:1289] vLLM API server version 0.9.0.1
INFO 06-03 11:53:24 [cli_args.py:300] non-default args: {'host': '0.0.0.0', 'port': 6011, 'tokenizer': '/data2/jcxy/llm_model/DeepSeek-R1-0528-GGUF-UD-IQ2_XXS', 'trust_remote_code': True, 'hf_config_path': '/data2/jcxy/llm_model/DeepSeek-R1-0528-GGUF-UD-IQ2_XXS', 'max_model_len': 8192, 'served_model_name': ['DeepSeek-R1-0528'], 'tensor_parallel_size': 4, 'gpu_memory_utilization': 0.98, 'enable_prefix_caching': True}
INFO 06-03 11:53:24 [config.py:213] Replacing legacy 'type' key with 'rope_type'
INFO 06-03 11:53:32 [config.py:793] This model supports multiple tasks: {'reward', 'generate', 'embed', 'score', 'classify'}. Defaulting to 'generate'.
Traceback (most recent call last):
File "/data/jcxy/haolu/anaconda3/envs/haolu/bin/vllm", line 8, in
sys.exit(main())
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/site-packages/vllm/entrypoints/cli/main.py", line 56, in main
args.dispatch_function(args)
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/site-packages/vllm/entrypoints/cli/serve.py", line 42, in cmd
uvloop.run(run_server(args))
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/site-packages/uvloop/init.py", line 82, in run
return loop.run_until_complete(wrapper())
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/site-packages/uvloop/init.py", line 61, in wrapper
return await main
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 1324, in run_server
async with build_async_engine_client(args) as engine_client:
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/contextlib.py", line 199, in aenter
return await anext(self.gen)
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 153, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/contextlib.py", line 199, in aenter
return await anext(self.gen)
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 173, in build_async_engine_client_from_engine_args
vllm_config = engine_args.create_engine_config(usage_context=usage_context)
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/site-packages/vllm/engine/arg_utils.py", line 983, in create_engine_config
model_config = self.create_model_config()
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/site-packages/vllm/engine/arg_utils.py", line 875, in create_model_config
return ModelConfig(
File "", line 42, in init
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/site-packages/vllm/config.py", line 606, in post_init
self._verify_quantization()
File "/data/jcxy/haolu/anaconda3/envs/haolu/lib/python3.10/site-packages/vllm/config.py", line 893, in _verify_quantization
raise ValueError(
ValueError: Quantization method specified in the model config (fp8) does not match the quantization method specified in the quantization argument (gguf).
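
A possible workaround I have been considering (untested, paths are just the ones from my command above): _verify_quantization appears to compare the gguf quantization inferred from the .gguf checkpoint against the fp8 quantization_config read via --hf-config-path, so stripping that block from a backed-up copy of config.json might let the GGUF path proceed:

CONFIG=/data2/jcxy/llm_model/DeepSeek-R1-0528-GGUF-UD-IQ2_XXS/config.json
cp "$CONFIG" "$CONFIG.bak"   # keep a backup before editing
python -c "import json, sys; p = sys.argv[1]; cfg = json.load(open(p)); cfg.pop('quantization_config', None); json.dump(cfg, open(p, 'w'), indent=2)" "$CONFIG"

I have not confirmed whether removing the key has other side effects for this model, so treat it as a sketch rather than a fix.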
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.