1313from vllm .model_executor .models import ModelRegistry
1414from vllm .platforms import current_platform
1515from vllm .tracing import is_otel_available , otel_import_error_traceback
16- from vllm .transformers_utils .config import (get_config ,
16+ from vllm .transformers_utils .config import (ConfigFormat , get_config ,
1717 get_hf_image_processor_config ,
1818 get_hf_text_config )
1919from vllm .utils import (STR_NOT_IMPL_ENC_DEC_CUDAGRAPH , GiB_bytes ,
@@ -121,35 +121,37 @@ class ModelConfig:
121121 override default neuron config that are specific to Neuron devices,
122122 this argument will be used to configure the neuron config that
123123 can not be gathered from the vllm arguments.
124+ config_format: The format of the model config to load.
125+ Defaults to 'auto', which in turn defaults to 'hf'.
124126 """
125127
126- def __init__ (
127- self ,
128- model : str ,
129- tokenizer : str ,
130- tokenizer_mode : str ,
131- trust_remote_code : bool ,
132- dtype : Union [ str , torch . dtype ] ,
133- seed : int ,
134- revision : Optional [str ] = None ,
135- code_revision : Optional [str ] = None ,
136- rope_scaling : Optional [dict ] = None ,
137- rope_theta : Optional [float ] = None ,
138- tokenizer_revision : Optional [str ] = None ,
139- max_model_len : Optional [int ] = None ,
140- spec_target_max_model_len : Optional [int ] = None ,
141- quantization : Optional [str ] = None ,
142- quantization_param_path : Optional [str ] = None ,
143- enforce_eager : Optional [bool ] = None ,
144- max_context_len_to_capture : Optional [int ] = None ,
145- max_seq_len_to_capture : Optional [ int ] = None ,
146- max_logprobs : int = 20 ,
147- disable_sliding_window : bool = False ,
148- skip_tokenizer_init : bool = False ,
149- served_model_name : Optional [Union [str , List [ str ] ]] = None ,
150- limit_mm_per_prompt : Optional [ Mapping [ str , int ]] = None ,
151- use_async_output_proc : bool = True ,
152- override_neuron_config : Optional [ Dict [ str , Any ]] = None ) -> None :
128+ def __init__ (self ,
129+ model : str ,
130+ tokenizer : str ,
131+ tokenizer_mode : str ,
132+ trust_remote_code : bool ,
133+ dtype : Union [ str , torch . dtype ] ,
134+ seed : int ,
135+ revision : Optional [ str ] = None ,
136+ code_revision : Optional [str ] = None ,
137+ rope_scaling : Optional [dict ] = None ,
138+ rope_theta : Optional [float ] = None ,
139+ tokenizer_revision : Optional [str ] = None ,
140+ max_model_len : Optional [int ] = None ,
141+ spec_target_max_model_len : Optional [int ] = None ,
142+ quantization : Optional [str ] = None ,
143+ quantization_param_path : Optional [str ] = None ,
144+ enforce_eager : Optional [bool ] = None ,
145+ max_context_len_to_capture : Optional [int ] = None ,
146+ max_seq_len_to_capture : Optional [int ] = None ,
147+ max_logprobs : int = 20 ,
148+ disable_sliding_window : bool = False ,
149+ skip_tokenizer_init : bool = False ,
150+ served_model_name : Optional [ Union [ str , List [ str ]]] = None ,
151+ limit_mm_per_prompt : Optional [Mapping [str , int ]] = None ,
152+ use_async_output_proc : bool = True ,
153+ override_neuron_config : Optional [ Dict [ str , Any ]] = None ,
154+ config_format : ConfigFormat = ConfigFormat . AUTO ) -> None :
153155 self .model = model
154156 self .tokenizer = tokenizer
155157 self .tokenizer_mode = tokenizer_mode
@@ -176,7 +178,8 @@ def __init__(
176178 self .skip_tokenizer_init = skip_tokenizer_init
177179
178180 self .hf_config = get_config (self .model , trust_remote_code , revision ,
179- code_revision , rope_scaling , rope_theta )
181+ code_revision , rope_scaling , rope_theta ,
182+ config_format )
180183 self .hf_text_config = get_hf_text_config (self .hf_config )
181184 self .hf_image_processor_config = get_hf_image_processor_config (
182185 self .model , revision )
@@ -746,6 +749,7 @@ class LoadFormat(str, enum.Enum):
746749 SHARDED_STATE = "sharded_state"
747750 GGUF = "gguf"
748751 BITSANDBYTES = "bitsandbytes"
752+ MISTRAL = "mistral"
749753
750754
751755@dataclass
0 commit comments