File tree Expand file tree Collapse file tree 2 files changed +10
-2
lines changed
model_executor/model_loader Expand file tree Collapse file tree 2 files changed +10
-2
lines changed Original file line number Diff line number Diff line change 1111 ChatCompletionContentPartImageParam , ChatCompletionContentPartTextParam ,
1212 MultiModalItemTracker , _ContentPart , _parse_chat_message_content_part )
1313from vllm .inputs import TokensPrompt
14- from vllm .model_executor .model_loader import get_model_cls
1514from vllm .model_executor .models .interfaces import supports_score_template
1615from vllm .multimodal .inputs import MultiModalDataDict
1716from vllm .outputs import PoolingRequestOutput
@@ -140,6 +139,8 @@ def apply_score_template(
140139 prompt_1 : str ,
141140 prompt_2 : str ,
142141) -> str :
142+ # NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf)
143+ from vllm .model_executor .model_loader import get_model_cls
143144
144145 model = get_model_cls (model_config )
145146 if supports_score_template (model ):
@@ -162,6 +163,9 @@ def post_process_tokens(
162163 Note:
163164 This is an in-place operation.
164165 """
166+ # NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf)
167+ from vllm .model_executor .model_loader import get_model_cls
168+
165169 model = get_model_cls (model_config )
166170 if supports_score_template (model ):
167171 model .post_process_tokens (prompt )
Original file line number Diff line number Diff line change 1414from typing import Any , Callable , Optional , Union
1515
1616import filelock
17- import gguf
1817import huggingface_hub .constants
1918import numpy as np
2019import torch
4039 SafetensorsStreamer = runai_model_streamer .placeholder_attr (
4140 "SafetensorsStreamer" )
4241
42+ try :
43+ import gguf
44+ except ImportError :
45+ gguf = PlaceholderModule ("gguf" )
46+
4347try :
4448 from fastsafetensors import SafeTensorsFileLoader , SingleGroup
4549except ImportError :
You can’t perform that action at this time.
0 commit comments