
Commit 94f5a99

[Bugfix] Fix chat template loading
Signed-off-by: DarkLight1337 <[email protected]>
1 parent 374ee28 commit 94f5a99

2 files changed: +17 -15 lines changed

tests/entrypoints/test_chat_utils.py
Lines changed: 2 additions & 0 deletions

@@ -23,6 +23,7 @@
 PHI3V_MODEL_ID = "microsoft/Phi-3.5-vision-instruct"
 ULTRAVOX_MODEL_ID = "fixie-ai/ultravox-v0_5-llama-3_2-1b"
 QWEN2VL_MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
+QWEN25VL_MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
 MLLAMA_MODEL_ID = "meta-llama/Llama-3.2-11B-Vision-Instruct"
 LLAMA_GUARD_MODEL_ID = "meta-llama/Llama-Guard-3-1B"

@@ -716,6 +717,7 @@ def get_conversation(is_hf: bool):
     ("model", "expected_format"),
     [(PHI3V_MODEL_ID, "string"),
      (QWEN2VL_MODEL_ID, "openai"),
+     (QWEN25VL_MODEL_ID, "openai"),
      (ULTRAVOX_MODEL_ID, "string"),
      (MLLAMA_MODEL_ID, "openai"),
      (LLAMA_GUARD_MODEL_ID, "openai")],

vllm/entrypoints/chat_utils.py
Lines changed: 15 additions & 15 deletions

@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0

 import asyncio
-import codecs
 import json
 from abc import ABC, abstractmethod
 from collections import defaultdict, deque
@@ -312,16 +311,21 @@ def _resolve_chat_template_content_format(
     tokenizer: AnyTokenizer,
 ) -> _ChatTemplateContentFormat:
     if isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
-        tokenizer_chat_template = tokenizer.chat_template
+        try:
+            # Prioritize processor's chat template for multi-modal models
+            processor = cached_get_processor(tokenizer.name_or_path)
+            hf_chat_template = processor.chat_template
+        except Exception:
+            hf_chat_template = tokenizer.chat_template
     else:
-        tokenizer_chat_template = None
+        hf_chat_template = None

     jinja_text: Optional[str]
-    if isinstance(tokenizer_chat_template, str) and chat_template is None:
-        jinja_text = tokenizer_chat_template
-    elif (isinstance(tokenizer_chat_template, dict)
-          and chat_template in tokenizer_chat_template):
-        jinja_text = tokenizer_chat_template[chat_template]
+    if isinstance(hf_chat_template, str) and chat_template is None:
+        jinja_text = hf_chat_template
+    elif (isinstance(hf_chat_template, dict)
+          and chat_template in hf_chat_template):
+        jinja_text = hf_chat_template[chat_template]
     else:
         jinja_text = load_chat_template(chat_template, is_literal=True)
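Note: the selection order this hunk preserves, distilled into a standalone sketch (the helper name is illustrative, not part of vllm.entrypoints.chat_utils):

from typing import Optional, Union

# Hypothetical helper mirroring the branch above: a plain-string HF template
# wins when the user passed no override; a dict of named templates (e.g.
# "default", "tool_use") is indexed by the user-supplied name; anything else
# falls through to the user-supplied value.
def pick_jinja_text(hf_chat_template: Optional[Union[str, dict]],
                    chat_template: Optional[str]) -> Optional[str]:
    if isinstance(hf_chat_template, str) and chat_template is None:
        return hf_chat_template
    if isinstance(hf_chat_template, dict) and chat_template in hf_chat_template:
        return hf_chat_template[chat_template]
    # In the real code this value is handled by load_chat_template(...).
    return chat_template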

@@ -724,7 +728,7 @@ def load_chat_template(
             raise TypeError("chat_template is expected to be read directly "
                             "from its value")

-        return codecs.decode(chat_template, "unicode_escape")
+        return chat_template

     try:
         with open(chat_template) as f:
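Note: the dropped codecs.decode(chat_template, "unicode_escape") call is the core of the fix: the unicode_escape codec round-trips a str through latin-1 before interpreting escapes, so a literal template containing any character outside latin-1 raises instead of loading. A minimal, runnable reproduction:

# Why unicode_escape-decoding a literal template is unsafe: the codec first
# encodes the str as latin-1, so CJK, emoji, and similar characters fail.
import codecs

template = "{{ '你好' }}"  # any non-latin-1 character triggers the failure

try:
    codecs.decode(template, "unicode_escape")
except UnicodeEncodeError as exc:
    print(exc)  # 'latin-1' codec can't encode characters ...

It can also silently alter templates whose Jinja source contains literal backslash sequences, since "\n" and friends get converted to control characters.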
@@ -1071,17 +1075,13 @@ def apply_hf_chat_template(
     tokenize: bool = False,  # Different from HF's default
     **kwargs: Any,
 ) -> str:
-    if chat_template is None:
-        chat_template = tokenizer.chat_template
-
-    # FIXME: Temporary workaround for
-    # https://huggingface.co/mistral-community/pixtral-12b/discussions/31
     if chat_template is None:
         try:
+            # Prioritize processor's chat template for multi-modal models
             processor = cached_get_processor(tokenizer.name_or_path)
             chat_template = processor.chat_template
         except Exception:
-            pass
+            chat_template = tokenizer.chat_template

     if chat_template is None:
         raise ValueError(
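Note: after this change, _resolve_chat_template_content_format and apply_hf_chat_template share the same lookup order. A standalone sketch of that order (the helper name and the AutoProcessor call are illustrative; vLLM goes through its own cached_get_processor):

from transformers import AutoProcessor

# Hypothetical helper showing the shared fallback in one place; it is not
# a vLLM API, just the lookup order introduced by this commit.
def resolve_hf_chat_template(tokenizer, chat_template=None):
    if chat_template is None:
        try:
            # Multi-modal checkpoints ship their template with the processor
            # (chat_template.json), so it takes priority.
            processor = AutoProcessor.from_pretrained(tokenizer.name_or_path)
            chat_template = processor.chat_template
        except Exception:
            # Text-only models have no processor template; use the tokenizer's.
            chat_template = tokenizer.chat_template
    if chat_template is None:
        raise ValueError("no chat template found for this model")
    return chat_template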
