@@ -46,6 +46,7 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
4646 "model" :
4747 "NousResearch/Hermes-3-Llama-3.1-8B" ,
4848 "arguments" : [
49+ "--enforce-eager" , "--no-enable-prefix-caching" ,
4950 "--tool-call-parser" , "hermes" , "--chat-template" ,
5051 str (VLLM_PATH / "examples/tool_chat_template_hermes.jinja" )
5152 ],
@@ -60,6 +61,7 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
6061 "model" :
6162 "meta-llama/Meta-Llama-3.1-8B-Instruct" ,
6263 "arguments" : [
64+ "--enforce-eager" , "--no-enable-prefix-caching" ,
6365 "--tool-call-parser" , "llama3_json" , "--chat-template" ,
6466 str (VLLM_PATH / "examples/tool_chat_template_llama3.1_json.jinja" )
6567 ],
@@ -70,6 +72,7 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
7072 "model" :
7173 "meta-llama/Llama-3.2-3B-Instruct" ,
7274 "arguments" : [
75+ "--enforce-eager" , "--no-enable-prefix-caching" ,
7376 "--tool-call-parser" , "llama3_json" , "--chat-template" ,
7477 str (VLLM_PATH / "examples/tool_chat_template_llama3.2_json.jinja" )
7578 ],
@@ -80,6 +83,7 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
8083 "model" :
8184 "mistralai/Mistral-7B-Instruct-v0.3" ,
8285 "arguments" : [
86+ "--enforce-eager" , "--no-enable-prefix-caching" ,
8387 "--tool-call-parser" , "mistral" , "--chat-template" ,
8488 str (VLLM_PATH / "examples/tool_chat_template_mistral.jinja" ),
8589 "--ignore-patterns=\" consolidated.safetensors\" "
@@ -111,22 +115,28 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
111115 "model" :
112116 "ibm-granite/granite-3.0-8b-instruct" ,
113117 "arguments" : [
118+ "--enforce-eager" , "--no-enable-prefix-caching" ,
114119 "--tool-call-parser" , "granite" , "--chat-template" ,
115120 str (VLLM_PATH / "examples/tool_chat_template_granite.jinja" )
116121 ],
117122 },
118123 "granite-3.1-8b" : {
119- "model" : "ibm-granite/granite-3.1-8b-instruct" ,
124+ "model" :
125+ "ibm-granite/granite-3.1-8b-instruct" ,
120126 "arguments" : [
127+ "--enforce-eager" ,
128+ "--no-enable-prefix-caching" ,
121129 "--tool-call-parser" ,
122130 "granite" ,
123131 ],
124- "supports_parallel" : True ,
132+ "supports_parallel" :
133+ True ,
125134 },
126135 "internlm" : {
127136 "model" :
128137 "internlm/internlm2_5-7b-chat" ,
129138 "arguments" : [
139+ "--enforce-eager" , "--no-enable-prefix-caching" ,
130140 "--tool-call-parser" , "internlm" , "--chat-template" ,
131141 str (VLLM_PATH /
132142 "examples/tool_chat_template_internlm2_tool.jinja" ),
@@ -139,6 +149,7 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
139149 "model" :
140150 "Team-ACE/ToolACE-8B" ,
141151 "arguments" : [
152+ "--enforce-eager" , "--no-enable-prefix-caching" ,
142153 "--tool-call-parser" , "pythonic" , "--chat-template" ,
143154 str (VLLM_PATH / "examples/tool_chat_template_toolace.jinja" )
144155 ],
0 commit comments