     base_url=openai_api_base,
 )
 
+headers = {"User-Agent": "vLLM Example Client"}
+
 
 def encode_base64_content_from_url(content_url: str) -> str:
     """Encode a content retrieved from a remote url to base64 format."""
 
-    with requests.get(content_url) as response:
+    with requests.get(content_url, headers=headers) as response:
         response.raise_for_status()
         result = base64.b64encode(response.content).decode("utf-8")
 
     return result
 
 
 # Text-only inference
-def run_text_only(model: str) -> None:
+def run_text_only(model: str, max_completion_tokens: int) -> None:
     chat_completion = client.chat.completions.create(
         messages=[{"role": "user", "content": "What's the capital of France?"}],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion.choices[0].message.content
-    print("Chat completion output:", result)
+    print("Chat completion output:\n", result)
 
 
 # Single-image input inference
-def run_single_image(model: str) -> None:
+def run_single_image(model: str, max_completion_tokens: int) -> None:
     ## Use image url in the payload
     image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
     chat_completion_from_url = client.chat.completions.create(
@@ -79,11 +81,11 @@ def run_single_image(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_url.choices[0].message.content
-    print("Chat completion output from image url:", result)
+    print("Chat completion output from image url:\n", result)
 
     ## Use base64 encoded image in the payload
     image_base64 = encode_base64_content_from_url(image_url)
@@ -101,15 +103,15 @@ def run_single_image(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_base64.choices[0].message.content
     print("Chat completion output from base64 encoded image:", result)
 
 
 # Multi-image input inference
-def run_multi_image(model: str) -> None:
+def run_multi_image(model: str, max_completion_tokens: int) -> None:
     image_url_duck = "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg"
     image_url_lion = "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg"
     chat_completion_from_url = client.chat.completions.create(
@@ -130,15 +132,15 @@ def run_multi_image(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_url.choices[0].message.content
-    print("Chat completion output:", result)
+    print("Chat completion output:\n", result)
 
 
 # Video input inference
-def run_video(model: str) -> None:
+def run_video(model: str, max_completion_tokens: int) -> None:
     video_url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4"
     video_base64 = encode_base64_content_from_url(video_url)
 
@@ -157,11 +159,11 @@ def run_video(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_url.choices[0].message.content
-    print("Chat completion output from image url:", result)
+    print("Chat completion output from video url:\n", result)
 
     ## Use base64 encoded video in the payload
     chat_completion_from_base64 = client.chat.completions.create(
@@ -178,15 +180,15 @@ def run_video(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_base64.choices[0].message.content
-    print("Chat completion output from base64 encoded image: ", result)
+    print("Chat completion output from base64 encoded video:\n", result)
 
 
 # Audio input inference
-def run_audio(model: str) -> None:
+def run_audio(model: str, max_completion_tokens: int) -> None:
     from vllm.assets.audio import AudioAsset
 
     audio_url = AudioAsset("winning_call").url
@@ -211,11 +213,11 @@ def run_audio(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_base64.choices[0].message.content
-    print("Chat completion output from input audio:", result)
+    print("Chat completion output from input audio:\n", result)
 
     # HTTP URL
     chat_completion_from_url = client.chat.completions.create(
@@ -235,11 +237,11 @@ def run_audio(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_url.choices[0].message.content
-    print("Chat completion output from audio url:", result)
+    print("Chat completion output from audio url:\n", result)
 
     # base64 URL
     chat_completion_from_base64 = client.chat.completions.create(
@@ -259,14 +261,14 @@ def run_audio(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_base64.choices[0].message.content
-    print("Chat completion output from base64 encoded audio:", result)
+    print("Chat completion output from base64 encoded audio:\n", result)
 
 
-def run_multi_audio(model: str) -> None:
+def run_multi_audio(model: str, max_completion_tokens: int) -> None:
     from vllm.assets.audio import AudioAsset
 
     # Two different audios to showcase batched inference.
@@ -300,11 +302,11 @@ def run_multi_audio(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_base64.choices[0].message.content
-    print("Chat completion output from input audio:", result)
+    print("Chat completion output from input audio:\n", result)
 
 
 example_function_map = {
@@ -330,13 +332,20 @@ def parse_args():
         choices=list(example_function_map.keys()),
         help="Conversation type with multimodal data.",
     )
+    parser.add_argument(
+        "--max-completion-tokens",
+        "-n",
+        type=int,
+        default=128,
+        help="Maximum number of tokens to generate for each completion.",
+    )
     return parser.parse_args()
 
 
 def main(args) -> None:
     chat_type = args.chat_type
     model = get_first_model(client)
-    example_function_map[chat_type](model)
+    example_function_map[chat_type](model, args.max_completion_tokens)
 
 
 if __name__ == "__main__":
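
As a quick, standalone illustration of the new option's behavior (this snippet is not part of the commit; it only mirrors the add_argument call in the diff above), the flag parses into args.max_completion_tokens, accepts -n as a short form, and falls back to the new default of 128 when omitted:

    import argparse

    # Minimal reproduction of the option added in parse_args() above.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--max-completion-tokens",
        "-n",
        type=int,
        default=128,
        help="Maximum number of tokens to generate for each completion.",
    )

    print(parser.parse_args([]).max_completion_tokens)  # 128 (default)
    print(parser.parse_args(["-n", "256"]).max_completion_tokens)  # 256
    print(parser.parse_args(["--max-completion-tokens", "64"]).max_completion_tokens)  # 64, the previously hard-coded value

Each run_* helper then receives this value through example_function_map[chat_type](model, args.max_completion_tokens), replacing the max_completion_tokens=64 that was previously hard-coded at every call site.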