11# SPDX-License-Identifier: Apache-2.0
2- """An example showing how to use vLLM to serve multimodal models
2+ """An example showing how to use vLLM to serve multimodal models
33and run online serving with OpenAI client.
44
55Launch the vLLM server with the following command:
1212 --trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}'
1313
1414(audio inference with Ultravox)
vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b \
    --max-model-len 4096 --trust-remote-code

run the script with
python openai_chat_completion_client_for_multimodal.py --chat-type audio
1620"""
21+
1722import base64
1823
1924import requests
2025from openai import OpenAI
26+ from utils import get_first_model
2127
2228from vllm .utils import FlexibleArgumentParser
2329
3137 base_url = openai_api_base ,
3238)
3339
34- models = client .models .list ()
35- model = models .data [0 ].id
36-
3740
3841def encode_base64_content_from_url (content_url : str ) -> str :
3942 """Encode a content retrieved from a remote url to base64 format."""
@@ -46,7 +49,7 @@ def encode_base64_content_from_url(content_url: str) -> str:
4649
4750
4851# Text-only inference
49- def run_text_only () -> None :
52+ def run_text_only (model : str ) -> None :
5053 chat_completion = client .chat .completions .create (
5154 messages = [{
5255 "role" : "user" ,
@@ -61,7 +64,7 @@ def run_text_only() -> None:
6164
6265
6366# Single-image input inference
64- def run_single_image () -> None :
67+ def run_single_image (model : str ) -> None :
6568
6669 ## Use image url in the payload
6770 image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
@@ -117,7 +120,7 @@ def run_single_image() -> None:
117120
118121
119122# Multi-image input inference
120- def run_multi_image () -> None :
123+ def run_multi_image (model : str ) -> None :
121124 image_url_duck = "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg"
122125 image_url_lion = "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg"
123126 chat_completion_from_url = client .chat .completions .create (
@@ -152,7 +155,7 @@ def run_multi_image() -> None:
152155
153156
154157# Video input inference
155- def run_video () -> None :
158+ def run_video (model : str ) -> None :
156159 video_url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4"
157160 video_base64 = encode_base64_content_from_url (video_url )
158161
@@ -208,7 +211,7 @@ def run_video() -> None:
208211
209212
210213# Audio input inference
211- def run_audio () -> None :
214+ def run_audio (model : str ) -> None :
212215 from vllm .assets .audio import AudioAsset
213216
214217 audio_url = AudioAsset ("winning_call" ).url
@@ -318,7 +321,8 @@ def parse_args():
318321
def main(args) -> None:
    """Run the example selected by ``--chat-type``.

    Queries the running vLLM server for the first model it serves (so the
    script works regardless of which model was launched) and dispatches to
    the matching example function, which receives the model name.

    Args:
        args: Parsed CLI namespace; only ``args.chat_type`` is read.
            It must be a key of ``example_function_map``.
    """
    chat_type = args.chat_type
    # Resolve the served model from the endpoint instead of hard-coding it.
    model = get_first_model(client)
    example_function_map[chat_type](model)
323327
324328if __name__ == "__main__" :
0 commit comments