Skip to content

Commit dbe4385

Browse files
authored
Support for sending images into OpenAI chat API (#4827)
1 parent 8956f3e commit dbe4385

File tree

2 files changed

+74
-2
lines changed

2 files changed

+74
-2
lines changed

extensions/multimodal/README.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,56 @@ This extension uses the following parameters (from `settings.json`):
6767

6868
## Usage through API
6969

70+
### Chat completions endpoint
71+
72+
#### With an image URL
73+
74+
```shell
75+
curl http://127.0.0.1:5000/v1/chat/completions \
76+
-H "Content-Type: application/json" \
77+
-d '{
78+
"messages": [
79+
{
80+
"role": "user",
81+
"image_url": "https://avatars.githubusercontent.com/u/112222186?v=4"
82+
},
83+
{
84+
"role": "user",
85+
"content": "What is unusual about this image?"
86+
}
87+
]
88+
}'
89+
```
90+
91+
#### With a Base64 image
92+
93+
```python
94+
import base64
95+
import json
96+
import requests
97+
98+
with open('image.jpg', 'rb') as img:
99+
    img_bytes = img.read()
100+
img_base64 = base64.b64encode(img_bytes).decode('utf-8')
101+
data = { "messages": [
102+
{
103+
"role": "user",
104+
"image_url": f"data:image/jpeg;base64,{img_base64}"
105+
},
106+
{
107+
"role": "user",
108+
"content": "What is unusual about this image?"
109+
}
110+
]
111+
}
112+
response = requests.post('http://127.0.0.1:5000/v1/chat/completions', json=data)
113+
print(response.text)
114+
```
115+
70116
You can run the multimodal inference through API, by inputting the images to prompt. Images are embedded like so: `f'<img src="data:image/jpeg;base64,{img_str}">'`, where `img_str` is base-64 jpeg data. Note that you will need to launch `server.py` with the arguments `--api --extensions multimodal`.
71117

118+
### Completions endpoint
119+
72120
Python example:
73121

74122
```Python

extensions/openai/completions.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
1+
import base64
12
import copy
3+
import re
24
import time
35
from collections import deque
6+
from io import BytesIO
47

8+
import requests
59
import tiktoken
610
import torch
711
import torch.nn.functional as F
12+
from PIL import Image
813
from transformers import LogitsProcessor, LogitsProcessorList
914

1015
from extensions.openai.errors import InvalidRequestError
@@ -140,7 +145,25 @@ def convert_history(history):
140145
system_message = ""
141146

142147
for entry in history:
143-
content = entry["content"]
148+
if "image_url" in entry:
149+
image_url = entry['image_url']
150+
if "base64" in image_url:
151+
image_url = re.sub('^data:image/.+;base64,', '', image_url)
152+
img = Image.open(BytesIO(base64.b64decode(image_url)))
153+
else:
154+
try:
155+
my_res = requests.get(image_url)
156+
img = Image.open(BytesIO(my_res.content))
157+
except Exception:
158+
raise InvalidRequestError(message='Image cannot be loaded from the URL!', param='messages')
159+
160+
buffered = BytesIO()
161+
img.save(buffered, format="JPEG")
162+
img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
163+
content = f'<img src="data:image/jpeg;base64,{img_str}">'
164+
else:
165+
content = entry["content"]
166+
144167
role = entry["role"]
145168

146169
if role == "user":
@@ -182,7 +205,8 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False) -
182205
raise InvalidRequestError(message="messages: missing role", param='messages')
183206
elif m['role'] == 'function':
184207
raise InvalidRequestError(message="role: function is not supported.", param='messages')
185-
if 'content' not in m:
208+
209+
if 'content' not in m and "image_url" not in m:
186210
raise InvalidRequestError(message="messages: missing content", param='messages')
187211

188212
# Chat Completions

0 commit comments

Comments
 (0)