-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathbackend_tabbyapi.py
More file actions
executable file
·101 lines (95 loc) · 3.81 KB
/
backend_tabbyapi.py
File metadata and controls
executable file
·101 lines (95 loc) · 3.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from backend import backend
import requests
import json
class backend_tabbyapi(backend):
    """Backend driver for a tabbyAPI inference server (OpenAI-compatible v1 HTTP API).

    Handles bearer-token authentication, token counting, streamed text
    generation, and model listing/loading against a running tabbyAPI instance.
    Generation parameters (max_length, temperature, rep_pen, ...) are
    inherited from the `backend` base class — presumably set there or by the
    caller; not visible in this file.
    """

    # Fallback context size used when the server cannot be queried at startup.
    DEFAULT_MAX_CONTEXT_LENGTH = 4096

    def __init__(self, api_url, apikey, max_context_length=None):
        """Connect to a tabbyAPI server.

        api_url            -- base server URL; '/v1/' is appended automatically.
        apikey             -- bearer token sent on every request.
        max_context_length -- optional override; when omitted, the value is
                              queried from GET /v1/model, falling back to
                              DEFAULT_MAX_CONTEXT_LENGTH on failure.
        """
        super().__init__()
        # Normalize the base URL so endpoint paths can be appended directly.
        if not api_url.endswith('/'):
            api_url += '/'
        api_url += 'v1/'

        def auth(r):
            # requests invokes this callable on each PreparedRequest,
            # attaching the bearer token header.
            r.headers['Authorization'] = 'Bearer ' + apikey
            return r
        self.auth = auth

        if max_context_length:
            self.max_context_length = max_context_length
        else:
            try:
                r = requests.get(api_url + 'model', auth=self.auth)
                self.max_context_length = r.json()['parameters']['max_seq_len']
            except Exception:
                print('unable to get max context, using default')
                # BUG FIX: the original printed "using default" but never
                # assigned the attribute, causing AttributeError on first use.
                self.max_context_length = self.DEFAULT_MAX_CONTEXT_LENGTH
        self.api_url = api_url

    def tokens_count(self, text):
        """Return the number of tokens in `text` per the server's tokenizer
        (POST /v1/token/encode)."""
        r = requests.post(self.api_url + 'token/encode', json={'text': text}, auth=self.auth)
        return r.json()['length']

    def generate(self, prompt, stop, on_stream=None):
        """Stream a completion for `prompt`, stopping at `stop`.

        Sends POST /v1/completions with stream=True and feeds incremental
        results through self.process (inherited). Returns the accumulated
        text; on HTTP or transport error, returns whatever was accumulated
        so far (possibly '').
        """
        data = {'prompt': prompt,
                'stream': True,
                'echo': False,
                'stop': [stop],
                'max_tokens': self.max_length,
                'temperature': self.temperature,
                'repetition_penalty': self.rep_pen,
                'repetition_range': self.rep_pen_range,
                'rep_pen_slope': 0,
                'tfs': 1.0,
                'top_k': self.top_k,
                'top_p': self.top_p,
                'min_p': self.min_p,
                'typical': self.typical,
                'mirostat_mode': 0
                }
        result = ''
        try:
            r = requests.post(self.api_url + 'completions', json=data, stream=True, auth=self.auth)
            if r.status_code != 200:
                print('backend_tabbyapi', r.status_code, r.reason)
                return result
            r.encoding = 'utf-8'
            lines = r.iter_lines(decode_unicode=True)

            def generate():
                # Pulls the next SSE 'data:' frame and returns the text
                # accumulated so far, or None when the stream is exhausted.
                for line in lines:
                    if line.startswith('data:'):
                        # BUG FIX: the original sliced line[6:], which drops
                        # one payload character when the frame is 'data:{...}'
                        # without a space after the colon. Strip the prefix
                        # then any leading whitespace instead.
                        line = line[5:].lstrip()
                        if line == '[DONE]':
                            return None
                        nonlocal result
                        try:
                            result += json.loads(line)['choices'][0]['text']
                            return result
                        except Exception as e:
                            # Malformed frame: report and keep consuming.
                            print('backend_tabbyapi', type(e), e, 'line:', line)
                return None

            return self.process(generate, stop, on_stream)
        except Exception as e:
            print('backend_tabbyapi', type(e), e)
            return result

    def models(self):
        """Return the list of model ids available on the server
        (GET /v1/models); returns [] on error."""
        try:
            r = requests.get(self.api_url + 'models', auth=self.auth)
            return [entry['id'] for entry in r.json()['data']]
        except Exception as e:
            print('backend_tabbyapi', type(e), e)
            # BUG FIX: the original fell through returning None; callers
            # iterating the result would crash. An empty list is safer and
            # still falsy.
            return []

    def load(self, model, max_context_length=None, cache_mode='FP16'):
        """Unload the current model and load `model` on the server.

        max_context_length -- optional max_seq_len to request for the load.
        cache_mode         -- KV-cache precision (server default here: FP16).
        Returns True on success; False (with the server's 'detail' message
        printed) on failure. On success, self.max_context_length is refreshed
        from GET /v1/model.
        """
        try:
            # NOTE(review): unload is issued as GET here; confirm against the
            # tabbyAPI docs whether /v1/model/unload expects GET or POST.
            r = requests.get(self.api_url + 'model/unload', auth=self.auth)
            data = {'name': model,
                    'cache_mode': cache_mode
                    }
            if max_context_length:
                data['max_seq_len'] = max_context_length
            r = requests.post(self.api_url + 'model/load', json=data, auth=self.auth)
            if r.status_code != 200:
                print(r.json()['detail'])
                return False
            r = requests.get(self.api_url + 'model', auth=self.auth)
            if r.status_code != 200:
                print(r.json()['detail'])
                return False
            self.max_context_length = r.json()['parameters']['max_seq_len']
            return True
        except Exception as e:
            print('backend_tabbyapi', type(e), e)
            return False