Merged
3 changes: 2 additions & 1 deletion extensions/openai/typing.py
@@ -103,10 +103,11 @@ class ChatCompletionRequestParams(BaseModel):
instruction_template_str: str | None = Field(default=None, description="A Jinja2 instruction template. If set, will take precedence over everything else.")

character: str | None = Field(default=None, description="A character defined under text-generation-webui/characters. If not set, the default \"Assistant\" character will be used.")
user_name: str | None = Field(default=None, description="Your name (the user). By default, it's \"You\".", alias="name1")
bot_name: str | None = Field(default=None, description="Overwrites the value set by character field.", alias="name2")
context: str | None = Field(default=None, description="Overwrites the value set by character field.")
greeting: str | None = Field(default=None, description="Overwrites the value set by character field.")
user_name: str | None = Field(default=None, description="Your name (the user). By default, it's \"You\".", alias="name1")
user_bio: str | None = Field(default=None, description="The user description/personality.")
chat_template_str: str | None = Field(default=None, description="Jinja2 template for chat.")

chat_instruct_command: str | None = None
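
A quick way to exercise the two new fields end to end is through the OpenAI-compatible endpoint. A minimal sketch, assuming the server is running locally with the API enabled on the default port 5000 (the name and bio below are made up for illustration):

import requests

payload = {
    "messages": [{"role": "user", "content": "What do I do for a living?"}],
    "mode": "chat",
    "character": "Assistant",
    "name1": "Maria",  # request key is the "name1" alias of user_name
    "user_bio": "{{user}} is a marine biologist from Lisbon.",  # illustrative bio
}

response = requests.post("http://127.0.0.1:5000/v1/chat/completions", json=payload)
print(response.json()["choices"][0]["message"]["content"])
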
15 changes: 8 additions & 7 deletions modules/cache_utils.py
@@ -19,28 +19,29 @@ def process_llamacpp_cache(model, new_sequence, past_sequence):
past_sequence = torch.tensor(past_sequence)

prefix_length = find_prefix_length(past_sequence[:i1], new_sequence[:j1])
sink_length = prefix_length
if sink_length < shared.args.attention_sink_size:
sink_length = shared.args.attention_sink_size

sink_length = max(prefix_length, shared.args.attention_sink_size)
removed_length = i1 - sink_length

if removed_length <= 0:
return past_sequence.tolist()

matching_prefix = past_sequence[:prefix_length]
removed_chunk = past_sequence[sink_length:i1]
overlapping_sequence = new_sequence[j1:j2 + 1]
added_chunk = new_sequence[j2 + 1:]

# print(past_sequence)
# print(new_sequence)
# print(past_sequence.tolist())
# print(new_sequence.tolist())

print()
print('MATCHING PREFIX=', repr(shared.tokenizer.decode(matching_prefix)))
print('ADDED CHUNK=', repr(shared.tokenizer.decode(added_chunk)))
print('REMOVED CHUNK=', repr(shared.tokenizer.decode(removed_chunk)))
print('REMOVED LENGTH=', removed_length)
print()

# Remove interval [sink_length, sink_length + removed_length) from the context
# Subtract removed_length from model.n_tokens
# Update model.n_tokens
model._ctx.kv_cache_seq_rm(0, sink_length, sink_length + removed_length)
model._ctx.kv_cache_seq_shift(0, sink_length + removed_length, -1, -removed_length)

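
The max() rewrite above is behavior-preserving: it keeps at least attention_sink_size tokens at the start of the cache. A toy walk-through of the trimming arithmetic with made-up numbers (not the real cache code):

attention_sink_size = 4  # configured number of sink tokens
prefix_length = 2        # prefix shared between the old and new prompts
i1 = 10                  # index where the old sequence stops matching

# The sink can never be shorter than the configured size.
sink_length = max(prefix_length, attention_sink_size)  # -> 4
removed_length = i1 - sink_length                      # -> 6

# Mirrors the early return above: trim only if there is something to evict.
if removed_length > 0:
    # The model would drop this interval and shift later tokens left.
    print(f"evict [{sink_length}, {sink_length + removed_length})")  # [4, 10)
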
13 changes: 10 additions & 3 deletions modules/chat.py
@@ -86,10 +86,16 @@ def generate_chat_prompt(user_input, state, **kwargs):
if state['mode'] != 'instruct':
chat_template_str = replace_character_names(chat_template_str, state['name1'], state['name2'])

chat_template = jinja_env.from_string(chat_template_str)
instruction_template = jinja_env.from_string(state['instruction_template_str'])
chat_renderer = partial(chat_template.render, add_generation_prompt=False, name1=state['name1'], name2=state['name2'])
instruct_renderer = partial(instruction_template.render, add_generation_prompt=False)
chat_template = jinja_env.from_string(chat_template_str)
chat_renderer = partial(
chat_template.render,
add_generation_prompt=False,
name1=state['name1'],
name2=state['name2'],
user_bio=replace_character_names(state['user_bio'], state['name1'], state['name2']),
)

messages = []

@@ -99,7 +105,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
messages.append({"role": "system", "content": state['custom_system_message']})
else:
renderer = chat_renderer
if state['context'].strip() != '':
if state['context'].strip() != '' or state['user_bio'].strip() != '':
context = replace_character_names(state['context'], state['name1'], state['name2'])
messages.append({"role": "system", "content": context})

@@ -140,6 +146,7 @@ def make_prompt(messages):
command = state['chat-instruct_command']
command = command.replace('<|character|>', state['name2'] if not impersonate else state['name1'])
command = command.replace('<|prompt|>', prompt)
command = replace_character_names(command, state['name1'], state['name2'])

if _continue:
prefix = get_generation_prompt(renderer, impersonate=impersonate, strip_trailing_spaces=False)[0]
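
To see why the system-message condition now also checks user_bio, here is a minimal sketch of the renderer wiring, using a simplified stand-in for the default chat template (the template text, names, and bio are illustrative, not the shipped defaults):

from functools import partial

from jinja2 import Environment

template_str = (
    "{%- for m in messages -%}"
    "{%- if m['role'] == 'system' -%}"
    "{%- if m['content'] -%}{{- m['content'] + '\\n\\n' -}}{%- endif -%}"
    "{%- if user_bio -%}{{- user_bio + '\\n\\n' -}}{%- endif -%}"
    "{%- else -%}"
    "{{- (name1 if m['role'] == 'user' else name2) + ': ' + m['content'] + '\\n' -}}"
    "{%- endif -%}"
    "{%- endfor -%}"
)

renderer = partial(
    Environment().from_string(template_str).render,
    name1="You",
    name2="Assistant",
    user_bio="A curious tinkerer.",  # illustrative
)

# The system message is emitted even with an empty character context, so a
# bio alone still reaches the prompt -- hence the new "or user_bio" check.
print(renderer(messages=[
    {"role": "system", "content": ""},
    {"role": "user", "content": "Hello!"},
]))
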
19 changes: 16 additions & 3 deletions modules/html_generator.py
@@ -1,3 +1,4 @@
import functools
import html
import os
import re
@@ -47,6 +48,7 @@ def replace_blockquote(m):
return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')


@functools.lru_cache(maxsize=4096)
def convert_to_markdown(string):

# Blockquote
@@ -99,6 +101,17 @@ def convert_to_markdown(string):
return html_output


def convert_to_markdown_wrapped(string, use_cache=True):
'''
Used to avoid caching convert_to_markdown calls during streaming.
'''

if use_cache:
return convert_to_markdown(string)

return convert_to_markdown.__wrapped__(string)


def generate_basic_html(string):
string = convert_to_markdown(string)
string = f'<style>{readable_css}</style><div class="readable-container">{string}</div>'
@@ -194,7 +207,7 @@ def get_image_cache(path):
def generate_instruct_html(history):
output = f'<style>{instruct_css}</style><div class="chat" id="chat"><div class="messages">'
for i, _row in enumerate(history):
row = [convert_to_markdown(entry) for entry in _row]
row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]

if row[0]: # don't display empty user messages
output += f"""
@@ -230,7 +243,7 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=
img_me = f'<img src="file/cache/pfp_me.png?{time.time() if reset_cache else ""}">' if Path("cache/pfp_me.png").exists() else ''

for i, _row in enumerate(history):
row = [convert_to_markdown(entry) for entry in _row]
row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]

if row[0]: # don't display empty user messages
output += f"""
@@ -273,7 +286,7 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">'

for i, _row in enumerate(history):
row = [convert_to_markdown(entry) for entry in _row]
row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]

if row[0]: # don't display empty user messages
output += f"""
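
The wrapper exists so that the still-streaming last message never pollutes the cache: finished messages go through the lru_cache, while __wrapped__ (set by functools on the decorated function) gives direct access to the uncached original. A toy demonstration with a stand-in converter:

import functools

@functools.lru_cache(maxsize=4096)
def convert(text):
    print(f"converting {text!r}")  # visible side effect marks cache misses
    return text.upper()

def convert_wrapped(text, use_cache=True):
    if use_cache:
        return convert(text)
    return convert.__wrapped__(text)  # bypass the cache entirely

convert_wrapped("finished message")                 # miss: prints and caches
convert_wrapped("finished message")                 # hit: nothing printed
convert_wrapped("partial stream", use_cache=False)  # converts, never cached
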
3 changes: 2 additions & 1 deletion modules/shared.py
@@ -57,9 +57,10 @@
'stream': True,
'character': 'Assistant',
'name1': 'You',
'user_bio': '',
'custom_system_message': '',
'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
'autoload_model': False,
'gallery-items_per_page': 50,
6 changes: 4 additions & 2 deletions modules/ui.py
@@ -35,7 +35,8 @@
border_color_primary='#c5c5d2',
button_large_padding='6px 12px',
body_text_color_subdued='#484848',
background_fill_secondary='#eaeaea'
background_fill_secondary='#eaeaea',
background_fill_primary='#fafafa',
)

if Path("notification.mp3").exists():
@@ -170,6 +171,7 @@ def list_interface_input_elements():
'character_menu',
'history',
'name1',
'user_bio',
'name2',
'greeting',
'context',
@@ -220,7 +222,7 @@ def apply_interface_values(state, use_persistent=False):

def save_settings(state, preset, extensions_list, show_controls, theme_state):
output = copy.deepcopy(shared.settings)
exclude = ['name2', 'greeting', 'context', 'turn_template']
exclude = ['name2', 'greeting', 'context', 'turn_template', 'truncation_length']
for k in state:
if k in shared.settings and k not in exclude:
output[k] = state[k]
78 changes: 41 additions & 37 deletions modules/ui_chat.py
@@ -94,19 +94,50 @@ def create_ui():

def create_chat_settings_ui():
mu = shared.args.multi_user
with gr.Tab('Character'):
with gr.Tab('Chat'):
with gr.Row():
with gr.Column(scale=8):
with gr.Row():
shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
with gr.Tab("Character"):
with gr.Row():
shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)

shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])

with gr.Tab("User"):
shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')
shared.gradio['user_bio'] = gr.Textbox(value=shared.settings['user_bio'], lines=10, label='Description', info='Here you can optionally write a description of yourself.', placeholder='{{user}}\'s personality: ...', elem_classes=['add_scrollbar'])

with gr.Tab('Chat history'):
with gr.Row():
with gr.Column():
shared.gradio['save_chat_history'] = gr.Button(value='Save history')

with gr.Column():
shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON')

with gr.Tab('Upload character'):
with gr.Tab('YAML or JSON'):
with gr.Row():
shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File', interactive=not mu)
shared.gradio['upload_img_bot'] = gr.Image(type='pil', label='Profile Picture (optional)', interactive=not mu)

shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False)

with gr.Tab('TavernAI PNG'):
with gr.Row():
with gr.Column():
shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id='upload_img_tavern', interactive=not mu)
shared.gradio['tavern_json'] = gr.State()
with gr.Column():
shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False)
shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False)

shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Your name')
shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False)

with gr.Column(scale=1):
shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil', interactive=not mu)
@@ -137,33 +168,6 @@ def create_chat_settings_ui():
with gr.Column():
shared.gradio['chat_template_str'] = gr.Textbox(value=shared.settings['chat_template_str'], label='Chat template', lines=22, elem_classes=['add_scrollbar', 'monospace'])

with gr.Tab('Chat history'):
with gr.Row():
with gr.Column():
shared.gradio['save_chat_history'] = gr.Button(value='Save history')

with gr.Column():
shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON')

with gr.Tab('Upload character'):
with gr.Tab('YAML or JSON'):
with gr.Row():
shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File', interactive=not mu)
shared.gradio['upload_img_bot'] = gr.Image(type='pil', label='Profile Picture (optional)', interactive=not mu)

shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False)

with gr.Tab('TavernAI PNG'):
with gr.Row():
with gr.Column():
shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id='upload_img_tavern', interactive=not mu)
shared.gradio['tavern_json'] = gr.State()
with gr.Column():
shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False)
shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False)

shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False)


def create_event_handlers():

4 changes: 2 additions & 2 deletions modules/ui_model_menu.py
@@ -90,7 +90,7 @@ def create_ui():
shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type)

shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers)
shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be set to more than 0 for your GPU to be used.')
shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=shared.settings['truncation_length_max'], step=256, label="n_ctx", value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.')
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 18,17')
shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch)
@@ -118,7 +118,7 @@ def create_ui():
shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.')
shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, precision=0, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.')
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
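
The added precision=0 matters because attention_sink_size is used as a token count: without it, Gradio's Number component returns a float. A one-line sketch of the fix in isolation (standalone Gradio, illustrative only):

import gradio as gr

# precision=0 rounds the input to the nearest integer and returns an int,
# so downstream code can use the value directly as a token index.
sink = gr.Number(label="attention_sink_size", value=5, precision=0)
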