diff --git a/examples/web_demo/requirements.txt b/examples/web_demo/requirements.txt index 98267022..edab095c 100644 --- a/examples/web_demo/requirements.txt +++ b/examples/web_demo/requirements.txt @@ -1,4 +1,4 @@ -gradio==5.11.0 +gradio==5.18.0 sentencepiece==0.1.99 transformers==4.41.2 transformers_stream_generator==0.0.5 diff --git a/examples/web_demo/web_demo_api.py b/examples/web_demo/web_demo_api.py index b4bf1f06..d421e74a 100644 --- a/examples/web_demo/web_demo_api.py +++ b/examples/web_demo/web_demo_api.py @@ -12,7 +12,7 @@ def boolean_string(string): parser = argparse.ArgumentParser() -parser.add_argument("-u", "--url", type=str, default="http://local:8000/v1", help="base url") +parser.add_argument("-u", "--url", type=str, default="http://localhost:8000/v1", help="base url") parser.add_argument("-m", "--model", type=str, default="xft", help="model name") parser.add_argument("-t", "--token", type=str, default="EMPTY", help="api key") parser.add_argument("-i", "--ip", type=str, default="0.0.0.0", help="gradio server ip") @@ -25,7 +25,7 @@ def clean_input(): def reset(): - return [], [] + return [] class ChatDemo: @@ -43,7 +43,7 @@ def launch(self, server_name="0.0.0.0", server_port=7860, share=False): with gr.Blocks() as demo: self.html_func() - chatbot = gr.Chatbot() + chatbot = gr.Chatbot(type="messages") with gr.Row(): with gr.Column(scale=2): user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=1, container=False) @@ -52,53 +52,61 @@ def launch(self, server_name="0.0.0.0", server_port=7860, share=False): with gr.Column(scale=1): emptyBtn = gr.Button("Clear History") - history = gr.State([]) submitBtn.click( self.predict, - [user_input, chatbot, history], - [chatbot, history], + [user_input, chatbot], + [chatbot], show_progress=True, ) submitBtn.click(clean_input, [], [user_input]) - emptyBtn.click(reset, [], [chatbot, history], show_progress=True) + emptyBtn.click(reset, [], [chatbot], show_progress=True) demo.queue().launch(server_name=server_name, server_port=server_port, share=share, inbrowser=True) - def post_process_generation(self, chunk, chatbot, query, history): - response = chatbot[-1][1] + chunk - new_history = history + [(query, response)] - chatbot[-1] = (query, response) - return chatbot, new_history - - def create_chat_input(self, query, history): + def post_process_generation(self, chunk, chatbot): + if chunk == "": + chunk="" + chatbot[-1]["metadata"]= {"title": "πŸ’­ζ€θ€ƒθΏ‡η¨‹"} + elif chunk == "": + if "metadata" in chatbot[-1]: + if len(chatbot[-1]["content"].strip()) == 0: + if chatbot[-1]["metadata"] is not None: + del chatbot[-1]["metadata"] + else: + chatbot.append({"role": "assistant", "content": ""}) + chunk="" + chatbot[-1]["content"] += chunk + return chatbot + + def create_chat_input(self, chatbot): msgs = [] - if history is None: - history = [] - for user_msg, model_msg in history: - msgs.append({"role": "user", "content": user_msg}) - msgs.append({"role": "assistant", "content": model_msg}) - msgs.append({"role": "user", "content": query}) + for msg in chatbot: + if "metadata" not in msg or msg["metadata"] is None: + msgs.append({"role": msg["role"], "content": msg["content"]}) + if len(msgs) > 8: + msgs = msgs[-8:] return msgs - def predict(self, query, chatbot, history): - chatbot.append((query, "")) + def predict(self, query, chatbot): + chatbot.append({"role": "user", "content": query}) + chatbot.append({"role": "assistant", "content": ""}) completion = self.client.chat.completions.create( model=self.model, - messages=self.create_chat_input(query, history), - max_tokens=2048, + messages=self.create_chat_input(chatbot), + max_tokens=8192, stream=True, - temperature=1.0, + temperature=0.6, extra_body={"top_k": 20, "top_p": 0.8, "repetition_penalty": 1.1}, ) for chunk in completion: if chunk.choices[0].delta.content is not None: - yield self.post_process_generation(chunk.choices[0].delta.content, chatbot, query, history) + yield self.post_process_generation(chunk.choices[0].delta.content, chatbot) if __name__ == "__main__": args = parser.parse_args() demo = ChatDemo(args.url, args.model, args.token) - demo.launch(args.ip, args.port, args.share) + demo.launch(args.ip, args.port, args.share) \ No newline at end of file