From eeb2b19c4b1de7ecfebf3d3ef1cb3600e0eaac25 Mon Sep 17 00:00:00 2001
From: zaki
Date: Sat, 22 Feb 2025 13:01:29 +0800
Subject: [PATCH 1/2] docs: add LLM sample

---
 .license-ignore         |   1 +
 samples/README.md       |   1 +
 samples/llm/README.md   | 127 ++++++++++++++++++++++++++++++++++++++++
 samples/llm/__init__.py |  15 +++++
 samples/llm/chat.proto  |  21 +++++++
 samples/llm/chat_pb2.py |  32 ++++++++++
 samples/llm/main.py     | 106 +++++++++++++++++++++++++++++++++
 7 files changed, 303 insertions(+)
 create mode 100644 samples/llm/README.md
 create mode 100644 samples/llm/__init__.py
 create mode 100644 samples/llm/chat.proto
 create mode 100644 samples/llm/chat_pb2.py
 create mode 100644 samples/llm/main.py

diff --git a/.license-ignore b/.license-ignore
index 869bdc7..8eb348e 100644
--- a/.license-ignore
+++ b/.license-ignore
@@ -18,3 +18,4 @@ requirements-dev.txt
 .gitignore
 .license-ignore
 ./samples/proto/*
+./samples/llm/*
diff --git a/samples/README.md b/samples/README.md
index efa5432..0b21213 100644
--- a/samples/README.md
+++ b/samples/README.md
@@ -6,4 +6,5 @@
 2. [**serialization**](./serialization): Writing and using custom serialization functions, including protobuf, JSON, and more.
 3. [**stream**](./stream): Using streaming calls, including `ClientStream`, `ServerStream`, and `BidirectionalStream`.
 4. [**registry**](./registry): Using service registration and discovery features.
+5. [**LLM Integration**](./llm): Easily integrating LLMs with Dubbo Python, providing RPC services using models like DeepSeek R1.
 
diff --git a/samples/llm/README.md b/samples/llm/README.md
new file mode 100644
index 0000000..36326b2
--- /dev/null
+++ b/samples/llm/README.md
@@ -0,0 +1,127 @@
+## Integrating LLM
+
+Dubbo Python can easily integrate with an LLM and expose it as an RPC service.
+
+- **Model**: DeepSeek-R1-Distill-Qwen-7B
+- **Model Deployment Framework**: LMDeploy
+- **GPU**: NVIDIA Corporation GA102GL [A10] (rev a1)
+
+**Description**: This example uses [DeepSeek R1](https://github.com/deepseek-ai/DeepSeek-R1) deployed with [LMDeploy](https://github.com/InternLM/lmdeploy), but the overall process applies to other models and inference frameworks as well. If you want to deploy with Docker or another containerization method, refer to the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html) documentation for the relevant configuration steps.
+
+### Basic Environment
+
+```sh
+----------
+Operating System: Ubuntu 22.04.5
+Python Version: 3.11.10
+PyTorch Version: 2.5.1
+----------
+```
+
+### Model Download
+
+Use the `snapshot_download` function provided by ModelScope to download the model. The first argument is the model name, and the `cache_dir` argument specifies the directory the model is downloaded to.
+
+```python
+from modelscope import snapshot_download
+
+model_dir = snapshot_download('deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', cache_dir='/home/dubbo/model', revision='master')
+```
+
+### Core Code
+
+```python
+from time import sleep
+
+from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
+
+from dubbo import Dubbo
+from dubbo.configs import RegistryConfig, ServiceConfig
+from dubbo.proxy.handlers import RpcMethodHandler, RpcServiceHandler
+import chat_pb2
+
+# The path of the model. It can be one of the following options:
+# 1. A local directory path of a turbomind model
+# 2. The model_id of a lmdeploy-quantized model
+# 3. The model_id of a model hosted inside a model repository
+model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
+
+backend_config = TurbomindEngineConfig(cache_max_entry_count=0.2, max_context_token_num=20544, session_len=20544)
+
+gen_config = GenerationConfig(
+    top_p=0.95,
+    temperature=0.6,
+    max_new_tokens=8192,
+    stop_token_ids=[151329, 151336, 151338],
+    do_sample=True,  # enable sampling
+)
+
+
+class DeepSeekAiServicer:
+    def __init__(self, model: str, backend_config: TurbomindEngineConfig, gen_config: GenerationConfig):
+        self.llm = pipeline(model, backend_config=backend_config)
+        self.gen_config = gen_config
+
+    def chat(self, stream):
+        # read the request from the stream
+        request = stream.read()
+        print(f"Received request: {request}")
+        # prepare prompts
+        prompts = [{"role": request.role, "content": request.content + "\n"}]
+
+        is_think = False
+
+        # perform streaming inference
+        for item in self.llm.stream_infer(prompts, gen_config=self.gen_config):
+            # track whether the model is inside its <think> ... </think> block
+            if item.text == "<think>":
+                is_think = True
+                continue
+            elif item.text == "</think>":
+                is_think = False
+                continue
+            # depending on the think state, fill either the think or the answer field
+            if is_think:
+                # send a chunk of the reasoning trace
+                stream.write(chat_pb2.ChatReply(think=item.text, answer=""))
+            else:
+                # send a chunk of the answer
+                stream.write(chat_pb2.ChatReply(think="", answer=item.text))
+
+        stream.done_writing()
+
+
+def build_server_handler():
+    # build a method handler
+    deepseek_ai_servicer = DeepSeekAiServicer(model, backend_config, gen_config)
+    method_handler = RpcMethodHandler.server_stream(
+        deepseek_ai_servicer.chat,
+        method_name="chat",
+        request_deserializer=chat_pb2.ChatRequest.FromString,
+        response_serializer=chat_pb2.ChatReply.SerializeToString,
+    )
+    # build a service handler
+    service_handler = RpcServiceHandler(
+        service_name="org.apache.dubbo.samples.llm.api.DeepSeekAiService",
+        method_handlers=[method_handler],
+    )
+    return service_handler
+
+
+if __name__ == "__main__":
+    # build a service handler
+    service_handler = build_server_handler()
+    service_config = ServiceConfig(service_handler=service_handler)
+
+    # Configure the Zookeeper registry
+    registry_config = RegistryConfig.from_url("zookeeper://zookeeper:2181")
+    bootstrap = Dubbo(registry_config=registry_config)
+
+    # Create and start the server
+    bootstrap.create_server(service_config).start()
+
+    # keep the process alive for 30 days
+    sleep(30 * 24 * 60 * 60)
+
+```
+
diff --git a/samples/llm/__init__.py b/samples/llm/__init__.py
new file mode 100644
index 0000000..bcba37a
--- /dev/null
+++ b/samples/llm/__init__.py
@@ -0,0 +1,15 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/samples/llm/chat.proto b/samples/llm/chat.proto
new file mode 100644
index 0000000..e041636
--- /dev/null
+++ b/samples/llm/chat.proto
@@ -0,0 +1,21 @@
+syntax = "proto3";
+
+option java_multiple_files = true;
+option java_outer_classname = "ChatProto";
+
+package org.apache.dubbo.samples.llm.api;
+
+message ChatRequest {
+  string role = 1;
+  string content = 2;
+}
+
+message ChatReply {
+  string think = 1;
+  string answer = 2;
+}
+
+service DeepSeekAiService {
+  // chat
+  rpc chat(ChatRequest) returns (stream ChatReply);
+}
diff --git a/samples/llm/chat_pb2.py b/samples/llm/chat_pb2.py
new file mode 100644
index 0000000..de9488e
--- /dev/null
+++ b/samples/llm/chat_pb2.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: chat.proto
+# Protobuf Python Version: 4.25.3
+"""Generated protocol buffer code."""
+
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
+    b'\n\nchat.proto\x12 org.apache.dubbo.samples.llm.api",\n\x0b\x43hatRequest\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t"*\n\tChatReply\x12\r\n\x05think\x18\x01 \x01(\t\x12\x0e\n\x06\x61nswer\x18\x02 \x01(\t2y\n\x11\x44\x65\x65pSeekAiService\x12\x64\n\x04\x63hat\x12-.org.apache.dubbo.samples.llm.api.ChatRequest\x1a+.org.apache.dubbo.samples.llm.api.ChatReply0\x01\x42\rB\tChatProtoP\x01\x62\x06proto3'
+)
+
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "chat_pb2", _globals)
+if _descriptor._USE_C_DESCRIPTORS == False:
+    _globals["DESCRIPTOR"]._options = None
+    _globals["DESCRIPTOR"]._serialized_options = b"B\tChatProtoP\001"
+    _globals["_CHATREQUEST"]._serialized_start = 48
+    _globals["_CHATREQUEST"]._serialized_end = 92
+    _globals["_CHATREPLY"]._serialized_start = 94
+    _globals["_CHATREPLY"]._serialized_end = 136
+    _globals["_DEEPSEEKAISERVICE"]._serialized_start = 138
+    _globals["_DEEPSEEKAISERVICE"]._serialized_end = 259
+# @@protoc_insertion_point(module_scope)
diff --git a/samples/llm/main.py b/samples/llm/main.py
new file mode 100644
index 0000000..e315716
--- /dev/null
+++ b/samples/llm/main.py
@@ -0,0 +1,106 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from time import sleep
+
+from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
+
+from dubbo import Dubbo
+from dubbo.configs import RegistryConfig, ServiceConfig
+from dubbo.proxy.handlers import RpcMethodHandler, RpcServiceHandler
+import chat_pb2
+
+# The path of the model. It can be one of the following options:
+# 1. A local directory path of a turbomind model
+# 2. The model_id of a lmdeploy-quantized model
+# 3. The model_id of a model hosted inside a model repository
+model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
+
+backend_config = TurbomindEngineConfig(cache_max_entry_count=0.2, max_context_token_num=20544, session_len=20544)
+
+gen_config = GenerationConfig(
+    top_p=0.95,
+    temperature=0.6,
+    max_new_tokens=8192,
+    stop_token_ids=[151329, 151336, 151338],
+    do_sample=True,  # enable sampling
+)
+
+
+class DeepSeekAiServicer:
+    def __init__(self, model: str, backend_config: TurbomindEngineConfig, gen_config: GenerationConfig):
+        self.llm = pipeline(model, backend_config=backend_config)
+        self.gen_config = gen_config
+
+    def chat(self, stream):
+        # read the request from the stream
+        request = stream.read()
+        print(f"Received request: {request}")
+        # prepare prompts
+        prompts = [{"role": request.role, "content": request.content + "\n"}]
+
+        is_think = False
+
+        # perform streaming inference
+        for item in self.llm.stream_infer(prompts, gen_config=self.gen_config):
+            # track whether the model is inside its <think> ... </think> block
+            if item.text == "<think>":
+                is_think = True
+                continue
+            elif item.text == "</think>":
+                is_think = False
+                continue
+            # depending on the think state, fill either the think or the answer field
+            if is_think:
+                # send a chunk of the reasoning trace
+                stream.write(chat_pb2.ChatReply(think=item.text, answer=""))
+            else:
+                # send a chunk of the answer
+                stream.write(chat_pb2.ChatReply(think="", answer=item.text))
+
+        stream.done_writing()
+
+
+def build_server_handler():
+    # build a method handler
+    deepseek_ai_servicer = DeepSeekAiServicer(model, backend_config, gen_config)
+    method_handler = RpcMethodHandler.server_stream(
+        deepseek_ai_servicer.chat,
+        method_name="chat",
+        request_deserializer=chat_pb2.ChatRequest.FromString,
+        response_serializer=chat_pb2.ChatReply.SerializeToString,
+    )
+    # build a service handler
+    service_handler = RpcServiceHandler(
+        service_name="org.apache.dubbo.samples.llm.api.DeepSeekAiService",
+        method_handlers=[method_handler],
+    )
+    return service_handler
+
+
+if __name__ == "__main__":
+    # build a service handler
+    service_handler = build_server_handler()
+    service_config = ServiceConfig(service_handler=service_handler)
+
+    # Configure the Zookeeper registry
+    registry_config = RegistryConfig.from_url("zookeeper://zookeeper:2181")
+    bootstrap = Dubbo(registry_config=registry_config)
+
+    # Create and start the server
+    bootstrap.create_server(service_config).start()
+
+    # keep the process alive for 30 days
+    sleep(30 * 24 * 60 * 60)

From 84717dd516ca0c8ee90ea5eddca85943c2538b73 Mon Sep 17 00:00:00 2001
From: zaki
Date: Sat, 22 Feb 2025 13:24:09 +0800
Subject: [PATCH 2/2] fix: fix license check

---
 .license-ignore | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.license-ignore b/.license-ignore
index 8eb348e..8d84962 100644
--- a/.license-ignore
+++ b/.license-ignore
@@ -4,6 +4,7 @@
 # file extensions
 .*md
 .*proto
+.*_pb2.py
 
 # files
 .asf.yaml
@@ -17,5 +18,3 @@ requirements-dev.txt
 .github
 .gitignore
 .license-ignore
-./samples/proto/*
-./samples/llm/*
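
The patches above ship only the provider side of the service. For completeness, here is a minimal consumer-side sketch; it is not part of the patch series. Everything Dubbo-specific is hidden behind the hypothetical `create_chat_stub()` placeholder, whose real implementation should mirror the client code in the existing `stream` and `registry` samples, and the call is assumed to return an iterable of `ChatReply` messages. Only the `chat_pb2` message types are taken directly from `chat.proto`.

```python
import chat_pb2


def create_chat_stub():
    # Hypothetical placeholder: build a Dubbo Python client for
    # org.apache.dubbo.samples.llm.api.DeepSeekAiService and return a callable
    # that performs the server-streaming `chat` call (see the `stream` and
    # `registry` samples for the actual client-side setup).
    raise NotImplementedError


def ask(question: str) -> str:
    chat = create_chat_stub()
    # The request mirrors chat.proto: role and content are plain strings.
    replies = chat(chat_pb2.ChatRequest(role="user", content=question))

    thoughts, answers = [], []
    # The server fills one field per ChatReply chunk: `think` while the model
    # is inside its <think> block, `answer` for the final response.
    for reply in replies:
        if reply.think:
            thoughts.append(reply.think)
        if reply.answer:
            answers.append(reply.answer)

    print("reasoning trace:", "".join(thoughts))
    return "".join(answers)


if __name__ == "__main__":
    print(ask("Briefly introduce Apache Dubbo."))
```

Keeping `think` and `answer` as separate fields on the wire lets a caller log the reasoning trace while showing users only the final answer, which is why the sample splits the stream into two fields instead of concatenating the raw LMDeploy output.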