From eeb2b19c4b1de7ecfebf3d3ef1cb3600e0eaac25 Mon Sep 17 00:00:00 2001
From: zaki
Date: Sat, 22 Feb 2025 13:01:29 +0800
Subject: [PATCH 1/2] docs: add LLM sample

---
 .license-ignore         |   1 +
 samples/README.md       |   1 +
 samples/llm/README.md   | 127 ++++++++++++++++++++++++++++++++++++++++
 samples/llm/__init__.py |  15 +++++
 samples/llm/chat.proto  |  21 +++++++
 samples/llm/chat_pb2.py |  32 ++++++++++
 samples/llm/main.py     | 106 +++++++++++++++++++++++++++++++++
 7 files changed, 303 insertions(+)
 create mode 100644 samples/llm/README.md
 create mode 100644 samples/llm/__init__.py
 create mode 100644 samples/llm/chat.proto
 create mode 100644 samples/llm/chat_pb2.py
 create mode 100644 samples/llm/main.py

diff --git a/.license-ignore b/.license-ignore
index 869bdc7..8eb348e 100644
--- a/.license-ignore
+++ b/.license-ignore
@@ -18,3 +18,4 @@ requirements-dev.txt
 .gitignore
 .license-ignore
 ./samples/proto/*
+./samples/llm/*
diff --git a/samples/README.md b/samples/README.md
index efa5432..0b21213 100644
--- a/samples/README.md
+++ b/samples/README.md
@@ -6,4 +6,5 @@
 2. [**serialization**](./serialization): Writing and using custom serialization functions, including protobuf, JSON, and more.
 3. [**stream**](./stream): Using streaming calls, including `ClientStream`, `ServerStream`, and `BidirectionalStream`.
 4. [**registry**](./registry): Using service registration and discovery features.
+5. [**LLM Integration**](./llm): Easily integrating LLMs with Dubbo Python, providing RPC services using models like DeepSeek R1.
 
diff --git a/samples/llm/README.md b/samples/llm/README.md
new file mode 100644
index 0000000..36326b2
--- /dev/null
+++ b/samples/llm/README.md
@@ -0,0 +1,127 @@
+## Integrating LLM
+
+Dubbo Python can easily integrate with an LLM and expose it as an RPC service.
+
+- **Model**: DeepSeek-R1-Distill-Qwen-7B
+- **Model Deployment Framework**: LMDeploy
+- **GPU**: NVIDIA Corporation GA102GL [A10] (rev a1)
+
+**Description**: This example uses [DeepSeek R1](https://github.com/deepseek-ai/DeepSeek-R1) deployed with [LMDeploy](https://github.com/InternLM/lmdeploy), but the overall process applies to other models and inference frameworks as well. If you want to deploy with Docker or another containerization method, refer to the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html) documentation for the relevant configuration steps.
+
+### Basic Environment
+
+```sh
+----------
+Operating System: Ubuntu 22.04.5
+Python Version: 3.11.10
+PyTorch Version: 2.5.1
+----------
+```
+
+### Model Download
+
+Use the `snapshot_download` function provided by ModelScope to download the model. The first argument is the model name, and the `cache_dir` argument specifies the directory the model is downloaded to.
+
+```python
+from modelscope import snapshot_download
+
+model_dir = snapshot_download('deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', cache_dir='/home/dubbo/model', revision='master')
+```
+
+### Core Code
+
+```python
+from time import sleep
+
+from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
+
+from dubbo import Dubbo
+from dubbo.configs import RegistryConfig, ServiceConfig
+from dubbo.proxy.handlers import RpcMethodHandler, RpcServiceHandler
+import chat_pb2
+
+# The path of the model. It can be one of the following options:
+# 1. A local directory path of a turbomind model
+# 2. The model_id of a lmdeploy-quantized model
+# 3. The model_id of a model hosted inside a model repository
+model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
+
+backend_config = TurbomindEngineConfig(cache_max_entry_count=0.2, max_context_token_num=20544, session_len=20544)
+
+gen_config = GenerationConfig(
+    top_p=0.95,
+    temperature=0.6,
+    max_new_tokens=8192,
+    stop_token_ids=[151329, 151336, 151338],
+    do_sample=True,  # enable sampling
+)
+
+
+class DeepSeekAiServicer:
+    def __init__(self, model: str, backend_config: TurbomindEngineConfig, gen_config: GenerationConfig):
+        self.llm = pipeline(model, backend_config=backend_config)
+        self.gen_config = gen_config
+
+    def chat(self, stream):
+        # read the request from the stream
+        request = stream.read()
+        print(f"Received request: {request}")
+        # prepare prompts
+        prompts = [{"role": request.role, "content": request.content + "\n"}]
+
+        is_think = False
+
+        # perform streaming inference
+        for item in self.llm.stream_infer(prompts, gen_config=self.gen_config):
+            # track whether the model is inside its <think> ... </think> block
+            if item.text == "<think>":
+                is_think = True
+                continue
+            elif item.text == "</think>":
+                is_think = False
+                continue
+            # depending on the think state, fill either the think or the answer field
+            if is_think:
+                # send a chunk of the reasoning trace
+                stream.write(chat_pb2.ChatReply(think=item.text, answer=""))
+            else:
+                # send a chunk of the answer
+                stream.write(chat_pb2.ChatReply(think="", answer=item.text))
+
+        stream.done_writing()
+
+
+def build_server_handler():
+    # build a method handler
+    deepseek_ai_servicer = DeepSeekAiServicer(model, backend_config, gen_config)
+    method_handler = RpcMethodHandler.server_stream(
+        deepseek_ai_servicer.chat,
+        method_name="chat",
+        request_deserializer=chat_pb2.ChatRequest.FromString,
+        response_serializer=chat_pb2.ChatReply.SerializeToString,
+    )
+    # build a service handler
+    service_handler = RpcServiceHandler(
+        service_name="org.apache.dubbo.samples.llm.api.DeepSeekAiService",
+        method_handlers=[method_handler],
+    )
+    return service_handler
+
+
+if __name__ == "__main__":
+    # build a service handler
+    service_handler = build_server_handler()
+    service_config = ServiceConfig(service_handler=service_handler)
+
+    # Configure the Zookeeper registry
+    registry_config = RegistryConfig.from_url("zookeeper://zookeeper:2181")
+    bootstrap = Dubbo(registry_config=registry_config)
+
+    # Create and start the server
+    bootstrap.create_server(service_config).start()
+
+    # keep the process alive for 30 days
+    sleep(30 * 24 * 60 * 60)
+
+```
+
diff --git a/samples/llm/__init__.py b/samples/llm/__init__.py
new file mode 100644
index 0000000..bcba37a
--- /dev/null
+++ b/samples/llm/__init__.py
@@ -0,0 +1,15 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/samples/llm/chat.proto b/samples/llm/chat.proto
new file mode 100644
index 0000000..e041636
--- /dev/null
+++ b/samples/llm/chat.proto
@@ -0,0 +1,21 @@
+syntax = "proto3";
+
+option java_multiple_files = true;
+option java_outer_classname = "ChatProto";
+
+package org.apache.dubbo.samples.llm.api;
+
+message ChatRequest {
+  string role = 1;
+  string content = 2;
+}
+
+message ChatReply {
+  string think = 1;
+  string answer = 2;
+}
+
+service DeepSeekAiService {
+  // chat
+  rpc chat(ChatRequest) returns (stream ChatReply);
+}
diff --git a/samples/llm/chat_pb2.py b/samples/llm/chat_pb2.py
new file mode 100644
index 0000000..de9488e
--- /dev/null
+++ b/samples/llm/chat_pb2.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: chat.proto
+# Protobuf Python Version: 4.25.3
+"""Generated protocol buffer code."""
+
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
+    b'\n\nchat.proto\x12 org.apache.dubbo.samples.llm.api",\n\x0b\x43hatRequest\x12\x0c\n\x04role\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t"*\n\tChatReply\x12\r\n\x05think\x18\x01 \x01(\t\x12\x0e\n\x06\x61nswer\x18\x02 \x01(\t2y\n\x11\x44\x65\x65pSeekAiService\x12\x64\n\x04\x63hat\x12-.org.apache.dubbo.samples.llm.api.ChatRequest\x1a+.org.apache.dubbo.samples.llm.api.ChatReply0\x01\x42\rB\tChatProtoP\x01\x62\x06proto3'
+)
+
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "chat_pb2", _globals)
+if _descriptor._USE_C_DESCRIPTORS == False:
+    _globals["DESCRIPTOR"]._options = None
+    _globals["DESCRIPTOR"]._serialized_options = b"B\tChatProtoP\001"
+    _globals["_CHATREQUEST"]._serialized_start = 48
+    _globals["_CHATREQUEST"]._serialized_end = 92
+    _globals["_CHATREPLY"]._serialized_start = 94
+    _globals["_CHATREPLY"]._serialized_end = 136
+    _globals["_DEEPSEEKAISERVICE"]._serialized_start = 138
+    _globals["_DEEPSEEKAISERVICE"]._serialized_end = 259
+# @@protoc_insertion_point(module_scope)
diff --git a/samples/llm/main.py b/samples/llm/main.py
new file mode 100644
index 0000000..e315716
--- /dev/null
+++ b/samples/llm/main.py
@@ -0,0 +1,106 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from time import sleep
+
+from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
+
+from dubbo import Dubbo
+from dubbo.configs import RegistryConfig, ServiceConfig
+from dubbo.proxy.handlers import RpcMethodHandler, RpcServiceHandler
+import chat_pb2
+
+# The path of the model. It can be one of the following options:
+# 1. A local directory path of a turbomind model
+# 2. The model_id of a lmdeploy-quantized model
+# 3. The model_id of a model hosted inside a model repository
+model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
+
+backend_config = TurbomindEngineConfig(cache_max_entry_count=0.2, max_context_token_num=20544, session_len=20544)
+
+gen_config = GenerationConfig(
+    top_p=0.95,
+    temperature=0.6,
+    max_new_tokens=8192,
+    stop_token_ids=[151329, 151336, 151338],
+    do_sample=True,  # enable sampling
+)
+
+
+class DeepSeekAiServicer:
+    def __init__(self, model: str, backend_config: TurbomindEngineConfig, gen_config: GenerationConfig):
+        self.llm = pipeline(model, backend_config=backend_config)
+        self.gen_config = gen_config
+
+    def chat(self, stream):
+        # read the request from the stream
+        request = stream.read()
+        print(f"Received request: {request}")
+        # prepare prompts
+        prompts = [{"role": request.role, "content": request.content + "\n"}]
+
+        is_think = False
+
+        # perform streaming inference
+        for item in self.llm.stream_infer(prompts, gen_config=self.gen_config):
+            # track whether the model is inside its <think> ... </think> block
+            if item.text == "<think>":
+                is_think = True
+                continue
+            elif item.text == "</think>":
+                is_think = False
+                continue
+            # depending on the think state, fill either the think or the answer field
+            if is_think:
+                # send a chunk of the reasoning trace
+                stream.write(chat_pb2.ChatReply(think=item.text, answer=""))
+            else:
+                # send a chunk of the answer
+                stream.write(chat_pb2.ChatReply(think="", answer=item.text))
+
+        stream.done_writing()
+
+
+def build_server_handler():
+    # build a method handler
+    deepseek_ai_servicer = DeepSeekAiServicer(model, backend_config, gen_config)
+    method_handler = RpcMethodHandler.server_stream(
+        deepseek_ai_servicer.chat,
+        method_name="chat",
+        request_deserializer=chat_pb2.ChatRequest.FromString,
+        response_serializer=chat_pb2.ChatReply.SerializeToString,
+    )
+    # build a service handler
+    service_handler = RpcServiceHandler(
+        service_name="org.apache.dubbo.samples.llm.api.DeepSeekAiService",
+        method_handlers=[method_handler],
+    )
+    return service_handler
+
+
+if __name__ == "__main__":
+    # build a service handler
+    service_handler = build_server_handler()
+    service_config = ServiceConfig(service_handler=service_handler)
+
+    # Configure the Zookeeper registry
+    registry_config = RegistryConfig.from_url("zookeeper://zookeeper:2181")
+    bootstrap = Dubbo(registry_config=registry_config)
+
+    # Create and start the server
+    bootstrap.create_server(service_config).start()
+
+    # keep the process alive for 30 days
+    sleep(30 * 24 * 60 * 60)

From 84717dd516ca0c8ee90ea5eddca85943c2538b73 Mon Sep 17 00:00:00 2001
From: zaki
Date: Sat, 22 Feb 2025 13:24:09 +0800
Subject: [PATCH 2/2] fix: fix license check

---
 .license-ignore | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.license-ignore b/.license-ignore
index 8eb348e..8d84962 100644
--- a/.license-ignore
+++ b/.license-ignore
@@ -4,6 +4,7 @@
 # file extensions
 .*md
 .*proto
+.*_pb2.py
 
 # files
 .asf.yaml
@@ -17,5 +18,3 @@ requirements-dev.txt
 .github
 .gitignore
 .license-ignore
-./samples/proto/*
-./samples/llm/*
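
The patches above ship only the provider side of the service. For completeness, here is a minimal consumer-side sketch; it is not part of the patch series. Everything Dubbo-specific is hidden behind the hypothetical `create_chat_stub()` placeholder, whose real implementation should mirror the client code in the existing `stream` and `registry` samples, and the call is assumed to return an iterable of `ChatReply` messages. Only the `chat_pb2` message types are taken directly from `chat.proto`.

```python
import chat_pb2


def create_chat_stub():
    # Hypothetical placeholder: build a Dubbo Python client for
    # org.apache.dubbo.samples.llm.api.DeepSeekAiService and return a callable
    # that performs the server-streaming `chat` call (see the `stream` and
    # `registry` samples for the actual client-side setup).
    raise NotImplementedError


def ask(question: str) -> str:
    chat = create_chat_stub()
    # The request mirrors chat.proto: role and content are plain strings.
    replies = chat(chat_pb2.ChatRequest(role="user", content=question))

    thoughts, answers = [], []
    # The server fills one field per ChatReply chunk: `think` while the model
    # is inside its <think> block, `answer` for the final response.
    for reply in replies:
        if reply.think:
            thoughts.append(reply.think)
        if reply.answer:
            answers.append(reply.answer)

    print("reasoning trace:", "".join(thoughts))
    return "".join(answers)


if __name__ == "__main__":
    print(ask("Briefly introduce Apache Dubbo."))
```

Keeping `think` and `answer` as separate fields on the wire lets a caller log the reasoning trace while showing users only the final answer, which is why the sample splits the stream into two fields instead of concatenating the raw LMDeploy output.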