binary-husky · binary-husky · Mar 16, 2024 · Mar 16, 2024 · Mar 16, 2024 · Mar 16, 2024
diff --git a/config.py b/config.py
@@ -47,6 +47,7 @@
 #   "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125"
 #   "claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2",
 #   "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama",
+#   "yi-34b-chat-0205", "yi-34b-chat-200k"
 # ]
 # --- --- --- ---
 # 此外，为了更灵活地接入one-api多模型管理界面，您还可以在接入one-api时，
@@ -212,6 +213,10 @@
 MOONSHOT_API_KEY = ""
 
 
+# 零一万物(Yi Model) API KEY
+YIMODEL_API_KEY = ""
+
+
 # Mathpix 拥有执行PDF的OCR功能，但是需要注册账号
 MATHPIX_APPID = ""
 MATHPIX_APPKEY = ""
@@ -313,6 +318,9 @@
 ├── "glm-4", "glm-3-turbo", "zhipuai" 智谱AI大模型
 │   └── ZHIPUAI_API_KEY
 │
+├── "yi-34b-chat-0205", "yi-34b-chat-200k" 等零一万物(Yi Model)大模型
+│   └── YIMODEL_API_KEY
+│
 ├── "qwen-turbo" 等通义千问大模型
 │   └──  DASHSCOPE_API_KEY
 │

diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py
@@ -63,6 +63,7 @@ def decode(self, *args, **kwargs):
 newbing_endpoint = "wss://sydney.bing.com/sydney/ChatHub"
 gemini_endpoint = "https://generativelanguage.googleapis.com/v1beta/models"
 claude_endpoint = "https://api.anthropic.com"
+yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
 
 if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
 azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
@@ -80,6 +81,7 @@ def decode(self, *args, **kwargs):
 if newbing_endpoint in API_URL_REDIRECT: newbing_endpoint = API_URL_REDIRECT[newbing_endpoint]
 if gemini_endpoint in API_URL_REDIRECT: gemini_endpoint = API_URL_REDIRECT[gemini_endpoint]
 if claude_endpoint in API_URL_REDIRECT: claude_endpoint = API_URL_REDIRECT[claude_endpoint]
+if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint]
 
 # 获取tokenizer
 tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
@@ -316,6 +318,7 @@ def decode(self, *args, **kwargs):
     "moonshot-v1-8k": {
         "fn_with_ui": moonshot_ui,
         "fn_without_ui": moonshot_no_ui,
+        "can_multi_thread": True,
         "endpoint": None,
         "max_token": 1024 * 8,
         "tokenizer": tokenizer_gpt35,
@@ -324,6 +327,7 @@ def decode(self, *args, **kwargs):
     "moonshot-v1-32k": {
         "fn_with_ui": moonshot_ui,
         "fn_without_ui": moonshot_no_ui,
+        "can_multi_thread": True,
         "endpoint": None,
         "max_token": 1024 * 32,
         "tokenizer": tokenizer_gpt35,
@@ -332,6 +336,7 @@ def decode(self, *args, **kwargs):
     "moonshot-v1-128k": {
         "fn_with_ui": moonshot_ui,
         "fn_without_ui": moonshot_no_ui,
+        "can_multi_thread": True,
         "endpoint": None,
         "max_token": 1024 * 128,
         "tokenizer": tokenizer_gpt35,
@@ -473,22 +478,6 @@ def decode(self, *args, **kwargs):
             "token_cnt": get_token_num_gpt35,
         }
     })
-if "newbing-free" in AVAIL_LLM_MODELS:
-    try:
-        from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
-        from .bridge_newbingfree import predict as newbingfree_ui
-        model_info.update({
-            "newbing-free": {
-                "fn_with_ui": newbingfree_ui,
-                "fn_without_ui": newbingfree_noui,
-                "endpoint": newbing_endpoint,
-                "max_token": 4096,
-                "tokenizer": tokenizer_gpt35,
-                "token_cnt": get_token_num_gpt35,
-            }
-        })
-    except:
-        print(trimmed_format_exc())
 if "newbing" in AVAIL_LLM_MODELS:   # same with newbing-free
     try:
         from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
@@ -521,6 +510,7 @@ def decode(self, *args, **kwargs):
         })
     except:
         print(trimmed_format_exc())
+# -=-=-=-=-=-=- 上海AI-LAB书生大模型 -=-=-=-=-=-=-
 if "internlm" in AVAIL_LLM_MODELS:
     try:
         from .bridge_internlm import predict_no_ui_long_connection as internlm_noui
@@ -553,6 +543,7 @@ def decode(self, *args, **kwargs):
         })
     except:
         print(trimmed_format_exc())
+# -=-=-=-=-=-=- 通义-本地模型 -=-=-=-=-=-=-
 if "qwen-local" in AVAIL_LLM_MODELS:
     try:
         from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui
@@ -570,6 +561,7 @@ def decode(self, *args, **kwargs):
         })
     except:
         print(trimmed_format_exc())
+# -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=-
 if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS:   # zhipuai
     try:
         from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
@@ -605,7 +597,35 @@ def decode(self, *args, **kwargs):
         })
     except:
         print(trimmed_format_exc())
-if "spark" in AVAIL_LLM_MODELS:   # 讯飞星火认知大模型
+# -=-=-=-=-=-=- 零一万物模型 -=-=-=-=-=-=-
+if "yi-34b-chat-0205" in AVAIL_LLM_MODELS or "yi-34b-chat-200k" in AVAIL_LLM_MODELS:   # zhipuai
+    try:
+        from .bridge_yimodel import predict_no_ui_long_connection as yimodel_noui
+        from .bridge_yimodel import predict as yimodel_ui
+        model_info.update({
+            "yi-34b-chat-0205": {
+                "fn_with_ui": yimodel_ui,
+                "fn_without_ui": yimodel_noui,
+                "can_multi_thread": False,  # 目前来说，默认情况下并发量极低，因此禁用
+                "endpoint": yimodel_endpoint,
+                "max_token": 4000,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "yi-34b-chat-200k": {
+                "fn_with_ui": yimodel_ui,
+                "fn_without_ui": yimodel_noui,
+                "can_multi_thread": False,  # 目前来说，默认情况下并发量极低，因此禁用
+                "endpoint": yimodel_endpoint,
+                "max_token": 200000,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+        })
+    except:
+        print(trimmed_format_exc())
+# -=-=-=-=-=-=- 讯飞星火认知大模型 -=-=-=-=-=-=-
+if "spark" in AVAIL_LLM_MODELS:
     try:
         from .bridge_spark import predict_no_ui_long_connection as spark_noui
         from .bridge_spark import predict as spark_ui
@@ -681,6 +701,7 @@ def decode(self, *args, **kwargs):
         })
     except:
         print(trimmed_format_exc())
+# -=-=-=-=-=-=- 智谱 -=-=-=-=-=-=-
 if "zhipuai" in AVAIL_LLM_MODELS:   # zhipuai 是glm-4的别名，向后兼容配置
     try:
         model_info.update({
@@ -695,6 +716,7 @@ def decode(self, *args, **kwargs):
         })
     except:
         print(trimmed_format_exc())
+# -=-=-=-=-=-=- 幻方-深度求索大模型 -=-=-=-=-=-=-
 if "deepseekcoder" in AVAIL_LLM_MODELS:   # deepseekcoder
     try:
         from .bridge_deepseekcoder import predict_no_ui_long_connection as deepseekcoder_noui
@@ -711,6 +733,8 @@ def decode(self, *args, **kwargs):
         })
     except:
         print(trimmed_format_exc())
+
+
 # -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=-
 for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
     # 为了更灵活地接入one-api多模型管理界面，设计了此接口，例子：AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]
@@ -735,8 +759,8 @@ def decode(self, *args, **kwargs):
     })
 
 
-# <-- 用于定义和切换多个azure模型 -->
-AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY")
+# -=-=-=-=-=-=- azure模型对齐支持 -=-=-=-=-=-=-
+AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY") # <-- 用于定义和切换多个azure模型 -->
 if len(AZURE_CFG_ARRAY) > 0:
     for azure_model_name, azure_cfg_dict in AZURE_CFG_ARRAY.items():
         # 可能会覆盖之前的配置，但这是意料之中的