18 changes: 18 additions & 0 deletions python/dify_plugin/entities/model/__init__.py
@@ -22,6 +22,7 @@ class DefaultParameterName(Enum):
     PRESENCE_PENALTY = "presence_penalty"
     FREQUENCY_PENALTY = "frequency_penalty"
     MAX_TOKENS = "max_tokens"
+    MAX_COMPLETION_TOKENS = "max_completion_tokens"
     RESPONSE_FORMAT = "response_format"
     JSON_SCHEMA = "json_schema"

@@ -139,6 +140,23 @@ def value_of(cls, value: Any) -> "DefaultParameterName":
         "max": 2048,
         "precision": 0,
     },
+    DefaultParameterName.MAX_COMPLETION_TOKENS: {
+        "label": {
+            "en_US": "Max Completion Tokens",
+            "zh_Hans": "最大完成标记",
+        },
+        "type": "int",
+        "help": {
+            "en_US": "Specifies the upper limit on the length of generated results. "
+            "If the generated results are truncated, you can increase this parameter.",
+            "zh_Hans": "指定生成结果长度的上限。如果生成结果截断,可以调大该参数。",
+        },
+        "required": False,
+        "default": 64,
+        "min": 1,
+        "max": 2048,
+        "precision": 0,
+    },
     DefaultParameterName.RESPONSE_FORMAT: {
         "label": {
             "en_US": "Response Format",
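For context, a minimal sketch of how the new template entry might be applied when resolving a user-supplied value. The dict mirrors the defaults added above; clamp_max_completion_tokens is a hypothetical helper for illustration, not part of dify_plugin.

# Sketch only: mirrors the MAX_COMPLETION_TOKENS template from this diff.
max_completion_tokens_template = {
    "type": "int",
    "required": False,
    "default": 64,
    "min": 1,
    "max": 2048,
    "precision": 0,
}

def clamp_max_completion_tokens(value: int | None) -> int:
    # Hypothetical helper: fall back to the default, then clamp into [min, max].
    if value is None:
        return max_completion_tokens_template["default"]
    return max(
        max_completion_tokens_template["min"],
        min(max_completion_tokens_template["max"], value),
    )

assert clamp_max_completion_tokens(None) == 64
assert clamp_max_completion_tokens(100_000) == 2048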
18 changes: 15 additions & 3 deletions python/dify_plugin/interfaces/model/openai_compatible/llm.py
@@ -180,7 +180,11 @@ def validate_credentials(self, model: str, credentials: dict) -> None:
             endpoint_url += "/"

         # prepare the payload for a simple ping to the model
-        data = {"model": credentials.get("endpoint_model_name", model), "max_tokens": 5}
+        if credentials.get("reasoning_thought_support") == "supported":
+            # endpoints with reasoning thought support use max_completion_tokens
+            data = {"model": credentials.get("endpoint_model_name", model), "max_completion_tokens": 5}
+        else:
+            data = {"model": credentials.get("endpoint_model_name", model), "max_tokens": 5}

         completion_type = LLMMode.value_of(credentials["mode"])
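Why the branch is needed: reasoning-style endpoints (OpenAI's o1 family, for example) reject max_tokens and expect max_completion_tokens instead, so even the credential-validation ping has to pick the right key. A standalone sketch of the same selection, where build_ping_payload is an invented name and only the credential keys visible in this diff are assumed:

# Sketch only: standalone version of the ping-payload selection above.
def build_ping_payload(model: str, credentials: dict) -> dict:
    payload = {"model": credentials.get("endpoint_model_name", model)}
    if credentials.get("reasoning_thought_support") == "supported":
        # reasoning endpoints expect max_completion_tokens
        payload["max_completion_tokens"] = 5
    else:
        payload["max_tokens"] = 5
    return payload

assert build_ping_payload("o1-mini", {"reasoning_thought_support": "supported"}) == {
    "model": "o1-mini",
    "max_completion_tokens": 5,
}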

@@ -256,6 +260,14 @@ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIMode
         """
         features = []

+        # endpoints with reasoning thought support use max_completion_tokens
+        if credentials.get("reasoning_thought_support") == "supported":
+            max_token_param_name = DefaultParameterName.MAX_COMPLETION_TOKENS.value
+            max_token_param_label = "Max Completion Tokens"
+        else:
+            max_token_param_name = DefaultParameterName.MAX_TOKENS.value
+            max_token_param_label = "Max Tokens"
+
         function_calling_type = credentials.get("function_calling_type", "no_call")
         if function_calling_type == "function_call":
             features.append(ModelFeature.TOOL_CALL)
@@ -338,8 +350,8 @@ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIMode
                     max=2,
                 ),
                 ParameterRule(
-                    name=DefaultParameterName.MAX_TOKENS.value,
-                    label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
+                    name=max_token_param_name,
+                    label=I18nObject(en_US=max_token_param_label, zh_Hans="最大标记"),
                     help=I18nObject(
                         en_US="Maximum length of tokens for the model response.",
                         zh_Hans="模型回答的tokens的最大长度。",
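Taken together, the schema changes mean the single ParameterRule above now advertises a different name/label pair depending on the credentials. A small sketch of that mapping; the function is purely illustrative, while DefaultParameterName comes from the first file in this diff:

# Sketch only: mirrors the branch added in get_customizable_model_schema.
from dify_plugin.entities.model import DefaultParameterName

def max_token_rule_fields(credentials: dict) -> tuple[str, str]:
    # Returns the (parameter name, en_US label) the schema will expose.
    if credentials.get("reasoning_thought_support") == "supported":
        return (DefaultParameterName.MAX_COMPLETION_TOKENS.value, "Max Completion Tokens")
    return (DefaultParameterName.MAX_TOKENS.value, "Max Tokens")

assert max_token_rule_fields({}) == ("max_tokens", "Max Tokens")
assert max_token_rule_fields({"reasoning_thought_support": "supported"}) == (
    "max_completion_tokens",
    "Max Completion Tokens",
)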