From 562919b3b37028cf620e70674d6b1f37274a522c Mon Sep 17 00:00:00 2001 From: DanielSun11 Date: Mon, 11 Aug 2025 22:51:41 +0800 Subject: [PATCH 01/14] test --- .gitignore | 2 +- .../generator/CMakeLists.txt | 21 ++ .../generator/codegen_utils.py | 46 +++++ .../generator/monkey_patch_gen.py | 140 +++++++++++++ .../generator/python_c_gen.py | 131 ++++++++++-- .../fluid/operators/generator/parse_utils.py | 3 + paddle/fluid/pybind/eager_functions.cc | 27 +++ paddle/fluid/pybind/eager_utils.cc | 70 ++++++- paddle/fluid/pybind/eager_utils.h | 35 +++- paddle/fluid/pybind/op_function_common.cc | 186 +++++++++++++++++- paddle/fluid/pybind/op_function_common.h | 72 +++++++ paddle/phi/ops/yaml/ops.yaml | 5 + python/paddle/__init__.py | 9 +- python/paddle/_paddle_docs.py | 173 ++++++++++++++++ python/paddle/framework/__init__.py | 4 + python/paddle/tensor/math.py | 53 ++--- tools/gen_tensor_stub.py | 8 +- 17 files changed, 928 insertions(+), 57 deletions(-) create mode 100644 paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py create mode 100644 python/paddle/_paddle_docs.py diff --git a/.gitignore b/.gitignore index 82ea4d83d35dfb..f41e807a55ecf1 100644 --- a/.gitignore +++ b/.gitignore @@ -117,7 +117,7 @@ paddle/phi/kernels/fusion/cutlass/gemm_epilogue/build paddle/phi/kernels/fusion/cutlass/gemm_epilogue/cutlass python/paddle/_typing/libs/**/*.pyi third_party.tar.gz - +python/paddle/base/dygraph/generated_tensor_methods_patch.py #fp8 paddle/fluid/fp8/deep_gemm/include/cute/* paddle/fluid/fp8/deep_gemm/include/cutlass/* diff --git a/paddle/fluid/eager/auto_code_generator/generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/generator/CMakeLists.txt index ae39256b28ef27..924887be4e5408 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/generator/CMakeLists.txt @@ -94,3 +94,24 @@ add_custom_target( COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_python_c_header_path} 
${python_c_header_path} VERBATIM) + +# set(ops_yaml_path +# "${PADDLE_SOURCE_DIR}/paddle/phi/ops/yaml/ops.yaml" +# ) +# set(monkey_patch_tensor_methods_path +# "${PADDLE_SOURCE_DIR}/python/paddle/base/dygraph/generated_tensor_methods_patch.py" +# ) +# set(tmp_monkey_patch_tensor_methods_path +# "${PADDLE_SOURCE_DIR}/python/paddle/base/dygraph/generated_tensor_methods_patch.py.tmp" +# ) +# message("Eager monkey path tensor methods CodeGen") +# add_custom_target( +# eager_monkey_patch_codegen +# COMMAND +# "${PYTHON_EXECUTABLE}" +# "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py" +# "--api_yaml_path=${ops_yaml_path}" +# "--output_path=${tmp_monkey_patch_tensor_methods_path}" +# COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_monkey_patch_tensor_methods_path} +# ${monkey_patch_tensor_methods_path} +# VERBATIM) diff --git a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py index eeb78c9d028930..09ab2c19ab791d 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py +++ b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py @@ -479,6 +479,7 @@ def __init__(self, forward_api_contents, namespace): ) self.forward_api_name = "" + self.python_api_info = {} self.orig_forward_inputs_list = ( [] @@ -506,6 +507,15 @@ def __init__(self, forward_api_contents, namespace): ) # {name: func_name, args: [input_name, ...]} self.intermediate_outputs = [] # [name, ...] 
self.forward_inplace_map = {} # {name : name, ...} + self.args_alias_map = {} # {arg_name: alias_vector, ...} + self.dygraph_pre_process = ( + "" # The pre_process function calling code for dygraph + ) + self.static_pre_process = ( + "" # The pre_process function calling code for static graph + ) + self.args_parser_func_name = "" # The custom args parser function + self.python_api_names = "" def ParseForwardInplaceInfo(self): forward_api_contents = self.forward_api_contents @@ -515,6 +525,40 @@ def ParseForwardInplaceInfo(self): inplace_map_str = forward_api_contents['inplace'] self.forward_inplace_map = ParseYamlInplaceInfo(inplace_map_str) + # Function for parameters parse + def ParsePythonAPIInfo(self): + python_api_info = self.python_api_info + args_alias = {} + if 'name' in python_api_info.keys(): + self.python_api_names = python_api_info['name'] + if 'args_alias' in python_api_info.keys(): + for arg, alias in python_api_info['args_alias'].items(): + alias_set = set(alias) + # Add the original argument name to the alias set + alias_set.add(arg) + # Convert to C++ vector format + alias_vector = ( + "{" + ",".join(f'"{name}"' for name in alias_set) + "}" + ) + args_alias.update({arg: alias_vector}) + self.args_alias_map = args_alias + if 'pre_process' in python_api_info.keys(): + pre_process = python_api_info['pre_process'] + if 'func' in pre_process.keys(): + self.dygraph_pre_process = pre_process['func'] + self.static_pre_process = pre_process['func'] + if len(pre_process) > 1: + print("error") + if 'dygraph_func' in pre_process.keys(): + self.dygraph_pre_process = pre_process['dygraph_func'] + if 'static_func' in pre_process.keys(): + self.static_pre_process = pre_process['static_func'] + if ( + 'args_parser' in python_api_info.keys() + and 'func' in python_api_info['args_parser'] + ): + self.args_parser_func_name = python_api_info['args_parser']['func'] + def ParseNoNeedBuffer(self): grad_api_contents = self.grad_api_contents @@ -575,6 +619,8 @@ def 
CollectOriginalForwardInfo(self): ), 'Unable to find "output" in forward_api_contents keys' forward_returns_str = forward_api_contents['output'] + if 'python_api' in forward_api_contents.keys(): + self.python_api_info = forward_api_contents['python_api'] # Collect Original Forward Inputs/Outputs and then perform validation checks ( diff --git a/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py b/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py new file mode 100644 index 00000000000000..03a8630a3b6665 --- /dev/null +++ b/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py @@ -0,0 +1,140 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +from codegen_utils import ( + FunctionGeneratorBase, + GeneratorBase, +) + +IMPORT_TEMPLATE = """ +import paddle +from paddle import _C_ops +from .. 
import core +""" + +FUNCTION_NAME_TEMPLATE = """ +def {func_name}(): +""" + +NAME_METHOD_MAPPING_TEMPLATE = """('{api_name}',_{api_name}),\n""" + +METHODS_MAP_TEMPLATE = """ + eager_methods_map = [ + {} + ] +""" + +METHOD_TEMPLATE = """ + def _{name}(self,*args, **kwargs): + return _C_ops.{name}(self,*args, **kwargs) +""" +SET_METHOD_TEMPLATE = """ + local_tensor = core.eager.Tensor + for method_name, method in eager_methods_map: + setattr(local_tensor, method_name, method) +""" + + +class MethodGenerator(FunctionGeneratorBase): + def __init__(self, forward_api_contents, namespace): + FunctionGeneratorBase.__init__(self, forward_api_contents, namespace) + self.need_parse_python_api_args = False + # Generated Results + self.Method_str = "" + + def GenerateMethod(self, name): + self.Method_str = METHOD_TEMPLATE.format(name=name) + + def run(self): + # Initialized orig_forward_inputs_list, orig_forward_returns_list, orig_forward_attrs_list + self.CollectOriginalForwardInfo() + + if len(self.python_api_info) > 0: + self.need_parse_python_api_args = True + self.ParsePythonAPIInfo() + for name in self.python_api_names: + if "Tensor." 
in name: + api_name = name.split(".")[-1] + self.GenerateMethod(api_name) + self.api_name = api_name + break + + +class MonkeyPatchTensorMethodsGenerator(GeneratorBase): + def __init__(self, path): + # Parent members: + # self.namespace + # self.api_yaml_path + # self.forward_api_list + GeneratorBase.__init__(self, path) + + # Generated Result + self.MonkeyPatchTensorMethods_str = "" + + def GenerateMonkeyPatchTensorMethods(self): + self.MonkeyPatchTensorMethods_str += IMPORT_TEMPLATE + self.MonkeyPatchTensorMethods_str += FUNCTION_NAME_TEMPLATE.format( + func_name="monkey_patch_generated_tensor_methods" + ) + forward_api_list = self.forward_api_list + methods_map = [] # [("method_name",method),] + for forward_api_content in forward_api_list: + f_generator = MethodGenerator(forward_api_content, None) + status = f_generator.run() + method_str = f_generator.Method_str + if method_str != "": + methods_map.append( + NAME_METHOD_MAPPING_TEMPLATE.format( + api_name=f_generator.api_name + ) + ) + self.MonkeyPatchTensorMethods_str += method_str + result = ',\n '.join(methods_map) + self.MonkeyPatchTensorMethods_str += METHODS_MAP_TEMPLATE.format(result) + self.MonkeyPatchTensorMethods_str += SET_METHOD_TEMPLATE + + def run(self): + # Read Yaml file + self.ParseForwardYamlContents() + self.GenerateMonkeyPatchTensorMethods() + + +########################## +# Code Generation Helper # +########################## +def ParseArguments(): + parser = argparse.ArgumentParser( + description='Eager Code Generator Args Parser for Monkey patch methods ' + ) + parser.add_argument('--api_yaml_path', type=str) + parser.add_argument('--output_path', type=str) + + args = parser.parse_args() + return args + + +def GenerateMonkeyPathFile(filepath, python_c_str): + with open(filepath, 'a') as f: + f.write(python_c_str) + + +if __name__ == "__main__": + args = ParseArguments() + api_yaml_path = args.api_yaml_path + output_path = args.output_path + gen = 
MonkeyPatchTensorMethodsGenerator(api_yaml_path) + gen.run() + GenerateMonkeyPathFile(output_path, gen.MonkeyPatchTensorMethods_str) diff --git a/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py b/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py index dc05025ee8d6d6..4b4cf5c9b8714f 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py @@ -80,6 +80,7 @@ def FindParsingFunctionFromAttributeType(atype): PARSE_PYTHON_C_TENSOR_REF_TEMPLATE = ( ' auto& {} = {}("{}", "{}", args, {}, {});\n' ) +PARSE_PYTHON_C_TENSORS_FROM_ARGS_OR_KWARGS_TEMPLATE = ' auto {} = GetTensorFromArgsOrKWArgs("{}", "{}", args, {}, kwargs,{},nargs,remaining_kwargs,{});\n' CONVERT_TO_DISTTENSOR_AND_PARSE_PYTHON_C_TENSORS_TEMPLATE = ( ' {} = {}("{}", "{}", args, {}, {}, mesh);\n' @@ -103,8 +104,23 @@ def FindParsingFunctionFromAttributeType(atype): PARSE_PYTHON_C_ARGS_TEMPLATE = """ PyObject* {}_obj = PyTuple_GET_ITEM(args, {}); {} {} = {}({}_obj, \"{}\", {}); """ +PARSE_PYTHON_C_NUM_ARGS_TEMPLATE = """ int nargs = args ? static_cast(PyTuple_Size(args)) : 0; + int remaining_kwargs = kwargs ? 
static_cast(PyDict_Size(kwargs)) : 0; + const int max_args = {}; + CheckParamsCount(nargs,remaining_kwargs,max_args); +""" +PARSE_PYTHON_C_ARGS_KWARGS_WITH_DEFAULT_VALUE_TEMPLATE = """ + PyObject* {}_obj = GetItemFromArgsOrKWArgs(args, {}, kwargs, {}, nargs,&remaining_kwargs); + {} {} = {}({}_obj, \"{}\", {}, {});""" +PARSE_PYTHON_C_ARGS_KWARGS_TEMPLATE = """ + PyObject* {}_obj = GetItemFromArgsOrKWArgs(args, {}, kwargs, {}, nargs,&remaining_kwargs,false); + {} {} = {}({}_obj, \"{}\", {});""" +CHECK_REMAINING_ARGS_VALID_TEMPLATE = """ CheckRemainingParamsValidity(args,kwargs,remaining_kwargs,nargs); +""" +CALL_PRE_PROCESS_TEMPLATE = """ {}; +""" RECORD_EVENT_TEMPLATE = ( 'phi::RecordEvent {}("{} {}", phi::TracerEventType::UserDefined, 1);' ) @@ -121,11 +137,16 @@ def FindParsingFunctionFromAttributeType(atype): PyThreadState *tstate = nullptr; try {{ VLOG(6) << "Running Eager Final State API: {}"; - + // Get Total Params count and check validity if needed +{} VLOG(8) << "args count: " << (PyTuple_Size(args) / 2); // Get EagerTensors from args {} // Parse Attributes if needed +{} + // Check Reminding Params validity if needed +{} + // Call Pre_Process before calling dygraph function if needed {} tstate = PyEval_SaveThread(); @@ -323,6 +344,7 @@ def __init__(self, forward_api_contents, namespace): FunctionGeneratorBase.__init__(self, forward_api_contents, namespace) self.is_forward_only = True + self.need_parse_python_api_args = False # Generated Results self.python_c_function_str = "" @@ -345,8 +367,20 @@ def GeneratePythonCFunction(self): optional_inputs = self.optional_inputs is_forward_only = self.is_forward_only + need_parse_python_api_args = self.need_parse_python_api_args + args_alias_map = self.args_alias_map + max_args = len(orig_forward_attrs_list) + len( + forward_inputs_position_map + ) + dygraph_pre_process = self.dygraph_pre_process + inplace_args_pos_map = {} inplace_returns_pos_map = {} + get_params_nums_and_check_str = "// NO NEED" + if 
need_parse_python_api_args: + get_params_nums_and_check_str = ( + PARSE_PYTHON_C_NUM_ARGS_TEMPLATE.format(max_args) + ) # Generate Python-C Tensors Parsing Logic get_eager_tensor_str = "" input_names = "" @@ -395,16 +429,29 @@ def GeneratePythonCFunction(self): input_single_tensor_names = ( input_single_tensor_names + ", " + name ) - get_eager_tensor_str += ( - PARSE_PYTHON_C_TENSOR_REF_TEMPLATE.format( + if not need_parse_python_api_args: + get_eager_tensor_str += ( + PARSE_PYTHON_C_TENSOR_REF_TEMPLATE.format( + name, + "GetTensorFromArgs", + forward_api_name, + name, + pos, + "false", + ) + ) + else: + keywords = f'{{"{name}"}}' + if name in args_alias_map.keys(): + keywords = args_alias_map[name] + get_eager_tensor_str += PARSE_PYTHON_C_TENSORS_FROM_ARGS_OR_KWARGS_TEMPLATE.format( name, - "GetTensorFromArgs", forward_api_name, name, pos, + keywords, "false", ) - ) # No inputs, skip convert to DistTensor if len(input_names) > 0: optional_and_vector_convert_code = "" @@ -462,7 +509,7 @@ def GeneratePythonCFunction(self): ) # Generate Python-C Attributes Parsing Logic - for name, atype, _, pos in orig_forward_attrs_list: + for name, atype, default_value, pos in orig_forward_attrs_list: parsing_function_name = FindParsingFunctionFromAttributeType(atype) # Used input argument place if specified from Python frontend. if ( @@ -473,18 +520,58 @@ def GeneratePythonCFunction(self): assert ( name == "place" ), "Only support 'place' as template argument name in FUNCTION_SET_DEVICE_TEMPLATE." 
- - parse_attributes_str += PARSE_PYTHON_C_ARGS_TEMPLATE.format( - name, - pos, - atype, - name, - parsing_function_name, - name, - forward_api_name, - pos, + if need_parse_python_api_args: + keywords = f'{{"{name}"}}' + if name in args_alias_map.keys(): + keywords = args_alias_map[name] + if default_value is None: + parse_attributes_str += ( + PARSE_PYTHON_C_ARGS_KWARGS_TEMPLATE.format( + name, + pos, + keywords, + atype, + name, + parsing_function_name, + name, + forward_api_name, + pos, + ) + ) + else: + parse_attributes_str += PARSE_PYTHON_C_ARGS_KWARGS_WITH_DEFAULT_VALUE_TEMPLATE.format( + name, + pos, + keywords, + atype, + name, + parsing_function_name, + name, + forward_api_name, + pos, + default_value, + ) + else: + parse_attributes_str += PARSE_PYTHON_C_ARGS_TEMPLATE.format( + name, + pos, + atype, + name, + parsing_function_name, + name, + forward_api_name, + pos, + ) + check_remaining_params_validity_str = " // NO NEED" + if need_parse_python_api_args: + check_remaining_params_validity_str = ( + CHECK_REMAINING_ARGS_VALID_TEMPLATE + ) + pre_process_str = " //NO NEED" + if need_parse_python_api_args and len(dygraph_pre_process) > 0: + pre_process_str = CALL_PRE_PROCESS_TEMPLATE.format( + dygraph_pre_process ) - set_device_str = FUNCTION_SET_DEVICE_TEMPLATE.format(expected_place_str) # Generate Dygraph Function Call Logic @@ -522,8 +609,11 @@ def GeneratePythonCFunction(self): forward_api_name, pythonc_record_event_str, forward_api_name, + get_params_nums_and_check_str, get_eager_tensor_str, parse_attributes_str, + check_remaining_params_validity_str, + pre_process_str, set_device_str, noamp_dygraph_function_str, return_str, @@ -579,8 +669,11 @@ def GeneratePythonCFunction(self): inplaced_forward_api_name, pythonc_record_event_str, inplaced_forward_api_name, + get_params_nums_and_check_str, get_eager_tensor_str, parse_attributes_str, + check_remaining_params_validity_str, + pre_process_str, set_device_str, inplace_noamp_dygraph_function_str, return_str, 
@@ -631,6 +724,10 @@ def run(self): # Initialized orig_forward_inputs_list, orig_forward_returns_list, orig_forward_attrs_list self.CollectOriginalForwardInfo() + if len(self.python_api_info) > 0: + self.need_parse_python_api_args = True + self.ParsePythonAPIInfo() + if SkipAPIGeneration(self.forward_api_name): return False diff --git a/paddle/fluid/operators/generator/parse_utils.py b/paddle/fluid/operators/generator/parse_utils.py index 4a02c3ae5ecbec..269f1d0ea6fc45 100644 --- a/paddle/fluid/operators/generator/parse_utils.py +++ b/paddle/fluid/operators/generator/parse_utils.py @@ -369,6 +369,7 @@ def check_op_config(op_entry, op_name): 'support_tensor', 'traits', 'interfaces', + 'python_api', ) infer_meta_key_set = ( 'func', @@ -384,6 +385,8 @@ def check_op_config(op_entry, op_name): 'layout', 'backend', 'force_backend', + 'python_api', + 'dispatch', ) for key in op_entry.keys(): assert ( diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 15b78262ef8e0b..5dde402ed4a6f6 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -1378,6 +1378,28 @@ PyObject* eager__is_run_in_backward(PyObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } +PyObject* eager__add_doc_str(PyObject* self, PyObject* args, ) { + EAGER_TRY + PyObject* obj = nullptr; + PyObject* doc_obj = nullptr; + if (!PyArg_ParseTuple(args, "OO", &obj, &doc_obj)) { + return nullptr; + } + const char* doc_str = ""; + std::string doc_string = CastPyArg2AttrString(doc_obj, 1); + std::cout < < doc_string << std::endl; + if (Py_TYPE(obj) == &PyCFunction_Type) { + PyCFunctionObject* f = reinterpret_cast(obj); + std::cout << "type : " << Py_TYPE(obj)->tp_name << std::endl; + if (f->m_ml->ml_doc) { + std::cout << "doc : " << f->m_ml->ml_doc << std::endl; + } + f->m_ml->ml_doc = doc_str; + } + + RETURN_PY_NONE + EAGER_CATCH_AND_THROW_RETURN_NULL +} PyObject* eager__for_test_check_cuda_error(PyObject* self, PyObject* args, @@ 
-1488,6 +1510,11 @@ PyMethodDef variable_functions[] = { // NOLINT (PyCFunction)(void (*)())eager__for_test_check_cuda_error, METH_VARARGS | METH_KEYWORDS, nullptr}, + + {"_add_docstr", + (PyCFunction)(void (*)())eager__add_doc_str, + METH_VARARGS, + nullptr}, /**sparse functions**/ #if defined(PADDLE_WITH_CUDA) {"async_read", diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index cddb8c4e90bc16..f0d97edeb7850f 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -1360,6 +1360,20 @@ paddle::Tensor& GetTensorFromArgs(const std::string& op_type, PyObject* obj = PyTuple_GET_ITEM(args, arg_idx); return GetTensorFromPyObject(op_type, arg_name, obj, arg_idx, dispensable); } +paddle::Tensor& GetTensorFromArgsOrKWArgs( + const std::string& op_type, + const std::string& arg_name, + PyObject* args, + ssize_t arg_idx, + PyObject* kwargs, + const std::vector& keywords, + const int nargs, + int* remaining_kwargs, + bool dispensable) { + PyObject* obj = GetItemFromArgsOrKWArgs( + args, arg_idx, kwargs, keywords, nargs, remaining_kwargs); + return GetTensorFromPyObject(op_type, arg_name, obj, arg_idx, dispensable); +} std::vector GetTensorListFromArgs( const std::string& op_type, @@ -2249,6 +2263,17 @@ paddle::experimental::Scalar CastPyArg2Scalar(PyObject* obj, // Fake a Scalar return paddle::experimental::Scalar(1.0); } +paddle::experimental::Scalar CastPyArg2Scalar( + PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + paddle::experimental::Scalar default_value) { + if (obj != nullptr) { + return CastPyArg2Scalar(obj, op_type, arg_pos); + } else { + return default_value; + } +} std::vector CastPyArg2ScalarArray(PyObject* obj, const std::string& op_type, @@ -2311,7 +2336,17 @@ std::vector CastPyArg2ScalarArray(PyObject* obj, ((PyTypeObject*)obj->ob_type)->tp_name)); // NOLINT } } - +std::vector CastPyArg2ScalarArray( + PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + 
std::vector default_value) { + if (obj != nullptr) { + return CastPyArg2ScalarArray(obj, op_type, arg_pos); + } else { + return default_value; + } +} paddle::experimental::IntArray CastPyArg2IntArray(PyObject* obj, const std::string& op_type, ssize_t arg_pos) { @@ -2343,7 +2378,17 @@ paddle::experimental::IntArray CastPyArg2IntArray(PyObject* obj, // Fake a IntArray return paddle::experimental::IntArray({1}); } - +paddle::experimental::IntArray CastPyArg2IntArray( + PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + paddle::experimental::IntArray default_value) { + if (obj != nullptr) { + return CastPyArg2IntArray(obj, op_type, arg_pos); + } else { + return default_value; + } +} paddle::framework::Scope* CastPyArg2ScopePtr(PyObject* obj) { if (PyObject_TypeCheck(obj, g_framework_scope_pytype)) { return ::pybind11::handle(obj).cast(); @@ -2582,7 +2627,16 @@ paddle::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos) { return CastPyArg2Place(obj, arg_pos); } - +paddle::Place CastPyArg2Place(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + paddle::Place default_place) { + if (obj != nullptr) { + return CastPyArg2Place(obj, op_type, arg_pos); + } else { + return default_place; + } +} paddle::DataType CastPyArg2DataType(PyObject* obj, const std::string& op_type, ssize_t arg_pos) { @@ -2595,6 +2649,16 @@ paddle::DataType CastPyArg2DataType(PyObject* obj, } return CastPyArg2DataTypeDirectly(obj, op_type, arg_pos); } +paddle::DataType CastPyArg2DataType(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + paddle::DataType default_value) { + if (obj != nullptr) { + return CastPyArg2DataType(obj, op_type, arg_pos); + } else { + return default_value; + } +} paddle::Tensor PyTensorHook::operator()(const paddle::Tensor& var) { py::gil_scoped_acquire gil; diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index e0a1c035b353d5..3e5e643cb48cca 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ 
b/paddle/fluid/pybind/eager_utils.h @@ -334,7 +334,11 @@ PyObject* ToPyObject(const std::tuple& out, paddle::experimental::Scalar CastPyArg2Scalar(PyObject* obj, const std::string& op_type, ssize_t arg_pos); - +paddle::experimental::Scalar CastPyArg2Scalar( + PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + paddle::experimental::Scalar default_value); paddle::experimental::Scalar CastNumpy2Scalar(PyObject* obj, const std::string& op_type, ssize_t arg_pos); @@ -342,18 +346,34 @@ paddle::experimental::Scalar CastNumpy2Scalar(PyObject* obj, std::vector CastPyArg2ScalarArray(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +std::vector CastPyArg2ScalarArray(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector); paddle::experimental::IntArray CastPyArg2IntArray(PyObject* obj, const std::string& op_type, ssize_t arg_pos); - +paddle::experimental::IntArray CastPyArg2IntArray( + PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + paddle::experimental::IntArray default_value); paddle::Place CastPyArg2Place(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +paddle::Place CastPyArg2Place(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + paddle::Place default_place); paddle::DataType CastPyArg2DataType(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +paddle::DataType CastPyArg2DataType(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + paddle::DataType default_value); paddle::DataType CastPyArg2DataTypeDirectly(PyObject* obj, const std::string& op_type, @@ -514,5 +534,16 @@ CvtPlacements(phi::distributed::Placements placements, int ndim); void EagerSetDeviceId(); +/*----------------------for arg parse-----------------------------*/ +paddle::Tensor& GetTensorFromArgsOrKWArgs( + const std::string& op_type, + const std::string& arg_name, + PyObject* args, + ssize_t arg_idx, + PyObject* kwargs, + const std::vector& keywords, + const int nargs, + int* 
remaining_kwargs, + bool dispensable = false); } // namespace pybind } // namespace paddle diff --git a/paddle/fluid/pybind/op_function_common.cc b/paddle/fluid/pybind/op_function_common.cc index 62501fbb666d31..81a64d056b0a32 100644 --- a/paddle/fluid/pybind/op_function_common.cc +++ b/paddle/fluid/pybind/op_function_common.cc @@ -285,6 +285,16 @@ bool CastPyArg2Boolean(PyObject* obj, return false; } +bool CastPyArg2Boolean(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + bool default_value) { + if (obj) { + return CastPyArg2Boolean(obj, op_type, arg_pos); + } else { + return default_value; + } +} void CastPyArg2AttrBoolean(PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT @@ -308,6 +318,16 @@ int CastPyArg2Int(PyObject* obj, const std::string& op_type, ssize_t arg_pos) { return 0; } +int CastPyArg2Int(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + int default_value) { + if (obj != nullptr) { + return CastPyArg2Int(obj, op_type, arg_pos); + } else { + return default_value; + } +} void CastPyArg2AttrInt(PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT @@ -333,6 +353,16 @@ int64_t CastPyArg2Long(PyObject* obj, return 0; } +int64_t CastPyArg2Long(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + int64_t default_value) { + if (obj != nullptr) { + return CastPyArg2Long(obj, op_type, arg_pos); + } else { + return default_value; + } +} void CastPyArg2AttrLong(PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT @@ -361,7 +391,16 @@ float CastPyArg2Float(PyObject* obj, ssize_t arg_pos) { return static_cast(CastPyArg2Double(obj, op_type, arg_pos)); } - +float CastPyArg2Float(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + float default_value) { + if (obj != nullptr) { + return CastPyArg2Float(obj, op_type, arg_pos); + } else { + return default_value; + } +} void CastPyArg2AttrFloat(PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT const 
std::string& key, @@ -386,6 +425,16 @@ double CastPyArg2Double(PyObject* obj, return 0.0; } +double CastPyArg2Double(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + double default_value) { + if (obj != nullptr) { + return CastPyArg2Double(obj, op_type, arg_pos); + } else { + return default_value; + } +} phi::dtype::complex CastPyArg2Complex(PyObject* obj, const std::string& op_type, @@ -457,6 +506,16 @@ std::string CastPyArg2String(PyObject* obj, return ""; } +std::string CastPyArg2String(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::string default_value) { + if (obj != nullptr) { + return CastPyArg2String(obj, op_type, arg_pos); + } else { + return default_value; + } +} void CastPyArg2AttrString(PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT @@ -515,7 +574,16 @@ std::vector CastPyArg2Booleans(PyObject* obj, return value; } - +std::vector CastPyArg2Booleans(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector default_value) { + if (obj != nullptr) { + return CastPyArg2Booleans(obj, op_type, arg_pos); + } else { + return default_value; + } +} void CastPyArg2AttrBooleans(PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT const std::string& key, @@ -594,6 +662,16 @@ std::vector CastPyArg2Ints(PyObject* obj, return value; } +std::vector CastPyArg2Ints(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector default_value) { + if (obj != nullptr) { + return CastPyArg2Ints(obj, op_type, arg_pos); + } else { + return default_value; + } +} void CastPyArg2AttrInts(PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT @@ -674,6 +752,16 @@ std::vector CastPyArg2Longs(PyObject* obj, return value; } +std::vector CastPyArg2Longs(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector default_value) { + if (obj) { + return CastPyArg2Longs(obj, op_type, arg_pos); + } else { + return default_value; + } +} void 
CastPyArg2AttrLongs(PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT @@ -750,6 +838,16 @@ std::vector CastPyArg2Floats(PyObject* obj, return value; } +std::vector CastPyArg2Floats(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector default_value) { + if (obj != nullptr) { + return CastPyArg2Floats(obj, op_type, arg_pos); + } else { + return default_value; + } +} void CastPyArg2AttrFloats(PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT @@ -826,7 +924,16 @@ std::vector CastPyArg2Float64s(PyObject* obj, return value; } - +std::vector CastPyArg2Float64s(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector default_value) { + if (obj != nullptr) { + return CastPyArg2Float64s(obj, op_type, arg_pos); + } else { + return default_value; + } +} void CastPyArg2AttrFloat64s(PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT const std::string& key, @@ -904,7 +1011,17 @@ std::vector CastPyArg2Strings(PyObject* obj, } return value; } - +std::vector CastPyArg2Strings( + PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector default_value) { + if (obj != nullptr) { + return CastPyArg2Strings(obj, op_type, arg_pos); + } else { + return default_value; + } +} void CastPyArg2AttrStrings(PyObject* obj, paddle::framework::AttributeMap& attrs, // NOLINT const std::string& key, @@ -1379,9 +1496,9 @@ ssize_t GetIdxFromCoreOpsInfoMap( core_ops_info_map, const std::string& op_type, const std::string& name) { - // `core_ops_info_map` can be `core_ops_args_info` or `core_ops_returns_info`. - // `core_ops_args_info`: get index from core_ops_args_info[op_type] according - // to input name. + // `core_ops_info_map` can be `core_ops_args_info` or + // `core_ops_returns_info`. `core_ops_args_info`: get index from + // core_ops_args_info[op_type] according to input name. // `core_ops_returns_info`: get index from core_ops_returns_info[op_type] // according to return name. 
if (!core_ops_info_map.count(op_type)) { @@ -1400,7 +1517,8 @@ ssize_t GetIdxFromCoreOpsInfoMap( return -1; } -static PyMethodDef OpFunctionCommonMethods[] = { // NOLINT +static PyMethodDef OpFunctionCommonMethods[] = { + // NOLINT {"construct_program_attribute_map", (PyCFunction)ConstructProgramAttrMapForRunProgram, METH_VARARGS, @@ -1414,5 +1532,57 @@ void BindOpFunctionCommon(PyObject* module) { return; } } +// for parse argruments from args and kwargs +// Get Item From PyObject* args Or PyObject* kwargs +PyObject* GetItemFromArgsOrKWArgs(PyObject* args, + int pos, + PyObject* kwargs, + const std::vector& keywords, + int nargs, + int* remaining_kwargs, + bool dispensable) { + // get item from args first if pos < nargs + if (nargs > pos) { + PyObject* arg = PyTuple_GetItem(args, pos); + if (arg) { + return arg; + } + } + // get item from kwargs if pos is out of args range and kwargs has unused + // items + if (kwargs && *remaining_kwargs > 0) { + PyObject* arg = nullptr; + for (std::string keyword : keywords) { + arg = PyDict_GetItemString(kwargs, keyword.c_str()); + if (arg) { + *remaining_kwargs = *remaining_kwargs - 1; + return arg; + } + } + } + if (!dispensable) { + PADDLE_THROW(common::errors::InvalidArgument( + "Argument '%s' (position %d) must be provided", keywords[0], pos)); + } + return nullptr; +} +void CheckRemainingParamsValidity(PyObject* args, + PyObject* kwargs, + int remaining_kwargs, + int nargs) { + const std::string ignored_arg_name = "name"; + const std::string ignored_arg_out = "out"; + if (remaining_kwargs == 0) return; + PyObject* name = PyDict_GetItemString(kwargs, ignored_arg_name.c_str()); + PyObject* out = PyDict_GetItemString(kwargs, ignored_arg_out.c_str()); + if (remaining_kwargs == 1 && (name || out)) { + return; + } else if (remaining_kwargs == 2 && (name && out)) { + return; + } else { + PADDLE_THROW(common::errors::InvalidArgument("has too many arguments")); + } + return; +} } // namespace paddle::pybind diff --git 
a/paddle/fluid/pybind/op_function_common.h b/paddle/fluid/pybind/op_function_common.h index 9213610b751c62..9159f6ccc802ec 100644 --- a/paddle/fluid/pybind/op_function_common.h +++ b/paddle/fluid/pybind/op_function_common.h @@ -67,19 +67,39 @@ bool PyObject_CheckString(PyObject* obj); bool CastPyArg2Boolean(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +bool CastPyArg2Boolean(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + bool default_value); int CastPyArg2Int(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +int CastPyArg2Int(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + int default_value); int64_t CastPyArg2Long(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +int64_t CastPyArg2Long(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + int64_t default_value); float16 CastPyArg2Float16(PyObject* obj, const std::string& op_type, ssize_t arg_pos); float CastPyArg2Float(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +float CastPyArg2Float(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + float default_value); double CastPyArg2Double(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +double CastPyArg2Double(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + double default_value); phi::dtype::complex CastPyArg2Complex(PyObject* obj, const std::string& op_type, ssize_t arg_pos); @@ -89,24 +109,53 @@ phi::dtype::complex CastPyArg2Complex128(PyObject* obj, std::string CastPyArg2String(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +std::string CastPyArg2String(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::string default_value); std::vector CastPyArg2Booleans(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +std::vector CastPyArg2Booleans(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector default_value); std::vector CastPyArg2Ints(PyObject* obj, const 
std::string& op_type, ssize_t arg_pos); +std::vector CastPyArg2Ints(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector default_value); std::vector CastPyArg2Longs(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +std::vector CastPyArg2Longs(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector default_value); std::vector CastPyArg2Floats(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +std::vector CastPyArg2Floats(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector default_value); std::vector CastPyArg2Float64s(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +std::vector CastPyArg2Float64s(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector default_value); std::vector CastPyArg2Strings(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +std::vector CastPyArg2Strings( + PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + std::vector default_value); std::vector CastPyArg2Scalars( PyObject* obj, const std::string& op_type, ssize_t arg_pos); @@ -244,5 +293,28 @@ ssize_t GetIdxFromCoreOpsInfoMap( const std::string& name); void BindOpFunctionCommon(PyObject* module); +PyObject* GetItemFromArgsOrKWArgs(PyObject* args, + int pos, + PyObject* kwargs, + const std::vector& keywords, + int nargs, + int* remaining_kwargs, + bool dispensable = true); + +void CheckRemainingParamsValidity(PyObject* args, + PyObject* kwargs, + const int remaining_kwargs, + const int nargs); +static inline void CheckParamsCount(const int nargs, + const int remaining_kwargs, + const int max_args) { + // To be compatible with the name and out parameters, we add 2 to max_args + if (nargs + remaining_kwargs > max_args + 2 || nargs > max_args + 1) { + PADDLE_THROW(common::errors::InvalidArgument( + "Has too many arguments,support max values: %d , but got: %d ", + max_args + 2, + nargs + remaining_kwargs)); + } +} } // namespace pybind } // namespace 
paddle diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml index b792f388fce3ca..a5f4d591cf5161 100644 --- a/paddle/phi/ops/yaml/ops.yaml +++ b/paddle/phi/ops/yaml/ops.yaml @@ -239,6 +239,11 @@ - op : amax args : (Tensor x, int64_t[] axis={}, bool keepdim=false) + python_api : + name : [paddle.amax,paddle.Tensor.amax] + args_alias: + x : [input,x1] + axis : [dim] output : Tensor(out) infer_meta : func : ReduceInferMeta diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 9111fe8eda5af1..fc6a4210006413 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -36,12 +36,18 @@ # NOTE(SigureMo): We should place the import of base.core before other modules, # because there are some initialization codes in base/core/__init__.py. from .base import core # noqa: F401 +from .base.dygraph.generated_tensor_methods_patch import ( + monkey_patch_generated_tensor_methods, +) from .batch import batch # Do the *DUPLICATED* monkey-patch for the tensor object. # We need remove the duplicated code here once we fix # the illogical implement in the monkey-patch methods later. -from .framework import monkey_patch_math_tensor, monkey_patch_variable +from .framework import ( + monkey_patch_math_tensor, + monkey_patch_variable, +) from .pir import monkey_patch_dtype, monkey_patch_program, monkey_patch_value monkey_patch_variable() @@ -49,6 +55,7 @@ monkey_patch_value() monkey_patch_program() monkey_patch_dtype() +monkey_patch_generated_tensor_methods() from .base.dataset import * # noqa: F403 from .framework import ( diff --git a/python/paddle/_paddle_docs.py b/python/paddle/_paddle_docs.py new file mode 100644 index 00000000000000..3008b1c893cea8 --- /dev/null +++ b/python/paddle/_paddle_docs.py @@ -0,0 +1,173 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect + +import paddle + + +def add_docstr_all(method: str, docstr: str) -> None: + func = getattr(paddle, method) + if inspect.isfunction(func): + func.__doc__ = docstr + elif inspect.ismethod(func): + func.__self__.__doc__ = docstr + elif inspect.isbuiltin(func): + print("Builtin function can not be modified") + + +paddle_doc_dict = {} + + +def add_docstr(func_name, docstring): + if func_name not in paddle_doc_dict: + paddle_doc_dict[func_name] = "" + paddle_doc_dict[func_name] += docstring + + +def get_docstr(func_name): + if func_name not in paddle_doc_dict.keys(): + return "" + return paddle_doc_dict[func_name] + + +add_docstr( + "paddle.amax", + """ + Computes the maximum of tensor elements over the given axis. + + Note: + The difference between max and amax is: If there are multiple maximum elements, + amax evenly distributes gradient between these equal values, + while max propagates gradient to all of them. + + Args: + x (Tensor): A tensor, the data type is float32, float64, int32, int64, + the dimension is no more than 4. + axis (int|list|tuple|None, optional): The axis along which the maximum is computed. + If :attr:`None`, compute the maximum over all elements of + `x` and return a Tensor with a single element, + otherwise must be in the range :math:`[-x.ndim(x), x.ndim(x))`. + If :math:`axis[i] < 0`, the axis to reduce is :math:`x.ndim + axis[i]`. + keepdim (bool, optional): Whether to reserve the reduced dimension in the + output Tensor. 
The result tensor will have one fewer dimension + than the `x` unless :attr:`keepdim` is true, default + value is False. + name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor, results of maximum on the specified axis of input tensor, + it's data type is the same as `x`. + + Examples: + .. code-block:: python + + >>> import paddle + >>> # data_x is a Tensor with shape [2, 4] with multiple maximum elements + >>> # the axis is a int element + + >>> x = paddle.to_tensor([[0.1, 0.9, 0.9, 0.9], + ... [0.9, 0.9, 0.6, 0.7]], + ... dtype='float64', stop_gradient=False) + >>> # There are 5 maximum elements: + >>> # 1) amax evenly distributes gradient between these equal values, + >>> # thus the corresponding gradients are 1/5=0.2; + >>> # 2) while max propagates gradient to all of them, + >>> # thus the corresponding gradient are 1. + >>> result1 = paddle.amax(x) + >>> result1.backward() + >>> result1 + Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=False, + 0.90000000) + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, + [[0. , 0.20000000, 0.20000000, 0.20000000], + [0.20000000, 0.20000000, 0. , 0. ]]) + + >>> x.clear_grad() + >>> result1_max = paddle.max(x) + >>> result1_max.backward() + >>> result1_max + Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=False, + 0.90000000) + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, + [[0., 1., 1., 1.], + [1., 1., 0., 0.]]) + + >>> x.clear_grad() + >>> result2 = paddle.amax(x, axis=0) + >>> result2.backward() + >>> result2 + Tensor(shape=[4], dtype=float64, place=Place(cpu), stop_gradient=False, + [0.90000000, 0.90000000, 0.90000000, 0.90000000]) + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, + [[0. , 0.50000000, 1. , 1. ], + [1. , 0.50000000, 0. , 0. 
]]) + + >>> x.clear_grad() + >>> result3 = paddle.amax(x, axis=-1) + >>> result3.backward() + >>> result3 + Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=False, + [0.90000000, 0.90000000]) + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, + [[0. , 0.33333333, 0.33333333, 0.33333333], + [0.50000000, 0.50000000, 0. , 0. ]]) + + >>> x.clear_grad() + >>> result4 = paddle.amax(x, axis=1, keepdim=True) + >>> result4.backward() + >>> result4 + Tensor(shape=[2, 1], dtype=float64, place=Place(cpu), stop_gradient=False, + [[0.90000000], + [0.90000000]]) + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, + [[0. , 0.33333333, 0.33333333, 0.33333333], + [0.50000000, 0.50000000, 0. , 0. ]]) + + >>> # data_y is a Tensor with shape [2, 2, 2] + >>> # the axis is list + >>> y = paddle.to_tensor([[[0.1, 0.9], [0.9, 0.9]], + ... [[0.9, 0.9], [0.6, 0.7]]], + ... dtype='float64', stop_gradient=False) + >>> result5 = paddle.amax(y, axis=[1, 2]) + >>> result5.backward() + >>> result5 + Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=False, + [0.90000000, 0.90000000]) + >>> y.grad + Tensor(shape=[2, 2, 2], dtype=float64, place=Place(cpu), stop_gradient=False, + [[[0. , 0.33333333], + [0.33333333, 0.33333333]], + [[0.50000000, 0.50000000], + [0. , 0. ]]]) + + >>> y.clear_grad() + >>> result6 = paddle.amax(y, axis=[0, 1]) + >>> result6.backward() + >>> result6 + Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=False, + [0.90000000, 0.90000000]) + >>> y.grad + Tensor(shape=[2, 2, 2], dtype=float64, place=Place(cpu), stop_gradient=False, + [[[0. , 0.33333333], + [0.50000000, 0.33333333]], + [[0.50000000, 0.33333333], + [0. , 0. 
]]]) + """, +) diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index 39faf8f57d3b62..3b1ee24dda7386 100755 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -70,6 +70,10 @@ # We need remove the duplicated code here once we fix # the illogical implement in the monkey-patch methods later. from ..base.dygraph.math_op_patch import monkey_patch_math_tensor # noqa: F401 +from ..base.dygraph.generated_tensor_methods_patch import ( # noqa: F401 + monkey_patch_generated_tensor_methods, +) + from ..base.layers.math_op_patch import monkey_patch_variable # noqa: F401 # isort: on diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 6ecc37a28b78b2..71ad7ea94eb0f5 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -3434,13 +3434,18 @@ def min( ) return out - -def amax( - x: Tensor, - axis: int | Sequence[int] | None = None, - keepdim: bool = False, - name: str | None = None, -) -> Tensor: + # def amax( + # *args, + # **kwargs + # ) -> Tensor: + # if in_dynamic_or_pir_mode(): + # return _C_ops.amax(*args, **kwargs) + + # def amax( + # x: Tensor, + # *args, + # **kwargs + # ) -> Tensor: """ Computes the maximum of tensor elements over the given axis. @@ -3567,24 +3572,26 @@ def amax( [[0.50000000, 0.33333333], [0. , 0. 
]]]) """ - if in_dynamic_or_pir_mode(): - return _C_ops.amax(x, axis, keepdim) - else: - reduce_all, axis = _get_reduce_axis(axis, x) - helper = LayerHelper('amax', **locals()) - check_variable_and_dtype( - x, 'x', ['float32', 'float64', 'int32', 'int64'], 'amax' - ) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='reduce_amax', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'dim': axis, 'keep_dim': keepdim, 'reduce_all': reduce_all}, - ) - return out +# if in_dynamic_or_pir_mode(): +# return _C_ops.amax(x, *args, **kwargs) + +# else: +# reduce_all, axis = _get_reduce_axis(axis, x) +# helper = LayerHelper('amax', **locals()) +# check_variable_and_dtype( +# x, 'x', ['float32', 'float64', 'int32', 'int64'], 'amax' +# ) + +# out = helper.create_variable_for_type_inference(dtype=x.dtype) +# helper.append_op( +# type='reduce_amax', +# inputs={'X': x}, +# outputs={'Out': out}, +# attrs={'dim': axis, 'keep_dim': keepdim, 'reduce_all': reduce_all}, +# ) +# return out def amin( diff --git a/tools/gen_tensor_stub.py b/tools/gen_tensor_stub.py index 97c8850da1314f..e18fe8c239d7cb 100644 --- a/tools/gen_tensor_stub.py +++ b/tools/gen_tensor_stub.py @@ -27,6 +27,8 @@ from typing_extensions import TypeAlias, get_overloads +from paddle._paddle_docs import get_docstr + if TYPE_CHECKING: from types import ModuleType @@ -421,7 +423,7 @@ def get_tensor_members(module: str = 'paddle.Tensor') -> dict[int, Member]: member_doc_cleaned = ( func_doc_to_method_doc(inspect.cleandoc(member_doc)) if member_doc is not None - else None + else get_docstr("paddle." 
+ name) ) try: sig = inspect.signature(member) @@ -431,6 +433,9 @@ def get_tensor_members(module: str = 'paddle.Tensor') -> dict[int, Member]: except (TypeError, ValueError): member_signature = f"{name}()" + if name == 'amax' or name == 'amin': + print(member, id) + print("member_signature : ", member_signature) if is_inherited_member(name, tensor_class): continue @@ -569,7 +574,6 @@ def generate_stub_file(input_file=None, output_file=None): # Generate the Tensor stub tensor_gen = TensorGen(tensor_template, prefix) - for member_id, member in tensor_members.items(): if member_id in all_members: continue From 68c38425ce360c528ae2b1c2e2816eda82d6bd47 Mon Sep 17 00:00:00 2001 From: DanielSun11 Date: Wed, 13 Aug 2025 20:12:47 +0800 Subject: [PATCH 02/14] fix --- .../generator/CMakeLists.txt | 39 ++- .../generator/monkey_patch_gen.py | 31 +- .../generator/python_c_gen.py | 10 +- .../fluid/operators/generator/parse_utils.py | 3 + .../fluid/pir/dialect/op_generator/op_gen.py | 9 + .../pir/dialect/op_generator/python_c_gen.py | 314 +++++++++++++++--- paddle/fluid/pybind/CMakeLists.txt | 6 +- paddle/fluid/pybind/arg_pre_process.cc | 29 ++ paddle/fluid/pybind/arg_pre_process.h | 23 ++ paddle/fluid/pybind/eager_functions.cc | 15 +- paddle/fluid/pybind/eager_utils.cc | 11 + paddle/fluid/pybind/eager_utils.h | 4 + paddle/phi/ops/yaml/ops.yaml | 5 + python/paddle/__init__.py | 11 +- python/paddle/_paddle_docs.py | 162 +++++++-- python/paddle/framework/__init__.py | 3 - python/paddle/pir/generated_methods_patch.py | 21 ++ python/paddle/tensor/math.py | 312 ----------------- tools/gen_tensor_stub.py | 7 +- 19 files changed, 581 insertions(+), 434 deletions(-) create mode 100644 paddle/fluid/pybind/arg_pre_process.cc create mode 100644 paddle/fluid/pybind/arg_pre_process.h create mode 100644 python/paddle/pir/generated_methods_patch.py diff --git a/paddle/fluid/eager/auto_code_generator/generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/generator/CMakeLists.txt index 
924887be4e5408..70e13ee3f38ef9 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/generator/CMakeLists.txt @@ -95,23 +95,22 @@ add_custom_target( ${python_c_header_path} VERBATIM) -# set(ops_yaml_path -# "${PADDLE_SOURCE_DIR}/paddle/phi/ops/yaml/ops.yaml" -# ) -# set(monkey_patch_tensor_methods_path -# "${PADDLE_SOURCE_DIR}/python/paddle/base/dygraph/generated_tensor_methods_patch.py" -# ) -# set(tmp_monkey_patch_tensor_methods_path -# "${PADDLE_SOURCE_DIR}/python/paddle/base/dygraph/generated_tensor_methods_patch.py.tmp" -# ) -# message("Eager monkey path tensor methods CodeGen") -# add_custom_target( -# eager_monkey_patch_codegen -# COMMAND -# "${PYTHON_EXECUTABLE}" -# "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py" -# "--api_yaml_path=${ops_yaml_path}" -# "--output_path=${tmp_monkey_patch_tensor_methods_path}" -# COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_monkey_patch_tensor_methods_path} -# ${monkey_patch_tensor_methods_path} -# VERBATIM) +set(ops_yaml_path "${PADDLE_SOURCE_DIR}/paddle/phi/ops/yaml/ops.yaml") +set(monkey_patch_tensor_methods_path + "${PADDLE_SOURCE_DIR}/python/paddle/base/dygraph/generated_tensor_methods_patch.py" +) +set(tmp_monkey_patch_tensor_methods_path + "${PADDLE_SOURCE_DIR}/python/paddle/base/dygraph/generated_tensor_methods_patch.py.tmp" +) +message("Eager monkey path tensor methods CodeGen") +add_custom_target( + eager_monkey_patch_codegen + COMMAND + "${PYTHON_EXECUTABLE}" + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py" + "--api_yaml_path=${ops_yaml_path}" + "--output_path=${tmp_monkey_patch_tensor_methods_path}" + COMMAND + ${CMAKE_COMMAND} -E copy_if_different + ${tmp_monkey_patch_tensor_methods_path} ${monkey_patch_tensor_methods_path} + VERBATIM) diff --git a/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py 
b/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py index 03a8630a3b6665..261057f9e2fd2d 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py @@ -25,26 +25,31 @@ from .. import core """ +EXTRA_IMPORTS_TEMPLATE = """ +__all__ = [methods_map,{func_name}] +""" FUNCTION_NAME_TEMPLATE = """ def {func_name}(): """ -NAME_METHOD_MAPPING_TEMPLATE = """('{api_name}',_{api_name}),\n""" +NAME_METHOD_MAPPING_TEMPLATE = """ ('{api_name}',_{api_name})""" METHODS_MAP_TEMPLATE = """ - eager_methods_map = [ - {} - ] +methods_map = [ +{} +] """ METHOD_TEMPLATE = """ - def _{name}(self,*args, **kwargs): - return _C_ops.{name}(self,*args, **kwargs) +def _{name}(self,*args, **kwargs): + return _C_ops.{name}(self,*args, **kwargs) """ SET_METHOD_TEMPLATE = """ + # set methods for Tensor in dygraph local_tensor = core.eager.Tensor - for method_name, method in eager_methods_map: + for method_name, method in methods_map: setattr(local_tensor, method_name, method) + """ @@ -86,9 +91,7 @@ def __init__(self, path): def GenerateMonkeyPatchTensorMethods(self): self.MonkeyPatchTensorMethods_str += IMPORT_TEMPLATE - self.MonkeyPatchTensorMethods_str += FUNCTION_NAME_TEMPLATE.format( - func_name="monkey_patch_generated_tensor_methods" - ) + forward_api_list = self.forward_api_list methods_map = [] # [("method_name",method),] for forward_api_content in forward_api_list: @@ -104,7 +107,13 @@ def GenerateMonkeyPatchTensorMethods(self): self.MonkeyPatchTensorMethods_str += method_str result = ',\n '.join(methods_map) self.MonkeyPatchTensorMethods_str += METHODS_MAP_TEMPLATE.format(result) + self.MonkeyPatchTensorMethods_str += FUNCTION_NAME_TEMPLATE.format( + func_name="monkey_patch_generated_methods_for_tensor" + ) self.MonkeyPatchTensorMethods_str += SET_METHOD_TEMPLATE + self.MonkeyPatchTensorMethods_str += EXTRA_IMPORTS_TEMPLATE.format( + 
func_name="monkey_patch_generated_methods_for_tensor" + ) def run(self): # Read Yaml file @@ -127,7 +136,7 @@ def ParseArguments(): def GenerateMonkeyPathFile(filepath, python_c_str): - with open(filepath, 'a') as f: + with open(filepath, 'w') as f: f.write(python_c_str) diff --git a/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py b/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py index b68f5ec343fbe1..d3c9e491d8b018 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py @@ -80,7 +80,7 @@ def FindParsingFunctionFromAttributeType(atype): PARSE_PYTHON_C_TENSOR_REF_TEMPLATE = ( ' auto& {} = {}("{}", "{}", args, {}, {});\n' ) -PARSE_PYTHON_C_TENSORS_FROM_ARGS_OR_KWARGS_TEMPLATE = ' auto {} = GetTensorFromArgsOrKWArgs("{}", "{}", args, {}, kwargs,{},nargs,remaining_kwargs,{});\n' +PARSE_PYTHON_C_TENSORS_FROM_ARGS_OR_KWARGS_TEMPLATE = ' auto {} = GetTensorFromArgsOrKWArgs("{}", "{}", args, {}, kwargs,{},nargs,&remaining_kwargs,{});\n' CONVERT_TO_DISTTENSOR_AND_PARSE_PYTHON_C_TENSORS_TEMPLATE = ( ' {} = {}("{}", "{}", args, {}, {}, mesh);\n' @@ -226,6 +226,8 @@ def FindParsingFunctionFromAttributeType(atype): #include "paddle/fluid/pybind/eager_custom_python_api.h" #include "paddle/fluid/pybind/eager.h" #include "paddle/fluid/pybind/eager_op_function.h" +#include "paddle/fluid/pybind/arg_pre_process.h" + namespace paddle {{ namespace pybind {{ @@ -571,8 +573,12 @@ def GeneratePythonCFunction(self, no_input_out_tensor=False): ) pre_process_str = " //NO NEED" if need_parse_python_api_args and len(dygraph_pre_process) > 0: + + def pre_process_add_ampersand(s): + return s.replace('(', '(&').replace(',', ',&').rstrip(')') + ')' + pre_process_str = CALL_PRE_PROCESS_TEMPLATE.format( - dygraph_pre_process + pre_process_add_ampersand(dygraph_pre_process) ) set_device_str = FUNCTION_SET_DEVICE_TEMPLATE.format(expected_place_str) diff --git 
a/paddle/fluid/operators/generator/parse_utils.py b/paddle/fluid/operators/generator/parse_utils.py index 269f1d0ea6fc45..7e993be98d65be 100644 --- a/paddle/fluid/operators/generator/parse_utils.py +++ b/paddle/fluid/operators/generator/parse_utils.py @@ -619,6 +619,9 @@ def parse_op_entry(op_entry: dict[str, Any], name_field="op"): else: forward = None op["forward"] = forward + # parse python_api + if "python_api" in op_entry: + op.update({"python_api": op_entry["python_api"]}) return op diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py index ca46a499de0b47..0db55027265120 100644 --- a/paddle/fluid/pir/dialect/op_generator/op_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py @@ -545,6 +545,8 @@ def __init__(self, op_yaml_item, op_compat_item, yaml_file): # parse interfaces list self.interfaces_list = self.parse_op_interfaces() + # parse python api info + self.python_api_info = self.parse_python_api_info() # OneDNN info if "extra_args" in self.op_yaml_item: @@ -1074,6 +1076,13 @@ def parse_invoke_map(self): else: return None + def parse_python_api_info(self): + + if 'python_api' in self.op_yaml_item: + return self.op_yaml_item['python_api'] + else: + return None + def parse_data_transform_info(self): if self.op_yaml_item.get('data_transform'): data_trans_item = self.op_yaml_item['data_transform'] diff --git a/paddle/fluid/pir/dialect/op_generator/python_c_gen.py b/paddle/fluid/pir/dialect/op_generator/python_c_gen.py index b1af9c004de4d5..12b8df4f70c9ac 100644 --- a/paddle/fluid/pir/dialect/op_generator/python_c_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/python_c_gen.py @@ -48,7 +48,7 @@ #include "paddle/phi/common/int_array.h" #include "paddle/phi/core/enforce.h" #include "paddle/fluid/pybind/op_callstack_utils.h" - +#include "paddle/fluid/pybind/arg_pre_process.h" {body} @@ -59,13 +59,18 @@ try {{ VLOG(6) << "Add {api_name} op into program"; VLOG(8) << "args count: " << 
(PyTuple_Size(args) / 2); - + // Get Total Params count and check validity if needed + {check_params_count} // Get Value from args {inputs} // Parse Attributes {attrs} + // Check Reminding Params validity if needed + {check_remaining_params_valid} + // Call Pre_Process before calling dygraph function if needed + {pre_process} // Call ir static api CallStackRecorder callstack_recorder("{api_name}"); callstack_recorder.Record(); @@ -84,6 +89,8 @@ try {{ VLOG(6) << "Add {api_name} op into program"; VLOG(8) << "args count: " << (PyTuple_Size(args) / 2); + // Get Total Params count and check validity if needed + {check_params_count} // Get Value from args {inputs} @@ -91,6 +98,11 @@ // Parse Attributes {attrs} + // Check Reminding Params validity if needed + {check_remaining_params_valid} + // Call Pre_Process before calling dygraph function if needed + {pre_process} + // Call ir static api CallStackRecorder callstack_recorder("{api_name}"); callstack_recorder.Record(); @@ -104,19 +116,43 @@ }} """ +CHECK_PARAMS_COUNT_TEMPLATE = """ int nargs = args ? static_cast(PyTuple_Size(args)) : 0; + int remaining_kwargs = kwargs ? 
static_cast(PyDict_Size(kwargs)) : 0; + const int max_args = {max_args}; + CheckParamsCount(nargs,remaining_kwargs,max_args); +""" +CHECK_REMAINING_PARAMS_VALID_TEMPLATE = """ CheckRemainingParamsValidity(args,kwargs,remaining_kwargs,nargs); +""" INPUT_TEMPLATE = """ PyObject *{name}_obj = PyTuple_GET_ITEM(args, {index}); auto {name} = {cast_func}({name}_obj, "{api_name}", {index}, {dispensable});""" +# PyObject* axis_obj = GetItemFromArgsOrKWArgs(args, 1, kwargs, {"axis","dim"}, nargs,&remaining_kwargs); + +INPUT_FROM_ARGS_KWARGS_TEMPLATE = """ + PyObject *{name}_obj = GetItemFromArgsOrKWArgs(args, {index},kwargs,{keywords}, nargs, &remaining_kwargs); + auto {name} = {cast_func}({name}_obj, "{api_name}", {index}, {dispensable});""" + +CALL_PRE_PROCESS_TEMPLATE = """{pre_process};""" + NO_MUTABLE_ATTR_CAST_TEMPLATE = """ PyObject *{name}_obj = PyTuple_GET_ITEM(args, {index}); {type} {name} = {cast_func}({name}_obj, "{api_name}", {index});""" +NO_MUTABLE_ATTR_CAST_FROM_ARGS_KWARGS_TEMPLATE = """ + PyObject *{name}_obj = GetItemFromArgsOrKWArgs(args, {index},kwargs,{keywords}, nargs, &remaining_kwargs,false); + {type} {name} = {cast_func}({name}_obj, "{api_name}", {index});""" +NO_MUTABLE_ATTR_CAST_FROM_ARGS_KWARGS_WITH_DEFAULT_VALUE_TEMPLATE = """ + PyObject *{name}_obj = GetItemFromArgsOrKWArgs(args, {index},kwargs,{keywords}, nargs, &remaining_kwargs); + {type} {name} = {cast_func}({name}_obj, "{api_name}", {index},{default_value});""" + MUTABLE_ATTR_API_IMPL_TEMPLATE = """ PyObject *static_api_{api_name}(PyObject *self, PyObject *args, PyObject *kwargs) {{ try {{ VLOG(6) << "Add {api_name} op into program"; VLOG(8) << "args count: " << (PyTuple_Size(args) / 2); + // Get Total Params count and check validity if needed + {check_params_count} // Get Value from args {inputs} @@ -128,6 +164,11 @@ {init_attrs} {cast_attrs} + // Check Reminding Params validity if needed + {check_remaining_params_valid} + // Call Pre_Process before calling dygraph function if needed + 
{pre_process} + // Call ir static api CallStackRecorder callstack_recorder("{api_name}"); callstack_recorder.Record(); @@ -165,9 +206,15 @@ MUTABLE_ATTR_OBJ_TEMPLATE = """ PyObject *{name}_obj = PyTuple_GET_ITEM(args, {index});""" +MUTABLE_ATTR_OBJ_FROM_ARGS_KWARGS_WITH_DEFAULT_VALUE_TEMPLATE = """ + PyObject *{name}_obj = GetItemFromArgsOrKWArgs(args, {index},kwargs,{keywords}, nargs, &remaining_kwargs,false);""" +MUTABLE_ATTR_OBJ_FROM_ARGS_KWARGS_TEMPLATE = """ + PyObject *{name}_obj = GetItemFromArgsOrKWArgs(args, {index},kwargs,{keywords}, nargs, &remaining_kwargs);""" + MUTABLE_ATTR_CAST_TEMPLATE = """ {type} {name_} = {cast_func}({name}_obj, "{api_name}", {index});""" - +MUTABLE_ATTR_CAST_WITH_DEFAULT_VALUE_TEMPLATE = """ + {type} {name_} = {cast_func}({name}_obj, "{api_name}", {index}, {default_value});""" FULL_OP_TEMPLATE = """ {name} = paddle::dialect::full(std::vector{{1}}, {name}_tmp, phi::DataType::{phi_datatype}, phi::CPUPlace()); """ @@ -224,6 +271,7 @@ class PythonCCodeGen(CodeGen): def __init__(self) -> None: super().__init__() + self.need_parse_python_api_args = False def _gen_one_declare(self, op_name): return API_DECLARE_TEMPLATE.format(name=op_name) @@ -255,7 +303,19 @@ def _gen_h_file(self, op_info_items, namespaces, h_file_path): with open(h_file_path, 'w') as f: f.write(H_FILE_TEMPLATE.format(body=body)) - def _gen_inputs(self, op_info, op_name): + def _gen_keywords_vector(self, args_alias_map, arg_name): + alias_vector = f'{{"{arg_name}"}}' + if arg_name in args_alias_map.keys(): + alias_set = set(args_alias_map[arg_name]) + # Add the original argument name to the alias set + alias_set.add(arg_name) + # Convert to C++ vector format + alias_vector = ( + "{" + ",".join(f'"{name}"' for name in alias_set) + "}" + ) + return alias_vector + + def _gen_inputs(self, op_info, op_name, args_alias_map={}): name_list = op_info.input_name_list type_list = op_info.input_type_list optional_list = op_info.input_optional_list @@ -278,41 +338,98 @@ def 
_gen_inputs(self, op_info, op_name): else 'CastPyArg2Value' ) dispensable = "false" - ret += INPUT_TEMPLATE.format( - name=name, - index=i, - cast_func=cast_func, - api_name=op_name, - dispensable=dispensable, - ) + if self.need_parse_python_api_args: + keywords = self._gen_keywords_vector(args_alias_map, name) + ret += INPUT_FROM_ARGS_KWARGS_TEMPLATE.format( + name=name, + index=i, + keywords=keywords, + cast_func=cast_func, + api_name=op_name, + dispensable=dispensable, + ) + else: + ret += INPUT_TEMPLATE.format( + name=name, + index=i, + cast_func=cast_func, + api_name=op_name, + dispensable=dispensable, + ) return ret - def _gen_attrs_without_mutable(self, op_info, op_name): + def _gen_attrs_without_mutable(self, op_info, op_name, args_alias_map={}): input_size = len(op_info.input_name_list) name_list = op_info.attribute_name_list type_list = op_info.attribute_build_arg_type_list + default_value_list = op_info.attribute_default_value_list assert len(name_list) == len(type_list) ret = '' - for i, (name, type) in enumerate(zip(name_list, type_list)): + for i, (name, type, default_value) in enumerate( + zip(name_list, type_list, default_value_list) + ): type = type.replace('const ', '').replace('&', '') cast_func = TYPE_TO_FUNC_MAP[type] - ret += NO_MUTABLE_ATTR_CAST_TEMPLATE.format( - name=name, - index=input_size + i, - type=type, - cast_func=cast_func, - api_name=op_name, - ) + if self.need_parse_python_api_args: + keywords = self._gen_keywords_vector(args_alias_map, name) + if default_value is not None: + ret += NO_MUTABLE_ATTR_CAST_FROM_ARGS_KWARGS_WITH_DEFAULT_VALUE_TEMPLATE.format( + name=name, + index=input_size + i, + type=type, + cast_func=cast_func, + api_name=op_name, + keywords=keywords, + default_value=default_value, + ) + else: + ret += ( + NO_MUTABLE_ATTR_CAST_FROM_ARGS_KWARGS_TEMPLATE.format( + name=name, + index=input_size + i, + type=type, + cast_func=cast_func, + api_name=op_name, + keywords=keywords, + ) + ) + else: + ret += 
NO_MUTABLE_ATTR_CAST_TEMPLATE.format( + name=name, + index=input_size + i, + type=type, + cast_func=cast_func, + api_name=op_name, + ) return ret - def _gen_attrs_py_obj_with_mutable(self, op_info): + def _gen_attrs_py_obj_with_mutable(self, op_info, args_alias_map={}): input_size = len(op_info.input_name_list) name_list = op_info.attribute_name_list + default_value_list = op_info.attribute_default_value_list ret = '' - for i, name in enumerate(name_list): - ret += MUTABLE_ATTR_OBJ_TEMPLATE.format( - name=name, index=input_size + i - ) + for i, (name, default_value) in enumerate( + zip(name_list, default_value_list) + ): + if self.need_parse_python_api_args: + keywords = self._gen_keywords_vector(args_alias_map, name) + if default_value is not None: + ret += MUTABLE_ATTR_OBJ_FROM_ARGS_KWARGS_WITH_DEFAULT_VALUE_TEMPLATE.format( + name=name, + index=input_size + i, + keywords=keywords, + ) + else: + ret += MUTABLE_ATTR_OBJ_FROM_ARGS_KWARGS_TEMPLATE.format( + name=name, + index=input_size + i, + keywords=keywords, + ) + + else: + ret += MUTABLE_ATTR_OBJ_TEMPLATE.format( + name=name, index=input_size + i + ) return ret def _gen_init_mutable_attrs(self, op_info): @@ -329,9 +446,12 @@ def _gen_cast_attrs(self, op_info, op_name): attr_type_list = op_info.attribute_build_arg_type_list mutable_attr_name_list = op_info.mutable_attribute_name_list mutable_attr_type_list = op_info.mutable_attribute_type_list + default_value_list = op_info.attribute_default_value_list assert len(attr_name_list) == len(attr_type_list) ret = '' - for i, (name, type) in enumerate(zip(attr_name_list, attr_type_list)): + for i, (name, type, default_value) in enumerate( + zip(attr_name_list, attr_type_list, default_value_list) + ): type = type.replace('const ', '').replace('&', '') cast_func = TYPE_TO_FUNC_MAP[type] @@ -373,15 +493,27 @@ def _gen_cast_attrs(self, op_info, op_name): api_name=op_name, index=input_size + i, ) - - no_mutable_cast_str = MUTABLE_ATTR_CAST_TEMPLATE.format( - type=type, - 
name_=name + '_tmp', - name=name, - cast_func=cast_func, - api_name=op_name, - index=input_size + i, - ) + if default_value is not None: + no_mutable_cast_str = ( + MUTABLE_ATTR_CAST_WITH_DEFAULT_VALUE_TEMPLATE.format( + type=type, + name_=name + '_tmp', + name=name, + cast_func=cast_func, + api_name=op_name, + index=input_size + i, + default_value=default_value, + ) + ) + else: + no_mutable_cast_str = MUTABLE_ATTR_CAST_TEMPLATE.format( + type=type, + name_=name + '_tmp', + name=name, + cast_func=cast_func, + api_name=op_name, + index=input_size + i, + ) if ( mutable_attr_type_list[mutable_attr_name_list.index(name)][ @@ -410,39 +542,114 @@ def _gen_cast_attrs(self, op_info, op_name): no_mutable_cast_attrs=no_mutable_cast_str, ) else: - mutable_cast_str = MUTABLE_ATTR_CAST_TEMPLATE.format( - type=type, - name_=name, - name=name, - cast_func=cast_func, - api_name=op_name, - index=input_size + i, - ) + if ( + default_value is not None + and self.need_parse_python_api_args + ): + mutable_cast_str = ( + MUTABLE_ATTR_CAST_WITH_DEFAULT_VALUE_TEMPLATE.format( + type=type, + name_=name, + name=name, + cast_func=cast_func, + api_name=op_name, + index=input_size + i, + default_value=default_value, + ) + ) + else: + mutable_cast_str = MUTABLE_ATTR_CAST_TEMPLATE.format( + type=type, + name_=name, + name=name, + cast_func=cast_func, + api_name=op_name, + index=input_size + i, + ) ret += mutable_cast_str return ret + def _gen_check_params_count(self, max_args, need_check): + if need_check: + return CHECK_PARAMS_COUNT_TEMPLATE.format(max_args=max_args) + else: + return '// NO NEED' + + def _gen_check_reminding_params(self, need_check): + if need_check: + return CHECK_REMAINING_PARAMS_VALID_TEMPLATE + return '// NO NEED' + + def _gen_pre_process(self, pre_process): + pre_process_str = "" + if pre_process is not None and self.need_parse_python_api_args: + if "static_func" in pre_process.keys(): + pre_process_str = pre_process["static_func"] + elif "func" in pre_process.keys(): + 
pre_process_str = pre_process["func"] + + def pre_process_add_ampersand(s): + return s.replace('(', '(&').replace(',', ',&').rstrip(')') + ')' + + return CALL_PRE_PROCESS_TEMPLATE.format( + pre_process=pre_process_add_ampersand(pre_process_str) + ) + return "// NO NEED" + def _gen_one_impl(self, op_info, op_name): input_name_list = op_info.input_name_list output_name_list = op_info.output_name_list attr_name_list = op_info.attribute_name_list mutable_attr_name_list = op_info.mutable_attribute_name_list no_mutable_attr_name_list = op_info.non_mutable_attribute_name_list + max_args = len(input_name_list) + len(attr_name_list) + python_api_info = op_info.python_api_info + args_alias_map = None + pre_process = None + need_check_params_count = False + self.need_parse_python_api_args = False + + if python_api_info is not None: + self.need_parse_python_api_args = True + if "args_alias" in python_api_info.keys(): + args_alias_map = python_api_info["args_alias"] + need_check_params_count = True + if "pre_process" in python_api_info.keys(): + pre_process = python_api_info["pre_process"] if len(output_name_list) == 0: ret = NO_OUTPUT_API_IMPL_TEMPLATE.format( api_name=op_name, - inputs=self._gen_inputs(op_info, op_name), - attrs=self._gen_attrs_without_mutable(op_info, op_name), + check_params_count=self._gen_check_params_count( + max_args, need_check=need_check_params_count + ), + inputs=self._gen_inputs(op_info, op_name, args_alias_map), + attrs=self._gen_attrs_without_mutable( + op_info, op_name, args_alias_map + ), + check_remaining_params_valid=self._gen_check_reminding_params( + need_check=need_check_params_count + ), + pre_process=self._gen_pre_process(pre_process), args=', '.join(input_name_list + attr_name_list), ) elif len(mutable_attr_name_list) > 0: ret = MUTABLE_ATTR_API_IMPL_TEMPLATE.format( api_name=op_name, - inputs=self._gen_inputs(op_info, op_name), - attrs_py_obj=self._gen_attrs_py_obj_with_mutable(op_info), + 
check_params_count=self._gen_check_params_count( + max_args, need_check=need_check_params_count + ), + inputs=self._gen_inputs(op_info, op_name, args_alias_map), + attrs_py_obj=self._gen_attrs_py_obj_with_mutable( + op_info, args_alias_map + ), init_attrs=self._gen_init_mutable_attrs(op_info), cast_attrs=self._gen_cast_attrs(op_info, op_name), + check_remaining_params_valid=self._gen_check_reminding_params( + need_check=need_check_params_count + ), + pre_process=self._gen_pre_process(pre_process), args_with_mutable_attrs=', '.join( input_name_list + mutable_attr_name_list @@ -452,9 +659,18 @@ def _gen_one_impl(self, op_info, op_name): else: ret = NO_MUTABLE_ATTR_API_IMPL_TEMPLATE.format( api_name=op_name, - inputs=self._gen_inputs(op_info, op_name), - attrs=self._gen_attrs_without_mutable(op_info, op_name), + check_params_count=self._gen_check_params_count( + max_args, need_check=need_check_params_count + ), + inputs=self._gen_inputs(op_info, op_name, args_alias_map), + attrs=self._gen_attrs_without_mutable( + op_info, op_name, args_alias_map + ), args=', '.join(input_name_list + attr_name_list), + check_remaining_params_valid=self._gen_check_reminding_params( + need_check=need_check_params_count + ), + pre_process=self._gen_pre_process(pre_process), ) ret = re.sub(r' +\n', '', ret) return ret diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index d018fd90dab3a6..1c7413d949743b 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -136,7 +136,8 @@ set(PYBIND_SRCS sot/eval_frame.c sot/guards.cc op_callstack_utils.cc - python_callable_registry.cc) + python_callable_registry.cc + arg_pre_process.cc) if(WITH_DISTRIBUTE) set(PYBIND_SRCS ${PYBIND_SRCS} dist_api.cc) @@ -281,7 +282,8 @@ if(WITH_PYTHON) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) target_link_libraries(eager_generator ${os_dependency_modules}) - set(EAGER_OP_IMPL_DEPS eager_generator 
eager_python_c_codegen) + set(EAGER_OP_IMPL_DEPS eager_generator eager_python_c_codegen + eager_monkey_patch_codegen) if(WITH_ROCM) target_link_libraries(eager_generator ${ROCM_HIPRTC_LIB}) diff --git a/paddle/fluid/pybind/arg_pre_process.cc b/paddle/fluid/pybind/arg_pre_process.cc new file mode 100644 index 00000000000000..1dd1e8c70e3c07 --- /dev/null +++ b/paddle/fluid/pybind/arg_pre_process.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Pre-Processing function. +// The function here will be called by the functions in +// paddle/fluid/pybind/static_op_function.cc and +// paddle/fluid/pybind/eager_op_function.cc. Mainly used to customize the +// processing of parameters originally done in the Python API +#include "paddle/fluid/pybind/arg_pre_process.h" +#include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/pybind/eager_utils.h" +#include "paddle/fluid/pybind/op_function_common.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/phi/core/enforce.h" +namespace paddle { +namespace pybind {} // namespace pybind + +} // namespace paddle diff --git a/paddle/fluid/pybind/arg_pre_process.h b/paddle/fluid/pybind/arg_pre_process.h new file mode 100644 index 00000000000000..557b6d1c5f4739 --- /dev/null +++ b/paddle/fluid/pybind/arg_pre_process.h @@ -0,0 +1,23 @@ +// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +namespace paddle { + +namespace pybind {} // namespace pybind + +} // namespace paddle diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 62075f5a559fc9..92601b825a863e 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -1378,25 +1378,26 @@ PyObject* eager__is_run_in_backward(PyObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } -PyObject* eager__add_doc_str(PyObject* self, PyObject* args, ) { +PyObject* eager__add_doc_str(PyObject* self, PyObject* args) { EAGER_TRY + static std::vector all_docs; PyObject* obj = nullptr; PyObject* doc_obj = nullptr; if (!PyArg_ParseTuple(args, "OO", &obj, &doc_obj)) { return nullptr; } - const char* doc_str = ""; std::string doc_string = CastPyArg2AttrString(doc_obj, 1); - std::cout < < doc_string << std::endl; + if (Py_TYPE(obj) == &PyCFunction_Type) { PyCFunctionObject* f = reinterpret_cast(obj); - std::cout << "type : " << Py_TYPE(obj)->tp_name << std::endl; if (f->m_ml->ml_doc) { - std::cout << "doc : " << f->m_ml->ml_doc << std::endl; + VLOG(6) + << "eager__add_doc_str will update doc for PyCFunction, original doc " + << f->m_ml->ml_doc; } - f->m_ml->ml_doc = doc_str; + all_docs.emplace_back(doc_string); + f->m_ml->ml_doc = all_docs.back().c_str(); } - RETURN_PY_NONE EAGER_CATCH_AND_THROW_RETURN_NULL } diff --git 
a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index 9b91412d8f59eb..37097f783cf9ed 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -864,6 +864,17 @@ paddle::DataType CastPyArg2DataTypeDirectly(PyObject* obj, return dtype; } +paddle::DataType CastPyArg2DataTypeDirectly(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + paddle::DataType default_value) { + if (obj == nullptr) { + return default_value; + } else { + return CastPyArg2DataTypeDirectly(obj, op_type, arg_pos); + } +} + phi::Vocab CastPyArg2Vocab(PyObject* obj, ssize_t arg_pos) { if (PyDict_Check(obj)) { phi::Vocab vocab; diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index 775307bd1c0bba..7a758af2dd36ac 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -378,6 +378,10 @@ paddle::DataType CastPyArg2DataType(PyObject* obj, paddle::DataType CastPyArg2DataTypeDirectly(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +paddle::DataType CastPyArg2DataTypeDirectly(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos, + paddle::DataType default_value); phi::distributed::TensorDistAttr CastPyArg2DistAttr(PyObject* obj, ssize_t arg_pos); diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml index d83c8abc62e63b..f55bdcb8a06ee8 100644 --- a/paddle/phi/ops/yaml/ops.yaml +++ b/paddle/phi/ops/yaml/ops.yaml @@ -254,6 +254,11 @@ - op : amin args : (Tensor x, int64_t[] axis={}, bool keepdim=false) + python_api : + name : [paddle.amin,paddle.Tensor.amin] + args_alias : + x : [input,x1] + axis : [dim] output : Tensor(out) infer_meta : func : ReduceInferMeta diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index dd0e745dad979c..1556bc62a1b741 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -37,7 +37,7 @@ # because there are some initialization codes in base/core/__init__.py. 
from .base import core # noqa: F401 from .base.dygraph.generated_tensor_methods_patch import ( - monkey_patch_generated_tensor_methods, + monkey_patch_generated_methods_for_tensor, ) from .batch import batch @@ -49,13 +49,17 @@ monkey_patch_variable, ) from .pir import monkey_patch_dtype, monkey_patch_program, monkey_patch_value +from .pir.generated_methods_patch import ( + monkey_patch_generated_methods_for_value, +) monkey_patch_variable() monkey_patch_math_tensor() monkey_patch_value() monkey_patch_program() monkey_patch_dtype() -monkey_patch_generated_tensor_methods() +monkey_patch_generated_methods_for_tensor() +monkey_patch_generated_methods_for_value() from .base.dataset import * # noqa: F403 from .framework import ( @@ -1241,9 +1245,10 @@ 'pi', 'e', ] - import os +import paddle._paddle_docs + FLAGS_trace_api = os.environ.get("FLAGS_trace_api", None) if FLAGS_trace_api is not None and FLAGS_trace_api != "": from .api_tracer import start_api_tracer diff --git a/python/paddle/_paddle_docs.py b/python/paddle/_paddle_docs.py index 3008b1c893cea8..edba3131979157 100644 --- a/python/paddle/_paddle_docs.py +++ b/python/paddle/_paddle_docs.py @@ -16,34 +16,158 @@ import paddle +# Add docstr for some C++ functions in paddle +_add_docstr = paddle.base.core.eager._add_docstr -def add_docstr_all(method: str, docstr: str) -> None: - func = getattr(paddle, method) - if inspect.isfunction(func): - func.__doc__ = docstr - elif inspect.ismethod(func): - func.__self__.__doc__ = docstr - elif inspect.isbuiltin(func): - print("Builtin function can not be modified") +def add_doc_all(method: str, docstr: str) -> None: + """ + Add docstr for function (paddle.*) and method (paddle.Tensor.*) if method exists + """ + for module in [paddle, paddle.Tensor]: + if hasattr(module, method): + func = getattr(module, method) + if inspect.isfunction(func): + func.__doc__ = docstr + elif inspect.ismethod(func): + func.__self__.__doc__ = docstr + elif inspect.isbuiltin(func): + 
_add_docstr(func, docstr) + + +__all__ = ['add_doc_all'] +add_doc_all( + "amin", + r""" + Computes the minimum of tensor elements over the given axis + + Note: + The difference between min and amin is: If there are multiple minimum elements, + amin evenly distributes gradient between these equal values, + while min propagates gradient to all of them. + + Args: + x (Tensor): A tensor, the data type is float32, float64, int32, int64, + the dimension is no more than 4. + axis (int|list|tuple|None, optional): The axis along which the minimum is computed. + If :attr:`None`, compute the minimum over all elements of + `x` and return a Tensor with a single element, + otherwise must be in the range :math:`[-x.ndim, x.ndim)`. + If :math:`axis[i] < 0`, the axis to reduce is :math:`x.ndim + axis[i]`. + keepdim (bool, optional): Whether to reserve the reduced dimension in the + output Tensor. The result tensor will have one fewer dimension + than the `x` unless :attr:`keepdim` is true, default + value is False. + name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor, results of minimum on the specified axis of input tensor, + it's data type is the same as input's Tensor. + + Examples: + .. code-block:: python -paddle_doc_dict = {} + >>> import paddle + >>> # data_x is a Tensor with shape [2, 4] with multiple minimum elements + >>> # the axis is a int element + + >>> x = paddle.to_tensor([[0.2, 0.1, 0.1, 0.1], + ... [0.1, 0.1, 0.6, 0.7]], + ... dtype='float64', stop_gradient=False) + >>> # There are 5 minimum elements: + >>> # 1) amin evenly distributes gradient between these equal values, + >>> # thus the corresponding gradients are 1/5=0.2; + >>> # 2) while min propagates gradient to all of them, + >>> # thus the corresponding gradient are 1. 
+ >>> result1 = paddle.amin(x) + >>> result1.backward() + >>> result1 + Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=False, + 0.10000000) + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, + [[0. , 0.20000000, 0.20000000, 0.20000000], + [0.20000000, 0.20000000, 0. , 0. ]]) + >>> x.clear_grad() + >>> result1_min = paddle.min(x) + >>> result1_min.backward() + >>> result1_min + Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=False, + 0.10000000) + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, + [[0., 1., 1., 1.], + [1., 1., 0., 0.]]) + + >>> x.clear_grad() + >>> result2 = paddle.amin(x, axis=0) + >>> result2.backward() + >>> result2 + Tensor(shape=[4], dtype=float64, place=Place(cpu), stop_gradient=False, + [0.10000000, 0.10000000, 0.10000000, 0.10000000]) + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, + [[0. , 0.50000000, 1. , 1. ], + [1. , 0.50000000, 0. , 0. ]]) -def add_docstr(func_name, docstring): - if func_name not in paddle_doc_dict: - paddle_doc_dict[func_name] = "" - paddle_doc_dict[func_name] += docstring + >>> x.clear_grad() + >>> result3 = paddle.amin(x, axis=-1) + >>> result3.backward() + >>> result3 + Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=False, + [0.10000000, 0.10000000]) + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, + [[0. , 0.33333333, 0.33333333, 0.33333333], + [0.50000000, 0.50000000, 0. , 0. ]]) + >>> x.clear_grad() + >>> result4 = paddle.amin(x, axis=1, keepdim=True) + >>> result4.backward() + >>> result4 + Tensor(shape=[2, 1], dtype=float64, place=Place(cpu), stop_gradient=False, + [[0.10000000], + [0.10000000]]) + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, + [[0. , 0.33333333, 0.33333333, 0.33333333], + [0.50000000, 0.50000000, 0. , 0. 
]]) -def get_docstr(func_name): - if func_name not in paddle_doc_dict.keys(): - return "" - return paddle_doc_dict[func_name] + >>> # data_y is a Tensor with shape [2, 2, 2] + >>> # the axis is list + >>> y = paddle.to_tensor([[[0.2, 0.1], [0.1, 0.1]], + ... [[0.1, 0.1], [0.6, 0.7]]], + ... dtype='float64', stop_gradient=False) + >>> result5 = paddle.amin(y, axis=[1, 2]) + >>> result5.backward() + >>> result5 + Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=False, + [0.10000000, 0.10000000]) + >>> y.grad + Tensor(shape=[2, 2, 2], dtype=float64, place=Place(cpu), stop_gradient=False, + [[[0. , 0.33333333], + [0.33333333, 0.33333333]], + [[0.50000000, 0.50000000], + [0. , 0. ]]]) + >>> y.clear_grad() + >>> result6 = paddle.amin(y, axis=[0, 1]) + >>> result6.backward() + >>> result6 + Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=False, + [0.10000000, 0.10000000]) + >>> y.grad + Tensor(shape=[2, 2, 2], dtype=float64, place=Place(cpu), stop_gradient=False, + [[[0. , 0.33333333], + [0.50000000, 0.33333333]], + [[0.50000000, 0.33333333], + [0. , 0. ]]]) +""", +) -add_docstr( - "paddle.amax", +add_doc_all( + "amax", """ Computes the maximum of tensor elements over the given axis. diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index 3b1ee24dda7386..ef6c9206981a36 100755 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -70,9 +70,6 @@ # We need remove the duplicated code here once we fix # the illogical implement in the monkey-patch methods later. 
from ..base.dygraph.math_op_patch import monkey_patch_math_tensor # noqa: F401 -from ..base.dygraph.generated_tensor_methods_patch import ( # noqa: F401 - monkey_patch_generated_tensor_methods, -) from ..base.layers.math_op_patch import monkey_patch_variable # noqa: F401 diff --git a/python/paddle/pir/generated_methods_patch.py b/python/paddle/pir/generated_methods_patch.py new file mode 100644 index 00000000000000..862ff90a7c66b1 --- /dev/null +++ b/python/paddle/pir/generated_methods_patch.py @@ -0,0 +1,21 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ..base.dygraph.generated_tensor_methods_patch import methods_map +from . import Value + + +def monkey_patch_generated_methods_for_value(): + for method_name, method in methods_map: + setattr(Value, method_name, method) diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 68c1e0a12ae487..15e4160f5e79be 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -3434,318 +3434,6 @@ def min( ) return out - # def amax( - # *args, - # **kwargs - # ) -> Tensor: - # if in_dynamic_or_pir_mode(): - # return _C_ops.amax(*args, **kwargs) - - # def amax( - # x: Tensor, - # *args, - # **kwargs - # ) -> Tensor: - """ - Computes the maximum of tensor elements over the given axis. 
- - Note: - The difference between max and amax is: If there are multiple maximum elements, - amax evenly distributes gradient between these equal values, - while max propagates gradient to all of them. - - Args: - x (Tensor): A tensor, the data type is float32, float64, int32, int64, - the dimension is no more than 4. - axis (int|list|tuple|None, optional): The axis along which the maximum is computed. - If :attr:`None`, compute the maximum over all elements of - `x` and return a Tensor with a single element, - otherwise must be in the range :math:`[-x.ndim(x), x.ndim(x))`. - If :math:`axis[i] < 0`, the axis to reduce is :math:`x.ndim + axis[i]`. - keepdim (bool, optional): Whether to reserve the reduced dimension in the - output Tensor. The result tensor will have one fewer dimension - than the `x` unless :attr:`keepdim` is true, default - value is False. - name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Tensor, results of maximum on the specified axis of input tensor, - it's data type is the same as `x`. - - Examples: - .. code-block:: python - - >>> import paddle - >>> # data_x is a Tensor with shape [2, 4] with multiple maximum elements - >>> # the axis is a int element - - >>> x = paddle.to_tensor([[0.1, 0.9, 0.9, 0.9], - ... [0.9, 0.9, 0.6, 0.7]], - ... dtype='float64', stop_gradient=False) - >>> # There are 5 maximum elements: - >>> # 1) amax evenly distributes gradient between these equal values, - >>> # thus the corresponding gradients are 1/5=0.2; - >>> # 2) while max propagates gradient to all of them, - >>> # thus the corresponding gradient are 1. - >>> result1 = paddle.amax(x) - >>> result1.backward() - >>> result1 - Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=False, - 0.90000000) - >>> x.grad - Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0. 
, 0.20000000, 0.20000000, 0.20000000], - [0.20000000, 0.20000000, 0. , 0. ]]) - - >>> x.clear_grad() - >>> result1_max = paddle.max(x) - >>> result1_max.backward() - >>> result1_max - Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=False, - 0.90000000) - >>> x.grad - Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0., 1., 1., 1.], - [1., 1., 0., 0.]]) - - >>> x.clear_grad() - >>> result2 = paddle.amax(x, axis=0) - >>> result2.backward() - >>> result2 - Tensor(shape=[4], dtype=float64, place=Place(cpu), stop_gradient=False, - [0.90000000, 0.90000000, 0.90000000, 0.90000000]) - >>> x.grad - Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0. , 0.50000000, 1. , 1. ], - [1. , 0.50000000, 0. , 0. ]]) - - >>> x.clear_grad() - >>> result3 = paddle.amax(x, axis=-1) - >>> result3.backward() - >>> result3 - Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=False, - [0.90000000, 0.90000000]) - >>> x.grad - Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0. , 0.33333333, 0.33333333, 0.33333333], - [0.50000000, 0.50000000, 0. , 0. ]]) - - >>> x.clear_grad() - >>> result4 = paddle.amax(x, axis=1, keepdim=True) - >>> result4.backward() - >>> result4 - Tensor(shape=[2, 1], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0.90000000], - [0.90000000]]) - >>> x.grad - Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0. , 0.33333333, 0.33333333, 0.33333333], - [0.50000000, 0.50000000, 0. , 0. ]]) - - >>> # data_y is a Tensor with shape [2, 2, 2] - >>> # the axis is list - >>> y = paddle.to_tensor([[[0.1, 0.9], [0.9, 0.9]], - ... [[0.9, 0.9], [0.6, 0.7]]], - ... 
dtype='float64', stop_gradient=False) - >>> result5 = paddle.amax(y, axis=[1, 2]) - >>> result5.backward() - >>> result5 - Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=False, - [0.90000000, 0.90000000]) - >>> y.grad - Tensor(shape=[2, 2, 2], dtype=float64, place=Place(cpu), stop_gradient=False, - [[[0. , 0.33333333], - [0.33333333, 0.33333333]], - [[0.50000000, 0.50000000], - [0. , 0. ]]]) - - >>> y.clear_grad() - >>> result6 = paddle.amax(y, axis=[0, 1]) - >>> result6.backward() - >>> result6 - Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=False, - [0.90000000, 0.90000000]) - >>> y.grad - Tensor(shape=[2, 2, 2], dtype=float64, place=Place(cpu), stop_gradient=False, - [[[0. , 0.33333333], - [0.50000000, 0.33333333]], - [[0.50000000, 0.33333333], - [0. , 0. ]]]) - """ - - -# if in_dynamic_or_pir_mode(): -# return _C_ops.amax(x, *args, **kwargs) - -# else: -# reduce_all, axis = _get_reduce_axis(axis, x) -# helper = LayerHelper('amax', **locals()) -# check_variable_and_dtype( -# x, 'x', ['float32', 'float64', 'int32', 'int64'], 'amax' -# ) - -# out = helper.create_variable_for_type_inference(dtype=x.dtype) -# helper.append_op( -# type='reduce_amax', -# inputs={'X': x}, -# outputs={'Out': out}, -# attrs={'dim': axis, 'keep_dim': keepdim, 'reduce_all': reduce_all}, -# ) -# return out - - -def amin( - x: Tensor, - axis: int | Sequence[int] | None = None, - keepdim: bool = False, - name: str | None = None, -) -> Tensor: - """ - - Computes the minimum of tensor elements over the given axis - - Note: - The difference between min and amin is: If there are multiple minimum elements, - amin evenly distributes gradient between these equal values, - while min propagates gradient to all of them. - - Args: - x (Tensor): A tensor, the data type is float32, float64, int32, int64, - the dimension is no more than 4. - axis (int|list|tuple|None, optional): The axis along which the minimum is computed. 
- If :attr:`None`, compute the minimum over all elements of - `x` and return a Tensor with a single element, - otherwise must be in the range :math:`[-x.ndim, x.ndim)`. - If :math:`axis[i] < 0`, the axis to reduce is :math:`x.ndim + axis[i]`. - keepdim (bool, optional): Whether to reserve the reduced dimension in the - output Tensor. The result tensor will have one fewer dimension - than the `x` unless :attr:`keepdim` is true, default - value is False. - name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Tensor, results of minimum on the specified axis of input tensor, - it's data type is the same as input's Tensor. - - Examples: - .. code-block:: python - - >>> import paddle - >>> # data_x is a Tensor with shape [2, 4] with multiple minimum elements - >>> # the axis is a int element - - >>> x = paddle.to_tensor([[0.2, 0.1, 0.1, 0.1], - ... [0.1, 0.1, 0.6, 0.7]], - ... dtype='float64', stop_gradient=False) - >>> # There are 5 minimum elements: - >>> # 1) amin evenly distributes gradient between these equal values, - >>> # thus the corresponding gradients are 1/5=0.2; - >>> # 2) while min propagates gradient to all of them, - >>> # thus the corresponding gradient are 1. - >>> result1 = paddle.amin(x) - >>> result1.backward() - >>> result1 - Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=False, - 0.10000000) - >>> x.grad - Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0. , 0.20000000, 0.20000000, 0.20000000], - [0.20000000, 0.20000000, 0. , 0. 
]]) - - >>> x.clear_grad() - >>> result1_min = paddle.min(x) - >>> result1_min.backward() - >>> result1_min - Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=False, - 0.10000000) - >>> x.grad - Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0., 1., 1., 1.], - [1., 1., 0., 0.]]) - - >>> x.clear_grad() - >>> result2 = paddle.amin(x, axis=0) - >>> result2.backward() - >>> result2 - Tensor(shape=[4], dtype=float64, place=Place(cpu), stop_gradient=False, - [0.10000000, 0.10000000, 0.10000000, 0.10000000]) - >>> x.grad - Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0. , 0.50000000, 1. , 1. ], - [1. , 0.50000000, 0. , 0. ]]) - - >>> x.clear_grad() - >>> result3 = paddle.amin(x, axis=-1) - >>> result3.backward() - >>> result3 - Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=False, - [0.10000000, 0.10000000]) - >>> x.grad - Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0. , 0.33333333, 0.33333333, 0.33333333], - [0.50000000, 0.50000000, 0. , 0. ]]) - - >>> x.clear_grad() - >>> result4 = paddle.amin(x, axis=1, keepdim=True) - >>> result4.backward() - >>> result4 - Tensor(shape=[2, 1], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0.10000000], - [0.10000000]]) - >>> x.grad - Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0. , 0.33333333, 0.33333333, 0.33333333], - [0.50000000, 0.50000000, 0. , 0. ]]) - - >>> # data_y is a Tensor with shape [2, 2, 2] - >>> # the axis is list - >>> y = paddle.to_tensor([[[0.2, 0.1], [0.1, 0.1]], - ... [[0.1, 0.1], [0.6, 0.7]]], - ... dtype='float64', stop_gradient=False) - >>> result5 = paddle.amin(y, axis=[1, 2]) - >>> result5.backward() - >>> result5 - Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=False, - [0.10000000, 0.10000000]) - >>> y.grad - Tensor(shape=[2, 2, 2], dtype=float64, place=Place(cpu), stop_gradient=False, - [[[0. 
, 0.33333333], - [0.33333333, 0.33333333]], - [[0.50000000, 0.50000000], - [0. , 0. ]]]) - - >>> y.clear_grad() - >>> result6 = paddle.amin(y, axis=[0, 1]) - >>> result6.backward() - >>> result6 - Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=False, - [0.10000000, 0.10000000]) - >>> y.grad - Tensor(shape=[2, 2, 2], dtype=float64, place=Place(cpu), stop_gradient=False, - [[[0. , 0.33333333], - [0.50000000, 0.33333333]], - [[0.50000000, 0.33333333], - [0. , 0. ]]]) - """ - if in_dynamic_or_pir_mode(): - return _C_ops.amin(x, axis, keepdim) - - else: - reduce_all, axis = _get_reduce_axis(axis, x) - helper = LayerHelper('amin', **locals()) - check_variable_and_dtype( - x, 'x', ['float32', 'float64', 'int32', 'int64'], 'amin' - ) - - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='reduce_amin', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'dim': axis, 'keep_dim': keepdim, 'reduce_all': reduce_all}, - ) - return out - def log1p(x: Tensor, name: str | None = None) -> Tensor: r""" diff --git a/tools/gen_tensor_stub.py b/tools/gen_tensor_stub.py index e18fe8c239d7cb..740bd7aa06befc 100644 --- a/tools/gen_tensor_stub.py +++ b/tools/gen_tensor_stub.py @@ -27,8 +27,6 @@ from typing_extensions import TypeAlias, get_overloads -from paddle._paddle_docs import get_docstr - if TYPE_CHECKING: from types import ModuleType @@ -423,7 +421,7 @@ def get_tensor_members(module: str = 'paddle.Tensor') -> dict[int, Member]: member_doc_cleaned = ( func_doc_to_method_doc(inspect.cleandoc(member_doc)) if member_doc is not None - else get_docstr("paddle." 
+ name) + else None ) try: sig = inspect.signature(member) @@ -433,9 +431,6 @@ def get_tensor_members(module: str = 'paddle.Tensor') -> dict[int, Member]: except (TypeError, ValueError): member_signature = f"{name}()" - if name == 'amax' or name == 'amin': - print(member, id) - print("member_signature : ", member_signature) if is_inherited_member(name, tensor_class): continue From 1d82e39e6dc9407500d03e2bbea2f956b8d0e46e Mon Sep 17 00:00:00 2001 From: DanielSun11 Date: Wed, 13 Aug 2025 21:19:51 +0800 Subject: [PATCH 03/14] import amax and amin from _C_ops --- python/paddle/tensor/math.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 15e4160f5e79be..45cbd3893740a1 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -22,6 +22,10 @@ import paddle from paddle import _C_ops +from paddle._C_ops import ( # noqa: F401 + amax, + amin, +) from paddle.base.libpaddle import DataType from paddle.common_ops_import import VarDesc, dygraph_utils from paddle.pir import Value From bfd302eaa7a0dd98f92f7bd999c76d5962906723 Mon Sep 17 00:00:00 2001 From: DanielSun11 Date: Wed, 13 Aug 2025 23:30:52 +0800 Subject: [PATCH 04/14] fix __all__ export error for build ci --- .../eager/auto_code_generator/generator/monkey_patch_gen.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py b/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py index 261057f9e2fd2d..b5b72c22db08d2 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/monkey_patch_gen.py @@ -25,9 +25,6 @@ from .. 
import core """ -EXTRA_IMPORTS_TEMPLATE = """ -__all__ = [methods_map,{func_name}] -""" FUNCTION_NAME_TEMPLATE = """ def {func_name}(): """ @@ -111,9 +108,6 @@ def GenerateMonkeyPatchTensorMethods(self): func_name="monkey_patch_generated_methods_for_tensor" ) self.MonkeyPatchTensorMethods_str += SET_METHOD_TEMPLATE - self.MonkeyPatchTensorMethods_str += EXTRA_IMPORTS_TEMPLATE.format( - func_name="monkey_patch_generated_methods_for_tensor" - ) def run(self): # Read Yaml file From cc7408bb1c4c1b57c9cd4cb65f671e2beb1983e3 Mon Sep 17 00:00:00 2001 From: DanielSun11 Date: Thu, 14 Aug 2025 01:53:40 +0800 Subject: [PATCH 05/14] add # type: ignore to ignore type check --- python/paddle/_paddle_docs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/_paddle_docs.py b/python/paddle/_paddle_docs.py index edba3131979157..aa9f458a5d5524 100644 --- a/python/paddle/_paddle_docs.py +++ b/python/paddle/_paddle_docs.py @@ -66,7 +66,7 @@ def add_doc_all(method: str, docstr: str) -> None: Examples: .. code-block:: python - + >>> # type: ignore >>> import paddle >>> # data_x is a Tensor with shape [2, 4] with multiple minimum elements >>> # the axis is a int element @@ -196,7 +196,7 @@ def add_doc_all(method: str, docstr: str) -> None: Examples: .. 
code-block:: python - + >>> # type: ignore >>> import paddle >>> # data_x is a Tensor with shape [2, 4] with multiple maximum elements >>> # the axis is a int element From e227b43d218437c706a06051437d8bbb18ec1b78 Mon Sep 17 00:00:00 2001 From: DanielSun11 Date: Thu, 14 Aug 2025 08:17:23 +0800 Subject: [PATCH 06/14] ignore max and amax diff in docs --- python/paddle/_paddle_docs.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/python/paddle/_paddle_docs.py b/python/paddle/_paddle_docs.py index aa9f458a5d5524..922d032fdf8159 100644 --- a/python/paddle/_paddle_docs.py +++ b/python/paddle/_paddle_docs.py @@ -95,10 +95,7 @@ def add_doc_all(method: str, docstr: str) -> None: >>> result1_min Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=False, 0.10000000) - >>> x.grad - Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0., 1., 1., 1.], - [1., 1., 0., 0.]]) + >>> x.clear_grad() >>> result2 = paddle.amin(x, axis=0) @@ -225,10 +222,7 @@ def add_doc_all(method: str, docstr: str) -> None: >>> result1_max Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=False, 0.90000000) - >>> x.grad - Tensor(shape=[2, 4], dtype=float64, place=Place(cpu), stop_gradient=False, - [[0., 1., 1., 1.], - [1., 1., 0., 0.]]) + >>> x.clear_grad() >>> result2 = paddle.amax(x, axis=0) From 9b07e4b64ad3128c5b668b316dfe348966e94db5 Mon Sep 17 00:00:00 2001 From: DanielSun11 Date: Fri, 15 Aug 2025 00:11:06 +0800 Subject: [PATCH 07/14] rm print and add the test case time out --- .../auto_code_generator/generator/codegen_utils.py | 4 ++-- test/auto_parallel/hybrid_strategy/CMakeLists.txt | 12 ++---------- test/auto_parallel/hybrid_strategy/testslist.csv | 2 +- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py index 09ab2c19ab791d..a609ba4f8e22fd 100644 --- 
a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py +++ b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py @@ -547,8 +547,8 @@ def ParsePythonAPIInfo(self): if 'func' in pre_process.keys(): self.dygraph_pre_process = pre_process['func'] self.static_pre_process = pre_process['func'] - if len(pre_process) > 1: - print("error") + # TODO check len(pre_process) > 1 + if 'dygraph_func' in pre_process.keys(): self.dygraph_pre_process = pre_process['dygraph_func'] if 'static_func' in pre_process.keys(): diff --git a/test/auto_parallel/hybrid_strategy/CMakeLists.txt b/test/auto_parallel/hybrid_strategy/CMakeLists.txt index ce31d06d0ab42f..4428547cd8d5e9 100644 --- a/test/auto_parallel/hybrid_strategy/CMakeLists.txt +++ b/test/auto_parallel/hybrid_strategy/CMakeLists.txt @@ -151,7 +151,7 @@ if((WITH_GPU) AND (LINUX)) ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") set_tests_properties(test_parallel_api_with_llama_3d - PROPERTIES TIMEOUT "400" LABELS "RUN_TYPE=HYBRID") + PROPERTIES TIMEOUT "600" LABELS "RUN_TYPE=HYBRID") endif() if((WITH_GPU) AND (LINUX)) py_test_modules( @@ -173,14 +173,6 @@ if((WITH_GPU) AND (LINUX)) py_test_modules( test_process_mesh MODULES test_process_mesh ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") - set_tests_properties(test_process_mesh PROPERTIES TIMEOUT "150" LABELS + set_tests_properties(test_process_mesh PROPERTIES TIMEOUT "60" LABELS "RUN_TYPE=HYBRID") endif() -if((WITH_GPU) AND (LINUX)) - py_test_modules( - test_get_group_in_different_hybrid_configs MODULES - test_get_group_in_different_hybrid_configs ENVS - "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") - set_tests_properties(test_get_group_in_different_hybrid_configs - PROPERTIES TIMEOUT "150" LABELS "RUN_TYPE=HYBRID") -endif() diff --git a/test/auto_parallel/hybrid_strategy/testslist.csv b/test/auto_parallel/hybrid_strategy/testslist.csv index 
3f9dc21f29625b..6db116efbc0433 100644 --- a/test/auto_parallel/hybrid_strategy/testslist.csv +++ b/test/auto_parallel/hybrid_strategy/testslist.csv @@ -16,7 +16,7 @@ test_semi_auto_llama_save_load,LINUX,GPU,180,HYBRID,test_runner.py,,,http_proxy= test_parallel_api_with_llama_1d,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_parallel_api_with_llama_2d,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_parallel_api_with_llama_2d_sep,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., -test_parallel_api_with_llama_3d,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., +test_parallel_api_with_llama_3d,LINUX,GPU,600,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_to_distributed_api_for_llama,LINUX,GPU,180,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_parallel_api_with_llama_lora,LINUX,GPU,360,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_process_mesh,LINUX,GPU,60,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., From bdd236943f57d6ed281383354448224eabca41e0 Mon Sep 17 00:00:00 2001 From: DanielSun11 <1395924413@qq.com> Date: Fri, 15 Aug 2025 02:21:03 +0800 Subject: [PATCH 08/14] add time out seconds and revert some error --- test/auto_parallel/hybrid_strategy/CMakeLists.txt | 12 ++++++++++-- test/auto_parallel/hybrid_strategy/testslist.csv | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/test/auto_parallel/hybrid_strategy/CMakeLists.txt b/test/auto_parallel/hybrid_strategy/CMakeLists.txt index 4428547cd8d5e9..41087c445ce3d9 100644 --- a/test/auto_parallel/hybrid_strategy/CMakeLists.txt +++ b/test/auto_parallel/hybrid_strategy/CMakeLists.txt @@ -151,7 +151,7 @@ if((WITH_GPU) AND (LINUX)) ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") set_tests_properties(test_parallel_api_with_llama_3d - PROPERTIES 
TIMEOUT "600" LABELS "RUN_TYPE=HYBRID") + PROPERTIES TIMEOUT "800" LABELS "RUN_TYPE=HYBRID") endif() if((WITH_GPU) AND (LINUX)) py_test_modules( @@ -173,6 +173,14 @@ if((WITH_GPU) AND (LINUX)) py_test_modules( test_process_mesh MODULES test_process_mesh ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") - set_tests_properties(test_process_mesh PROPERTIES TIMEOUT "60" LABELS + set_tests_properties(test_process_mesh PROPERTIES TIMEOUT "150" LABELS "RUN_TYPE=HYBRID") endif() +if((WITH_GPU) AND (LINUX)) + py_test_modules( + test_get_group_in_different_hybrid_configs MODULES + test_get_group_in_different_hybrid_configs ENVS + "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") + set_tests_properties(test_get_group_in_different_hybrid_configs + PROPERTIES TIMEOUT "150" LABELS "RUN_TYPE=HYBRID") +endif() \ No newline at end of file diff --git a/test/auto_parallel/hybrid_strategy/testslist.csv b/test/auto_parallel/hybrid_strategy/testslist.csv index 6db116efbc0433..87cd245123066d 100644 --- a/test/auto_parallel/hybrid_strategy/testslist.csv +++ b/test/auto_parallel/hybrid_strategy/testslist.csv @@ -16,7 +16,7 @@ test_semi_auto_llama_save_load,LINUX,GPU,180,HYBRID,test_runner.py,,,http_proxy= test_parallel_api_with_llama_1d,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_parallel_api_with_llama_2d,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_parallel_api_with_llama_2d_sep,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., -test_parallel_api_with_llama_3d,LINUX,GPU,600,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., +test_parallel_api_with_llama_3d,LINUX,GPU,800,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_to_distributed_api_for_llama,LINUX,GPU,180,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., 
test_parallel_api_with_llama_lora,LINUX,GPU,360,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_process_mesh,LINUX,GPU,60,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., From 9a2f2e53402a20525b9d2ee67ac86936f77ad9e5 Mon Sep 17 00:00:00 2001 From: DanielSun11 <1395924413@qq.com> Date: Fri, 15 Aug 2025 02:27:30 +0800 Subject: [PATCH 09/14] format --- test/auto_parallel/hybrid_strategy/CMakeLists.txt | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/test/auto_parallel/hybrid_strategy/CMakeLists.txt b/test/auto_parallel/hybrid_strategy/CMakeLists.txt index 41087c445ce3d9..5cece24688d6b9 100644 --- a/test/auto_parallel/hybrid_strategy/CMakeLists.txt +++ b/test/auto_parallel/hybrid_strategy/CMakeLists.txt @@ -173,14 +173,6 @@ if((WITH_GPU) AND (LINUX)) py_test_modules( test_process_mesh MODULES test_process_mesh ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") - set_tests_properties(test_process_mesh PROPERTIES TIMEOUT "150" LABELS + set_tests_properties(test_process_mesh PROPERTIES TIMEOUT "60" LABELS "RUN_TYPE=HYBRID") endif() -if((WITH_GPU) AND (LINUX)) - py_test_modules( - test_get_group_in_different_hybrid_configs MODULES - test_get_group_in_different_hybrid_configs ENVS - "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") - set_tests_properties(test_get_group_in_different_hybrid_configs - PROPERTIES TIMEOUT "150" LABELS "RUN_TYPE=HYBRID") -endif() \ No newline at end of file From 2718c233cf232e595991f1398b1ca71672060473 Mon Sep 17 00:00:00 2001 From: DanielSun11 <1395924413@qq.com> Date: Fri, 15 Aug 2025 08:54:05 +0800 Subject: [PATCH 10/14] recover config --- test/auto_parallel/hybrid_strategy/CMakeLists.txt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/auto_parallel/hybrid_strategy/CMakeLists.txt b/test/auto_parallel/hybrid_strategy/CMakeLists.txt index 5cece24688d6b9..bace710f67749c 100644 --- 
a/test/auto_parallel/hybrid_strategy/CMakeLists.txt +++ b/test/auto_parallel/hybrid_strategy/CMakeLists.txt @@ -169,10 +169,18 @@ if((WITH_GPU) AND (LINUX)) set_tests_properties(test_parallel_api_with_llama_lora PROPERTIES TIMEOUT "360" LABELS "RUN_TYPE=HYBRID") endif() +if((WITH_GPU) AND (LINUX)) + py_test_modules( + test_get_group_in_different_hybrid_configs MODULES + test_get_group_in_different_hybrid_configs ENVS + "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") + set_tests_properties(test_get_group_in_different_hybrid_configs + PROPERTIES TIMEOUT "150" LABELS "RUN_TYPE=HYBRID") +endif() if((WITH_GPU) AND (LINUX)) py_test_modules( test_process_mesh MODULES test_process_mesh ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") - set_tests_properties(test_process_mesh PROPERTIES TIMEOUT "60" LABELS + set_tests_properties(test_process_mesh PROPERTIES TIMEOUT "150" LABELS "RUN_TYPE=HYBRID") endif() From eede2d54dca035891488ec21a9530e2ede228c27 Mon Sep 17 00:00:00 2001 From: DanielSun11 <1395924413@qq.com> Date: Fri, 15 Aug 2025 10:11:17 +0800 Subject: [PATCH 11/14] reconfig cmakefile --- test/auto_parallel/hybrid_strategy/CMakeLists.txt | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/test/auto_parallel/hybrid_strategy/CMakeLists.txt b/test/auto_parallel/hybrid_strategy/CMakeLists.txt index bace710f67749c..5cece24688d6b9 100644 --- a/test/auto_parallel/hybrid_strategy/CMakeLists.txt +++ b/test/auto_parallel/hybrid_strategy/CMakeLists.txt @@ -169,18 +169,10 @@ if((WITH_GPU) AND (LINUX)) set_tests_properties(test_parallel_api_with_llama_lora PROPERTIES TIMEOUT "360" LABELS "RUN_TYPE=HYBRID") endif() -if((WITH_GPU) AND (LINUX)) - py_test_modules( - test_get_group_in_different_hybrid_configs MODULES - test_get_group_in_different_hybrid_configs ENVS - "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") - set_tests_properties(test_get_group_in_different_hybrid_configs - 
PROPERTIES TIMEOUT "150" LABELS "RUN_TYPE=HYBRID") -endif() if((WITH_GPU) AND (LINUX)) py_test_modules( test_process_mesh MODULES test_process_mesh ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") - set_tests_properties(test_process_mesh PROPERTIES TIMEOUT "150" LABELS + set_tests_properties(test_process_mesh PROPERTIES TIMEOUT "60" LABELS "RUN_TYPE=HYBRID") endif() From fe678da2adcfae9e1d2ff9d8514b3e3e0fd30a93 Mon Sep 17 00:00:00 2001 From: DanielSun11 <1395924413@qq.com> Date: Fri, 15 Aug 2025 10:13:26 +0800 Subject: [PATCH 12/14] revert config --- test/auto_parallel/hybrid_strategy/CMakeLists.txt | 2 +- test/auto_parallel/hybrid_strategy/testslist.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/auto_parallel/hybrid_strategy/CMakeLists.txt b/test/auto_parallel/hybrid_strategy/CMakeLists.txt index 5cece24688d6b9..006f95249ecd33 100644 --- a/test/auto_parallel/hybrid_strategy/CMakeLists.txt +++ b/test/auto_parallel/hybrid_strategy/CMakeLists.txt @@ -151,7 +151,7 @@ if((WITH_GPU) AND (LINUX)) ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") set_tests_properties(test_parallel_api_with_llama_3d - PROPERTIES TIMEOUT "800" LABELS "RUN_TYPE=HYBRID") + PROPERTIES TIMEOUT "400" LABELS "RUN_TYPE=HYBRID") endif() if((WITH_GPU) AND (LINUX)) py_test_modules( diff --git a/test/auto_parallel/hybrid_strategy/testslist.csv b/test/auto_parallel/hybrid_strategy/testslist.csv index 87cd245123066d..3f9dc21f29625b 100644 --- a/test/auto_parallel/hybrid_strategy/testslist.csv +++ b/test/auto_parallel/hybrid_strategy/testslist.csv @@ -16,7 +16,7 @@ test_semi_auto_llama_save_load,LINUX,GPU,180,HYBRID,test_runner.py,,,http_proxy= test_parallel_api_with_llama_1d,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_parallel_api_with_llama_2d,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., 
test_parallel_api_with_llama_2d_sep,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., -test_parallel_api_with_llama_3d,LINUX,GPU,800,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., +test_parallel_api_with_llama_3d,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_to_distributed_api_for_llama,LINUX,GPU,180,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_parallel_api_with_llama_lora,LINUX,GPU,360,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_process_mesh,LINUX,GPU,60,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., From d9ee7869c0a741856abfdff9d4a0edd0226024b0 Mon Sep 17 00:00:00 2001 From: DanielSun11 <1395924413@qq.com> Date: Fri, 15 Aug 2025 10:20:34 +0800 Subject: [PATCH 13/14] using ctest lists instead of cmake --- test/auto_parallel/hybrid_strategy/CMakeLists.txt | 12 ++++++++++-- test/auto_parallel/hybrid_strategy/testslist.csv | 5 +++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/test/auto_parallel/hybrid_strategy/CMakeLists.txt b/test/auto_parallel/hybrid_strategy/CMakeLists.txt index 006f95249ecd33..34947df1eb8e64 100644 --- a/test/auto_parallel/hybrid_strategy/CMakeLists.txt +++ b/test/auto_parallel/hybrid_strategy/CMakeLists.txt @@ -151,7 +151,7 @@ if((WITH_GPU) AND (LINUX)) ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") set_tests_properties(test_parallel_api_with_llama_3d - PROPERTIES TIMEOUT "400" LABELS "RUN_TYPE=HYBRID") + PROPERTIES TIMEOUT "800" LABELS "RUN_TYPE=HYBRID") endif() if((WITH_GPU) AND (LINUX)) py_test_modules( @@ -173,6 +173,14 @@ if((WITH_GPU) AND (LINUX)) py_test_modules( test_process_mesh MODULES test_process_mesh ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") - set_tests_properties(test_process_mesh PROPERTIES TIMEOUT "60" LABELS + set_tests_properties(test_process_mesh PROPERTIES TIMEOUT "150" LABELS 
"RUN_TYPE=HYBRID") endif() +if((WITH_GPU) AND (LINUX)) + py_test_modules( + test_get_group_in_different_hybrid_configs MODULES + test_get_group_in_different_hybrid_configs ENVS + "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") + set_tests_properties(test_get_group_in_different_hybrid_configs + PROPERTIES TIMEOUT "150" LABELS "RUN_TYPE=HYBRID") +endif() diff --git a/test/auto_parallel/hybrid_strategy/testslist.csv b/test/auto_parallel/hybrid_strategy/testslist.csv index 3f9dc21f29625b..f1909bab81f84d 100644 --- a/test/auto_parallel/hybrid_strategy/testslist.csv +++ b/test/auto_parallel/hybrid_strategy/testslist.csv @@ -16,7 +16,8 @@ test_semi_auto_llama_save_load,LINUX,GPU,180,HYBRID,test_runner.py,,,http_proxy= test_parallel_api_with_llama_1d,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_parallel_api_with_llama_2d,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_parallel_api_with_llama_2d_sep,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., -test_parallel_api_with_llama_3d,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., +test_parallel_api_with_llama_3d,LINUX,GPU,800,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_to_distributed_api_for_llama,LINUX,GPU,180,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_parallel_api_with_llama_lora,LINUX,GPU,360,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., -test_process_mesh,LINUX,GPU,60,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., +test_process_mesh,LINUX,GPU,150,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., +test_get_group_in_different_hybrid_configs,LINUX,GPU,150,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., From 956602e01b6fa574ca9493a12094a454f619ce47 Mon Sep 17 00:00:00 2001 From: DanielSun11 <1395924413@qq.com> Date: Fri, 15 Aug 
2025 12:37:16 +0800 Subject: [PATCH 14/14] add time out --- test/auto_parallel/hybrid_strategy/CMakeLists.txt | 2 +- test/auto_parallel/hybrid_strategy/testslist.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/auto_parallel/hybrid_strategy/CMakeLists.txt b/test/auto_parallel/hybrid_strategy/CMakeLists.txt index 34947df1eb8e64..104642be1bf189 100644 --- a/test/auto_parallel/hybrid_strategy/CMakeLists.txt +++ b/test/auto_parallel/hybrid_strategy/CMakeLists.txt @@ -97,7 +97,7 @@ if((WITH_GPU) AND (LINUX)) test_pir_reshard_nd_mesh_func MODULES test_pir_reshard_nd_mesh_func ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") set_tests_properties(test_pir_reshard_nd_mesh_func - PROPERTIES TIMEOUT "35" LABELS "RUN_TYPE=HYBRID") + PROPERTIES TIMEOUT "60" LABELS "RUN_TYPE=HYBRID") endif() if((WITH_GPU) AND (LINUX)) py_test_modules( diff --git a/test/auto_parallel/hybrid_strategy/testslist.csv b/test/auto_parallel/hybrid_strategy/testslist.csv index f1909bab81f84d..f4fd1afd890b62 100644 --- a/test/auto_parallel/hybrid_strategy/testslist.csv +++ b/test/auto_parallel/hybrid_strategy/testslist.csv @@ -10,7 +10,7 @@ test_semi_auto_parallel_global_input,LINUX,GPU,120,HYBRID,test_runner.py,,,http_ test_semi_auto_parallel_multi_inputs,LINUX,GPU,120,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_semi_auto_parallel_llama_model_vpp,LINUX,GPU,180,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_semi_auto_parallel_llama_model_pir,LINUX,GPU,180,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../..;FLAGS_enable_pir_api=1, -test_pir_reshard_nd_mesh_func,LINUX,GPU,35,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., +test_pir_reshard_nd_mesh_func,LINUX,GPU,60,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../.., test_semi_auto_llama_acc_align,LINUX,GPU,300,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../..;FLAGS_enable_pir_api=1, 
test_semi_auto_llama_save_load,LINUX,GPU,180,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../..;FLAGS_enable_pir_api=1, test_parallel_api_with_llama_1d,LINUX,GPU,400,HYBRID,test_runner.py,,,http_proxy=;https_proxy=;PYTHONPATH=../..,