diff --git a/python/paddle/distributed/auto_parallel/static/operators/common.py b/python/paddle/distributed/auto_parallel/static/operators/common.py
index 75a45a510b0cad..e66a337e90ec90 100644
--- a/python/paddle/distributed/auto_parallel/static/operators/common.py
+++ b/python/paddle/distributed/auto_parallel/static/operators/common.py
@@ -137,7 +137,7 @@ def get_compatible_impls(self, dist_op):
         return compatible_impls

     # (NOTE) Currently, both DistributedOperatorImplContainer and DistributedOperatorImpl have update_dims_mapping method.
-    # But this method is supposed to be maitained by DistributedOperatorImplContainer, and we are ongoing adding method
+    # But this method is supposed to be maintained by DistributedOperatorImplContainer, and we are gradually adding methods
     # to DistributedOperatorImplContainer and removing those in DistributedOperatorImpl.
     # @abc.abstractmethod
     def update_dims_mapping(self, dist_op):
@@ -369,7 +369,7 @@ def is_parameter_related(varname, block, dist_context=None):

 def infer_shape(block, src_var, src_var_dist_attr, op_input_dist_attr):
     var_shape = block._var_recursive(src_var.name).shape
-    var_topoloy = src_var_dist_attr.process_mesh.shape
+    var_topology = src_var_dist_attr.process_mesh.shape
     var_dims_mapping = src_var_dist_attr.dims_mapping

     complete_shape = []
@@ -377,7 +377,7 @@ def infer_shape(block, src_var, src_var_dist_attr, op_input_dist_attr):
         if var_dims_mapping[idx] == -1:
             complete_shape.append(shape)
         else:
-            new_shape = shape * var_topoloy[var_dims_mapping[idx]]
+            new_shape = shape * var_topology[var_dims_mapping[idx]]
             complete_shape.append(new_shape)

     exact_shape = []
@@ -488,7 +488,7 @@ def get_data_parallel_group(dist_ctx, op, act_grad_names, rank):

 def sync_and_scale_gradients(dist_ctx, op, groups, allreduce_var_names):
     """
-    insert the allreudce and scale ops for gradients of model
+    insert the allreduce and scale ops for gradients of model
     parameters for operator in data parallelism.

     Args:
@@ -557,7 +557,7 @@ def sync_and_scale_gradients(dist_ctx, op, groups, allreduce_var_names):

 def get_partial_groups(dist_ctx, op, out_grad_names, rank):
     """
-    deduce the partial comminication group for current operator output vars.
+    deduce the partial communication group for current operator output vars.

     Args:
         dist_ctx (DistributedContext): dist context.
@@ -608,7 +608,7 @@ def gradient_synchronization(
     dist_ctx, op, act_grad_names, out_grad_names, rank
 ):
     """
-    conduct the allreudce and scaling for gradients of model
+    conduct the allreduce and scaling for gradients of model
     parameters for operator in parallelism train.

     Args:
@@ -727,12 +727,12 @@ def update_op_dims_mapping(
     changed = False
     assert len(input_arg_names) == len(
         infered_input_dims_mappings
-    ), "dims mapping is NOT Match, infered [{}], orignal: [{}]; dist op: [{}]".format(
+    ), "dims mapping does NOT match, inferred [{}], original: [{}]; dist op: [{}]".format(
         len(infered_input_dims_mappings), len(input_arg_names), str(dist_op)
     )
     assert len(output_arg_names) == len(
         infered_output_dims_mappings
-    ), "dims mapping is NOT Match, infered [{}], orignal: [{}]; dist op: [{}]".format(
+    ), "dims mapping does NOT match, inferred [{}], original: [{}]; dist op: [{}]".format(
         len(infered_output_dims_mappings), len(output_arg_names), str(dist_op)
     )
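Reviewer note: for context on the `var_topology` rename above, `infer_shape` reconstructs a variable's global shape from its local shard shape, its dims mapping, and the process-mesh topology. A minimal standalone sketch of that logic (plain Python with illustrative names — `complete_shape`, `mesh_topology` — not Paddle API):

```python
# Sketch of the shape logic behind infer_shape (illustrative only).
# A dim mapped to -1 is replicated; otherwise the local extent is multiplied
# by the size of the mesh dimension it is sharded over.
def complete_shape(local_shape, dims_mapping, mesh_topology):
    full = []
    for idx, extent in enumerate(local_shape):
        if dims_mapping[idx] == -1:  # not sharded on any mesh dim
            full.append(extent)
        else:                        # sharded: scale by that mesh dim's size
            full.append(extent * mesh_topology[dims_mapping[idx]])
    return full

# A [4, 8] local shard, column-sharded over the second axis of a 2x4 mesh:
assert complete_shape([4, 8], [-1, 1], [2, 4]) == [4, 32]
```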
diff --git a/python/paddle/distributed/fleet/utils/fs.py b/python/paddle/distributed/fleet/utils/fs.py
index a49e570295bb25..aa7ec2e544efec 100644
--- a/python/paddle/distributed/fleet/utils/fs.py
+++ b/python/paddle/distributed/fleet/utils/fs.py
@@ -302,7 +302,7 @@ def is_exist(self, fs_path):
             fs_path(str): The local file path.

         Returns:
-            Bool: Wheter it's a file or directory, return true if the path exists,
+            Bool: Whether it's a file or directory, return true if the path exists,
             otherwise return false.

         Examples:
@@ -1534,7 +1534,7 @@ def mv(self, fs_src_path, fs_dst_path, overwrite=False, test_exists=True):
             fs_src_path(str): Name of the file or directory, that's needed to be moved.
             fs_dst_path(str): Name of the file or directory to which to move to.
             overwrite(bool): Whether to re-write `fs_dst_path` if that exists. Default is False.
-            test_exists(bool): Check the existence of `fs_src_path` and `fs_dst_path` . When `test_exists` is set true, if `fs_src_path` doesn't exist or `fs_dst_path` exists, program will throw an Excetption.
+            test_exists(bool): Check the existence of `fs_src_path` and `fs_dst_path`. When `test_exists` is set true, if `fs_src_path` doesn't exist or `fs_dst_path` exists, the program will throw an Exception.

         Examples:
diff --git a/python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py b/python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py
index fa728217474d28..a9874cb996e538 100644
--- a/python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py
+++ b/python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py
@@ -469,28 +469,28 @@ def parse_args():
         '--src_mp',
         type=int,
         default=2,
-        help='mp degree of the origin training task that dumpped this model',
+        help='mp degree of the origin training task that dumped this model',
     )

     parser.add_argument(
         '--src_pp',
         type=int,
         default=2,
-        help='pp degree of the origin training task that dumpped this model',
+        help='pp degree of the origin training task that dumped this model',
     )

     parser.add_argument(
         '--src_vp',
         type=int,
         default=2,
-        help='vp degree of the origin training task that dumpped this model',
+        help='vp degree of the origin training task that dumped this model',
     )

     parser.add_argument(
         '--dst_mp',
         type=int,
         default=None,
-        help='mp degree of the origin training task that dumpped this model',
+        help='mp degree of the expected training task that would recover this model',
     )

     parser.add_argument(
@@ -511,7 +511,7 @@ def parse_args():
         '--sharding',
         type=int,
         default=1,
-        help=" sharding degree of both the origin training task that dumpped this model and the expected training task that would recover this model",
+        help="sharding degree of both the origin training task that dumped this model and the expected training task that would recover this model",
     )

     parser.add_argument(
diff --git a/python/paddle/static/amp/function_overload.py b/python/paddle/static/amp/function_overload.py
index 0767873dd6d997..3ee9e7d388c79a 100644
--- a/python/paddle/static/amp/function_overload.py
+++ b/python/paddle/static/amp/function_overload.py
@@ -88,7 +88,7 @@ def register(self, fn, key):
         """
         assert isinstance(
             key, FunctionType
-        ), f"The type of key is expected to be FunctionType, but recieved {type(key)}."
+        ), f"The type of key is expected to be FunctionType, but received {type(key)}."
         func = Function(fn)
         self.function_map[key] = fn
         return func
diff --git a/python/paddle/static/nn/sequence_lod.py b/python/paddle/static/nn/sequence_lod.py
index c85c9e9d3dc735..51cfd0d5307d56 100644
--- a/python/paddle/static/nn/sequence_lod.py
+++ b/python/paddle/static/nn/sequence_lod.py
@@ -69,7 +69,7 @@ def sequence_conv(
             down_pad_len = max(0, filter_size + padding_start - 1) = 1

             The output of the input sequence after padding is:
-            data_aftet_padding = [[0, 0, 1, 1, 2, 2],
+            data_after_padding = [[0, 0, 1, 1, 2, 2],
                                   [1, 1, 2, 2, 3, 3],
                                   [2, 2, 3, 3, 0, 0],
                                   [0, 0, 4, 4, 0, 0]]
@@ -968,7 +968,7 @@ def sequence_pad(x, pad_value, maxlen=None, name=None):
     Args:
         x (Tensor): Input 1-level Tensor with dims ``[M, K]``. The batch \
-            size is described by lod infor (the number of sequences ). \
+            size is described by lod info (the number of sequences). \
            The data type should be float32, float64, int8, int32 or int64.
        pad_value (Tensor): Padding value. It can be a scalar or a 1D tensor \
            with length ``K``. If it's a scalar, it will be automatically broadcasted \
@@ -984,7 +984,7 @@ def sequence_pad(x, pad_value, maxlen=None, name=None):
     Returns:
         tuple, A Python tuple (Out, Length): the 1st is a 0 level Tensor \
             ``Out``, with the shape ``[batch_size, maxlen, K]``; the second is the original \
-            sequences length infor ``Length``, which should be a 0-level 1D Tensor. \
+            sequences length info ``Length``, which should be a 0-level 1D Tensor. \
             The size of ``Length`` is equal to batch size, and the data type is int64.

     Examples:
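Reviewer note: the `test_exists` semantics clarified in the fs.py hunk are easiest to see with `LocalFS`, which shares this `mv` docstring. A sketch based on the documented API (placeholder paths, not run in CI):

```python
from paddle.distributed.fleet.utils import LocalFS

client = LocalFS()
client.touch("fs_mv_src.txt")  # create a source file to move

# With test_exists=True (the default), mv raises an exception if the source
# is missing or the destination already exists; overwrite=False keeps that
# strictness instead of silently replacing the destination.
client.mv("fs_mv_src.txt", "fs_mv_dst.txt", overwrite=False, test_exists=True)
assert client.is_exist("fs_mv_dst.txt")
```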
diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py
index 763668d5cfa2cf..cb4dccb834c928 100644
--- a/python/paddle/tensor/creation.py
+++ b/python/paddle/tensor/creation.py
@@ -584,7 +584,7 @@ def _handle_np_dtype(ndarray, dtype):
             data = np.array(data)
             if data.dtype == np.object_:
                 raise ValueError(
-                    "\n\tFaild to convert input data to a regular ndarray :\n\t - Usually "
+                    "\n\tFailed to convert input data to a regular ndarray :\n\t - Usually "
                     "this means the input data contains nested lists with different lengths. "
                 )
     elif isinstance(data, paddle.Tensor) and not in_dynamic_mode():
@@ -600,7 +600,7 @@ def _handle_np_dtype(ndarray, dtype):
     elif isinstance(data, (core.LoDTensor, core.Tensor)):
         # should't expose it to users, just for internal use.
         # convert core.Tensor/core.LoDTensor to Tensor first
-        # Currenly, there is no copy when places are same
+        # Currently, there is no copy when places are same
         if in_dynamic_mode():
             data = core.eager.Tensor(data)
         else:
@@ -915,7 +915,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
     elif isinstance(shape, paddle.pir.Value):
         pass
     else:
-        TypeError("Shape only supports OpReslut, or list, or tuple.")
+        raise TypeError("Shape only supports OpResult, or list, or tuple.")

     if out is None:
         out = _C_ops.full(shape, value, dtype, place)
@@ -1321,12 +1321,12 @@ def arange(start=0, end=None, step=1, dtype=None, name=None):
         If ``end`` is None, the half-open interval is [0, ``start``).
         Default is None.
     step(float|int|Tensor, optional): Spacing between values. For any out,
-        it is the istance between two adjacent values, out[i+1] - out[i].
+        it is the distance between two adjacent values, out[i+1] - out[i].
         If ``step`` is a Tensor, it is a 0-D Tensor which represents a scalar
        and data type is int32, int64, float32, float64. . Default is 1.
     dtype(str|np.dtype, optional): The data type of the
        output tensor. Supported data types: int32, int64, float32, float64.
-        If ``dytpe`` is None, the data type is float32. Default is None.
+        If ``dtype`` is None, the data type is float32. Default is None.
     name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

     Returns:
@@ -2070,7 +2070,7 @@ def empty(shape, dtype=None, name=None):
         If ``shape`` is a list or tuple, each element of it should be integer or 0-D Tensor with shape [].
         If ``shape`` is an Tensor, it should be an 1-D Tensor which represents a list.
     dtype(np.dtype|str, optional): Data type of the output Tensor
-        which can be bool, float16, float32, float64, int32, int64, complex64, complex128 if dytpe is `None`, the data
+        which can be bool, float16, float32, float64, int32, int64, complex64, complex128. If ``dtype`` is `None`, the data
         type of created Tensor use global default dtype (see ``get_default_dtype``
         for details).
     name(str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
@@ -2592,7 +2592,7 @@ def _memcpy(input, place=None, output=None):


 def complex(real, imag, name=None):
-    """Return a compelx tensor given the real and image component.
+    """Return a complex tensor given the real and imaginary components.

     Args:
         real (Tensor): The real component. The data type should be 'float32' or 'float64'.
@@ -2646,7 +2646,7 @@ def complex(real, imag, name=None):
 def tril_indices(row, col, offset=0, dtype='int64'):
     """
     Return the indices of the lower triangular part of the 2-D matrix
-    whose row and col is knowed.Indices are ordered based on row and then columns.
+    whose row and col are known. Indices are ordered based on row and then columns.
     The lower triangular part of the matrix is defined as the elements on
     and below the diagonal.
@@ -2737,7 +2737,7 @@ def triu_indices(row, col=None, offset=0, dtype='int64'):
     Args:
         row (int): The input x which is a int number describe the number of row of the matrix.
         col (int, optional): The input x which is a int number describe the number of col of the matrix.
-            default value for col is None, then it will be set equal to row, indicting a square matix.
+            default value for col is None, then it will be set equal to row, indicating a square matrix.
         offset (int, optional): The offset to consider, default value is 0.

             - If offset = 0, all elements on and above the main diagonal are retained.
@@ -2807,11 +2807,11 @@ def triu_indices(row, col=None, offset=0, dtype='int64'):

 def polar(abs, angle, name=None):
-    """Return a Cartesian coordinates corresponding to the polar coordinates compelx tensor given the ``abs`` and ``angle`` component.
+    """Return a complex tensor in Cartesian coordinates corresponding to the polar coordinates given by the ``abs`` and ``angle`` components.

     Args:
         abs (Tensor): The abs component. The data type should be 'float32' or 'float64'.
-        angle (Tensor): The anglee component. The data type should be the same as ``abs``.
+        angle (Tensor): The angle component. The data type should be the same as ``abs``.
         name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

     Returns:
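Reviewer note: the creation.py docstrings touched above are easy to sanity-check in a REPL. A sketch assuming a current Paddle install:

```python
import paddle

# arange: `step` is the distance between adjacent values (the fix above).
print(paddle.arange(0, 10, 2).numpy())  # [0 2 4 6 8]

# complex: built from real and imaginary components of matching dtype.
real = paddle.to_tensor([1.0, 2.0])
imag = paddle.to_tensor([3.0, 4.0])
print(paddle.complex(real, imag).numpy())  # [1.+3.j 2.+4.j]

# polar: abs/angle in polar form -> complex tensor in Cartesian form.
print(paddle.polar(paddle.to_tensor([1.0]), paddle.to_tensor([0.0])).numpy())  # [1.+0.j]
```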
diff --git a/python/paddle/tensor/einsum.py b/python/paddle/tensor/einsum.py
index 2aba8898be000d..3ebca4e85e3c61 100644
--- a/python/paddle/tensor/einsum.py
+++ b/python/paddle/tensor/einsum.py
@@ -110,7 +110,7 @@ def validate_rhs(rhs, input_labels, n_bcast_dims):
         rhs = rhs.replace('...', '')
     rhs_set = set(rhs)

-    # Hidden assumption: availble labels don't include '.'
+    # Hidden assumption: available labels don't include '.'
     assert '.' not in input_labels

     # Verify that output labels all come from the set of input labels
@@ -195,7 +195,7 @@ def build_global_view(nop_labels, rhs, n_bcast_dims):
         rhs:
             The equation right hand side
         n_bcast_dims:
-            The maxium number of broadcast dimensions
+            The maximum number of broadcast dimensions

     Returns
     -------
@@ -336,7 +336,7 @@ def plan_matmul(plan, g_view, op1, op2, g_supports, g_shape, I, J1, J2, K):
     plan matmul
     '''
     # Transpose and re-shape op1 and op2 in I, J1, K and I, J2, K
-    # Then apply matmul(x, y, transpose_x=False, tranpose_y=True)
+    # Then apply matmul(x, y, transpose_x=False, transpose_y=True)
     var1, var2 = f'op{op1}', f'op{op2}'

     op1_view, op2_view = (g_view[op] for op in (op1, op2))
@@ -366,7 +366,7 @@ def plan_matmul(plan, g_view, op1, op2, g_supports, g_shape, I, J1, J2, K):
             step = transpose, [var2], var2, list(op2_dims)
             plan.add_step(step)

-    # Check if conditions hold for turnning the operation into a matmul
+    # Check if conditions hold for turning the operation into a matmul
     if (
         j1 + j2 > 0
         and k > 0
@@ -538,7 +538,7 @@ def plan_broadcast(plan, operands, nop_axes):
     varnames = [f'op{i}' for i in range(nop)]

     for i, op_axes in zip(range(nop), nop_axes):
-        # Re-arrange the dimesions according to the global layout
+        # Re-arrange the dimensions according to the global layout
         perm, fill = rearrange(op_axes)
         var = varnames[i]
         if perm:
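Reviewer note: the `plan_matmul` comment fixed above describes the standard einsum-to-matmul reduction — both operands are transposed/reshaped into (I, J, K) layout and contracted over K with `transpose_y=True`. A sketch of that equivalence with hypothetical shapes:

```python
import paddle

# 'ijk,ilk->ijl' contracts over k with batch dim i: exactly a batched matmul
# with the second operand transposed, as the fixed comment describes.
x = paddle.rand([2, 3, 5])  # (I, J1, K)
y = paddle.rand([2, 4, 5])  # (I, J2, K)

via_einsum = paddle.einsum('ijk,ilk->ijl', x, y)
via_matmul = paddle.matmul(x, y, transpose_y=True)
assert paddle.allclose(via_einsum, via_matmul).item()
```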
diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py
index 43ab7109720ee3..f6b02612a079bb 100644
--- a/python/paddle/tensor/linalg.py
+++ b/python/paddle/tensor/linalg.py
@@ -1533,7 +1533,7 @@ def empty_tensor(input, shape):
         if not len(x_shape) >= 2:
             raise ValueError(
                 "input should be a matrix or batches of matrices, "
-                + f"but the dimention of received input is {len(x_shape)}"
+                + f"but the dimension of received input is {len(x_shape)}"
             )
         if p is None:
             p = 2
@@ -1967,7 +1967,7 @@ def cross(x, y, axis=9, name=None):
 def cholesky(x, upper=False, name=None):
     r"""
     Computes the Cholesky decomposition of one symmetric positive-definite
-    matrix or batches of symmetric positive-definite matrice.
+    matrix or batches of symmetric positive-definite matrices.

     If `upper` is `True`, the decomposition has the form :math:`A = U^{T}U` ,
     and the returned matrix :math:`U` is upper-triangular. Otherwise, the
@@ -2112,7 +2112,7 @@ def bmm(x, y, name=None):
     """
     Applies batched matrix multiplication to two tensors.

-    Both of the two input tensors must be three-dementional and share the same batch size.
+    Both of the two input tensors must be three-dimensional and share the same batch size.

     If x is a (b, m, k) tensor, y is a (b, k, n) tensor, the output will be a (b, m, n) tensor.

@@ -2154,7 +2154,7 @@ def bmm(x, y, name=None):
     y_shape = y.shape
     if not len(x_shape) == len(y_shape) == 3:
         raise ValueError(
-            "x and y should be 3-dimensional. But received x's dimention: {}, y's dimention: {}".format(
+            "x and y should be 3-dimensional. But received x's dimension: {}, y's dimension: {}".format(
                 x_shape, y_shape
             )
         )
@@ -2331,11 +2331,11 @@ def __check_input(x, vec):
         vec_shape = list(vec.shape)
         if len(x_shape) != 2:
             raise ValueError(
-                f"x should be 2-dimensional. But received x's dimention: {x_shape}"
+                f"x should be 2-dimensional. But received x's dimension: {x_shape}"
             )
         if len(vec_shape) != 1:
             raise ValueError(
-                "vec should be 1-dimensional. But received vec's dimention: {}".format(
+                "vec should be 1-dimensional. But received vec's dimension: {}".format(
                     vec_shape
                 )
             )
@@ -2703,7 +2703,7 @@ def matrix_power(x, n, name=None):
     Computes the n-th power of a square matrix or a batch of square matrices.

-    Let :math:`X` be a sqaure matrix or a batch of square matrices, :math:`n` be
+    Let :math:`X` be a square matrix or a batch of square matrices, :math:`n` be
     an exponent, the equation should be:

     .. math::
@@ -2775,7 +2775,7 @@ def matrix_power(x, n, name=None):

 def qr(x, mode="reduced", name=None):
     r"""
-    Computes the QR decomposition of one matrix or batches of matrice (backward is unsupported now).
+    Computes the QR decomposition of one matrix or batches of matrices (backward is unsupported now).

     Args:
         x (Tensor): The input tensor. Its shape should be `[..., M, N]`,
@@ -3149,7 +3149,7 @@ def eigvals(x, name=None):
     x_shape = list(x.shape)
     if len(x_shape) < 2:
         raise ValueError(
-            "The dimension of Input(x) should be at least 2, but received x's dimention = {}, x's shape = {}".format(
+            "The dimension of Input(x) should be at least 2, but received x's dimension = {}, x's shape = {}".format(
                 len(x_shape), x_shape
             )
         )
@@ -3303,7 +3303,7 @@ def __check_input(x, UPLO):
             )
         if x_shape[-1] != x_shape[-2]:
             raise ValueError(
-                f"The input matrix must be batches of square matrices. But received x's dimention: {x_shape}"
+                f"The input matrix must be batches of square matrices. But received x's dimension: {x_shape}"
             )
         if UPLO != 'L' and UPLO != 'U':
             raise ValueError(
@@ -3358,7 +3358,7 @@ def pinv(x, rcond=1e-15, hermitian=False, name=None):
             where * is zero or more batch dimensions. m and n can be
             arbitrary positive number. The data type of x should be
             float32 or float64 or complex64 or complex128. When data
-            type is complex64 or cpmplex128, hermitian should be set
+            type is complex64 or complex128, hermitian should be set
             True.
         rcond (Tensor, optional): the tolerance value to determine
             when is a singular value zero. Default:1e-15.
@@ -3582,7 +3582,7 @@ def solve(x, y, name=None):
     r"""
     Computes the solution of a square system of linear equations with a unique solution for input 'X' and 'Y'.

-    Let :math:`X` be a sqaure matrix or a batch of square matrices, :math:`Y` be
+    Let :math:`X` be a square matrix or a batch of square matrices, :math:`Y` be
     a vector/matrix or a batch of vectors/matrices, the equation should be:

     .. math::
@@ -3820,7 +3820,7 @@ def __check_input(x, UPLO):
             )
         if x_shape[-1] != x_shape[-2]:
             raise ValueError(
-                f"The input matrix must be batches of square matrices. But received x's dimention: {x_shape}"
+                f"The input matrix must be batches of square matrices. But received x's dimension: {x_shape}"
             )
         if UPLO != 'L' and UPLO != 'U':
             raise ValueError(
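Reviewer note: the `bmm` docstring fix above states the shape contract that the corrected error message enforces. A quick sketch, assuming a current Paddle install:

```python
import paddle

# bmm requires both inputs to be 3-D with equal batch size:
# (b, m, k) @ (b, k, n) -> (b, m, n).
x = paddle.rand([10, 3, 4])
y = paddle.rand([10, 4, 5])
out = paddle.bmm(x, y)
print(out.shape)  # [10, 3, 5]
```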
diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py
index 6e8c82f8cfbe4e..d2a0c46369fadf 100644
--- a/python/paddle/tensor/random.py
+++ b/python/paddle/tensor/random.py
@@ -1023,7 +1023,7 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None):
         If ``shape`` is a list or tuple, each element of it should be integer or 0-D Tensor with shape [].
         If ``shape`` is an Tensor, it should be an 1-D Tensor which represents a list. Default is [1].
     dtype (str|np.dtype, optional): The data type of the
-        output tensor. Supported data types: int32, int64. If ``dytpe``
+        output tensor. Supported data types: int32, int64. If ``dtype``
         is None, the data type is int64. Default is None.
     name (str, optional): The default value is None. Normally there is no
         need for user to set this property. For more information, please
@@ -1162,7 +1162,7 @@ def randint_like(x, low=0, high=None, dtype=None, name=None):
         If ``high`` is None, the range is [0, ``low``).
     dtype (str|np.dtype, optional): The data type of the
         output tensor. Supported data types: bool, int32, int64, float16,
-        float32, float64. If ``dytpe`` is None, the data type is the
+        float32, float64. If ``dtype`` is None, the data type is the
         same as x's data type. Default is None.
     name (str, optional): The default value is None. Normally there is no
         need for user to set this property. For more information, please
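Reviewer note: the two `dtype` fixes above document different defaults, which is worth a quick demonstration (sketch, assuming a current Paddle install):

```python
import paddle

# randint: dtype=None defaults to int64 ...
a = paddle.randint(0, 10, shape=[2, 3])
print(a.dtype)  # paddle.int64

# ... while randint_like defaults to the dtype of its input tensor.
x = paddle.zeros([2, 3], dtype='int32')
b = paddle.randint_like(x, low=0, high=10)
print(b.dtype)  # paddle.int32
```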
diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py
index e80ffca679d05e..9e5d070268e3fc 100755
--- a/python/paddle/tensor/search.py
+++ b/python/paddle/tensor/search.py
@@ -238,7 +238,7 @@ def argmin(x, axis=None, keepdim=False, dtype="int64", name=None):
         axis (int, optional): Axis to compute indices along. The effective range
             is [-R, R), where R is x.ndim. when axis < 0, it works the same way
             as axis + R. Default is None, the input `x` will be into the flatten tensor, and selecting the min value index.
-        keepdim (bool, optional): Whether to keep the given axis in output. If it is True, the dimensions will be same as input x and with size one in the axis. Otherwise the output dimentions is one fewer than x since the axis is squeezed. Default is False.
+        keepdim (bool, optional): Whether to keep the given axis in output. If it is True, the dimensions will be same as input x and with size one in the axis. Otherwise the output has one fewer dimension than x, since the axis is squeezed. Default is False.
         dtype (str, optional): Data type of the output tensor which can
             be int32, int64. The default value is 'int64', and it will
             return the int64 indices.
@@ -585,7 +585,7 @@ def mode(x, axis=-1, keepdim=False, name=None):
         axis (int, optional): Axis to compute indices along. The effective range
             is [-R, R), where R is x.ndim. when axis < 0, it works the same way
             as axis + R. Default is -1.
-        keepdim (bool, optional): Whether to keep the given axis in output. If it is True, the dimensions will be same as input x and with size one in the axis. Otherwise the output dimentions is one fewer than x since the axis is squeezed. Default is False.
+        keepdim (bool, optional): Whether to keep the given axis in output. If it is True, the dimensions will be same as input x and with size one in the axis. Otherwise the output has one fewer dimension than x, since the axis is squeezed. Default is False.
         name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

     Returns:
@@ -1175,7 +1175,7 @@ def kthvalue(x, k, axis=None, keepdim=False, name=None):
         axis (int, optional): Axis to compute indices along. The effective range
             is [-R, R), where R is x.ndim. when axis < 0, it works the same way
             as axis + R. The default is None. And if the axis is None, it will computed as -1 by default.
-        keepdim (bool, optional): Whether to keep the given axis in output. If it is True, the dimensions will be same as input x and with size one in the axis. Otherwise the output dimentions is one fewer than x since the axis is squeezed. Default is False.
+        keepdim (bool, optional): Whether to keep the given axis in output. If it is True, the dimensions will be same as input x and with size one in the axis. Otherwise the output has one fewer dimension than x, since the axis is squeezed. Default is False.
         name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

     Returns:
diff --git a/python/paddle/tensor/to_string.py b/python/paddle/tensor/to_string.py
index d924a7aca66d53..dbc629266efd76 100644
--- a/python/paddle/tensor/to_string.py
+++ b/python/paddle/tensor/to_string.py
@@ -294,14 +294,14 @@ def _format_dense_tensor(tensor, indent):
     for dim in tensor.shape:
         size *= dim

-    sumary = False
+    summary = False
     if size > DEFAULT_PRINT_OPTIONS.threshold:
-        sumary = True
+        summary = True

     max_width, signed = _get_max_width(_to_summary(np_tensor))

     data = _format_tensor(
-        np_tensor, sumary, indent=indent, max_width=max_width, signed=signed
+        np_tensor, summary, indent=indent, max_width=max_width, signed=signed
     )

     return data
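Reviewer note: two quick demonstrations of the behavior documented above — the `keepdim` wording fixed in search.py, and the summarization flag renamed in to_string.py (sketch, assuming a current Paddle install):

```python
import paddle

x = paddle.rand([2, 3, 4])

# keepdim=False squeezes the reduced axis: one fewer dimension than x.
print(paddle.argmin(x, axis=1).shape)                # [2, 4]
# keepdim=True keeps the axis with size one.
print(paddle.argmin(x, axis=1, keepdim=True).shape)  # [2, 1, 4]

# The renamed `summary` flag turns on once the element count exceeds the
# print-options threshold, eliding the middle of the printed tensor.
paddle.set_printoptions(threshold=6)
print(paddle.arange(100))  # summarized output, since 100 > 6
```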