python/paddle/base/layer_helper_base.py: 12 changes (10 additions, 2 deletions)
@@ -340,6 +340,7 @@ def create_parameter(
default_initializer=None,
stop_gradient=False,
type=core.VarDesc.VarType.DENSE_TENSOR,
device=None,
):
"""Create parameters for this layers.

@@ -349,6 +350,7 @@ def create_parameter(
dtype: data type of this parameter
is_bias: if this is a bias parameter
default_initializer: set the default initializer for this parameter
device: device where this parameter will be placed

Returns created parameter Variable.
"""
@@ -439,22 +441,28 @@ def create_parameter(
"Please check the parameter attr value passed to self.create_parameter or "
"constructor of dygraph Layers"
)
return self.main_program.global_block().create_parameter(
param = self.main_program.global_block().create_parameter(
dtype=dtype,
shape=shape,
type=type,
stop_gradient=stop_gradient,
**attr._to_kwargs(with_initializer=True),
)
if device is not None:
param = param.to(device)
return param
else:
if in_pir_mode():
Reviewer comment (Contributor): the legacy static graph can be left as is, but does PIR support device?

if isinstance(dtype, core.VarDesc.VarType):
dtype = paddle.pir.core.vartype_to_datatype[dtype]
return paddle.pir.core.create_parameter(
param = paddle.pir.core.create_parameter(
dtype=dtype,
shape=shape,
**attr._to_kwargs(with_initializer=True),
)
if device is not None:
param = param.to(device)
return param
self.startup_program.global_block().create_parameter(
dtype=dtype,
shape=shape,
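For orientation, the new dygraph branch above creates the parameter as before and then relocates it with Tensor.to. A minimal sketch of that post-creation move, assuming an ordinary dygraph layer (the Linear layer and the "cpu" place string are illustrative, not taken from this diff):

import paddle

linear = paddle.nn.Linear(4, 4)      # parameters are created on the default place
moved = linear.weight.to("cpu")      # the new device handling reduces to this kind of move
print(moved.place)                   # prints the CPU place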
python/paddle/nn/__init__.py: 6 changes (6 additions, 0 deletions)
@@ -87,10 +87,13 @@
)
from .layer.conv import (
Conv1D,
Conv1d,
Conv1DTranspose,
Conv2D,
Conv2d,
Conv2DTranspose,
Conv3D,
Conv3d,
Conv3DTranspose,
)
from .layer.distance import PairwiseDistance
@@ -245,6 +248,7 @@
'NLLLoss',
'PoissonNLLLoss',
'Conv1D',
'Conv1d',
'Sequential',
'Hardswish',
'Conv1DTranspose',
@@ -255,6 +259,7 @@
'ParameterDict',
'ParameterList',
'Conv2D',
'Conv2d',
'Softshrink',
'Hardtanh',
'TransformerDecoderLayer',
@@ -272,6 +277,7 @@
'Layer',
'TransformerDecoder',
'Conv3D',
'Conv3d',
'Tanh',
'Conv3DTranspose',
'Flatten',
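The lowercase names exported here appear to be thin aliases of the existing Conv*D layers added for naming compatibility; their definitions live in python/paddle/nn/layer/conv.py, which is not part of this excerpt. A hedged usage sketch with arbitrary shapes, assuming Conv2d takes the same constructor arguments as Conv2D:

import paddle

conv = paddle.nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3)
x = paddle.randn([1, 3, 32, 32])
y = conv(x)
print(y.shape)   # [1, 8, 30, 30] with the default stride of 1 and no padding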
python/paddle/nn/layer/common.py: 53 changes (42 additions, 11 deletions)
@@ -18,6 +18,7 @@

import paddle
from paddle import in_dynamic_mode
from paddle.utils.decorator_utils import param_one_alias

from .. import functional as F
from .layers import Layer
@@ -31,7 +32,9 @@
DataLayout1DVariant,
DataLayout2D,
DataLayout3D,
DTypeLike,
ParamAttrLike,
PlaceLike,
ShapeLike,
Size2,
Size4,
@@ -1720,14 +1723,22 @@ class Embedding(Layer):
True because sparse update is faster. But some optimizer does not support sparse update,
such as :ref:`api_paddle_optimizer_adadelta_Adadelta` , :ref:`api_paddle_optimizer_adamax_Adamax` , :ref:`api_paddle_optimizer_lamb_Lamb`.
In these case, sparse must be False. Default: False.
weight_attr(ParamAttr|None, optional): To specify the weight parameter property. Default: None, which means the
scale_grad_by_freq(bool, optional): Indicating whether to scale the gradients by the inverse frequency of the
word ids in input `x`. Default: False.
_weight(Tensor, optional): The learnable weights to be applied to the input embeddings.
If :attr:`_weight` is specified, the :attr:`weight_attr` is ignored. Default: None.
_freeze(bool, optional): Indicates whether to freeze the embedding weights. If set to True, the provided embedding tensor
will be treated as a fixed lookup table and will not be updated during training.
If set to False, the provided tensor remains learnable. Default: False.
device(PlaceLike, optional): Device on which the weight parameter is placed when it is created from :attr:`weight_attr`. Default: None.
dtype(DTypeLike, optional): Data type of the weights when :attr:`weight_attr` is specified. Default: None.
weight_attr(ParamAttr|None, optional): To specify the weight parameter property. If set, the :attr:`_freeze` attribute will be
ignored and whether the weight is trainable depends on the ``trainable`` option in ``weight_attr``. Default: None, which means the
default weight parameter property is used. See usage for details in :ref:`api_paddle_ParamAttr` . In addition,
user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
The local word vector needs to be transformed into numpy format, and the shape of local word
vector should be consistent with :attr:`num_embeddings` . Then :ref:`api_paddle_nn_initializer_Assign`
is used to load custom or pre-trained word vectors. See code example for details.
scale_grad_by_freq(bool, optional): Indicating whether to scale the gradients by the inverse frequency of the
word ids in input `x`. Default: False.
name(str|None, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
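The weight_attr entry above mentions loading user-defined or pre-trained word vectors; a hedged sketch of both routes using a made-up 10 x 3 table, where the _weight/_freeze behaviour is the one introduced by this diff:

import numpy as np
import paddle

pretrained = np.arange(30, dtype="float32").reshape(10, 3)   # toy 10 x 3 word-vector table

# Route 1: hand the table in directly and freeze it (the new _weight/_freeze arguments).
frozen = paddle.nn.Embedding(10, 3, _weight=paddle.to_tensor(pretrained), _freeze=True)

# Route 2: keep the weight trainable but initialise it from the same table.
attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(pretrained))
trainable = paddle.nn.Embedding(10, 3, weight_attr=attr)

ids = paddle.to_tensor([[0, 2], [5, 9]], dtype="int64")
print(frozen(ids).shape)   # [2, 2, 3]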

@@ -1783,9 +1794,14 @@ def __init__(
padding_idx: float | None = None,
max_norm: float | None = None,
norm_type: float = 2.0,
*,
scale_grad_by_freq: bool = False,
sparse: bool = False,
_weight: Tensor | None = None,
_freeze: bool = False,
device: PlaceLike | None = None,
dtype: DTypeLike | None = None,
weight_attr: ParamAttrLike | None = None,
scale_grad_by_freq: bool = False,
name: str | None = None,
) -> None:
super().__init__()
@@ -1797,6 +1813,7 @@ def __init__(
self._norm_type = norm_type
self._padding_idx = padding_idx
self._scale_grad_by_freq = scale_grad_by_freq
self._device = device

if self._num_embeddings <= 0:
raise ValueError("num_embeddings must be gather than 0")
@@ -1819,23 +1836,37 @@ def __init__(
f"padding_idx must be within [-{num_embeddings}, {num_embeddings})"
)

self._dtype = self._helper.get_default_dtype()
self._dtype = (
self._helper.get_default_dtype() if dtype is None else dtype
)
self._size = [self._num_embeddings, self._embedding_dim]

self._weight_attr = weight_attr
self._remote_prefetch = False
self._name = name
self.weight = self.create_parameter(
attr=self._weight_attr,
shape=self._size,
dtype=self._dtype,
is_bias=False,
)
if _weight is not None:
assert list(_weight.shape) == [
num_embeddings,
embedding_dim,
], "Shape of weight does not match num_embeddings and embedding_dim"
self.weight = _weight
self.weight.stop_gradient = _freeze
else:
self.weight = self.create_parameter(
attr=self._weight_attr,
shape=self._size,
dtype=self._dtype,
is_bias=False,
device=self._device,
)
if self._weight_attr is None:
self.weight.stop_gradient = _freeze

if in_dynamic_mode() and padding_idx != -1:
with paddle.no_grad():
self.weight[padding_idx] = 0.0

@param_one_alias(["x", "input"])
def forward(self, x: Tensor) -> Tensor:
return F.embedding(
x,
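The reworked __init__ makes every argument after norm_type keyword-only, and the param_one_alias decorator appears to let forward accept the ids tensor under either name. A small hedged sketch of the call-site effect, assuming the alias works as the decorator name suggests:

import paddle

emb = paddle.nn.Embedding(100, 16, sparse=False, scale_grad_by_freq=False)   # keyword-only arguments

ids = paddle.to_tensor([1, 5, 7], dtype="int64")
out_a = emb(ids)          # positional call, as before
out_b = emb(input=ids)    # assumed: `input` is accepted as an alias of `x`
assert out_a.shape == out_b.shape == [3, 16]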