python/paddle/base/layer_helper_base.py: 12 changes (10 additions, 2 deletions)
@@ -340,6 +340,7 @@ def create_parameter(
default_initializer=None,
stop_gradient=False,
type=core.VarDesc.VarType.DENSE_TENSOR,
device=None,
):
"""Create parameters for this layers.

@@ -349,6 +350,7 @@ def create_parameter(
dtype: data type of this parameter
is_bias: if this is a bias parameter
default_initializer: set the default initializer for this parameter
device: device where this parameter will be placed

Returns created parameter Variable.
"""
@@ -439,22 +441,28 @@ def create_parameter(
"Please check the parameter attr value passed to self.create_parameter or "
"constructor of dygraph Layers"
)
return self.main_program.global_block().create_parameter(
param = self.main_program.global_block().create_parameter(
dtype=dtype,
shape=shape,
type=type,
stop_gradient=stop_gradient,
**attr._to_kwargs(with_initializer=True),
)
if device is not None:
param = param.to(device)
return param
else:
if in_pir_mode():
Reviewer comment (Contributor): the legacy static graph can be left as is, but does PIR support device?

if isinstance(dtype, core.VarDesc.VarType):
dtype = paddle.pir.core.vartype_to_datatype[dtype]
return paddle.pir.core.create_parameter(
param = paddle.pir.core.create_parameter(
dtype=dtype,
shape=shape,
**attr._to_kwargs(with_initializer=True),
)
if device is not None:
param = param.to(device)
return param
self.startup_program.global_block().create_parameter(
dtype=dtype,
shape=shape,
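For orientation, the new dygraph branch above creates the parameter as before and then relocates it with Tensor.to. A minimal sketch of that post-creation move, assuming an ordinary dygraph layer (the Linear layer and the "cpu" place string are illustrative, not taken from this diff):

import paddle

linear = paddle.nn.Linear(4, 4)      # parameters are created on the default place
moved = linear.weight.to("cpu")      # the new device handling reduces to this kind of move
print(moved.place)                   # prints the CPU place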
python/paddle/nn/__init__.py: 6 changes (6 additions, 0 deletions)
@@ -87,10 +87,13 @@
)
from .layer.conv import (
Conv1D,
Conv1d,
Conv1DTranspose,
Conv2D,
Conv2d,
Conv2DTranspose,
Conv3D,
Conv3d,
Conv3DTranspose,
)
from .layer.distance import PairwiseDistance
@@ -245,6 +248,7 @@
'NLLLoss',
'PoissonNLLLoss',
'Conv1D',
'Conv1d',
'Sequential',
'Hardswish',
'Conv1DTranspose',
@@ -255,6 +259,7 @@
'ParameterDict',
'ParameterList',
'Conv2D',
'Conv2d',
'Softshrink',
'Hardtanh',
'TransformerDecoderLayer',
@@ -272,6 +277,7 @@
'Layer',
'TransformerDecoder',
'Conv3D',
'Conv3d',
'Tanh',
'Conv3DTranspose',
'Flatten',
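The lowercase names exported here appear to be thin aliases of the existing Conv*D layers added for naming compatibility; their definitions live in python/paddle/nn/layer/conv.py, which is not part of this excerpt. A hedged usage sketch with arbitrary shapes, assuming Conv2d takes the same constructor arguments as Conv2D:

import paddle

conv = paddle.nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3)
x = paddle.randn([1, 3, 32, 32])
y = conv(x)
print(y.shape)   # [1, 8, 30, 30] with the default stride of 1 and no padding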
python/paddle/nn/layer/common.py: 53 changes (42 additions, 11 deletions)
@@ -18,6 +18,7 @@

import paddle
from paddle import in_dynamic_mode
from paddle.utils.decorator_utils import param_one_alias

from .. import functional as F
from .layers import Layer
@@ -31,7 +32,9 @@
DataLayout1DVariant,
DataLayout2D,
DataLayout3D,
DTypeLike,
ParamAttrLike,
PlaceLike,
ShapeLike,
Size2,
Size4,
@@ -1720,14 +1723,22 @@ class Embedding(Layer):
True because sparse update is faster. But some optimizer does not support sparse update,
such as :ref:`api_paddle_optimizer_adadelta_Adadelta` , :ref:`api_paddle_optimizer_adamax_Adamax` , :ref:`api_paddle_optimizer_lamb_Lamb`.
In these case, sparse must be False. Default: False.
weight_attr(ParamAttr|None, optional): To specify the weight parameter property. Default: None, which means the
scale_grad_by_freq(bool, optional): Indicating whether to scale the gradients by the inverse frequency of the
word ids in input `x`. Default: False.
_weight(Tensor, optional): The learnable weights to be applied to the input embeddings.
If :attr:`_weight` is specified, the :attr:`weight_attr` is ignored. Default: None.
_freeze(bool, optional): Indicates whether to freeze the embedding weights. If set to True, the provided embedding tensor
will be treated as a fixed lookup table and will not be updated during training.
If set to False, the provided tensor remains learnable. Default: False.
device(PlaceLike, optional): Device on which the weight parameter is placed when it is created from :attr:`weight_attr`. Default: None.
dtype(DTypeLike, optional): Data type of the weights when :attr:`weight_attr` is specified. Default: None.
weight_attr(ParamAttr|None, optional): To specify the weight parameter property. If set, the :attr:`_freeze` attribute will be
ignored and whether the weight is trainable depends on the ``trainable`` option in ``weight_attr``. Default: None, which means the
default weight parameter property is used. See usage for details in :ref:`api_paddle_ParamAttr` . In addition,
user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
The local word vector needs to be transformed into numpy format, and the shape of local word
vector should be consistent with :attr:`num_embeddings` . Then :ref:`api_paddle_nn_initializer_Assign`
is used to load custom or pre-trained word vectors. See code example for details.
scale_grad_by_freq(bool, optional): Indicating whether to scale the gradients by the inverse frequency of the
word ids in input `x`. Default: False.
name(str|None, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
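The weight_attr entry above mentions loading user-defined or pre-trained word vectors; a hedged sketch of both routes using a made-up 10 x 3 table, where the _weight/_freeze behaviour is the one introduced by this diff:

import numpy as np
import paddle

pretrained = np.arange(30, dtype="float32").reshape(10, 3)   # toy 10 x 3 word-vector table

# Route 1: hand the table in directly and freeze it (the new _weight/_freeze arguments).
frozen = paddle.nn.Embedding(10, 3, _weight=paddle.to_tensor(pretrained), _freeze=True)

# Route 2: keep the weight trainable but initialise it from the same table.
attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(pretrained))
trainable = paddle.nn.Embedding(10, 3, weight_attr=attr)

ids = paddle.to_tensor([[0, 2], [5, 9]], dtype="int64")
print(frozen(ids).shape)   # [2, 2, 3]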

@@ -1783,9 +1794,14 @@ def __init__(
padding_idx: float | None = None,
max_norm: float | None = None,
norm_type: float = 2.0,
*,
scale_grad_by_freq: bool = False,
sparse: bool = False,
_weight: Tensor | None = None,
_freeze: bool = False,
device: PlaceLike | None = None,
dtype: DTypeLike | None = None,
weight_attr: ParamAttrLike | None = None,
scale_grad_by_freq: bool = False,
name: str | None = None,
) -> None:
super().__init__()
@@ -1797,6 +1813,7 @@ def __init__(
self._norm_type = norm_type
self._padding_idx = padding_idx
self._scale_grad_by_freq = scale_grad_by_freq
self._device = device

if self._num_embeddings <= 0:
raise ValueError("num_embeddings must be gather than 0")
@@ -1819,23 +1836,37 @@ def __init__(
f"padding_idx must be within [-{num_embeddings}, {num_embeddings})"
)

self._dtype = self._helper.get_default_dtype()
self._dtype = (
self._helper.get_default_dtype() if dtype is None else dtype
)
self._size = [self._num_embeddings, self._embedding_dim]

self._weight_attr = weight_attr
self._remote_prefetch = False
self._name = name
self.weight = self.create_parameter(
attr=self._weight_attr,
shape=self._size,
dtype=self._dtype,
is_bias=False,
)
if _weight is not None:
assert list(_weight.shape) == [
num_embeddings,
embedding_dim,
], "Shape of weight does not match num_embeddings and embedding_dim"
self.weight = _weight
self.weight.stop_gradient = _freeze
else:
self.weight = self.create_parameter(
attr=self._weight_attr,
shape=self._size,
dtype=self._dtype,
is_bias=False,
device=self._device,
)
if self._weight_attr is None:
self.weight.stop_gradient = _freeze

if in_dynamic_mode() and padding_idx != -1:
with paddle.no_grad():
self.weight[padding_idx] = 0.0

@param_one_alias(["x", "input"])
def forward(self, x: Tensor) -> Tensor:
return F.embedding(
x,
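The reworked __init__ makes every argument after norm_type keyword-only, and the param_one_alias decorator appears to let forward accept the ids tensor under either name. A small hedged sketch of the call-site effect, assuming the alias works as the decorator name suggests:

import paddle

emb = paddle.nn.Embedding(100, 16, sparse=False, scale_grad_by_freq=False)   # keyword-only arguments

ids = paddle.to_tensor([1, 5, 7], dtype="int64")
out_a = emb(ids)          # positional call, as before
out_b = emb(input=ids)    # assumed: `input` is accepted as an alias of `x`
assert out_a.shape == out_b.shape == [3, 16]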