diff --git a/python/paddle/amp/grad_scaler.py b/python/paddle/amp/grad_scaler.py
index 594e518c402e87..ff7f09b7f02e68 100644
--- a/python/paddle/amp/grad_scaler.py
+++ b/python/paddle/amp/grad_scaler.py
@@ -18,10 +18,10 @@
 
 import numpy as np
 
+import paddle
 from paddle import _C_ops, _legacy_C_ops
 from paddle.base import core
 from paddle.base.data_feeder import check_type
-from paddle.base.dygraph import to_variable
 from paddle.base.framework import _dygraph_tracer, dygraph_only
 from paddle.framework import in_dynamic_mode
 
@@ -130,20 +130,20 @@ def __init__(
         self._decr_count = 0
         self._use_dynamic_loss_scaling = use_dynamic_loss_scaling
 
-        self._found_inf = to_variable(np.array([0]).astype(np.bool_))
-        self._temp_found_inf_value_false = to_variable(
+        self._found_inf = paddle.to_tensor(np.array([0]).astype(np.bool_))
+        self._temp_found_inf_value_false = paddle.to_tensor(
             np.array([0]).astype(np.bool_)
         )
-        self._temp_found_inf_fp16 = to_variable(
+        self._temp_found_inf_fp16 = paddle.to_tensor(
             np.array([0]).astype(np.bool_)
         )
-        self._temp_found_inf_bf16 = to_variable(
+        self._temp_found_inf_bf16 = paddle.to_tensor(
             np.array([0]).astype(np.bool_)
         )
-        self._temp_found_inf_fp32 = to_variable(
+        self._temp_found_inf_fp32 = paddle.to_tensor(
             np.array([0]).astype(np.bool_)
         )
-        self._scale = to_variable(
+        self._scale = paddle.to_tensor(
             np.array([self._init_loss_scaling]).astype(np.float32)
         )
         self._cache_founf_inf = None
@@ -438,7 +438,7 @@ def set_init_loss_scaling(self, new_init_loss_scaling):
             new_init_loss_scaling(int): The new_init_loss_scaling used to update initial loss scaling factor.s
         """
         self._init_loss_scaling = new_init_loss_scaling
-        self._scale = to_variable(
+        self._scale = paddle.to_tensor(
             np.array([self._init_loss_scaling]).astype(np.float32)
         )
 
@@ -563,7 +563,7 @@ def load_state_dict(self, state_dict):
             )
 
         self._init_loss_scaling = state_dict["scale"][0]
-        self._scale = to_variable(
+        self._scale = paddle.to_tensor(
             np.array([self._init_loss_scaling]).astype(np.float32)
         )
         self._incr_ratio = state_dict["incr_ratio"]
diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index aed4833188d6c1..7c7a3d60ebf45c 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -94,12 +94,12 @@ def _to_static_var(self, to_parameter=False, **kwargs):
             .. code-block:: python
 
                 >>> import paddle.base as base
-                >>> from paddle.base.dygraph.base import to_variable
+                >>> import paddle
                 >>> import numpy as np
 
                 >>> data = np.ones([3, 1024], dtype='float32')
                 >>> with base.dygraph.guard():
-                ...     tensor = to_variable(data)
+                ...     tensor = paddle.to_tensor(data)
                 ...     static_var = tensor._to_static_var()
 
         """
@@ -175,14 +175,14 @@ def set_value(self, value):
             .. code-block:: python
 
                 >>> import paddle.base as base
-                >>> from paddle.base.dygraph.base import to_variable
+                >>> import paddle
                 >>> from paddle.nn import Linear
                 >>> import numpy as np
 
                 >>> data = np.ones([3, 1024], dtype='float32')
                 >>> with base.dygraph.guard():
                 ...     linear = Linear(1024, 4)
-                ...     t = to_variable(data)
+                ...     t = paddle.to_tensor(data)
                 ...     linear(t) # call with default weight
                 ...     custom_weight = np.random.randn(1024, 4).astype("float32")
                 ...     linear.weight.set_value(custom_weight) # change existing weight
diff --git a/python/paddle/base/framework.py b/python/paddle/base/framework.py
index 8f5ecbe2c11dbd..57d74c5130f48e 100644
--- a/python/paddle/base/framework.py
+++ b/python/paddle/base/framework.py
@@ -1505,9 +1505,10 @@ class Variable(metaclass=VariableMetaClass):
 
             >>> import paddle.base as base
             >>> import numpy as np
+            >>> import paddle
 
             >>> with base.dygraph.guard():
-            ...     new_variable = base.dygraph.to_variable(np.arange(10))
+            ...     new_variable = paddle.to_tensor(np.arange(10))
 
     """
 
@@ -1693,14 +1694,13 @@ def numpy(self):
             .. code-block:: python
 
                 >>> import paddle.base as base
-                >>> from paddle.base.dygraph.base import to_variable
                 >>> from paddle.nn import Linear
                 >>> import numpy as np
 
                 >>> data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
                 >>> with base.dygraph.guard():
                 ...     linear = Linear(32, 64)
-                ...     data = to_variable(data)
+                ...     data = paddle.to_tensor(data)
                 ...     x = linear(data)
                 ...     print(x.numpy())
 
@@ -1779,7 +1779,7 @@ def gradient(self):
                 >>> with base.dygraph.guard():
                 ...     inputs2 = []
                 ...     for _ in range(10):
-                ...         tmp = base.dygraph.base.to_variable(x)
+                ...         tmp = paddle.to_tensor(x)
                 ...         tmp.stop_gradient=False
                 ...         inputs2.append(tmp)
                 ...     ret2 = paddle.add_n(inputs2)
@@ -1797,7 +1797,7 @@ def gradient(self):
                 ...         sparse=True)
                 ...     x_data = np.arange(12).reshape(4, 3).astype('int64')
                 ...     x_data = x_data.reshape((-1, 3, 1))
-                ...     x = base.dygraph.base.to_variable(x_data)
+                ...     x = paddle.to_tensor(x_data)
                 ...     out = embedding(x)
                 ...     out.backward()
                 ...     print(embedding.weight.gradient())
@@ -1827,7 +1827,7 @@ def clear_gradient(self):
                 >>> x = np.ones([2, 2], np.float32)
                 >>> inputs2 = []
                 >>> for _ in range(10):
-                >>>     tmp = base.dygraph.base.to_variable(x)
+                >>>     tmp = paddle.to_tensor(x)
                 >>>     tmp.stop_gradient=False
                 >>>     inputs2.append(tmp)
                 >>> ret2 = paddle.add_n(inputs2)
@@ -2052,9 +2052,9 @@ def stop_gradient(self):
                 ...     value2 = np.arange(10).reshape(2, 5).astype("float32")
                 ...     linear = paddle.nn.Linear(13, 5)
                 ...     linear2 = paddle.nn.Linear(3, 3)
-                ...     a = base.dygraph.to_variable(value0)
-                ...     b = base.dygraph.to_variable(value1)
-                ...     c = base.dygraph.to_variable(value2)
+                ...     a = paddle.to_tensor(value0)
+                ...     b = paddle.to_tensor(value1)
+                ...     c = paddle.to_tensor(value2)
                 ...     out1 = linear(a)
                 ...     out2 = linear2(b)
                 ...     out1.stop_gradient = True
diff --git a/python/paddle/base/layers/math_op_patch.py b/python/paddle/base/layers/math_op_patch.py
index de95503099f913..e4b9ed5198a9e1 100644
--- a/python/paddle/base/layers/math_op_patch.py
+++ b/python/paddle/base/layers/math_op_patch.py
@@ -326,11 +326,12 @@ def astype(self, dtype):
             .. code-block:: python
 
                 >>> import paddle.base as base
+                >>> import paddle
                 >>> import numpy as np
 
                 >>> x = np.ones([2, 2], np.float32)
                 >>> with base.dygraph.guard():
-                ...     original_variable = base.dygraph.to_variable(x)
+                ...     original_variable = paddle.to_tensor(x)
                 ...     print("original var's dtype is: {}, numpy dtype is {}".format(original_variable.dtype, original_variable.numpy().dtype))
                 ...     new_variable = original_variable.astype('int64')
                 ...     print("new var's dtype is: {}, numpy dtype is {}".format(new_variable.dtype, new_variable.numpy().dtype))
diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py
index 6da2dd1a61c29d..2a691c2c4d4fc6 100644
--- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py
+++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py
@@ -21,7 +21,6 @@
 import paddle
 from paddle import _C_ops, _legacy_C_ops
 from paddle.base import core
-from paddle.base.dygraph import to_variable
 from paddle.common_ops_import import dygraph_only
 from paddle.nn import clip
 
@@ -278,9 +277,9 @@ def unscale_method(self, optimizer):
                 else:
                     param_grads_fp32.append(tgt_grad)
 
-        temp_found_inf_fp16 = to_variable(np.array([0]).astype(np.bool_))
-        temp_found_inf_bfp16 = to_variable(np.array([0]).astype(np.bool_))
-        temp_found_inf_fp32 = to_variable(np.array([0]).astype(np.bool_))
+        temp_found_inf_fp16 = paddle.to_tensor(np.array([0]).astype(np.bool_))
+        temp_found_inf_bfp16 = paddle.to_tensor(np.array([0]).astype(np.bool_))
+        temp_found_inf_fp32 = paddle.to_tensor(np.array([0]).astype(np.bool_))
 
         device = paddle.get_device().split(":")[0]
         device = "cpu" if optimizer.offload else device
diff --git a/python/paddle/distributed/fleet/utils/mix_precision_utils.py b/python/paddle/distributed/fleet/utils/mix_precision_utils.py
index 9552ac7e3eef0f..b67f738ff3c5aa 100644
--- a/python/paddle/distributed/fleet/utils/mix_precision_utils.py
+++ b/python/paddle/distributed/fleet/utils/mix_precision_utils.py
@@ -23,7 +23,6 @@
 from paddle.base import framework
 from paddle.base.dygraph import (
     base as imperative_base,
-    to_variable,
 )
 from paddle.distributed import fleet
 from paddle.distributed.fleet.utils.hybrid_parallel_util import (
@@ -215,7 +214,7 @@ def unscale_method(self, optimizer):
             assert param.main_grad.dtype == core.VarDesc.VarType.FP32
             param_grads.append(param.main_grad)
 
-    temp_found_inf = to_variable(np.array([0]).astype(np.bool_))
+    temp_found_inf = paddle.to_tensor(np.array([0]).astype(np.bool_))
    if len(param_grads):
        _legacy_C_ops.check_finite_and_unscale(
            param_grads,