From 8d1a5a6e792c39cffbfdd5ddf33f8ed2aa02039f Mon Sep 17 00:00:00 2001 From: yjjiang11 Date: Thu, 29 Dec 2022 01:28:09 +0000 Subject: [PATCH 1/3] rm legacy --- python/paddle/common_ops_import.py | 1 - python/paddle/fluid/dygraph/base.py | 1 - python/paddle/fluid/dygraph/math_op_patch.py | 10 +- python/paddle/fluid/dygraph/nn.py | 171 +++++++----------- python/paddle/fluid/dygraph/parallel.py | 34 +--- .../fluid/dygraph/varbase_patch_methods.py | 2 +- 6 files changed, 71 insertions(+), 148 deletions(-) diff --git a/python/paddle/common_ops_import.py b/python/paddle/common_ops_import.py index 1c2bb424dc18b8..91a3f49cdbba2a 100644 --- a/python/paddle/common_ops_import.py +++ b/python/paddle/common_ops_import.py @@ -24,7 +24,6 @@ OpProtoHolder, Variable, _dygraph_tracer, - _in_legacy_dygraph, _non_static_mode, _varbase_creator, convert_np_dtype_to_dtype_, diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index fa0c12e16082db..648b73cb7c47ba 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -27,7 +27,6 @@ import warnings from ..framework import ( _get_paddle_place, - _in_legacy_dygraph, _in_eager_without_dygraph_check, ) import paddle diff --git a/python/paddle/fluid/dygraph/math_op_patch.py b/python/paddle/fluid/dygraph/math_op_patch.py index 6a864efc42eedc..e1f37cb8c7a91f 100644 --- a/python/paddle/fluid/dygraph/math_op_patch.py +++ b/python/paddle/fluid/dygraph/math_op_patch.py @@ -17,7 +17,6 @@ Variable, convert_np_dtype_to_dtype_, _varbase_creator, - _in_legacy_dygraph, in_dygraph_mode, ) from ..layers.layer_function_generator import OpProtoHolder @@ -124,10 +123,6 @@ def astype(self, dtype): if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) - if _in_legacy_dygraph(): - return _legacy_C_ops.cast( - self, 'in_dtype', self.dtype, 'out_dtype', dtype - ) return _C_ops.cast(self, dtype) def _scalar_elementwise_op_(var, scale, bias): @@ -194,10 +189,7 @@ def _T_(var): perm = [] for i in range(len(var.shape)): perm.insert(0, i) - if _in_legacy_dygraph(): - out, _ = _legacy_C_ops.transpose2(var, 'axis', perm) - else: - out = _C_ops.transpose(var, perm) + out = _C_ops.transpose(var, perm) return out def _scalar_add_(var, value): diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 8a833eb7a04e86..fb6fc21af6b1d8 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -28,7 +28,6 @@ default_main_program, _global_flags, in_dygraph_mode, - _in_legacy_dygraph, ) from ..data_feeder import ( @@ -247,115 +246,81 @@ def forward(self, input): # variance and variance out share the same memory variance_out = self._variance - if _non_static_mode(): - if in_dygraph_mode(): - batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( - input, - self._mean, - self._variance, - self.weight, - self.bias, - not self.training, - self._momentum, - self._epsilon, - self._data_layout, - self._use_global_stats, - self._trainable_statistics, - ) - return dygraph_utils._append_activation_in_dygraph( - batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn - ) - - elif _in_legacy_dygraph(): - attrs = ( - "momentum", - self._momentum, - "epsilon", - self._epsilon, - "is_test", - not self.training, - "data_layout", - self._data_layout, - "use_mkldnn", - self._use_mkldnn, - "fuse_with_relu", - self._fuse_with_relu, - "use_global_stats", - self._use_global_stats, - 'trainable_statistics', - self._trainable_statistics, - ) - batch_norm_out, 
_, _, _, _, _ = _legacy_C_ops.batch_norm( - input, - self.weight, - self.bias, - self._mean, - self._variance, - None, - mean_out, - variance_out, - *attrs - ) - + if in_dygraph_mode(): + batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( + input, + self._mean, + self._variance, + self.weight, + self.bias, + not self.training, + self._momentum, + self._epsilon, + self._data_layout, + self._use_global_stats, + self._trainable_statistics, + ) return dygraph_utils._append_activation_in_dygraph( batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn ) + else: + check_variable_and_dtype( + input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm' + ) - check_variable_and_dtype( - input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm' - ) - - attrs = { - "momentum": self._momentum, - "epsilon": self._epsilon, - "is_test": self._is_test, - "data_layout": self._data_layout, - "use_mkldnn": False, - "fuse_with_relu": self._fuse_with_relu, - "use_global_stats": self._use_global_stats, - "trainable_statistics": self._trainable_statistics, - } - - inputs = { - "X": [input], - "Scale": [self.weight], - "Bias": [self.bias], - "Mean": [self._mean], - "Variance": [self._variance], - } - - saved_mean = self._helper.create_variable_for_type_inference( - dtype=self._dtype, stop_gradient=True - ) - saved_variance = self._helper.create_variable_for_type_inference( - dtype=self._dtype, stop_gradient=True - ) - reserve_space = self._helper.create_variable_for_type_inference( - dtype=self._helper.input_dtype(input), stop_gradient=True - ) - - batch_norm_out = ( - input - if self._in_place - else self._helper.create_variable_for_type_inference(self._dtype) - ) + attrs = { + "momentum": self._momentum, + "epsilon": self._epsilon, + "is_test": self._is_test, + "data_layout": self._data_layout, + "use_mkldnn": False, + "fuse_with_relu": self._fuse_with_relu, + "use_global_stats": self._use_global_stats, + "trainable_statistics": self._trainable_statistics, + } + + inputs = { + "X": [input], + "Scale": [self.weight], + "Bias": [self.bias], + "Mean": [self._mean], + "Variance": [self._variance], + } + + saved_mean = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True + ) + saved_variance = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True + ) + reserve_space = self._helper.create_variable_for_type_inference( + dtype=self._helper.input_dtype(input), stop_gradient=True + ) - outputs = { - "Y": [batch_norm_out], - "MeanOut": [mean_out], - "VarianceOut": [variance_out], - "SavedMean": [saved_mean], - "SavedVariance": [saved_variance], - } - if reserve_space is not None: - outputs["ReserveSpace"] = [reserve_space] + batch_norm_out = ( + input + if self._in_place + else self._helper.create_variable_for_type_inference( + self._dtype + ) + ) - self._helper.append_op( - type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs - ) + outputs = { + "Y": [batch_norm_out], + "MeanOut": [mean_out], + "VarianceOut": [variance_out], + "SavedMean": [saved_mean], + "SavedVariance": [saved_variance], + } + if reserve_space is not None: + outputs["ReserveSpace"] = [reserve_space] + + self._helper.append_op( + type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs + ) - # Currently, we don't support inplace in dygraph mode - return self._helper.append_activation(batch_norm_out, self._act) + # Currently, we don't support inplace in dygraph mode + return self._helper.append_activation(batch_norm_out, self._act) class 
RowConv(layers.Layer): diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index 84a011e6fb2b23..9dd8eb783613a3 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -32,7 +32,6 @@ from paddle.fluid.dygraph import base as imperative_base from paddle.fluid.framework import ( ParamBase, - _in_legacy_dygraph, _non_static_mode, in_dygraph_mode, ) @@ -302,23 +301,7 @@ def _reshape_inplace(x, shape): @framework.dygraph_only def _split_tensors(coalesced_grads_and_grad_vars): - if _in_legacy_dygraph(): - for ( - coalesced_grad, - origin_grad_vars, - grad_shapes, - ) in coalesced_grads_and_grad_vars: - grad_var_len = [np.prod(g_shape) for g_shape in grad_shapes] - framework._dygraph_tracer().trace_op( - type='split', - inputs={'X': coalesced_grad}, - outputs={'Out': origin_grad_vars}, - attrs={'sections': grad_var_len, 'axis': 0}, - ) - for g_var, g_shape in zip(origin_grad_vars, grad_shapes): - _reshape_inplace(x=g_var, shape=g_shape) - assert g_var.shape == g_shape - elif in_dygraph_mode(): + if in_dygraph_mode(): for ( coalesced_grad, origin_grad_vars, @@ -704,21 +687,6 @@ def check_layer_sparse(sublayer): [self.last_comm_buffer_size, self.comm_buffer_size], self.find_unused_parameters, ) - elif _in_legacy_dygraph(): - self.group_indices = core.assign_group_by_size( - trainable_parameters, - is_sparse_gradient, - [self.last_comm_buffer_size, self.comm_buffer_size], - ) - - self._reducer = core.Reducer( - trainable_parameters, - list(reversed(self.group_indices)), - is_sparse_gradient, - parallel_helper.__parallel_ctx__clz__, - [self.last_comm_buffer_size, self.comm_buffer_size], - self.find_unused_parameters, - ) def _find_varbase(self, obj): var_type = core.eager.Tensor if in_dygraph_mode() else core.VarBase diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 3a94b51219e361..3b89aa5115740c 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -20,7 +20,7 @@ import paddle from .. import framework -from ..framework import convert_np_dtype_to_dtype_, _in_legacy_dygraph +from ..framework import convert_np_dtype_to_dtype_ from .. import core from .. 
import unique_name from ..framework import ( From 0d83390d86333c407244782b12c818a9d571da96 Mon Sep 17 00:00:00 2001 From: yjjiang11 Date: Thu, 29 Dec 2022 01:59:04 +0000 Subject: [PATCH 2/3] clear in_legacy --- python/paddle/fluid/dygraph/tracer.py | 33 +++---- python/paddle/fluid/framework.py | 99 ++++++------------- .../fluid/layers/layer_function_generator.py | 1 - .../paddle/fluid/tests/unittests/op_test.py | 6 +- python/paddle/framework/__init__.py | 1 - python/paddle/nn/functional/norm.py | 85 +++++++--------- 6 files changed, 81 insertions(+), 144 deletions(-) diff --git a/python/paddle/fluid/dygraph/tracer.py b/python/paddle/fluid/dygraph/tracer.py index e13fdac0e734b0..8c500522e0b0f5 100644 --- a/python/paddle/fluid/dygraph/tracer.py +++ b/python/paddle/fluid/dygraph/tracer.py @@ -306,29 +306,18 @@ def trace_op( stop_gradient=False, inplace_map=None, ): - if not framework._in_legacy_dygraph(): - # inputs : {"sum": [tensor], ...} - # outputs : {"sum": [tensor], ...} - if type in name_mapping.keys(): - type = name_mapping[type]["final_op_name"] - - assert type in _legacy_C_ops.__dict__ - self.eager_trace_op( - type, inputs, outputs, attrs, stop_gradient, inplace_map - ) - else: - self.eager_legacy_trace_op( - type, inputs, outputs, attrs, stop_gradient, inplace_map - ) + # inputs : {"sum": [tensor], ...} + # outputs : {"sum": [tensor], ...} + if type in name_mapping.keys(): + type = name_mapping[type]["final_op_name"] + + assert type in _legacy_C_ops.__dict__ + self.eager_trace_op( + type, inputs, outputs, attrs, stop_gradient, inplace_map + ) else: - self.trace( - type, - inputs, - outputs, - attrs, - framework._current_expected_place(), - self._has_grad and not stop_gradient, - inplace_map if inplace_map else {}, + self.eager_legacy_trace_op( + type, inputs, outputs, attrs, stop_gradient, inplace_map ) def train_mode(self): diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index d56dbde378abf0..a2ae6927db424b 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -98,11 +98,10 @@ # 2. dygraph_mode(): # This flags inidicates we are now running in dygraph mode which called eager mode before. # 3. _in_legacy_dygraph(): -# This flags inidicates we are now running in legacy dygraph mode +# This flags has been deprecated # # They have a relation ship as below: -# Both dygraph_mode and _in_legacy_dygraph are _non_static_mode, but if you are running in -# dygraph mode means you are not in _in_legacy_dygraph. +# Since _in_legacy_graph is deprecated, so dygraph_mode is _non_static_mode # # Why we have to make different of _in_legacy_dygraph and dygraph_mode? 
# In some performance issue, we find that python if statement cause server performance problem @@ -237,10 +236,6 @@ def in_dygraph_mode(): return (_dygraph_tracer_ is not None) and _in_eager_mode_ -def _in_legacy_dygraph(): - return (not _in_eager_mode_) and (_dygraph_tracer_ is not None) - - def _non_static_mode(): return _dygraph_tracer_ is not None @@ -1334,8 +1329,6 @@ def __instancecheck__(cls, instance): if in_dygraph_mode(): return issubclass(t, core.eager.Tensor) else: - if _in_legacy_dygraph(): - return issubclass(t, core.VarBase) return issubclass(t, Variable) @@ -1346,8 +1339,6 @@ def __instancecheck__(cls, instance): if in_dygraph_mode(): return issubclass(t, EagerParamBase) else: - if _in_legacy_dygraph(): - return issubclass(t, ParamBase) return issubclass(t, Parameter) @@ -3893,31 +3884,18 @@ def _rename_var(self, name, new_name): error_clip=error_clip, ) else: - if _in_legacy_dygraph(): - var = ParamBase( - d.shape(), - d.dtype(), - type=orig_var_type, - name=new_name, - stop_gradient=stop_gradient, - trainable=trainable, - optimize_attr=optimize_attr, - regularizer=regularizer, - error_clip=error_clip, - ) - else: - var = Parameter( - self, - d.shape(), - d.dtype(), - type=orig_var_type, - name=new_name, - stop_gradient=stop_gradient, - trainable=trainable, - optimize_attr=optimize_attr, - regularizer=regularizer, - error_clip=error_clip, - ) + var = Parameter( + self, + d.shape(), + d.dtype(), + type=orig_var_type, + name=new_name, + stop_gradient=stop_gradient, + trainable=trainable, + optimize_attr=optimize_attr, + regularizer=regularizer, + error_clip=error_clip, + ) elif var_type == "Variable": var = Variable( self, @@ -3946,10 +3924,7 @@ def create_parameter(self, *args, **kwargs): if in_dygraph_mode(): param = EagerParamBase(*args, **kwargs) else: - if _in_legacy_dygraph(): - param = ParamBase(*args, **kwargs) - else: - param = Parameter(global_block, *args, **kwargs) + param = Parameter(global_block, *args, **kwargs) if 'initializer' in kwargs: @@ -4262,35 +4237,21 @@ def _copy_param_info_from(self, other): name=v.name, ) else: - if _in_legacy_dygraph(): - new_p = ParamBase( - shape=v.shape, - dtype=v.dtype, - type=v.type, - lod_level=v.lod_level, - stop_gradient=p.stop_gradient, - trainable=p.trainable, - optimize_attr=p.optimize_attr, - regularizer=p.regularizer, - error_clip=p.error_clip, - name=v.name, - ) - else: - new_p = Parameter( - block=self, - shape=v.shape, - dtype=v.dtype, - type=v.type, - lod_level=v.lod_level - if v.type == core.VarDesc.VarType.LOD_TENSOR - else None, - stop_gradient=p.stop_gradient, - trainable=p.trainable, - optimize_attr=p.optimize_attr, - regularizer=p.regularizer, - error_clip=p.error_clip, - name=v.name, - ) + new_p = Parameter( + block=self, + shape=v.shape, + dtype=v.dtype, + type=v.type, + lod_level=v.lod_level + if v.type == core.VarDesc.VarType.LOD_TENSOR + else None, + stop_gradient=p.stop_gradient, + trainable=p.trainable, + optimize_attr=p.optimize_attr, + regularizer=p.regularizer, + error_clip=p.error_clip, + name=v.name, + ) self.vars[new_p.name] = new_p def _clone_variable(self, var, force_persistable=True): diff --git a/python/paddle/fluid/layers/layer_function_generator.py b/python/paddle/fluid/layers/layer_function_generator.py index bb5d06157e1204..6e4b1f836f020c 100644 --- a/python/paddle/fluid/layers/layer_function_generator.py +++ b/python/paddle/fluid/layers/layer_function_generator.py @@ -272,7 +272,6 @@ def func(x, name=None): op = getattr(_C_ops, op_type) return op(x) # TODO(dev): Because some ops' yaml 
has not been migrated. - # Replace it with _in_legacy_dygraph while all yaml work is done. if in_dygraph_mode() and hasattr(_legacy_C_ops, op_type): op = getattr(_legacy_C_ops, op_type) return op(x) diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index 9728edf5d1c04f..a4350c51ca7ad0 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -38,8 +38,8 @@ _dygraph_tracer, _enable_legacy_dygraph, _in_eager_without_dygraph_check, - _in_legacy_dygraph, _test_eager_guard, + in_dygraph_mode, ) from paddle.fluid.op import Operator from paddle.jit.dy2static.utils import parse_arg_and_kwargs @@ -716,7 +716,7 @@ def create_var(np_value, name, is_input, if_return_inputs_grad_dict): if if_return_inputs_grad_dict: v.stop_gradient = False - if not _in_legacy_dygraph(): + if in_dygraph_mode(): v.retain_grads() if has_lod: @@ -2515,7 +2515,7 @@ def _get_dygraph_grad( for no_grad_val in no_grad_set: del inputs[no_grad_val] - if not _in_legacy_dygraph(): + if in_dygraph_mode(): core.eager.run_backward( fluid.layers.utils.flatten(outputs), grad_outputs, False ) diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index 99d9cffed1fa33..986b8e93ae6828 100755 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -64,7 +64,6 @@ from ..fluid.layer_helper import LayerHelper # noqa: F401 from ..fluid.framework import in_dygraph_mode # noqa: F401 -from ..fluid.framework import _in_legacy_dygraph # noqa: F401 from ..fluid.framework import _global_flags # noqa: F401 from ..fluid.framework import _apply_pass # noqa: F401 from ..fluid.framework import switch_main_program diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index 4d5bac573c5271..42f2ff170786ba 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -17,8 +17,8 @@ # TODO: define normalization api import paddle import paddle.fluid as fluid -from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode -from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode +from paddle import _C_ops, in_dynamic_mode +from paddle.fluid.framework import in_dygraph_mode from ...fluid import dygraph_utils from ...fluid.data_feeder import check_type, check_variable_and_dtype @@ -336,54 +336,43 @@ def layer_norm( out, _, _ = _C_ops.layer_norm(x, weight, bias, epsilon, begin_norm_axis) return out - if _in_legacy_dygraph(): - out, _, _ = _legacy_C_ops.layer_norm( - x, - weight, - bias, - 'epsilon', - epsilon, - 'begin_norm_axis', - begin_norm_axis, + else: + check_variable_and_dtype( + x, 'input', ['float16', 'float32', 'float64'], 'LayerNorm' + ) + + inputs = dict() + inputs['X'] = [x] + if weight: + inputs['Scale'] = [weight] + if bias: + inputs['Bias'] = [bias] + attrs = {"epsilon": epsilon, "begin_norm_axis": begin_norm_axis} + + # create output + helper = LayerHelper('layer_norm', **locals()) + + dtype = x.dtype + mean_out = helper.create_variable_for_type_inference( + dtype=dtype, stop_gradient=True + ) + variance_out = helper.create_variable_for_type_inference( + dtype=dtype, stop_gradient=True + ) + layer_norm_out = helper.create_variable_for_type_inference(dtype) + + helper.append_op( + type="layer_norm", + inputs=inputs, + outputs={ + "Y": layer_norm_out, + "Mean": mean_out, + "Variance": variance_out, + }, + attrs={"epsilon": epsilon, "begin_norm_axis": begin_norm_axis}, ) - return out - 
check_variable_and_dtype( - x, 'input', ['float16', 'float32', 'float64'], 'LayerNorm' - ) - - inputs = dict() - inputs['X'] = [x] - if weight: - inputs['Scale'] = [weight] - if bias: - inputs['Bias'] = [bias] - attrs = {"epsilon": epsilon, "begin_norm_axis": begin_norm_axis} - - # create output - helper = LayerHelper('layer_norm', **locals()) - - dtype = x.dtype - mean_out = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True - ) - variance_out = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True - ) - layer_norm_out = helper.create_variable_for_type_inference(dtype) - - helper.append_op( - type="layer_norm", - inputs=inputs, - outputs={ - "Y": layer_norm_out, - "Mean": mean_out, - "Variance": variance_out, - }, - attrs={"epsilon": epsilon, "begin_norm_axis": begin_norm_axis}, - ) - - return helper.append_activation(layer_norm_out) + return helper.append_activation(layer_norm_out) def instance_norm( From 61b529b16d7962fbf6fdb67d8627ec713d146a20 Mon Sep 17 00:00:00 2001 From: yjjiang11 Date: Thu, 29 Dec 2022 07:15:50 +0000 Subject: [PATCH 3/3] fix --- python/paddle/fluid/dygraph/tracer.py | 33 ++++++---- python/paddle/fluid/framework.py | 65 ++++++++++--------- .../paddle/fluid/tests/unittests/op_test.py | 19 ++---- 3 files changed, 61 insertions(+), 56 deletions(-) diff --git a/python/paddle/fluid/dygraph/tracer.py b/python/paddle/fluid/dygraph/tracer.py index 8c500522e0b0f5..74826c9a6bcccc 100644 --- a/python/paddle/fluid/dygraph/tracer.py +++ b/python/paddle/fluid/dygraph/tracer.py @@ -306,18 +306,29 @@ def trace_op( stop_gradient=False, inplace_map=None, ): - # inputs : {"sum": [tensor], ...} - # outputs : {"sum": [tensor], ...} - if type in name_mapping.keys(): - type = name_mapping[type]["final_op_name"] - - assert type in _legacy_C_ops.__dict__ - self.eager_trace_op( - type, inputs, outputs, attrs, stop_gradient, inplace_map - ) + if framework.in_dygraph_mode(): + # inputs : {"sum": [tensor], ...} + # outputs : {"sum": [tensor], ...} + if type in name_mapping.keys(): + type = name_mapping[type]["final_op_name"] + + assert type in _legacy_C_ops.__dict__ + self.eager_trace_op( + type, inputs, outputs, attrs, stop_gradient, inplace_map + ) + else: + self.eager_legacy_trace_op( + type, inputs, outputs, attrs, stop_gradient, inplace_map + ) else: - self.eager_legacy_trace_op( - type, inputs, outputs, attrs, stop_gradient, inplace_map + self.trace( + type, + inputs, + outputs, + attrs, + framework._current_expected_place(), + self._has_grad and not stop_gradient, + inplace_map if inplace_map else {}, ) def train_mode(self): diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index a2ae6927db424b..0b4556277315c0 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -98,10 +98,11 @@ # 2. dygraph_mode(): # This flags inidicates we are now running in dygraph mode which called eager mode before. # 3. _in_legacy_dygraph(): -# This flags has been deprecated +# This flags inidicates we are now running in legacy dygraph mode # # They have a relation ship as below: -# Since _in_legacy_graph is deprecated, so dygraph_mode is _non_static_mode +# Both dygraph_mode and _in_legacy_dygraph are _non_static_mode, but if you are running in +# dygraph mode means you are not in _in_legacy_dygraph. # # Why we have to make different of _in_legacy_dygraph and dygraph_mode? 
# In some performance issue, we find that python if statement cause server performance problem @@ -236,6 +237,7 @@ def in_dygraph_mode(): return (_dygraph_tracer_ is not None) and _in_eager_mode_ + def _non_static_mode(): return _dygraph_tracer_ is not None @@ -250,7 +252,8 @@ def _test_eager_guard(place=None): try: yield finally: - pass + if not already_fallback: + _enable_legacy_dygraph() global_ipu_index = -1 @@ -3884,18 +3887,18 @@ def _rename_var(self, name, new_name): error_clip=error_clip, ) else: - var = Parameter( - self, - d.shape(), - d.dtype(), - type=orig_var_type, - name=new_name, - stop_gradient=stop_gradient, - trainable=trainable, - optimize_attr=optimize_attr, - regularizer=regularizer, - error_clip=error_clip, - ) + var = Parameter( + self, + d.shape(), + d.dtype(), + type=orig_var_type, + name=new_name, + stop_gradient=stop_gradient, + trainable=trainable, + optimize_attr=optimize_attr, + regularizer=regularizer, + error_clip=error_clip, + ) elif var_type == "Variable": var = Variable( self, @@ -3924,7 +3927,7 @@ def create_parameter(self, *args, **kwargs): if in_dygraph_mode(): param = EagerParamBase(*args, **kwargs) else: - param = Parameter(global_block, *args, **kwargs) + param = Parameter(global_block, *args, **kwargs) if 'initializer' in kwargs: @@ -4237,21 +4240,21 @@ def _copy_param_info_from(self, other): name=v.name, ) else: - new_p = Parameter( - block=self, - shape=v.shape, - dtype=v.dtype, - type=v.type, - lod_level=v.lod_level - if v.type == core.VarDesc.VarType.LOD_TENSOR - else None, - stop_gradient=p.stop_gradient, - trainable=p.trainable, - optimize_attr=p.optimize_attr, - regularizer=p.regularizer, - error_clip=p.error_clip, - name=v.name, - ) + new_p = Parameter( + block=self, + shape=v.shape, + dtype=v.dtype, + type=v.type, + lod_level=v.lod_level + if v.type == core.VarDesc.VarType.LOD_TENSOR + else None, + stop_gradient=p.stop_gradient, + trainable=p.trainable, + optimize_attr=p.optimize_attr, + regularizer=p.regularizer, + error_clip=p.error_clip, + name=v.name, + ) self.vars[new_p.name] = new_p def _clone_variable(self, var, force_persistable=True): diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index a4350c51ca7ad0..82f99dcc8fde5c 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -39,7 +39,6 @@ _enable_legacy_dygraph, _in_eager_without_dygraph_check, _test_eager_guard, - in_dygraph_mode, ) from paddle.fluid.op import Operator from paddle.jit.dy2static.utils import parse_arg_and_kwargs @@ -716,7 +715,7 @@ def create_var(np_value, name, is_input, if_return_inputs_grad_dict): if if_return_inputs_grad_dict: v.stop_gradient = False - if in_dygraph_mode(): + if hasattr(v, "retain_grads"): v.retain_grads() if has_lod: @@ -2515,22 +2514,14 @@ def _get_dygraph_grad( for no_grad_val in no_grad_set: del inputs[no_grad_val] - if in_dygraph_mode(): - core.eager.run_backward( + core.eager.run_backward( fluid.layers.utils.flatten(outputs), grad_outputs, False ) - grad_inputs = [] - for inputs_list in inputs.values(): + grad_inputs = [] + for inputs_list in inputs.values(): for inp in inputs_list: grad_inputs.append(inp.grad.numpy()) - return grad_inputs - else: - grad_inputs = paddle.grad( - outputs=fluid.layers.utils.flatten(outputs), - inputs=fluid.layers.utils.flatten(inputs), - grad_outputs=grad_outputs, - ) - return [grad.numpy() for grad in grad_inputs] + return grad_inputs @staticmethod def _numpy_to_lod_tensor(np_value, lod, 
place):
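
Note on the recurring pattern in this series: each refactored call site drops the `_in_legacy_dygraph()` branch (which dispatched to `_legacy_C_ops`), keeping only the eager `in_dygraph_mode()` path and, where one existed, the static-graph fallback built through a `LayerHelper`; patch 3 then restores the tracer's `self.trace(...)` fallback and the mode-flag comments that patch 2 had removed. Below is a minimal sketch of the resulting two-branch shape, for illustration only: `some_op`, `_C_ops.some_op`, and `some_op_wrapper` are placeholders, not real Paddle operators or APIs.

```python
from paddle import _C_ops
from paddle.fluid.framework import in_dygraph_mode


def some_op_wrapper(x, helper):
    # Previously there was a third branch here:
    #   elif _in_legacy_dygraph():
    #       return _legacy_C_ops.some_op(x, ...)
    # This series removes that branch, so only two paths remain.
    if in_dygraph_mode():
        # Eager mode: call the generated C++ op binding directly.
        return _C_ops.some_op(x)
    # Static graph mode: declare the op on the program via a LayerHelper.
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(
        type="some_op", inputs={"X": [x]}, outputs={"Out": [out]}
    )
    return out
```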