diff --git a/python/paddle/apy/sys/__builtin_registry_item__.py b/python/paddle/apy/sys/__builtin_registry_item__.py
index c0ef188e17ad3a..653eee0ea83a3d 100644
--- a/python/paddle/apy/sys/__builtin_registry_item__.py
+++ b/python/paddle/apy/sys/__builtin_registry_item__.py
@@ -16,7 +16,6 @@
 
 
 class RegistryEntry:
-
     def __init__(self):
         self.__tag_name__ = None
         self.__nice__ = None
@@ -48,7 +47,6 @@ def __call__(self, tag_name, nice):
 
 
 class RegistryObject:
-
     def __init__(self, tag_name, nice):
         self.tag_name = tag_name
         self.nice = nice
@@ -56,7 +54,6 @@ def __init__(self, tag_name, nice):
 
 
 class RegisterItemDecorator:
-
     def __init__(self, register_obj):
         self.register_obj = register_obj
 
diff --git a/python/paddle/base/framework.py b/python/paddle/base/framework.py
index 51d353307c6db5..ac5ffbdf1b69ff 100644
--- a/python/paddle/base/framework.py
+++ b/python/paddle/base/framework.py
@@ -8546,7 +8546,6 @@ def set_op_roles(block, op_role, always_forward_ops):
 # there would be always_forward_ops in your region, you should use "auto_complete_op_role"
 @signature_safe_contextmanager
 def pir_op_role_guard(op_role: int = -1) -> Generator[None, None, None]:
-
     if paddle.framework.in_pir_mode():
         original_op_rope = pir.get_op_role()
         pir.set_op_role(op_role)
@@ -8559,7 +8558,6 @@ def pir_op_role_guard(op_role: int = -1) -> Generator[None, None, None]:
 
 @signature_safe_contextmanager
 def pir_chunk_id_guard(chunk_id: int = -1) -> Generator[None, None, None]:
-
     if paddle.framework.in_pir_mode():
         original_chunk_id = pir.get_chunk_id()
         pir.set_chunk_id(chunk_id)
@@ -8572,7 +8570,6 @@ def pir_chunk_id_guard(chunk_id: int = -1) -> Generator[None, None, None]:
 
 @signature_safe_contextmanager
 def pir_op_name_guard(op_name: str) -> Generator[None, None, None]:
-
     if paddle.framework.in_pir_mode() and core._is_bwd_prim_enabled():
         original_comp_op_name = pir.get_comp_op_name()
         pir.set_comp_op_name(op_name)
diff --git a/python/paddle/base/variable_index.py b/python/paddle/base/variable_index.py
index d1d428d6898fbe..8e2767917dab2f 100644
--- a/python/paddle/base/variable_index.py
+++ b/python/paddle/base/variable_index.py
@@ -805,7 +805,6 @@ def get_tensor_with_basic_indexing(
             attrs['decrease_axis'],
         )
     else:
-
         target_block = paddle.static.default_main_program().current_block()
 
         slice_out_var = target_block.create_var(
diff --git a/python/paddle/decomposition/recompute.py b/python/paddle/decomposition/recompute.py
index 3966adef0bc8d9..f743b8a8bd5339 100644
--- a/python/paddle/decomposition/recompute.py
+++ b/python/paddle/decomposition/recompute.py
@@ -752,7 +752,6 @@ def partition_joint_graph(
 def replace_mid_values_with_forward_subgraph(
     program, saved_values, mid_values, fwd_op_end_idx, backward_op_start_idx
 ):
-
     def _extract_forward_recompute_subgraph_for_backward(
         saved_values, mid_values
     ):
diff --git a/python/paddle/distributed/auto_parallel/pipelining/stage.py b/python/paddle/distributed/auto_parallel/pipelining/stage.py
index 5ba57cfbe6c727..797ea66970aba5 100644
--- a/python/paddle/distributed/auto_parallel/pipelining/stage.py
+++ b/python/paddle/distributed/auto_parallel/pipelining/stage.py
@@ -621,7 +621,6 @@ def forward_maybe_with_nosync(self, *args, **kwargs):
     def backward_maybe_with_nosync(
         self, backward_type, bwd_kwargs: dict, last_backward=False
     ) -> tuple[tuple[paddle.Tensor | None, ...], list[dict[str, Any] | None]]:
-
         def perform_backward(
             backward_type,
         ) -> Callable[
@@ -1245,7 +1244,6 @@ def _prepare_forward_infra(
         args: tuple[Any, ...],
         kwargs: dict[str, Any] | None = None,
     ) -> tuple[Any, ...]:
-
         assert num_microbatches is not None, "num_microbatches must be provided"
         outputs: tuple[Any, ...] = ()
 
diff --git a/python/paddle/distributed/auto_parallel/pipelining/utils.py b/python/paddle/distributed/auto_parallel/pipelining/utils.py
index 5cbb7e6f69c8a2..5de9c3832ec067 100644
--- a/python/paddle/distributed/auto_parallel/pipelining/utils.py
+++ b/python/paddle/distributed/auto_parallel/pipelining/utils.py
@@ -152,7 +152,6 @@ def _get_stage_mesh(stage_index, pp_group_size, style=None):
     if style is not None:
         raise ValueError(f"Unknown style: {style}, style can be None, v.")
     else:
-
         pp_idx = stage_index % pp_group_size
         return _get_pp_mesh(pp_idx)
 
diff --git a/python/paddle/distributed/auto_parallel/process_mesh.py b/python/paddle/distributed/auto_parallel/process_mesh.py
index c4ccd43b12619c..3c968d8f6c5b02 100644
--- a/python/paddle/distributed/auto_parallel/process_mesh.py
+++ b/python/paddle/distributed/auto_parallel/process_mesh.py
@@ -447,7 +447,6 @@ def get_group(
         if hasattr(fleet.fleet, "_hcg"):
             hcg = fleet.get_hybrid_communicate_group()
             if hcg is not None:
-
                 parallel_group_map = {
                     "pp": hcg.get_pipe_parallel_group,
                     "dp": hcg.get_data_parallel_group,
diff --git a/python/paddle/distributed/auto_parallel/static/pir_pass.py b/python/paddle/distributed/auto_parallel/static/pir_pass.py
index 041f1a33e88231..c5517dd72040ba 100644
--- a/python/paddle/distributed/auto_parallel/static/pir_pass.py
+++ b/python/paddle/distributed/auto_parallel/static/pir_pass.py
@@ -232,7 +232,6 @@ def apply_partition_pass(program, block=None):
 
 
 class ReshardPasses:
-
     @staticmethod
     def decompose_reshard_pass(dist_program):
         # split composed reshard op into atomic reshard ops, which would increase the opportunity of reshard Re-Use in following fold_reshard_pass.
@@ -445,7 +444,6 @@ def remove_sub_block_unused_inputs(op):
 
 
 class RemovePasses:
-
     @staticmethod
     def remove_other_rank_op_pass(dist_program):
         # pruning op and value not belong to cur rank
@@ -1855,7 +1853,6 @@ def fuse_attention_ffn_qkv_pass(
 
     # Fuse params and init pir program fusion params.
     with paddle.base.dygraph.guard():
-
         dyparam_dtype = concated_dy_param_list[0].dtype
         for param in concated_dy_param_list:
             assert (
diff --git a/python/paddle/distributed/auto_parallel/static/reshard_funcs/global_to_sub_mesh_func.py b/python/paddle/distributed/auto_parallel/static/reshard_funcs/global_to_sub_mesh_func.py
index 3a6cf195cb320b..a33615f6616127 100644
--- a/python/paddle/distributed/auto_parallel/static/reshard_funcs/global_to_sub_mesh_func.py
+++ b/python/paddle/distributed/auto_parallel/static/reshard_funcs/global_to_sub_mesh_func.py
@@ -23,7 +23,6 @@
 
 class GlobalToSubMeshFunction(ReshardFunction):
     def is_suitable(self, src_dist_attr, dst_dist_attr):
-
         # NOTE we could allow the src_dist_attr is not replicated and reshard it as replicated before go through the global_to_sub logic
         # but the dst_dist_attr should be replicated otherwise there will be un-defined result when change the mesh.
         if not is_replicated(dst_dist_attr):
@@ -39,7 +38,6 @@ def is_suitable(self, src_dist_attr, dst_dist_attr):
         return out_mesh in sub_meshes
 
     def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type):
-
         # reshard operand as replicated before change the mesh.
         if not is_replicated(src_dist_attr):
             tmp_dist_attr = (
diff --git a/python/paddle/distributed/auto_parallel/static/tuner/to_distributed_api_patterns.py b/python/paddle/distributed/auto_parallel/static/tuner/to_distributed_api_patterns.py
index 744cddfadbbae9..bb1aeae0342d47 100644
--- a/python/paddle/distributed/auto_parallel/static/tuner/to_distributed_api_patterns.py
+++ b/python/paddle/distributed/auto_parallel/static/tuner/to_distributed_api_patterns.py
@@ -553,7 +553,6 @@ def apply(
         value_states,
         attention_mask,
     ):
-
         bsz, q_len, num_heads, head_dim = query_states.shape
         _, kv_seq_len, _, _ = value_states.shape
 
@@ -1263,7 +1262,6 @@ def apply(x, w1, b1, w2, b2):
 
 
 def match_pattern(pattern, program):
-
     def _compare_op_node(src, tgt):
         """Compare whether two op nodes are equivalent."""
         if src.name() != tgt.name():
diff --git a/python/paddle/distributed/communicator.py b/python/paddle/distributed/communicator.py
index d590e8a7b59bb2..d424f576697841 100755
--- a/python/paddle/distributed/communicator.py
+++ b/python/paddle/distributed/communicator.py
@@ -30,6 +30,7 @@
 Communicator is used for async distribute training in distribute_transpiler mode.
 It's a wrapper of a cpp class Communicator and should be used inside fleet API.
 """
+
 import paddle
 from paddle.distributed.ps.utils.public import DistributedMode
 from paddle.framework import core
diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py
index f79dd4c11bdd6f..7a1088741807cb 100755
--- a/python/paddle/distributed/fleet/base/role_maker.py
+++ b/python/paddle/distributed/fleet/base/role_maker.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Definition of Role Makers."""
+
 from __future__ import annotations
 
 import os
diff --git a/python/paddle/distributed/fleet/fleet.py b/python/paddle/distributed/fleet/fleet.py
index c3fe8e378bd03f..2fa2221a5228da 100755
--- a/python/paddle/distributed/fleet/fleet.py
+++ b/python/paddle/distributed/fleet/fleet.py
@@ -39,7 +39,6 @@
 from .utils.log_util import logger, set_log_level
 
 if TYPE_CHECKING:
-
     from collections.abc import (
         Callable,
         Iterable,
diff --git a/python/paddle/distributed/fleet/meta_parallel/dualpipev.py b/python/paddle/distributed/fleet/meta_parallel/dualpipev.py
index e365198920e6e4..236ee874633ecb 100644
--- a/python/paddle/distributed/fleet/meta_parallel/dualpipev.py
+++ b/python/paddle/distributed/fleet/meta_parallel/dualpipev.py
@@ -759,7 +759,6 @@ def forward_backward_pipeline(
                     main_stage=True,
                 )
             else:
-
                 self._forward_backward_pass(
                     0,
                     1,
diff --git a/python/paddle/distributed/fleet/meta_parallel/zero_bubble_utils.py b/python/paddle/distributed/fleet/meta_parallel/zero_bubble_utils.py
index 28866837ef9914..7cb6caf7013614 100644
--- a/python/paddle/distributed/fleet/meta_parallel/zero_bubble_utils.py
+++ b/python/paddle/distributed/fleet/meta_parallel/zero_bubble_utils.py
@@ -27,7 +27,6 @@
 
 
 class WeightGradStore:
-
     enabled = False
     cache = []
     funcs_queue = queue.Queue()
@@ -55,7 +54,6 @@ def clear(cls) -> None:
 
 
 class EventStore:
-
     event = None
 
     @classmethod
diff --git a/python/paddle/distributed/launch/job/container.py b/python/paddle/distributed/launch/job/container.py
index 65b92c5d187c25..ac83b118da3ed7 100644
--- a/python/paddle/distributed/launch/job/container.py
+++ b/python/paddle/distributed/launch/job/container.py
@@ -168,7 +168,6 @@ def status(self):
         return Status.FAILED
 
     def __str__(self):
-
         need_print = os.environ.get('FLAGS_print_launcher_env', 'false').lower()
         if need_print == 'true' or need_print == '1':
             return f'Container rank {self._rank} status {self.status} cmd {self._entrypoint} code {self.exit_code} log {self.errfile} \nenv {self._env}'
diff --git a/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_1f1b.py b/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_1f1b.py
index 5a87e2863d0254..7fe4e91beff335 100644
--- a/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_1f1b.py
+++ b/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_1f1b.py
@@ -34,7 +34,6 @@
 
 @register_pass("pipeline_scheduler_1F1B")
 class Pipeline1F1BPass(PipelinePassBase):
-
     def __init__(self):
         super().__init__()
         self.jobs_in_stable_phase = [self.BACKWARD, self.FORWARD]
diff --git a/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_pass_base.py b/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_pass_base.py
index 061b38ed5a0aeb..6508123049e2e7 100644
--- a/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_pass_base.py
+++ b/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_pass_base.py
@@ -27,7 +27,6 @@
 
 
 class PipelinePassBase(PassBase):
-
     # Pipeline stages
     RECV_FORWARD = "recv_forward"
     SEND_BACKWARD = "send_backward"
diff --git a/python/paddle/distributed/transpiler/geo_sgd_transpiler.py b/python/paddle/distributed/transpiler/geo_sgd_transpiler.py
index fd777f49ecf641..aa0df44a75284a 100644
--- a/python/paddle/distributed/transpiler/geo_sgd_transpiler.py
+++ b/python/paddle/distributed/transpiler/geo_sgd_transpiler.py
@@ -24,6 +24,7 @@
 4. append sum ops that should run on current server instance.
 5. add listen_and_serv op
 """
+
 import collections
 
 from paddle import framework
diff --git a/python/paddle/incubate/cc/ap/apy_to_axpr_json.py b/python/paddle/incubate/cc/ap/apy_to_axpr_json.py
index d6e4a9cee0f845..b498997fe635bf 100644
--- a/python/paddle/incubate/cc/ap/apy_to_axpr_json.py
+++ b/python/paddle/incubate/cc/ap/apy_to_axpr_json.py
@@ -31,7 +31,6 @@ def convert_python_stmts_to_axpr_json(python_code_stmts_str):
 
 @dataclass
 class AnfExpr:
-
     def DumpToFileAsJson(self, file_name):
         with open(file_name, "w") as f:
             json.dump(self.value, f, indent=2)