[CodeStyle] Move `black` to `ruff format`, initial `pre-commit` config setup for mix check mode - part 22 #74677

gouzil · 2025-08-17T15:20:38Z

PR Category

User Experience

PR Types

Not User Facing

Description

从本 PR 开始，Python format 将进入“混动模式”（black 与 ruff format 并存）。
由于 exclude 中不能写注释，因此无法通过添加注释来防止冲突。
计划以 3 级目录为最大 exclude 粒度进行推进；对于文件较多的文件夹，则按首个差异字母进行推进。

可以使用下面的代码查看哪些文件夹是可以由 ruff format 完全接管的：

from __future__ import annotations

import argparse
import os
import subprocess
import sys
from collections import defaultdict
from pathlib import Path


def get_files_needing_format(paths=None):
    """
    Run ``ruff format --check`` and collect the files Ruff would reformat.

    Args:
        paths: Optional list of path strings appended to the Ruff command
            line. When falsy, Ruff is invoked without explicit paths.

    Returns:
        A list of ``Path`` objects, one for each ``Would reformat: <file>``
        line printed on Ruff's stdout, in output order.

    Exits the process with status 1 when the ``ruff`` executable is missing.
    """
    cmd = ["ruff", "format", "--check"]
    if paths:
        cmd.extend(paths)
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        print("找不到 `ruff` 命令，请确保已正确安装 Ruff。", file=sys.stderr)
        sys.exit(1)

    # With --check, Ruff exits 0 when nothing needs formatting and 1 when at
    # least one file would be reformatted. Any other status means Ruff
    # terminated abnormally; surface it instead of silently reporting that
    # no files need formatting. Parsing still proceeds so that partial
    # results are not lost.
    if result.returncode not in (0, 1):
        print(
            f"`ruff format --check` 异常退出（状态码 {result.returncode}），"
            "结果可能不完整。",
            file=sys.stderr,
        )
        if result.stderr:
            print(result.stderr.rstrip(), file=sys.stderr)

    prefix = "Would reformat: "
    return [
        Path(line[len(prefix) :].strip())
        for line in result.stdout.splitlines()
        if line.startswith(prefix)
    ]


def split_files_by_directory(
    files: list[Path], level: int
) -> dict[Path, list[Path]]:
    """
    Group files by the leading ``level`` components of their parent directory.

    Each file is resolved and, when it lives under the current working
    directory, expressed relative to it; otherwise the resolved absolute path
    is kept. The bucket key is built from at most ``level`` directory
    components (for absolute paths the anchor is kept in the key but is not
    counted towards ``level``). Files sitting directly in the working
    directory land in the ``Path(".")`` bucket.

    Args:
        files: Paths to group.
        level: Maximum number of directory components used for the key.
            Zero or negative values put every file, unmodified, into a
            single ``Path(".")`` bucket.

    Returns:
        A mapping from directory key to its files, each list sorted by the
        string form of the path. An empty ``files`` yields ``{}``.

    Raises:
        ValueError: If ``files`` is non-empty and ``level`` is ``None``.
    """
    if not files:
        return {}
    if level is None:
        raise ValueError("level 不能为空")
    if level <= 0:
        # No splitting requested: everything belongs to the root bucket.
        return {Path("."): [Path(item) for item in files]}

    cwd = Path.cwd().resolve()
    grouped: dict[Path, list[Path]] = defaultdict(list)

    for item in files:
        absolute = Path(item).resolve(strict=False)
        try:
            # Prefer a path relative to the working directory for stable keys.
            shown = absolute.relative_to(cwd)
        except ValueError:
            # Outside the working directory: fall back to the absolute path.
            shown = absolute

        parent = shown.parent
        if parent.is_absolute() and parent.anchor:
            # Skip the anchor when counting levels, but keep it in the key.
            bucket_key = Path(parent.anchor, *parent.parts[1 : level + 1])
        else:
            # Path() with no components is Path("."), i.e. the root bucket.
            bucket_key = Path(*parent.parts[:level])

        grouped[bucket_key].append(shown)

    # Deterministic output: order the members of every bucket.
    for members in grouped.values():
        members.sort(key=str)

    return grouped


# Directories skipped by default when scanning for Python files. Used as the
# fallback exclude list by `diff_dir` and as the base list in `main`.
# NOTE: `_iter_python_files` resolves relative entries against the scan root,
# so each name here matches the directory at `<root>/<name>` (and everything
# below it), not directories with the same name deeper in the tree.
DEFAULT_EXCLUDE_DIRS: list[str] = [
    ".git",
    "__pycache__",
    ".tox",
    ".mypy_cache",
    ".ruff_cache",
    ".pytest_cache",
    "venv",
    ".venv",
    "venvs",
    "build",
    "dist",
    ".vscode",
    ".idea",
    ".cache",
    "third_party",
]


from typing import TYPE_CHECKING

if TYPE_CHECKING:  # 仅用于类型检查，避免运行时不必要导入
    from collections.abc import Sequence


def _iter_python_files(
    root: Path, exclude_dirs: Sequence[str | Path] | None = None
):
    """
    Yield the resolved path of every ``.py`` / ``.pyi`` file below ``root``.

    The tree is traversed with ``os.walk`` and excluded directories are
    pruned in place, so their contents are never visited.

    Args:
        root: Directory to scan.
        exclude_dirs: Directories to skip. Relative entries are interpreted
            relative to the resolved ``root``; absolute entries are used as
            given (after resolving). A directory is skipped when it equals
            an excluded path or lies beneath one.

    Yields:
        Resolved ``Path`` objects for each matching file.
    """
    top = root.resolve()

    def _absolute(entry):
        # Normalise an exclude entry to an absolute, resolved path.
        candidate = Path(entry)
        if candidate.is_absolute():
            return candidate.resolve()
        return (top / candidate).resolve()

    banned = [_absolute(entry) for entry in exclude_dirs or []]

    def _is_banned(directory):
        # True when `directory` is an excluded path or nested inside one.
        for blocked in banned:
            if directory == blocked or directory.is_relative_to(blocked):
                return True
        return False

    for current, subdirs, names in os.walk(top):
        here = Path(current)

        # Slice-assign so os.walk sees the pruned list and does not descend.
        subdirs[:] = [
            child for child in subdirs if not _is_banned((here / child).resolve())
        ]

        for name in names:
            if name.endswith((".py", ".pyi")):
                yield (here / name).resolve()


def diff_dir(
    need_fix_files: list[Path],
    paths: None | list[str] = None,
    exclude_dirs: Sequence[str | Path] | None = None,
) -> dict[Path, list[Path]]:
    """
    Group the Python files that do NOT need reformatting by their directory.

    Every ``.py`` / ``.pyi`` file under the scan roots is collected (via
    ``_iter_python_files``); files absent from ``need_fix_files`` are
    recorded under their parent directory.

    Args:
        need_fix_files: Files reported as needing formatting. Relative
            entries are resolved against the current working directory
            before comparison.
        paths: Scan roots. All entries are scanned; when empty or ``None``
            the current working directory is used.
        exclude_dirs: Directories to skip while scanning. Falls back to
            ``DEFAULT_EXCLUDE_DIRS`` when empty or ``None``.

    Returns:
        A mapping from resolved directory to the resolved files inside it
        that are not in ``need_fix_files``. Note that a directory appears as
        soon as it holds one such file, even if it also holds files that
        still need fixing.
    """
    fixed_dirs: dict[Path, list[Path]] = defaultdict(list)

    # Scan every requested root, not just the first one, so the result
    # covers the same set of paths that was handed to Ruff.
    roots = [Path(p) for p in paths] if paths else [Path.cwd()]

    # Normalise to absolute paths so membership tests compare like with like.
    need_fix_set = {
        Path(p).resolve(strict=False) for p in (need_fix_files or [])
    }

    effective_excludes = exclude_dirs or DEFAULT_EXCLUDE_DIRS

    # Overlapping roots (e.g. "a" and "a/b") would otherwise list a file twice.
    seen: set[Path] = set()
    for root in roots:
        for f in _iter_python_files(root, effective_excludes):
            if f in seen:
                continue
            seen.add(f)
            if f not in need_fix_set:
                # `f` is already resolved, so its parent is resolved too.
                fixed_dirs[f.parent].append(f)

    return fixed_dirs


def collapse_dirs(dirs: list[Path]) -> list[Path]:
    """
    Reduce a set of directories to its deepest ("leaf") members.

    When both a directory and one of its descendants are present, only the
    descendant is kept. For example, given /a, /a/b and /a/b/c the result
    contains just /a/b/c.

    Args:
        dirs: Directories to collapse; each is resolved and duplicates are
            dropped.

    Returns:
        The surviving directories, ordered deepest first and then by their
        string form.
    """
    # Deepest paths first so that ancestors are always examined after the
    # descendants that may shadow them.
    candidates = sorted(
        {Path(entry).resolve() for entry in dirs},
        key=lambda path: (-len(path.parts), str(path)),
    )

    kept: list[Path] = []
    for candidate in candidates:
        for deeper in kept:
            if deeper == candidate or deeper.is_relative_to(candidate):
                # Something already kept lives at or below this directory.
                break
        else:
            kept.append(candidate)
    return kept


def main():
    """
    Command-line entry point.

    Parses the optional scan paths and ``-x/--exclude`` directories, asks
    Ruff which files would be reformatted, and prints the collapsed list of
    directories that contain files needing no reformatting.
    """
    cli = argparse.ArgumentParser(description="Ruff 格式化辅助工具")
    cli.add_argument(
        "paths",
        nargs="*",
        help="要扫描的起始路径（默认当前工作目录）",
    )
    cli.add_argument(
        "-x",
        "--exclude",
        action="append",
        default=[],
        help=(
            "需要排除的目录，可重复指定。例如 -x build -x venv。"
            "若未指定，将使用内置的常见忽略目录。"
        ),
    )
    options = cli.parse_args()

    scan_paths = options.paths or None
    pending = get_files_needing_format(scan_paths)
    # To inspect how the pending files are distributed across directories,
    # pass `pending` to split_files_by_directory and print each bucket.

    # User-supplied excludes extend the built-in list rather than replace it.
    if options.exclude:
        excluded = [*DEFAULT_EXCLUDE_DIRS, *options.exclude]
    else:
        excluded = DEFAULT_EXCLUDE_DIRS

    clean_by_dir = diff_dir(pending, scan_paths, excluded)
    leaves = collapse_dirs(list(clean_by_dir))

    print(f"候选可忽略目录数（折叠后）: {len(leaves)}")
    for leaf in leaves:
        print(f"  - {leaf}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

… - part 22

paddle-bot · 2025-08-17T15:20:45Z

你的PR提交成功，感谢你对开源项目的贡献!
请关注后续CI自动化测试结果，详情请参考Paddle-CI手册。
Your PR has been submitted. Thanks for your contribution!
Please wait for the result of CI firstly. See Paddle CI Manual for details.

SigureMo · 2025-08-17T17:06:08Z

.pre-commit-config.yaml

      - id: black
+        exclude: |
+          (?x)^(
+            third_party/.+|


这个已经在顶层 exclude 掉了，这里就不重复写了

SigureMo · 2025-08-17T17:06:59Z

.pre-commit-config.yaml

      - id: ruff-check
        args: [--fix, --exit-non-zero-on-fix, --no-cache]
+      - id: ruff-format
+        files: \.(py|pyi)$


这里写 files 是有必要的么？

测了下好像没必要，先删了

SigureMo · 2025-08-17T18:28:13Z

.pre-commit-config.yaml

+            # | test/[m-z].+
+
+            # | tools/.+
+          )$


由于 exclude 不能写注释所以不能写注释防止冲突

我试了下应该可以，之后每个 PR 注释下面，解开上面就好了

特意空一行以免冲突

[CodeStyle] black -> ruff format migration, pre-commit monitoring…

dd72631

… - part 22

gouzil requested a review from SigureMo as a code owner August 17, 2025 15:20

paddle-bot bot added the contributor External developers label Aug 17, 2025

SigureMo added the HappyOpenSource 快乐开源活动issue与PR label Aug 17, 2025

Merge branch 'develop' into ruff/fmt/part-22

ea2d7ae

SigureMo reviewed Aug 17, 2025

View reviewed changes

remove unused regex

fd81f2d

SigureMo changed the title ~~[CodeStyle] black -> ruff format migration, pre-commit monitoring - part 22~~ [CodeStyle] Move black to ruff format, initial pre-commit config setup for mix check mode - part 22 Aug 17, 2025

SigureMo previously approved these changes Aug 17, 2025

View reviewed changes

add progressive migration config

3bb8a6d

SigureMo dismissed their stale review via 3bb8a6d August 17, 2025 18:26

SigureMo approved these changes Aug 17, 2025

View reviewed changes

SigureMo added the skip-ci: api-benchmark label Aug 17, 2025

SigureMo merged commit d7133ee into PaddlePaddle:develop Aug 17, 2025
73 of 74 checks passed

SigureMo deleted the ruff/fmt/part-22 branch August 17, 2025 21:09

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[CodeStyle] Move `black` to `ruff format`, initial `pre-commit` config setup for mix check mode - part 22 #74677

[CodeStyle] Move `black` to `ruff format`, initial `pre-commit` config setup for mix check mode - part 22 #74677

Uh oh!

gouzil commented Aug 17, 2025 •

edited by SigureMo

Loading

Uh oh!

paddle-bot bot commented Aug 17, 2025

Uh oh!

SigureMo Aug 17, 2025

Uh oh!

SigureMo Aug 17, 2025

Uh oh!

SigureMo Aug 17, 2025

Uh oh!

SigureMo Aug 17, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

[CodeStyle] Move black to ruff format, initial pre-commit config setup for mix check mode - part 22 #74677

[CodeStyle] Move black to ruff format, initial pre-commit config setup for mix check mode - part 22 #74677

Uh oh!

Conversation

gouzil commented Aug 17, 2025 • edited by SigureMo Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

PR Category

PR Types

Description

Uh oh!

paddle-bot bot commented Aug 17, 2025

Uh oh!

SigureMo Aug 17, 2025

Choose a reason for hiding this comment

Uh oh!

SigureMo Aug 17, 2025

Choose a reason for hiding this comment

Uh oh!

SigureMo Aug 17, 2025

Choose a reason for hiding this comment

Uh oh!

SigureMo Aug 17, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

[CodeStyle] Move `black` to `ruff format`, initial `pre-commit` config setup for mix check mode - part 22 #74677

[CodeStyle] Move `black` to `ruff format`, initial `pre-commit` config setup for mix check mode - part 22 #74677

gouzil commented Aug 17, 2025 •

edited by SigureMo

Loading