失败的case需要能自动生成一个可复现的单测文件，用户直接跑这个单测文件就可以复现问题 (#601)

tjujingzong · web-flow · commit 0d941e385a44 · 2026-01-28T17:15:54.000+08:00
diff --git a/engineV2-README.md b/engineV2-README.md
@@ -89,6 +89,7 @@
 | `--custom_device_vs_gpu`        | bool  | 启用自定义设备与GPU的精度对比测试模式（默认 False）                                   |
 | `--custom_device_vs_gpu_mode`   | str   | 自定义设备与GPU对比的模式：`upload` 或 `download`（默认 `upload`）                    |
 | `--bitwise_alignment`            | bool  | 是否进行逐位对齐对比，开启后所有 API 的精度对比都按照 atol=0.0、rtol=0.0 进行（默认 False）|
+| `--generate_failed_tests`        | bool  | 是否为失败的测试用例生成可复现的测试文件。开启后，当测试失败时，会在`failed_tests`目录下生成独立的Python测试文件，便于后续复现和调试（默认False）|
 | `--exit_on_error`                | bool  | 是否在精度测试出现`paddle_error`或者 `accuracy_error`  错误时立即退出测试进程(exit code 为1)。默认为False，测试进程会继续执行 |
 
 ### 示例命令
diff --git a/engineV2.py b/engineV2.py
@@ -53,6 +53,7 @@
     "random_seed",
     "bos_conf_path",
     "bcecmd_path",
+    "generate_failed_tests",
     "bitwise_alignment",
     "exit_on_error",
 }
@@ -684,6 +685,12 @@ def main():
         default=False,
         help="Whether to using bitwise alignment when run accuracy test",
     )
+    parser.add_argument(
+        "--generate_failed_tests",
+        type=parse_bool,
+        default=False,
+        help="Whether to generate reproducible test files for failed cases",
+    )
     parser.add_argument(
         "--exit_on_error",
         type=parse_bool,
diff --git a/tester/paddle_device_vs_cpu.py b/tester/paddle_device_vs_cpu.py
@@ -12,6 +12,7 @@ def __init__(self, api_config, **kwargs):
         super().__init__(api_config)
         self.test_amp = kwargs.get("test_amp", False)
         self.custom_device_type = self._get_first_custom_device_type()
+        self.generate_failed_tests = kwargs.get("generate_failed_tests", False)
         if self.check_custom_device_available():
             self.custom_device_id = 0
         if self.check_xpu_available():
@@ -260,6 +261,28 @@ def test(self):
         if cpu_output is None:
             print("[cpu execution failed]", self.api_config.config, flush=True)
             write_to_log("paddle_error", self.api_config.config)
+            # CPU 前向/反向执行失败时，如果开启了生成失败用例，则生成可复现单测
+            if self.generate_failed_tests:
+                try:
+                    from .test_file_generator import generate_reproducible_test_file
+
+                    error_info = {
+                        "error_type": "paddle_error",
+                        "stage": "forward",
+                        "need_backward": self.need_check_grad(),
+                    }
+                    test_file_path = generate_reproducible_test_file(
+                        self.api_config,
+                        error_info,
+                        test_amp=self.test_amp,
+                        target_device="cpu",
+                        device_id=0,
+                        test_instance=self,
+                    )
+                    if test_file_path:
+                        print(f"[Generated test file] {test_file_path}", flush=True)
+                except Exception as e:
+                    print(f"[Error generating test file] {e}", flush=True)
             return
 
         # 6. Run API on target device (including forward and backward)
@@ -271,6 +294,28 @@ def test(self):
                 flush=True,
             )
             write_to_log("paddle_error", self.api_config.config)
+            # 目标设备前向/反向执行失败，同样生成失败用例
+            if self.generate_failed_tests:
+                try:
+                    from .test_file_generator import generate_reproducible_test_file
+
+                    error_info = {
+                        "error_type": "paddle_error",
+                        "stage": "forward",
+                        "need_backward": self.need_check_grad(),
+                    }
+                    test_file_path = generate_reproducible_test_file(
+                        self.api_config,
+                        error_info,
+                        test_amp=self.test_amp,
+                        target_device=target_device,
+                        device_id=device_id,
+                        test_instance=self,
+                    )
+                    if test_file_path:
+                        print(f"[Generated test file] {test_file_path}", flush=True)
+                except Exception as e:
+                    print(f"[Error generating test file] {e}", flush=True)
             return
 
         # 7. Compare forward results
@@ -310,3 +355,46 @@ def test(self):
         else:
             print("[Fail]", self.api_config.config, flush=True)
             write_to_log("accuracy_error", self.api_config.config)
+            # 生成可复现的单测文件
+            if self.generate_failed_tests:
+                try:
+                    from .test_file_generator import generate_reproducible_test_file
+
+                    # 确定目标设备
+                    if self.check_xpu_available():
+                        target_device = "xpu"
+                        device_id = self.xpu_device_id
+                    elif self.check_custom_device_available():
+                        target_device = self.custom_device_type
+                        device_id = self.custom_device_id
+                    else:
+                        target_device = "cpu"
+                        device_id = 0
+
+                    # 确定失败阶段
+                    stage = "unknown"
+                    if not forward_pass:
+                        stage = "forward"
+                    elif not backward_pass:
+                        stage = "backward"
+
+                    error_info = {
+                        "error_type": "accuracy_error",
+                        "stage": stage,
+                        "need_backward": self.need_check_grad(),
+                    }
+
+                    # 生成测试文件
+                    test_file_path = generate_reproducible_test_file(
+                        self.api_config,
+                        error_info,
+                        test_amp=self.test_amp,
+                        target_device=target_device,
+                        device_id=device_id,
+                        test_instance=self,
+                    )
+
+                    if test_file_path:
+                        print(f"[Generated test file] {test_file_path}", flush=True)
+                except Exception as e:
+                    print(f"[Error generating test file] {e}", flush=True)
diff --git a/tester/test_file_generator.py b/tester/test_file_generator.py