From 3f89fc54b1902db1d85c641acb35cc63d5c8e8e8 Mon Sep 17 00:00:00 2001 From: Ryan Jeng Date: Wed, 24 Jan 2024 23:17:56 -0800 Subject: [PATCH 1/2] bugfix, add skipIf for multigpu tests --- test/collective/test_communication_api_base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/collective/test_communication_api_base.py b/test/collective/test_communication_api_base.py index 533dad6fc20735..cd46ceaae85b99 100644 --- a/test/collective/test_communication_api_base.py +++ b/test/collective/test_communication_api_base.py @@ -22,9 +22,14 @@ import tempfile import unittest +import paddle + class CommunicationTestDistBase(unittest.TestCase): def setUp(self, save_log_dir=None, num_of_devices=2, timeout=120, nnode=1): + if num_of_devices > paddle.device.cuda.device_count(): + self.skipTest("number of GPUs is not enough") + self._python_interp = sys.executable self._save_log_dir = save_log_dir self._log_dir = tempfile.TemporaryDirectory() From 2378025dab4657eac821fac9c383d78b31c2cc02 Mon Sep 17 00:00:00 2001 From: Ryan Jeng Date: Fri, 16 Feb 2024 01:25:33 -0800 Subject: [PATCH 2/2] Bugfix test_op with single GPU --- test/legacy_test/op_test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/legacy_test/op_test.py b/test/legacy_test/op_test.py index 4c3c201c8afe36..a307e2b0edfe22 100644 --- a/test/legacy_test/op_test.py +++ b/test/legacy_test/op_test.py @@ -3199,6 +3199,13 @@ def check_grad_with_place( python_api_info=python_api_info, ) runtime_envs = get_subprocess_runtime_envs(place) + + num_devices = len( + runtime_envs["CUDA_VISIBLE_DEVICES"].split(",") + ) + if num_devices > paddle.device.cuda.device_count(): + self.skipTest("number of GPUs is not enough") + start_command = get_subprocess_command( runtime_envs["CUDA_VISIBLE_DEVICES"], generated_grad_test_path,