Skip to content

Commit 64a372c

Browse files
authored
Assign gpu pools only to test actions (#418)
* Assign gpu pools only to test actions
* Ignore flaky tests
1 parent 73cb057 commit 64a372c

File tree

3 files changed

+14
-20
lines changed

3 files changed

+14
-20
lines changed

build_tools/rocm/run_xla_multi_gpu.sh

Lines changed: 8 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -60,12 +60,17 @@ if [ ! -d /tf/pkg ]; then
6060
fi
6161

6262
EXCLUDED_TESTS=(
63-
CollectiveOpsTestE2E.MemcpyP2pLargeMessage
64-
RaggedAllToAllTest/RaggedAllToAllTest.RaggedAllToAll_8GPUs_2ReplicasPerGroups/sync_decomposer
65-
RaggedAllToAllTest/RaggedAllToAllTest.RaggedAllToAll_8GPUs_2ReplicasPerGroups/async_decomposer
63+
# //xla/tests:collective_ops_test_amdgpu_any
64+
RaggedAllToAllTest*
65+
AsyncCollectiveOps*
66+
AsyncMemcpyCollectiveOps*
67+
CollectiveOpsTest*
68+
AllReduceTest*
69+
Fp8CollectiveOpsTest*
6670
# //xla/backends/gpu/codegen/triton:fusion_emitter_parametrized_legacy_test_amdgpu_any
6771
ElementwiseTestSuiteF32/BinaryElementwiseTest.ElementwiseFusionExecutesCorrectly/f32_atan2
6872
# //xla/tests:collective_ops_e2e_test_amdgpu_any
73+
CollectiveOpsTestE2E.MemcpyP2pLargeMessage
6974
CollectiveOpsTestE2EPipelinedNonPipelined.CollectivePipelinerBackward
7075
CollectiveOpsTestE2EPipelinedNonPipelined.CollectivePipelinerBackwardStartFromOne
7176
# //xla/tools/multihost_hlo_runner:functional_hlo_runner_test
@@ -87,20 +92,6 @@ elif [[ $1 == "tsan" ]]; then
8792
SANITIZER_ARGS+=("--run_under=//build_tools/rocm:sanitizer_wrapper")
8893
SANITIZER_ARGS+=("--config=tsan")
8994
TAG_FILTERS="$TAG_FILTERS,-notsan"
90-
# excluded from tsan
91-
EXCLUDED_TESTS+=(
92-
CollectiveOpsTest*
93-
Fp8CollectiveOpsTest.AllGather_8BitFloat
94-
Fp8CollectiveOpsTest.CollectivePermute_8BitFloat
95-
Fp8CollectiveOpsTest.AllToAll_8BitFloat
96-
AsyncCollectiveOps*
97-
AllReduceTest*
98-
RaggedAllToAllTest*
99-
AsyncCollectiveOps*
100-
AsyncMemcpyCollectiveOps*
101-
RaggedAllToAllTest*
102-
)
103-
10495
# tsan tests appear to be flaky in rbe due to the heavy load
10596
# force them to run locally
10697
RBE_OPTIONS+=(

xla/tests/BUILD

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2806,7 +2806,10 @@ xla_test(
28062806
],
28072807
},
28082808
backends = ["gpu"],
2809-
tags = ["test_migrated_to_hlo_runner_pjrt"],
2809+
tags = [
2810+
"test_migrated_to_hlo_runner_pjrt",
2811+
"local",
2812+
],
28102813
deps = [
28112814
":hlo_pjrt_test_base",
28122815
":literal_test_util",

xla/tsl/platform/default/build_config_root.bzl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,11 +20,11 @@ GPU_TEST_PROPERTIES = {
2020
}
2121

2222
ROCM_SINGLE_GPU_TEST_PROPERTIES = {
23-
"Pool": "linux_x64_gpu",
23+
"test.Pool": "linux_x64_gpu",
2424
}
2525

2626
ROCM_MULTI_GPU_TEST_PROPERTIES = {
27-
"Pool": "linux_x64_multigpu",
27+
"test.Pool": "linux_x64_multigpu",
2828
}
2929

3030
def tf_gpu_tests_tags():

0 commit comments

Comments
 (0)