Skip to content

Commit 6b03bc3

Browse files
alekstheodhsharsha
authored andcommitted
Migrate tsan/asan/rbe changes to 0.7.1 branch
1 parent 2bb4e0a commit 6b03bc3

File tree

10 files changed

+367
-64
lines changed

10 files changed

+367
-64
lines changed

build_tools/rocm/BUILD

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
load("@bazel_skylib//rules:common_settings.bzl", "string_flag")
2+
3+
string_flag(
4+
name = "sanitizer",
5+
build_setting_default = "none",
6+
values = [
7+
"none",
8+
"asan",
9+
"tsan",
10+
],
11+
)
12+
13+
config_setting(
14+
name = "asan",
15+
flag_values = {"//build_tools/rocm:sanitizer": "asan"},
16+
)
17+
18+
config_setting(
19+
name = "tsan",
20+
flag_values = {"//build_tools/rocm:sanitizer": "tsan"},
21+
)
22+
23+
filegroup(
24+
name = "sanitizer_ignore_lists",
25+
srcs = select({
26+
":asan": [
27+
"asan_ignore_list.txt",
28+
"lsan_ignore_list.txt",
29+
],
30+
":tsan": ["tsan_ignore_list.txt"],
31+
"//conditions:default": [],
32+
}),
33+
visibility = ["//visibility:public"],
34+
)
35+
36+
genrule(
37+
name = "san_wrapper_script",
38+
srcs = [":sanitizer_ignore_lists"],
39+
outs = ["san_wrapper.sh"],
40+
cmd = """
41+
echo '#!/bin/bash' > $@
42+
echo 'exec "$$@"' >> $@
43+
chmod +x $@
44+
""",
45+
)
46+
47+
# this wrapper ensures the test target
48+
# takes into account any changes in the ignore list files
49+
sh_binary(
50+
name = "sanitizer_wrapper",
51+
srcs = [":san_wrapper_script"],
52+
data = [":sanitizer_ignore_lists"],
53+
visibility = ["//visibility:public"],
54+
)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
interceptor_via_lib:libhsa-runtime64.so
22
interceptor_via_lib:libamdhip64.so
3+
interceptor_via_lib:librccl.so
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
leak:libhsa-runtime64.so
22
leak:libstdc++.so
33
leak:libamdhip64.so
4+
leak:libhiprtc.so
5+
leak:librccl.so
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/bin/bash
2+
3+
TAG_FILTERS=(
4+
-no_gpu
5+
-requires-gpu-intel
6+
-requires-gpu-nvidia
7+
-cuda-only
8+
-oneapi-only
9+
-requires-gpu-sm60
10+
-requires-gpu-sm60-only
11+
-requires-gpu-sm70
12+
-requires-gpu-sm70-only
13+
-requires-gpu-sm80
14+
-requires-gpu-sm80-only
15+
-requires-gpu-sm86
16+
-requires-gpu-sm86-only
17+
-requires-gpu-sm89
18+
-requires-gpu-sm89-only
19+
-requires-gpu-sm90
20+
-requires-gpu-sm90-only
21+
)
22+
23+
# Print the tag filters as one comma-separated line. IFS is changed only inside
# the subshell, so the caller's field splitting is unaffected.
(IFS=, ; echo "${TAG_FILTERS[*]}")

build_tools/rocm/rocm_xla.bazelrc

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Test-related settings.
2+
3+
build:rocm_dev --remote_upload_local_results=false
4+
build:rocm_dev --remote_cache="https://wardite.cluster.engflow.com"
5+
6+
build:rocm_rbe --bes_backend="grpcs://wardite.cluster.engflow.com"
7+
build:rocm_rbe --bes_results_url="https://wardite.cluster.engflow.com/invocation/"
8+
build:rocm_rbe --host_platform="//platform/linux_x64"
9+
build:rocm_rbe --extra_execution_platforms="//platform/linux_x64"
10+
build:rocm_rbe --platforms="//platform/linux_x64"
11+
build:rocm_rbe --bes_timeout=600s
12+
build:rocm_rbe --tls_client_certificate="/tf/certificates/ci-cert.crt"
13+
build:rocm_rbe --tls_client_key="/tf/certificates/ci-cert.key"
14+
build:rocm_rbe --spawn_strategy=local
15+
16+
test:rocm_rbe --jobs=200
17+
test:rocm_rbe --remote_executor=grpcs://wardite.cluster.engflow.com
18+
test:rocm_rbe --remote_timeout=3600
19+
test:rocm_rbe --strategy=TestRunner=local
20+
test:rocm_rbe --worker_sandboxing=true
21+
22+
build:asan --strip=never
23+
build:asan --copt -fsanitize=address
24+
build:asan --copt -DADDRESS_SANITIZER
25+
build:asan --copt -O1
26+
build:asan --copt -g
27+
build:asan --linkopt -g
28+
build:asan --copt -fno-omit-frame-pointer
29+
build:asan --linkopt -fsanitize=address
30+
build:asan --//build_tools/rocm:sanitizer=asan
31+
32+
build:tsan --strip=never
33+
build:tsan --copt -fsanitize=thread
34+
build:tsan --copt -g
35+
build:tsan --copt -fno-omit-frame-pointer
36+
build:tsan --linkopt -fsanitize=thread
37+
build:tsan --linkopt -g
38+
build:tsan --//build_tools/rocm:sanitizer=tsan
39+
40+
test:xla_sgpu -- \
41+
//xla/... \
42+
-//xla/backends/gpu/collectives:gpu_clique_key_test \
43+
-//xla/backends/gpu/collectives:nccl_communicator_test \
44+
-//xla/service:collective_ops_utils_test \
45+
-//xla/service:collective_pipeliner_test \
46+
-//xla/service:collective_permute_cycle_test \
47+
-//xla/service:batched_gather_scatter_normalizer_test \
48+
-//xla/service:all_reduce_simplifier_test \
49+
-//xla/service:all_gather_simplifier_test \
50+
-//xla/service:reduce_scatter_decomposer_test \
51+
-//xla/service:reduce_scatter_reassociate_test \
52+
-//xla/service:reduce_scatter_combiner_test \
53+
-//xla/service:scatter_simplifier_test \
54+
-//xla/service:sharding_propagation_test \
55+
-//xla/service:sharding_remover_test \
56+
-//xla/service:p2p_schedule_preparation_test \
57+
-//xla/pjrt/distributed:topology_util_test \
58+
-//xla/pjrt/distributed:client_server_test \
59+
-//xla/service/gpu/tests:dynamic_shared_memory_test_amdgpu_any \
60+
-//xla/service/gpu/tests:gpu_cub_sort_test_amdgpu_any
61+
62+
test:xla_mgpu -- \
63+
//xla/tests:collective_ops_e2e_test \
64+
//xla/tests:collective_ops_test \
65+
//xla/tests:replicated_io_feed_test \
66+
//xla/backends/gpu/collectives:gpu_clique_key_test \
67+
//xla/backends/gpu/collectives:nccl_communicator_test \
68+
//xla/service:collective_ops_utils_test \
69+
//xla/service:collective_pipeliner_test \
70+
//xla/service:collective_permute_cycle_test \
71+
//xla/service:batched_gather_scatter_normalizer_test \
72+
//xla/service:all_reduce_simplifier_test \
73+
//xla/service:all_gather_simplifier_test \
74+
//xla/service:reduce_scatter_decomposer_test \
75+
//xla/service:reduce_scatter_reassociate_test \
76+
//xla/service:reduce_scatter_combiner_test \
77+
//xla/service:scatter_simplifier_test \
78+
//xla/service:sharding_propagation_test \
79+
//xla/service:sharding_remover_test \
80+
//xla/service:p2p_schedule_preparation_test \
81+
//xla/tools/multihost_hlo_runner:functional_hlo_runner_test \
82+
//xla/pjrt/distributed:topology_util_test \
83+
//xla/pjrt/distributed:client_server_test \
84+
//xla/backends/gpu/runtime:all_reduce_test \
85+
-//xla/tests:collective_pipeline_parallelism_test

build_tools/rocm/run_xla.sh

Lines changed: 98 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -37,30 +37,96 @@ echo ""
3737
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s) for gpu ${AMD_GPU_GFX_ID}."
3838
echo ""
3939

40-
# First positional argument (if any) specifies the ROCM_INSTALL_DIR
41-
if [[ -n $1 ]]; then
42-
ROCM_INSTALL_DIR=$1
43-
else
44-
if [[ -z "${ROCM_PATH}" ]]; then
45-
ROCM_INSTALL_DIR=/opt/rocm/
46-
else
47-
ROCM_INSTALL_DIR=$ROCM_PATH
48-
fi
49-
fi
50-
5140
# Resolve python3 from PATH with the shell builtin and keep the result quoted.
export PYTHON_BIN_PATH="$(command -v python3)"
5241
export TF_NEED_ROCM=1
53-
export ROCM_PATH=$ROCM_INSTALL_DIR
54-
TAGS_FILTER="requires-gpu-amd,-requires-gpu-nvidia,-requires-gpu-intel,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-cuda-only,-oneapi-only"
55-
UNSUPPORTED_GPU_TAGS="$(echo -requires-gpu-sm{60,70,80,86,89,90}{,-only})"
56-
TAGS_FILTER="${TAGS_FILTER},${UNSUPPORTED_GPU_TAGS// /,}"
42+
export ROCM_PATH="/opt/rocm"
43+
44+
GPU_NAME=(`rocminfo | grep -m 1 gfx`)
45+
GPU_NAME=${GPU_NAME[1]}
46+
47+
EXCLUDED_TESTS=(
48+
# //xla/pjrt/c:pjrt_c_api_gpu_test_gpu_amd_any
49+
PjrtCAPIGpuExtensionTest.TritonCompile
50+
# //xla/backends/gpu/codegen/triton:fusion_emitter_device_test_gpu_amd_any
51+
TritonEmitterTest.CheckRocmWarpSize
52+
TritonEmitterTest.ConvertF16ToF8E5M2Exhaustive
53+
TritonEmitterTest.FP8ToFP8EndToEnd
54+
TritonEmitterTest.FusionWithOutputContainingMoreThanInt32MaxElementsExecutesCorrectly
55+
BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F64_F64_F64
56+
# //xla/backends/gpu/codegen/triton:fusion_emitter_device_legacy_test_gpu_amd_any
57+
TritonGemmTest.BroadcastOfVectorConstantIsFused
58+
TritonGemmTest.FailIfTooMuchShmem
59+
TritonGemmTest.SplitAndTransposeLhsExecutesCorrectly
60+
# //xla/backends/gpu/codegen/triton:fusion_emitter_int4_device_test_gpu_amd_any
61+
TritonTest.NonstandardLayoutWithManyNonContractingDims
62+
TritonTest.NonstandardLayoutWithManyNonContractingDimsReversedLayout
63+
# //xla/hlo/builder/lib:self_adjoint_eig_test_gpu_amd_any marked as flaky but randomly red after 3 attempts
64+
RandomEighTestInstantiation/RandomEighTest.Random/*
65+
# temp excludes for 0.7.1
66+
CompareTest.SplitK
67+
TritonEmitterTest.RocmWarpSizeIsSetCorrectly
68+
MultiOutputFusionTest.MultiOutputReduceFusionMajorWithExtraOutput
69+
TestRadixSort/CubSortKeysTest.SortKeys/*
70+
GpuIrEmitterUnnestedTest.CanNotEmitTritonCustomCallOnPreAmpereGpu
71+
CommandBufferConversionPassTest.ConvertWhileThunkWithAsyncPair
72+
CommandBufferConversionPassTest.ConvertWhileThunk
73+
TritonFusionNumericsVerifierTest.CompilationSucceedsEvenIfKernelWillSpillRegisters
74+
TritonFusionNumericsVerifierTest.VerifyThatDisablingTritonIsFast
75+
TritonFusionNumericsVerifierTestSuite/TritonFusionNumericsVerifierTest.VerifyNestedGemmNumerics/1
76+
TritonGemmTest.FailForTooComplexTiling
77+
TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_tf32_tf32_f32
78+
TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_f32_f32_f32
79+
TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_tf32_tf32_f32_x3
80+
TestRadixSort/CubSortPairsTest.SortPairs/*
81+
GpuKernelTilingTest.ReductionInputTooLarge
82+
DeterminismTest.Conv
83+
TopKTests/TopKKernelTest*
84+
DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f8e5m2_dot
85+
DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f32_dot
86+
TritonNormalizationTest.CanFuseAndEmitDiamondWithBF16Converts
87+
ElementwiseTestSuiteF16/UnaryElementwiseTest.ElementwiseUnaryOpExecutesCorrectly/f16_cosine
88+
ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseBinaryOpExecutesCorrectly/f16_atan2
89+
ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseFusionExecutesCorrectly/f16_atan2
90+
TritonTest.FuseSubchannelDequantizationWithTranspose
91+
BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F16_F16_F16
92+
CommandBufferTests/CommandBufferTest.IndexConditional/*
93+
CommandBufferTests/CommandBufferTest.WhileLoop/*
94+
CommandBufferTests/CommandBufferTest.TrueFalseConditional/*
95+
)
96+
97+
BAZEL_DISK_CACHE_SIZE=100G
98+
BAZEL_DISK_CACHE_DIR="/tf/disk_cache/rocm-jaxlib-v0.7.1"
99+
mkdir -p ${BAZEL_DISK_CACHE_DIR}
100+
if [ ! -d /tf/pkg ]; then
101+
mkdir -p /tf/pkg
102+
fi
103+
104+
# Absolute directory containing this script; quoted so that paths with spaces
# or glob characters survive word splitting.
SCRIPT_DIR=$(realpath "$(dirname "$0")")
105+
# Start from the filters printed by rocm_tag_filters.sh, then append the tags
# listed here. The helper path is quoted so a SCRIPT_DIR with spaces still works.
TAG_FILTERS="$("${SCRIPT_DIR}/rocm_tag_filters.sh"),-multigpu,-multi_gpu_h100,requires-gpu-amd,-skip_rocprofiler_sdk,-no_oss,-oss_excluded,-oss_serial"
57106

58-
bazel \
59-
test \
60-
--define xnn_enable_avxvnniint8=false --define xnn_enable_avx512fp16=false \
107+
SANITIZER_ARGS=()
108+
if [[ $1 == "asan" ]]; then
109+
SANITIZER_ARGS+=("--test_env=ASAN_OPTIONS=suppressions=${SCRIPT_DIR}/asan_ignore_list.txt:use_sigaltstack=0")
110+
SANITIZER_ARGS+=("--test_env=LSAN_OPTIONS=suppressions=${SCRIPT_DIR}/lsan_ignore_list.txt:use_sigaltstack=0")
111+
SANITIZER_ARGS+=("--config=asan")
112+
TAG_FILTERS=$TAG_FILTERS,-noasan
113+
shift
114+
elif [[ $1 == "tsan" ]]; then
115+
SANITIZER_ARGS+=("--test_env=TSAN_OPTIONS=suppressions=${SCRIPT_DIR}/tsan_ignore_list.txt::history_size=7:ignore_noninstrumented_modules=1")
116+
SANITIZER_ARGS+=("--config=tsan")
117+
TAG_FILTERS=$TAG_FILTERS,-notsan
118+
shift
119+
fi
120+
121+
bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
61122
--config=rocm_ci \
62-
--build_tag_filters=${TAGS_FILTER} \
63-
--test_tag_filters=${TAGS_FILTER} \
123+
--config=xla_sgpu \
124+
--disk_cache=${BAZEL_DISK_CACHE_DIR} \
125+
--profile=/tf/pkg/profile.json.gz \
126+
--experimental_disk_cache_gc_max_size=${BAZEL_DISK_CACHE_SIZE} \
127+
--experimental_guard_against_concurrent_changes \
128+
--build_tag_filters=$TAG_FILTERS \
129+
--test_tag_filters=$TAG_FILTERS \
64130
--test_timeout=920,2400,7200,9600 \
65131
--test_sharding_strategy=disabled \
66132
--test_output=errors \
@@ -69,8 +135,16 @@ bazel \
69135
--local_test_jobs=${N_TEST_JOBS} \
70136
--test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
71137
--test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
72-
--action_env=TF_ROCM_AMDGPU_TARGETS=${AMD_GPU_GFX_ID} \
73-
--action_env=XLA_FLAGS=--xla_gpu_force_compilation_parallelism=16 \
74-
--action_env=XLA_FLAGS=--xla_gpu_enable_llvm_module_compilation_parallelism=true \
138+
--action_env=TF_ROCM_AMDGPU_TARGETS=${GPU_NAME} \
139+
--action_env=XLA_FLAGS="--xla_gpu_enable_llvm_module_compilation_parallelism=true --xla_gpu_force_compilation_parallelism=16" \
75140
--run_under=//build_tools/ci:parallel_gpu_execute \
76-
-- //xla/...
141+
--test_env=MIOPEN_FIND_ENFORCE=5 \
142+
--test_env=MIOPEN_FIND_MODE=1 \
143+
--test_filter="-$(IFS=: ; echo "${EXCLUDED_TESTS[*]}")" \
144+
"${SANITIZER_ARGS[@]}" \
145+
"$@"
146+
147+
# clean up bazel disk_cache
148+
bazel shutdown \
149+
--disk_cache=${BAZEL_DISK_CACHE_DIR} \
150+
--experimental_disk_cache_gc_max_size=${BAZEL_DISK_CACHE_SIZE}

0 commit comments

Comments
 (0)