@@ -37,30 +37,96 @@ echo ""
3737echo " Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s) for gpu ${AMD_GPU_GFX_ID} ."
3838echo " "
3939
40- # First positional argument (if any) specifies the ROCM_INSTALL_DIR
41- if [[ -n $1 ]]; then
42- ROCM_INSTALL_DIR=$1
43- else
44- if [[ -z " ${ROCM_PATH} " ]]; then
45- ROCM_INSTALL_DIR=/opt/rocm/
46- else
47- ROCM_INSTALL_DIR=$ROCM_PATH
48- fi
49- fi
50-
5140export PYTHON_BIN_PATH=` which python3`
5241export TF_NEED_ROCM=1
53- export ROCM_PATH=$ROCM_INSTALL_DIR
54- TAGS_FILTER=" requires-gpu-amd,-requires-gpu-nvidia,-requires-gpu-intel,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-cuda-only,-oneapi-only"
55- UNSUPPORTED_GPU_TAGS=" $( echo -requires-gpu-sm{60,70,80,86,89,90}{,-only}) "
56- TAGS_FILTER=" ${TAGS_FILTER} ,${UNSUPPORTED_GPU_TAGS// / ,} "
42+ export ROCM_PATH=" /opt/rocm"
43+
44+ GPU_NAME=(` rocminfo | grep -m 1 gfx` )
45+ GPU_NAME=${GPU_NAME[1]}
46+
47+ EXCLUDED_TESTS=(
48+ # //xla/pjrt/c:pjrt_c_api_gpu_test_gpu_amd_any
49+ PjrtCAPIGpuExtensionTest.TritonCompile
50+ # //xla/backends/gpu/codegen/triton:fusion_emitter_device_test_gpu_amd_any
51+ TritonEmitterTest.CheckRocmWarpSize
52+ TritonEmitterTest.ConvertF16ToF8E5M2Exhaustive
53+ TritonEmitterTest.FP8ToFP8EndToEnd
54+ TritonEmitterTest.FusionWithOutputContainingMoreThanInt32MaxElementsExecutesCorrectly
55+ BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F64_F64_F64
56+ # //xla/backends/gpu/codegen/triton:fusion_emitter_device_legacy_test_gpu_amd_any
57+ TritonGemmTest.BroadcastOfVectorConstantIsFused
58+ TritonGemmTest.FailIfTooMuchShmem
59+ TritonGemmTest.SplitAndTransposeLhsExecutesCorrectly
60+ # //xla/backends/gpu/codegen/triton:fusion_emitter_int4_device_test_gpu_amd_any
61+ TritonTest.NonstandardLayoutWithManyNonContractingDims
62+ TritonTest.NonstandardLayoutWithManyNonContractingDimsReversedLayout
63+ # //xla/hlo/builder/lib:self_adjoint_eig_test_gpu_amd_any marked as flaky but randomly red after 3 attempts
64+ RandomEighTestInstantiation/RandomEighTest.Random/*
65+ # temp excludes for 0.7.1
66+ CompareTest.SplitK
67+ TritonEmitterTest.RocmWarpSizeIsSetCorrectly
68+ MultiOutputFusionTest.MultiOutputReduceFusionMajorWithExtraOutput
69+ TestRadixSort/CubSortKeysTest.SortKeys/*
70+ GpuIrEmitterUnnestedTest.CanNotEmitTritonCustomCallOnPreAmpereGpu
71+ CommandBufferConversionPassTest.ConvertWhileThunkWithAsyncPair
72+ CommandBufferConversionPassTest.ConvertWhileThunk
73+ TritonFusionNumericsVerifierTest.CompilationSucceedsEvenIfKernelWillSpillRegisters
74+ TritonFusionNumericsVerifierTest.VerifyThatDisablingTritonIsFast
75+ TritonFusionNumericsVerifierTestSuite/TritonFusionNumericsVerifierTest.VerifyNestedGemmNumerics/1
76+ TritonGemmTest.FailForTooComplexTiling
77+ TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_tf32_tf32_f32
78+ TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_f32_f32_f32
79+ TritonAndBlasSupportForDifferentTensorSizes/TritonAndBlasSupportForDifferentTensorSizes.IsDotAlgorithmSupportedByTriton/dot_tf32_tf32_f32_x3
80+ TestRadixSort/CubSortPairsTest.SortPairs/*
81+ GpuKernelTilingTest.ReductionInputTooLarge
82+ DeterminismTest.Conv
83+ TopKTests/TopKKernelTest*
84+ DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f8e5m2_dot
85+ DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f32_dot
86+ TritonNormalizationTest.CanFuseAndEmitDiamondWithBF16Converts
87+ ElementwiseTestSuiteF16/UnaryElementwiseTest.ElementwiseUnaryOpExecutesCorrectly/f16_cosine
88+ ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseBinaryOpExecutesCorrectly/f16_atan2
89+ ElementwiseTestSuiteF16/BinaryElementwiseTest.ElementwiseFusionExecutesCorrectly/f16_atan2
90+ TritonTest.FuseSubchannelDequantizationWithTranspose
91+ BasicDotAlgorithmEmitterTestSuite/BasicDotAlgorithmEmitterTest.BasicAlgorithmIsEmittedCorrectly/ALG_DOT_F16_F16_F16
92+ CommandBufferTests/CommandBufferTest.IndexConditional/*
93+ CommandBufferTests/CommandBufferTest.WhileLoop/*
94+ CommandBufferTests/CommandBufferTest.TrueFalseConditional/*
95+ )
96+
97+ BAZEL_DISK_CACHE_SIZE=100G
98+ BAZEL_DISK_CACHE_DIR=" /tf/disk_cache/rocm-jaxlib-v0.7.1"
99+ mkdir -p ${BAZEL_DISK_CACHE_DIR}
100+ if [ ! -d /tf/pkg ]; then
101+ mkdir -p /tf/pkg
102+ fi
103+
104+ SCRIPT_DIR=$( realpath $( dirname $0 ) )
105+ TAG_FILTERS=$( $SCRIPT_DIR /rocm_tag_filters.sh) ,-multigpu,-multi_gpu_h100,requires-gpu-amd,-skip_rocprofiler_sdk,-no_oss,-oss_excluded,-oss_serial
57106
58- bazel \
59- test \
60- --define xnn_enable_avxvnniint8=false --define xnn_enable_avx512fp16=false \
107+ SANITIZER_ARGS=()
108+ if [[ $1 == " asan" ]]; then
109+ SANITIZER_ARGS+=(" --test_env=ASAN_OPTIONS=suppressions=${SCRIPT_DIR} /asan_ignore_list.txt:use_sigaltstack=0" )
110+ SANITIZER_ARGS+=(" --test_env=LSAN_OPTIONS=suppressions=${SCRIPT_DIR} /lsan_ignore_list.txt:use_sigaltstack=0" )
111+ SANITIZER_ARGS+=(" --config=asan" )
112+ TAG_FILTERS=$TAG_FILTERS ,-noasan
113+ shift
114+ elif [[ $1 == " tsan" ]]; then
115+ SANITIZER_ARGS+=(" --test_env=TSAN_OPTIONS=suppressions=${SCRIPT_DIR} /tsan_ignore_list.txt::history_size=7:ignore_noninstrumented_modules=1" )
116+ SANITIZER_ARGS+=(" --config=tsan" )
117+ TAG_FILTERS=$TAG_FILTERS ,-notsan
118+ shift
119+ fi
120+
121+ bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
61122 --config=rocm_ci \
62- --build_tag_filters=${TAGS_FILTER} \
63- --test_tag_filters=${TAGS_FILTER} \
123+ --config=xla_sgpu \
124+ --disk_cache=${BAZEL_DISK_CACHE_DIR} \
125+ --profile=/tf/pkg/profile.json.gz \
126+ --experimental_disk_cache_gc_max_size=${BAZEL_DISK_CACHE_SIZE} \
127+ --experimental_guard_against_concurrent_changes \
128+ --build_tag_filters=$TAG_FILTERS \
129+ --test_tag_filters=$TAG_FILTERS \
64130 --test_timeout=920,2400,7200,9600 \
65131 --test_sharding_strategy=disabled \
66132 --test_output=errors \
@@ -69,8 +135,16 @@ bazel \
69135 --local_test_jobs=${N_TEST_JOBS} \
70136 --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
71137 --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
72- --action_env=TF_ROCM_AMDGPU_TARGETS=${AMD_GPU_GFX_ID} \
73- --action_env=XLA_FLAGS=--xla_gpu_force_compilation_parallelism=16 \
74- --action_env=XLA_FLAGS=--xla_gpu_enable_llvm_module_compilation_parallelism=true \
138+ --action_env=TF_ROCM_AMDGPU_TARGETS=${GPU_NAME} \
139+ --action_env=XLA_FLAGS=" --xla_gpu_enable_llvm_module_compilation_parallelism=true --xla_gpu_force_compilation_parallelism=16" \
75140 --run_under=//build_tools/ci:parallel_gpu_execute \
76- -- //xla/...
141+ --test_env=MIOPEN_FIND_ENFORCE=5 \
142+ --test_env=MIOPEN_FIND_MODE=1 \
143+ --test_filter=-$( IFS=: ; echo " ${EXCLUDED_TESTS[*]} " ) \
144+ " ${SANITIZER_ARGS[@]} " \
145+ " $@ "
146+
147+ # clean up bazel disk_cache
148+ bazel shutdown \
149+ --disk_cache=${BAZEL_DISK_CACHE_DIR} \
150+ --experimental_disk_cache_gc_max_size=${BAZEL_DISK_CACHE_SIZE}
0 commit comments