From 83c837bf2b1506ead1e7dc8100377d4af5e87876 Mon Sep 17 00:00:00 2001 From: Ethan Wong Date: Tue, 22 Jul 2025 15:29:36 -0500 Subject: [PATCH 1/3] test: add `testlist.bench` performance tests Introduces a testlist running latency and bandwidth tests for senddev and recvdev (host/device) combinations. The result of this testlist is fed into gnuplot by the test suite to generate performance graph artifacts. --- test/mpi/bench/testlist.bench | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 test/mpi/bench/testlist.bench diff --git a/test/mpi/bench/testlist.bench b/test/mpi/bench/testlist.bench new file mode 100644 index 00000000000..6e4c0259d10 --- /dev/null +++ b/test/mpi/bench/testlist.bench @@ -0,0 +1,14 @@ +# performance tests are performed for (senddev, recvdev) \in {host, device}^2 +# host->host has no hasgpu requirement, so it also runs on non-gpu targets + +# latency tests +p2p_latency 2 arg=-senddev=host arg=-recvdev=host resultTest=TestBench +p2p_latency 2 arg=-senddev=host arg=-recvdev=device hasgpu=any resultTest=TestBench +p2p_latency 2 arg=-senddev=device arg=-recvdev=host hasgpu=any resultTest=TestBench +p2p_latency 2 arg=-senddev=device arg=-recvdev=device hasgpu=any resultTest=TestBench + +# bw tests +p2p_bw 2 arg=-senddev=host arg=-recvdev=host resultTest=TestBench +p2p_bw 2 arg=-senddev=host arg=-recvdev=device hasgpu=any resultTest=TestBench +p2p_bw 2 arg=-senddev=device arg=-recvdev=host hasgpu=any resultTest=TestBench +p2p_bw 2 arg=-senddev=device arg=-recvdev=device hasgpu=any resultTest=TestBench From dc545fa46fc017c5daf469fd1df7b02f18ee36d1 Mon Sep 17 00:00:00 2001 From: Ethan Wong Date: Tue, 29 Jul 2025 10:30:41 -0500 Subject: [PATCH 2/3] =?UTF-8?q?test:=20add=20`hasgpu=3D=3F`=20library=20re?= =?UTF-8?q?quirement=20flag=20to=20`runtests`?= Reintroduces the `gpu` testlist flag removed in PR #7506 as `hasgpu=?`. However, it is now implemented as a GPU library dependency requirement which skips tests if they are not detected by configure. 
Also adds a corresponding `MPITEST_HASGPU` environment variable for manual testing. --- test/mpi/configure.ac | 3 +++ test/mpi/runtests | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/test/mpi/configure.ac b/test/mpi/configure.ac index f066f92538d..91755791dc3 100644 --- a/test/mpi/configure.ac +++ b/test/mpi/configure.ac @@ -792,6 +792,7 @@ AC_SUBST(cudadir) AC_ARG_VAR([NVCC], [nvcc compiler to use]) if test "X${pac_have_cuda}" = "Xyes" ; then AC_DEFINE([HAVE_CUDA],[1],[Define if CUDA is available]) + RUNTESTS_OPTS="$RUNTESTS_OPTS -hasgpu=cuda" have_gpu="yes" if test -n "${with_cuda}" -a "$with_cuda" != "no" ; then cuda_CPPFLAGS="-I${with_cuda}/include" @@ -837,6 +838,7 @@ if test "$have_gpu" = "no" ; then ze_LIBS="" if test "X${pac_have_ze}" = "Xyes" ; then AC_DEFINE([HAVE_ZE],[1],[Define if ZE is available]) + RUNTESTS_OPTS="$RUNTESTS_OPTS -hasgpu=ze" have_gpu="yes" if test -n "${with_ze}" -a "$with_ze" != "no" ; then ze_CPPFLAGS="-I${with_ze}/include" @@ -866,6 +868,7 @@ if test "$have_gpu" = "no" ; then hip_LIBS="" if test "X$pac_have_hip" = "Xyes" ; then AC_DEFINE([HAVE_HIP],[1],[Define if HIP is available]) + RUNTESTS_OPTS="$RUNTESTS_OPTS -hasgpu=hip" have_gpu="yes" if test -n "${with_hip}" -a "$with_hip" != "no" ; then hip_CPPFLAGS="-I${with_hip}/include" diff --git a/test/mpi/runtests b/test/mpi/runtests index 2f87885a9ec..583fb9c6997 100755 --- a/test/mpi/runtests +++ b/test/mpi/runtests @@ -73,6 +73,7 @@ $g_opt{memory_total} = 4; # Total memory in GB $g_opt{memory_multiplier} = 1; # No of simultaneous jobs $g_opt{cleanup} = 1; # Whether to remove the compiled programs $g_opt{start_time} = time(); # So we can track accumulative test duration +$g_opt{hasgpu} = {}; # will run tests marked as "hasgpu=" $g_opt{strict} = 0; # will skip tests marked as "strict=false" $g_opt{runxfail} = 0; # will run xfailed tests $g_opt{exeext} = ""; @@ -227,6 +228,11 @@ if (defined($ENV{'MPITEST_MPIEXECARG'})) { if 
(defined($ENV{'MPITEST_SINGLETON'})) { $g_opt{mpitest_singleton} = $ENV{'MPITEST_SINGLETON'}; } +if (defined($ENV{'MPITEST_HASGPU'})) { + foreach my $lib (split /,\s*/, $ENV{'MPITEST_HASGPU'}) { + $g_opt{hasgpu}{$lib} = 1; + } +} #--------------------------------------------------------------------------- # Process arguments and override any defaults @@ -251,6 +257,7 @@ foreach $_ (@ARGV) { elsif (/--?batchdir=(.*)/) { $g_opt{batrundir} = $1; } elsif (/--?batch/) { $g_opt{batchRun} = 1; } elsif (/--?timeoutarg=(.*)/) { $g_opt{timeoutarg} = $1; } + elsif (/--?hasgpu=(.*)/) { $g_opt{hasgpu}{$1} = 1; } elsif (/--?strict/) { $g_opt{strict} = 1; } elsif (/--?runxfail/) { $g_opt{runxfail} = 1; } elsif (/--?xmlfile=(.*)/) { @@ -436,6 +443,8 @@ sub LoadTests { my $np = ""; my $requiresStrict = ""; + my @requiresGPU; + my $hasGPU = 0; if ($#args >= 1) { $np = $args[1]; } @@ -462,6 +471,12 @@ sub LoadTests { elsif ($key eq "strict") { $requiresStrict = $value } + elsif ($key eq "hasgpu") { + push @requiresGPU, $value; + if (exists $g_opt{hasgpu}{$value} or ($value eq "any" and keys %{$g_opt{hasgpu}})) { + $hasGPU = 1; + } + } else { print STDERR "Unrecognized key $key in $listfile_path\n"; } @@ -500,6 +515,13 @@ sub LoadTests { $test_opt->{name} = $2; } + if (@requiresGPU && !$hasGPU) { + # Skip tests whose hasgpu= requirement is unmet ("any" needs at least one gpu library). + SkippedTest($test_opt, "one of the following gpu libraries " . + "needed, but not provided: " . 
join(", ", @requiresGPU)); + next; + } + # Check whether strict is required by MPI but not by the # test (use strict=false for tests that use non-standard extensions) if (lc($requiresStrict) eq "false" && $g_opt{strict}) { From 529ff883a8eea6efc89b3612d2eeb0d5cadd93dd Mon Sep 17 00:00:00 2001 From: Ethan Wong Date: Wed, 13 Aug 2025 13:29:48 -0500 Subject: [PATCH 3/3] =?UTF-8?q?test:=20document=20`hasgpu=3D=3F`?= --- test/mpi/README | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/mpi/README b/test/mpi/README index 9ff7f97a26c..394b70272cd 100644 --- a/test/mpi/README +++ b/test/mpi/README @@ -143,6 +143,11 @@ mpiexecarg=string : Run the program with string as an argument to mpiexec env=name=value : Run the program with environment variable "name" given the value "value" +hasgpu=lib : Setting this flag with a GPU library will allow the test to run + only if `-hasgpu=lib` was passed to runtests. Using `hasgpu=any` + allows the test to run so long as any GPU library was passed in + as a flag. + strict=bool : If bool is false, only build and run the program if --enable-strictmpi was not used in configuring the test suite. That is, a line such as