From 83c837bf2b1506ead1e7dc8100377d4af5e87876 Mon Sep 17 00:00:00 2001
From: Ethan Wong <ewong@anl.gov>
Date: Tue, 22 Jul 2025 15:29:36 -0500
Subject: [PATCH 1/3] test: add `testlist.bench` performance tests

Introduces a testlist running latency and bandwidth tests for senddev
and recvdev (host/device) combinations. The result of this testlist is
fed into gnuplot by the test suite to generate performance graph
artifacts.
---
 test/mpi/bench/testlist.bench | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 test/mpi/bench/testlist.bench
diff --git a/test/mpi/bench/testlist.bench b/test/mpi/bench/testlist.bench
new file mode 100644
index 00000000000..6e4c0259d10
--- /dev/null
+++ b/test/mpi/bench/testlist.bench
@@ -0,0 +1,14 @@
+# performance tests are performed for (senddev, recvdev) \in {host, device}^2
+# host->host always runs, even on non-gpu targets; the others need hasgpu
+
+# latency tests
+p2p_latency 2 arg=-senddev=host arg=-recvdev=host resultTest=TestBench
+p2p_latency 2 arg=-senddev=host arg=-recvdev=device hasgpu=any resultTest=TestBench
+p2p_latency 2 arg=-senddev=device arg=-recvdev=host hasgpu=any resultTest=TestBench
+p2p_latency 2 arg=-senddev=device arg=-recvdev=device hasgpu=any resultTest=TestBench
+
+# bw tests
+p2p_bw 2 arg=-senddev=host arg=-recvdev=host resultTest=TestBench
+p2p_bw 2 arg=-senddev=host arg=-recvdev=device hasgpu=any resultTest=TestBench
+p2p_bw 2 arg=-senddev=device arg=-recvdev=host hasgpu=any resultTest=TestBench
+p2p_bw 2 arg=-senddev=device arg=-recvdev=device hasgpu=any resultTest=TestBench

From dc545fa46fc017c5daf469fd1df7b02f18ee36d1 Mon Sep 17 00:00:00 2001
From: Ethan Wong <ewong@anl.gov>
Date: Tue, 29 Jul 2025 10:30:41 -0500
Subject: [PATCH 2/3] =?UTF-8?q?test:=20add=20`hasgpu=3D=3F`=20library=20re?=
 =?UTF-8?q?quirement=20flag=20to=20`runtests`?=

Reintroduces the `gpu` testlist flag removed in PR #7506 as `hasgpu=?`.
However, it is now implemented as a GPU library dependency requirement:
a test is skipped unless a library it names was detected by configure.
Also adds a corresponding `MPITEST_HASGPU` environment variable (a
comma-separated list of libraries) for manual testing.
---
 test/mpi/configure.ac |  3 +++
 test/mpi/runtests     | 22 ++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/test/mpi/configure.ac b/test/mpi/configure.ac
index f066f92538d..91755791dc3 100644
--- a/test/mpi/configure.ac
+++ b/test/mpi/configure.ac
@@ -792,6 +792,7 @@ AC_SUBST(cudadir)
 AC_ARG_VAR([NVCC], [nvcc compiler to use])
 if test "X${pac_have_cuda}" = "Xyes" ; then
     AC_DEFINE([HAVE_CUDA],[1],[Define if CUDA is available])
+    RUNTESTS_OPTS="$RUNTESTS_OPTS -hasgpu=cuda"
     have_gpu="yes"
     if test -n "${with_cuda}" -a "$with_cuda" != "no" ; then
         cuda_CPPFLAGS="-I${with_cuda}/include"
@@ -837,6 +838,7 @@ if test "$have_gpu" = "no" ; then
     ze_LIBS=""
     if test "X${pac_have_ze}" = "Xyes" ; then
         AC_DEFINE([HAVE_ZE],[1],[Define if ZE is available])
+        RUNTESTS_OPTS="$RUNTESTS_OPTS -hasgpu=ze"
         have_gpu="yes"
         if test -n "${with_ze}" -a "$with_ze" != "no" ; then
             ze_CPPFLAGS="-I${with_ze}/include"
@@ -866,6 +868,7 @@ if test "$have_gpu" = "no" ; then
     hip_LIBS=""
     if test "X$pac_have_hip" = "Xyes" ; then
        AC_DEFINE([HAVE_HIP],[1],[Define if HIP is available])
+       RUNTESTS_OPTS="$RUNTESTS_OPTS -hasgpu=hip"
        have_gpu="yes"
        if test -n "${with_hip}" -a "$with_hip" != "no" ; then
           hip_CPPFLAGS="-I${with_hip}/include"
diff --git a/test/mpi/runtests b/test/mpi/runtests
index 2f87885a9ec..583fb9c6997 100755
--- a/test/mpi/runtests
+++ b/test/mpi/runtests
@@ -73,6 +73,7 @@ $g_opt{memory_total} = 4;       # Total memory in GB
 $g_opt{memory_multiplier} = 1;  # No of simultaneous jobs
 $g_opt{cleanup} = 1;            # Whether to remove the compiled programs
 $g_opt{start_time} = time();    # So we can track accumulative test duration
+$g_opt{hasgpu} = {};            # will run tests marked as "hasgpu=<value>"
 $g_opt{strict} = 0;             # will skip tests marked as "strict=false"
 $g_opt{runxfail} = 0;           # will run xfailed tests
 $g_opt{exeext} = "";
@@ -227,6 +228,11 @@ if (defined($ENV{'MPITEST_MPIEXECARG'})) {
 if (defined($ENV{'MPITEST_SINGLETON'})) {
     $g_opt{mpitest_singleton} = $ENV{'MPITEST_SINGLETON'};
 }
+if (defined($ENV{'MPITEST_HASGPU'})) {
+    foreach my $lib (split /,\s*/, $ENV{'MPITEST_HASGPU'}) {
+        $g_opt{hasgpu}{$lib} = 1;
+    }
+}
 
 #---------------------------------------------------------------------------
 # Process arguments and override any defaults
@@ -251,6 +257,7 @@ foreach $_ (@ARGV) {
     elsif (/--?batchdir=(.*)/) { $g_opt{batrundir} = $1; }
     elsif (/--?batch/) { $g_opt{batchRun} = 1; }
     elsif (/--?timeoutarg=(.*)/) { $g_opt{timeoutarg} = $1; }
+    elsif (/--?hasgpu=(.*)/) { my $libs = $1; $g_opt{hasgpu}{$_} = 1 foreach split /,\s*/, $libs; }  # comma list, like MPITEST_HASGPU
     elsif (/--?strict/) { $g_opt{strict} = 1; }
     elsif (/--?runxfail/) { $g_opt{runxfail} = 1; }
     elsif (/--?xmlfile=(.*)/) {
@@ -436,6 +443,8 @@ sub LoadTests {
 
             my $np = "";
             my $requiresStrict = "";
+            my @requiresGPU;
+            my $hasGPU = 0;
 
             if ($#args >= 1) { $np = $args[1]; }
 
@@ -462,6 +471,12 @@ sub LoadTests {
                     elsif ($key eq "strict") {
                         $requiresStrict = $value
                     }
+                    elsif ($key eq "hasgpu") {
+                        # "any" is met only if at least one GPU library is enabled.
+                        push @requiresGPU, $value;
+                        $hasGPU = 1 if exists $g_opt{hasgpu}{$value}
+                            or ($value eq "any" and %{$g_opt{hasgpu}});
+                    }
                     else {
                         print STDERR "Unrecognized key $key in $listfile_path\n";
                     }
@@ -500,6 +515,13 @@ sub LoadTests {
                 $test_opt->{name} = $2;
             }
 
+            if (@requiresGPU && !$hasGPU) {
+                # Skip: the test names GPU libraries and none of them is enabled.
+                SkippedTest($test_opt, "one of the following gpu libraries " .
+                    "needed, but not provided: " . join(", ", @requiresGPU));
+                next;
+            }
+
             # Check whether strict is required by MPI but not by the
             # test (use strict=false for tests that use non-standard extensions)
             if (lc($requiresStrict) eq "false" && $g_opt{strict}) {

From 529ff883a8eea6efc89b3612d2eeb0d5cadd93dd Mon Sep 17 00:00:00 2001
From: Ethan Wong <ewong@anl.gov>
Date: Wed, 13 Aug 2025 13:29:48 -0500
Subject: [PATCH 3/3] =?UTF-8?q?test:=20document=20`hasgpu=3D=3F`?=

---
 test/mpi/README | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/test/mpi/README b/test/mpi/README
index 9ff7f97a26c..394b70272cd 100644
--- a/test/mpi/README
+++ b/test/mpi/README
@@ -143,6 +143,11 @@ mpiexecarg=string  : Run the program with string as an argument to mpiexec
 env=name=value : Run the program with environment variable "name" given the
                  value "value"
 
+hasgpu=lib : Setting this flag with a GPU library will allow the test to run
+             only if `-hasgpu=lib` was passed to runtests. Using `hasgpu=any`
+             allows the test to run so long as any GPU library was passed in
+             as a flag.
+
 strict=bool : If bool is false, only build and run the program if 
               --enable-strictmpi was not used in configuring the test suite.
 	      That is, a line such as