195 changes: 132 additions & 63 deletions easybuild/easyblocks/p/pytorch.py
@@ -44,18 +44,41 @@


if sys.version_info >= (3, 9):
from typing import NamedTuple
FailedTestNames = NamedTuple('FailedTestNames', [('error', list[str]), ('fail', list[str])])
TestSuiteResult = NamedTuple('TestSuiteResult', [('name', str), ('summary', str)])
TestResult = NamedTuple('TestResult', [('test_cnt', int),
('error_cnt', int),
('failure_cnt', int),
('failed_suites', list[TestSuiteResult])])
from dataclasses import dataclass

@dataclass
class FailedTestNames:
"""Hold list of tests names that failed with error or failure"""
error: list[str]
fail: list[str]

@dataclass
class TestSuiteResult:
"""Hold the name of a test suite and a summary of the failures"""
name: str
summary: str

@dataclass
class TestResult:
"""Status report and results of a test run"""
test_cnt: int
error_cnt: int
failure_cnt: int
failed_suites: list[TestSuiteResult]
terminated_suites: dict[str, str] # Name and signal of terminated suites
all_failed_suites: set[str] # Names of all failed suites
else:
from collections import namedtuple
FailedTestNames = namedtuple('FailedTestNames', ('error', 'fail'))
TestSuiteResult = namedtuple('TestSuiteResult', ('name', 'summary'))
TestResult = namedtuple('TestResult', ('test_cnt', 'error_cnt', 'failure_cnt', 'failed_suites'))
TerminatedTestSuite = namedtuple('TerminatedTestSuite', ('name', 'signal'))
TestResult = namedtuple('TestResult', ('test_cnt',
'error_cnt',
'failure_cnt',
'failed_suites',
'terminated_suites',
'all_failed_suites'
))
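
Both branches expose the same attribute-style field access, so the parsing helpers below behave identically on either Python version. A minimal sketch of constructing the result types (all values are made up for illustration):

result = TestResult(test_cnt=42, error_cnt=1, failure_cnt=2,
                    failed_suites=[TestSuiteResult(name='test_nn', summary='2 failed, 1 error')],
                    terminated_suites={'test_cpp_extensions': 'SIGSEGV'},
                    all_failed_suites={'test_nn', 'test_cpp_extensions'})
assert result.failure_cnt == 2  # attribute access works for both the dataclass and the namedtuple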


def find_failed_test_names(tests_out):
@@ -198,7 +221,17 @@ def get_count_for_pattern(regex, text):
for m in re.finditer(regex, tests_out, re.M):
test_cnt += sum(get_count_for_pattern(p, m.group("summary")) for p in count_patterns)

return TestResult(test_cnt=test_cnt, error_cnt=error_cnt, failure_cnt=failure_cnt, failed_suites=failed_suites)
# Gather all failed test suites in case we missed any,
# e.g. when it exited due to syntax errors or with a signal such as SIGSEGV
failed_suites_and_signal = set(
re.findall(r"^(?P<test_name>.*) failed!(?: Received signal: (\w+))?\s*$", tests_out, re.M)
)

return TestResult(test_cnt=test_cnt, error_cnt=error_cnt, failure_cnt=failure_cnt,
failed_suites=failed_suites,
# Assumes that the suite name is unique
terminated_suites={name: signal for name, signal in failed_suites_and_signal if signal},
all_failed_suites={i[0] for i in failed_suites_and_signal})
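
For reference, the catch-all regex above yields one (name, signal) tuple per "failed!" line, with an empty signal when none was reported. A small sketch against made-up test output (suite names are hypothetical):

import re

sample_out = (
    "test_autograd failed!\n"
    "test_cpp_extensions failed! Received signal: SIGSEGV\n"
)
matches = set(re.findall(r"^(?P<test_name>.*) failed!(?: Received signal: (\w+))?\s*$", sample_out, re.M))
# matches == {('test_autograd', ''), ('test_cpp_extensions', 'SIGSEGV')}
terminated = {name: signal for name, signal in matches if signal}  # {'test_cpp_extensions': 'SIGSEGV'}
all_failed = {name for name, _ in matches}  # {'test_autograd', 'test_cpp_extensions'}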


class EB_PyTorch(PythonPackage):
@@ -462,17 +495,17 @@ def test_step(self):
'excluded_tests': ' '.join(excluded_tests)
})

test_result = super(EB_PyTorch, self).test_step(return_output_ec=True)
if test_result is None:
parsed_test_result = super(EB_PyTorch, self).test_step(return_output_ec=True)
if parsed_test_result is None:
if self.cfg['runtest'] is False:
msg = "Do not set 'runtest' to False, use --skip-test-step instead."
else:
msg = "Tests did not run. Make sure 'runtest' is set to a command."
raise EasyBuildError(msg)

tests_out, tests_ec = test_result
tests_out, tests_ec = parsed_test_result

# Show failed subtests to aid in debugging failures
# Show failed subtests, if any, to aid in debugging failures
failed_test_names = find_failed_test_names(tests_out)
if failed_test_names.error or failed_test_names.fail:
msg = []
@@ -485,60 +518,92 @@ def test_step(self):
self.log.warning("\n".join(msg))

# Create clear summary report
test_result = parse_test_log(tests_out)
failure_report = ['%s (%s)' % (suite.name, suite.summary) for suite in test_result.failed_suites]
failed_test_suites = set(suite.name for suite in test_result.failed_suites)
# Gather all failed tests suites in case we missed any (e.g. when it exited due to syntax errors)
# Also unique to be able to compare the lists below
all_failed_test_suites = set(
re.findall(r"^(?P<test_name>.*) failed!(?: Received signal: \w+)?\s*$", tests_out, re.M)
)
# If we missed any test suites prepend a list of all failed test suites
parsed_test_result = parse_test_log(tests_out)
# Use a list of messages we can later join together
failure_msgs = ['\t%s (%s)' % (suite.name, suite.summary) for suite in parsed_test_result.failed_suites]
# These were accounted for
failed_test_suites = set(suite.name for suite in parsed_test_result.failed_suites)
# Those are all that failed according to the summary output
all_failed_test_suites = parsed_test_result.all_failed_suites
# We should have determined all failed test suites and only those.
# Otherwise show the mismatch and terminate later
if failed_test_suites != all_failed_test_suites:
failure_report = ['Failed tests (suites/files):'] + failure_report
failure_msgs.insert(0, 'Failed tests (suites/files):')
# Test suites where we didn't match a specific regexp and hence likely didn't count the failures
failure_report.extend('+ %s' % t for t in sorted(all_failed_test_suites - failed_test_suites))
uncounted_test_suites = all_failed_test_suites - failed_test_suites
if uncounted_test_suites:
failure_msgs.append('Could not count failed tests for the following test suites/files:')
for suite_name in sorted(uncounted_test_suites):
try:
signal = parsed_test_result.terminated_suites[suite_name]
reason = f'Terminated with {signal}'
except KeyError:
# Not ended with signal, might have failed due to e.g. syntax errors
reason = 'Undetected or did not run properly'
failure_msgs.append(f'\t{suite_name} ({reason})')
# Test suites not included in the catch-all regexp but counted. Should be empty.
failure_report.extend('? %s' % t for t in sorted(failed_test_suites - all_failed_test_suites))

failure_report = '\n'.join(failure_report)
unexpected_test_suites = failed_test_suites - all_failed_test_suites
if unexpected_test_suites:
failure_msgs.append('Counted failures of tests from the following test suites/files that are not '
'contained in the summary output of PyTorch:')
failure_msgs.extend(sorted(unexpected_test_suites))

# Calculate the total number of unsuccessful tests
failed_test_cnt = test_result.failure_cnt + test_result.error_cnt
failed_test_cnt = parsed_test_result.failure_cnt + parsed_test_result.error_cnt
# Only add count message if we detected any failed tests
if failed_test_cnt > 0:
failure_or_failures = 'failure' if parsed_test_result.failure_cnt == 1 else 'failures'
error_or_errors = 'error' if parsed_test_result.error_cnt == 1 else 'errors'
failure_msgs.insert(0, "%d test %s, %d test %s (out of %d):" % (
parsed_test_result.failure_cnt, failure_or_failures,
parsed_test_result.error_cnt, error_or_errors,
parsed_test_result.test_cnt
))

# Assemble final report
failure_report = '\n'.join(failure_msgs)

if failed_test_suites != all_failed_test_suites:
# Fail because we can't be sure how many tests failed
# so comparing to max_failed_tests cannot reasonably be done
if failed_test_suites | set(parsed_test_result.terminated_suites) == all_failed_test_suites:
# All failed test suites are either counted or terminated with a signal
msg = ('Failing because these test suites were terminated, which makes it impossible '
'to accurately count the failed tests: ')
msg += ", ".join("%s(%s)" % name_signal
for name_signal in sorted(parsed_test_result.terminated_suites.items()))
elif len(failed_test_suites) < len(all_failed_test_suites):
msg = ('Failing because not all failed tests could be determined. '
'Tests failed to start or the test accounting in the PyTorch EasyBlock needs updating!\n'
'Missing: ' + ', '.join(sorted(all_failed_test_suites - failed_test_suites)))
else:
msg = ('Failing because there were unexpected failures detected: ' +
', '.join(sorted(failed_test_suites - all_failed_test_suites)))
raise EasyBuildError(msg + '\n' +
'You can check the test failures (in the log) manually and if they are harmless, '
'use --ignore-test-failures to make the test step pass.\n' + failure_report)
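
# Worked example (annotation, not part of the change): given
#   failed_test_suites     = {'test_a'}             - failures counted via the known patterns
#   terminated_suites      = {'test_b': 'SIGSEGV'}  - suite killed by a signal
#   all_failed_test_suites = {'test_a', 'test_b'}   - from the catch-all 'failed!' regex
# the first branch applies, since {'test_a'} | {'test_b'} covers all_failed_test_suites,
# and the failure is attributed to the terminated suite rather than to a gap in the
# easyblock's test accounting.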

if failed_test_cnt > 0:
max_failed_tests = self.cfg['max_failed_tests']

failure_or_failures = 'failure' if test_result.failure_cnt == 1 else 'failures'
error_or_errors = 'error' if test_result.error_cnt == 1 else 'errors'
msg = "%d test %s, %d test %s (out of %d):\n" % (
test_result.failure_cnt, failure_or_failures,
test_result.error_cnt, error_or_errors,
test_result.test_cnt
)
msg += failure_report

# If no tests are supposed to fail or some failed for which we were not able to count errors fail now
if max_failed_tests == 0 or failed_test_suites != all_failed_test_suites:
raise EasyBuildError(msg)
else:
msg += '\n\n' + ' '.join([
"The PyTorch test suite is known to include some flaky tests,",
"which may fail depending on the specifics of the system or the context in which they are run.",
"For this PyTorch installation, EasyBuild allows up to %d tests to fail." % max_failed_tests,
"We recommend to double check that the failing tests listed above ",
"are known to be flaky, or do not affect your intended usage of PyTorch.",
"In case of doubt, reach out to the EasyBuild community (via GitHub, Slack, or mailing list).",
])
# Print to console, the user should really be aware that we are accepting failing tests here...
print_warning(msg)

# Also log this warning in the file log
self.log.warning(msg)

if failed_test_cnt > max_failed_tests:
raise EasyBuildError("Too many failed tests (%d), maximum allowed is %d",
failed_test_cnt, max_failed_tests)
# If no tests are supposed to fail, don't print the explanation, just fail
if max_failed_tests == 0:
raise EasyBuildError(failure_report)
msg = failure_report + '\n\n' + ' '.join([
"The PyTorch test suite is known to include some flaky tests,",
"which may fail depending on the specifics of the system or the context in which they are run.",
"For this PyTorch installation, EasyBuild allows up to %d tests to fail." % max_failed_tests,
"We recommend to double check that the failing tests listed above ",
"are known to be flaky, or do not affect your intended usage of PyTorch.",
"In case of doubt, reach out to the EasyBuild community (via GitHub, Slack, or mailing list).",
])
# Print to console in addition to file,
# the user should really be aware that we are accepting failing tests here...
print_warning(msg, log=self.log)

if failed_test_cnt > max_failed_tests:
raise EasyBuildError("Too many failed tests (%d), maximum allowed is %d",
failed_test_cnt, max_failed_tests)
elif failure_report:
raise EasyBuildError("Test ended with failures! Exit code: %s\n%s", tests_ec, failure_report)
elif tests_ec:
Expand Down Expand Up @@ -576,11 +641,11 @@ def make_module_req_guess(self):
return guesses


if __name__ == '__main__':
arg = sys.argv[1]
if not os.path.isfile(arg):
raise RuntimeError('Expected a test result file to parse, got: ' + arg)
with open(arg, 'r') as f:
def parse_logfile(file):
"""Parse the EB logfile and print the failed tests"""
if not os.path.isfile(file):
raise RuntimeError('Expected a test result file to parse, got: ' + file)
with open(file, 'r') as f:
content = f.read()
m = re.search(r'cmd .*python[^ ]* run_test\.py .* exited with exit code.*output', content)
if m:
@@ -592,3 +657,7 @@ def make_module_req_guess(self):

print("Failed test names: ", find_failed_test_names(content))
print("Test result: ", parse_test_log(content))


if __name__ == '__main__':
parse_logfile(sys.argv[1])
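
For quick inspection, the module can now be run standalone on a saved log file; a hypothetical invocation (the log path is made up):

python easybuild/easyblocks/p/pytorch.py /tmp/eb-build/easybuild-PyTorch.log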