195 changes: 132 additions & 63 deletions easybuild/easyblocks/p/pytorch.py
@@ -44,18 +44,41 @@


if sys.version_info >= (3, 9):
from typing import NamedTuple
FailedTestNames = NamedTuple('FailedTestNames', [('error', list[str]), ('fail', list[str])])
TestSuiteResult = NamedTuple('TestSuiteResult', [('name', str), ('summary', str)])
TestResult = NamedTuple('TestResult', [('test_cnt', int),
('error_cnt', int),
('failure_cnt', int),
('failed_suites', list[TestSuiteResult])])
from dataclasses import dataclass

@dataclass
class FailedTestNames:
"""Hold list of tests names that failed with error or failure"""
error: list[str]
fail: list[str]

@dataclass
class TestSuiteResult:
"""Hold the name of a test suite and a summary of the failures"""
name: str
summary: str

@dataclass
class TestResult:
"""Status report and results of a test run"""
test_cnt: int
error_cnt: int
failure_cnt: int
failed_suites: list[TestSuiteResult]
terminated_suites: dict[str, str] # Name and signal of terminated suites
all_failed_suites: set[str] # Names of all failed suites
else:
from collections import namedtuple
FailedTestNames = namedtuple('FailedTestNames', ('error', 'fail'))
TestSuiteResult = namedtuple('TestSuiteResult', ('name', 'summary'))
TestResult = namedtuple('TestResult', ('test_cnt', 'error_cnt', 'failure_cnt', 'failed_suites'))
TerminatedTestSuite = namedtuple('TerminatedTestSuite', ('name', 'signal'))
TestResult = namedtuple('TestResult', ('test_cnt',
'error_cnt',
'failure_cnt',
'failed_suites',
'terminated_suites',
'all_failed_suites'
))
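
Both branches expose the same attribute-style field access, so the parsing helpers below behave identically on either Python version. A minimal sketch of constructing the result types (all values are made up for illustration):

result = TestResult(test_cnt=42, error_cnt=1, failure_cnt=2,
                    failed_suites=[TestSuiteResult(name='test_nn', summary='2 failed, 1 error')],
                    terminated_suites={'test_cpp_extensions': 'SIGSEGV'},
                    all_failed_suites={'test_nn', 'test_cpp_extensions'})
assert result.failure_cnt == 2  # attribute access works for both the dataclass and the namedtuple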


def find_failed_test_names(tests_out):
@@ -198,7 +221,17 @@ def get_count_for_pattern(regex, text):
for m in re.finditer(regex, tests_out, re.M):
test_cnt += sum(get_count_for_pattern(p, m.group("summary")) for p in count_patterns)

return TestResult(test_cnt=test_cnt, error_cnt=error_cnt, failure_cnt=failure_cnt, failed_suites=failed_suites)
# Gather all failed test suites in case we missed any,
# e.g. when it exited due to syntax errors or with a signal such as SIGSEGV
failed_suites_and_signal = set(
re.findall(r"^(?P<test_name>.*) failed!(?: Received signal: (\w+))?\s*$", tests_out, re.M)
)

return TestResult(test_cnt=test_cnt, error_cnt=error_cnt, failure_cnt=failure_cnt,
failed_suites=failed_suites,
# Assumes that the suite name is unique
terminated_suites={name: signal for name, signal in failed_suites_and_signal if signal},
all_failed_suites={i[0] for i in failed_suites_and_signal})
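
For reference, the catch-all regex above yields one (name, signal) tuple per "failed!" line, with an empty signal when none was reported. A small sketch against made-up test output (suite names are hypothetical):

import re

sample_out = (
    "test_autograd failed!\n"
    "test_cpp_extensions failed! Received signal: SIGSEGV\n"
)
matches = set(re.findall(r"^(?P<test_name>.*) failed!(?: Received signal: (\w+))?\s*$", sample_out, re.M))
# matches == {('test_autograd', ''), ('test_cpp_extensions', 'SIGSEGV')}
terminated = {name: signal for name, signal in matches if signal}  # {'test_cpp_extensions': 'SIGSEGV'}
all_failed = {name for name, _ in matches}  # {'test_autograd', 'test_cpp_extensions'}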


class EB_PyTorch(PythonPackage):
@@ -462,17 +495,17 @@ def test_step(self):
'excluded_tests': ' '.join(excluded_tests)
})

test_result = super(EB_PyTorch, self).test_step(return_output_ec=True)
if test_result is None:
parsed_test_result = super(EB_PyTorch, self).test_step(return_output_ec=True)
if parsed_test_result is None:
if self.cfg['runtest'] is False:
msg = "Do not set 'runtest' to False, use --skip-test-step instead."
else:
msg = "Tests did not run. Make sure 'runtest' is set to a command."
raise EasyBuildError(msg)

tests_out, tests_ec = test_result
tests_out, tests_ec = parsed_test_result

# Show failed subtests to aid in debugging failures
# Show failed subtests, if any, to aid in debugging failures
failed_test_names = find_failed_test_names(tests_out)
if failed_test_names.error or failed_test_names.fail:
msg = []
@@ -485,60 +518,92 @@ def test_step(self):
self.log.warning("\n".join(msg))

# Create clear summary report
test_result = parse_test_log(tests_out)
failure_report = ['%s (%s)' % (suite.name, suite.summary) for suite in test_result.failed_suites]
failed_test_suites = set(suite.name for suite in test_result.failed_suites)
# Gather all failed tests suites in case we missed any (e.g. when it exited due to syntax errors)
# Also unique to be able to compare the lists below
all_failed_test_suites = set(
re.findall(r"^(?P<test_name>.*) failed!(?: Received signal: \w+)?\s*$", tests_out, re.M)
)
# If we missed any test suites prepend a list of all failed test suites
parsed_test_result = parse_test_log(tests_out)
# Use a list of messages we can later join together
failure_msgs = ['\t%s (%s)' % (suite.name, suite.summary) for suite in parsed_test_result.failed_suites]
# These were accounted for
failed_test_suites = set(suite.name for suite in parsed_test_result.failed_suites)
# Those are all that failed according to the summary output
all_failed_test_suites = parsed_test_result.all_failed_suites
# We should have determined all failed test suites and only those.
# Otherwise show the mismatch and terminate later
if failed_test_suites != all_failed_test_suites:
failure_report = ['Failed tests (suites/files):'] + failure_report
failure_msgs.insert(0, 'Failed tests (suites/files):')
# Test suites where we didn't match a specific regexp and hence likely didn't count the failures
failure_report.extend('+ %s' % t for t in sorted(all_failed_test_suites - failed_test_suites))
uncounted_test_suites = all_failed_test_suites - failed_test_suites
if uncounted_test_suites:
failure_msgs.append('Could not count failed tests for the following test suites/files:')
for suite_name in sorted(uncounted_test_suites):
try:
signal = parsed_test_result.terminated_suites[suite_name]
reason = f'Terminated with {signal}'
except KeyError:
# Not ended with signal, might have failed due to e.g. syntax errors
reason = 'Undetected or did not run properly'
failure_msgs.append(f'\t{suite_name} ({reason})')
# Test suites not included in the catch-all regexp but counted. Should be empty.
failure_report.extend('? %s' % t for t in sorted(failed_test_suites - all_failed_test_suites))

failure_report = '\n'.join(failure_report)
unexpected_test_suites = failed_test_suites - all_failed_test_suites
if unexpected_test_suites:
failure_msgs.append('Counted failures of tests from the following test suites/files that are not '
'contained in the summary output of PyTorch:')
failure_msgs.extend(sorted(unexpected_test_suites))

# Calculate the total number of unsuccessful tests
failed_test_cnt = test_result.failure_cnt + test_result.error_cnt
failed_test_cnt = parsed_test_result.failure_cnt + parsed_test_result.error_cnt
# Only add count message if we detected any failed tests
if failed_test_cnt > 0:
failure_or_failures = 'failure' if parsed_test_result.failure_cnt == 1 else 'failures'
error_or_errors = 'error' if parsed_test_result.error_cnt == 1 else 'errors'
failure_msgs.insert(0, "%d test %s, %d test %s (out of %d):" % (
parsed_test_result.failure_cnt, failure_or_failures,
parsed_test_result.error_cnt, error_or_errors,
parsed_test_result.test_cnt
))

# Assemble final report
failure_report = '\n'.join(failure_msgs)

if failed_test_suites != all_failed_test_suites:
# Fail because we can't be sure how many tests failed
# so comparing to max_failed_tests cannot reasonably be done
if failed_test_suites | set(parsed_test_result.terminated_suites) == all_failed_test_suites:
# All failed test suites are either counted or terminated with a signal
msg = ('Failing because these test suites were terminated, which makes it impossible '
'to accurately count the failed tests: ')
msg += ", ".join("%s(%s)" % name_signal
for name_signal in sorted(parsed_test_result.terminated_suites.items()))
elif len(failed_test_suites) < len(all_failed_test_suites):
msg = ('Failing because not all failed tests could be determined. '
'Tests failed to start or the test accounting in the PyTorch EasyBlock needs updating!\n'
'Missing: ' + ', '.join(sorted(all_failed_test_suites - failed_test_suites)))
else:
msg = ('Failing because there were unexpected failures detected: ' +
', '.join(sorted(failed_test_suites - all_failed_test_suites)))
raise EasyBuildError(msg + '\n' +
'You can check the test failures (in the log) manually and if they are harmless, '
'use --ignore-test-failures to make the test step pass.\n' + failure_report)
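
# Worked example (annotation, not part of the change): given
#   failed_test_suites     = {'test_a'}             - failures counted via the known patterns
#   terminated_suites      = {'test_b': 'SIGSEGV'}  - suite killed by a signal
#   all_failed_test_suites = {'test_a', 'test_b'}   - from the catch-all 'failed!' regex
# the first branch applies, since {'test_a'} | {'test_b'} covers all_failed_test_suites,
# and the failure is attributed to the terminated suite rather than to a gap in the
# easyblock's test accounting.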

if failed_test_cnt > 0:
max_failed_tests = self.cfg['max_failed_tests']

failure_or_failures = 'failure' if test_result.failure_cnt == 1 else 'failures'
error_or_errors = 'error' if test_result.error_cnt == 1 else 'errors'
msg = "%d test %s, %d test %s (out of %d):\n" % (
test_result.failure_cnt, failure_or_failures,
test_result.error_cnt, error_or_errors,
test_result.test_cnt
)
msg += failure_report

# If no tests are supposed to fail or some failed for which we were not able to count errors fail now
if max_failed_tests == 0 or failed_test_suites != all_failed_test_suites:
raise EasyBuildError(msg)
else:
msg += '\n\n' + ' '.join([
"The PyTorch test suite is known to include some flaky tests,",
"which may fail depending on the specifics of the system or the context in which they are run.",
"For this PyTorch installation, EasyBuild allows up to %d tests to fail." % max_failed_tests,
"We recommend to double check that the failing tests listed above ",
"are known to be flaky, or do not affect your intended usage of PyTorch.",
"In case of doubt, reach out to the EasyBuild community (via GitHub, Slack, or mailing list).",
])
# Print to console, the user should really be aware that we are accepting failing tests here...
print_warning(msg)

# Also log this warning in the file log
self.log.warning(msg)

if failed_test_cnt > max_failed_tests:
raise EasyBuildError("Too many failed tests (%d), maximum allowed is %d",
failed_test_cnt, max_failed_tests)
# If no tests are supposed to fail, don't print the explanation, just fail
if max_failed_tests == 0:
raise EasyBuildError(failure_report)
msg = failure_report + '\n\n' + ' '.join([
"The PyTorch test suite is known to include some flaky tests,",
"which may fail depending on the specifics of the system or the context in which they are run.",
"For this PyTorch installation, EasyBuild allows up to %d tests to fail." % max_failed_tests,
"We recommend to double check that the failing tests listed above ",
"are known to be flaky, or do not affect your intended usage of PyTorch.",
"In case of doubt, reach out to the EasyBuild community (via GitHub, Slack, or mailing list).",
])
# Print to console in addition to file,
# the user should really be aware that we are accepting failing tests here...
print_warning(msg, log=self.log)

if failed_test_cnt > max_failed_tests:
raise EasyBuildError("Too many failed tests (%d), maximum allowed is %d",
failed_test_cnt, max_failed_tests)
elif failure_report:
raise EasyBuildError("Test ended with failures! Exit code: %s\n%s", tests_ec, failure_report)
elif tests_ec:
Expand Down Expand Up @@ -576,11 +641,11 @@ def make_module_req_guess(self):
return guesses


if __name__ == '__main__':
arg = sys.argv[1]
if not os.path.isfile(arg):
raise RuntimeError('Expected a test result file to parse, got: ' + arg)
with open(arg, 'r') as f:
def parse_logfile(file):
"""Parse the EB logfile and print the failed tests"""
if not os.path.isfile(file):
raise RuntimeError('Expected a test result file to parse, got: ' + file)
with open(file, 'r') as f:
content = f.read()
m = re.search(r'cmd .*python[^ ]* run_test\.py .* exited with exit code.*output', content)
if m:
@@ -592,3 +657,7 @@ def make_module_req_guess(self):

print("Failed test names: ", find_failed_test_names(content))
print("Test result: ", parse_test_log(content))


if __name__ == '__main__':
parse_logfile(sys.argv[1])
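
For quick inspection, the module can now be run standalone on a saved log file; a hypothetical invocation (the log path is made up):

python easybuild/easyblocks/p/pytorch.py /tmp/eb-build/easybuild-PyTorch.log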