165 changes: 100 additions & 65 deletions easybuild/easyblocks/p/pytorch.py
@@ -32,6 +32,7 @@
import re
import tempfile
import easybuild.tools.environment as env
from collections import namedtuple
from distutils.version import LooseVersion
from easybuild.easyblocks.generic.pythonpackage import PythonPackage
from easybuild.framework.easyconfig import CUSTOM
@@ -42,6 +43,94 @@
from easybuild.tools.systemtools import POWER, get_cpu_architecture


def get_count_for_pattern(regex, text):
"""Match the regexp containing a single group and return the integer value of the matched group.
Return zero if no match or more than one match was found, and warn in the latter case.
"""
match = re.findall(regex, text)
if len(match) == 1:
return int(match[0])
elif len(match) > 1:
# Shouldn't happen, but means something went wrong with the regular expressions.
# Throw warning, as the build might be fine, no need to error on this.
warn_msg = "Error in counting the number of test failures in the output of the PyTorch test suite.\n"
warn_msg += "Please check the EasyBuild log to verify the number of failures (if any) was acceptable."
print_warning(warn_msg)
return 0


def extract_failed_tests_info(tests_out):
"""
Extract information about failed tests from output produced by PyTorch tests
"""

# Create clear summary report
failure_report = ""
failure_cnt = 0
error_cnt = 0
failed_test_suites = []

# Look for patterns like:
# Ran 219 tests in 67.325s
#
# FAILED (errors=10, skipped=190, expected failures=6)
# test_fx failed!
regex = (r"^Ran (?P<test_cnt>[0-9]+) tests.*$\n\n"
r"FAILED \((?P<failure_summary>.*)\)$\n"
r"(?:^(?:(?!failed!).)*$\n)*"
r"(?P<failed_test_suite_name>.*) failed!(?: Received signal: \w+)?\s*$")

for match in re.finditer(regex, tests_out, re.M):
# E.g. 'failures=3, errors=10, skipped=190, expected failures=6'
failure_summary, test_cnt, test_suite = match.group('failure_summary', 'test_cnt', 'failed_test_suite_name')
failure_report += test_suite + ' (' + test_cnt + " total tests, " + failure_summary + ')\n'
failure_cnt += get_count_for_pattern(r"(?<!expected )failures=([0-9]+)", failure_summary)
error_cnt += get_count_for_pattern(r"errors=([0-9]+)", failure_summary)
failed_test_suites.append(test_suite)

# Look for patterns like:
# ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 3.43s =====================
regex = r"^=+ (?P<failure_summary>.*) in [0-9]+\.*[0-9]*[a-zA-Z]* =+$\n(?P<failed_test_suite_name>.*) failed!$"

for match in re.finditer(regex, tests_out, re.M):
# E.g. '2 failed, 128 passed, 2 skipped, 2 warnings'
failure_summary, test_suite = match.group('failure_summary', 'failed_test_suite_name')
failure_report += test_suite + ' ' + failure_summary + '\n'
failure_cnt += get_count_for_pattern(r"([0-9]+) failed", failure_summary)
error_cnt += get_count_for_pattern(r"([0-9]+) error", failure_summary)
failed_test_suites.append(test_suite)

# Look for patterns like:
# AssertionError: 4 unit test(s) failed:
# ...
# distributed/test_c10d_gloo failed!
regex = (r"^AssertionError: (?P<failed_test_cnt>[0-9]+) unit test\(s\) failed:$\n"
r"(?:^(?:(?!failed!).)*$\n)*"
r"(?P<failed_test_group_name>.*) failed!$")
for match in re.finditer(regex, tests_out, re.M):
test_group, failed_test_cnt = match.group('failed_test_group_name', 'failed_test_cnt')
failure_report += test_group + ' (' + failed_test_cnt + " failed tests)\n"
failure_cnt += int(failed_test_cnt)
failed_test_suites.append(test_group)

# Look for patterns like:
# Running test_jit_cuda_fuser ... [2023-01-12 04:04:08.949222]
# ...
# AttributeError: 'NoneType' object has no attribute 'split'
# test_jit_cuda_fuser failed!
regex = (r"^Running (?P<test_name>test_[^\s]+) .*\n"
r"(?:^(?:(?!(Ran [0-9]+|failed!)).)*$\n)*"
r"\1 failed!$")
for match in re.finditer(regex, tests_out, re.M):
test_name = match.group('test_name')
failure_report += test_name + " (unknown failed test count)\n"
failure_cnt += 1
failed_test_suites.append(test_name)

TestsInfo = namedtuple('TestsInfo', ('failure_report', 'failure_cnt', 'error_cnt', 'failed_test_suites'))
return TestsInfo(failure_report, failure_cnt, error_cnt, failed_test_suites)


class EB_PyTorch(PythonPackage):
"""Support for building/installing PyTorch."""

@@ -268,65 +357,11 @@ def test_step(self):

tests_out, tests_ec = super(EB_PyTorch, self).test_step(return_output_ec=True)

def get_count_for_pattern(regex, text):
"""Match the regexp containing a single group and return the integer value of the matched group.
Return zero if no or more than 1 match was found and warn for the latter case
"""
match = re.findall(regex, text)
if len(match) == 1:
return int(match[0])
elif len(match) > 1:
# Shouldn't happen, but means something went wrong with the regular expressions.
# Throw warning, as the build might be fine, no need to error on this.
warn_msg = "Error in counting the number of test failures in the output of the PyTorch test suite.\n"
warn_msg += "Please check the EasyBuild log to verify the number of failures (if any) was acceptable."
print_warning(warn_msg)
return 0

# Create clear summary report
failure_report = ""
failure_cnt = 0
error_cnt = 0
failed_test_suites = []

# Grep for patterns like:
# Ran 219 tests in 67.325s
#
# FAILED (errors=10, skipped=190, expected failures=6)
# test_fx failed!
regex = (r"^Ran (?P<test_cnt>[0-9]+) tests.*$\n\n"
r"FAILED \((?P<failure_summary>.*)\)$\n"
r"(?:^(?:(?!failed!).)*$\n)*"
r"(?P<failed_test_suite_name>.*) failed!(?: Received signal: \w+)?\s*$")

for m in re.finditer(regex, tests_out, re.M):
# E.g. 'failures=3, errors=10, skipped=190, expected failures=6'
failure_summary = m.group('failure_summary')
total, test_suite = m.group('test_cnt', 'failed_test_suite_name')
failure_report += "{test_suite} ({total} total tests, {failure_summary})\n".format(
test_suite=test_suite, total=total, failure_summary=failure_summary
)
failure_cnt += get_count_for_pattern(r"(?<!expected )failures=([0-9]+)", failure_summary)
error_cnt += get_count_for_pattern(r"errors=([0-9]+)", failure_summary)
failed_test_suites.append(test_suite)

# Grep for patterns like:
# ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 3.43s =====================
regex = r"^=+ (?P<failure_summary>.*) in [0-9]+\.*[0-9]*[a-zA-Z]* =+$\n(?P<failed_test_suite_name>.*) failed!$"

for m in re.finditer(regex, tests_out, re.M):
# E.g. '2 failed, 128 passed, 2 skipped, 2 warnings'
failure_summary = m.group('failure_summary')
test_suite = m.group('failed_test_suite_name')
failure_report += "{test_suite} ({failure_summary})\n".format(
test_suite=test_suite, failure_summary=failure_summary
)
failure_cnt += get_count_for_pattern(r"([0-9]+) failed", failure_summary)
error_cnt += get_count_for_pattern(r"([0-9]+) error", failure_summary)
failed_test_suites.append(test_suite)
failed_tests_info = extract_failed_tests_info(tests_out)
failure_report = failed_tests_info.failure_report
Nit: To avoid long distracting lines like sorted(set(failed_tests_info.failed_test_suites)) (with "failed" appearing twice), maybe either unwrap the named tuple here, e.g. failure_report, failed_test_suites = failed_tests_info.failure_report, failed_tests_info.failed_test_suites, or rename it to test_info, since test_info.failed_test_suites is easier to read.
And maybe the test_cnt extraction can be done in there too, which would fit the name even better (then drop the "failed" from the function name).
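
A minimal sketch of the two alternatives suggested above (illustrative only; these names are not what the easyblock currently uses):

# Option 1: unwrap the named tuple right after the call
failed_tests_info = extract_failed_tests_info(tests_out)
failure_report, failed_test_suites = failed_tests_info.failure_report, failed_tests_info.failed_test_suites

# Option 2: give the named tuple instance a shorter name
test_info = extract_failed_tests_info(tests_out)
failure_report = test_info.failure_report
failed_test_suites = sorted(set(test_info.failed_test_suites))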


# Make the names unique and sorted
failed_test_suites = sorted(set(failed_test_suites))
failed_test_suites = sorted(set(failed_tests_info.failed_test_suites))
# Gather all failed test suites in case we missed any (e.g. when it exited due to syntax errors)
# Also unique and sorted to be able to compare the lists below
all_failed_test_suites = sorted(set(
@@ -341,17 +376,17 @@ def get_count_for_pattern(regex, text):
failure_report += '\n' + failure_report_save

# Calculate total number of unsuccessful and total tests
failed_test_cnt = failure_cnt + error_cnt
failed_test_cnt = failed_tests_info.failure_cnt + failed_tests_info.error_cnt
test_cnt = sum(int(hit) for hit in re.findall(r"^Ran (?P<test_cnt>[0-9]+) tests in", tests_out, re.M))
max_failed_tests = self.cfg['max_failed_tests']
self.log.info("%d unsuccessful tests (out of %d), - max. failed tests set to %d",
failed_test_cnt, test_cnt, max_failed_tests)

if failed_test_cnt > 0:
max_failed_tests = self.cfg['max_failed_tests']

failure_or_failures = 'failure' if failure_cnt == 1 else 'failures'
error_or_errors = 'error' if error_cnt == 1 else 'errors'
msg = "%d test %s, %d test %s (out of %d):\n" % (
failure_cnt, failure_or_failures, error_cnt, error_or_errors, test_cnt
)
failure_or_failures = 'failure' if failed_tests_info.failure_cnt == 1 else 'failures'
msg = "%d test %s, " % (failed_tests_info.failure_cnt, failure_or_failures)
error_or_errors = 'error' if failed_tests_info.error_cnt == 1 else 'errors'
msg += "%d test %s (out of %d):\n" % (failed_tests_info.error_cnt, error_or_errors, test_cnt)
msg += failure_report

# If no tests are supposed to fail, or some tests failed for which we were not able to count errors, fail now
125 changes: 125 additions & 0 deletions test/easyblocks/easyblock_specific.py
@@ -40,6 +40,7 @@
from easybuild.base.testing import TestCase
from easybuild.easyblocks.generic.cmakemake import det_cmake_version
from easybuild.easyblocks.generic.toolchain import Toolchain
from easybuild.easyblocks import pytorch
from easybuild.framework.easyblock import EasyBlock, get_easyblock_instance
from easybuild.framework.easyconfig.easyconfig import process_easyconfig
from easybuild.tools import config
@@ -51,6 +52,97 @@
from easybuild.tools.options import set_tmpdir
from easybuild.tools.py2vs3 import StringIO

PYTORCH_TESTS_OUTPUT = """
...
AssertionError: Expected zero exit code but got -6 for pid: 2006681

----------------------------------------------------------------------
Ran 2 tests in 6.576s

FAILED (failures=2)
distributed/fsdp/test_fsdp_input failed!
Running distributed/fsdp/test_fsdp_multiple_forward ... [2023-01-12 05:46:45.746098]

RuntimeError: Process 0 terminated or timed out after 610.0615825653076 seconds

----------------------------------------------------------------------
Ran 1 test in 610.744s

FAILED (errors=1)
Test exited with non-zero exitcode 1. Command to reproduce: /software/Python/3.9.6-GCCcore-11.2.0/bin/python distributed/test_c10d_gloo.py -v DistributedDataParallelTest.test_ddp_comm_hook_register_just_once

RuntimeError: Process 0 terminated or timed out after 610.0726096630096 seconds

----------------------------------------------------------------------
Ran 1 test in 610.729s

FAILED (errors=1)
Test exited with non-zero exitcode 1. Command to reproduce: /software/Python/3.9.6-GCCcore-11.2.0/bin/python distributed/test_c10d_gloo.py -v DistributedDataParallelTest.test_ddp_invalid_comm_hook_init
test_ddp_invalid_comm_hook_return_type (__main__.DistributedDataParallelTest)

AssertionError: 4 unit test(s) failed:
DistributedDataParallelTest.test_ddp_comm_hook_register_just_once
DistributedDataParallelTest.test_ddp_invalid_comm_hook_init
ProcessGroupGlooTest.test_round_robin
ProcessGroupGlooTest.test_round_robin_create_destroy
distributed/test_c10d_gloo failed!
Running distributed/test_c10d_nccl ... [2023-01-12 07:43:41.085197]

ValueError: For each axis slice, the sum of the observed frequencies must agree with the sum of the expected frequencies to a relative tolerance of 1e-08, but the percent differences are:
4.535600093557479e-05

----------------------------------------------------------------------
Ran 216 tests in 22.396s

FAILED (errors=4)
distributions/test_distributions failed!

Running test_autograd ... [2023-01-13 04:19:25.587981]
Executing ['/software/Python/3.9.6-GCCcore-11.2.0/bin/python', 'test_autograd.py', '-v'] ... [2023-01-13 04:19:25.588074]
...
test_autograd_views_codegen (__main__.TestAutograd) ... ok
...
======================================================================
FAIL: test_thread_shutdown (__main__.TestAutograd)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/tmp/vsc40023/easybuild_build/PyTorch/1.11.0/foss-2021b/pytorch-v1.11.0/test/test_autograd.py", line 4220, in test_thread_shutdown
self.assertRegex(s, "PYTORCH_API_USAGE torch.autograd.thread_shutdown")
AssertionError: Regex didn't match: 'PYTORCH_API_USAGE torch.autograd.thread_shutdown' not found in 'PYTORCH_API_USAGE torch.python.import\nPYTORCH_API_USAGE c10d.python.import\nPYTORCH_API_USAGE tensor.create\n'
----------------------------------------------------------------------
Ran 464 tests in 18.443s

FAILED (failures=1, skipped=52, expected failures=1)
test_autograd failed!
Running test_binary_ufuncs ... [2023-01-12 09:02:45.049490]
...

Running test_jit_cuda_fuser ... [2023-01-12 04:04:08.949222]
Executing ['/software/Python/3.9.6-GCCcore-11.2.0/bin/python', 'test_jit_cuda_fuser.py', '-v'] ... [2023-01-12 04:04:08.949319]
CUDA not available, skipping tests
monkeytype is not installed. Skipping tests for Profile-Directed Typing
Traceback (most recent call last):
File "/tmp/easybuild_build/PyTorch/1.11.0/foss-2021b/pytorch-v1.11.0/test/test_jit_cuda_fuser.py", line 25, in <module>
CUDA_MAJOR, CUDA_MINOR = (int(x) for x in torch.version.cuda.split('.'))
AttributeError: 'NoneType' object has no attribute 'split'
test_jit_cuda_fuser failed!
...
Running distributions/test_constraints ... [2023-01-12 09:05:15.013470]
SKIPPED [2] distributions/test_constraints.py:83: `biject_to` not implemented.
FAILED distributions/test_constraints.py::test_constraint[True-constraint_fn5-False-value5]
FAILED distributions/test_constraints.py::test_constraint[True-constraint_fn7-True-value7]
============= 2 failed, 128 passed, 2 skipped, 2 warnings in 8.66s =============
distributions/test_constraints failed!

Running distributions/rpc/test_tensorpipe_agent ... [2023-01-12 09:06:37.093571]
...
Ran 123 tests in 7.549s

FAILED (errors=2, skipped=2)
...
test_fx failed! Received signal: SIGSEGV
""" # noqa


class EasyBlockSpecificTest(TestCase):
""" Baseclass for easyblock testcases """
@@ -265,6 +357,39 @@ def test_det_cmake_version(self):
"""))
self.assertEqual(det_cmake_version(), '1.2.3-rc4')

def test_pytorch_extract_failed_tests_info(self):
"""
Test extract_failed_tests_info function from PyTorch easyblock.
"""
res = pytorch.extract_failed_tests_info(PYTORCH_TESTS_OUTPUT)
self.assertEqual(len(res), 4)

expected_failure_report = '\n'.join([
"distributed/fsdp/test_fsdp_input (2 total tests, failures=2)",
"distributions/test_distributions (216 total tests, errors=4)",
"test_autograd (464 total tests, failures=1, skipped=52, expected failures=1)",
"test_fx (123 total tests, errors=2, skipped=2)",
This or the SIGSEGV test needs to be a different one, or failed_test_suites could pass even when the signal was not detected: in the case of a forced termination due to a signal there won't be a test summary, will there?

@Flamefire Flamefire Jan 16, 2023


Looks like we have a bug here. For the output of

Running distributions/rpc/test_tensorpipe_agent ... [2023-01-12 09:06:37.093571]
...
Ran 123 tests in 7.549s
FAILED (errors=2, skipped=2)
...
test_fx failed! Received signal: SIGSEGV

it must NOT match the RegExp. The original, correct output is:

FAILED (errors=2, skipped=2)
distributed/rpc/test_tensorpipe_agent failed!

So the RegExp must be made to only count the "FAILED" summary if the next line is the failed test, see https://gist.github.com/Flamefire/dc1403ccefdebfc3412c6fbb2d5cbabd#file-pytorch-1-9-0-foss-2020b_partial-log-L480

I would actually add this wrong output as a test against regressions. With that snippet it should only find test_fx as failed (with an unknown number of tests).
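
A rough sketch of how the first pattern could be tightened along those lines, i.e. only counting a "FAILED (...)" summary when the very next line names the failed suite (an assumption about the fix, not necessarily the regex the easyblock ends up using):

# Drop the "skip intermediate lines" part so the "<suite> failed!" line must
# directly follow the "FAILED (...)" summary; a stray summary followed much
# later by "test_fx failed! Received signal: SIGSEGV" is then no longer
# attributed to an unrelated suite.
regex = (r"^Ran (?P<test_cnt>[0-9]+) tests.*$\n\n"
r"FAILED \((?P<failure_summary>.*)\)$\n"
r"(?P<failed_test_suite_name>.*) failed!(?: Received signal: \w+)?\s*$")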

"distributions/test_constraints 2 failed, 128 passed, 2 skipped, 2 warnings",
"distributed/test_c10d_gloo (4 failed tests)",
"test_jit_cuda_fuser (unknown failed test count)",
])
self.assertEqual(res.failure_report.strip(), expected_failure_report)
# test failures
self.assertEqual(res.failure_cnt, 10)
# test errors
self.assertEqual(res.error_cnt, 6)

expected_failed_test_suites = [
'distributed/fsdp/test_fsdp_input',
'distributions/test_distributions',
'test_autograd',
'test_fx',
'distributions/test_constraints',
'distributed/test_c10d_gloo',
'test_jit_cuda_fuser',
]
self.assertEqual(res.failed_test_suites, expected_failed_test_suites)


def suite():
"""Return all easyblock-specific tests."""