152 changes: 152 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1-foss-2022b.eb
@@ -0,0 +1,152 @@
name = 'PyTorch'
version = '2.0.1'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

toolchain = {'name': 'foss', 'version': '2022b'}

source_urls = [GITHUB_RELEASE]
sources = ['%(namelower)s-v%(version)s.tar.gz']
patches = [
'PyTorch-1.7.0_disable-dev-shm-test.patch',
'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
'PyTorch-1.12.1_add-hypothesis-suppression.patch',
'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
'PyTorch-1.12.1_skip-test_round_robin.patch',
'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch',
'PyTorch-1.13.1_fix-protobuf-dependency.patch',
'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch',
'PyTorch-1.13.1_skip-failing-singular-grad-test.patch',
'PyTorch-1.13.1_skip-tests-without-fbgemm.patch',
'PyTorch-2.0.1_add-missing-vsx-vector-shift-functions.patch',
'PyTorch-2.0.1_avoid-test_quantization-failures.patch',
'PyTorch-2.0.1_disable-gcc12-warning.patch',
'PyTorch-2.0.1_disable-test-sharding.patch',
'PyTorch-2.0.1_fix-numpy-compat.patch',
'PyTorch-2.0.1_fix-shift-ops.patch',
'PyTorch-2.0.1_fix-skip-decorators.patch',
'PyTorch-2.0.1_fix-test_memory_profiler.patch',
'PyTorch-2.0.1_fix-test-ops-conf.patch',
'PyTorch-2.0.1_fix-torch.compile-on-ppc.patch',
'PyTorch-2.0.1_fix-ub-in-inductor-codegen.patch',
'PyTorch-2.0.1_fix-vsx-loadu.patch',
'PyTorch-2.0.1_ignore_unexpected_success_in_test_torchinductor_opinfo.patch',
'PyTorch-2.0.1_no-cuda-stubs-rpath.patch',
'PyTorch-2.0.1_remove-test-requiring-online-access.patch',
'PyTorch-2.0.1_skip-diff-test-on-ppc.patch',
'PyTorch-2.0.1_skip-failing-gradtest.patch',
'PyTorch-2.0.1_skip-test_baddbmm_cpu_bfloat16.patch',
'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch',
'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch',
]
checksums = [
{'pytorch-v2.0.1.tar.gz': '9c564ca440265c69400ef5fdd48bf15e28af5aa4bed84c95efaad960a6699998'},
{'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
{'PyTorch-1.11.1_skip-test_init_from_local_shards.patch':
'4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'},
{'PyTorch-1.12.1_add-hypothesis-suppression.patch':
'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
{'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
'1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
{'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
{'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
{'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch':
'5c7be91a6096083a0b1315efe0001537499c600f1f569953c6a2c7f4cc1d0910'},
{'PyTorch-1.13.1_fix-protobuf-dependency.patch':
'8bd755a0cab7233a243bc65ca57c9630dfccdc9bf8c9792f0de4e07a644fcb00'},
{'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch':
'bdde0f2105215c95a54de64ec4b1a4520528510663174fef6d5b900eb1db3937'},
{'PyTorch-1.13.1_skip-failing-singular-grad-test.patch':
'72688a57b2bb617665ad1a1d5e362c5111ae912c10936bb38a089c0204729f48'},
{'PyTorch-1.13.1_skip-tests-without-fbgemm.patch':
'481e595f673baf8ae58b41697a6792b83048b0264aa79b422f48cd8c22948bb7'},
{'PyTorch-2.0.1_add-missing-vsx-vector-shift-functions.patch':
'da44961d6c204403ba0c4b88cedccf06a7a3d24f29c4398545f96efae7a45c95'},
{'PyTorch-2.0.1_avoid-test_quantization-failures.patch':
'02e3f47e4ed1d7d6077e26f1ae50073dc2b20426269930b505f4aefe5d2f33cd'},
{'PyTorch-2.0.1_disable-gcc12-warning.patch': 'f558dfc8f7cdcdc74c4c58ef7e8fe6d67870aec6386ac0d923f1b745d108eec7'},
{'PyTorch-2.0.1_disable-test-sharding.patch': 'a1ed7f21c9a269ea039a07a3d6574f885787b30ca5687143c96e096d31066cca'},
{'PyTorch-2.0.1_fix-numpy-compat.patch': 'f3e5798193e0909a415d824f13772973200965db84476c1737824f2735f2db94'},
{'PyTorch-2.0.1_fix-shift-ops.patch': '5ee655d5dba56d801d5618543b6ca299fa874939a3471f7b5449bfcb7f3f18c7'},
{'PyTorch-2.0.1_fix-skip-decorators.patch': '2039012cef45446065e1a2097839fe20bb29fe3c1dcc926c3695ebf29832e920'},
{'PyTorch-2.0.1_fix-test_memory_profiler.patch':
'fd03117c46f59c1c62227d31c410c4cdd98fd35410976758cb9e7ec947582ddb'},
{'PyTorch-2.0.1_fix-test-ops-conf.patch': '0f995e4f89baf3cbeb8666cbfe694666a2ef2bc53d97d6301f768b3ff9001fa4'},
{'PyTorch-2.0.1_fix-torch.compile-on-ppc.patch':
'20f9172ae696da0c5c7b3bae6f0bf1221192cb1cbac3a44526a415087834bee7'},
{'PyTorch-2.0.1_fix-ub-in-inductor-codegen.patch':
'1b37194f55ae678f3657b8728dfb896c18ffe8babe90987ce468c4fa9274f357'},
{'PyTorch-2.0.1_fix-vsx-loadu.patch': 'a0ffa61da2d47c6acd09aaf6d4791e527d8919a6f4f1aa7ed38454cdcadb1f72'},
{'PyTorch-2.0.1_ignore_unexpected_success_in_test_torchinductor_opinfo.patch':
'57e2985a5b7085c2786e4b0c4a5f0c81f6b2ae9d5804bbd552b06e8b1570f4c4'},
{'PyTorch-2.0.1_no-cuda-stubs-rpath.patch': '8902e58a762240f24cdbf0182e99ccdfc2a93492869352fcb4ca0ec7e407f83a'},
{'PyTorch-2.0.1_remove-test-requiring-online-access.patch':
'721ab0d35ed0ff8a46cb84ced5a98c0fb8ce6143cf6cea80b1360d3d7f64f584'},
{'PyTorch-2.0.1_skip-diff-test-on-ppc.patch': 'f6e39cd774e5663df25507a73d37ad598157c2eadb2f47ca20a537dbe4b3e14f'},
{'PyTorch-2.0.1_skip-failing-gradtest.patch': '8030bdec6ba49b057ab232d19a7f1a5e542e47e2ec340653a246ec9ed59f8bc1'},
{'PyTorch-2.0.1_skip-test_baddbmm_cpu_bfloat16.patch':
'199005bbbb913837e557358dee31535d8e3f63af9ac7cdcece624ab8e572e28a'},
{'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch':
'7047862abc1abaff62954da59700f36d4f39fcf83167a638183b1b7f8fec78ae'},
{'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch':
'166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'},
]

osdependencies = [OS_PKG_IBVERBS_DEV]

builddependencies = [
('CMake', '3.24.3'),
('hypothesis', '6.68.2'),
# For tests
('pytest-rerunfailures', '12.0'),
('pytest-shard', '0.1.2'),
]

dependencies = [
('Ninja', '1.11.1'), # Required for JIT compilation of C++ extensions
('Python', '3.10.8'),
('protobuf', '23.0'),
('protobuf-python', '4.23.0'),
('pybind11', '2.10.3'),
('SciPy-bundle', '2023.02'),
('PyYAML', '6.0'),
('MPFR', '4.2.0'),
('GMP', '6.2.1'),
('numactl', '2.0.16'),
('FFmpeg', '5.1.2'),
('Pillow', '9.4.0'),
('expecttest', '0.1.3'),
('networkx', '3.0'),
('sympy', '1.12'),
]

excluded_tests = {
'': [
# This test seems to take too long on NVIDIA Ampere at least.
'distributed/test_distributed_spawn',
# Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
'distributions/test_constraints',
# no xdoctest
'doctests',
# failing on broadwell
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
'test_native_mha',
# intermittent failures on various systems
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
'distributed/rpc/test_tensorpipe_agent',
]
}

runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'

# test_quantization in particular has a few corner cases that are triggered by random input values
# and cannot easily be avoided, see https://github.com/pytorch/pytorch/issues/107030.
# So allow a small number of tests to fail, as the tests usually succeed.
max_failed_tests = 3

tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'ai'
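
Note on the sanity check: the `tests = ['PyTorch-check-cpp-extension.py']` line above runs a small post-install check script whose contents are not part of this diff. Below is a minimal, hypothetical sketch of what such a check could do — it is an illustration only, not the actual script shipped with EasyBuild. It assumes `torch.utils.cpp_extension.load_inline` is used to JIT-compile a trivial C++ extension, which exercises pybind11, the C++ toolchain and the Ninja runtime dependency declared above.

# Hypothetical sketch (not the real PyTorch-check-cpp-extension.py):
# JIT-compile a trivial C++ extension and call it once.
import torch
from torch.utils.cpp_extension import load_inline

cpp_source = """
torch::Tensor add_one(torch::Tensor x) {
    return x + 1;
}
"""

# load_inline writes the source to a temporary build directory, compiles it
# (by default with Ninja) and imports the resulting module.
ext = load_inline(
    name="check_cpp_extension",
    cpp_sources=[cpp_source],
    functions=["add_one"],
    verbose=True,
)

result = ext.add_one(torch.zeros(3))
assert torch.equal(result, torch.ones(3)), "C++ extension returned an unexpected result"
print("C++ extension check passed")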
32 changes: 32 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1_disable-gcc12-warning.patch
@@ -0,0 +1,32 @@
GCC 12 emits a false-positive warning when compiling for some architectures, e.g. Intel Sapphire Rapids.
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112370

Suppress this warning such that the build doesn't error.

Author: Alexander Grund (TU Dresden)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 471fc8a8d3d..5eb7b432630 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -557,6 +557,7 @@ string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")
if(NOT MSVC)
string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -g -lineinfo --source-in-ptx")
string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -g -lineinfo --source-in-ptx")
+ append_cxx_flag_if_supported("-Wno-free-nonheap-object" CMAKE_CXX_FLAGS)
endif(NOT MSVC)

# Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not
diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake
index 60cca5383dd..76c02d7479f 100644
--- a/cmake/public/utils.cmake
+++ b/cmake/public/utils.cmake
@@ -548,6 +548,8 @@ function(torch_update_find_cuda_flags)
endif()
endfunction()

+include(CheckCXXCompilerFlag)
+
##############################################################################
# CHeck if given flag is supported and append it to provided outputvar
# Also define HAS_UPPER_CASE_FLAG_NAME variable
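
The helper used in this patch, append_cxx_flag_if_supported, builds on CMake's CheckCXXCompilerFlag module (included by the second hunk): it compiles a tiny test program with the candidate flag and only appends the flag when the compiler accepts it. A rough, hypothetical Python sketch of the same idea follows — assuming a g++ on PATH; the real logic lives in CMake, not in Python, and is not part of the patch.

# Rough illustration of "append a flag only if the compiler supports it".
import os
import subprocess
import tempfile

def cxx_flag_supported(flag, compiler="g++"):
    """Return True if compiling a trivial file with `flag` succeeds."""
    with tempfile.TemporaryDirectory() as tmpdir:
        src = os.path.join(tmpdir, "check.cpp")
        with open(src, "w") as f:
            f.write("int main() { return 0; }\n")
        # -Werror turns warnings produced by the candidate flag into errors
        result = subprocess.run(
            [compiler, "-Werror", flag, "-c", src, "-o", os.path.join(tmpdir, "check.o")],
            capture_output=True,
        )
        return result.returncode == 0

cxx_flags = []
if cxx_flag_supported("-Wno-free-nonheap-object"):
    cxx_flags.append("-Wno-free-nonheap-object")
print(cxx_flags)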
25 changes: 25 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1_skip-test_baddbmm_cpu_bfloat16.patch
@@ -0,0 +1,25 @@
test_baddbmm_cpu_bfloat16 in test_linalg.py fails with
> AssertionError: Tensor-likes are not close!
>
> Mismatched elements: 1387 / 6000 (23.1%)
> Greatest absolute difference: 3.98046875 at index (0, 11, 7) (up to 0.5 allowed)
> Greatest relative difference: 1324.7142857142858 at index (0, 4, 9) (up to 0.016 allowed)

This also happens with the official 2.0.1 pip package and appears to be a known flaky test: https://github.com/pytorch/pytorch/issues/103046
So treat the failure as expected and skip the test.

Author: Alexander Grund (TU Dresden)

diff --git a/test/test_linalg.py b/test/test_linalg.py
index 29a0e482d86..d195ad60add 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -5871,7 +5871,7 @@ scipy_lobpcg | {:10.2e} | {:10.2e} | {:6} | N/A

@precisionOverride({torch.half: 0.1, torch.bfloat16: 0.5})
@onlyNativeDeviceTypes
- @dtypes(*floating_and_complex_types_and(torch.bfloat16))
+ @dtypes(*floating_and_complex_types())
@tf32_on_and_off(0.05)
def test_baddbmm(self, device, dtype):
if self.device_type == 'cuda' and dtype is torch.bfloat16 and not SM53OrLater:
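
The change above drops torch.bfloat16 from the @dtypes decorator, so the flaky bfloat16 variant of test_baddbmm is no longer generated. For illustration, the precision gap that makes this variant fragile can be reproduced with a few lines of Python — a hedged sketch with made-up shapes and a fixed seed; the actual mismatch in the test depends on the random values drawn and on the test's own tolerance machinery.

# Sketch: compare baddbmm in bfloat16 and float32 against a float64 reference.
# bfloat16 has only 7 mantissa bits (vs 23 for float32), so its error is
# orders of magnitude larger and, with unlucky inputs, can exceed the test's
# 0.5 absolute tolerance.
import torch

torch.manual_seed(0)
inp = torch.randn(1, 12, 10)
batch1 = torch.randn(1, 12, 50)
batch2 = torch.randn(1, 50, 10)

ref = torch.baddbmm(inp.double(), batch1.double(), batch2.double())
bf16 = torch.baddbmm(inp.bfloat16(), batch1.bfloat16(), batch2.bfloat16())
f32 = torch.baddbmm(inp, batch1, batch2)

print("bfloat16 max abs error:", (bf16.double() - ref).abs().max().item())
print("float32  max abs error:", (f32.double() - ref).abs().max().item())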